From b3b92208ed545293981a9ac789ee2fc0a335069b Mon Sep 17 00:00:00 2001
From: Abolfazl
Date: Sat, 9 May 2026 03:20:59 +0330
Subject: [PATCH] Fixed youtube safe search and live (without needing exit node)

---
 src/core/constants.py     |  11 +++
 src/proxy/proxy_server.py | 152 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 157 insertions(+), 6 deletions(-)

diff --git a/src/core/constants.py b/src/core/constants.py
index 03bf06f..11be4b5 100644
--- a/src/core/constants.py
+++ b/src/core/constants.py
@@ -173,6 +173,17 @@ GOOGLE_OWNED_EXACT = frozenset({
 })
 
 
+# ── Relay URL patterns ───────────────────────────────────────────────────
+# URL path prefixes that are forced through the Apps Script relay.
+# Format: "host/path/prefix" (no scheme). The host is MITM'd so paths can
+# be inspected; only URLs starting with the pattern go to relay, all other
+# paths on that host are forwarded via SNI-rewrite (fast path).
+# Can be extended via config.json "relay_url_patterns" key.
+RELAY_URL_PATTERNS: tuple[str, ...] = (
+    "youtube.com/youtubei/",
+)
+
+
 # ── SNI-rewrite suffixes ──────────────────────────────────────────────────
 # Google-owned properties whose real SNI is DPI-blocked but are served by
 # the same edge IP as `front_domain`. Routed through the configured
diff --git a/src/proxy/proxy_server.py b/src/proxy/proxy_server.py
index fe2dbce..7580565 100644
--- a/src/proxy/proxy_server.py
+++ b/src/proxy/proxy_server.py
@@ -32,6 +32,7 @@ from core.constants import (
     LARGE_FILE_EXTS,
     MAX_HEADER_BYTES,
     MAX_REQUEST_BODY_BYTES,
+    RELAY_URL_PATTERNS,
     SNI_REWRITE_SUFFIXES,
     TCP_CONNECT_TIMEOUT,
     TRACE_HOST_SUFFIXES,
@@ -187,6 +188,51 @@ class ProxyServer:
         else:
             self._SNI_REWRITE_SUFFIXES = SNI_REWRITE_SUFFIXES
 
+        # relay_url_patterns: list of URL path prefixes
+        # (e.g. "youtube.com/youtubei/") that are forced through the Apps Script
+        # relay even when youtube_via_relay is false.
+        # The host is extracted and removed from SNI-rewrite so the proxy can
+        # MITM-decrypt and inspect paths. Requests whose URL starts with a
+        # pattern go to relay; all other paths on that host are forwarded
+        # directly via SNI-rewrite HTTP (fast path).
+        # When youtube_via_relay is true, RELAY_URL_PATTERNS is ignored entirely
+        # so all of youtube.com goes through relay without path inspection.
+        # Defaults to RELAY_URL_PATTERNS from constants.py; config key extends it.
+        _youtube_via_relay: bool = config.get("youtube_via_relay", False)
+        relay_patterns: list[str] = [
+            str(p).strip() for p in config.get("relay_url_patterns", []) if str(p).strip()
+        ]
+        if not _youtube_via_relay:
+            relay_patterns = list(RELAY_URL_PATTERNS) + relay_patterns
+
+        # Store the full patterns for per-request matching in _relay_smart.
+        # Scheme and leading dots are dropped so ".youtube.com/x" and
+        # "https://youtube.com/x" both normalize to "youtube.com/x".
+        self._relay_url_patterns: tuple[str, ...] = tuple(
+            re.sub(r'^https?://', '', p).lower().lstrip('.') for p in relay_patterns
+        )
+        if relay_patterns:
+            forced: set[str] = set()
+            for p in self._relay_url_patterns:
+                host_part = p.split('/')[0].lstrip('.')
+                if host_part:
+                    forced.add(host_part)
+            # Remove matched suffixes from SNI-rewrite so they get MITM'd.
+            self._SNI_REWRITE_SUFFIXES = tuple(
+                s for s in self._SNI_REWRITE_SUFFIXES
+                if not any(
+                    s == h or s.endswith('.' + h) or h.endswith('.' + s)
+                    for h in forced
+                )
+            )
+            log.info(
+                "relay_url_patterns: MITM forced on %s; relay only for: %s",
+                ', '.join(sorted(forced)),
+                ', '.join(self._relay_url_patterns),
+            )
+        else:
+            self._relay_url_patterns = ()
+
         try:
             from .mitm import MITMCertManager, CA_CERT_FILE
             self.mitm = MITMCertManager()
@@ -1141,16 +1187,110 @@ class ProxyServer:
     # ── CORS helpers ──────────────────────────────────────────────
     # cors_preflight_response() and inject_cors_headers() live in proxy_support.
 
+    def _url_matches_relay_pattern(self, url: str) -> bool:
+        """Return True if url matches any entry in _relay_url_patterns.
+
+        Pattern format: "host/path" (no scheme). The url host may have
+        extra subdomains (e.g. www.youtube.com matches youtube.com).
+        """
+        normalized = re.sub(r'^https?://', '', url).lower()
+        slash = normalized.find('/')
+        url_host = normalized[:slash] if slash != -1 else normalized
+        url_path = normalized[slash:] if slash != -1 else '/'
+        for p in self._relay_url_patterns:
+            slash_p = p.find('/')
+            pat_host = p[:slash_p] if slash_p != -1 else p
+            pat_path = p[slash_p:] if slash_p != -1 else '/'
+            host_match = (url_host == pat_host or url_host.endswith('.' + pat_host))
+            if host_match and url_path.startswith(pat_path):
+                return True
+        return False
+
+    async def _forward_via_sni_rewrite(self, method: str, url: str,
+                                       headers: dict, body: bytes) -> bytes:
+        """Forward an HTTP request to its real origin via the SNI-rewrite path.
+
+        Connects to google_ip:443 with SNI=front_domain (DPI only sees a safe
+        Google SNI), then sends the actual HTTP/1.1 request with the real Host
+        header so YouTube's edge serves the correct response.
+
+        Returns the raw bytes read from the connection until the peer closes
+        it (the request is sent with ``Connection: close``).
+        """
+        # Parse host and path from URL.
+        stripped = re.sub(r'^https?://', '', url)
+        slash = stripped.find('/')
+        if slash == -1:
+            host = stripped
+            path = '/'
+        else:
+            host = stripped[:slash]
+            path = stripped[slash:]
+
+        # Build HTTP/1.1 request bytes.
+        # Drop any client-supplied Host/Connection (whatever their casing) so
+        # the request never carries duplicates of the headers set below.
+        req_headers = {
+            k: v for k, v in headers.items()
+            if str(k).lower() not in ('host', 'connection')
+        }
+        req_headers['Host'] = host
+        # Use Connection: close so we don't need to manage keep-alive.
+        req_headers['Connection'] = 'close'
+        req_lines = [f"{method} {path} HTTP/1.1\r\n"]
+        for k, v in req_headers.items():
+            req_lines.append(f"{k}: {v}\r\n")
+        req_lines.append("\r\n")
+        request_bytes = "".join(req_lines).encode() + (body or b"")
+
+        r, w = await asyncio.wait_for(
+            asyncio.open_connection(
+                self.fronter.connect_host,
+                443,
+                ssl=self.fronter._ssl_ctx(),
+                server_hostname=self.fronter.sni_host,
+            ),
+            timeout=self._tcp_connect_timeout,
+        )
+        try:
+            w.write(request_bytes)
+            await w.drain()
+            chunks = []
+            while True:
+                chunk = await asyncio.wait_for(r.read(65536), timeout=30)
+                if not chunk:
+                    break
+                chunks.append(chunk)
+        finally:
+            try:
+                w.close()
+            except Exception:
+                pass
+        return b"".join(chunks)
+
     async def _relay_smart(self, method, url, headers, body):
         """Choose optimal relay strategy based on request type.
 
-        - GET requests for likely-large downloads use parallel-range.
-        - All other requests (API calls, HTML, JSON, XHR) go through the
-          single-request relay. This avoids injecting a synthetic Range
-          header on normal traffic, which some origins honor by returning
-          206 — breaking fetch()/XHR on sites like x.com or Cloudflare
-          challenge pages.
+        - If the URL's host is covered by relay_url_patterns but its path does
+          NOT match, forward via SNI-rewrite HTTP (fast direct path).
+        - GET requests for likely-large downloads use parallel-range relay.
+        - All other requests go through the single-request relay.
         """
+        # Path-level relay routing: only matching URL prefixes go through relay;
+        # everything else on the same host is forwarded via SNI-rewrite.
+        if self._relay_url_patterns and not self._url_matches_relay_pattern(url):
+            # Check if this host is one we pulled out of SNI-rewrite.
+            stripped = re.sub(r'^https?://', '', url).lower()
+            slash = stripped.find('/')
+            req_host = stripped[:slash] if slash != -1 else stripped
+            pattern_hosts = {p.split('/')[0] for p in self._relay_url_patterns}
+            host_covered = any(
+                req_host == h or req_host.endswith('.' + h)
+                for h in pattern_hosts
+            )
+            if host_covered:
+                return await self._forward_via_sni_rewrite(method, url, headers, body)
+
         if method == "GET" and not body:
             # Respect client's own Range header verbatim.
             if header_value(headers, "range"):