Youtube & x.com fixed

This commit is contained in:
Abolfazl
2026-04-21 10:14:24 +03:30
parent f7fe687b6c
commit e54994a679
3 changed files with 273 additions and 41 deletions
+3 -1
View File
@@ -8,5 +8,7 @@
"listen_host": "127.0.0.1", "listen_host": "127.0.0.1",
"listen_port": 8085, "listen_port": 8085,
"log_level": "INFO", "log_level": "INFO",
"verify_ssl": true "verify_ssl": true,
"_hosts_comment": "Optional SNI-rewrite overrides. YouTube, googlevideo, gstatic, fonts.googleapis.com, ytimg, ggpht, doubleclick, etc. are ALREADY handled automatically (routed via google_ip with SNI=front_domain, same trick as the Xray MITM-DomainFronting config). Add entries here only for custom domains, e.g. \"example.com\": \"216.239.38.120\".",
"hosts": {}
} }
+78 -7
View File
@@ -319,7 +319,8 @@ class DomainFronter:
"""Send periodic pings to keep Apps Script warm + H2 connection alive.""" """Send periodic pings to keep Apps Script warm + H2 connection alive."""
while True: while True:
try: try:
await asyncio.sleep(180) # 3 minutes (ahead of Google's ~4min timeout) await asyncio.sleep(240) # 4 minutes — saves ~90 quota hits/day vs 180s
# Google's container timeout is ~5 min idle
if not self._h2 or not self._h2.is_connected: if not self._h2 or not self._h2.is_connected:
try: try:
await self._h2.reconnect() await self._h2.reconnect()
@@ -581,7 +582,9 @@ class DomainFronter:
status, resp_hdrs, resp_body = self._split_raw_response(first_resp) status, resp_hdrs, resp_body = self._split_raw_response(first_resp)
# No range support → return the single response as-is # No range support → return the single response as-is (status 200
# from the origin). The client sent a plain GET, so 200 is what it
# expects.
if status != 206: if status != 206:
return first_resp return first_resp
@@ -589,12 +592,16 @@ class DomainFronter:
content_range = resp_hdrs.get("content-range", "") content_range = resp_hdrs.get("content-range", "")
m = re.search(r"/(\d+)", content_range) m = re.search(r"/(\d+)", content_range)
if not m: if not m:
return first_resp # Can't parse — downgrade to 200 so the client (which sent a
# plain GET) doesn't get confused by 206 + Content-Range.
return self._rewrite_206_to_200(first_resp)
total_size = int(m.group(1)) total_size = int(m.group(1))
# Small file: probe already fetched it all # Small file: probe already fetched it all. MUST rewrite to 200
# because the client never sent a Range header — a stray 206 here
# breaks fetch()/XHR on sites like x.com and Cloudflare challenges.
if total_size <= chunk_size or len(resp_body) >= total_size: if total_size <= chunk_size or len(resp_body) >= total_size:
return first_resp return self._rewrite_206_to_200(first_resp)
# Calculate remaining ranges # Calculate remaining ranges
ranges = [] ranges = []
@@ -665,6 +672,40 @@ class DomainFronter:
result += "\r\n" result += "\r\n"
return result.encode() + full_body return result.encode() + full_body
@staticmethod
def _rewrite_206_to_200(raw: bytes) -> bytes:
"""Rewrite a 206 Partial Content response to 200 OK.
Used when we probed with a synthetic Range header but the client
never asked for one. Handing a 206 back to the browser for a plain
GET breaks XHR/fetch on sites like x.com and Cloudflare challenges
(they see it as an aborted/partial response). We drop the
Content-Range header and set Content-Length to the body size.
"""
sep = b"\r\n\r\n"
if sep not in raw:
return raw
header_section, body = raw.split(sep, 1)
lines = header_section.decode(errors="replace").split("\r\n")
if not lines:
return raw
# Replace status line
first = lines[0]
if " 206" in first:
lines[0] = first.replace(" 206 Partial Content", " 200 OK")\
.replace(" 206", " 200 OK")
# Drop Content-Range and recalculate Content-Length
filtered = [lines[0]]
for ln in lines[1:]:
low = ln.lower()
if low.startswith("content-range:"):
continue
if low.startswith("content-length:"):
continue
filtered.append(ln)
filtered.append(f"Content-Length: {len(body)}")
return ("\r\n".join(filtered) + "\r\n\r\n").encode() + body
def _build_payload(self, method, url, headers, body): def _build_payload(self, method, url, headers, body):
"""Build the JSON relay payload dict.""" """Build the JSON relay payload dict."""
payload = { payload = {
@@ -1127,12 +1168,42 @@ class DomainFronter:
skip = {"transfer-encoding", "connection", "keep-alive", skip = {"transfer-encoding", "connection", "keep-alive",
"content-length", "content-encoding"} "content-length", "content-encoding"}
for k, v in resp_headers.items(): for k, v in resp_headers.items():
if k.lower() not in skip: if k.lower() in skip:
result += f"{k}: {v}\r\n" continue
# Apps Script returns multi-valued headers (e.g. Set-Cookie) as a
# JavaScript array. Emit each value as its own header line.
# A single string that holds multiple Set-Cookie values joined
# with ", " also needs to be split, otherwise the browser sees
# one malformed cookie and sites like x.com fail.
values = v if isinstance(v, list) else [v]
if k.lower() == "set-cookie":
expanded = []
for item in values:
expanded.extend(self._split_set_cookie(str(item)))
values = expanded
for val in values:
result += f"{k}: {val}\r\n"
result += f"Content-Length: {len(resp_body)}\r\n" result += f"Content-Length: {len(resp_body)}\r\n"
result += "\r\n" result += "\r\n"
return result.encode() + resp_body return result.encode() + resp_body
@staticmethod
def _split_set_cookie(blob: str) -> list[str]:
"""Split a Set-Cookie string that may contain multiple cookies.
Apps Script sometimes joins multiple Set-Cookie values with ", ",
which collides with the comma that legitimately appears inside the
`Expires` attribute (e.g. "Expires=Wed, 21 Oct 2026 ..."). We split
only on commas that are immediately followed by a cookie name=value
pair (token '=' ...), leaving date commas intact.
"""
if not blob:
return []
# Split on ", " but only when the following text looks like the start
# of a new cookie (a token followed by '=').
parts = re.split(r",\s*(?=[A-Za-z0-9!#$%&'*+\-.^_`|~]+=)", blob)
return [p.strip() for p in parts if p.strip()]
def _split_raw_response(self, raw: bytes): def _split_raw_response(self, raw: bytes):
"""Split a raw HTTP response into (status, headers_dict, body).""" """Split a raw HTTP response into (status, headers_dict, body)."""
if b"\r\n\r\n" not in raw: if b"\r\n\r\n" not in raw:
+192 -33
View File
@@ -12,6 +12,7 @@ Supports:
import asyncio import asyncio
import logging import logging
import re import re
import ssl
import time import time
from domain_fronter import DomainFronter from domain_fronter import DomainFronter
@@ -113,6 +114,10 @@ class ProxyServer:
self._http_tunnels: dict = {} self._http_tunnels: dict = {}
self._tunnel_lock = asyncio.Lock() self._tunnel_lock = asyncio.Lock()
# hosts override — DNS fake-map: domain/suffix → IP
# Checked before any real DNS lookup; supports exact and suffix matching.
self._hosts: dict[str, str] = config.get("hosts", {})
if self.mode == "apps_script": if self.mode == "apps_script":
try: try:
from mitm import MITMCertManager from mitm import MITMCertManager
@@ -185,9 +190,16 @@ class ProxyServer:
await writer.drain() await writer.drain()
if self.mode == "apps_script": if self.mode == "apps_script":
# Google services: tunnel directly (no MITM) to avoid override_ip = self._sni_rewrite_ip(host)
# Google's anti-bot detection from Apps Script IPs/UA. if override_ip:
if self._is_google_domain(host): # SNI-blocked domain: MITM-decrypt from browser, then
# re-connect to the override IP with SNI=front_domain so
# the ISP never sees the blocked hostname in the TLS handshake.
log.info("SNI-rewrite tunnel → %s via %s (SNI: %s)",
host, override_ip, self.fronter.sni_host)
await self._do_sni_rewrite_tunnel(host, port, reader, writer,
connect_ip=override_ip)
elif self._is_google_domain(host):
log.info("Direct tunnel → %s (Google domain, skipping relay)", host) log.info("Direct tunnel → %s (Google domain, skipping relay)", host)
await self._do_direct_tunnel(host, port, reader, writer) await self._do_direct_tunnel(host, port, reader, writer)
else: else:
@@ -195,11 +207,70 @@ class ProxyServer:
else: else:
await self.fronter.tunnel(host, port, reader, writer) await self.fronter.tunnel(host, port, reader, writer)
# ── Hosts override (fake DNS) ─────────────────────────────────
# Built-in list of domain suffixes that must be reached via Google's frontend
# IP with the outgoing SNI rewritten to `front_domain` (default:
# www.google.com). These are Google-owned services whose real SNI is
# DPI-blocked in some countries, but that Google serves from the same edge IP
# as www.google.com.
# Users don't need to configure anything: any host that equals one of these
# entries, or ends with "." + entry, is transparently SNI-rewritten to the
# configured `google_ip`.
# The config "hosts" map still takes precedence (for custom overrides).
_SNI_REWRITE_SUFFIXES = (
    "youtube.com",
    "youtu.be",
    "youtube-nocookie.com",
    "ytimg.com",
    "ggpht.com",
    "gvt1.com",
    "gvt2.com",
    "doubleclick.net",
    "googlesyndication.com",
    "googleadservices.com",
    "google-analytics.com",
    "googletagmanager.com",
    "googletagservices.com",
    "fonts.googleapis.com",
)
def _sni_rewrite_ip(self, host: str) -> str | None:
"""Return the IP to SNI-rewrite `host` through, or None.
Order of precedence:
1. Explicit entry in config `hosts` map (exact or suffix match).
2. Built-in `_SNI_REWRITE_SUFFIXES` → mapped to config `google_ip`.
"""
ip = self._hosts_ip(host)
if ip:
return ip
h = host.lower().rstrip(".")
for suffix in self._SNI_REWRITE_SUFFIXES:
if h == suffix or h.endswith("." + suffix):
return self.fronter.connect_host # configured google_ip
return None
def _hosts_ip(self, host: str) -> str | None:
"""Return override IP for host if defined in config 'hosts', else None.
Supports exact match and suffix match (e.g. 'youtube.com' matches
'www.youtube.com', 'm.youtube.com', etc.).
"""
h = host.lower().rstrip(".")
if h in self._hosts:
return self._hosts[h]
# suffix match: check every parent label
parts = h.split(".")
for i in range(1, len(parts)):
parent = ".".join(parts[i:])
if parent in self._hosts:
return self._hosts[parent]
return None
# ── Google domain detection ─────────────────────────────────── # ── Google domain detection ───────────────────────────────────
# Only domains whose SNI the ISP does NOT block. # Only domains whose SNI the ISP does NOT block — direct tunnel is safe.
# YouTube/googlevideo are blocked by SNI inspection in Iran, # YouTube/googlevideo SNIs are blocked; they go through _do_sni_rewrite_tunnel
# so they MUST go through the MITM relay (domain-fronted). # via the hosts map instead.
_GOOGLE_SUFFIXES = ( _GOOGLE_SUFFIXES = (
".google.com", ".google.co", ".google.com", ".google.co",
".googleapis.com", ".gstatic.com", ".googleapis.com", ".gstatic.com",
@@ -223,21 +294,22 @@ class ProxyServer:
async def _do_direct_tunnel(self, host: str, port: int, async def _do_direct_tunnel(self, host: str, port: int,
reader: asyncio.StreamReader, reader: asyncio.StreamReader,
writer: asyncio.StreamWriter): writer: asyncio.StreamWriter,
connect_ip: str | None = None):
"""Pipe raw TLS bytes directly to the target server. """Pipe raw TLS bytes directly to the target server.
Used for Google domains: the browser's TLS goes end-to-end connect_ip overrides DNS: the TCP connection goes to that IP
with Google, preserving real User-Agent and avoiding while the browser's TLS (SNI=host) is piped through unchanged.
Apps Script IP/bot-detection issues. Defaults to the configured google_ip for Google-category domains.
""" """
google_ip = self.fronter.connect_host target_ip = connect_ip or self.fronter.connect_host
try: try:
r_remote, w_remote = await asyncio.wait_for( r_remote, w_remote = await asyncio.wait_for(
asyncio.open_connection(google_ip, port), timeout=10 asyncio.open_connection(target_ip, port), timeout=10
) )
except Exception as e: except Exception as e:
log.error("Direct tunnel connect failed (%s via %s): %s", log.error("Direct tunnel connect failed (%s via %s): %s",
host, google_ip, e) host, target_ip, e)
return return
async def pipe(src, dst, label): async def pipe(src, dst, label):
@@ -263,6 +335,76 @@ class ProxyServer:
pipe(r_remote, writer, f"{host}→client"), pipe(r_remote, writer, f"{host}→client"),
) )
# ── SNI-rewrite tunnel ────────────────────────────────────────
async def _do_sni_rewrite_tunnel(self, host: str, port: int, reader, writer,
                                 connect_ip: str | None = None):
    """MITM-decrypt TLS from the browser, then re-encrypt toward connect_ip
    using SNI=front_domain (e.g. www.google.com).

    The ISP only ever sees SNI=www.google.com in the outgoing handshake,
    hiding the blocked hostname (e.g. www.youtube.com).

    Args:
        host: Hostname the client asked to CONNECT to; used to obtain the
            MITM server context and for log messages.
        port: Remote port to connect to on the target IP.
        reader: Client-side stream reader.
        writer: Client-side stream writer; its transport is upgraded to
            TLS in place.
        connect_ip: IP to open the outbound connection to. Falls back to
            the fronter's ``connect_host`` when omitted.

    Returns:
        None. Both failure paths (client TLS accept, outbound connect) log
        and return without raising.
    """
    target_ip = connect_ip or self.fronter.connect_host
    sni_out = self.fronter.sni_host  # e.g. "www.google.com"

    # Step 1: MITM — accept TLS from the browser
    ssl_ctx_server = self.mitm.get_server_context(host)
    # We are inside a coroutine, so the running loop is the right one;
    # get_event_loop() is the legacy spelling for this.
    loop = asyncio.get_running_loop()
    transport = writer.transport
    protocol = transport.get_protocol()
    try:
        new_transport = await loop.start_tls(
            transport, protocol, ssl_ctx_server, server_side=True,
        )
    except Exception as e:
        log.debug("SNI-rewrite TLS accept failed (%s): %s", host, e)
        return
    # Point the existing writer at the TLS transport so later writes are
    # encrypted. NOTE: this pokes a private asyncio attribute.
    writer._transport = new_transport

    # Step 2: open outgoing TLS to target IP with the safe SNI
    # NOTE(review): no ALPN protocols are configured on this context;
    # confirm the MITM server context does not negotiate h2 with the
    # browser, otherwise the two sides would speak different protocols.
    ssl_ctx_client = ssl.create_default_context()
    if not self.fronter.verify_ssl:
        ssl_ctx_client.check_hostname = False
        ssl_ctx_client.verify_mode = ssl.CERT_NONE
    try:
        r_out, w_out = await asyncio.wait_for(
            asyncio.open_connection(
                target_ip, port,
                ssl=ssl_ctx_client,
                server_hostname=sni_out,
            ),
            timeout=10,
        )
    except Exception as e:
        log.error("SNI-rewrite outbound connect failed (%s via %s): %s",
                  host, target_ip, e)
        return

    # Step 3: pipe application-layer bytes between the two TLS sessions
    async def pipe(src, dst, label):
        try:
            while True:
                data = await src.read(65536)
                if not data:
                    break
                dst.write(data)
                await dst.drain()
        except (ConnectionError, asyncio.CancelledError):
            pass
        except Exception as exc:
            log.debug("Pipe %s ended: %s", label, exc)
        finally:
            # Closing the destination when one direction ends lets the
            # opposite pipe see EOF and finish as well.
            try:
                dst.close()
            except Exception:
                pass

    await asyncio.gather(
        pipe(reader, w_out, f"client→{host}"),
        pipe(r_out, writer, f"{host}→client"),
    )
# ── MITM CONNECT (apps_script mode) ─────────────────────────── # ── MITM CONNECT (apps_script mode) ───────────────────────────
async def _do_mitm_connect(self, host: str, port: int, reader, writer): async def _do_mitm_connect(self, host: str, port: int, reader, writer):
@@ -430,44 +572,61 @@ class ProxyServer:
@staticmethod @staticmethod
def _inject_cors_headers(response: bytes, origin: str) -> bytes: def _inject_cors_headers(response: bytes, origin: str) -> bytes:
"""Overwrite any existing CORS headers and inject permissive ones.""" """Inject CORS headers only if the upstream response lacks them.
We must NOT overwrite the origin server's CORS headers: sites like
x.com return carefully-scoped Access-Control-Allow-Headers that list
specific custom headers (e.g. x-csrf-token). Replacing them with
wildcards together with Allow-Credentials: true makes browsers
reject the response (per the Fetch spec, "*" is literal when
credentials are included), which the site then blames on privacy
extensions. So we only fill in what the server omitted.
"""
sep = b"\r\n\r\n" sep = b"\r\n\r\n"
if sep not in response: if sep not in response:
return response return response
header_section, body = response.split(sep, 1) header_section, body = response.split(sep, 1)
lines = header_section.decode(errors="replace").split("\r\n") lines = header_section.decode(errors="replace").split("\r\n")
# Drop existing Access-Control-* headers
lines = [ln for ln in lines if not ln.lower().startswith("access-control-")] existing = {ln.split(":", 1)[0].strip().lower()
for ln in lines if ":" in ln}
# If the upstream already handled CORS, leave it completely alone.
if "access-control-allow-origin" in existing:
return response
# Otherwise inject a minimal, credential-safe set (no wildcards,
# since wildcards combined with credentials are invalid).
allow_origin = origin or "*" allow_origin = origin or "*"
lines += [ additions = [f"Access-Control-Allow-Origin: {allow_origin}"]
f"Access-Control-Allow-Origin: {allow_origin}", if allow_origin != "*":
"Access-Control-Allow-Credentials: true", additions.append("Access-Control-Allow-Credentials: true")
"Access-Control-Allow-Methods: GET, POST, PUT, DELETE, PATCH, OPTIONS", additions.append("Vary: Origin")
"Access-Control-Allow-Headers: *", return ("\r\n".join(lines + additions) + "\r\n\r\n").encode() + body
"Access-Control-Expose-Headers: *",
]
return ("\r\n".join(lines) + "\r\n\r\n").encode() + body
async def _relay_smart(self, method, url, headers, body): async def _relay_smart(self, method, url, headers, body):
"""Choose optimal relay strategy based on request type. """Choose optimal relay strategy based on request type.
ALL GET requests go through relay_parallel: it does one probe - GET requests for likely-large downloads use parallel-range.
request and only splits into parallel chunks if the response - All other requests (API calls, HTML, JSON, XHR) go through the
is large and the server supports ranges. Small responses still single-request relay. This avoids injecting a synthetic Range
use a single request (no overhead). header on normal traffic, which some origins honor by returning
206 — breaking fetch()/XHR on sites like x.com or Cloudflare
challenge pages.
""" """
if method == "GET" and not body: if method == "GET" and not body:
# Skip parallel-range if the client already sent a Range header # Respect client's own Range header verbatim.
# (we must forward it verbatim, not modify it).
if headers: if headers:
for k in headers: for k in headers:
if k.lower() == "range": if k.lower() == "range":
return await self.fronter.relay( return await self.fronter.relay(
method, url, headers, body method, url, headers, body
) )
return await self.fronter.relay_parallel( # Only probe with Range when the URL looks like a big file.
method, url, headers, body if self._is_likely_download(url, headers):
) return await self.fronter.relay_parallel(
method, url, headers, body
)
return await self.fronter.relay(method, url, headers, body) return await self.fronter.relay(method, url, headers, body)
def _is_likely_download(self, url: str, headers: dict) -> bool: def _is_likely_download(self, url: str, headers: dict) -> bool: