Youtube & x.com fixed

This commit is contained in:
Abolfazl
2026-04-21 10:14:24 +03:30
parent f7fe687b6c
commit e54994a679
3 changed files with 273 additions and 41 deletions
+3 -1
View File
@@ -8,5 +8,7 @@
"listen_host": "127.0.0.1",
"listen_port": 8085,
"log_level": "INFO",
"verify_ssl": true
"verify_ssl": true,
"_hosts_comment": "Optional SNI-rewrite overrides. YouTube, googlevideo, gstatic, fonts.googleapis.com, ytimg, ggpht, doubleclick, etc. are ALREADY handled automatically (routed via google_ip with SNI=front_domain, same trick as the Xray MITM-DomainFronting config). Add entries here only for custom domains, e.g. \"example.com\": \"216.239.38.120\".",
"hosts": {}
}
+78 -7
View File
@@ -319,7 +319,8 @@ class DomainFronter:
"""Send periodic pings to keep Apps Script warm + H2 connection alive."""
while True:
try:
await asyncio.sleep(180) # 3 minutes (ahead of Google's ~4min timeout)
await asyncio.sleep(240) # 4 minutes — saves ~90 quota hits/day vs 180s
# Google's container timeout is ~5 min idle
if not self._h2 or not self._h2.is_connected:
try:
await self._h2.reconnect()
@@ -581,7 +582,9 @@ class DomainFronter:
status, resp_hdrs, resp_body = self._split_raw_response(first_resp)
# No range support → return the single response as-is
# No range support → return the single response as-is (status 200
# from the origin). The client sent a plain GET, so 200 is what it
# expects.
if status != 206:
return first_resp
@@ -589,12 +592,16 @@ class DomainFronter:
content_range = resp_hdrs.get("content-range", "")
m = re.search(r"/(\d+)", content_range)
if not m:
return first_resp
# Can't parse — downgrade to 200 so the client (which sent a
# plain GET) doesn't get confused by 206 + Content-Range.
return self._rewrite_206_to_200(first_resp)
total_size = int(m.group(1))
# Small file: probe already fetched it all
# Small file: probe already fetched it all. MUST rewrite to 200
# because the client never sent a Range header — a stray 206 here
# breaks fetch()/XHR on sites like x.com and Cloudflare challenges.
if total_size <= chunk_size or len(resp_body) >= total_size:
return first_resp
return self._rewrite_206_to_200(first_resp)
# Calculate remaining ranges
ranges = []
@@ -665,6 +672,40 @@ class DomainFronter:
result += "\r\n"
return result.encode() + full_body
@staticmethod
def _rewrite_206_to_200(raw: bytes) -> bytes:
"""Rewrite a 206 Partial Content response to 200 OK.
Used when we probed with a synthetic Range header but the client
never asked for one. Handing a 206 back to the browser for a plain
GET breaks XHR/fetch on sites like x.com and Cloudflare challenges
(they see it as an aborted/partial response). We drop the
Content-Range header and set Content-Length to the body size.
"""
sep = b"\r\n\r\n"
if sep not in raw:
return raw
header_section, body = raw.split(sep, 1)
lines = header_section.decode(errors="replace").split("\r\n")
if not lines:
return raw
# Replace status line
first = lines[0]
if " 206" in first:
lines[0] = first.replace(" 206 Partial Content", " 200 OK")\
.replace(" 206", " 200 OK")
# Drop Content-Range and recalculate Content-Length
filtered = [lines[0]]
for ln in lines[1:]:
low = ln.lower()
if low.startswith("content-range:"):
continue
if low.startswith("content-length:"):
continue
filtered.append(ln)
filtered.append(f"Content-Length: {len(body)}")
return ("\r\n".join(filtered) + "\r\n\r\n").encode() + body
def _build_payload(self, method, url, headers, body):
"""Build the JSON relay payload dict."""
payload = {
@@ -1127,12 +1168,42 @@ class DomainFronter:
skip = {"transfer-encoding", "connection", "keep-alive",
"content-length", "content-encoding"}
for k, v in resp_headers.items():
if k.lower() not in skip:
result += f"{k}: {v}\r\n"
if k.lower() in skip:
continue
# Apps Script returns multi-valued headers (e.g. Set-Cookie) as a
# JavaScript array. Emit each value as its own header line.
# A single string that holds multiple Set-Cookie values joined
# with ", " also needs to be split, otherwise the browser sees
# one malformed cookie and sites like x.com fail.
values = v if isinstance(v, list) else [v]
if k.lower() == "set-cookie":
expanded = []
for item in values:
expanded.extend(self._split_set_cookie(str(item)))
values = expanded
for val in values:
result += f"{k}: {val}\r\n"
result += f"Content-Length: {len(resp_body)}\r\n"
result += "\r\n"
return result.encode() + resp_body
@staticmethod
def _split_set_cookie(blob: str) -> list[str]:
"""Split a Set-Cookie string that may contain multiple cookies.
Apps Script sometimes joins multiple Set-Cookie values with ", ",
which collides with the comma that legitimately appears inside the
`Expires` attribute (e.g. "Expires=Wed, 21 Oct 2026 ..."). We split
only on commas that are immediately followed by a cookie name=value
pair (token '=' ...), leaving date commas intact.
"""
if not blob:
return []
# Split on ", " but only when the following text looks like the start
# of a new cookie (a token followed by '=').
parts = re.split(r",\s*(?=[A-Za-z0-9!#$%&'*+\-.^_`|~]+=)", blob)
return [p.strip() for p in parts if p.strip()]
def _split_raw_response(self, raw: bytes):
"""Split a raw HTTP response into (status, headers_dict, body)."""
if b"\r\n\r\n" not in raw:
+189 -30
View File
@@ -12,6 +12,7 @@ Supports:
import asyncio
import logging
import re
import ssl
import time
from domain_fronter import DomainFronter
@@ -113,6 +114,10 @@ class ProxyServer:
self._http_tunnels: dict = {}
self._tunnel_lock = asyncio.Lock()
# hosts override — DNS fake-map: domain/suffix → IP
# Checked before any real DNS lookup; supports exact and suffix matching.
self._hosts: dict[str, str] = config.get("hosts", {})
if self.mode == "apps_script":
try:
from mitm import MITMCertManager
@@ -185,9 +190,16 @@ class ProxyServer:
await writer.drain()
if self.mode == "apps_script":
# Google services: tunnel directly (no MITM) to avoid
# Google's anti-bot detection from Apps Script IPs/UA.
if self._is_google_domain(host):
override_ip = self._sni_rewrite_ip(host)
if override_ip:
# SNI-blocked domain: MITM-decrypt from browser, then
# re-connect to the override IP with SNI=front_domain so
# the ISP never sees the blocked hostname in the TLS handshake.
log.info("SNI-rewrite tunnel → %s via %s (SNI: %s)",
host, override_ip, self.fronter.sni_host)
await self._do_sni_rewrite_tunnel(host, port, reader, writer,
connect_ip=override_ip)
elif self._is_google_domain(host):
log.info("Direct tunnel → %s (Google domain, skipping relay)", host)
await self._do_direct_tunnel(host, port, reader, writer)
else:
@@ -195,11 +207,70 @@ class ProxyServer:
else:
await self.fronter.tunnel(host, port, reader, writer)
# ── Hosts override (fake DNS) ─────────────────────────────────
# Built-in list of domains that must be reached via Google's frontend IP
# with SNI rewritten to `front_domain` (default: www.google.com).
# These are Google-owned services whose real SNI is DPI-blocked in some
# countries, but that Google serves from the same edge IP as www.google.com.
# Users don't need to configure anything — any host matching one of these
# suffixes is transparently SNI-rewritten to the configured `google_ip`.
# Config's "hosts" map still takes precedence (for custom overrides).
_SNI_REWRITE_SUFFIXES = (
"youtube.com",
"youtu.be",
"youtube-nocookie.com",
"ytimg.com",
"ggpht.com",
"gvt1.com",
"gvt2.com",
"doubleclick.net",
"googlesyndication.com",
"googleadservices.com",
"google-analytics.com",
"googletagmanager.com",
"googletagservices.com",
"fonts.googleapis.com",
)
def _sni_rewrite_ip(self, host: str) -> str | None:
"""Return the IP to SNI-rewrite `host` through, or None.
Order of precedence:
1. Explicit entry in config `hosts` map (exact or suffix match).
2. Built-in `_SNI_REWRITE_SUFFIXES` → mapped to config `google_ip`.
"""
ip = self._hosts_ip(host)
if ip:
return ip
h = host.lower().rstrip(".")
for suffix in self._SNI_REWRITE_SUFFIXES:
if h == suffix or h.endswith("." + suffix):
return self.fronter.connect_host # configured google_ip
return None
def _hosts_ip(self, host: str) -> str | None:
"""Return override IP for host if defined in config 'hosts', else None.
Supports exact match and suffix match (e.g. 'youtube.com' matches
'www.youtube.com', 'm.youtube.com', etc.).
"""
h = host.lower().rstrip(".")
if h in self._hosts:
return self._hosts[h]
# suffix match: check every parent label
parts = h.split(".")
for i in range(1, len(parts)):
parent = ".".join(parts[i:])
if parent in self._hosts:
return self._hosts[parent]
return None
# ── Google domain detection ───────────────────────────────────
# Only domains whose SNI the ISP does NOT block.
# YouTube/googlevideo are blocked by SNI inspection in Iran,
# so they MUST go through the MITM relay (domain-fronted).
# Only domains whose SNI the ISP does NOT block — direct tunnel is safe.
# YouTube/googlevideo SNIs are blocked; they go through _do_sni_rewrite_tunnel
# via the hosts map instead.
_GOOGLE_SUFFIXES = (
".google.com", ".google.co",
".googleapis.com", ".gstatic.com",
@@ -223,21 +294,22 @@ class ProxyServer:
async def _do_direct_tunnel(self, host: str, port: int,
reader: asyncio.StreamReader,
writer: asyncio.StreamWriter):
writer: asyncio.StreamWriter,
connect_ip: str | None = None):
"""Pipe raw TLS bytes directly to the target server.
Used for Google domains: the browser's TLS goes end-to-end
with Google, preserving real User-Agent and avoiding
Apps Script IP/bot-detection issues.
connect_ip overrides DNS: the TCP connection goes to that IP
while the browser's TLS (SNI=host) is piped through unchanged.
Defaults to the configured google_ip for Google-category domains.
"""
google_ip = self.fronter.connect_host
target_ip = connect_ip or self.fronter.connect_host
try:
r_remote, w_remote = await asyncio.wait_for(
asyncio.open_connection(google_ip, port), timeout=10
asyncio.open_connection(target_ip, port), timeout=10
)
except Exception as e:
log.error("Direct tunnel connect failed (%s via %s): %s",
host, google_ip, e)
host, target_ip, e)
return
async def pipe(src, dst, label):
@@ -263,6 +335,76 @@ class ProxyServer:
pipe(r_remote, writer, f"{host}→client"),
)
# ── SNI-rewrite tunnel ────────────────────────────────────────
async def _do_sni_rewrite_tunnel(self, host: str, port: int, reader, writer,
connect_ip: str | None = None):
"""MITM-decrypt TLS from browser, then re-encrypt toward connect_ip
using SNI=front_domain (e.g. www.google.com).
The ISP only ever sees SNI=www.google.com in the outgoing handshake,
hiding the blocked hostname (e.g. www.youtube.com).
"""
target_ip = connect_ip or self.fronter.connect_host
sni_out = self.fronter.sni_host # e.g. "www.google.com"
# Step 1: MITM — accept TLS from the browser
ssl_ctx_server = self.mitm.get_server_context(host)
loop = asyncio.get_event_loop()
transport = writer.transport
protocol = transport.get_protocol()
try:
new_transport = await loop.start_tls(
transport, protocol, ssl_ctx_server, server_side=True,
)
except Exception as e:
log.debug("SNI-rewrite TLS accept failed (%s): %s", host, e)
return
writer._transport = new_transport
# Step 2: open outgoing TLS to target IP with the safe SNI
ssl_ctx_client = ssl.create_default_context()
if not self.fronter.verify_ssl:
ssl_ctx_client.check_hostname = False
ssl_ctx_client.verify_mode = ssl.CERT_NONE
try:
r_out, w_out = await asyncio.wait_for(
asyncio.open_connection(
target_ip, port,
ssl=ssl_ctx_client,
server_hostname=sni_out,
),
timeout=10,
)
except Exception as e:
log.error("SNI-rewrite outbound connect failed (%s via %s): %s",
host, target_ip, e)
return
# Step 3: pipe application-layer bytes between the two TLS sessions
async def pipe(src, dst, label):
try:
while True:
data = await src.read(65536)
if not data:
break
dst.write(data)
await dst.drain()
except (ConnectionError, asyncio.CancelledError):
pass
except Exception as exc:
log.debug("Pipe %s ended: %s", label, exc)
finally:
try:
dst.close()
except Exception:
pass
await asyncio.gather(
pipe(reader, w_out, f"client→{host}"),
pipe(r_out, writer, f"{host}→client"),
)
# ── MITM CONNECT (apps_script mode) ───────────────────────────
async def _do_mitm_connect(self, host: str, port: int, reader, writer):
@@ -430,41 +572,58 @@ class ProxyServer:
@staticmethod
def _inject_cors_headers(response: bytes, origin: str) -> bytes:
"""Overwrite any existing CORS headers and inject permissive ones."""
"""Inject CORS headers only if the upstream response lacks them.
We must NOT overwrite the origin server's CORS headers: sites like
x.com return carefully-scoped Access-Control-Allow-Headers that list
specific custom headers (e.g. x-csrf-token). Replacing them with
wildcards together with Allow-Credentials: true makes browsers
reject the response (per the Fetch spec, "*" is literal when
credentials are included), which the site then blames on privacy
extensions. So we only fill in what the server omitted.
"""
sep = b"\r\n\r\n"
if sep not in response:
return response
header_section, body = response.split(sep, 1)
lines = header_section.decode(errors="replace").split("\r\n")
# Drop existing Access-Control-* headers
lines = [ln for ln in lines if not ln.lower().startswith("access-control-")]
existing = {ln.split(":", 1)[0].strip().lower()
for ln in lines if ":" in ln}
# If the upstream already handled CORS, leave it completely alone.
if "access-control-allow-origin" in existing:
return response
# Otherwise inject a minimal, credential-safe set (no wildcards,
# since wildcards combined with credentials are invalid).
allow_origin = origin or "*"
lines += [
f"Access-Control-Allow-Origin: {allow_origin}",
"Access-Control-Allow-Credentials: true",
"Access-Control-Allow-Methods: GET, POST, PUT, DELETE, PATCH, OPTIONS",
"Access-Control-Allow-Headers: *",
"Access-Control-Expose-Headers: *",
]
return ("\r\n".join(lines) + "\r\n\r\n").encode() + body
additions = [f"Access-Control-Allow-Origin: {allow_origin}"]
if allow_origin != "*":
additions.append("Access-Control-Allow-Credentials: true")
additions.append("Vary: Origin")
return ("\r\n".join(lines + additions) + "\r\n\r\n").encode() + body
async def _relay_smart(self, method, url, headers, body):
"""Choose optimal relay strategy based on request type.
ALL GET requests go through relay_parallel: it does one probe
request and only splits into parallel chunks if the response
is large and the server supports ranges. Small responses still
use a single request (no overhead).
- GET requests for likely-large downloads use parallel-range.
- All other requests (API calls, HTML, JSON, XHR) go through the
single-request relay. This avoids injecting a synthetic Range
header on normal traffic, which some origins honor by returning
206 — breaking fetch()/XHR on sites like x.com or Cloudflare
challenge pages.
"""
if method == "GET" and not body:
# Skip parallel-range if the client already sent a Range header
# (we must forward it verbatim, not modify it).
# Respect client's own Range header verbatim.
if headers:
for k in headers:
if k.lower() == "range":
return await self.fronter.relay(
method, url, headers, body
)
# Only probe with Range when the URL looks like a big file.
if self._is_likely_download(url, headers):
return await self.fronter.relay_parallel(
method, url, headers, body
)