mirror of
https://github.com/masterking32/MasterHttpRelayVPN.git
synced 2026-05-17 21:24:37 +03:00
Youtube & x.com fixed
This commit is contained in:
+3
-1
@@ -8,5 +8,7 @@
|
||||
"listen_host": "127.0.0.1",
|
||||
"listen_port": 8085,
|
||||
"log_level": "INFO",
|
||||
"verify_ssl": true
|
||||
"verify_ssl": true,
|
||||
"_hosts_comment": "Optional SNI-rewrite overrides. YouTube, googlevideo, gstatic, fonts.googleapis.com, ytimg, ggpht, doubleclick, etc. are ALREADY handled automatically (routed via google_ip with SNI=front_domain, same trick as the Xray MITM-DomainFronting config). Add entries here only for custom domains, e.g. \"example.com\": \"216.239.38.120\".",
|
||||
"hosts": {}
|
||||
}
|
||||
|
||||
+78
-7
@@ -319,7 +319,8 @@ class DomainFronter:
|
||||
"""Send periodic pings to keep Apps Script warm + H2 connection alive."""
|
||||
while True:
|
||||
try:
|
||||
await asyncio.sleep(180) # 3 minutes (ahead of Google's ~4min timeout)
|
||||
await asyncio.sleep(240) # 4 minutes — saves ~90 quota hits/day vs 180s
|
||||
# Google's container timeout is ~5 min idle
|
||||
if not self._h2 or not self._h2.is_connected:
|
||||
try:
|
||||
await self._h2.reconnect()
|
||||
@@ -581,7 +582,9 @@ class DomainFronter:
|
||||
|
||||
status, resp_hdrs, resp_body = self._split_raw_response(first_resp)
|
||||
|
||||
# No range support → return the single response as-is
|
||||
# No range support → return the single response as-is (status 200
|
||||
# from the origin). The client sent a plain GET, so 200 is what it
|
||||
# expects.
|
||||
if status != 206:
|
||||
return first_resp
|
||||
|
||||
@@ -589,12 +592,16 @@ class DomainFronter:
|
||||
content_range = resp_hdrs.get("content-range", "")
|
||||
m = re.search(r"/(\d+)", content_range)
|
||||
if not m:
|
||||
return first_resp
|
||||
# Can't parse — downgrade to 200 so the client (which sent a
|
||||
# plain GET) doesn't get confused by 206 + Content-Range.
|
||||
return self._rewrite_206_to_200(first_resp)
|
||||
total_size = int(m.group(1))
|
||||
|
||||
# Small file: probe already fetched it all
|
||||
# Small file: probe already fetched it all. MUST rewrite to 200
|
||||
# because the client never sent a Range header — a stray 206 here
|
||||
# breaks fetch()/XHR on sites like x.com and Cloudflare challenges.
|
||||
if total_size <= chunk_size or len(resp_body) >= total_size:
|
||||
return first_resp
|
||||
return self._rewrite_206_to_200(first_resp)
|
||||
|
||||
# Calculate remaining ranges
|
||||
ranges = []
|
||||
@@ -665,6 +672,40 @@ class DomainFronter:
|
||||
result += "\r\n"
|
||||
return result.encode() + full_body
|
||||
|
||||
@staticmethod
def _rewrite_206_to_200(raw: bytes) -> bytes:
    """Rewrite a 206 Partial Content response to 200 OK.

    Used when we probed with a synthetic Range header but the client
    never asked for one. Handing a 206 back to the browser for a plain
    GET breaks XHR/fetch on sites like x.com and Cloudflare challenges
    (they see it as an aborted/partial response).

    The header block is edited as bytes, so header values that are not
    valid UTF-8 pass through untouched instead of being replaced during a
    decode/encode round-trip.

    Args:
        raw: Complete raw HTTP response (status line, headers, body).

    Returns:
        The response with every Content-Range and Content-Length header
        removed and a single Content-Length matching the body size
        appended. When the status code is 206 the status line becomes
        "<version> 200 OK". Input that has no header/body separator is
        returned unchanged.
    """
    header_section, sep, body = raw.partition(b"\r\n\r\n")
    if not sep:
        return raw

    status_line, *header_lines = header_section.split(b"\r\n")

    # Compare the status-code field itself rather than substring-replacing
    # " 206": a non-standard reason phrase (e.g. "206 Partial") would
    # otherwise leave trailing junk such as "200 OK Partial".
    fields = status_line.split(None, 2)
    if len(fields) >= 2 and fields[1] == b"206":
        status_line = fields[0] + b" 200 OK"

    # Drop the range metadata and any stale length; the length is
    # recomputed from the body we actually hold.
    dropped = (b"content-range:", b"content-length:")
    kept = [ln for ln in header_lines if not ln.lower().startswith(dropped)]
    kept.append(b"Content-Length: " + str(len(body)).encode("ascii"))

    return b"\r\n".join([status_line, *kept]) + b"\r\n\r\n" + body
|
||||
|
||||
def _build_payload(self, method, url, headers, body):
|
||||
"""Build the JSON relay payload dict."""
|
||||
payload = {
|
||||
@@ -1127,12 +1168,42 @@ class DomainFronter:
|
||||
skip = {"transfer-encoding", "connection", "keep-alive",
|
||||
"content-length", "content-encoding"}
|
||||
for k, v in resp_headers.items():
|
||||
if k.lower() not in skip:
|
||||
result += f"{k}: {v}\r\n"
|
||||
if k.lower() in skip:
|
||||
continue
|
||||
# Apps Script returns multi-valued headers (e.g. Set-Cookie) as a
|
||||
# JavaScript array. Emit each value as its own header line.
|
||||
# A single string that holds multiple Set-Cookie values joined
|
||||
# with ", " also needs to be split, otherwise the browser sees
|
||||
# one malformed cookie and sites like x.com fail.
|
||||
values = v if isinstance(v, list) else [v]
|
||||
if k.lower() == "set-cookie":
|
||||
expanded = []
|
||||
for item in values:
|
||||
expanded.extend(self._split_set_cookie(str(item)))
|
||||
values = expanded
|
||||
for val in values:
|
||||
result += f"{k}: {val}\r\n"
|
||||
result += f"Content-Length: {len(resp_body)}\r\n"
|
||||
result += "\r\n"
|
||||
return result.encode() + resp_body
|
||||
|
||||
@staticmethod
def _split_set_cookie(blob: str) -> list[str]:
    """Break a combined Set-Cookie string into individual cookie strings.

    Several Set-Cookie values may arrive joined with ", ". A plain split
    on commas would also cut through the `Expires` attribute
    (e.g. "Expires=Wed, 21 Oct 2026 ..."), so a comma only counts as a
    boundary when the text right after it looks like the start of a new
    cookie: a token immediately followed by '='.

    Returns:
        The cookies in their original order, each stripped of surrounding
        whitespace; empty pieces are discarded. An empty input yields [].
    """
    if not blob:
        return []

    # Comma + optional whitespace, with a lookahead for "<token>=".
    boundary = r",\s*(?=[A-Za-z0-9!#$%&'*+\-.^_`|~]+=)"

    cookies = []
    for piece in re.split(boundary, blob):
        trimmed = piece.strip()
        if trimmed:
            cookies.append(trimmed)
    return cookies
|
||||
|
||||
def _split_raw_response(self, raw: bytes):
|
||||
"""Split a raw HTTP response into (status, headers_dict, body)."""
|
||||
if b"\r\n\r\n" not in raw:
|
||||
|
||||
+192
-33
@@ -12,6 +12,7 @@ Supports:
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import ssl
|
||||
import time
|
||||
|
||||
from domain_fronter import DomainFronter
|
||||
@@ -113,6 +114,10 @@ class ProxyServer:
|
||||
self._http_tunnels: dict = {}
|
||||
self._tunnel_lock = asyncio.Lock()
|
||||
|
||||
# hosts override — DNS fake-map: domain/suffix → IP
|
||||
# Checked before any real DNS lookup; supports exact and suffix matching.
|
||||
self._hosts: dict[str, str] = config.get("hosts", {})
|
||||
|
||||
if self.mode == "apps_script":
|
||||
try:
|
||||
from mitm import MITMCertManager
|
||||
@@ -185,9 +190,16 @@ class ProxyServer:
|
||||
await writer.drain()
|
||||
|
||||
if self.mode == "apps_script":
|
||||
# Google services: tunnel directly (no MITM) to avoid
|
||||
# Google's anti-bot detection from Apps Script IPs/UA.
|
||||
if self._is_google_domain(host):
|
||||
override_ip = self._sni_rewrite_ip(host)
|
||||
if override_ip:
|
||||
# SNI-blocked domain: MITM-decrypt from browser, then
|
||||
# re-connect to the override IP with SNI=front_domain so
|
||||
# the ISP never sees the blocked hostname in the TLS handshake.
|
||||
log.info("SNI-rewrite tunnel → %s via %s (SNI: %s)",
|
||||
host, override_ip, self.fronter.sni_host)
|
||||
await self._do_sni_rewrite_tunnel(host, port, reader, writer,
|
||||
connect_ip=override_ip)
|
||||
elif self._is_google_domain(host):
|
||||
log.info("Direct tunnel → %s (Google domain, skipping relay)", host)
|
||||
await self._do_direct_tunnel(host, port, reader, writer)
|
||||
else:
|
||||
@@ -195,11 +207,70 @@ class ProxyServer:
|
||||
else:
|
||||
await self.fronter.tunnel(host, port, reader, writer)
|
||||
|
||||
# ── Hosts override (fake DNS) ─────────────────────────────────
|
||||
|
||||
# Built-in list of domains that must be reached via Google's frontend IP
|
||||
# with SNI rewritten to `front_domain` (default: www.google.com).
|
||||
# These are Google-owned services whose real SNI is DPI-blocked in some
|
||||
# countries, but that Google serves from the same edge IP as www.google.com.
|
||||
# Users don't need to configure anything — any host matching one of these
|
||||
# suffixes is transparently SNI-rewritten to the configured `google_ip`.
|
||||
# Config's "hosts" map still takes precedence (for custom overrides).
|
||||
_SNI_REWRITE_SUFFIXES = (
|
||||
"youtube.com",
|
||||
"youtu.be",
|
||||
"youtube-nocookie.com",
|
||||
"ytimg.com",
|
||||
"ggpht.com",
|
||||
"gvt1.com",
|
||||
"gvt2.com",
|
||||
"doubleclick.net",
|
||||
"googlesyndication.com",
|
||||
"googleadservices.com",
|
||||
"google-analytics.com",
|
||||
"googletagmanager.com",
|
||||
"googletagservices.com",
|
||||
"fonts.googleapis.com",
|
||||
)
|
||||
|
||||
def _sni_rewrite_ip(self, host: str) -> str | None:
|
||||
"""Return the IP to SNI-rewrite `host` through, or None.
|
||||
|
||||
Order of precedence:
|
||||
1. Explicit entry in config `hosts` map (exact or suffix match).
|
||||
2. Built-in `_SNI_REWRITE_SUFFIXES` → mapped to config `google_ip`.
|
||||
"""
|
||||
ip = self._hosts_ip(host)
|
||||
if ip:
|
||||
return ip
|
||||
h = host.lower().rstrip(".")
|
||||
for suffix in self._SNI_REWRITE_SUFFIXES:
|
||||
if h == suffix or h.endswith("." + suffix):
|
||||
return self.fronter.connect_host # configured google_ip
|
||||
return None
|
||||
|
||||
def _hosts_ip(self, host: str) -> str | None:
|
||||
"""Return override IP for host if defined in config 'hosts', else None.
|
||||
|
||||
Supports exact match and suffix match (e.g. 'youtube.com' matches
|
||||
'www.youtube.com', 'm.youtube.com', etc.).
|
||||
"""
|
||||
h = host.lower().rstrip(".")
|
||||
if h in self._hosts:
|
||||
return self._hosts[h]
|
||||
# suffix match: check every parent label
|
||||
parts = h.split(".")
|
||||
for i in range(1, len(parts)):
|
||||
parent = ".".join(parts[i:])
|
||||
if parent in self._hosts:
|
||||
return self._hosts[parent]
|
||||
return None
|
||||
|
||||
# ── Google domain detection ───────────────────────────────────
|
||||
|
||||
# Only domains whose SNI the ISP does NOT block.
|
||||
# YouTube/googlevideo are blocked by SNI inspection in Iran,
|
||||
# so they MUST go through the MITM relay (domain-fronted).
|
||||
# Only domains whose SNI the ISP does NOT block — direct tunnel is safe.
|
||||
# YouTube/googlevideo SNIs are blocked; they go through _do_sni_rewrite_tunnel
|
||||
# via the hosts map instead.
|
||||
_GOOGLE_SUFFIXES = (
|
||||
".google.com", ".google.co",
|
||||
".googleapis.com", ".gstatic.com",
|
||||
@@ -223,21 +294,22 @@ class ProxyServer:
|
||||
|
||||
async def _do_direct_tunnel(self, host: str, port: int,
|
||||
reader: asyncio.StreamReader,
|
||||
writer: asyncio.StreamWriter):
|
||||
writer: asyncio.StreamWriter,
|
||||
connect_ip: str | None = None):
|
||||
"""Pipe raw TLS bytes directly to the target server.
|
||||
|
||||
Used for Google domains: the browser's TLS goes end-to-end
|
||||
with Google, preserving real User-Agent and avoiding
|
||||
Apps Script IP/bot-detection issues.
|
||||
connect_ip overrides DNS: the TCP connection goes to that IP
|
||||
while the browser's TLS (SNI=host) is piped through unchanged.
|
||||
Defaults to the configured google_ip for Google-category domains.
|
||||
"""
|
||||
google_ip = self.fronter.connect_host
|
||||
target_ip = connect_ip or self.fronter.connect_host
|
||||
try:
|
||||
r_remote, w_remote = await asyncio.wait_for(
|
||||
asyncio.open_connection(google_ip, port), timeout=10
|
||||
asyncio.open_connection(target_ip, port), timeout=10
|
||||
)
|
||||
except Exception as e:
|
||||
log.error("Direct tunnel connect failed (%s via %s): %s",
|
||||
host, google_ip, e)
|
||||
host, target_ip, e)
|
||||
return
|
||||
|
||||
async def pipe(src, dst, label):
|
||||
@@ -263,6 +335,76 @@ class ProxyServer:
|
||||
pipe(r_remote, writer, f"{host}→client"),
|
||||
)
|
||||
|
||||
# ── SNI-rewrite tunnel ────────────────────────────────────────
|
||||
|
||||
async def _do_sni_rewrite_tunnel(self, host: str, port: int, reader, writer,
                                 connect_ip: str | None = None):
    """MITM-decrypt TLS from browser, then re-encrypt toward connect_ip
    using SNI=front_domain (e.g. www.google.com).

    The ISP only ever sees SNI=www.google.com in the outgoing handshake,
    hiding the blocked hostname (e.g. www.youtube.com).

    Args:
        host: Hostname the browser asked to CONNECT to; used to obtain
            the server-side TLS context and for log labels.
        port: Destination port for the outbound connection.
        reader: Stream reader of the browser connection.
        writer: Stream writer of the browser connection; its transport
            is upgraded to TLS in place.
        connect_ip: IP to dial. Falls back to `self.fronter.connect_host`
            when omitted.

    Returns:
        None. Returns early (after logging) if either TLS handshake or
        the outbound connect fails; otherwise returns once both pipe
        directions have finished.
    """
    target_ip = connect_ip or self.fronter.connect_host
    sni_out = self.fronter.sni_host  # e.g. "www.google.com"

    # Step 1: MITM — accept TLS from the browser
    ssl_ctx_server = self.mitm.get_server_context(host)
    # We are inside a coroutine, so the running loop is the right one.
    loop = asyncio.get_running_loop()
    transport = writer.transport
    protocol = transport.get_protocol()
    try:
        new_transport = await loop.start_tls(
            transport, protocol, ssl_ctx_server, server_side=True,
        )
    except Exception as e:
        log.debug("SNI-rewrite TLS accept failed (%s): %s", host, e)
        return
    # StreamWriter exposes no public way to swap transports after
    # start_tls, so point it at the TLS transport directly.
    writer._transport = new_transport

    # Step 2: open outgoing TLS to target IP with the safe SNI
    ssl_ctx_client = ssl.create_default_context()
    if not self.fronter.verify_ssl:
        ssl_ctx_client.check_hostname = False
        ssl_ctx_client.verify_mode = ssl.CERT_NONE
    try:
        r_out, w_out = await asyncio.wait_for(
            asyncio.open_connection(
                target_ip, port,
                ssl=ssl_ctx_client,
                server_hostname=sni_out,
            ),
            timeout=10,
        )
    except Exception as e:
        log.error("SNI-rewrite outbound connect failed (%s via %s): %s",
                  host, target_ip, e)
        return

    # Step 3: pipe application-layer bytes between the two TLS sessions
    async def pipe(src, dst, label):
        """Copy src to dst until EOF or error, then close dst."""
        try:
            while True:
                data = await src.read(65536)
                if not data:
                    break
                dst.write(data)
                await dst.drain()
        except (ConnectionError, asyncio.CancelledError):
            # Peer went away or the tunnel is being torn down.
            pass
        except Exception as exc:
            log.debug("Pipe %s ended: %s", label, exc)
        finally:
            # Closing the destination lets the opposite direction see EOF.
            try:
                dst.close()
            except Exception:
                pass

    await asyncio.gather(
        pipe(reader, w_out, f"client→{host}"),
        pipe(r_out, writer, f"{host}→client"),
    )
|
||||
|
||||
# ── MITM CONNECT (apps_script mode) ───────────────────────────
|
||||
|
||||
async def _do_mitm_connect(self, host: str, port: int, reader, writer):
|
||||
@@ -430,44 +572,61 @@ class ProxyServer:
|
||||
|
||||
@staticmethod
def _inject_cors_headers(response: bytes, origin: str) -> bytes:
    """Inject CORS headers only if the upstream response lacks them.

    We must NOT overwrite the origin server's CORS headers: sites like
    x.com return carefully-scoped Access-Control-Allow-Headers that list
    specific custom headers (e.g. x-csrf-token). Replacing them with
    wildcards together with Allow-Credentials: true makes browsers
    reject the response (per the Fetch spec, "*" is literal when
    credentials are included), which the site then blames on privacy
    extensions. So we only fill in what the server omitted.

    The upstream header bytes are never re-encoded; new headers are
    appended after them.

    Args:
        response: Raw HTTP response (status line, headers, body).
        origin: Value of the request's Origin header; empty if absent.

    Returns:
        `response` unchanged when it has no header/body separator or
        already carries Access-Control-Allow-Origin. Otherwise the
        response with Access-Control-Allow-Origin appended, plus
        Access-Control-Allow-Credentials and Vary: Origin when a concrete
        origin is echoed back.
    """
    header_section, sep, body = response.partition(b"\r\n\r\n")
    if not sep:
        return response

    existing = {
        line.split(b":", 1)[0].strip().lower()
        for line in header_section.split(b"\r\n")
        if b":" in line
    }

    # If the upstream already handled CORS, leave it completely alone.
    if b"access-control-allow-origin" in existing:
        return response

    # Otherwise inject a minimal, credential-safe set (no wildcards,
    # since wildcards combined with credentials are invalid).
    allow_origin = origin or "*"
    additions = [f"Access-Control-Allow-Origin: {allow_origin}"]
    if allow_origin != "*":
        additions.append("Access-Control-Allow-Credentials: true")
        # The response now depends on the request's Origin header.
        additions.append("Vary: Origin")

    extra = "\r\n".join(additions).encode()
    return header_section + b"\r\n" + extra + sep + body
|
||||
|
||||
async def _relay_smart(self, method, url, headers, body):
    """Choose optimal relay strategy based on request type.

    - GET requests for likely-large downloads use parallel-range.
    - All other requests (API calls, HTML, JSON, XHR) go through the
      single-request relay. This avoids injecting a synthetic Range
      header on normal traffic, which some origins honor by returning
      206 — breaking fetch()/XHR on sites like x.com or Cloudflare
      challenge pages.

    Args:
        method: HTTP method of the client request.
        url: Full request URL.
        headers: Request headers mapping (may be empty or None).
        body: Request body (falsy when there is none).

    Returns:
        Whatever `self.fronter.relay` or `self.fronter.relay_parallel`
        returns for the request.
    """
    if method == "GET" and not body:
        # Respect client's own Range header verbatim.
        if headers and any(name.lower() == "range" for name in headers):
            return await self.fronter.relay(method, url, headers, body)
        # Only probe with Range when the URL looks like a big file.
        if self._is_likely_download(url, headers):
            return await self.fronter.relay_parallel(
                method, url, headers, body
            )
    return await self.fronter.relay(method, url, headers, body)
|
||||
|
||||
def _is_likely_download(self, url: str, headers: dict) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user