# MasterHttpRelayVPN/src/proxy/proxy_server.py

"""
Local HTTP proxy server.

Intercepts the user's browser traffic and forwards everything through
the Apps Script relay (MITM-decrypts HTTPS locally, forwards requests
as JSON to script.google.com fronted through www.google.com).
"""

import asyncio
import logging
import re
import socket
import ssl
import time
import ipaddress

try:
    import certifi
except Exception:  # optional dependency fallback
    certifi = None

from core.constants import (
    CACHE_MAX_MB,
    CLIENT_IDLE_TIMEOUT,
    DEFAULT_BYPASS_HOSTS,
    GOOGLE_DIRECT_ALLOW_EXACT,
    GOOGLE_DIRECT_ALLOW_SUFFIXES,
    GOOGLE_DIRECT_EXACT_EXCLUDE,
    GOOGLE_DIRECT_SUFFIX_EXCLUDE,
    GOOGLE_OWNED_EXACT,
    GOOGLE_OWNED_SUFFIXES,
    LARGE_FILE_EXTS,
    MAX_HEADER_BYTES,
    MAX_REQUEST_BODY_BYTES,
    RELAY_URL_PATTERNS,
    SNI_REWRITE_SUFFIXES,
    TCP_CONNECT_TIMEOUT,
    TRACE_HOST_SUFFIXES,
    UNCACHEABLE_HEADER_NAMES,
)
from relay.domain_fronter import DomainFronter
from .socks5 import negotiate_socks5
from .proxy_support import (
    ResponseCache,
    cors_preflight_response,
    has_unsupported_transfer_encoding,
    header_value,
    host_matches_rules,
    inject_cors_headers,
    is_ip_literal,
    load_host_rules,
    log_response_summary,
    parse_content_length,
)
from relay.relay_response import split_raw_response

log = logging.getLogger("Proxy")


class ProxyServer:
    """Local proxy front-end that accepts HTTP and SOCKS5 clients.

    Every requested target is routed by `_handle_target_tunnel`, which
    blocks it, tunnels it directly, SNI-rewrites it, or MITM-decrypts it
    and hands it to the relay.
    """

    # Pulled from constants.py so users can override any subset via config.
    # The four Google tables feed `_is_direct_google_excluded` and
    # `_is_direct_google_allowed`; the exact-match ones are merged with the
    # user's config lists in __init__.
    _GOOGLE_DIRECT_EXACT_EXCLUDE  = GOOGLE_DIRECT_EXACT_EXCLUDE
    _GOOGLE_DIRECT_SUFFIX_EXCLUDE = GOOGLE_DIRECT_SUFFIX_EXCLUDE
    _GOOGLE_DIRECT_ALLOW_EXACT    = GOOGLE_DIRECT_ALLOW_EXACT
    _GOOGLE_DIRECT_ALLOW_SUFFIXES = GOOGLE_DIRECT_ALLOW_SUFFIXES
    _TRACE_HOST_SUFFIXES          = TRACE_HOST_SUFFIXES
    # Default for the `chunked_download_extensions` config key; sorted so the
    # tuple is deterministic whatever container LARGE_FILE_EXTS is.
    _DOWNLOAD_DEFAULT_EXTS        = tuple(sorted(LARGE_FILE_EXTS))
    # NOTE(review): presumably substrings looked for in a request's Accept
    # header to recognise downloads — the consumer is not in this part of
    # the file, confirm before relying on it.
    _DOWNLOAD_ACCEPT_MARKERS      = (
        "application/octet-stream",
        "application/zip",
        "application/x-bittorrent",
        "video/",
        "audio/",
    )

    def __init__(self, config: dict):
        """Build the proxy from a parsed configuration mapping.

        Reads listener addresses, chunked-download tuning, host policy
        (block / bypass / adblock / `hosts` overrides), the direct-Google
        allow and exclude sets and the relay URL patterns from *config*,
        then creates the relay client (`DomainFronter`) and the MITM
        certificate manager.

        Args:
            config: Parsed configuration; every key read here is optional.

        Raises:
            ValueError: The HTTP and SOCKS5 listeners would share one
                host:port pair.
            SystemExit: The MITM module cannot be imported.
        """
        self.host = config.get("listen_host", "127.0.0.1")
        # Prefer the new key (http_port) but keep listen_port for old configs.
        self.port = config.get("http_port", config.get("listen_port", 8080))
        self.socks_enabled = True
        self.socks_host = config.get("socks5_host", self.host)
        self.socks_port = config.get("socks5_port", 1080)
        if self.socks_enabled and self.socks_host == self.host \
                and int(self.socks_port) == int(self.port):
            raise ValueError(
                f"http_port and socks5_port must differ on the same host "
                f"(both set to {self.port} on {self.host}). "
                f"Change one of them in config.json."
            )
        self.fronter = DomainFronter(config)
        self.mitm = None
        self._cache = ResponseCache(max_mb=CACHE_MAX_MB)
        # host (or "*.domain" group key) → wall-clock time until which the
        # direct route is skipped; see _remember_direct_failure.
        self._direct_fail_until: dict[str, float] = {}
        self._servers: list[asyncio.base_events.Server] = []
        self._client_tasks: set[asyncio.Task] = set()
        self._tcp_connect_timeout = self._cfg_float(
            config, "tcp_connect_timeout", TCP_CONNECT_TIMEOUT, minimum=1.0,
        )
        self._download_min_size = self._cfg_int(
            config, "chunked_download_min_size", 5 * 1024 * 1024, minimum=0,
        )
        self._download_chunk_size = self._cfg_int(
            config, "chunked_download_chunk_size", 512 * 1024, minimum=64 * 1024,
        )
        self._download_max_parallel = self._cfg_int(
            config, "chunked_download_max_parallel", 8, minimum=1,
        )
        self._download_max_chunks = self._cfg_int(
            config, "chunked_download_max_chunks", 256, minimum=1,
        )
        self._warmup_before_listen = True
        self._warmup_timeout = 20.0
        self._download_extensions, self._download_any_extension = (
            self._normalize_download_extensions(
                config.get(
                    "chunked_download_extensions",
                    list(self._DOWNLOAD_DEFAULT_EXTS),
                )
            )
        )

        # hosts override — DNS fake-map: domain/suffix → IP
        # Checked before any real DNS lookup; supports exact and suffix matching.
        # Keys are normalised the same way _hosts_ip normalises the host it
        # looks up (lower-case, no trailing dot); otherwise an entry such as
        # "YouTube.com" or "youtube.com." could never match.
        raw_hosts = config.get("hosts") or {}
        self._hosts: dict[str, str] = {
            str(name).strip().lower().rstrip("."): ip
            for name, ip in raw_hosts.items()
        }
        configured_direct_exclude = config.get("direct_google_exclude", [])
        self._direct_google_exclude = {
            h.lower().rstrip(".")
            for h in (
                list(self._GOOGLE_DIRECT_EXACT_EXCLUDE) +
                list(configured_direct_exclude)
            )
        }
        configured_direct_allow = config.get("direct_google_allow", [])
        self._direct_google_allow = {
            h.lower().rstrip(".")
            for h in (
                list(self._GOOGLE_DIRECT_ALLOW_EXACT) +
                list(configured_direct_allow)
            )
        }

        # ── Per-host policy ────────────────────────────────────────
        # block_hosts   — refuse traffic entirely (close or 403)
        # direct_hosts  — route directly (no MITM, no relay)
        # bypass_hosts  — legacy alias kept for backward compatibility
        # Both accept exact hostnames and leading-dot suffix patterns,
        # e.g. ".local" matches any *.local domain.
        self._block_hosts  = load_host_rules(config.get("block_hosts", []))

        # ── Adblock host lists ─────────────────────────────────────
        # adblock_lists: list of URLs to hosts-format blocklists.
        # Lists are loaded from disk cache at startup (fast), then
        # re-downloaded in background when the cache is stale.
        self._adblock_urls: list[str] = [
            str(u).strip() for u in config.get("adblock_lists", []) if u
        ]
        if self._adblock_urls:
            try:
                from core.adblock import load_all
                _ab_domains = load_all(self._adblock_urls)
                self._adblock_hosts = load_host_rules(_ab_domains)
                log.info(
                    "Adblock: %d domains active (%d lists)",
                    len(_ab_domains), len(self._adblock_urls),
                )
            except Exception as exc:
                # Adblock is optional: start with an empty rule set rather
                # than refusing to start.
                log.warning("Adblock: failed to load lists at startup: %s", exc)
                self._adblock_hosts = (set(), ())
        else:
            self._adblock_hosts = (set(), ())

        direct_hosts = config.get("direct_hosts", [])
        bypass_hosts = config.get("bypass_hosts")
        if bypass_hosts is None:
            bypass_hosts = list(DEFAULT_BYPASS_HOSTS)
        self._bypass_hosts = load_host_rules(
            list(bypass_hosts) + list(direct_hosts)
        )

        # Route YouTube through the relay when requested; the Google frontend
        # IP can enforce SafeSearch on the SNI-rewrite path.
        # Also force YouTube through relay if exit_node is in full mode,
        # so the exit node can intercept ALL traffic including YouTube.
        _youtube_via_relay = config.get("youtube_via_relay", False)
        _exit_node_full_mode = (
            self.fronter._exit_node_enabled and
            self.fronter._exit_node_mode == "full"
        )

        if _youtube_via_relay or _exit_node_full_mode:
            self._SNI_REWRITE_SUFFIXES = tuple(
                s for s in SNI_REWRITE_SUFFIXES
                if s not in self._YOUTUBE_SNI_SUFFIXES
            )
            reason = []
            if _youtube_via_relay:
                reason.append("youtube_via_relay=true")
            if _exit_node_full_mode:
                reason.append("exit_node.mode=full")
            log.info("YouTube routed through relay (%s)", ", ".join(reason))
        else:
            self._SNI_REWRITE_SUFFIXES = SNI_REWRITE_SUFFIXES

        # relay_url_patterns: list of URL path prefixes
        # (e.g. "youtube.com/youtubei/") that are forced through the Apps Script
        # relay even when youtube_via_relay is false.
        # The host is extracted and removed from SNI-rewrite so the proxy can
        # MITM-decrypt and inspect paths. Requests whose URL contains the full
        # pattern go to relay; all other paths on that host are forwarded
        # directly via SNI-rewrite HTTP (fast path).
        # The built-in RELAY_URL_PATTERNS from constants.py are prepended
        # unless youtube_via_relay is true; in that case only the patterns
        # from the config key apply. exit_node.mode=full on its own does not
        # suppress the built-in defaults.
        # Non-string entries are skipped instead of crashing on `.strip()`.
        relay_patterns: list[str] = [
            p.strip()
            for p in (config.get("relay_url_patterns") or [])
            if isinstance(p, str) and p.strip()
        ]
        if not _youtube_via_relay:
            relay_patterns = list(RELAY_URL_PATTERNS) + relay_patterns

        # Store the full patterns for per-request matching in _relay_smart.
        self._relay_url_patterns: tuple[str, ...] = tuple(
            re.sub(r'^https?://', '', p).lower() for p in relay_patterns
        )
        if relay_patterns:
            forced: set[str] = set()
            for p in self._relay_url_patterns:
                host_part = p.split('/')[0].lstrip('.')
                if host_part:
                    forced.add(host_part)
            # Remove matched suffixes from SNI-rewrite so they get MITM'd.
            self._SNI_REWRITE_SUFFIXES = tuple(
                s for s in self._SNI_REWRITE_SUFFIXES
                if not any(
                    s == h or s.endswith('.' + h) or h.endswith('.' + s)
                    for h in forced
                )
            )
            log.info(
                "relay_url_patterns: MITM forced on %s; relay only for: %s",
                ', '.join(sorted(forced)),
                ', '.join(self._relay_url_patterns),
            )
        else:
            self._relay_url_patterns = ()

        try:
            from .mitm import MITMCertManager, CA_CERT_FILE
            self.mitm = MITMCertManager()
            self._ca_cert_file = CA_CERT_FILE
        except ImportError:
            log.error("Apps Script relay requires the 'cryptography' package.")
            log.error("Run: pip install cryptography")
            raise SystemExit(1)

        # When LAN sharing is active, serve the CA cert over HTTP so other
        # devices on the network can download and install it easily.
        self._lan_sharing: bool = bool(config.get("lan_sharing", False))

    # ── Host-policy helpers ───────────────────────────────────────

    @staticmethod
    def _cfg_int(config: dict, key: str, default: int, *, minimum: int = 1) -> int:
        try:
            value = int(config.get(key, default))
        except (TypeError, ValueError):
            value = default
        return max(minimum, value)

    @staticmethod
    def _cfg_float(config: dict, key: str, default: float,
                   *, minimum: float = 0.1) -> float:
        try:
            value = float(config.get(key, default))
        except (TypeError, ValueError):
            value = default
        return max(minimum, value)

    @classmethod
    def _normalize_download_extensions(cls, raw) -> tuple[tuple[str, ...], bool]:
        values = raw if isinstance(raw, (list, tuple)) else cls._DOWNLOAD_DEFAULT_EXTS
        normalized: list[str] = []
        any_extension = False
        seen: set[str] = set()
        for item in values:
            ext = str(item).strip().lower()
            if not ext:
                continue
            if ext in {"*", ".*"}:
                any_extension = True
                continue
            if not ext.startswith("."):
                ext = "." + ext
            if ext not in seen:
                seen.add(ext)
                normalized.append(ext)
        if not normalized and not any_extension:
            normalized = list(cls._DOWNLOAD_DEFAULT_EXTS)
        return tuple(normalized), any_extension

    def _track_current_task(self) -> asyncio.Task | None:
        task = asyncio.current_task()
        if task is not None:
            self._client_tasks.add(task)
        return task

    def _untrack_task(self, task: asyncio.Task | None) -> None:
        if task is not None:
            self._client_tasks.discard(task)

    def _is_blocked(self, host: str) -> bool:
        """Tell whether *host* matches the user block list or the adblock rules."""
        user_hit = host_matches_rules(host, self._block_hosts)
        # The adblock rules are only consulted when the user list did not match.
        return user_hit or host_matches_rules(host, self._adblock_hosts)

    async def _refresh_adblock_lists(self) -> None:
        """Background task: re-download stale adblock lists and hot-swap rules."""
        if not self._adblock_urls:
            return
        try:
            from core.adblock import refresh_all

            def _update(domains: list[str]) -> None:
                self._adblock_hosts = load_host_rules(domains)
                log.info(
                    "Adblock: rules updated — %d domains active", len(domains)
                )

            await refresh_all(self._adblock_urls, callback=_update)
        except Exception as exc:
            log.warning("Adblock: background refresh failed: %s", exc)

    def _is_bypassed(self, host: str) -> bool:
        """Tell whether *host* matches the merged bypass_hosts/direct_hosts rules."""
        direct_rules = self._bypass_hosts
        return host_matches_rules(host, direct_rules)

    def _cache_allowed(self, method: str, url: str,
                       headers: dict | None, body: bytes) -> bool:
        if method.upper() != "GET" or body:
            return False
        for name in UNCACHEABLE_HEADER_NAMES:
            if header_value(headers, name):
                return False
        return self.fronter._is_static_asset_url(url)

    async def start(self):
        """Warm up the relay, open the listeners and serve until cancelled.

        The HTTP listener is mandatory; failing to bind it raises. A SOCKS5
        bind failure is only logged and the proxy carries on with HTTP
        alone. When adblock lists are configured, a background refresh is
        started once the listeners are open.
        """
        if self._warmup_before_listen:
            log.info(
                "Relay warmup in progress... waiting up to %.0fs before opening listeners",
                self._warmup_timeout,
            )
            ready = await self.fronter.wait_until_warm(timeout=self._warmup_timeout)
            if ready:
                log.info("Relay warmup complete — enabling HTTP/SOCKS listeners")
            else:
                log.warning(
                    "Relay warmup timed out after %.0fs — starting listeners anyway",
                    self._warmup_timeout,
                )

        http_srv = await asyncio.start_server(self._on_client, self.host, self.port)
        socks_srv = None

        if self.socks_enabled:
            try:
                socks_srv = await asyncio.start_server(
                    self._on_socks_client, self.socks_host, self.socks_port
                )
            except OSError as e:
                log.error("SOCKS5 listener failed on %s:%d: %s",
                          self.socks_host, self.socks_port, e)

        # Remembered so stop() can close whichever listeners actually opened.
        self._servers = [s for s in (http_srv, socks_srv) if s]

        log.info(
            "HTTP proxy listening on %s:%d",
            self.host, self.port,
        )
        if socks_srv:
            log.info(
                "SOCKS5 proxy listening on %s:%d",
                self.socks_host, self.socks_port,
            )

        # Kick off adblock refresh in the background — won't block startup.
        # The event loop only keeps weak references to tasks, so hold a
        # strong one here; otherwise the refresh may be garbage-collected
        # before it finishes.
        if self._adblock_urls:
            self._adblock_refresh_task = asyncio.create_task(
                self._refresh_adblock_lists()
            )

        # Cancellation propagates to the caller; `async with` closes the
        # listeners on the way out.
        async with http_srv:
            if socks_srv:
                async with socks_srv:
                    await asyncio.gather(
                        http_srv.serve_forever(),
                        socks_srv.serve_forever(),
                    )
            else:
                await http_srv.serve_forever()

    async def stop(self):
        """Shut down all listeners and release relay resources."""
        for srv in self._servers:
            try:
                srv.close()
            except Exception:
                pass
        for srv in self._servers:
            try:
                await srv.wait_closed()
            except Exception:
                pass
        self._servers = []

        current = asyncio.current_task()
        client_tasks = [task for task in self._client_tasks if task is not current]
        for task in client_tasks:
            task.cancel()
        if client_tasks:
            await asyncio.gather(*client_tasks, return_exceptions=True)
        self._client_tasks.clear()

        try:
            await self.fronter.close()
        except Exception as exc:
            log.debug("fronter.close: %s", exc)

    # ── client handler ────────────────────────────────────────────

    async def _serve_ca_cert(self, writer: asyncio.StreamWriter) -> None:
        """Serve the MITM CA certificate so LAN devices can install it."""
        import os as _os
        ca_path = getattr(self, "_ca_cert_file", None)
        if not ca_path or not _os.path.exists(ca_path):
            writer.write(
                b"HTTP/1.1 404 Not Found\r\n"
                b"Content-Length: 0\r\n"
                b"Connection: close\r\n\r\n"
            )
            await writer.drain()
            return
        with open(ca_path, "rb") as f:
            cert_data = f.read()
        headers = (
            b"HTTP/1.1 200 OK\r\n"
            b"Content-Type: application/x-x509-ca-cert\r\n"
            b"Content-Disposition: attachment; filename=\"ca.crt\"\r\n"
            + b"Content-Length: " + str(len(cert_data)).encode() + b"\r\n"
            + b"Connection: close\r\n\r\n"
        )
        writer.write(headers + cert_data)
        await writer.drain()
        log.info("Served CA certificate to LAN device")

    async def _on_client(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter):
        addr = writer.get_extra_info("peername")
        task = self._track_current_task()
        try:
            first_line = await asyncio.wait_for(reader.readline(), timeout=30)
            if not first_line:
                return

            # Read remaining headers
            header_block = first_line
            while True:
                line = await asyncio.wait_for(reader.readline(), timeout=10)
                header_block += line
                if len(header_block) > MAX_HEADER_BYTES:
                    log.warning("Request header block exceeds cap — closing")
                    return
                if line in (b"\r\n", b"\n", b""):
                    break

            if has_unsupported_transfer_encoding(header_block):
                log.warning("Unsupported Transfer-Encoding on client request")
                writer.write(
                    b"HTTP/1.1 501 Not Implemented\r\n"
                    b"Connection: close\r\n"
                    b"Content-Length: 0\r\n\r\n"
                )
                await writer.drain()
                return

            request_line = first_line.decode(errors="replace").strip()
            parts = request_line.split(" ", 2)
            if len(parts) < 2:
                return

            method = parts[0].upper()
            path = parts[1] if len(parts) >= 2 else "/"

            if method == "GET" and path == "/ca.crt" and self._lan_sharing:
                await self._serve_ca_cert(writer)
                return

            if method == "CONNECT":
                await self._do_connect(parts[1], reader, writer)
            else:
                await self._do_http(header_block, reader, writer)

        except asyncio.CancelledError:
            pass
        except asyncio.TimeoutError:
            log.debug("Timeout: %s", addr)
        except Exception as e:
            log.error("Error (%s): %s", addr, e)
        finally:
            self._untrack_task(task)
            try:
                writer.close()
                await writer.wait_closed()
            except Exception:
                pass

    async def _on_socks_client(self, reader: asyncio.StreamReader,
                               writer: asyncio.StreamWriter):
        """Handle one connection accepted on the SOCKS5 listener.

        Runs the SOCKS5 negotiation; when it yields a ``(host, port)``
        target, the connection is routed through `_handle_target_tunnel`.
        A None result ends the connection quietly. The writer is always
        closed and the task untracked on the way out.
        """
        peer = writer.get_extra_info("peername")
        tracked = self._track_current_task()
        try:
            negotiated = await negotiate_socks5(reader, writer)
            if negotiated is not None:
                host, port = negotiated
                log.info("SOCKS5 CONNECT → %s:%d", host, port)
                await self._handle_target_tunnel(host, port, reader, writer)
        except (asyncio.IncompleteReadError, asyncio.CancelledError):
            # Client hung up mid-handshake, or the proxy is shutting down.
            pass
        except asyncio.TimeoutError:
            log.debug("SOCKS5 timeout: %s", peer)
        except Exception as e:
            log.error("SOCKS5 error (%s): %s", peer, e)
        finally:
            self._untrack_task(tracked)
            try:
                writer.close()
                await writer.wait_closed()
            except Exception:
                pass

    # ── CONNECT (HTTPS tunnelling) ────────────────────────────────

    async def _do_connect(self, target: str, reader, writer):
        """Answer an HTTP CONNECT request and hand the stream to the router.

        *target* is the request target of the CONNECT line, normally
        ``host:port``. The port defaults to 443 when it is omitted, which
        covers a bare hostname, a trailing colon, and a bracketed IPv6
        literal with no port after the closing bracket. A target with an
        empty host, a non-numeric port or a port outside 1-65535 is answered
        with ``400 Bad Request``.

        On success the client gets ``200 Connection Established`` and the
        connection is passed to `_handle_target_tunnel`.
        """
        authority = target.strip()
        host, sep, port_str = authority.rpartition(":")
        if not sep or authority.endswith("]"):
            # No port component. Without this, "example.com" would be parsed
            # as an empty host plus port "example.com", and "[::1]" would be
            # split inside the brackets.
            host, port_str = authority, ""

        try:
            port = int(port_str) if port_str else 443
        except ValueError:
            port = -1  # rejected by the range check below

        if not host or not 0 < port <= 65535:
            log.warning("CONNECT invalid target: %r", target)
            writer.write(b"HTTP/1.1 400 Bad Request\r\n\r\n")
            await writer.drain()
            return

        log.info("CONNECT → %s:%d", host, port)

        writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
        await writer.drain()

        await self._handle_target_tunnel(host, port, reader, writer)

    async def _handle_target_tunnel(self, host: str, port: int,
                                    reader: asyncio.StreamReader,
                                    writer: asyncio.StreamWriter):
        """Pick a transport for ``host:port`` and hand the client streams to it.

        Decision order, first match wins:
          1. blocked host (block_hosts or adblock rules) → write a 403 and
             return;
          2. bypass/direct host → raw direct tunnel;
          3. IP literal → direct tunnel with a 4 s connect timeout, falling
             back to MITM (port 443) or the plain-HTTP tunnel (port 80);
             other ports are given up on;
          4. host with an SNI-rewrite IP (`hosts` map or built-in suffix
             list) → SNI-rewrite tunnel;
          5. Google domain eligible for the direct shortcut → direct tunnel,
             falling back to SNI-rewrite via the relay's connect host
             (port 443) or the plain-HTTP tunnel (any other port);
          6. anything else: port 443 → MITM, port 80 → plain-HTTP tunnel,
             other ports → raw direct tunnel.
        """
        # ── Block / bypass policy ─────────────────────────────────
        if self._is_blocked(host):
            # The log text names block_hosts, but _is_blocked also covers
            # the adblock rules.
            log.warning("BLOCKED → %s:%d (matches block_hosts)", host, port)
            try:
                writer.write(b"HTTP/1.1 403 Forbidden\r\nContent-Length: 0\r\n\r\n")
                await writer.drain()
            except Exception:
                pass
            return

        if self._is_bypassed(host):
            log.info("Direct tunnel → %s:%d (matches direct_hosts/bypass_hosts)", host, port)
            await self._do_direct_tunnel(host, port, reader, writer)
            return

        # ── IP-literal destinations ───────────────────────────────
        # Prefer a direct tunnel first (works for unblocked IPs and keeps
        # TLS end-to-end). If the network blocks the route (common for
        # Telegram data-centers behind DPI), fall back to:
        #   • port 443 → MITM + relay through Apps Script
        #   • port 80  → plain-HTTP relay through Apps Script
        #   • other    → give up (non-HTTP; can't be relayed)
        # We use a shorter connect timeout for IP literals (4 s) because
        # when the route is DPI-dropped, waiting longer doesn't help and
        # clients like Telegram speed up DC-rotation when we fail fast.
        # We remember per-IP failures for a short while so subsequent
        # connects skip the doomed direct attempt.
        if is_ip_literal(host):
            if not self._direct_temporarily_disabled(host):
                log.info("Direct tunnel → %s:%d (IP literal)", host, port)
                ok = await self._do_direct_tunnel(
                    host, port, reader, writer, timeout=4.0,
                )
                if ok:
                    return
                # Skip the direct attempt for this IP for the next 300 s.
                self._remember_direct_failure(host, ttl=300)
                if port not in (80, 443):
                    log.warning("Direct tunnel failed for %s:%d", host, port)
                    return
                log.warning(
                    "Direct tunnel fallback → %s:%d (switching to relay)",
                    host, port,
                )
            else:
                log.info(
                    "Relay fallback → %s:%d (direct temporarily disabled)",
                    host, port,
                )
            # Reached after a failed direct attempt or while direct is
            # disabled; ports other than 443/80 fall through to the return.
            if port == 443:
                await self._do_mitm_connect(host, port, reader, writer)
            elif port == 80:
                await self._do_plain_http_tunnel(host, port, reader, writer)
            return

        override_ip = self._sni_rewrite_ip(host)
        if override_ip:
            # SNI-blocked domain: MITM-decrypt from browser, then
            # re-connect to the override IP with SNI=front_domain so
            # the ISP never sees the blocked hostname in the TLS handshake.
            log.info("SNI-rewrite tunnel → %s via %s (SNI: %s)",
                     host, override_ip, self.fronter.sni_host)
            await self._do_sni_rewrite_tunnel(host, port, reader, writer,
                                              connect_ip=override_ip)
        elif self._is_google_domain(host):
            if not self._direct_temporarily_disabled(host):
                log.info("Direct tunnel → %s (Google domain, skipping relay)", host)
                ok = await self._do_direct_tunnel(host, port, reader, writer)
                if ok:
                    return
                # Uses the default TTL of _remember_direct_failure.
                self._remember_direct_failure(host)

            # Direct failed or is temporarily disabled.
            # For port 443: try SNI-rewrite through configured google_ip before
            # burning an Apps Script execution on a plain Google domain.
            if port == 443:
                log.info(
                    "SNI-rewrite fallback → %s via %s (direct blocked)",
                    host, self.fronter.connect_host,
                )
                await self._do_sni_rewrite_tunnel(
                    host, port, reader, writer,
                    connect_ip=self.fronter.connect_host,
                )
            else:
                # Every non-443 port lands here, not just port 80.
                await self._do_plain_http_tunnel(host, port, reader, writer)
        elif port == 443:
            await self._do_mitm_connect(host, port, reader, writer)
        elif port == 80:
            await self._do_plain_http_tunnel(host, port, reader, writer)
        else:
            # Non-HTTP port (e.g. mtalk:5228 XMPP, IMAP, SMTP, SSH) —
            # payload isn't HTTP, so we can't relay or MITM. Tunnel bytes.
            log.info("Direct tunnel → %s:%d (non-HTTP port)", host, port)
            ok = await self._do_direct_tunnel(host, port, reader, writer)
            if not ok:
                log.warning("Direct tunnel failed for %s:%d", host, port)

    # ── Hosts override (fake DNS) ─────────────────────────────────

    # YouTube suffixes that __init__ strips from the SNI-rewrite list when
    # youtube_via_relay is enabled or the exit node runs in full mode, so
    # YouTube goes through the Apps Script relay instead.
    _YOUTUBE_SNI_SUFFIXES = frozenset({
        "youtube.com", "youtu.be", "youtube-nocookie.com",
    })
    # Built-in list of domains that must be reached via Google's frontend IP
    # with SNI rewritten to `front_domain` (default: www.google.com).
    # Source: constants.SNI_REWRITE_SUFFIXES.
    # This is the class-level default; __init__ assigns a per-instance
    # value that may have suffixes removed.
    _SNI_REWRITE_SUFFIXES = SNI_REWRITE_SUFFIXES

    def _sni_rewrite_ip(self, host: str) -> str | None:
        """Return the IP to SNI-rewrite `host` through, or None.

        Order of precedence:
          1. Explicit entry in config `hosts` map (exact or suffix match).
          2. Built-in `_SNI_REWRITE_SUFFIXES` → mapped to config `google_ip`.
        """
        ip = self._hosts_ip(host)
        if ip:
            return ip
        h = host.lower().rstrip(".")
        for suffix in self._SNI_REWRITE_SUFFIXES:
            if h == suffix or h.endswith("." + suffix):
                return self.fronter.connect_host  # configured google_ip
        return None

    def _hosts_ip(self, host: str) -> str | None:
        """Return override IP for host if defined in config 'hosts', else None.

        Supports exact match and suffix match (e.g. 'youtube.com' matches
        'www.youtube.com', 'm.youtube.com', etc.).
        """
        h = host.lower().rstrip(".")
        if h in self._hosts:
            return self._hosts[h]
        # suffix match: check every parent label
        parts = h.split(".")
        for i in range(1, len(parts)):
            parent = ".".join(parts[i:])
            if parent in self._hosts:
                return self._hosts[parent]
        return None

    # ── Google domain detection ───────────────────────────────────

    # Google-owned domains that may use the raw direct-tunnel shortcut.
    # YouTube/googlevideo SNIs are blocked; they go through
    # _do_sni_rewrite_tunnel via the hosts map instead.
    # Source: constants.GOOGLE_OWNED_SUFFIXES / GOOGLE_OWNED_EXACT.
    # NOTE(review): _is_google_owned_domain matches the suffixes with a plain
    # str.endswith, so entries presumably carry a leading dot (otherwise
    # "notgoogle.com" would match "google.com") — verify in constants.py.
    _GOOGLE_OWNED_SUFFIXES = GOOGLE_OWNED_SUFFIXES
    _GOOGLE_OWNED_EXACT = GOOGLE_OWNED_EXACT

    def _is_google_domain(self, host: str) -> bool:
        """Return True if host should use the raw direct Google shortcut."""
        h = host.lower().rstrip(".")
        if self._is_direct_google_excluded(h):
            return False
        if not self._is_google_owned_domain(h):
            return False
        return self._is_direct_google_allowed(h)

    def _is_google_owned_domain(self, host: str) -> bool:
        if host in self._GOOGLE_OWNED_EXACT:
            return True
        for suffix in self._GOOGLE_OWNED_SUFFIXES:
            if host.endswith(suffix):
                return True
        return False

    def _is_direct_google_excluded(self, host: str) -> bool:
        if host in self._direct_google_exclude:
            return True
        for suffix in self._GOOGLE_DIRECT_SUFFIX_EXCLUDE:
            if host.endswith(suffix):
                return True
        for token in self._direct_google_exclude:
            if token.startswith(".") and host.endswith(token):
                return True
        return False

    def _is_direct_google_allowed(self, host: str) -> bool:
        if host in self._direct_google_allow:
            return True
        for suffix in self._GOOGLE_DIRECT_ALLOW_SUFFIXES:
            if host.endswith(suffix):
                return True
        for token in self._direct_google_allow:
            if token.startswith(".") and host.endswith(token):
                return True
        return False

    def _direct_temporarily_disabled(self, host: str) -> bool:
        h = host.lower().rstrip(".")
        now = time.time()
        disabled = False
        for key in self._direct_failure_keys(h):
            until = self._direct_fail_until.get(key, 0)
            if until > now:
                disabled = True
            else:
                self._direct_fail_until.pop(key, None)
        return disabled

    def _remember_direct_failure(self, host: str, ttl: int = 600):
        until = time.time() + ttl
        for key in self._direct_failure_keys(host.lower().rstrip(".")):
            self._direct_fail_until[key] = until

    def _direct_failure_keys(self, host: str) -> tuple[str, ...]:
        keys = [host]
        if host.endswith(".google.com") or host == "google.com":
            keys.append("*.google.com")
        if host.endswith(".googleapis.com") or host == "googleapis.com":
            keys.append("*.googleapis.com")
        if host.endswith(".gstatic.com") or host == "gstatic.com":
            keys.append("*.gstatic.com")
        if host.endswith(".googleusercontent.com") or host == "googleusercontent.com":
            keys.append("*.googleusercontent.com")
        return tuple(dict.fromkeys(keys))

    async def _open_tcp_connection(self, target: str, port: int,
                                   timeout: float = 10.0):
        """Connect with IPv4-first resolution and clearer failure reporting."""
        errors: list[str] = []
        loop = asyncio.get_running_loop()

        # Strip IPv6 brackets (CONNECT may deliver "[::1]" as the hostname).
        # ipaddress.ip_address() rejects the bracketed form, which would
        # otherwise force a DNS lookup for an IP literal and fail.
        lookup_target = target.strip()
        if lookup_target.startswith("[") and lookup_target.endswith("]"):
            lookup_target = lookup_target[1:-1]

        try:
            ipaddress.ip_address(lookup_target)
            candidates = [(0, lookup_target)]
        except ValueError:
            try:
                infos = await asyncio.wait_for(
                    loop.getaddrinfo(
                        lookup_target,
                        port,
                        family=socket.AF_UNSPEC,
                        type=socket.SOCK_STREAM,
                    ),
                    timeout=timeout,
                )
            except Exception as exc:
                raise OSError(f"dns lookup failed for {lookup_target}: {exc!r}") from exc

            candidates = []
            seen = set()
            for family, _type, _proto, _canon, sockaddr in infos:
                ip = sockaddr[0]
                key = (family, ip)
                if key in seen:
                    continue
                seen.add(key)
                candidates.append((family, ip))

            candidates.sort(key=lambda item: 0 if item[0] == socket.AF_INET else 1)

        for family, ip in candidates:
            try:
                return await asyncio.wait_for(
                    asyncio.open_connection(ip, port, family=family or 0),
                    timeout=timeout,
                )
            except Exception as exc:
                fam = "ipv4" if family == socket.AF_INET else (
                    "ipv6" if family == socket.AF_INET6 else "auto"
                )
                errors.append(f"{ip} ({fam}): {exc!r}")

        raise OSError("; ".join(errors) or f"connect failed for {target}:{port}")

    # ── Direct tunnel (no MITM) ───────────────────────────────────

    async def _do_direct_tunnel(self, host: str, port: int,
                                reader: asyncio.StreamReader,
                                writer: asyncio.StreamWriter,
                                connect_ip: str | None = None,
                                timeout: float | None = None):
        """Pipe raw TLS bytes directly to the target server.

        connect_ip overrides DNS: the TCP connection goes to that IP
        while the browser's TLS (SNI=host) is piped through unchanged.
        Without an override we connect to the real hostname so browser-safe
        Google properties (Gemini assets, Play, Accounts, etc.) use their
        normal edge instead of being forced onto the fronting IP.

        Args:
            host: Hostname the client asked for (used for logging and as the
                connect target when ``connect_ip`` is not given).
            port: Target TCP port.
            reader: Client stream reader.
            writer: Client stream writer. It is only half-closed here.
            connect_ip: Optional address to dial instead of ``host``.
            timeout: Connect timeout in seconds; defaults to
                ``self._tcp_connect_timeout``.

        Returns:
            False if the upstream connection could not be opened, True once
            both pipe directions have finished.
        """
        target_ip = connect_ip or host
        effective_timeout = (
            self._tcp_connect_timeout if timeout is None else float(timeout)
        )
        try:
            r_remote, w_remote = await self._open_tcp_connection(
                target_ip, port, timeout=effective_timeout,
            )
        except Exception as e:
            log.error("Direct tunnel connect failed (%s via %s): %s",
                      host, target_ip, e)
            return False

        async def pipe(src, dst, label):
            try:
                while True:
                    data = await src.read(65536)
                    if not data:
                        break
                    dst.write(data)
                    await dst.drain()
            except (ConnectionError, asyncio.CancelledError):
                pass
            except Exception as e:
                log.debug("Pipe %s ended: %s", label, e)
            finally:
                # Half-close rather than hard-close so the other direction
                # can still flush final bytes (important for TLS close_notify).
                try:
                    if not dst.is_closing() and dst.can_write_eof():
                        dst.write_eof()
                except Exception:
                    try:
                        dst.close()
                    except Exception:
                        pass

        try:
            await asyncio.gather(
                pipe(reader, w_remote, f"client→{host}"),
                pipe(r_remote, writer, f"{host}→client"),
            )
        finally:
            # The pipes only half-close their destination, so nothing else
            # releases the upstream socket we opened above. Close it here,
            # once both directions are done or the tunnel is torn down,
            # instead of leaving it open until garbage collection.
            try:
                w_remote.close()
            except Exception:
                pass
        return True

    # ── SNI-rewrite tunnel ────────────────────────────────────────

    async def _do_sni_rewrite_tunnel(self, host: str, port: int, reader, writer,
                                     connect_ip: str | None = None):
        """Terminate the browser's TLS locally and re-encrypt it upstream
        with SNI=front_domain (e.g. www.google.com).

        Only the fronting SNI appears in the outgoing handshake, so an
        on-path observer such as the ISP does not see the blocked hostname
        (e.g. www.youtube.com).

        Args:
            host: Hostname the client asked for; selects the MITM certificate.
            port: Upstream TCP port.
            reader: Client stream reader.
            writer: Client stream writer; its transport is replaced by the
                TLS transport after a successful handshake.
            connect_ip: Optional upstream address; defaults to
                ``self.fronter.connect_host``.

        Returns:
            None. Returns early, after logging, if either TLS leg cannot be
            established.
        """
        upstream_ip = connect_ip or self.fronter.connect_host
        outbound_sni = self.fronter.sni_host  # e.g. "www.google.com"

        # Stage 1: act as the TLS server for the browser.
        server_ctx = self.mitm.get_server_context(host)
        loop = asyncio.get_running_loop()
        client_transport = writer.transport
        client_protocol = client_transport.get_protocol()
        try:
            tls_transport = await loop.start_tls(
                client_transport, client_protocol, server_ctx, server_side=True,
            )
        except Exception as e:
            log.debug("SNI-rewrite TLS accept failed (%s): %s", host, e)
            return
        writer._transport = tls_transport

        # Stage 2: dial the upstream IP over TLS, presenting the safe SNI.
        # The SSLContext comes from DomainFronter (certifi bundle, verify_ssl
        # flag) rather than being rebuilt for every CONNECT.
        try:
            upstream_reader, upstream_writer = await asyncio.wait_for(
                asyncio.open_connection(
                    upstream_ip, port,
                    ssl=self.fronter._ssl_ctx(),
                    server_hostname=outbound_sni,
                ),
                timeout=self._tcp_connect_timeout,
            )
        except Exception as e:
            log.error("SNI-rewrite outbound connect failed (%s via %s): %s",
                      host, upstream_ip, e)
            return

        # Stage 3: shuttle plaintext between the two TLS sessions.
        async def shuttle(source, sink, label):
            try:
                while chunk := await source.read(65536):
                    sink.write(chunk)
                    await sink.drain()
            except (ConnectionError, asyncio.CancelledError):
                pass
            except Exception as exc:
                log.debug("Pipe %s ended: %s", label, exc)
            finally:
                # Hard-close the sink once this direction is finished.
                try:
                    sink.close()
                except Exception:
                    pass

        directions = (
            shuttle(reader, upstream_writer, f"client→{host}"),
            shuttle(upstream_reader, writer, f"{host}→client"),
        )
        await asyncio.gather(*directions)

    # ── MITM CONNECT (apps_script mode) ───────────────────────────

    async def _do_plain_http_tunnel(self, host: str, port: int, reader, writer):
        """Handle plain HTTP over SOCKS5 in apps_script mode.

        Logs the target and hands the client stream to
        ``_relay_http_stream`` as-is; unlike ``_do_mitm_connect`` there is
        no TLS upgrade step.

        Args:
            host: Target hostname.
            port: Target port.
            reader: Client stream reader.
            writer: Client stream writer.
        """
        log.info("Plain HTTP relay → %s:%d", host, port)
        # The request/response loop itself lives in _relay_http_stream.
        await self._relay_http_stream(host, port, reader, writer)

    async def _do_mitm_connect(self, host: str, port: int, reader, writer):
        """Terminate the client's TLS locally and relay the decrypted HTTP.

        The existing client connection is upgraded to TLS with a server
        context obtained from ``self.mitm`` for ``host``. If the handshake
        fails, the client side is closed and the method returns; otherwise
        the decrypted stream is handed to ``_relay_http_stream``.

        Args:
            host: Hostname from the CONNECT request.
            port: Port from the CONNECT request.
            reader: Client stream reader.
            writer: Client stream writer; its transport is replaced by the
                TLS transport after a successful handshake.
        """
        server_ctx = self.mitm.get_server_context(host)

        # Swap the raw client transport for a TLS one, with us as the server.
        loop = asyncio.get_running_loop()
        raw_transport = writer.transport
        stream_protocol = raw_transport.get_protocol()

        try:
            tls_transport = await loop.start_tls(
                raw_transport, stream_protocol, server_ctx, server_side=True,
            )
        except Exception as e:
            # The handshake did not complete. Typical reasons:
            #   • Telegram Desktop / MTProto on port 443 speaks an obfuscated
            #     non-TLS protocol that cannot be decrypted here, and the
            #     blocked target IP rules out a direct tunnel too. Telegram
            #     rotates to another DC by itself, so failing fast helps.
            #   • The client issued CONNECT but never started TLS (probes).
            looks_like_mtproto = is_ip_literal(host) and port == 443
            if looks_like_mtproto:
                log.info(
                    "Non-TLS traffic on %s:%d (likely Telegram MTProto / "
                    "obfuscated protocol). This DC appears blocked; the "
                    "client should rotate to another endpoint shortly.",
                    host, port,
                )
            elif port == 443:
                log.debug("TLS handshake failed for %s: %s", host, e)
            else:
                log.debug(
                    "TLS handshake skipped for %s:%d (non-HTTPS): %s",
                    host, port, e,
                )
            # Drop the client connection so it can retry right away instead
            # of waiting on a half-open socket.
            try:
                if not writer.is_closing():
                    writer.close()
            except Exception:
                pass
            return

        # From here on the writer must send through the TLS transport.
        writer._transport = tls_transport

        await self._relay_http_stream(host, port, reader, writer)

    async def _relay_http_stream(self, host: str, port: int, reader, writer):
        """Read decrypted/origin-form HTTP requests and relay them.

        Runs a keep-alive loop on the client stream. Each iteration reads one
        request (header block plus a Content-Length body), answers CORS
        preflights locally, tries the streaming-download path, then the
        response cache, and finally relays via ``_relay_smart``. A relay
        failure is reported to the client as a 502 and the loop continues.

        The loop ends on client EOF, idle timeout, connection errors, or a
        request that is rejected locally:

        * header block larger than ``MAX_HEADER_BYTES`` → 431
        * header block cut off by EOF before the blank line → 400
        * unsupported Transfer-Encoding → 501
        * Content-Length above ``MAX_REQUEST_BODY_BYTES`` → 413

        Args:
            host: Target hostname, used to build absolute URLs for
                origin-form request paths.
            port: Target port; 443 selects ``https://``, other ports
                ``http://`` (with an explicit port unless it is 80).
            reader: Client stream reader.
            writer: Client stream writer.
        """
        # Read and relay HTTP requests from the browser (now decrypted)
        while True:
            try:
                first_line = await asyncio.wait_for(
                    reader.readline(), timeout=CLIENT_IDLE_TIMEOUT
                )
                if not first_line:
                    break

                header_block = first_line
                oversized_headers = False
                truncated_headers = False
                while True:
                    line = await asyncio.wait_for(reader.readline(), timeout=10)
                    header_block += line
                    if len(header_block) > MAX_HEADER_BYTES:
                        oversized_headers = True
                        break
                    if not line:
                        # EOF before the terminating blank line: the header
                        # block is incomplete and must not be relayed.
                        truncated_headers = True
                        break
                    if line in (b"\r\n", b"\n"):
                        break

                # Reject truncated / oversized header blocks cleanly rather
                # than forwarding a half-parsed request to the relay — doing
                # so would send malformed JSON payloads to Apps Script and
                # leave the client hanging until its own timeout fires.
                if oversized_headers:
                    log.warning(
                        "MITM header block exceeds %d bytes — closing (%s)",
                        MAX_HEADER_BYTES, host,
                    )
                    try:
                        writer.write(
                            b"HTTP/1.1 431 Request Header Fields Too Large\r\n"
                            b"Connection: close\r\n"
                            b"Content-Length: 0\r\n\r\n"
                        )
                        await writer.drain()
                    except Exception:
                        pass
                    break

                if truncated_headers:
                    log.debug(
                        "MITM header block truncated by client EOF — closing (%s)",
                        host,
                    )
                    # Best effort: the client may already be gone.
                    try:
                        writer.write(
                            b"HTTP/1.1 400 Bad Request\r\n"
                            b"Connection: close\r\n"
                            b"Content-Length: 0\r\n\r\n"
                        )
                        await writer.drain()
                    except Exception:
                        pass
                    break

                # Read body
                body = b""
                if has_unsupported_transfer_encoding(header_block):
                    log.warning("Unsupported Transfer-Encoding → %s:%d", host, port)
                    writer.write(
                        b"HTTP/1.1 501 Not Implemented\r\n"
                        b"Connection: close\r\n"
                        b"Content-Length: 0\r\n\r\n"
                    )
                    await writer.drain()
                    break
                length = parse_content_length(header_block)
                if length > MAX_REQUEST_BODY_BYTES:
                    # Answer with a framed 413 (as the plain-HTTP path does)
                    # instead of dropping the connection without a response.
                    log.warning(
                        "Request body too large (%d bytes) → %s:%d",
                        length, host, port,
                    )
                    writer.write(
                        b"HTTP/1.1 413 Content Too Large\r\n"
                        b"Connection: close\r\n"
                        b"Content-Length: 0\r\n\r\n"
                    )
                    await writer.drain()
                    break
                if length > 0:
                    body = await reader.readexactly(length)

                # Parse the request
                request_line = first_line.decode(errors="replace").strip()
                parts = request_line.split(" ", 2)
                if len(parts) < 2:
                    break

                method = parts[0]
                path = parts[1]

                # Parse headers
                headers = {}
                for raw_line in header_block.split(b"\r\n")[1:]:
                    if b":" in raw_line:
                        k, v = raw_line.decode(errors="replace").split(":", 1)
                        headers[k.strip()] = v.strip()

                # Shortening the length of X API URLs to prevent relay errors.
                if host in ("x.com", "twitter.com") and re.match(
                    r"/i/api/graphql/[^/]+/[^?]+\?variables=", path
                ):
                    path = path.split("&")[0]

                # MITM traffic arrives as origin-form paths; SOCKS/plain HTTP can
                # also send absolute-form requests. Normalize both to full URLs.
                if path.startswith("http://") or path.startswith("https://"):
                    url = path
                elif port == 443:
                    url = f"https://{host}{path}"
                elif port == 80:
                    url = f"http://{host}{path}"
                else:
                    url = f"http://{host}:{port}{path}"

                log.info("MITM → %s %s", method, url)

                # ── CORS: extract relevant request headers ─────────────
                origin = header_value(headers, "origin")
                acr_method = header_value(
                    headers, "access-control-request-method",
                )
                acr_headers = header_value(
                    headers, "access-control-request-headers",
                )

                # CORS preflight — respond directly. Apps Script's
                # UrlFetchApp does not support the OPTIONS method, so
                # forwarding preflights would always fail and break every
                # cross-origin fetch/XHR the browser runs through us.
                if method.upper() == "OPTIONS" and acr_method:
                    log.debug(
                        "CORS preflight → %s (responding locally)",
                        url[:60],
                    )
                    writer.write(cors_preflight_response(
                        origin, acr_method, acr_headers,
                    ))
                    await writer.drain()
                    continue

                if await self._maybe_stream_download(method, url, headers, body, writer):
                    continue

                # Check local cache first (GET only)
                response = None
                if self._cache_allowed(method, url, headers, body):
                    response = self._cache.get(url)
                    if response:
                        log.debug("Cache HIT: %s", url[:60])

                if response is None:
                    # Relay through Apps Script
                    try:
                        response = await self._relay_smart(method, url, headers, body)
                    except Exception as e:
                        log.error("Relay error (%s): %s", url[:60], e)
                        err_body = f"Relay error: {e}".encode()
                        response = (
                            b"HTTP/1.1 502 Bad Gateway\r\n"
                            b"Content-Type: text/plain\r\n"
                            b"Content-Length: " + str(len(err_body)).encode() + b"\r\n"
                            b"\r\n" + err_body
                        )

                    # Cache successful GET responses
                    if self._cache_allowed(method, url, headers, body) and response:
                        ttl = ResponseCache.parse_ttl(response, url)
                        if ttl > 0:
                            self._cache.put(url, response, ttl)
                            log.debug("Cached (%ds): %s", ttl, url[:60])

                # Inject permissive CORS headers whenever the browser sent
                # an Origin (cross-origin XHR / fetch). Without this, the
                # browser blocks the response even though the relay fetched
                # it successfully.
                if origin and response:
                    response = inject_cors_headers(response, origin)

                log_response_summary(
                    logger=log,
                    split_raw_response=split_raw_response,
                    trace_suffixes=self._TRACE_HOST_SUFFIXES,
                    url=url,
                    response=response,
                )

                writer.write(response)
                await writer.drain()

            except asyncio.TimeoutError:
                break
            except asyncio.IncompleteReadError:
                break
            except ConnectionError:
                break
            except Exception as e:
                log.error("MITM handler error (%s): %s", host, e)
                break

    # ── CORS helpers ──────────────────────────────────────────────
    # cors_preflight_response() and inject_cors_headers() live in proxy_support.

    def _url_matches_relay_pattern(self, url: str) -> bool:
        """Return True if url matches any entry in _relay_url_patterns.

        Pattern format: "host/path" (no scheme).  The url host may have
        extra subdomains (e.g. www.youtube.com matches youtube.com).
        """
        normalized = re.sub(r'^https?://', '', url).lower()
        slash = normalized.find('/')
        url_host = normalized[:slash] if slash != -1 else normalized
        url_path = normalized[slash:] if slash != -1 else '/'
        for p in self._relay_url_patterns:
            slash_p = p.find('/')
            pat_host = p[:slash_p] if slash_p != -1 else p
            pat_path = p[slash_p:] if slash_p != -1 else '/'
            host_match = (url_host == pat_host or url_host.endswith('.' + pat_host))
            if host_match and url_path.startswith(pat_path):
                return True
        return False

    async def _forward_via_sni_rewrite(self, method: str, url: str,
                                       headers: dict, body: bytes) -> bytes:
        """Forward an HTTP request to its real origin via the SNI-rewrite path.

        Connects to google_ip:443 with SNI=front_domain (DPI only sees a safe
        Google SNI), then sends the actual HTTP/1.1 request with the real Host
        header so YouTube's edge serves the correct response.

        Args:
            method: HTTP method.
            url: Absolute URL; the scheme is stripped and the remainder is
                split into host and path.
            headers: Request headers from the client. Any Host or Connection
                header, in any letter case, is replaced.
            body: Request body (may be empty).

        Returns:
            The raw response bytes read until the upstream closes the
            connection.

        Raises:
            asyncio.TimeoutError: If connecting exceeds
                ``self._tcp_connect_timeout`` or a read stalls for 30 seconds.
            OSError: On connection or TLS failures.
        """
        # Parse host and path from URL.
        stripped = re.sub(r'^https?://', '', url)
        host, slash, rest = stripped.partition('/')
        path = slash + rest if slash else '/'

        # Build HTTP/1.1 request bytes. Header names are case-insensitive,
        # but dict keys are not: drop any client-supplied Host/Connection
        # variant first so the request never carries two Host headers
        # (e.g. "host" from the client plus our "Host").
        req_headers = {
            k: v for k, v in headers.items()
            if k.lower() not in ("host", "connection")
        }
        req_headers['Host'] = host
        # Use Connection: close so we don't need to manage keep-alive.
        req_headers['Connection'] = 'close'
        req_lines = [f"{method} {path} HTTP/1.1\r\n"]
        req_lines.extend(f"{k}: {v}\r\n" for k, v in req_headers.items())
        req_lines.append("\r\n")
        request_bytes = "".join(req_lines).encode() + (body or b"")

        r, w = await asyncio.wait_for(
            asyncio.open_connection(
                self.fronter.connect_host,
                443,
                ssl=self.fronter._ssl_ctx(),
                server_hostname=self.fronter.sni_host,
            ),
            timeout=self._tcp_connect_timeout,
        )
        try:
            w.write(request_bytes)
            await w.drain()
            # With Connection: close the response ends at upstream EOF.
            chunks = []
            while True:
                chunk = await asyncio.wait_for(r.read(65536), timeout=30)
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            try:
                w.close()
            except Exception:
                pass
        return b"".join(chunks)

    async def _relay_smart(self, method, url, headers, body):
        """Choose optimal relay strategy based on request type.

        - If relay_url_patterns are configured and the URL does NOT match,
          forward via SNI-rewrite HTTP (fast direct path).
        - GET requests for likely-large downloads use parallel-range relay.
        - All other requests go through the single-request relay.
        """
        # Path-level relay routing: only matching URL prefixes go through relay;
        # everything else on the same host is forwarded via SNI-rewrite.
        if self._relay_url_patterns and not self._url_matches_relay_pattern(url):
            # Check if this host is one we pulled out of SNI-rewrite.
            stripped = re.sub(r'^https?://', '', url).lower()
            slash = stripped.find('/')
            req_host = stripped[:slash] if slash != -1 else stripped
            pattern_hosts = {p.split('/')[0] for p in self._relay_url_patterns}
            host_covered = any(
                req_host == h or req_host.endswith('.' + h)
                for h in pattern_hosts
            )
            if host_covered:
                return await self._forward_via_sni_rewrite(method, url, headers, body)

        if method == "GET" and not body:
            # Respect client's own Range header verbatim.
            if header_value(headers, "range"):
                return await self.fronter.relay(method, url, headers, body)
            # Only probe with Range when the URL looks like a big file.
            if self._is_likely_download(url, headers):
                return await self.fronter.relay_parallel(
                    method,
                    url,
                    headers,
                    body,
                    chunk_size=self._download_chunk_size,
                    max_parallel=self._download_max_parallel,
                    max_chunks=self._download_max_chunks,
                    min_size=self._download_min_size,
                )
        return await self.fronter.relay(method, url, headers, body)

    def _is_likely_download(self, url: str, headers: dict) -> bool:
        """Heuristic: is this URL likely a large file download?"""
        path = url.split("?")[0].lower()
        if self._download_any_extension:
            return True
        for ext in self._download_extensions:
            if path.endswith(ext):
                return True
        accept = header_value(headers, "accept").lower()
        if any(marker in accept for marker in self._DOWNLOAD_ACCEPT_MARKERS):
            return True
        return False

    async def _maybe_stream_download(self, method: str, url: str,
                                     headers: dict | None, body: bytes,
                                     writer) -> bool:
        if method.upper() != "GET" or body:
            return False
        if header_value(headers, "range"):
            return False
        effective_headers = headers or {}
        if not self._is_likely_download(url, effective_headers):
            return False
        if not self.fronter.stream_download_allowed(url):
            return False
        return await self.fronter.stream_parallel_download(
            url,
            effective_headers,
            writer,
            chunk_size=self._download_chunk_size,
            max_parallel=self._download_max_parallel,
            max_chunks=self._download_max_chunks,
            min_size=self._download_min_size,
        )

    # ── Plain HTTP forwarding ─────────────────────────────────────

    async def _do_http(self, header_block: bytes, reader, writer):
        """Relay one plain-HTTP proxy request whose headers were already read.

        Reads the Content-Length body, answers CORS preflights locally,
        tries the streaming-download path, then the response cache, and
        finally relays via ``_relay_smart``. The response is written to
        ``writer``.

        Requests with an unsupported Transfer-Encoding get a 501, and bodies
        above ``MAX_REQUEST_BODY_BYTES`` get a 413; in both cases nothing is
        relayed.

        Args:
            header_block: Raw request line and headers as received.
            reader: Client stream reader, positioned at the request body.
            writer: Client stream writer.
        """
        body = b""
        if has_unsupported_transfer_encoding(header_block):
            log.warning("Unsupported Transfer-Encoding on plain HTTP request")
            writer.write(
                b"HTTP/1.1 501 Not Implemented\r\n"
                b"Connection: close\r\n"
                b"Content-Length: 0\r\n\r\n"
            )
            await writer.drain()
            return
        length = parse_content_length(header_block)
        if length > MAX_REQUEST_BODY_BYTES:
            # Frame the error like the 501 above: without Content-Length the
            # client cannot tell where the response ends and may wait for a
            # body until the connection is closed.
            writer.write(
                b"HTTP/1.1 413 Content Too Large\r\n"
                b"Connection: close\r\n"
                b"Content-Length: 0\r\n\r\n"
            )
            await writer.drain()
            return
        if length > 0:
            body = await reader.readexactly(length)

        first_line = header_block.split(b"\r\n")[0].decode(errors="replace")
        log.info("HTTP → %s", first_line)

        # Parse request and relay through Apps Script
        parts = first_line.strip().split(" ", 2)
        method = parts[0] if parts else "GET"
        url = parts[1] if len(parts) > 1 else "/"

        headers = {}
        for raw_line in header_block.split(b"\r\n")[1:]:
            if b":" in raw_line:
                k, v = raw_line.decode(errors="replace").split(":", 1)
                headers[k.strip()] = v.strip()

        # ── CORS preflight over plain HTTP ─────────────────────────────
        origin = header_value(headers, "origin")
        acr_method = header_value(headers, "access-control-request-method")
        acr_headers = header_value(headers, "access-control-request-headers")
        if method.upper() == "OPTIONS" and acr_method:
            log.debug("CORS preflight (HTTP) → %s (responding locally)", url[:60])
            writer.write(cors_preflight_response(
                origin, acr_method, acr_headers,
            ))
            await writer.drain()
            return

        if await self._maybe_stream_download(method, url, headers, body, writer):
            return

        # Cache check for GET
        response = None
        if self._cache_allowed(method, url, headers, body):
            response = self._cache.get(url)
            if response:
                log.debug("Cache HIT (HTTP): %s", url[:60])

        if response is None:
            response = await self._relay_smart(method, url, headers, body)
            # Cache successful GET
            if self._cache_allowed(method, url, headers, body) and response:
                ttl = ResponseCache.parse_ttl(response, url)
                if ttl > 0:
                    self._cache.put(url, response, ttl)

        # Add CORS headers whenever the client sent an Origin header.
        if origin and response:
            response = inject_cors_headers(response, origin)

        log_response_summary(
            logger=log,
            split_raw_response=split_raw_response,
            trace_suffixes=self._TRACE_HOST_SUFFIXES,
            url=url,
            response=response,
        )

        writer.write(response)
        await writer.drain()