diff --git a/src/core/constants.py b/src/core/constants.py index 46fca87..766fde2 100644 --- a/src/core/constants.py +++ b/src/core/constants.py @@ -73,9 +73,9 @@ WARM_POOL_COUNT = 30 # ── Batch windows ───────────────────────────────────────────────────────── -BATCH_WINDOW_MICRO = 0.005 # 5 ms -BATCH_WINDOW_MACRO = 0.050 # 50 ms -BATCH_MAX = 50 +BATCH_WINDOW_MICRO = 0.015 # 15 ms: captures short asset bursts +BATCH_WINDOW_MACRO = 0.120 # 120 ms: balances batching and latency +BATCH_MAX = 64 # enough headroom for browser fan-out waves # ── Fan-out relay (parallel Apps Script instances) ──────────────────────── @@ -201,7 +201,7 @@ TRACE_HOST_SUFFIXES: tuple[str, ...] = ( STATIC_EXTS: tuple[str, ...] = ( ".css", ".js", ".mjs", ".woff", ".woff2", ".ttf", ".eot", ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".ico", - ".mp3", ".mp4", ".webm", ".wasm", ".avif", + ".mp3", ".mp4", ".webm", ".wasm", ".avif", ".json", ".map", ) LARGE_FILE_EXTS = frozenset({ ".bin", diff --git a/src/proxy/proxy_server.py b/src/proxy/proxy_server.py index 5a09789..28c0bae 100644 --- a/src/proxy/proxy_server.py +++ b/src/proxy/proxy_server.py @@ -105,6 +105,8 @@ class ProxyServer: self._download_max_chunks = self._cfg_int( config, "chunked_download_max_chunks", 256, minimum=1, ) + self._warmup_before_listen = True + self._warmup_timeout = 20.0 self._download_extensions, self._download_any_extension = ( self._normalize_download_extensions( config.get( @@ -228,6 +230,20 @@ class ProxyServer: return self.fronter._is_static_asset_url(url) async def start(self): + if self._warmup_before_listen: + log.info( + "Relay warmup in progress... waiting up to %.0fs before opening listeners", + self._warmup_timeout, + ) + ready = await self.fronter.wait_until_warm(timeout=self._warmup_timeout) + if ready: + log.info("Relay warmup complete — enabling HTTP/SOCKS listeners") + else: + log.warning( + "Relay warmup timed out after %.0fs — starting listeners anyway", + self._warmup_timeout, + ) + http_srv = await asyncio.start_server(self._on_client, self.host, self.port) socks_srv = None diff --git a/src/relay/domain_fronter.py b/src/relay/domain_fronter.py index 0c59689..874fdb9 100644 --- a/src/relay/domain_fronter.py +++ b/src/relay/domain_fronter.py @@ -201,6 +201,12 @@ class DomainFronter: "Execution monitor enabled: reporting total every %.0fs", self._execution_report_interval, ) + log.info( + "Batch config: micro=%.0fms macro=%.0fms max=%d", + self._batch_window_micro * 1000.0, + self._batch_window_macro * 1000.0, + self._batch_max, + ) # Exit node — optional second-hop relay with a non-Google exit IP. # Useful for sites that block GCP/Apps Script IPs (e.g. ChatGPT). @@ -881,6 +887,23 @@ class DomainFronter: # active its _keepalive_loop skips the ping; they do not double-fire. self._spawn(self._h1_container_keepalive()) + async def wait_until_warm(self, timeout: float | None = None) -> bool: + """Start warmup and wait until the initial pool-open phase finishes. + + Returns True if warmup finished before timeout, else False. + """ + await self._warm_pool() + if self._pool_ready.is_set(): + return True + try: + if timeout is None or timeout <= 0: + await self._pool_ready.wait() + else: + await asyncio.wait_for(self._pool_ready.wait(), timeout=timeout) + return True + except asyncio.TimeoutError: + return False + def _spawn(self, coro) -> asyncio.Task: """Create a task and keep a strong reference for clean cancellation.""" task = asyncio.create_task(coro) @@ -1801,20 +1824,32 @@ class DomainFronter: if method not in {"GET", "HEAD"} or body: return True + # Static assets are safe to batch in parallel as independent requests. + is_static = cls._is_static_asset_url(url) + if headers: - for name in STATEFUL_HEADER_NAMES: + # Hard stateful markers: preserve strict ordering / isolation. + for name in ("cookie", "authorization", "proxy-authorization"): if cls._header_value(headers, name): return True accept = cls._header_value(headers, "accept").lower() - if "text/html" in accept or "application/json" in accept: + if "text/html" in accept: return True fetch_mode = cls._header_value(headers, "sec-fetch-mode").lower() - if fetch_mode in {"navigate", "cors"}: + if fetch_mode == "navigate": return True - return not cls._is_static_asset_url(url) + fetch_dest = cls._header_value(headers, "sec-fetch-dest").lower() + if fetch_dest in {"document", "iframe", "frame"}: + return True + + # Non-static JSON/API calls are treated as stateful by default. + if (not is_static) and "application/json" in accept: + return True + + return not is_static # ── Batch collector ─────────────────────────────────────────── @@ -2241,14 +2276,9 @@ class DomainFronter: payloads: list[dict]) -> list[bytes]: """Parse a batch response body into individual results.""" text = resp_body.decode(errors="replace").strip() - try: - data = json.loads(text) - except json.JSONDecodeError: - m = re.search(r'\{.*\}', text, re.DOTALL) - try: - data = json.loads(m.group()) if m else None - except json.JSONDecodeError: - data = None + # Apps Script can wrap JSON inside an HTML shell; reuse the same + # robust loader used by single-response parsing. + data = load_relay_json(text) if not data: raise RuntimeError(f"Bad batch response: {text[:200]}")