diff --git a/apps_script/Code.gs b/apps_script/Code.gs index 51e508a..411295a 100644 --- a/apps_script/Code.gs +++ b/apps_script/Code.gs @@ -26,6 +26,10 @@ const SKIP_HEADERS = { "x-forwarded-port": 1, "x-real-ip": 1, "forwarded": 1, "via": 1, // Internal relay hop-count header — must not be forwarded to target sites. "x-mhr-hop": 1, + // UrlFetchApp does not decompress gzip/br/deflate responses — stripping + // accept-encoding forces targets to reply with plain (uncompressed) bodies + // so the relay never has to handle compressed content it cannot decode. + "accept-encoding": 1, }; // Pattern that matches any Google Apps Script execution endpoint. diff --git a/apps_script/cloudflare_worker.js b/apps_script/cloudflare_worker.js index 58c10c5..cf9d9ea 100644 --- a/apps_script/cloudflare_worker.js +++ b/apps_script/cloudflare_worker.js @@ -19,6 +19,9 @@ const STRIP_HEADERS = new Set([ "via", // Internal relay hop header — must not propagate to the final target. "x-mhr-hop", + // Workers cannot decompress gzip/br/deflate — stripping accept-encoding + // forces targets to reply with plain bodies the Worker can forward as-is. + "accept-encoding", ]); function decodeBase64ToBytes(input) { diff --git a/apps_script/vps_exit_node.py b/apps_script/vps_exit_node.py index f6edf5b..6f74cfc 100644 --- a/apps_script/vps_exit_node.py +++ b/apps_script/vps_exit_node.py @@ -77,6 +77,9 @@ _STRIP_HEADERS = frozenset( "x-real-ip", "forwarded", "via", + # urllib.request cannot decompress gzip/br/deflate — stripping this + # forces targets to reply with plain bodies the server can forward. + "accept-encoding", ] ) diff --git a/src/relay/domain_fronter.py b/src/relay/domain_fronter.py index 03440a6..d48c37c 100644 --- a/src/relay/domain_fronter.py +++ b/src/relay/domain_fronter.py @@ -1202,9 +1202,18 @@ class DomainFronter: body of the outer Apps Script relay call, so Apps Script POSTs it to the exit node URL on our behalf. """ - # Build inner payload: what the exit node will execute + # Build inner payload: what the exit node will execute. + # Strip accept-encoding from the inner headers so the target site + # returns an uncompressed body. Exit nodes (CF Worker, VPS) make + # plain Python/JS fetch() calls that don't auto-decompress, so a + # compressed response body would be forwarded as garbled bytes. inner = dict(payload) inner["k"] = self._exit_node_psk + if isinstance(inner.get("h"), dict): + inner["h"] = { + k: v for k, v in inner["h"].items() + if k.lower() != "accept-encoding" + } inner_json = json.dumps(inner).encode() # Build outer payload: what Apps Script will fetch diff --git a/src/relay/relay_response.py b/src/relay/relay_response.py index 8248267..1260fe6 100644 --- a/src/relay/relay_response.py +++ b/src/relay/relay_response.py @@ -22,9 +22,11 @@ classify_relay_error(raw) -> str import base64 import codecs +import gzip import json import logging import re +import zlib log = logging.getLogger("Fronter") @@ -230,6 +232,39 @@ def parse_relay_json(data: dict, max_body_bytes: int) -> bytes: status = data.get("s", 200) resp_headers = data.get("h", {}) resp_body = base64.b64decode(data.get("b", "")) + + # ── Decompress if the target sent a compressed body ───────────────────────── + # UrlFetchApp does NOT auto-decompress gzip/deflate responses, so if the + # client's Accept-Encoding header was forwarded and the server compressed + # its reply, we receive raw compressed bytes. We decompress here so the + # browser always gets plain content (and we can safely drop the header). + _ce = "" + for _k, _v in resp_headers.items(): + if _k.lower() == "content-encoding": + _ce = str(_v).lower().strip() + break + if _ce == "gzip": + try: + resp_body = gzip.decompress(resp_body) + except Exception as _exc: + log.debug("gzip decompress skipped (%s) — body may already be plain", _exc) + elif _ce in ("deflate", "zlib"): + try: + # Try zlib wrapper first, then raw deflate + resp_body = zlib.decompress(resp_body) + except Exception: + try: + resp_body = zlib.decompress(resp_body, -15) + except Exception as _exc: + log.debug("deflate decompress skipped (%s)", _exc) + elif _ce == "br": + # Brotli is uncommon in this relay path but log if seen so it is visible + log.debug("brotli-encoded response from target — install 'brotli' package for support") + try: + import brotli # type: ignore + resp_body = brotli.decompress(resp_body) + except Exception: + pass # leave body as-is; browser will likely fail gracefully if len(resp_body) > max_body_bytes: return error_response( 502,