mirror of
https://github.com/therealaleph/MasterHttpRelayVPN-RUST.git
synced 2026-05-17 21:24:48 +03:00
feat(code.gs): gzip cache bodies + status-aware TTL (#953)
Three orthogonal cache-correctness wins on `Code.gs`'s spreadsheet response cache. All compound: more URLs become cacheable, the cache stops being poisoned by transient outages, and 4xx storms cost zero quota. **1. Gzip body before base64 storage** (new `Z` column, base64(gzip(rawBytes)) when worthwhile, skipped when already encoded or under 256 bytes). 3-5× compression on text bodies — many ~100-150 KB responses now fit under the 35 KB cell ceiling that previously rejected them. `_getFromCache` decompresses on hit and re-encodes for the wire — relay protocol's `b` field stays `base64(rawBytes)`, no client change. **2. 5xx never enters the cache.** A flapping upstream that returned 503 once was previously pinned for 24h, breaking the URL for every subsequent client. `status >= 500` now early-returns the live response without writing. **3. Negative caching for persistent 4xx** (404/410/451 → 5-minute TTL when upstream is silent on Cache-Control). Long enough to absorb favicon / telemetry / dev-tools-probe storms; short enough that transient 404s self-heal. Origin-stated max-age still wins when present. **Schema migration**: cache sheet grows 7 → 8 columns (added `Z` flag). Existing 7-column rows read back with `Z = undefined` (falsy → falls through the not-gzipped branch) — fully backward-compatible, no user action required. **Verified locally** on top of v1.9.18 / main: - `node --check Code.gs` clean - `cargo build --bins --lib` clean - `cargo test --lib --release`: 208/208 (no client-side change so this is a sanity check only) Behavior changes flagged in the PR body for users relying on the old "everything cached for 24h" default — the 5xx-never-cached fix is intentional, the 4xx 5-min default is overrideable via origin Cache-Control. Squash-merging. Reviewed via Anthropic Claude. Co-Authored-By: dazzling-no-more <noreply@github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+99
-25
@@ -10,9 +10,14 @@
|
|||||||
* Set CACHE_SPREADSHEET_ID to a valid Google Sheet ID (must be owned by
|
* Set CACHE_SPREADSHEET_ID to a valid Google Sheet ID (must be owned by
|
||||||
* the same account). When enabled, public GET requests are stored in the
|
* the same account). When enabled, public GET requests are stored in the
|
||||||
* sheet and served from there on repeat visits, reducing UrlFetchApp
|
* sheet and served from there on repeat visits, reducing UrlFetchApp
|
||||||
* quota consumption. The cache is Vary-aware (Accept-Encoding and
|
* quota consumption. Bodies are gzipped before base64 storage so larger
|
||||||
* Accept-Language are hashed into the compound cache key). Leave
|
* responses fit under the per-cell character limit, and persistent
|
||||||
* CACHE_SPREADSHEET_ID as-is to disable caching entirely — zero overhead.
|
* 4xx (404/410/451) get a short negative-cache TTL so buggy clients
|
||||||
|
* that hammer dead URLs cost zero quota; 5xx is never cached so a
|
||||||
|
* flapping upstream cannot poison a 24h slot with a transient outage.
|
||||||
|
* The cache is Vary-aware (Accept-Encoding and Accept-Language are
|
||||||
|
* hashed into the compound cache key). Leave CACHE_SPREADSHEET_ID as-is
|
||||||
|
* to disable caching entirely — zero overhead.
|
||||||
*
|
*
|
||||||
* DEPLOYMENT:
|
* DEPLOYMENT:
|
||||||
* 1. Go to https://script.google.com → New project
|
* 1. Go to https://script.google.com → New project
|
||||||
@@ -54,6 +59,22 @@ const CACHE_MAX_ROWS = 5000; // circular buffer capacity
|
|||||||
const CACHE_MAX_BODY_BYTES = 35000; // skip responses larger than ~35 KB
|
const CACHE_MAX_BODY_BYTES = 35000; // skip responses larger than ~35 KB
|
||||||
const CACHE_DEFAULT_TTL_SECONDS = 86400; // 24-hour fallback when no Cache-Control
|
const CACHE_DEFAULT_TTL_SECONDS = 86400; // 24-hour fallback when no Cache-Control
|
||||||
|
|
||||||
|
// ── Negative Caching ────────────────────────────────────────
|
||||||
|
// Persistent 4xx errors get a short TTL when the upstream is silent on
|
||||||
|
// Cache-Control. Buggy clients hammer dead URLs (favicons, telemetry
|
||||||
|
// pixels, dev-tools probes); a 5-minute floor absorbs the storm at
|
||||||
|
// zero quota cost while letting transient 404s self-heal quickly.
|
||||||
|
// 5xx is never cached — see _fetchAndCache.
|
||||||
|
const NEGATIVE_CACHE_STATUSES = { 404: 1, 410: 1, 451: 1 };
|
||||||
|
const NEGATIVE_CACHE_TTL_SECONDS = 300;
|
||||||
|
|
||||||
|
// ── Body Compression ────────────────────────────────────────
|
||||||
|
// Bodies are gzipped before base64 storage when worthwhile. Gzip has
|
||||||
|
// ~20 bytes of header overhead, so very small payloads can bloat;
|
||||||
|
// skip below this threshold. Already-encoded responses (gzip/br/etc.)
|
||||||
|
// are stored as-is to avoid double-compression.
|
||||||
|
const GZIP_MIN_BYTES = 256;
|
||||||
|
|
||||||
// ── Vary-Aware Cache Key ────────────────────────────────────
|
// ── Vary-Aware Cache Key ────────────────────────────────────
|
||||||
// These request headers are hashed into the compound cache key
|
// These request headers are hashed into the compound cache key
|
||||||
// alongside the URL so that responses with different encodings
|
// alongside the URL so that responses with different encodings
|
||||||
@@ -320,9 +341,12 @@ function _initCacheSheet() {
|
|||||||
var sheet = ss.getSheetByName(CACHE_SHEET_NAME);
|
var sheet = ss.getSheetByName(CACHE_SHEET_NAME);
|
||||||
if (!sheet) {
|
if (!sheet) {
|
||||||
sheet = ss.insertSheet(CACHE_SHEET_NAME);
|
sheet = ss.insertSheet(CACHE_SHEET_NAME);
|
||||||
// Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At
|
// Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At | Z
|
||||||
sheet.getRange(1, 1, 1, 7).setValues([[
|
// Z is 1 when Body is base64(gzip(rawBytes)), 0/empty when base64(rawBytes).
|
||||||
"URL_Hash", "URL", "Status", "Headers", "Body", "Timestamp", "Expires_At"
|
// Legacy 7-column rows from older deployments read back as Z=undefined,
|
||||||
|
// which the cache hit path treats as "not gzipped" — fully compatible.
|
||||||
|
sheet.getRange(1, 1, 1, 8).setValues([[
|
||||||
|
"URL_Hash", "URL", "Status", "Headers", "Body", "Timestamp", "Expires_At", "Z"
|
||||||
]]);
|
]]);
|
||||||
}
|
}
|
||||||
return sheet;
|
return sheet;
|
||||||
@@ -539,17 +563,34 @@ function _getFromCache(url, reqHeaders) {
|
|||||||
var found = finder.findNext();
|
var found = finder.findNext();
|
||||||
|
|
||||||
if (found) {
|
if (found) {
|
||||||
var row = sheet.getRange(found.getRow(), 1, 1, 7).getValues()[0];
|
// 8-column read. Legacy 7-column rows return undefined for the Z slot,
|
||||||
|
// which is falsy and falls through the not-gzipped branch below — fully
|
||||||
|
// compatible with caches written before the gzip-storage change.
|
||||||
|
var row = sheet.getRange(found.getRow(), 1, 1, 8).getValues()[0];
|
||||||
|
|
||||||
var expiresAt = row[6];
|
var expiresAt = row[6];
|
||||||
if (expiresAt && expiresAt instanceof Date && expiresAt < new Date()) {
|
if (expiresAt && expiresAt instanceof Date && expiresAt < new Date()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var storedBody = row[4];
|
||||||
|
var body;
|
||||||
|
if (row[7]) {
|
||||||
|
// Stored as base64(gzip(rawBytes)). The relay protocol's `b` field
|
||||||
|
// is base64(rawBytes), so decompress and re-encode for the wire.
|
||||||
|
var gzipped = Utilities.base64Decode(storedBody);
|
||||||
|
var raw = Utilities
|
||||||
|
.ungzip(Utilities.newBlob(gzipped, "application/x-gzip"))
|
||||||
|
.getBytes();
|
||||||
|
body = Utilities.base64Encode(raw);
|
||||||
|
} else {
|
||||||
|
body = storedBody;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
status: row[2],
|
status: row[2],
|
||||||
headers: _refreshCachedHeaders(row[3], row[5]),
|
headers: _refreshCachedHeaders(row[3], row[5]),
|
||||||
body: row[4],
|
body: body,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@@ -557,9 +598,11 @@ function _getFromCache(url, reqHeaders) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch a URL and store the response in the spreadsheet cache
|
* Fetch a URL and store the response in the spreadsheet cache
|
||||||
* using a circular buffer (O(1) writes). Skips storage when the
|
* using a circular buffer (O(1) writes). Skips storage on 5xx
|
||||||
* encoded body exceeds CACHE_MAX_BODY_BYTES or when Cache-Control
|
* (transient outages must not poison a 24h slot), when Cache-Control
|
||||||
* forbids caching. Returns the fetch result regardless.
|
* forbids caching, or when the post-compression body exceeds
|
||||||
|
* CACHE_MAX_BODY_BYTES. Always returns the fetch result so the caller
|
||||||
|
* can serve the live response even when the cache write is skipped.
|
||||||
*/
|
*/
|
||||||
function _fetchAndCache(url, reqHeaders) {
|
function _fetchAndCache(url, reqHeaders) {
|
||||||
var sheet = _initCacheSheet();
|
var sheet = _initCacheSheet();
|
||||||
@@ -569,22 +612,52 @@ function _fetchAndCache(url, reqHeaders) {
|
|||||||
var response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
|
var response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
|
||||||
var status = response.getResponseCode();
|
var status = response.getResponseCode();
|
||||||
var headers = _respHeaders(response);
|
var headers = _respHeaders(response);
|
||||||
var body = Utilities.base64Encode(response.getContent());
|
var bodyBytes = response.getContent();
|
||||||
|
var rawB64 = Utilities.base64Encode(bodyBytes);
|
||||||
|
var headersJson = JSON.stringify(headers);
|
||||||
|
var liveResult = { status: status, headers: headersJson, body: rawB64 };
|
||||||
|
|
||||||
// Cell-size safety gate
|
// 5xx never enters the cache. A flapping upstream returning 503 once
|
||||||
if (body.length > CACHE_MAX_BODY_BYTES) {
|
// would otherwise pin that response for 24h and break the URL for
|
||||||
return { status: status, headers: JSON.stringify(headers), body: body };
|
// every subsequent client until expiry.
|
||||||
}
|
if (status >= 500) return liveResult;
|
||||||
|
|
||||||
// TTL extraction
|
|
||||||
var cacheControl =
|
var cacheControl =
|
||||||
headers["Cache-Control"] || headers["cache-control"] || null;
|
headers["Cache-Control"] || headers["cache-control"] || null;
|
||||||
var ttlSeconds = _parseMaxAge(cacheControl);
|
var ttlSeconds = _parseMaxAge(cacheControl);
|
||||||
|
|
||||||
if (ttlSeconds === 0) {
|
if (ttlSeconds === 0) return liveResult;
|
||||||
return { status: status, headers: JSON.stringify(headers), body: body };
|
|
||||||
|
// Negative caching: cap TTL on persistent 4xx when upstream is silent.
|
||||||
|
// If they explicitly stated a max-age for the 404, we honor it instead
|
||||||
|
// — the origin knows best when it spoke up.
|
||||||
|
if (NEGATIVE_CACHE_STATUSES[status] && !cacheControl) {
|
||||||
|
ttlSeconds = NEGATIVE_CACHE_TTL_SECONDS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decide whether to gzip-store. Skip when upstream is already encoded
|
||||||
|
// (avoids double-compressing gzip/br/zstd payloads) and when the body
|
||||||
|
// is too small to overcome gzip's header overhead.
|
||||||
|
var contentEncoding = String(
|
||||||
|
headers["Content-Encoding"] || headers["content-encoding"] || ""
|
||||||
|
).toLowerCase();
|
||||||
|
var alreadyEncoded = contentEncoding && contentEncoding !== "identity";
|
||||||
|
var storedBody;
|
||||||
|
var storedZ;
|
||||||
|
if (alreadyEncoded || bodyBytes.length < GZIP_MIN_BYTES) {
|
||||||
|
storedBody = rawB64;
|
||||||
|
storedZ = 0;
|
||||||
|
} else {
|
||||||
|
storedBody = Utilities.base64Encode(
|
||||||
|
Utilities.gzip(Utilities.newBlob(bodyBytes)).getBytes()
|
||||||
|
);
|
||||||
|
storedZ = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cell-size safety gate, applied after compression so that a 100 KB
|
||||||
|
// text body that gzips to ~15 KB now fits where it previously bailed.
|
||||||
|
if (storedBody.length > CACHE_MAX_BODY_BYTES) return liveResult;
|
||||||
|
|
||||||
var hash = _getCacheKey(url, reqHeaders);
|
var hash = _getCacheKey(url, reqHeaders);
|
||||||
var timestamp = new Date();
|
var timestamp = new Date();
|
||||||
var expiresAt = new Date(timestamp.getTime() + ttlSeconds * 1000);
|
var expiresAt = new Date(timestamp.getTime() + ttlSeconds * 1000);
|
||||||
@@ -598,10 +671,11 @@ function _fetchAndCache(url, reqHeaders) {
|
|||||||
hash,
|
hash,
|
||||||
url,
|
url,
|
||||||
status,
|
status,
|
||||||
JSON.stringify(headers),
|
headersJson,
|
||||||
body,
|
storedBody,
|
||||||
timestamp.toISOString(),
|
timestamp.toISOString(),
|
||||||
expiresAt,
|
expiresAt,
|
||||||
|
storedZ,
|
||||||
];
|
];
|
||||||
|
|
||||||
// Circular buffer write (O(1))
|
// Circular buffer write (O(1))
|
||||||
@@ -609,14 +683,14 @@ function _fetchAndCache(url, reqHeaders) {
|
|||||||
if (metaSheet) {
|
if (metaSheet) {
|
||||||
_ensureRowsAllocated(sheet);
|
_ensureRowsAllocated(sheet);
|
||||||
var writeRow = _getNextCursor(sheet, metaSheet);
|
var writeRow = _getNextCursor(sheet, metaSheet);
|
||||||
sheet.getRange(writeRow, 1, 1, 7).setValues([rowData]);
|
sheet.getRange(writeRow, 1, 1, 8).setValues([rowData]);
|
||||||
_advanceCursor(metaSheet, writeRow);
|
_advanceCursor(metaSheet, writeRow);
|
||||||
} else {
|
} else {
|
||||||
// Fallback: simple append if meta sheet is unavailable
|
// Fallback: simple append if meta sheet is unavailable
|
||||||
sheet.appendRow(rowData);
|
sheet.appendRow(rowData);
|
||||||
}
|
}
|
||||||
|
|
||||||
return { status: status, headers: JSON.stringify(headers), body: body };
|
return liveResult;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -684,7 +758,7 @@ function clearExpiredCache() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (var j = 0; j < rowsToClear.length; j++) {
|
for (var j = 0; j < rowsToClear.length; j++) {
|
||||||
sheet.getRange(rowsToClear[j], 1, 1, 7).clearContent();
|
sheet.getRange(rowsToClear[j], 1, 1, 8).clearContent();
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log("Cleared " + rowsToClear.length + " expired entries (" +
|
console.log("Cleared " + rowsToClear.length + " expired entries (" +
|
||||||
@@ -696,7 +770,7 @@ function clearEntireCache() {
|
|||||||
if (sheet) {
|
if (sheet) {
|
||||||
var totalRows = sheet.getDataRange().getNumRows();
|
var totalRows = sheet.getDataRange().getNumRows();
|
||||||
if (totalRows > 1) {
|
if (totalRows > 1) {
|
||||||
sheet.getRange(2, 1, totalRows - 1, 7).clearContent();
|
sheet.getRange(2, 1, totalRows - 1, 8).clearContent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user