/**
 * MHR-CFW Exit Worker — Cloudflare Workers companion to Code.cfw.gs.
 *
 * Architecture (alternative backend, opt-in):
 *   mhrv-rs → Apps Script (Code.cfw.gs) → THIS Worker → target site
 *
 * Apps Script in this configuration is a thin relay: it authenticates
 * the inbound request from mhrv-rs, then forwards to this Worker. The
 * Worker does the actual outbound fetch(es), base64-encodes the body,
 * and returns the same JSON envelope shape the standard Code.gs would
 * have returned. The mhrv-rs client is unaware that the work happened
 * on Cloudflare — same `{u, m, h, b, ct, r}` request, same `{s, h, b}`
 * response.
 *
 * Two request shapes are accepted:
 *   1. Single:  { k, u, m, h, b, ct, r }            → { s, h, b }
 *   2. Batch:   { k, q: [{u,m,h,b,ct,r}, ...] }     → { q: [{s,h,b} | {e}, ...] }
 *
 * The batch shape is what makes this design actually save Apps Script
 * UrlFetchApp quota. Without it, Code.cfw.gs would have to do
 * `UrlFetchApp.fetchAll(N worker calls)` to fan out an N-URL batch,
 * which costs N quota — same as the standard Code.gs. With it,
 * Code.cfw.gs does ONE fetch to this Worker (1 quota) and we fan out
 * inside the Worker via Promise.all. For a typical mhrv-rs batch of
 * 5-30 URLs that's a 5-30x reduction in GAS daily quota.
 *
 * Why bother:
 *   - Faster per-call latency (~10-50 ms at CF edge vs ~250-500 ms in
 *     Apps Script), which matters most for many small requests
 *     (Telegram realtime, page navigation chatter).
 *   - Apps Script *runtime* quota (90 min/day on consumer accounts)
 *     stretches further because GAS spends each call almost entirely
 *     on its single forward to the Worker rather than on body fetch
 *     + base64 + header munging.
 *   - With the batch shape (above), Apps Script *UrlFetchApp count*
 *     quota also stretches roughly Nx for an N-URL batch — typically
 *     5-30x for mhrv-rs.
 *
 * What this does NOT change:
 *   - Cloudflare anti-bot challenges on the destination. The exit IP
 *     becomes a Workers IP (inside Cloudflare's network), which CF's
 *     own anti-bot can fingerprint as a worker-internal request —
 *     often *stricter* than a Google IP. This is a different problem
 *     than DPI bypass; see docs.
 *   - YouTube long-form streaming gets WORSE, not better. Apps Script
 *     allows ~6 min wall per execution; CF Workers cap at 30s wall.
 *     The SABR cliff arrives sooner. Keep the standard `apps_script`
 *     mode (Code.gs) for YouTube-heavy use.
 *   - The 30s wall now applies to the *slowest URL in the batch*
 *     because Promise.all only resolves once every fetch finishes.
 *     mhrv-rs already retries failed batch items individually, so a
 *     single slow target degrades to a per-item timeout rather than
 *     a hard failure — but it's a real behavioural difference vs the
 *     per-URL wall under the standard Code.gs path.
 *
 * Deployment:
 *   1. Cloudflare dashboard → Workers & Pages → Create → Hello World
 *   2. Edit code → delete the template, paste this entire file
 *   3. Change AUTH_KEY below to the same value you set in Code.cfw.gs
 *      AND in your mhrv-rs config.json (auth_key). All three must match.
 *   4. Deploy. Note the *.workers.dev URL; paste it into Code.cfw.gs as
 *      WORKER_URL.
 *
 * SECURITY NOTE: anyone who learns this Worker's URL can POST to it, so
 * the AUTH_KEY check below is the only thing that keeps the Worker from
 * becoming an open HTTP-relay for arbitrary attackers if its URL leaks.
 * The check is cheap; do not skip it. While AUTH_KEY is still the
 * template placeholder the Worker fails closed and answers every request
 * with HTTP 500, so it must be changed before the Worker will relay
 * anything. Same secret as Code.cfw.gs by convention — if you want
 * compartmentalisation, use a different one and have Code.cfw.gs
 * forward both keys.
 *
 * Hardened over the upstream mhr-cfw worker.js by adding the AUTH_KEY
 * check and batch handling. Upstream credit: github.com/denuitt1/mhr-cfw.
 */

// Shared secret. Every inbound request body must carry this exact value
// as its `k` field; anything else is answered with 401 "unauthorized".
// Replace the placeholder before deploying.
const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
// The untouched template placeholder. The fetch handler compares AUTH_KEY
// against it and refuses every request with a 500 while the two are
// still equal. Do NOT edit this line — only AUTH_KEY above.
const DEFAULT_AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";

// Loop-prevention tag. The Worker tags its OUTBOUND request to the
// target with `x-relay-hop: 1` (see processOne). If a subsequent
// request comes back into the Worker with that header set, the Worker
// has been chained back to itself somehow — most likely the user's
// `item.u` resolved to this Worker's own URL. Bail out with a 508
// instead of fetching, so the Worker cannot keep re-invoking itself in
// an unbounded request loop.
//
// Note: Code.cfw.gs does NOT set this header on its GAS→Worker call
// (and could not check for it on inbound anyway — Apps Script's
// doPost event doesn't expose request headers). So this guard
// catches Worker↔Worker cycles, not GAS↔Worker cycles. The
// `targetUrl.hostname === selfHost` check in processOne is the
// primary defence for the common misconfiguration.
const RELAY_HOP_HEADER = "x-relay-hop";

// Soft cap on batch size. Cloudflare Workers allow up to 50
// subrequests per invocation on the free tier (1000 on paid). We
// keep a margin for retries and internal CF traffic. mhrv-rs's
// typical batches are 5-30 URLs so this is rarely the binding limit.
// A batch longer than this is rejected as a whole with HTTP 400
// before any item is fetched.
//
// **Must match `WORKER_BATCH_CHUNK` in Code.cfw.gs.** If the GAS side
// chunks at a different size, oversized chunks here return a top-level
// error and the entire chunk's slots fail. Tune both together.
const MAX_BATCH_SIZE = 40;

// Hop-by-hop headers and headers Cloudflare manages itself. Stripped
// before forwarding so the inbound request doesn't poison the outbound.
// Kept in sync with Code.cfw.gs / Code.gs SKIP_HEADERS so the Worker
// is correct as a defence-in-depth even when called directly (the
// AUTH_KEY check is the primary gate, but GAS scrubs first in the
// normal flow).
//
// Entries are lowercase: processOne lowercases each caller-supplied
// header name before looking it up here.
const SKIP_HEADERS = new Set([
  "host",
  "connection",
  "content-length",
  "transfer-encoding",
  "proxy-connection",
  "proxy-authorization",
  "priority",
  "te",
]);

export default {
  /**
   * Worker entry point. Runs a fixed sequence of gates and then hands
   * the payload to processOne, either once (single mode) or once per
   * array slot (batch mode).
   *
   * Gate order, first failure wins:
   *   500  AUTH_KEY still equals the template placeholder
   *   405  request method is not POST
   *   508  request carries our own relay-hop tag
   *   400  body is not valid JSON
   *   401  payload is falsy or its `k` does not equal AUTH_KEY
   *
   * @param {Request} request - Inbound HTTP request.
   * @returns {Promise<Response>} JSON response built by `json`.
   */
  async fetch(request) {
    // Fail closed while the secret is still the placeholder: a loud
    // 500 is preferable to silently running as an open relay for
    // anyone who sends the well-known default.
    if (AUTH_KEY === DEFAULT_AUTH_KEY) {
      return json({ e: "configure AUTH_KEY in worker.js" }, 500);
    }

    if (request.method !== "POST") {
      return json({ e: "method not allowed" }, 405);
    }

    // Our own outbound tag arriving on an inbound request means the
    // Worker has been chained back to itself.
    const hopTag = request.headers.get(RELAY_HOP_HEADER);
    if (hopTag === "1") {
      return json({ e: "loop detected" }, 508);
    }

    let payload;
    try {
      payload = await request.json();
    } catch (_err) {
      return json({ e: "bad json" }, 400);
    }

    // Same error shape as the Code.cfw.gs unauthorized reply so that
    // downstream error handling is uniform. No decoy HTML here: GAS is
    // the public surface, this URL is normally not discoverable.
    const authorized = Boolean(payload) && payload.k === AUTH_KEY;
    if (!authorized) {
      return json({ e: "unauthorized" }, 401);
    }

    const { hostname: selfHost } = new URL(request.url);
    const { q: batch } = payload;

    // Batch mode: { k, q: [{u,m,h,b,ct,r}, ...] }. All slots run
    // concurrently; a failing slot becomes an `{e}` entry in the result
    // array and the envelope stays 200. Only a batch that is malformed
    // at the top level (too long) gets a non-200 answer.
    if (Array.isArray(batch)) {
      const count = batch.length;
      if (count === 0) {
        return json({ q: [] });
      }
      if (count > MAX_BATCH_SIZE) {
        return json({ e: `batch too large (${count} > ${MAX_BATCH_SIZE})` }, 400);
      }

      const runSlot = async (item) => {
        try {
          return await processOne(item, selfHost);
        } catch (err) {
          return { e: "fetch failed: " + String(err) };
        }
      };
      const slots = await Promise.all(batch.map((item) => runSlot(item)));
      return json({ q: slots });
    }

    // Single mode: { k, u, m, h, b, ct, r }
    let outcome;
    try {
      outcome = await processOne(payload, selfHost);
    } catch (err) {
      return json({ e: "fetch failed: " + String(err) }, 502);
    }

    // An `{e}` result is surfaced as HTTP 400 in single mode so that
    // mhrv-rs sees the same shape as with the standard Code.gs, where
    // "bad url" and friends are already client-error-coded.
    return outcome.e ? json(outcome, 400) : json(outcome);
  },
};

/**
 * Process one item, whether it came in as the top-level single
 * request or as one slot of a batch. Always resolves to a plain
 * object: every expected failure (validation, network error, error
 * while reading the response body) is reported as `{ e }` instead of
 * being thrown, so the callers' catch handlers only ever see
 * programmer errors.
 *
 * Result shape mirrors what Code.gs would return for the same item:
 *   - Success: { s: status, h: {...}, b: base64Body }
 *   - Validation / fetch failure: { e: "..." }
 *
 * @param {object} item - Request descriptor:
 *   `u` target URL (http/https, required), `m` method (default GET),
 *   `h` header map, `b` base64 request body, `ct` content type used
 *   when `h` carries none, `r` set to `false` to not follow redirects.
 * @param {string} selfHost - Hostname this Worker is served from;
 *   targets on that hostname are refused.
 * @returns {Promise<object>} `{ s, h, b }` on success, `{ e }` on failure.
 */
async function processOne(item, selfHost) {
  if (!item || typeof item !== "object") {
    return { e: "bad item" };
  }
  if (!item.u || typeof item.u !== "string" || !/^https?:\/\//i.test(item.u)) {
    return { e: "bad url" };
  }

  let targetUrl;
  try {
    targetUrl = new URL(item.u);
  } catch (_err) {
    return { e: "bad url" };
  }
  if (targetUrl.hostname === selfHost) {
    return { e: "self-fetch blocked" };
  }

  // A truthy non-string method (number, object, ...) has no
  // toUpperCase(); reject it explicitly rather than letting a TypeError
  // escape and be reported as a misleading "fetch failed".
  if (item.m && typeof item.m !== "string") {
    return { e: "bad method" };
  }
  const method = (item.m || "GET").toUpperCase();

  const headers = new Headers();
  if (item.h && typeof item.h === "object") {
    for (const [k, v] of Object.entries(item.h)) {
      if (SKIP_HEADERS.has(k.toLowerCase())) continue;
      try {
        headers.set(k, v);
      } catch (_err) {
        // Worker rejects some headers (e.g. forbidden ones); skip
        // rather than fail the whole item.
      }
    }
  }
  // Set last so a caller-supplied value can never mask the loop tag.
  headers.set(RELAY_HOP_HEADER, "1");

  const fetchOptions = {
    method,
    headers,
    redirect: item.r === false ? "manual" : "follow",
  };

  // Code.gs/UrlFetchApp tolerates a body on GET/HEAD (browsers don't
  // do this, but custom clients sometimes do); Workers' native fetch
  // throws TypeError if you set a body on a body-prohibited method.
  // To match Code.gs's permissiveness, silently drop the body for
  // those methods rather than failing the whole item.
  const bodyAllowed = method !== "GET" && method !== "HEAD";
  if (item.b && bodyAllowed) {
    try {
      fetchOptions.body = Uint8Array.from(atob(item.b), (c) => c.charCodeAt(0));
    } catch (_err) {
      return { e: "bad body base64" };
    }
    if (item.ct && !headers.has("content-type")) {
      headers.set("content-type", item.ct);
    }
  }

  let resp;
  try {
    resp = await fetch(targetUrl.toString(), fetchOptions);
  } catch (err) {
    return { e: "fetch failed: " + String(err) };
  }

  // The body is streamed after the headers arrive, so reading it can
  // still fail (connection reset, stream error). Report that the same
  // way as a failed fetch instead of throwing to the caller.
  let bytes;
  try {
    bytes = new Uint8Array(await resp.arrayBuffer());
  } catch (err) {
    return { e: "fetch failed: " + String(err) };
  }

  return {
    s: resp.status,
    h: collectResponseHeaders(resp.headers),
    b: bytesToBase64(bytes),
  };
}

/**
 * Base64-encode a byte array. The intermediate binary string is built
 * in 32 KiB slices to avoid a call-stack overflow from
 * String.fromCharCode.apply on big bodies.
 *
 * @param {Uint8Array} bytes
 * @returns {string}
 */
function bytesToBase64(bytes) {
  const chunkSize = 0x8000;
  let binary = "";
  for (let i = 0; i < bytes.length; i += chunkSize) {
    binary += String.fromCharCode.apply(null, bytes.subarray(i, i + chunkSize));
  }
  return btoa(binary);
}

/**
 * Flatten response headers into a plain object for the JSON envelope.
 *
 * Set-Cookie is the one header that can legitimately occur several
 * times and cannot be comma-joined (cookie attributes such as Expires
 * contain commas). Assigning by key would keep only the last cookie,
 * so repeated cookies are emitted as an array of strings — the same
 * representation Apps Script's getAllHeaders uses for repeated
 * headers. A single cookie stays a plain string.
 *
 * Note: Headers.forEach delivers keys lowercased per the Fetch
 * spec, whereas Code.gs's getAllHeaders preserves the origin's
 * casing. mhrv-rs treats headers case-insensitively, but anything
 * downstream that does a case-sensitive string compare will see
 * a backend-dependent difference. There is no Workers API to
 * recover the origin casing, so we accept the divergence.
 *
 * @param {Headers} headers - Response headers.
 * @returns {Object<string, string|string[]>}
 */
function collectResponseHeaders(headers) {
  const collected = {};
  // Prefer the dedicated accessor where the runtime offers it; fall
  // back to gathering the entries seen during iteration.
  const hasCookieApi = typeof headers.getSetCookie === "function";
  const cookies = hasCookieApi ? headers.getSetCookie() : [];

  headers.forEach((value, name) => {
    if (name.toLowerCase() !== "set-cookie") {
      collected[name] = value;
    } else if (!hasCookieApi) {
      cookies.push(value);
    }
  });

  if (cookies.length === 1) {
    collected["set-cookie"] = cookies[0];
  } else if (cookies.length > 1) {
    collected["set-cookie"] = cookies;
  }
  return collected;
}

/**
 * Build a JSON HTTP response.
 *
 * @param {*} obj - Value to serialise with JSON.stringify as the body.
 * @param {number} [status=200] - HTTP status code of the response.
 * @returns {Response} Response with `content-type: application/json`.
 */
function json(obj, status = 200) {
  const payload = JSON.stringify(obj);
  const init = {
    status,
    headers: { "content-type": "application/json" },
  };
  return new Response(payload, init);
}