From 989fec3cec7a2e838299c50b979cf21248a1ce55 Mon Sep 17 00:00:00 2001 From: Sarto Date: Thu, 30 Apr 2026 00:47:11 +0330 Subject: [PATCH] feat: :sparkles: relays for download media --- README-FA.md | 64 +- README.md | 100 ++-- cmd/server/main.go | 97 ++- internal/client/fetcher.go | 75 +++ internal/client/relay_info_test.go | 25 + internal/protocol/dns.go | 4 + internal/protocol/media.go | 142 +++-- internal/protocol/media_test.go | 129 ++-- internal/protocol/relay.go | 79 +++ internal/server/feed.go | 84 +++ internal/server/github_relay.go | 685 ++++++++++++++++++++++ internal/server/github_relay_test.go | 242 ++++++++ internal/server/media.go | 207 +++++-- internal/server/media_http.go | 14 +- internal/server/media_integration_test.go | 25 +- internal/server/media_telegram.go | 21 +- internal/server/media_test.go | 30 +- internal/server/public.go | 49 +- internal/server/server.go | 89 ++- internal/server/telegram.go | 61 +- internal/server/xpublic.go | 44 +- internal/web/media.go | 47 +- internal/web/relay_info.go | 228 +++++++ internal/web/static/index.html | 225 +++++-- internal/web/web.go | 16 + scripts/install.sh | 103 ++++ 26 files changed, 2517 insertions(+), 368 deletions(-) create mode 100644 internal/client/relay_info_test.go create mode 100644 internal/protocol/relay.go create mode 100644 internal/server/github_relay.go create mode 100644 internal/server/github_relay_test.go create mode 100644 internal/web/relay_info.go diff --git a/README-FA.md b/README-FA.md index ba1f3ae..9decb93 100644 --- a/README-FA.md +++ b/README-FA.md @@ -12,19 +12,31 @@ thefeed یک سیستم تونل DNS است که به شما اجازه می‌دهد پیام‌های کانال‌های تلگرام را حتی وقتی تلگرام و اینترنت فیلتر شده، بخوانید. تنها چیزی که نیاز دارید **DNS** است — که تقریباً هیچ‌وقت مسدود نمی‌شود. +
+ ``` - کلاینت درخواست DNS TXT سرور MTProto تلگرام - (رابط وب) ─────────────────────▸ (DNS) ──────────────────────▸ API - ◂───────────────────── ◂────────────────────── - پاسخ رمزنگاری‌شده + Encrypted DNS TXT + ┌──────────────┐ feed meta + small media ┌──────────────────┐ MTProto ┌──────────┐ + │ │ ─────────────────────────▸ │ Server │ ─────────────▸ │ Telegram │ + │ Client │ ◂───────────────────────── │ (DNS auth + │ ◂───────────── │ API │ + │ (Web UI) │ │ media relays) │ RSS / HTTP ┌──────────┐ + │ │ large media (fast relay) │ │ ─────────────▸ │ Nitter │ + │ │ ◂───── api.github.com ◂── │ │ ◂───────────── │ (X feed) │ + └──────────────┘ (uploaded by server) └──────────────────┘ └──────────┘ ``` +
+ ## ✨ ویژگی‌ها ### سمت سرور (خارج از ایران) - اتصال به تلگرام و خواندن پیام‌ کانال‌ها - دریافت پست‌های عمومی X از حساب‌های تنظیم‌شده (بدون لاگین) -- سرو داده‌ها به صورت پاسخ DNS TXT رمزنگاری‌شده +- سرو متادیتا و فایل‌های کوچک به صورت پاسخ DNS TXT رمزنگاری‌شده +- **رله‌های مدیا** — یک فایل، چند مسیر تحویل: + - **رله DNS** (کند، مقاوم به سانسور) فایل را به بلاک‌های DNS تقسیم می‌کند + - **رله گیتهاب** (سریع، پیش‌فرض خاموش) بایت‌ها را روی یک ریپازیتوری گیتهاب می‌گذارد و کلاینت با HTTPS ساده می‌گیرد؛ مناسب فایل‌های بزرگ‌تر از سقف DNS + - رله‌های آینده در کنار همین‌ها اضافه می‌شوند بدون اینکه کلاینت‌های قدیمی را خراب کنند - padding تصادفی برای جلوگیری از شناسایی DPI - ذخیره‌سازی session — یک‌بار لاگین، همیشه اجرا - پشتیبانی از حالت بدون تلگرام (`--no-telegram`) — خواندن کانال‌های عمومی بدون نیاز به ورود به تلگرام @@ -33,6 +45,8 @@ thefeed یک سیستم تونل DNS است که به شما اجازه می‌ - رابط کاربری وب با پشتیبانی RTL/فارسی (فونت وزیرمتن) - ارسال پیام به کانال‌ها و چت‌های خصوصی (نیاز به `--allow-manage` سمت سرور و ورود به تلگرام) - مدیریت کانال‌ها از راه دور ( افزودن/حذف کانال‌ها از طریق دستورات ادمین وقتی `--allow-manage` فعال است) +- **دانلود مدیا با رله‌ها** — اگر فایل روی رله سریع (گیتهاب) موجود باشد، اول از همان مسیر تلاش می‌کند، در صورت خطا چند بار retry می‌کند و قبل از سوییچ به رله DNS از کاربر می‌پرسد. هش و سایز هر فایل دانلود‌شده حتماً اعتبارسنجی می‌شود +- **به‌روزرسانی خودکار هر کانال**: کانال‌های دلخواه را پین کنید تا در پس‌زمینه به‌طور دوره‌ای رفرش شوند (به ازای هر پروفایل ذخیره می‌شود) - فشرده‌سازی پیام‌ها (deflate) - محافظت رابط وب با رمز عبور (`--password` سمت کلاینت) - لاگ زنده درخواست‌های DNS در مرورگر @@ -336,6 +350,34 @@ make test # اجرای تست‌ها make upx # فشرده‌سازی باینری‌ها با UPX ``` +## 🎞️ رله‌های مدیا + +هر رله مستقل است — یک فایل می‌تواند هم‌زمان روی DNS و گیتهاب (و رله‌های آینده) قابل دسترسی باشد. 
کلاینت بر اساس فلگ‌هایی که در پیام دیده، سریع‌ترین مسیر در دسترس را انتخاب می‌کند، در صورت خطا چند بار retry می‌کند و قبل از فال‌بک به مسیر کندتر از کاربر می‌پرسد. هش و سایز هر دانلود اعتبارسنجی می‌شود. + +دو رله الان موجود هست: + +- **رله DNS** (کند، پیش‌فرض خاموش). بایت‌ها به بلاک‌های DNS تقسیم می‌شوند. در شبکه‌های فیلترشده کار می‌کند. سقف پیش‌فرض: ۱۰۰ کیلوبایت. +- **رله گیتهاب** (سریع، پیش‌فرض خاموش). فایل‌ها در یک ریپازیتوری آپلود می‌شوند و کلاینت‌ها از طریق `api.github.com` (که در خیلی از کشورها برخلاف `raw.githubusercontent.com` در دسترس است) با HTTPS می‌گیرند. به یک Personal Access Token با اسکوپ `contents:write` نیاز دارد. مسیر فایل‌ها `//_` است تا چند سرور بتوانند یک ریپازیتوری مشترک داشته باشند. سقف پیش‌فرض: ۲۵ مگابایت. + +پرچم‌ها / متغیرهای محیطی: + +
+ +| Flag | Env | Default | توضیح | +|----------------------------|--------------------------------------|--------------|------------------------------------| +| `--dns-media-enabled` | `THEFEED_DNS_MEDIA_ENABLED` | `false` | فعال/غیرفعال کردن رله DNS | +| `--dns-media-max-size` | `THEFEED_DNS_MEDIA_MAX_SIZE_KB` | `100` (KB) | سقف هر فایل | +| `--dns-media-cache-ttl` | `THEFEED_DNS_MEDIA_CACHE_TTL_MIN` | `600` (min) | TTL | +| `--dns-media-compression` | `THEFEED_DNS_MEDIA_COMPRESSION` | `gzip` | `none` / `gzip` / `deflate` | +| `--github-relay-enabled` | `THEFEED_GITHUB_RELAY_ENABLED` | `false` | فعال‌سازی رله گیتهاب | +| `--github-relay-token` | `THEFEED_GITHUB_RELAY_TOKEN` | — | PAT با دسترسی `contents:write` | +| `--github-relay-repo` | `THEFEED_GITHUB_RELAY_REPO` | — | `owner/repo` | +| `--github-relay-branch` | `THEFEED_GITHUB_RELAY_BRANCH` | `main` | برنچ کامیت | +| `--github-relay-max-size` | `THEFEED_GITHUB_RELAY_MAX_SIZE_KB` | `25600` (KB) | سقف هر فایل | +| `--github-relay-ttl` | `THEFEED_GITHUB_RELAY_TTL_MIN` | `600` (min) | فایل‌های یتیم در سیکل بعدی پاک می‌شوند | + +
+ ## 📋 پرچم‌های سرور | پرچم | پیش‌فرض | توضیح | @@ -354,7 +396,19 @@ make upx # فشرده‌سازی باینری‌ها با UPX | `--no-telegram` | `false` | اجرا بدون ورود به تلگرام (فقط کانال‌های عمومی) | | `--padding` | `32` | حداکثر padding تصادفی | | `--msg-limit` | `15` | حداکثر تعداد پیام‌ها برای هر کانال تلگرام | +| `--fetch-interval` | `10` | فاصله چرخه فتچ بر حسب دقیقه (حداقل ۳) | | `--allow-manage` | `false` | فعال‌سازی مدیریت از راه دور (ارسال پیام و مدیریت کانال‌ها) | +| `--debug` | `false` | لاگ کردن هر کوئری DNS رمزگشایی‌شده | +| `--dns-media-enabled` | `false` | سرو مدیا از طریق DNS (مسیر کند) | +| `--dns-media-max-size` | `100` | سقف هر فایل برای رله DNS بر حسب KB | +| `--dns-media-cache-ttl` | `600` | TTL رله DNS بر حسب دقیقه | +| `--dns-media-compression` | `gzip` | فشرده‌سازی رله DNS: `none` / `gzip` / `deflate` | +| `--github-relay-enabled` | `false` | سرو مدیا از طریق گیتهاب (مسیر سریع) | +| `--github-relay-token` | | توکن گیتهاب (`contents:write`) | +| `--github-relay-repo` | | `owner/repo` ریپازیتوری رله | +| `--github-relay-branch` | `main` | برنچی که رله روش کامیت می‌کند | +| `--github-relay-max-size` | `25600` | سقف هر فایل برای رله گیتهاب بر حسب KB | +| `--github-relay-ttl` | `600` | TTL رله گیتهاب بر حسب دقیقه | ## 📋 پرچم‌های کلاینت diff --git a/README.md b/README.md index 5f73165..5d5f066 100644 --- a/README.md +++ b/README.md @@ -7,35 +7,44 @@ DNS-based feed reader for Telegram channels and public X accounts. 
Designed for ## How It Works ``` -┌──────────────┐ DNS TXT Query ┌──────────────┐ MTProto ┌──────────┐ -│ Client │ ──────────────────────▸ │ Server │ ──────────────▸ │ Telegram │ -│ (Web UI) │ ◂────────────────────── │ (DNS auth) │ ◂────────────── │ API │ -└──────────────┘ Encrypted TXT └──────────────┘ └──────────┘ + Encrypted DNS TXT + ┌──────────────┐ feed meta + small media ┌──────────────────┐ MTProto ┌──────────┐ + │ │ ─────────────────────────▸ │ Server │ ─────────────▸ │ Telegram │ + │ Client │ ◂───────────────────────── │ (DNS auth + │ ◂───────────── │ API │ + │ (Web UI) │ │ media relays) │ RSS / HTTP ┌──────────┐ + │ │ large media (fast relay) │ │ ─────────────▸ │ Nitter │ + │ │ ◂───── api.github.com ◂── │ │ ◂───────────── │ (X feed) │ + └──────────────┘ (uploaded by server) └──────────────────┘ └──────────┘ ``` **Server** (runs outside censored network): - Connects to Telegram, reads messages from configured channels -- Fetches public X posts from configured usernames via RSS-compatible public mirrors (no login) -- Serves feed data as encrypted DNS TXT responses -- Random padding on responses to vary size (anti-DPI) +- Fetches public X posts via RSS-compatible mirrors (no login) +- Serves feed metadata + small media as encrypted DNS TXT responses +- **Media relays** — same file, multiple delivery paths: + - **DNS relay** (slow, censorship-resistant) splits bytes across DNS blocks + - **GitHub relay** (fast, default off) uploads bytes to a repo so clients pull via plain HTTPS; intended for files that are too big for DNS + - Future relays slot in alongside without breaking older clients +- Random padding on responses (anti-DPI) - Session persistence — login once, run forever -- No-Telegram mode (`--no-telegram`) — reads public channels without needing Telegram credentials +- No-Telegram mode (`--no-telegram`) — reads public channels without credentials - All data stored in a single directory **Client** (runs inside censored network): - Browser-based web UI 
with RTL/Farsi support (VazirMatn font) -- Sends encrypted DNS TXT queries via available resolvers -- **Resolver Bank**: shared pool of DNS resolvers used across all profiles — no more per-profile resolver lists. Resolvers are added via scanner, import, or manual entry and scored automatically -- **Resolver scoring**: tracks per-resolver success rate and latency with persistent scores; healthier resolvers are preferred automatically. Users can clean up low-scoring resolvers from the bank -- **Scatter mode**: fans out the same DNS request to multiple resolvers simultaneously and uses the fastest response (default: 2 concurrent resolvers per request) -- Send messages to channels and private chats (requires server `--allow-manage` and login to telegram) +- Sends encrypted DNS TXT queries via the resolver bank +- **Resolver Bank**: shared pool of DNS resolvers used across all profiles. Resolvers are added via scanner, import, or manual entry and scored automatically +- **Resolver scoring**: per-resolver success-rate + latency scoreboard with persistent scores; healthier resolvers are preferred. Low-scoring entries can be pruned +- **Scatter mode**: fans out the same DNS request to multiple resolvers and uses the fastest response (default: 2 concurrent) +- **Relay-aware media downloads** — picks the fast relay when the manifest advertises one, retries on transient failure, asks before falling back to the slow DNS path. 
Hash + size verified on every download +- Send messages to channels and private chats (requires server `--allow-manage` + Telegram login) - Channel management (add/remove channels remotely via admin commands when `--allow-manage` is enabled) +- **Per-channel auto-update**: pin specific channels for periodic background refresh, persisted per profile - Message compression (deflate) for efficient transfer - Web UI password protection (`--password` on client) -- New message indicators and next-fetch countdown timer -- Channel type badges (Private/Public) -- X channel badges (`x/username`) with separate color in the sidebar -- Media type detection (`[IMAGE]`, `[VIDEO]`, etc.) +- New-message indicators (channel-list NEW badge + in-chat separator), next-fetch countdown timer +- Channel type badges (Private/Public/X) with separate colors +- Media type detection (`[IMAGE]`, `[VIDEO]`, etc.) and inline rendering - Live DNS query log in the browser ## Anti-DPI Features @@ -56,23 +65,42 @@ All communication is encrypted with AES-256 and transmitted via standard DNS TXT Messages with attached photos, files, GIFs, audio, and videos can be cached on the server and downloaded over the same encrypted DNS channel. -The server downloads each attached media file (deduped by upstream id and content hash), assigns it a slot in a reserved channel range (`10000`–`60000`), and splits the bytes into the same-sized blocks used elsewhere. The message text gains a small metadata header: +The server downloads each attached media file (deduped by upstream id and content hash), pushes the bytes to every enabled relay, and adds a small metadata header to the message text: ``` -[IMAGE]:
::: +[IMAGE]<size>:<relays>:<ch>:<blk>:<crc32>[:<filename>] optional caption ``` -`
=0` means the file exceeded the server's size cap and isn't cached. Old clients render the header as a regular caption line. +`<relays>` is a comma-separated list of per-relay availability bits (`1`=available, `0`=not). Slot 0 is DNS, slot 1 is the GitHub relay; future relays append. Older clients ignore slots they don't know. -Block 0 of every cached file begins with a 16-byte protocol header — 4 bytes CRC32 of the (decompressed) content, 1 byte version, 1 byte compression, 10 bytes reserved for future fields. The client checks the CRC against the expected value from the message metadata before delivering any bytes, so a stale message pointing to a slot the server has since reused for a different file is rejected after a single block. The remaining bytes are decompressed per the compression byte. Downloads are cached on the client (IndexedDB, 7 days) and on the local thefeed-client server (`/media-cache/`, 7 days) so multiple devices behind one client share a single DNS-tunnelled fetch. Concurrent downloads are limited to one at a time; extra clicks are queued. +Block 0 of every DNS-cached file begins with a 16-byte protocol header — 4 bytes CRC32 of the (decompressed) content, 1 byte version, 1 byte compression, 10 bytes reserved for future fields. The client checks the CRC against the expected value before delivering any bytes. The remaining bytes are decompressed per the compression byte. Downloads are cached on the client (IndexedDB, 7 days) and on the local thefeed-client server (`/media-cache/`, 7 days). Concurrent downloads are limited and extra clicks are queued. -Server flags: +### Media relays -- `--no-media` — disable the feature. -- `--media-max-size` (KB, default 100) — per-file size cap. -- `--media-cache-ttl` (minutes, default 600) — entry lifetime. -- `--media-compression` (default `gzip`) — `none`, `gzip`, or `deflate`. The compression byte is carried in the block-0 header so the client can decompress without prior knowledge. 
+Each relay is independent — the same file can be served via DNS *and* GitHub *and* future relays at the same time. Clients pick whichever the message manifest advertises and prefer the fastest available; on failure they retry, then ask before falling back to a slower one. Hash + size are verified on every download. + +Two relays ship today: + +- **DNS relay** (slow, default off). Bytes are split into DNS blocks. Survives in censored networks. Default cap: 100 KB. +- **GitHub relay** (fast, default off). Bytes are uploaded to a repo and pulled by clients over plain HTTPS. Needs a personal access token with `contents:write`. Files land at `//_` so multiple deployments can share one repo. Default cap: 25 MB. + +Block 0 of every DNS-cached file begins with a 16-byte protocol header — 4 bytes CRC32 of the (decompressed) content, 1 byte version, 1 byte compression, 10 bytes reserved. The remaining bytes are decompressed per the compression byte. Downloads are cached on the client (IndexedDB, 7 days) and on the local thefeed-client server (`/media-cache/`, 7 days). Concurrent downloads are limited and extra clicks are queued. 
+ +Server flags / env vars: + +| Flag | Env | Default | Notes | +|-------------------------------|--------------------------------------|-------------|------------------------------------| +| `--dns-media-enabled` | `THEFEED_DNS_MEDIA_ENABLED` | `false` | toggle DNS relay | +| `--dns-media-max-size` | `THEFEED_DNS_MEDIA_MAX_SIZE_KB` | `100` (KB) | per-file cap | +| `--dns-media-cache-ttl` | `THEFEED_DNS_MEDIA_CACHE_TTL_MIN` | `600` (min) | TTL | +| `--dns-media-compression` | `THEFEED_DNS_MEDIA_COMPRESSION` | `gzip` | `none`, `gzip`, or `deflate` | +| `--github-relay-enabled` | `THEFEED_GITHUB_RELAY_ENABLED` | `false` | toggle GitHub relay | +| `--github-relay-token` | `THEFEED_GITHUB_RELAY_TOKEN` | — | PAT, `contents:write` | +| `--github-relay-repo` | `THEFEED_GITHUB_RELAY_REPO` | — | `owner/repo` | +| `--github-relay-branch` | `THEFEED_GITHUB_RELAY_BRANCH` | `main` | branch to commit relay objects to | +| `--github-relay-max-size` | `THEFEED_GITHUB_RELAY_MAX_SIZE_KB` | `25600` (KB)| per-file cap | +| `--github-relay-ttl` | `THEFEED_GITHUB_RELAY_TTL_MIN` | `600` (min) | orphans pruned next refresh cycle | The hourly DNS report includes `totalMediaQueries` and a `mediaCache` block (entries, bytes, hits, misses, evictions). @@ -328,7 +356,7 @@ make build-server All data files (session, channels, x accounts) are stored in the `--data-dir` directory (default: `./data`). 
-Environment variables: `THEFEED_DOMAIN`, `THEFEED_KEY`, `THEFEED_MSG_LIMIT`, `THEFEED_ALLOW_MANAGE` (set to `0` to force-disable even if the flag is baked into the service), `THEFEED_X_RSS_INSTANCES`, `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, `TELEGRAM_PHONE`, `TELEGRAM_PASSWORD` +Environment variables: `THEFEED_DOMAIN`, `THEFEED_KEY`, `THEFEED_MSG_LIMIT`, `THEFEED_FETCH_INTERVAL`, `THEFEED_ALLOW_MANAGE` (set to `0` to force-disable even if the flag is baked into the service), `THEFEED_X_RSS_INSTANCES`, `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, `TELEGRAM_PHONE`, `TELEGRAM_PASSWORD` #### Server Flags @@ -349,11 +377,19 @@ Environment variables: `THEFEED_DOMAIN`, `THEFEED_KEY`, `THEFEED_MSG_LIMIT`, `TH | `--listen` | `:5300` | DNS listen address | | `--padding` | `32` | Max random padding bytes (0=disabled) | | `--msg-limit` | `15` | Maximum messages to fetch per Telegram channel | +| `--fetch-interval` | `10` | Fetch cycle interval in minutes (min 3) | | `--allow-manage` | `false` | Allow remote send/channel management (default: disabled) | -| `--no-media` | `false` | Disable downloading and serving image/file media | -| `--media-max-size` | `100` | Per-file size cap for cached media in KB (0 = no cap) | -| `--media-cache-ttl` | `600` | How long a cached media entry stays available, in minutes | -| `--media-compression` | `gzip` | Compression for cached media: `none`, `gzip`, or `deflate` | +| `--debug` | `false` | Log every decoded DNS query | +| `--dns-media-enabled` | `false` | Serve media via DNS (slow relay) | +| `--dns-media-max-size` | `100` | Per-file cap for the DNS relay in KB (0 = no cap) | +| `--dns-media-cache-ttl` | `600` | DNS-relay TTL, in minutes | +| `--dns-media-compression` | `gzip` | DNS-relay compression: `none`, `gzip`, or `deflate` | +| `--github-relay-enabled` | `false` | Serve media via the GitHub fast relay | +| `--github-relay-token` | | PAT with `contents:write` (or `THEFEED_GITHUB_RELAY_TOKEN`) | +| `--github-relay-repo` | | `owner/repo` 
for the relay | +| `--github-relay-branch` | `main` | Branch to commit relay objects to | +| `--github-relay-max-size` | `25600` | Per-file cap for the GitHub relay in KB | +| `--github-relay-ttl` | `600` | GitHub-relay TTL in minutes (orphans pruned next cycle) | | `--version` | | Show version and exit | ### Client @@ -404,8 +440,6 @@ chmod +x thefeed-client > download it from the latest release assets: `thefeed-android-arm64.apk` -Also available: `thefeed-android-arm64-upx.apk` (UPX-compressed embedded client). - The Android app automatically requests battery optimization exemption on first launch so the background service is not killed by the OS. diff --git a/cmd/server/main.go b/cmd/server/main.go index 8190295..59f13de 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" "syscall" + "time" "golang.org/x/term" @@ -35,12 +36,19 @@ func main() { sessionPath := flag.String("session", "", "Path to Telegram session file (default: {data-dir}/session.json)") maxPadding := flag.Int("padding", 32, "Max random padding bytes in DNS responses (anti-DPI, 0=disabled)") msgLimit := flag.Int("msg-limit", 15, "Maximum messages to fetch per Telegram channel") + fetchIntervalMin := flag.Int("fetch-interval", 10, "Fetch cycle interval in minutes (min 3, default 10)") allowManage := flag.Bool("allow-manage", false, "Allow remote channel management and sending via DNS") debug := flag.Bool("debug", false, "Log every decoded DNS query") - noMedia := flag.Bool("no-media", false, "Disable downloading and serving image/file media (clients see [TAG] only)") - mediaMaxSizeKB := flag.Int("media-max-size", 100, "Per-file size cap for cached media in KB (0 = no cap)") - mediaCacheTTLMin := flag.Int("media-cache-ttl", 600, "How long a cached media entry stays available, in minutes") - mediaCompression := flag.String("media-compression", "gzip", "Compression for cached media: none|gzip|deflate") + dnsMediaEnabled := 
flag.Bool("dns-media-enabled", false, "Serve media via DNS (slow relay)") + dnsMediaMaxSizeKB := flag.Int("dns-media-max-size", 100, "Per-file cap for the DNS relay in KB (0 = no cap)") + dnsMediaCacheTTLMin := flag.Int("dns-media-cache-ttl", 600, "TTL for DNS-relay cached media, in minutes") + dnsMediaCompression := flag.String("dns-media-compression", "gzip", "Compression for DNS-relay media bytes: none|gzip|deflate") + ghEnabled := flag.Bool("github-relay-enabled", false, "Serve media via GitHub (fast relay)") + ghToken := flag.String("github-relay-token", "", "GitHub PAT with contents:write on the relay repo") + ghRepo := flag.String("github-relay-repo", "", "GitHub repo for the fast relay, e.g. owner/repo") + ghBranch := flag.String("github-relay-branch", "main", "Default branch to commit to (e.g. main, master)") + ghMaxSizeKB := flag.Int("github-relay-max-size", 25*1024, "Per-file cap for the GitHub relay in KB (0 = no cap)") + ghCacheTTLMin := flag.Int("github-relay-ttl", 600, "TTL for GitHub-relay objects in minutes") showVersion := flag.Bool("version", false, "Show version and exit") flag.Usage = func() { fmt.Fprintf(os.Stderr, "thefeed-server %s\n\nServes Telegram/X feed content over encrypted DNS for censorship-resistant access.\n\nUsage:\n thefeed-server [flags]\n\nFlags:\n", version.Version) @@ -53,6 +61,15 @@ func main() { os.Exit(0) } + // Catch the common --bool true mistake: Go's flag package stops parsing at + // the first positional, so any flags after it are silently dropped. Bool + // flags must use --foo or --foo=true (no space). 
+ if flag.NArg() > 0 { + fmt.Fprintf(os.Stderr, "Error: unexpected positional argument(s): %v\n", flag.Args()) + fmt.Fprintln(os.Stderr, "Hint: bool flags must be written as --flag or --flag=true (NOT --flag true).") + os.Exit(1) + } + // Create data directory if err := os.MkdirAll(*dataDir, 0700); err != nil { log.Fatalf("Create data dir: %v", err) @@ -82,6 +99,17 @@ func main() { if os.Getenv("THEFEED_ALLOW_MANAGE") == "0" { *allowManage = false } + if *fetchIntervalMin == 10 { + if v := os.Getenv("THEFEED_FETCH_INTERVAL"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + *fetchIntervalMin = n + } + } + } + if *fetchIntervalMin < 3 { + fmt.Fprintf(os.Stderr, "Error: --fetch-interval must be at least 3 minutes (got %d)\n", *fetchIntervalMin) + os.Exit(1) + } if *msgLimit == 15 { if v := os.Getenv("THEFEED_MSG_LIMIT"); v != "" { if n, err := strconv.Atoi(v); err == nil && n > 0 { @@ -144,6 +172,49 @@ func main() { } } + if env := os.Getenv("THEFEED_DNS_MEDIA_ENABLED"); env == "0" { + *dnsMediaEnabled = false + } else if env == "1" { + *dnsMediaEnabled = true + } + if env := os.Getenv("THEFEED_DNS_MEDIA_MAX_SIZE_KB"); env != "" { + if n, err := strconv.Atoi(env); err == nil { + *dnsMediaMaxSizeKB = n + } + } + if env := os.Getenv("THEFEED_DNS_MEDIA_CACHE_TTL_MIN"); env != "" { + if n, err := strconv.Atoi(env); err == nil { + *dnsMediaCacheTTLMin = n + } + } + if env := os.Getenv("THEFEED_DNS_MEDIA_COMPRESSION"); env != "" { + *dnsMediaCompression = env + } + if !*ghEnabled && os.Getenv("THEFEED_GITHUB_RELAY_ENABLED") == "1" { + *ghEnabled = true + } + if *ghToken == "" { + *ghToken = os.Getenv("THEFEED_GITHUB_RELAY_TOKEN") + } + if *ghRepo == "" { + *ghRepo = os.Getenv("THEFEED_GITHUB_RELAY_REPO") + } + if *ghBranch == "main" { + if v := os.Getenv("THEFEED_GITHUB_RELAY_BRANCH"); v != "" { + *ghBranch = v + } + } + if env := os.Getenv("THEFEED_GITHUB_RELAY_MAX_SIZE_KB"); env != "" { + if n, err := strconv.Atoi(env); err == nil { + *ghMaxSizeKB = n + 
} + } + if env := os.Getenv("THEFEED_GITHUB_RELAY_TTL_MIN"); env != "" { + if n, err := strconv.Atoi(env); err == nil { + *ghCacheTTLMin = n + } + } + cfg := server.Config{ ListenAddr: *listen, Domain: *domain, @@ -156,10 +227,20 @@ func main() { NoTelegram: *noTelegram, AllowManage: *allowManage, Debug: *debug, - NoMedia: *noMedia, - MediaMaxSize: int64(*mediaMaxSizeKB) * 1024, - MediaCacheTTL: *mediaCacheTTLMin, - MediaCompression: *mediaCompression, + DNSMediaEnabled: *dnsMediaEnabled, + DNSMediaMaxSize: int64(*dnsMediaMaxSizeKB) * 1024, + DNSMediaCacheTTL: *dnsMediaCacheTTLMin, + DNSMediaCompression: *dnsMediaCompression, + FetchInterval: time.Duration(*fetchIntervalMin) * time.Minute, + GitHubRelay: server.GitHubRelayConfig{ + Enabled: *ghEnabled, + Token: *ghToken, + Repo: *ghRepo, + Branch: *ghBranch, + StatePath: filepath.Join(*dataDir, "gh_relay_state.json"), + MaxBytes: int64(*ghMaxSizeKB) * 1024, + TTLMinutes: *ghCacheTTLMin, + }, Telegram: server.TelegramConfig{ APIID: id, APIHash: *apiHash, diff --git a/internal/client/fetcher.go b/internal/client/fetcher.go index 4e1b301..7fe54fc 100644 --- a/internal/client/fetcher.go +++ b/internal/client/fetcher.go @@ -682,6 +682,81 @@ func (f *Fetcher) FetchLatestVersion(ctx context.Context) (string, error) { return protocol.DecodeVersionData(data) } +// RelayInfo carries the relay-discovery data the server publishes on +// RelayInfoChannel. Empty fields mean "not configured". +type RelayInfo struct { + GitHubRepo string // "owner/repo" +} + +// FetchRelayInfo pulls the relay-info payload from RelayInfoChannel. +// Block 0 carries a uint16 total-block count prefix; if more than one +// block is needed the rest are fetched in parallel and concatenated. An +// empty payload yields a zero-value RelayInfo. 
+func (f *Fetcher) FetchRelayInfo(ctx context.Context) (RelayInfo, error) { + fetchCtx, cancel := context.WithTimeout(ctx, time.Minute) + defer cancel() + + block0, err := f.FetchBlock(fetchCtx, protocol.RelayInfoChannel, 0) + if err != nil { + return RelayInfo{}, fmt.Errorf("fetch relay-info: %w", err) + } + if len(block0) < 2 { + return RelayInfo{}, nil + } + totalBlocks := int(binary.BigEndian.Uint16(block0)) + payload0 := block0[2:] + if totalBlocks <= 1 { + return ParseRelayInfo(payload0), nil + } + + type blockResult struct { + data []byte + err error + } + results := make([]blockResult, totalBlocks) + results[0] = blockResult{data: payload0} + + var wg sync.WaitGroup + for blk := 1; blk < totalBlocks; blk++ { + wg.Add(1) + go func(blk int) { + defer wg.Done() + data, fetchErr := f.FetchBlock(fetchCtx, protocol.RelayInfoChannel, uint16(blk)) + results[blk] = blockResult{data: data, err: fetchErr} + }(blk) + } + wg.Wait() + + var allData []byte + for _, r := range results { + if r.err != nil { + return RelayInfo{}, fmt.Errorf("fetch relay-info block: %w", r.err) + } + allData = append(allData, r.data...) + } + return ParseRelayInfo(allData), nil +} + +// ParseRelayInfo decodes the relay-info payload (one "key=value" pair per +// line). Unknown keys are ignored so future relays can be added without +// breaking older clients. +func ParseRelayInfo(data []byte) RelayInfo { + var info RelayInfo + for _, line := range strings.Split(string(data), "\n") { + eq := strings.IndexByte(line, '=') + if eq < 0 { + continue + } + k := strings.TrimSpace(line[:eq]) + v := strings.TrimSpace(line[eq+1:]) + switch k { + case "gh": + info.GitHubRepo = v + } + } + return info +} + // FetchTitles fetches and decodes the channel display name map from TitlesChannel. // Returns an empty map (not an error) when the server does not support TitlesChannel. 
// Block 0 carries a uint16 total-block count prefix; remaining blocks are fetched in diff --git a/internal/client/relay_info_test.go b/internal/client/relay_info_test.go new file mode 100644 index 0000000..0214862 --- /dev/null +++ b/internal/client/relay_info_test.go @@ -0,0 +1,25 @@ +package client + +import "testing" + +func TestParseRelayInfo(t *testing.T) { + cases := []struct { + name string + data string + want RelayInfo + }{ + {"empty", "", RelayInfo{}}, + {"gh only", "gh=owner/repo\n", RelayInfo{GitHubRepo: "owner/repo"}}, + {"unknown keys ignored", "gh=owner/repo\nfuture=meh\n", RelayInfo{GitHubRepo: "owner/repo"}}, + {"trims whitespace", " gh = a/b \n", RelayInfo{GitHubRepo: "a/b"}}, + {"random noise tolerated", "weird text\nbut: fine\ngh=x/y\n", RelayInfo{GitHubRepo: "x/y"}}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := ParseRelayInfo([]byte(c.data)) + if got != c.want { + t.Errorf("got %+v, want %+v", got, c.want) + } + }) + } +} diff --git a/internal/protocol/dns.go b/internal/protocol/dns.go index 025b3a3..1c47f2d 100644 --- a/internal/protocol/dns.go +++ b/internal/protocol/dns.go @@ -35,6 +35,10 @@ const ( // TitlesChannel serves per-channel human-readable display names. TitlesChannel uint16 = 0xFFF9 + // RelayInfoChannel serves the relay-discovery payload (GitHub + // owner/repo + domain segment). Block 0 carries it. + RelayInfoChannel uint16 = 0xFFF8 + // MaxUpstreamBlockPayload keeps uploaded query chunks comfortably below DNS // name limits across typical domains and resolver paths. MaxUpstreamBlockPayload = 8 diff --git a/internal/protocol/media.go b/internal/protocol/media.go index db6817f..6cf4f40 100644 --- a/internal/protocol/media.go +++ b/internal/protocol/media.go @@ -8,42 +8,100 @@ import ( "strings" ) +// Relay indices: each MediaMeta.Relays[N] flags whether the file is +// reachable via that relay. Order is fixed so the wire format is positional. 
+// Future relays append to this list; older clients ignore unknown trailing +// flags. +const ( + RelayDNS = 0 // slow path — bytes assembled from DNS blocks + RelayGitHub = 1 // fast path — bytes pulled from a GitHub repo +) + // MediaMeta describes a downloadable media blob attached to a feed message. // -// Wire format embedded in a message's text body (immediately after the media -// tag, before any caption): +// Wire format (immediately after the media tag, before any caption): // -// [IMAGE]:
:::[:] -// caption goes here on the next line(s) +// [IMAGE]:,,...:::[:] // -// The filename field is optional; when present it carries an OS-friendly -// suggested filename (server-sanitised: no newlines, no path separators, no -// control characters, length-capped). Old clients that split on ':' and -// only read parts[0..4] keep working — they just ignore the trailing field. +// where each is "1" or "0" indicating availability via relay N. +// : are only meaningful when f0 (RelayDNS) is set. type MediaMeta struct { - Tag string // e.g. MediaImage, MediaVideo, MediaFile - Size int64 - Downloadable bool - Channel uint16 - Blocks uint16 - CRC32 uint32 - Filename string + Tag string // e.g. MediaImage, MediaVideo, MediaFile + Size int64 + Relays []bool // index = relay constant, value = availability + Channel uint16 // DNS channel (when Relays[RelayDNS]) + Blocks uint16 // DNS block count (when Relays[RelayDNS]) + CRC32 uint32 + Filename string } -// String renders the metadata in the wire format documented above, including -// the leading tag and trailing newline that separates the metadata row from -// any caption. +// HasRelay reports whether the relay at idx is available. Out-of-range and +// nil-relay-list both return false. +func (m MediaMeta) HasRelay(idx int) bool { + if idx < 0 || idx >= len(m.Relays) { + return false + } + return m.Relays[idx] +} + +// HasAnyRelay reports whether at least one relay can serve this file. +func (m MediaMeta) HasAnyRelay() bool { + for _, on := range m.Relays { + if on { + return true + } + } + return false +} + +// String renders the metadata in the wire format documented above. 
func (m MediaMeta) String() string { - dl := 0 - if m.Downloadable { - dl = 1 - } + flags := encodeRelayFlags(m.Relays) if fn := SanitiseMediaFilename(m.Filename); fn != "" { - return fmt.Sprintf("%s%d:%d:%d:%d:%08x:%s\n", - m.Tag, m.Size, dl, m.Channel, m.Blocks, m.CRC32, fn) + return fmt.Sprintf("%s%d:%s:%d:%d:%08x:%s\n", + m.Tag, m.Size, flags, m.Channel, m.Blocks, m.CRC32, fn) } - return fmt.Sprintf("%s%d:%d:%d:%d:%08x\n", - m.Tag, m.Size, dl, m.Channel, m.Blocks, m.CRC32) + return fmt.Sprintf("%s%d:%s:%d:%d:%08x\n", + m.Tag, m.Size, flags, m.Channel, m.Blocks, m.CRC32) +} + +// encodeRelayFlags serialises a relay list as "1,0,1". An empty list is +// "0,0" (DNS off, GitHub off) so older clients always see at least the two +// known relay slots. +func encodeRelayFlags(relays []bool) string { + n := len(relays) + if n < 2 { + n = 2 + } + parts := make([]string, n) + for i := 0; i < n; i++ { + on := i < len(relays) && relays[i] + if on { + parts[i] = "1" + } else { + parts[i] = "0" + } + } + return strings.Join(parts, ",") +} + +// parseRelayFlags decodes "1,0,1" into a relay slice sized to the input. +// Caller-side accessors guard against out-of-range reads, so future relays +// can be added without breaking older clients. +func parseRelayFlags(s string) ([]bool, bool) { + if s == "" { + return nil, false + } + parts := strings.Split(s, ",") + out := make([]bool, len(parts)) + for i, p := range parts { + p = strings.TrimSpace(p) + if p != "0" && p != "1" { + return nil, false + } + out[i] = p == "1" + } + return out, true } // SanitiseMediaFilename returns a filename safe to embed in the wire @@ -122,14 +180,8 @@ func EncodeMediaText(meta MediaMeta, caption string) string { } // ParseMediaText parses a message body that begins with a known media tag. -// On success it returns the metadata and the remaining caption (which may be -// empty). 
When the body uses the legacy "[TAG]\ncaption" form (no metadata -// suffix), ParseMediaText returns ok=true with Downloadable=false and -// Channel=0 — the caller can treat it as a non-downloadable placeholder -// exactly like before. -// -// Unknown tags return ok=false. Malformed metadata for a known tag also -// returns ok=false so the caller falls back to legacy display. +// Returns metadata + remaining caption. Legacy "[TAG]\ncaption" bodies parse +// with empty Relays (HasAnyRelay()==false). Unknown tags return ok=false. func ParseMediaText(body string) (meta MediaMeta, caption string, ok bool) { tag, rest, found := splitKnownMediaTag(body) if !found { @@ -156,9 +208,6 @@ func ParseMediaText(body string) (meta MediaMeta, caption string, ok bool) { parts := strings.Split(metaLine, ":") if len(parts) < 5 { - // Looks like a caption line that happens to start with this tag (e.g. - // "[IMAGE]nice photo"). Don't claim a structured parse — return the - // whole `rest` as caption so the message still renders. return MediaMeta{Tag: tag}, rest, true } @@ -166,8 +215,8 @@ func ParseMediaText(body string) (meta MediaMeta, caption string, ok bool) { if err != nil || size < 0 { return MediaMeta{Tag: tag}, rest, true } - dl, err := strconv.Atoi(parts[1]) - if err != nil || (dl != 0 && dl != 1) { + relays, ok := parseRelayFlags(parts[1]) + if !ok { return MediaMeta{Tag: tag}, rest, true } ch, err := strconv.ParseUint(parts[2], 10, 16) @@ -182,24 +231,19 @@ func ParseMediaText(body string) (meta MediaMeta, caption string, ok bool) { if err != nil { return MediaMeta{Tag: tag}, rest, true } - // Reject any channel claimed inside a parseable metadata line that falls - // outside the reserved media range — that can only be a malformed message - // or a tampering attempt; refuse to surface it as downloadable. + // Reject DNS availability if the channel/block range is malformed — + // other relays stay as-claimed. 
channel := uint16(ch) - downloadable := dl == 1 - if downloadable && (!IsMediaChannel(channel) || blk == 0) { - downloadable = false + if len(relays) > RelayDNS && relays[RelayDNS] && (!IsMediaChannel(channel) || blk == 0) { + relays[RelayDNS] = false } meta.Size = size - meta.Downloadable = downloadable + meta.Relays = relays meta.Channel = channel meta.Blocks = uint16(blk) meta.CRC32 = uint32(crc) if len(parts) >= 6 { - // SanitiseMediaFilename strips the field separator, so we can't - // reach this point with a colon inside the filename. Take parts[5] - // directly and re-sanitise defensively. meta.Filename = SanitiseMediaFilename(parts[5]) } return meta, caption, true diff --git a/internal/protocol/media_test.go b/internal/protocol/media_test.go index 916658e..6cf13e6 100644 --- a/internal/protocol/media_test.go +++ b/internal/protocol/media_test.go @@ -12,58 +12,62 @@ func TestEncodeMediaTextRoundTrip(t *testing.T) { caption string }{ { - name: "image with caption", + name: "dns only", meta: MediaMeta{ - Tag: MediaImage, - Size: 123456, - Downloadable: true, - Channel: 12345, - Blocks: 42, - CRC32: 0xabcdef01, + Tag: MediaImage, + Size: 123456, + Relays: []bool{true, false}, + Channel: 12345, + Blocks: 42, + CRC32: 0xabcdef01, }, caption: "hello world\nmulti-line", }, { - name: "file with filename", + name: "dns + github", meta: MediaMeta{ - Tag: MediaFile, - Size: 800, - Downloadable: true, - Channel: MediaChannelStart, - Blocks: 2, - CRC32: 0, - Filename: "report.zip", + Tag: MediaFile, + Size: 800, + Relays: []bool{true, true}, + Channel: MediaChannelStart, + Blocks: 2, + CRC32: 0, + Filename: "report.zip", }, caption: "", }, { - name: "filename strips path traversal", + name: "github only", meta: MediaMeta{ - Tag: MediaFile, - Size: 100, - Downloadable: true, - Channel: MediaChannelStart + 1, - Blocks: 1, - CRC32: 0xdeadbeef, - // Server-side sanitisation strips dirs, control chars, and ":" - // before the metadata reaches the wire — so a parsed filename - 
// is never going to contain any of those. - Filename: "/tmp/../etc/passwd:bad\nname", + Tag: MediaImage, + Size: 12_000_000, + Relays: []bool{false, true}, + CRC32: 0xdeadbeef, }, - caption: "", + caption: "fast path", }, { - name: "non-downloadable image", + name: "no relays available", meta: MediaMeta{ - Tag: MediaImage, - Size: 50_000_000, - Downloadable: false, - Channel: 0, - Blocks: 0, - CRC32: 0xdeadbeef, + Tag: MediaImage, + Size: 50_000_000, + Relays: []bool{false, false}, + CRC32: 0xdeadbeef, }, caption: "too big", }, + { + name: "future third relay flag survives roundtrip", + meta: MediaMeta{ + Tag: MediaFile, + Size: 1024, + Relays: []bool{true, false, true}, + Channel: MediaChannelStart + 5, + Blocks: 3, + CRC32: 0xcafebabe, + }, + caption: "future relay", + }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { @@ -81,8 +85,11 @@ func TestEncodeMediaTextRoundTrip(t *testing.T) { if meta.Size != tc.meta.Size { t.Fatalf("Size = %d, want %d", meta.Size, tc.meta.Size) } - if meta.Downloadable != tc.meta.Downloadable { - t.Fatalf("Downloadable = %v, want %v", meta.Downloadable, tc.meta.Downloadable) + // Relays roundtrip: every input slot must reflect on the wire. + for i, want := range tc.meta.Relays { + if got := meta.HasRelay(i); got != want { + t.Errorf("Relay %d = %v, want %v (body=%q)", i, got, want, body) + } } if meta.Channel != tc.meta.Channel { t.Fatalf("Channel = %d, want %d", meta.Channel, tc.meta.Channel) @@ -101,6 +108,28 @@ func TestEncodeMediaTextRoundTrip(t *testing.T) { } } +// TestParseMediaTextUnknownRelaysIgnored is the forward-compat guarantee: +// older clients reading a wire form with extra relay flags must not fail. 
+func TestParseMediaTextUnknownRelaysIgnored(t *testing.T) { + body := "[FILE]200:0,1,1,0:0:0:deadbeef:f.bin\ncap" + meta, _, ok := ParseMediaText(body) + if !ok { + t.Fatalf("ok=false on multi-flag body") + } + if meta.HasRelay(RelayDNS) { + t.Fatalf("RelayDNS should be false") + } + if !meta.HasRelay(RelayGitHub) { + t.Fatalf("RelayGitHub should be true") + } + if !meta.HasRelay(2) { + t.Fatalf("relay 2 should be true") + } + if meta.HasRelay(99) { + t.Fatalf("unknown relay 99 must read as false, not panic") + } +} + func TestSanitiseMediaFilename(t *testing.T) { cases := map[string]string{ "": "", @@ -134,8 +163,6 @@ func TestSanitiseMediaFilenameLongName(t *testing.T) { } } -// Backward compat: legacy "[IMAGE]\ncaption" must still parse cleanly with -// caption preserved and Downloadable=false. func TestParseMediaTextLegacy(t *testing.T) { body := "[IMAGE]\nlook at this" meta, caption, ok := ParseMediaText(body) @@ -145,15 +172,14 @@ func TestParseMediaTextLegacy(t *testing.T) { if meta.Tag != MediaImage { t.Fatalf("Tag = %q, want %q", meta.Tag, MediaImage) } - if meta.Downloadable { - t.Fatalf("Downloadable should be false on legacy body") + if meta.HasAnyRelay() { + t.Fatalf("legacy body should have no available relays") } if caption != "look at this" { t.Fatalf("caption = %q, want %q", caption, "look at this") } } -// Backward compat: legacy [IMAGE] with no caption. 
func TestParseMediaTextLegacyNoCaption(t *testing.T) { for _, body := range []string{"[IMAGE]", "[IMAGE]\n"} { meta, caption, ok := ParseMediaText(body) @@ -163,8 +189,8 @@ func TestParseMediaTextLegacyNoCaption(t *testing.T) { if meta.Tag != MediaImage { t.Fatalf("Tag = %q, want [IMAGE]", meta.Tag) } - if meta.Downloadable { - t.Fatalf("legacy body should not be downloadable") + if meta.HasAnyRelay() { + t.Fatalf("legacy body should have no available relays") } if caption != "" { t.Fatalf("caption = %q, want empty", caption) @@ -172,16 +198,14 @@ func TestParseMediaTextLegacyNoCaption(t *testing.T) { } } -// A normal caption that happens to lead with a media tag should not be -// misparsed as downloadable metadata. func TestParseMediaTextHumanCaption(t *testing.T) { body := "[IMAGE]nice picture\nrest of post" meta, caption, ok := ParseMediaText(body) if !ok { t.Fatalf("ok=false on caption-leading body") } - if meta.Downloadable { - t.Fatalf("downloadable should be false for a human caption") + if meta.HasAnyRelay() { + t.Fatalf("human caption must not be flagged as downloadable") } if meta.Channel != 0 { t.Fatalf("channel should be 0 for non-metadata body, got %d", meta.Channel) @@ -201,15 +225,18 @@ func TestParseMediaTextUnknownTag(t *testing.T) { } // A metadata line that names a channel outside the media range must NOT be -// surfaced as downloadable. +// surfaced as DNS-downloadable; other relay flags stay as-claimed. 
func TestParseMediaTextRejectsOutOfRangeChannel(t *testing.T) { - body := "[IMAGE]100:1:5:200:00000000\ncaption" + body := "[IMAGE]100:1,1:5:200:00000000\ncaption" meta, _, ok := ParseMediaText(body) if !ok { t.Fatalf("ok=false on otherwise-valid metadata") } - if meta.Downloadable { - t.Fatalf("Downloadable should be false for channel %d outside media range", meta.Channel) + if meta.HasRelay(RelayDNS) { + t.Fatalf("RelayDNS should be false for channel %d outside media range", meta.Channel) + } + if !meta.HasRelay(RelayGitHub) { + t.Fatalf("RelayGitHub flag should survive even when DNS is rejected") } } diff --git a/internal/protocol/relay.go b/internal/protocol/relay.go new file mode 100644 index 0000000..47ed45b --- /dev/null +++ b/internal/protocol/relay.go @@ -0,0 +1,79 @@ +package protocol + +import ( + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + + "golang.org/x/crypto/hkdf" +) + +// HKDF context labels — kept distinct from "thefeed-query"/"thefeed-response" +// so the relay key is cryptographically independent of the DNS keys. +const ( + relayKeyContext = "thefeed-relay" + relayDomainContext = "thefeed-relay-domain" + relayObjectContext = "thefeed-relay-object" +) + +// Truncated HMAC widths (in hex chars). 64 bits for the domain folder is +// plenty — repos typically host a handful of deployments. 96 bits for object +// names guards against an attacker confirming "is file X in the repo?". +const ( + relayDomainHexLen = 16 + relayObjectHexLen = 24 +) + +// DeriveRelayKey derives the AES-256 key used to encrypt blobs uploaded to a +// shared relay (e.g. a public GitHub repo) and to HMAC the path segments. +// Returns an error only if HKDF fails; the result is deterministic for a +// given passphrase. 
+func DeriveRelayKey(passphrase string) ([KeySize]byte, error) { + var key [KeySize]byte + master := sha256.Sum256([]byte(passphrase)) + rdr := hkdf.New(sha256.New, master[:], nil, []byte(relayKeyContext)) + if _, err := io.ReadFull(rdr, key[:]); err != nil { + return key, err + } + return key, nil +} + +// RelayDomainSegment returns the path segment that scopes a deployment's +// files inside a shared relay repo. Computed as HMAC-SHA256 over the domain, +// keyed by a passphrase-derived secret, then truncated. Without the +// passphrase an observer cannot tell which deployment a folder belongs to. +func RelayDomainSegment(domain, passphrase string) string { + return relayHMAC(passphrase, relayDomainContext, domain)[:relayDomainHexLen] +} + +// RelayObjectName returns the per-file path segment under the domain folder. +// Computed from (size, crc) so the same content always lives at the same +// path (dedup), but HMAC'd with the passphrase so an observer can't probe +// "is a known file present in this repo?". +func RelayObjectName(size int64, crc uint32, passphrase string) string { + label := fmt.Sprintf("%d_%08x", size, crc) + return relayHMAC(passphrase, relayObjectContext, label)[:relayObjectHexLen] +} + +// EncryptRelayBlob seals plaintext with the relay key. Output is +// nonce||ciphertext||tag, identical framing to the DNS response cipher so +// clients can reuse Decrypt for both paths. +func EncryptRelayBlob(key [KeySize]byte, plaintext []byte) ([]byte, error) { + return Encrypt(key, plaintext) +} + +// DecryptRelayBlob is the inverse of EncryptRelayBlob. 
+func DecryptRelayBlob(key [KeySize]byte, blob []byte) ([]byte, error) { + return Decrypt(key, blob) +} + +func relayHMAC(passphrase, ctx, msg string) string { + master := sha256.Sum256([]byte(passphrase)) + h := hmac.New(sha256.New, master[:]) + h.Write([]byte(ctx)) + h.Write([]byte{0}) // separator: prevents ctx||msg collisions + h.Write([]byte(msg)) + return hex.EncodeToString(h.Sum(nil)) +} diff --git a/internal/server/feed.go b/internal/server/feed.go index 6e9f247..6ff37c0 100644 --- a/internal/server/feed.go +++ b/internal/server/feed.go @@ -1,8 +1,10 @@ package server import ( + "context" "crypto/rand" "fmt" + "log" "sync" "time" @@ -34,6 +36,14 @@ type Feed struct { // rejects queries to media channels with a not-found error, mirroring // pre-feature behaviour. media *MediaCache + + // gitHubRelay (optional) lets clients fetch media bytes over plain + // HTTPS from a GitHub repo. nil when disabled. + gitHubRelay *GitHubRelay + // relayInfoBlocks serves the relay-discovery channel + // (RelayInfoChannel) — block 0 contains the GitHub "owner/repo" + // string, or an empty payload if the relay is off. + relayInfoBlocks [][]byte } // NewFeed creates a new Feed with the given channel names. @@ -95,6 +105,9 @@ func (f *Feed) GetBlock(channel, block int) ([]byte, error) { if channel == int(protocol.TitlesChannel) { return f.getTitlesBlock(block) } + if channel == int(protocol.RelayInfoChannel) { + return f.getRelayInfoBlock(block) + } // Channel sits in the binary media range — delegate to MediaCache. We // drop the read lock first because MediaCache uses its own lock and we // don't want to hold f.mu across that path. @@ -132,6 +145,65 @@ func (f *Feed) MediaCache() *MediaCache { return f.media } +// SetGitHubRelay attaches the GitHub fast relay. Safe to call once at +// startup. nil disables. 
+func (f *Feed) SetGitHubRelay(r *GitHubRelay) { + f.mu.Lock() + defer f.mu.Unlock() + f.gitHubRelay = r + f.rebuildRelayInfoBlocks() +} + +// GitHubRelay returns the configured relay, or nil. +func (f *Feed) GitHubRelay() *GitHubRelay { + f.mu.RLock() + defer f.mu.RUnlock() + return f.gitHubRelay +} + +// AfterFetchCycle: touch live media → flush pending → prune stale. +// Touch must come first so files referenced by skipped fetches don't age out. +func (f *Feed) AfterFetchCycle(ctx context.Context) { + gh := f.GitHubRelay() + if gh == nil { + return + } + if mc := f.MediaCache(); mc != nil { + mc.TouchRelayEntries() + } + if err := gh.Flush(ctx); err != nil { + log.Printf("[gh-relay] flush after fetch: %v", err) + } + if ttl := gh.TTL(); ttl > 0 { + cutoff := time.Now().Add(-ttl) + if n, err := gh.PruneStale(ctx, cutoff); err != nil { + log.Printf("[gh-relay] prune after fetch: %v", err) + } else if n > 0 { + log.Printf("[gh-relay] pruned %d stale file(s) after fetch", n) + } + } +} + +// rebuildRelayInfoBlocks builds the discovery payload served on +// RelayInfoChannel. Format: "key=value\n" lines (UTF-8). Block 0 is +// prefixed with a uint16 total-block count so the client can fetch the +// rest in parallel. +// +// Keys are short (gh = github owner/repo) to keep packets small. +func (f *Feed) rebuildRelayInfoBlocks() { + var payload []byte + if r := f.gitHubRelay; r != nil { + payload = []byte(fmt.Sprintf("gh=%s\n", r.Repo())) + } + blocks := protocol.SplitIntoBlocks(payload) + if len(blocks) == 0 { + blocks = [][]byte{nil} + } + prefix := []byte{byte(len(blocks) >> 8), byte(len(blocks))} + blocks[0] = append(prefix, blocks[0]...) 
+ f.relayInfoBlocks = blocks +} + func (f *Feed) getVersionBlock(block int) ([]byte, error) { blocks := f.versionBlocks if len(blocks) == 0 { @@ -198,6 +270,18 @@ func (f *Feed) getTitlesBlock(block int) ([]byte, error) { return blocks[block], nil } +func (f *Feed) getRelayInfoBlock(block int) ([]byte, error) { + blocks := f.relayInfoBlocks + if len(blocks) == 0 { + f.rebuildRelayInfoBlocks() + blocks = f.relayInfoBlocks + } + if block < 0 || block >= len(blocks) { + return nil, fmt.Errorf("relay-info block %d out of range (%d blocks)", block, len(blocks)) + } + return blocks[block], nil +} + // rebuildTitlesBlocks re-serializes the display name map and splits it into blocks. // Block 0 is prefixed with a uint16 total-block count so the client can fetch all // remaining blocks in parallel after reading the first one. diff --git a/internal/server/github_relay.go b/internal/server/github_relay.go new file mode 100644 index 0000000..dd9d53c --- /dev/null +++ b/internal/server/github_relay.go @@ -0,0 +1,685 @@ +package server + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "hash/crc32" + "io" + "log" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/sartoopjj/thefeed/internal/protocol" +) + +// githubAPI is the canonical REST endpoint. Tests can override it. +var githubAPI = "https://api.github.com" + +const flushBatchLimit = 100 + +// GitHubRelay uploads encrypted media to a GitHub repo. Domain and object +// names are HMAC'd; blobs are AES-256-GCM. Uploads are batched into one +// Git Data API commit per flush. +type GitHubRelay struct { + cfg GitHubRelayConfig + passphrase string + domain string + relayKey [protocol.KeySize]byte + branch string + + client *http.Client + + mu sync.Mutex + known map[string]*ghEntry + pending map[string]*pendingUpload + statePath string + dirty bool + + // commitMu serialises ref-advancing operations so concurrent flushes + // don't race on updateRef. 
+ commitMu sync.Mutex +} + +type ghEntry struct { + size int64 + crc uint32 + lastSeen time.Time +} + +type pendingUpload struct { + blob []byte + size int64 + crc uint32 +} + +// NewGitHubRelay returns nil when the config is incomplete. +func NewGitHubRelay(cfg GitHubRelayConfig, domain, passphrase string) *GitHubRelay { + if !cfg.Active() || domain == "" || passphrase == "" { + return nil + } + relayKey, err := protocol.DeriveRelayKey(passphrase) + if err != nil { + return nil + } + branch := cfg.Branch + if branch == "" { + branch = "main" + } + r := &GitHubRelay{ + cfg: cfg, + passphrase: passphrase, + domain: protocol.RelayDomainSegment(domain, passphrase), + relayKey: relayKey, + branch: branch, + client: &http.Client{Timeout: 2 * time.Minute}, + known: make(map[string]*ghEntry), + pending: make(map[string]*pendingUpload), + statePath: cfg.StatePath, + } + if r.statePath != "" { + if err := r.loadState(); err != nil { + log.Printf("[gh-relay] load state %s: %v", r.statePath, err) + } + } + return r +} + +type persistedEntry struct { + Size int64 `json:"size"` + CRC uint32 `json:"crc"` + LastSeen time.Time `json:"lastSeen"` +} + +func (g *GitHubRelay) loadState() error { + f, err := os.Open(g.statePath) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + var raw map[string]persistedEntry + if err := json.NewDecoder(f).Decode(&raw); err != nil { + return err + } + g.mu.Lock() + defer g.mu.Unlock() + for k, v := range raw { + g.known[k] = &ghEntry{size: v.Size, crc: v.CRC, lastSeen: v.LastSeen} + } + log.Printf("[gh-relay] loaded %d entries from %s", len(raw), g.statePath) + return nil +} + +// saveStateLocked writes `known` to disk via a tmp+rename so a crash mid-write +// doesn't leave a truncated file. Caller must hold g.mu. 
+func (g *GitHubRelay) saveStateLocked() error { + if g.statePath == "" { + return nil + } + out := make(map[string]persistedEntry, len(g.known)) + for k, e := range g.known { + out[k] = persistedEntry{Size: e.size, CRC: e.crc, LastSeen: e.lastSeen} + } + dir := filepath.Dir(g.statePath) + if err := os.MkdirAll(dir, 0o700); err != nil { + return err + } + tmp, err := os.CreateTemp(dir, "gh-relay-*.json") + if err != nil { + return err + } + enc := json.NewEncoder(tmp) + enc.SetIndent("", " ") + if err := enc.Encode(out); err != nil { + tmp.Close() + os.Remove(tmp.Name()) + return err + } + if err := tmp.Close(); err != nil { + os.Remove(tmp.Name()) + return err + } + g.dirty = false + return os.Rename(tmp.Name(), g.statePath) +} + +// Repo returns the configured "owner/repo" so the discovery channel can +// expose it to clients without leaking the token. +func (g *GitHubRelay) Repo() string { + if g == nil { + return "" + } + return g.cfg.Repo +} + +// MaxBytes is the per-file cap. 0 means no cap. +func (g *GitHubRelay) MaxBytes() int64 { + if g == nil { + return 0 + } + return g.cfg.MaxBytes +} + +// TTL returns the configured object lifetime. +func (g *GitHubRelay) TTL() time.Duration { + if g == nil { + return 0 + } + return time.Duration(g.cfg.TTLMinutes) * time.Minute +} + +// Domain is the HMAC'd path segment used inside the relay repo. +func (g *GitHubRelay) Domain() string { + if g == nil { + return "" + } + return g.domain +} + +// Upload encrypts body and queues it for the next batched commit. +// ErrTooLarge if body exceeds the configured cap. 
+func (g *GitHubRelay) Upload(ctx context.Context, body []byte) error { + if g == nil { + return errors.New("github relay disabled") + } + if g.cfg.MaxBytes > 0 && int64(len(body)) > g.cfg.MaxBytes { + return ErrTooLarge + } + + size := int64(len(body)) + crc := crc32.ChecksumIEEE(body) + key := protocol.RelayObjectName(size, crc, g.passphrase) + + g.mu.Lock() + if e, ok := g.known[key]; ok { + e.lastSeen = time.Now() + g.dirty = true + g.mu.Unlock() + return nil + } + if _, ok := g.pending[key]; ok { + g.mu.Unlock() + return nil + } + g.mu.Unlock() + + blob, err := protocol.EncryptRelayBlob(g.relayKey, body) + if err != nil { + return fmt.Errorf("encrypt relay blob: %w", err) + } + + g.mu.Lock() + if e, ok := g.known[key]; ok { + e.lastSeen = time.Now() + g.dirty = true + g.mu.Unlock() + return nil + } + if _, ok := g.pending[key]; ok { + g.mu.Unlock() + return nil + } + g.pending[key] = &pendingUpload{blob: blob, size: size, crc: crc} + overLimit := len(g.pending) >= flushBatchLimit + g.mu.Unlock() + + if overLimit { + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + if err := g.flushPending(ctx); err != nil { + log.Printf("[gh-relay] limit flush: %v", err) + } + }() + } + return nil +} + +// Has reports whether the file is committed or queued for the next commit. +func (g *GitHubRelay) Has(size int64, crc uint32) bool { + if g == nil { + return false + } + key := protocol.RelayObjectName(size, crc, g.passphrase) + g.mu.Lock() + defer g.mu.Unlock() + if _, ok := g.known[key]; ok { + return true + } + _, ok := g.pending[key] + return ok +} + +// Touch refreshes the lastSeen timestamp without re-uploading. Used when +// upstream re-delivers a file that's already in the relay. 
+func (g *GitHubRelay) Touch(size int64, crc uint32) { + if g == nil { + return + } + key := protocol.RelayObjectName(size, crc, g.passphrase) + g.mu.Lock() + if e, ok := g.known[key]; ok { + e.lastSeen = time.Now() + g.dirty = true + } + g.mu.Unlock() +} + +// PruneStale removes every file in `known` whose lastSeen is older than +// cutoff. Selection happens INSIDE commitMu so concurrent prunes from +// different readers can't pick the same files and race the resulting +// commits (which used to produce 422 BadObjectState). +func (g *GitHubRelay) PruneStale(ctx context.Context, cutoff time.Time) (int, error) { + if g == nil { + return 0, nil + } + g.commitMu.Lock() + defer g.commitMu.Unlock() + + g.mu.Lock() + var entries []treeEntry + var keys []string + for k, e := range g.known { + if e.lastSeen.Before(cutoff) { + entries = append(entries, treeEntry{ + Path: g.domain + "/" + k, + Mode: "100644", + Type: "blob", + SHA: nil, + }) + keys = append(keys, k) + } + } + g.mu.Unlock() + + if len(entries) == 0 { + return 0, nil + } + log.Printf("[gh-relay] starting prune of %d file(s)", len(entries)) + + headSHA, err := g.getRef(ctx, g.branch) + if err != nil { + return 0, fmt.Errorf("get ref: %w", err) + } + parentTree, err := g.getCommitTree(ctx, headSHA) + if err != nil { + return 0, fmt.Errorf("get commit %s: %w", headSHA, err) + } + newTree, err := g.createTree(ctx, parentTree, entries) + if err != nil { + return 0, fmt.Errorf("create tree: %w", err) + } + msg := fmt.Sprintf("thefeed: prune %d file(s)", len(entries)) + commitSHA, err := g.createCommit(ctx, msg, newTree, []string{headSHA}) + if err != nil { + return 0, fmt.Errorf("create commit: %w", err) + } + if err := g.updateRef(ctx, g.branch, commitSHA); err != nil { + return 0, fmt.Errorf("update ref %s: %w", g.branch, err) + } + + g.mu.Lock() + for _, k := range keys { + delete(g.known, k) + } + g.dirty = true + if err := g.saveStateLocked(); err != nil { + log.Printf("[gh-relay] save state after prune: %v", 
err) + } + g.mu.Unlock() + return len(entries), nil +} + +// --- Flush loop ------------------------------------------------------------- + +// Run waits for shutdown and flushes any remaining pending uploads on the +// way out. Flush + prune during normal operation are driven by +// Feed.AfterFetchCycle so they line up with the natural cadence of upstream +// fetches. A best-effort backstop tick handles the case where nothing has +// fetched in a long time (e.g. all channels were skipped from cache). +func (g *GitHubRelay) Run(ctx context.Context) { + if g == nil { + return + } + tick := time.NewTicker(10 * time.Minute) + defer tick.Stop() + saveTick := time.NewTicker(5 * time.Minute) + defer saveTick.Stop() + for { + select { + case <-saveTick.C: + g.mu.Lock() + if g.dirty && g.statePath != "" { + if err := g.saveStateLocked(); err != nil { + log.Printf("[gh-relay] periodic save: %v", err) + } + } + g.mu.Unlock() + + case <-ctx.Done(): + fctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + if err := g.flushPending(fctx); err != nil { + log.Printf("[gh-relay] shutdown flush: %v", err) + } + cancel() + g.mu.Lock() + if g.dirty { + if err := g.saveStateLocked(); err != nil { + log.Printf("[gh-relay] shutdown save: %v", err) + } + } + g.mu.Unlock() + return + case <-tick.C: + if g.queueSize() == 0 { + continue + } + fctx, cancel := context.WithTimeout(ctx, 5*time.Minute) + if err := g.flushPending(fctx); err != nil { + log.Printf("[gh-relay] backstop flush: %v", err) + } + cancel() + } + } +} + +func (g *GitHubRelay) queueSize() int { + g.mu.Lock() + n := len(g.pending) + g.mu.Unlock() + return n +} + +// Flush forces an immediate commit of any pending uploads. Safe to call +// from tests or graceful shutdown; does nothing if the queue is empty. 
+func (g *GitHubRelay) Flush(ctx context.Context) error { + if g == nil { + return nil + } + return g.flushPending(ctx) +} + +// flushPending drains the pending map into a single Git commit via the Git +// Data API. On any error the batch is re-queued so the next tick retries. +func (g *GitHubRelay) flushPending(ctx context.Context) error { + g.mu.Lock() + if len(g.pending) == 0 { + g.mu.Unlock() + return nil + } + batch := g.pending + g.pending = make(map[string]*pendingUpload) + g.mu.Unlock() + + if err := g.commitBatch(ctx, batch); err != nil { + // Re-queue. A peer goroutine may have queued newer entries with + // the same key; prefer those. + g.mu.Lock() + for k, v := range batch { + if _, exists := g.pending[k]; !exists { + g.pending[k] = v + } + } + g.mu.Unlock() + return err + } + + now := time.Now() + g.mu.Lock() + for k, p := range batch { + g.known[k] = &ghEntry{size: p.size, crc: p.crc, lastSeen: now} + } + g.dirty = true + if err := g.saveStateLocked(); err != nil { + log.Printf("[gh-relay] save state: %v", err) + } + g.mu.Unlock() + log.Printf("[gh-relay] committed %d file(s)", len(batch)) + return nil +} + +// treeEntry is the Git Data API tree-item shape used by both upload +// (SHA = newly-created blob) and delete (SHA = nil → entry removed from +// the resulting tree). +type treeEntry struct { + Path string `json:"path"` + Mode string `json:"mode"` + Type string `json:"type"` + SHA *string `json:"sha"` // pointer so nil serialises as JSON `null` +} + +// commitBatch performs the Git Data API dance: +// +// GET ref → POST blobs → POST tree (with base_tree) → POST commit → PATCH ref. +// +// A single commit covers every file in the batch, regardless of count. 
+func (g *GitHubRelay) commitBatch(ctx context.Context, batch map[string]*pendingUpload) error { + if len(batch) == 0 { + return nil + } + g.commitMu.Lock() + defer g.commitMu.Unlock() + + log.Printf("[gh-relay] starting upload of %d file(s)", len(batch)) + headSHA, err := g.getRef(ctx, g.branch) + if err != nil { + return fmt.Errorf("get ref: %w", err) + } + parentTree, err := g.getCommitTree(ctx, headSHA) + if err != nil { + return fmt.Errorf("get commit %s: %w", headSHA, err) + } + + entries := make([]treeEntry, 0, len(batch)) + for objKey, p := range batch { + blobSHA, err := g.createBlob(ctx, p.blob) + if err != nil { + return fmt.Errorf("create blob %s: %w", objKey, err) + } + s := blobSHA + entries = append(entries, treeEntry{ + Path: g.domain + "/" + objKey, + Mode: "100644", + Type: "blob", + SHA: &s, + }) + } + + newTree, err := g.createTree(ctx, parentTree, entries) + if err != nil { + return fmt.Errorf("create tree: %w", err) + } + msg := fmt.Sprintf("thefeed: upload %d file(s)", len(batch)) + commitSHA, err := g.createCommit(ctx, msg, newTree, []string{headSHA}) + if err != nil { + return fmt.Errorf("create commit: %w", err) + } + if err := g.updateRef(ctx, g.branch, commitSHA); err != nil { + return fmt.Errorf("update ref %s: %w", g.branch, err) + } + return nil +} + +// --- Git Data API plumbing -------------------------------------------------- + +func (g *GitHubRelay) getRef(ctx context.Context, branch string) (string, error) { + req, err := g.newReq(ctx, http.MethodGet, "/repos/"+g.cfg.Repo+"/git/ref/heads/"+branch, nil) + if err != nil { + return "", err + } + resp, err := g.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("%s — %s", resp.Status, string(body)) + } + var out struct { + Object struct { + SHA string `json:"sha"` + } `json:"object"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", 
err + } + return out.Object.SHA, nil +} + +func (g *GitHubRelay) getCommitTree(ctx context.Context, commitSHA string) (string, error) { + req, err := g.newReq(ctx, http.MethodGet, "/repos/"+g.cfg.Repo+"/git/commits/"+commitSHA, nil) + if err != nil { + return "", err + } + resp, err := g.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("%s — %s", resp.Status, string(body)) + } + var out struct { + Tree struct { + SHA string `json:"sha"` + } `json:"tree"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", err + } + return out.Tree.SHA, nil +} + +func (g *GitHubRelay) createBlob(ctx context.Context, content []byte) (string, error) { + body, _ := json.Marshal(map[string]any{ + "encoding": "base64", + "content": base64.StdEncoding.EncodeToString(content), + }) + req, err := g.newReq(ctx, http.MethodPost, "/repos/"+g.cfg.Repo+"/git/blobs", bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + resp, err := g.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + raw, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("%s — %s", resp.Status, string(raw)) + } + var out struct { + SHA string `json:"sha"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", err + } + return out.SHA, nil +} + +func (g *GitHubRelay) createTree(ctx context.Context, baseTree string, entries any) (string, error) { + body, _ := json.Marshal(map[string]any{ + "base_tree": baseTree, + "tree": entries, + }) + req, err := g.newReq(ctx, http.MethodPost, "/repos/"+g.cfg.Repo+"/git/trees", bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + resp, err := g.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if 
resp.StatusCode/100 != 2 { + raw, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("%s — %s", resp.Status, string(raw)) + } + var out struct { + SHA string `json:"sha"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", err + } + return out.SHA, nil +} + +func (g *GitHubRelay) createCommit(ctx context.Context, message, treeSHA string, parents []string) (string, error) { + body, _ := json.Marshal(map[string]any{ + "message": message, + "tree": treeSHA, + "parents": parents, + }) + req, err := g.newReq(ctx, http.MethodPost, "/repos/"+g.cfg.Repo+"/git/commits", bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + resp, err := g.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + raw, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("%s — %s", resp.Status, string(raw)) + } + var out struct { + SHA string `json:"sha"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", err + } + return out.SHA, nil +} + +func (g *GitHubRelay) updateRef(ctx context.Context, branch, commitSHA string) error { + body, _ := json.Marshal(map[string]any{ + "sha": commitSHA, + "force": false, + }) + req, err := g.newReq(ctx, http.MethodPatch, "/repos/"+g.cfg.Repo+"/git/refs/heads/"+branch, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + resp, err := g.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + raw, _ := io.ReadAll(resp.Body) + return fmt.Errorf("%s — %s", resp.Status, string(raw)) + } + return nil +} + +// --- HTTP plumbing ---------------------------------------------------------- + +func (g *GitHubRelay) newReq(ctx context.Context, method, urlPath string, body io.Reader) (*http.Request, error) { + full := strings.TrimRight(githubAPI, "/") + urlPath + req, err := 
http.NewRequestWithContext(ctx, method, full, body) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", "Bearer "+g.cfg.Token) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("User-Agent", "thefeed-server") + return req, nil +} diff --git a/internal/server/github_relay_test.go b/internal/server/github_relay_test.go new file mode 100644 index 0000000..d5af73c --- /dev/null +++ b/internal/server/github_relay_test.go @@ -0,0 +1,242 @@ +package server + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "hash/crc32" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +// fakeGitHub stubs the slice of GitHub's REST API the relay uses: +// - Git Data API (refs / commits / blobs / trees) for batched uploads +// - Contents API (list / delete) for PruneStale +type fakeGitHub struct { + mu sync.Mutex + files map[string][]byte // repoPath → ciphertext (committed) + commits int // number of commits created (rate-limit metric) + blobs int // blob create count + deletes int // contents-api deletions + + // Tree state — dumb counter; we don't model real Git history. 
+ headSHA string + treeSHA string + nextSeq int +} + +func (f *fakeGitHub) sha(prefix string) string { + f.nextSeq++ + return prefix + "-" + strconv.Itoa(f.nextSeq) +} + +func (f *fakeGitHub) handler() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + f.mu.Lock() + defer f.mu.Unlock() + path := strings.TrimPrefix(r.URL.Path, "/repos/owner/repo/") + + // --- Git Data API --------------------------------------------------- + switch { + case r.Method == http.MethodGet && strings.HasPrefix(path, "git/ref/heads/"): + if f.headSHA == "" { + f.headSHA = f.sha("commit") + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "object": map[string]any{"sha": f.headSHA}, + }) + return + + case r.Method == http.MethodGet && strings.HasPrefix(path, "git/commits/"): + if f.treeSHA == "" { + f.treeSHA = f.sha("tree") + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "tree": map[string]any{"sha": f.treeSHA}, + }) + return + + case r.Method == http.MethodPost && path == "git/blobs": + var body struct{ Content string } + _ = json.NewDecoder(r.Body).Decode(&body) + f.blobs++ + s := f.sha("blob") + _ = json.NewEncoder(w).Encode(map[string]any{"sha": s}) + return + + case r.Method == http.MethodPost && path == "git/trees": + // SHA is *string so null serialises as JSON null and decodes back to nil. 
+ var body struct { + BaseTree string `json:"base_tree"` + Tree []struct { + Path string `json:"path"` + SHA *string `json:"sha"` + } `json:"tree"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + for _, e := range body.Tree { + if e.SHA == nil { + delete(f.files, e.Path) + f.deletes++ + } else { + f.files[e.Path] = []byte("committed") + } + } + f.treeSHA = f.sha("tree") + _ = json.NewEncoder(w).Encode(map[string]any{"sha": f.treeSHA}) + return + + case r.Method == http.MethodPost && path == "git/commits": + f.commits++ + f.headSHA = f.sha("commit") + _ = json.NewEncoder(w).Encode(map[string]any{"sha": f.headSHA}) + return + + case r.Method == http.MethodPatch && strings.HasPrefix(path, "git/refs/heads/"): + w.WriteHeader(http.StatusOK) + return + } + + // --- Contents API (used only for the directory listing in PruneStale) --- + if r.Method == http.MethodGet { + repoPath := strings.TrimPrefix(path, "contents/") + items := []map[string]any{} + prefix := repoPath + "/" + for k, v := range f.files { + if strings.HasPrefix(k, prefix) { + items = append(items, map[string]any{ + "path": k, "sha": "sha-" + k, "type": "file", "size": len(v), + }) + } + } + _ = json.NewEncoder(w).Encode(items) + } + }) +} + +func newFakeGitHub(t *testing.T) (*fakeGitHub, func()) { + f := &fakeGitHub{files: map[string][]byte{}} + srv := httptest.NewServer(f.handler()) + prev := githubAPI + githubAPI = srv.URL + t.Cleanup(func() { githubAPI = prev; srv.Close() }) + return f, srv.Close +} + +func TestGitHubRelayUploadAndDedup(t *testing.T) { + fk, _ := newFakeGitHub(t) + r := NewGitHubRelay(GitHubRelayConfig{Enabled: true, Token: "tok", Repo: "owner/repo", MaxBytes: 1 << 20, TTLMinutes: 60}, "feed.example.com", "test-passphrase") + if r == nil { + t.Fatal("relay should activate with full config") + } + + body := []byte("hello relay world") + if err := r.Upload(context.Background(), body); err != nil { + t.Fatalf("first upload: %v", err) + } + // Second upload of the same content must dedup 
before reaching GitHub. + if err := r.Upload(context.Background(), body); err != nil { + t.Fatalf("second upload: %v", err) + } + // Force the batch to commit synchronously. + if err := r.Flush(context.Background()); err != nil { + t.Fatalf("flush: %v", err) + } + if fk.commits != 1 { + t.Errorf("commits = %d, want 1 (one batch)", fk.commits) + } + if fk.blobs != 1 { + t.Errorf("blobs = %d, want 1 (dedup before flush)", fk.blobs) + } + if !r.Has(int64(len(body)), crc32.ChecksumIEEE(body)) { + t.Errorf("Has should return true after upload") + } + // A third Flush with no new uploads must be a no-op (no new commit). + if err := r.Flush(context.Background()); err != nil { + t.Fatalf("noop flush: %v", err) + } + if fk.commits != 1 { + t.Errorf("commits after noop flush = %d, want 1", fk.commits) + } +} + +func TestGitHubRelayMaxBytes(t *testing.T) { + newFakeGitHub(t) + r := NewGitHubRelay(GitHubRelayConfig{Enabled: true, Token: "tok", Repo: "owner/repo", MaxBytes: 16, TTLMinutes: 60}, "ex.test", "pp") + err := r.Upload(context.Background(), bytes.Repeat([]byte("x"), 32)) + if !errors.Is(err, ErrTooLarge) { + t.Fatalf("err = %v, want ErrTooLarge", err) + } +} + +func TestGitHubRelayPruneStale(t *testing.T) { + fk, _ := newFakeGitHub(t) + r := NewGitHubRelay(GitHubRelayConfig{Enabled: true, Token: "tok", Repo: "owner/repo", MaxBytes: 1 << 20, TTLMinutes: 1}, "ex.test", "pp") + if err := r.Upload(context.Background(), []byte("stays")); err != nil { + t.Fatalf("upload stays: %v", err) + } + if err := r.Upload(context.Background(), []byte("goes")); err != nil { + t.Fatalf("upload goes: %v", err) + } + // Commit the batch so PruneStale can find files in the listing. + if err := r.Flush(context.Background()); err != nil { + t.Fatalf("flush: %v", err) + } + // Roll back the lastSeen of the "goes" entry so PruneStale removes it. + // "stays" is 5 bytes, "goes" is 4 — match by size. 
+ r.mu.Lock() + for _, e := range r.known { + if e.size == 4 { + e.lastSeen = time.Now().Add(-2 * time.Hour) + } + } + r.mu.Unlock() + + commitsBefore := fk.commits + removed, err := r.PruneStale(context.Background(), time.Now().Add(-time.Hour)) + if err != nil { + t.Fatalf("prune: %v", err) + } + if removed != 1 { + t.Errorf("removed = %d, want 1", removed) + } + if fk.deletes != 1 { + t.Errorf("tree-deletes = %d, want 1", fk.deletes) + } + if got := fk.commits - commitsBefore; got != 1 { + t.Errorf("prune commits = %d, want 1 (single batched commit)", got) + } +} + +// TestGitHubRelayStatePersistence: known map survives a fresh relay +// instance pointed at the same statePath. +func TestGitHubRelayStatePersistence(t *testing.T) { + newFakeGitHub(t) + dir := t.TempDir() + statePath := dir + "/gh_relay_state.json" + + cfg := GitHubRelayConfig{Enabled: true, Token: "tok", Repo: "owner/repo", MaxBytes: 1 << 20, TTLMinutes: 60, StatePath: statePath} + r1 := NewGitHubRelay(cfg, "ex.test", "pp") + if err := r1.Upload(context.Background(), []byte("survive me")); err != nil { + t.Fatalf("upload: %v", err) + } + if err := r1.Flush(context.Background()); err != nil { + t.Fatalf("flush: %v", err) + } + body := []byte("survive me") + if !r1.Has(int64(len(body)), crc32.ChecksumIEEE(body)) { + t.Fatal("r1 should know the file after flush") + } + + r2 := NewGitHubRelay(cfg, "ex.test", "pp") + if !r2.Has(int64(len(body)), crc32.ChecksumIEEE(body)) { + t.Fatal("r2 should have loaded the file from statePath") + } +} + diff --git a/internal/server/media.go b/internal/server/media.go index af86101..cd8a13f 100644 --- a/internal/server/media.go +++ b/internal/server/media.go @@ -4,6 +4,7 @@ import ( "bytes" "compress/flate" "compress/gzip" + "context" "errors" "fmt" "hash/crc32" @@ -27,11 +28,12 @@ type MediaCache struct { maxFileBytes int64 ttl time.Duration compression protocol.MediaCompression + dnsEnabled bool // when false, RelayDNS stays unset on the wire - // Logger receives an 
info line per cache event when set (Store hits/misses, - // evictions). The default is a silent no-op so tests don't print noise. logf func(format string, args ...interface{}) + gh *GitHubRelay + mu sync.RWMutex byKey map[string]*mediaEntry // upstream key (file_id / URL) → entry byChannel map[uint16]*mediaEntry // assigned channel → entry @@ -67,16 +69,11 @@ type mediaEntry struct { // MediaCacheConfig configures a new MediaCache. type MediaCacheConfig struct { - // MaxFileBytes is the largest individual file the cache will accept. - // Files larger than this are rejected by Store with ErrTooLarge. - MaxFileBytes int64 - // TTL is how long an entry stays cached after its last refresh. - TTL time.Duration - // Compression is the wire-format compression used for media blocks. - // Defaults to MediaCompressionNone when zero. - Compression protocol.MediaCompression - // Logf receives info-level cache events. Optional. - Logf func(format string, args ...interface{}) + MaxFileBytes int64 + TTL time.Duration + Compression protocol.MediaCompression + Logf func(format string, args ...interface{}) + DNSRelayEnabled bool // controls Relays[RelayDNS] on the wire } // ErrTooLarge is returned by Store when content exceeds MaxFileBytes. @@ -99,6 +96,7 @@ func NewMediaCache(cfg MediaCacheConfig) *MediaCache { maxFileBytes: cfg.MaxFileBytes, ttl: cfg.TTL, compression: cfg.Compression, + dnsEnabled: cfg.DNSRelayEnabled, logf: logf, byKey: make(map[string]*mediaEntry), byChannel: make(map[uint16]*mediaEntry), @@ -128,14 +126,18 @@ func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filen tag = protocol.MediaFile } size := int64(len(content)) - if c.maxFileBytes > 0 && size > c.maxFileBytes { + // Reject only when no enabled relay could host this file. A file too big + // for DNS but small enough for GitHub still belongs in the cache — + // MaxAcceptableBytes() collapses both caps into a single ceiling. 
+ if max := c.MaxAcceptableBytes(); max > 0 && size > max { atomic.AddUint64(&c.storeRejected, 1) return protocol.MediaMeta{ - Tag: tag, - Size: size, - Downloadable: false, + Tag: tag, + Size: size, + Relays: nil, }, ErrTooLarge } + dnsFits := c.maxFileBytes == 0 || size <= c.maxFileBytes now := time.Now() hash := crc32.ChecksumIEEE(content) @@ -144,18 +146,15 @@ func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filen defer c.mu.Unlock() if existing, ok := c.byKey[cacheKey]; ok && existing.crc32 == hash { - // Same upstream id and same content — just refresh the TTL. existing.expiresAt = c.expiry(now) atomic.AddUint64(&c.storeHits, 1) c.logf("media: refresh tag=%s key=%s ch=%d size=%d", tag, cacheKey, existing.channel, existing.size) + if c.gh != nil { + c.gh.Touch(existing.size, existing.crc32) + } return c.metaForLocked(existing), nil } - // Cross-key content match: a different upstream id pointed at exactly - // the same bytes. Bind the new cache key to the existing entry so any - // future Lookup under either key works, and refresh the TTL. This is - // the case the spec asks for: "same media → just reset TTL, don't take - // a new channel slot". if existing, ok := c.byHash[hash]; ok { existing.expiresAt = c.expiry(now) if cacheKey != existing.cacheKey { @@ -173,6 +172,9 @@ func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filen c.byKey[cacheKey] = existing atomic.AddUint64(&c.storeHits, 1) c.logf("media: dedup tag=%s key=%s ch=%d size=%d (hash match)", tag, cacheKey, existing.channel, existing.size) + if c.gh != nil { + c.gh.Touch(existing.size, existing.crc32) + } return c.metaForLocked(existing), nil } @@ -190,29 +192,38 @@ func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filen // active entries; n is capped by the media-channel range. 
c.sweepExpiredLocked(now) - channel, err := c.allocateChannelLocked(now) - if err != nil { - return protocol.MediaMeta{}, err - } - - blocks, encErr := splitMediaBlocks(hash, content, c.compression) - if encErr != nil { - return protocol.MediaMeta{}, encErr - } - if size > 0 { - var compressedBody int - for _, b := range blocks { - compressedBody += len(b) + var ( + channel uint16 + blocks [][]byte + ) + if dnsFits { + var err error + channel, err = c.allocateChannelLocked(now) + if err != nil { + return protocol.MediaMeta{}, err } - compressedBody -= protocol.MediaBlockHeaderLen - if compressedBody < 0 { - compressedBody = 0 + var encErr error + blocks, encErr = splitMediaBlocks(hash, content, c.compression) + if encErr != nil { + return protocol.MediaMeta{}, encErr } - var savedPct int - if c.compression != protocol.MediaCompressionNone && size > 0 { - savedPct = int((size - int64(compressedBody)) * 100 / size) + if size > 0 { + var compressedBody int + for _, b := range blocks { + compressedBody += len(b) + } + compressedBody -= protocol.MediaBlockHeaderLen + if compressedBody < 0 { + compressedBody = 0 + } + var savedPct int + if c.compression != protocol.MediaCompressionNone && size > 0 { + savedPct = int((size - int64(compressedBody)) * 100 / size) + } + c.logf("media: compress=%s key=%s orig=%d body=%d saved=%d%%", c.compression, cacheKey, size, compressedBody, savedPct) } - c.logf("media: compress=%s key=%s orig=%d body=%d saved=%d%%", c.compression, cacheKey, size, compressedBody, savedPct) + } else { + c.logf("media: store key=%s size=%d too big for DNS — relay only", cacheKey, size) } entry := &mediaEntry{ channel: channel, @@ -226,13 +237,29 @@ func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filen expiresAt: c.expiry(now), } c.byKey[cacheKey] = entry - c.byChannel[channel] = entry + if dnsFits { + c.byChannel[channel] = entry + } c.byHash[hash] = entry atomic.AddUint64(&c.storeMisses, 1) atomic.AddInt64(&c.currentEntries, 1) 
atomic.AddInt64(&c.currentBytes, size) c.logf("media: store tag=%s key=%s ch=%d size=%d blocks=%d", tag, cacheKey, channel, size, len(blocks)) + // Best-effort relay upload — copy of `content` because the caller may + // reuse the slice. Failures are logged but never block the DNS path. + if c.gh != nil { + gh := c.gh + body := append([]byte(nil), content...) + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + if err := gh.Upload(ctx, body); err != nil { + c.logf("media: gh-relay upload failed: %v", err) + } + }() + } + return c.metaForLocked(entry), nil } @@ -439,15 +466,93 @@ func (c *MediaCache) expiry(now time.Time) time.Time { } func (c *MediaCache) metaForLocked(entry *mediaEntry) protocol.MediaMeta { - return protocol.MediaMeta{ - Tag: entry.tag, - Size: entry.size, - Downloadable: true, - Channel: entry.channel, - Blocks: uint16(len(entry.blocks)), - CRC32: entry.crc32, - Filename: entry.filename, + // DNS bit only when DNS is enabled AND we actually computed blocks for + // this entry. Files larger than the DNS cap have len(blocks)==0. + dnsOK := c.dnsEnabled && len(entry.blocks) > 0 + // GitHub bit reflects "the relay would serve this file": relay enabled + // and the file fits its cap. We don't require the upload to have + // finished — small files in particular would otherwise miss the bit on + // first render because the upload runs asynchronously. The web layer + // retries transient 404s while the upload is still in flight. + ghOK := false + if c.gh != nil { + ghMax := c.gh.MaxBytes() + ghOK = ghMax == 0 || entry.size <= ghMax } + relays := []bool{dnsOK, ghOK} + meta := protocol.MediaMeta{ + Tag: entry.tag, + Size: entry.size, + Relays: relays, + CRC32: entry.crc32, + Filename: entry.filename, + } + if dnsOK { + meta.Channel = entry.channel + meta.Blocks = uint16(len(entry.blocks)) + } + return meta +} + +// SetGitHubRelay attaches the GitHub fast relay. 
Store calls (and Lookup +// hits) will then surface RelayGitHub when the relay has the bytes. +func (c *MediaCache) SetGitHubRelay(g *GitHubRelay) { + c.mu.Lock() + defer c.mu.Unlock() + c.gh = g +} + +// TouchRelayEntries refreshes relay lastSeen for every cached file so +// files referenced by skipped-fetch cycles aren't pruned. +func (c *MediaCache) TouchRelayEntries() { + if c == nil { + return + } + c.mu.RLock() + gh := c.gh + if gh == nil { + c.mu.RUnlock() + return + } + pairs := make([][2]uint64, 0, len(c.byHash)) + for _, e := range c.byHash { + pairs = append(pairs, [2]uint64{uint64(e.size), uint64(e.crc32)}) + } + c.mu.RUnlock() + for _, p := range pairs { + gh.Touch(int64(p[0]), uint32(p[1])) + } +} + +// MaxAcceptableBytes returns the largest file size any enabled relay would +// accept. Callers use it as the "should we even fetch this?" gate so that +// files which fit GitHub but not DNS still get pulled. 0 means "no cap". +func (c *MediaCache) MaxAcceptableBytes() int64 { + if c == nil { + return 0 + } + c.mu.RLock() + gh := c.gh + c.mu.RUnlock() + dns := c.maxFileBytes + var ghMax int64 + if gh != nil { + ghMax = gh.MaxBytes() + } + // 0 from any enabled relay means "no cap" — propagate. 
+ if (dns == 0 && c.dnsEnabled) || (gh != nil && ghMax == 0) { + return 0 + } + if !c.dnsEnabled { + return ghMax + } + if gh == nil { + return dns + } + if ghMax > dns { + return ghMax + } + return dns } // splitMediaBlocks compresses the content (when compression != none), diff --git a/internal/server/media_http.go b/internal/server/media_http.go index 5c2bd6a..392f323 100644 --- a/internal/server/media_http.go +++ b/internal/server/media_http.go @@ -90,13 +90,13 @@ func downloadHTTPMedia(ctx context.Context, cache *MediaCache, tag, rawURL strin return protocol.MediaMeta{}, false } - maxBytes := cache.maxFileBytes + maxBytes := cache.MaxAcceptableBytes() if maxBytes > 0 && resp.ContentLength > 0 && resp.ContentLength > maxBytes { size := resp.ContentLength return protocol.MediaMeta{ - Tag: tag, - Size: size, - Downloadable: false, + Tag: tag, + Size: size, + Relays: nil, }, true } @@ -115,9 +115,9 @@ func downloadHTTPMedia(ctx context.Context, cache *MediaCache, tag, rawURL strin } if maxBytes > 0 && int64(len(bytes)) > maxBytes { return protocol.MediaMeta{ - Tag: tag, - Size: int64(len(bytes)), - Downloadable: false, + Tag: tag, + Size: int64(len(bytes)), + Relays: nil, }, true } diff --git a/internal/server/media_integration_test.go b/internal/server/media_integration_test.go index 4f40d37..660c8b5 100644 --- a/internal/server/media_integration_test.go +++ b/internal/server/media_integration_test.go @@ -27,7 +27,7 @@ func TestApplyHTTPMediaSourcesEndToEnd(t *testing.T) { })) defer srv.Close() - cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour}) + cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour, DNSRelayEnabled: true}) msgs := []protocol.Message{ {ID: 100, Timestamp: 1, Text: protocol.MediaImage + "\nhello"}, @@ -42,7 +42,7 @@ func TestApplyHTTPMediaSourcesEndToEnd(t *testing.T) { if !ok { t.Fatalf("ParseMediaText ok=false on rewritten message: %q", msgs[0].Text) } - if !meta.Downloadable { + if 
!meta.HasRelay(protocol.RelayDNS) { t.Fatalf("expected downloadable meta, got %+v (text=%q)", meta, msgs[0].Text) } if meta.Tag != protocol.MediaImage { @@ -80,7 +80,7 @@ func TestApplyHTTPMediaSourcesEndToEnd(t *testing.T) { } } -// TestApplyHTTPMediaSourcesGzipRoundTrip: with --media-compression=gzip, +// TestApplyHTTPMediaSourcesGzipRoundTrip: with --dns-media-compression=gzip, // a successful upstream fetch lands compressed blocks in the cache. A // client decompressing the assembled blocks recovers the original bytes // verbatim and the embedded CRC32 matches. @@ -94,9 +94,10 @@ func TestApplyHTTPMediaSourcesGzipRoundTrip(t *testing.T) { defer srv.Close() cache := NewMediaCache(MediaCacheConfig{ - MaxFileBytes: 1 << 20, - TTL: time.Hour, - Compression: protocol.MediaCompressionGzip, + MaxFileBytes: 1 << 20, + TTL: time.Hour, + Compression: protocol.MediaCompressionGzip, + DNSRelayEnabled: true, }) msgs := []protocol.Message{{ID: 100, Timestamp: 1, Text: protocol.MediaImage + "\n"}} sources := []mediaSource{{tag: protocol.MediaImage, url: srv.URL + "/big.png"}} @@ -106,7 +107,7 @@ func TestApplyHTTPMediaSourcesGzipRoundTrip(t *testing.T) { applyHTTPMediaSources(ctx, cache, msgs, sources) meta, _, ok := protocol.ParseMediaText(msgs[0].Text) - if !ok || !meta.Downloadable { + if !ok || !meta.HasRelay(protocol.RelayDNS) { t.Fatalf("expected downloadable meta, got %+v", meta) } @@ -169,7 +170,7 @@ func TestApplyHTTPMediaSourcesAlbum(t *testing.T) { })) defer srv.Close() - cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour}) + cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour, DNSRelayEnabled: true}) // Mirror what parsePublicMessagesWithMedia produces for a 3-image album: // stacked [IMAGE] headers + caption, plus an extraURLs slice on the source. 
@@ -207,7 +208,7 @@ func TestApplyHTTPMediaSourcesAlbum(t *testing.T) { if !ok { t.Fatalf("ParseMediaText #%d ok=false on %q", i, rest) } - if !meta.Downloadable { + if !meta.HasRelay(protocol.RelayDNS) { t.Errorf("header #%d not downloadable: %+v", i, meta) } if int(meta.Size) != len(images[i]) { @@ -236,7 +237,7 @@ func TestApplyHTTPMediaSourcesAlbumPartialFailure(t *testing.T) { })) defer srv.Close() - cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour}) + cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour, DNSRelayEnabled: true}) body := protocol.MediaImage + "\n" + protocol.MediaImage + "\ncap" msgs := []protocol.Message{{ID: 5, Timestamp: 1, Text: body}} @@ -273,7 +274,7 @@ func TestApplyHTTPMediaSourcesRejectsOversize(t *testing.T) { })) defer srv.Close() - cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 100, TTL: time.Hour}) + cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 100, TTL: time.Hour, DNSRelayEnabled: true}) msgs := []protocol.Message{{ID: 1, Timestamp: 1, Text: protocol.MediaImage + "\ncap"}} sources := []mediaSource{{tag: protocol.MediaImage, url: srv.URL + "/big.jpg"}} @@ -285,7 +286,7 @@ func TestApplyHTTPMediaSourcesRejectsOversize(t *testing.T) { if !ok { t.Fatalf("ParseMediaText ok=false") } - if meta.Downloadable { + if meta.HasRelay(protocol.RelayDNS) { t.Fatalf("oversized file should not be downloadable; got meta=%+v", meta) } if meta.Size != int64(len(bigBody)) { diff --git a/internal/server/media_telegram.go b/internal/server/media_telegram.go index 6ab441a..f52e6de 100644 --- a/internal/server/media_telegram.go +++ b/internal/server/media_telegram.go @@ -78,12 +78,13 @@ func (tr *TelegramReader) downloadTelegramPhoto(ctx context.Context, api *tg.Cli return protocol.MediaMeta{}, false } // Honour the configured max-size early so we don't even open the RPC for - // objects we'll just throw away. 
- if maxBytes := cache.maxFileBytes; maxBytes > 0 && bestBytes > maxBytes { + // objects no enabled relay would accept. Files that fit GitHub but not + // DNS still get fetched. + if maxBytes := cache.MaxAcceptableBytes(); maxBytes > 0 && bestBytes > maxBytes { return protocol.MediaMeta{ - Tag: protocol.MediaImage, - Size: bestBytes, - Downloadable: false, + Tag: protocol.MediaImage, + Size: bestBytes, + Relays: nil, }, true } @@ -128,11 +129,11 @@ func (tr *TelegramReader) downloadTelegramDocument(ctx context.Context, api *tg. return protocol.MediaMeta{}, false } - if maxBytes := cache.maxFileBytes; maxBytes > 0 && doc.Size > maxBytes { + if maxBytes := cache.MaxAcceptableBytes(); maxBytes > 0 && doc.Size > maxBytes { return protocol.MediaMeta{ - Tag: tag, - Size: doc.Size, - Downloadable: false, + Tag: tag, + Size: doc.Size, + Relays: nil, }, true } @@ -170,7 +171,7 @@ func (tr *TelegramReader) downloadTelegramFile(ctx context.Context, api *tg.Clie cache := tr.feed.MediaCache() maxBytes := int64(0) if cache != nil { - maxBytes = cache.maxFileBytes + maxBytes = cache.MaxAcceptableBytes() } var ( diff --git a/internal/server/media_test.go b/internal/server/media_test.go index 0b25a65..0eaad3e 100644 --- a/internal/server/media_test.go +++ b/internal/server/media_test.go @@ -12,7 +12,24 @@ import ( ) func newTestCache(maxBytes int64, ttl time.Duration) *MediaCache { - return NewMediaCache(MediaCacheConfig{MaxFileBytes: maxBytes, TTL: ttl}) + return NewMediaCache(MediaCacheConfig{MaxFileBytes: maxBytes, TTL: ttl, DNSRelayEnabled: true}) +} + +// TestMediaCacheRelayFlags: with DNS off the wire flag stays clear, and +// when a GitHub relay is attached the cache surfaces RelayGitHub. 
+func TestMediaCacheRelayFlags(t *testing.T) { + cfg := MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour, DNSRelayEnabled: false} + cache := NewMediaCache(cfg) + meta, err := cache.Store("k", protocol.MediaImage, []byte("payload"), "image/jpeg", "") + if err != nil { + t.Fatalf("Store: %v", err) + } + if meta.HasRelay(protocol.RelayDNS) { + t.Errorf("DNS relay should be off when DNSRelayEnabled=false") + } + if meta.HasRelay(protocol.RelayGitHub) { + t.Errorf("GitHub relay should be off when no relay is attached") + } } func TestMediaCacheStoreAndGetBlock(t *testing.T) { @@ -23,8 +40,8 @@ func TestMediaCacheStoreAndGetBlock(t *testing.T) { if err != nil { t.Fatalf("Store: %v", err) } - if !meta.Downloadable { - t.Fatalf("Downloadable = false, want true") + if !meta.HasRelay(protocol.RelayDNS) { + t.Fatalf("RelayDNS = false, want true") } if !protocol.IsMediaChannel(meta.Channel) { t.Fatalf("Channel %d not in media range", meta.Channel) @@ -72,9 +89,10 @@ func TestMediaCacheStoreAndGetBlock(t *testing.T) { // original. func TestMediaCacheStoreGzip(t *testing.T) { cache := NewMediaCache(MediaCacheConfig{ - MaxFileBytes: 1 << 20, - TTL: time.Hour, - Compression: protocol.MediaCompressionGzip, + MaxFileBytes: 1 << 20, + TTL: time.Hour, + Compression: protocol.MediaCompressionGzip, + DNSRelayEnabled: true, }) content := bytes.Repeat([]byte("compress-me "), 200) diff --git a/internal/server/public.go b/internal/server/public.go index 6008a59..5c60e05 100644 --- a/internal/server/public.go +++ b/internal/server/public.go @@ -28,13 +28,26 @@ type PublicReader struct { client *http.Client baseURL string - mu sync.RWMutex - cache map[string]cachedMessages - cacheTTL time.Duration + mu sync.RWMutex + cache map[string]cachedMessages + cacheTTL time.Duration + fetchInterval time.Duration refreshCh chan struct{} } +// SetFetchInterval overrides the default 10m fetch cadence. Caller must +// invoke before Run starts. 
+func (pr *PublicReader) SetFetchInterval(d time.Duration) { + if d <= 0 { + return + } + pr.mu.Lock() + pr.fetchInterval = d + pr.cacheTTL = d + pr.mu.Unlock() +} + // NewPublicReader creates a reader for public channels without Telegram login. func NewPublicReader(channelUsernames []string, feed *Feed, msgLimit int, baseCh int) *PublicReader { cleaned := make([]string, len(channelUsernames)) @@ -56,9 +69,10 @@ func NewPublicReader(channelUsernames []string, feed *Feed, msgLimit int, baseCh Timeout: 30 * time.Second, }, baseURL: "https://t.me/s", - cache: make(map[string]cachedMessages), - cacheTTL: 10 * time.Minute, - refreshCh: make(chan struct{}, 1), + cache: make(map[string]cachedMessages), + cacheTTL: 10 * time.Minute, + fetchInterval: 10 * time.Minute, + refreshCh: make(chan struct{}, 1), } } @@ -67,9 +81,10 @@ func (pr *PublicReader) Run(ctx context.Context) error { pr.feed.SetTelegramLoggedIn(false) pr.fetchAll(ctx) - ticker := time.NewTicker(10 * time.Minute) + interval := pr.fetchInterval + ticker := time.NewTicker(interval) defer ticker.Stop() - pr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + pr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) for { select { @@ -77,14 +92,14 @@ func (pr *PublicReader) Run(ctx context.Context) error { return ctx.Err() case <-ticker.C: pr.fetchAll(ctx) - pr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + pr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) case <-pr.refreshCh: pr.mu.Lock() pr.cache = make(map[string]cachedMessages) pr.mu.Unlock() pr.fetchAll(ctx) - ticker.Reset(10 * time.Minute) - pr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + ticker.Reset(interval) + pr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) } } } @@ -112,14 +127,18 @@ func (pr *PublicReader) UpdateChannels(channels []string) { func (pr *PublicReader) fetchAll(ctx context.Context) { log.Printf("[public] fetch cycle started for %d channels", 
len(pr.channels)) start := time.Now() - var fetched, failed int + var fetched, failed, skipped int + pr.mu.RLock() + cacheTTL := pr.cacheTTL + pr.mu.RUnlock() for i, username := range pr.channels { chNum := pr.baseCh + i pr.mu.RLock() cached, ok := pr.cache[username] pr.mu.RUnlock() - if ok && time.Since(cached.fetched) < pr.cacheTTL { + if ok && time.Since(cached.fetched) < cacheTTL { + skipped++ continue } @@ -148,7 +167,9 @@ func (pr *PublicReader) fetchAll(ctx context.Context) { fetched++ log.Printf("[public] updated %s (%s): %d messages", username, title, len(msgs)) } - log.Printf("[public] fetch cycle done in %s: %d fetched, %d failed, %d total", time.Since(start).Round(time.Millisecond), fetched, failed, len(pr.channels)) + log.Printf("[public] fetch cycle done in %s: %d fetched, %d failed, %d skipped, %d total", + time.Since(start).Round(time.Millisecond), fetched, failed, skipped, len(pr.channels)) + pr.feed.AfterFetchCycle(ctx) } func (pr *PublicReader) fetchChannel(ctx context.Context, username string) ([]protocol.Message, string, error) { diff --git a/internal/server/server.go b/internal/server/server.go index 0dddd1b..3e67fb3 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -25,21 +25,32 @@ type Config struct { NoTelegram bool // if true, fetch public channels without Telegram login AllowManage bool // if true, remote channel management and sending via DNS is allowed Debug bool // if true, log every decoded DNS query - // NoMedia disables downloading and serving image/file media. When set, the - // server emits the legacy [TAG]\ncaption form for media messages so old - // clients keep working unchanged. - NoMedia bool - // MediaMaxSize is the per-file cap in bytes for cached media. 0 means no - // cap (not recommended in production). - MediaMaxSize int64 - // MediaCacheTTL is the cache lifetime in minutes for a single entry. The - // effective TTL is reset whenever the same upstream id is fetched again. 
- MediaCacheTTL int - // MediaCompression names the compression applied to cached media bytes - // before they're split into DNS blocks. One of "none", "gzip", - // "deflate". Empty defaults to "gzip". - MediaCompression string - Telegram TelegramConfig + // DNSMediaEnabled toggles the slow DNS-relay path. When false the + // server still ingests media bytes (so other relays can serve them) + // but the wire-format DNS flag is unset for clients. + DNSMediaEnabled bool + DNSMediaMaxSize int64 // per-file cap for the DNS relay (0 = no cap) + DNSMediaCacheTTL int // DNS-relay TTL in minutes + DNSMediaCompression string // DNS-relay compression: none|gzip|deflate + FetchInterval time.Duration // 0 = default 10m; floor enforced by main + GitHubRelay GitHubRelayConfig + Telegram TelegramConfig +} + +// GitHubRelayConfig configures the GitHub fast relay. Active() requires +// Enabled + Token + Repo. +type GitHubRelayConfig struct { + Enabled bool + Token string + Repo string + Branch string // default branch to commit to; "" → "main" + StatePath string // file used to persist lastSeen across restarts + MaxBytes int64 + TTLMinutes int +} + +func (g GitHubRelayConfig) Active() bool { + return g.Enabled && g.Token != "" && g.Repo != "" } // Server orchestrates the DNS server and Telegram reader. @@ -81,35 +92,52 @@ func (s *Server) Run(ctx context.Context) error { SetMediaDebugLogs(s.cfg.Debug) - // Configure media cache before any reader starts so the very first fetch - // cycle can populate it. When --no-media is set we leave Feed.media as - // nil; the readers fall through to the legacy [TAG]\ncaption form, and - // Feed.GetBlock rejects media-channel queries with not-found. - if !s.cfg.NoMedia { - ttlMin := s.cfg.MediaCacheTTL + // Spin up the media cache when at least one relay is enabled. The cache + // owns the byte pipeline; whether DNS or GitHub serves bytes to clients + // is controlled by per-relay flags on each MediaMeta. 
+ anyRelay := s.cfg.DNSMediaEnabled || s.cfg.GitHubRelay.Active() + if anyRelay { + ttlMin := s.cfg.DNSMediaCacheTTL if ttlMin <= 0 { ttlMin = 600 } ttl := time.Duration(ttlMin) * time.Minute - compName := s.cfg.MediaCompression + compName := s.cfg.DNSMediaCompression if compName == "" { compName = "gzip" } compression, err := protocol.ParseMediaCompressionName(compName) if err != nil { - return fmt.Errorf("--media-compression: %w", err) + return fmt.Errorf("--dns-media-compression: %w", err) } mediaCache := NewMediaCache(MediaCacheConfig{ - MaxFileBytes: s.cfg.MediaMaxSize, - TTL: ttl, - Compression: compression, - Logf: logfMedia, + MaxFileBytes: s.cfg.DNSMediaMaxSize, + TTL: ttl, + Compression: compression, + Logf: logfMedia, + DNSRelayEnabled: s.cfg.DNSMediaEnabled, }) s.feed.SetMediaCache(mediaCache) - log.Printf("[server] media cache enabled: max-size=%d bytes, ttl=%s, compression=%s", s.cfg.MediaMaxSize, ttl, compression) + log.Printf("[server] media: dns=%v max=%d ttl=%s compression=%s", + s.cfg.DNSMediaEnabled, s.cfg.DNSMediaMaxSize, ttl, compression) go s.runMediaSweep(ctx, mediaCache, ttl) + + if s.cfg.GitHubRelay.Active() { + gh := NewGitHubRelay(s.cfg.GitHubRelay, s.cfg.Domain, s.cfg.Passphrase) + if gh != nil { + mediaCache.SetGitHubRelay(gh) + s.feed.SetGitHubRelay(gh) + go gh.Run(ctx) + branch := s.cfg.GitHubRelay.Branch + if branch == "" { + branch = "main" + } + log.Printf("[server] github relay: repo=%s branch=%s max=%d ttl=%dm", + gh.Repo(), branch, gh.MaxBytes(), s.cfg.GitHubRelay.TTLMinutes) + } + } } else { - log.Println("[server] media cache disabled (--no-media)") + log.Println("[server] media disabled (no relays enabled)") } go startLatestVersionTracker(ctx, s.feed) @@ -129,6 +157,7 @@ func (s *Server) Run(ctx context.Context) error { } if len(s.telegramChannels) > 0 { reader := NewTelegramReader(s.cfg.Telegram, s.telegramChannels, s.feed, msgLimit, 1) + reader.SetFetchInterval(s.cfg.FetchInterval) s.reader = reader channelCtl = reader go 
func() { @@ -148,6 +177,7 @@ func (s *Server) Run(ctx context.Context) error { msgLimit = 15 } publicReader := NewPublicReader(s.telegramChannels, s.feed, msgLimit, 1) + publicReader.SetFetchInterval(s.cfg.FetchInterval) channelCtl = publicReader go func() { log.Println("[public] reader goroutine started") @@ -167,6 +197,7 @@ func (s *Server) Run(ctx context.Context) error { msgLimit = 15 } xReader = NewXPublicReader(s.xAccounts, s.feed, msgLimit, len(s.telegramChannels)+1, s.cfg.XRSSInstances) + xReader.SetFetchInterval(s.cfg.FetchInterval) go func() { log.Println("[x] reader goroutine started") if err := xReader.Run(ctx); err != nil && ctx.Err() == nil { diff --git a/internal/server/telegram.go b/internal/server/telegram.go index 33e5b2e..96ba9e2 100644 --- a/internal/server/telegram.go +++ b/internal/server/telegram.go @@ -63,9 +63,10 @@ type TelegramReader struct { msgLimit int // max messages to fetch per channel baseCh int - mu sync.RWMutex - cache map[string]cachedMessages - cacheTTL time.Duration + mu sync.RWMutex + cache map[string]cachedMessages + cacheTTL time.Duration + fetchInterval time.Duration // api is set once authenticated, used for sending messages. apiMu sync.RWMutex @@ -74,6 +75,17 @@ type TelegramReader struct { refreshCh chan struct{} // signals Run() to re-fetch immediately } +// SetFetchInterval overrides the default 10m fetch cadence. +func (tr *TelegramReader) SetFetchInterval(d time.Duration) { + if d <= 0 { + return + } + tr.mu.Lock() + tr.fetchInterval = d + tr.cacheTTL = d + tr.mu.Unlock() +} + // resolvedPeer holds the resolved Telegram peer along with its chat type. 
type resolvedPeer struct { peer tg.InputPeerClass @@ -100,14 +112,15 @@ func NewTelegramReader(cfg TelegramConfig, channelUsernames []string, feed *Feed baseCh = 1 } return &TelegramReader{ - cfg: cfg, - channels: cleaned, - feed: feed, - msgLimit: msgLimit, - baseCh: baseCh, - cache: make(map[string]cachedMessages), - cacheTTL: 10 * time.Minute, - refreshCh: make(chan struct{}, 1), + cfg: cfg, + channels: cleaned, + feed: feed, + msgLimit: msgLimit, + baseCh: baseCh, + cache: make(map[string]cachedMessages), + cacheTTL: 10 * time.Minute, + fetchInterval: 10 * time.Minute, + refreshCh: make(chan struct{}, 1), } } @@ -146,11 +159,11 @@ func (tr *TelegramReader) Run(ctx context.Context) error { // Initial fetch tr.fetchAll(ctx, api) - // Periodic fetch loop - ticker := time.NewTicker(10 * time.Minute) + interval := tr.fetchInterval + ticker := time.NewTicker(interval) defer ticker.Stop() - tr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + tr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) for { select { @@ -158,15 +171,14 @@ func (tr *TelegramReader) Run(ctx context.Context) error { return ctx.Err() case <-ticker.C: tr.fetchAll(ctx, api) - tr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + tr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) case <-tr.refreshCh: - // Invalidate cache so fetchAll re-fetches everything. 
tr.mu.Lock() tr.cache = make(map[string]cachedMessages) tr.mu.Unlock() tr.fetchAll(ctx, api) - ticker.Reset(10 * time.Minute) - tr.feed.SetNextFetch(uint32(time.Now().Add(10 * time.Minute).Unix())) + ticker.Reset(interval) + tr.feed.SetNextFetch(uint32(time.Now().Add(interval).Unix())) } } }) @@ -222,15 +234,18 @@ func (tr *TelegramReader) authenticate(ctx context.Context, client *telegram.Cli func (tr *TelegramReader) fetchAll(ctx context.Context, api *tg.Client) { log.Printf("[telegram] fetch cycle started for %d channels", len(tr.channels)) start := time.Now() - var fetched, failed int + var fetched, failed, skipped int + tr.mu.RLock() + cacheTTL := tr.cacheTTL + tr.mu.RUnlock() for i, username := range tr.channels { chNum := tr.baseCh + i - // Check cache tr.mu.RLock() cached, ok := tr.cache[username] tr.mu.RUnlock() - if ok && time.Since(cached.fetched) < tr.cacheTTL { + if ok && time.Since(cached.fetched) < cacheTTL { + skipped++ continue } @@ -272,7 +287,9 @@ func (tr *TelegramReader) fetchAll(ctx context.Context, api *tg.Client) { fetched++ log.Printf("[telegram] updated %s (%s): %d messages (type=%d, canSend=%v)", username, rp.title, len(msgs), rp.chatType, rp.canSend) } - log.Printf("[telegram] fetch cycle done in %s: %d fetched, %d failed, %d total", time.Since(start).Round(time.Millisecond), fetched, failed, len(tr.channels)) + log.Printf("[telegram] fetch cycle done in %s: %d fetched, %d failed, %d skipped, %d total", + time.Since(start).Round(time.Millisecond), fetched, failed, skipped, len(tr.channels)) + tr.feed.AfterFetchCycle(ctx) } // resolvePeer resolves a Telegram username to an InputPeer, handling channels, diff --git a/internal/server/xpublic.go b/internal/server/xpublic.go index caed9ce..2a53216 100644 --- a/internal/server/xpublic.go +++ b/internal/server/xpublic.go @@ -32,13 +32,25 @@ type XPublicReader struct { client *http.Client instances []string - mu sync.RWMutex - cache map[string]cachedMessages - cacheTTL time.Duration + mu 
sync.RWMutex + cache map[string]cachedMessages + cacheTTL time.Duration + fetchInterval time.Duration refreshCh chan struct{} } +// SetFetchInterval overrides the default 10m fetch cadence. +func (xr *XPublicReader) SetFetchInterval(d time.Duration) { + if d <= 0 { + return + } + xr.mu.Lock() + xr.fetchInterval = d + xr.cacheTTL = d + xr.mu.Unlock() +} + const maxXRSSBodyBytes int64 = 2 << 20 // 2 MiB var xSnowflakeRe = regexp.MustCompile(`\d{8,}`) @@ -74,10 +86,11 @@ func NewXPublicReader(accounts []string, feed *Feed, msgLimit int, baseCh int, i return http.ErrUseLastResponse }, }, - instances: instances, - cache: make(map[string]cachedMessages), - cacheTTL: 10 * time.Minute, - refreshCh: make(chan struct{}, 1), + instances: instances, + cache: make(map[string]cachedMessages), + cacheTTL: 10 * time.Minute, + fetchInterval: 10 * time.Minute, + refreshCh: make(chan struct{}, 1), } } @@ -124,7 +137,8 @@ func normalizeXRSSInstances(instancesCSV string) []string { func (xr *XPublicReader) Run(ctx context.Context) error { xr.fetchAll(ctx) - ticker := time.NewTicker(10 * time.Minute) + interval := xr.fetchInterval + ticker := time.NewTicker(interval) defer ticker.Stop() for { @@ -138,7 +152,7 @@ func (xr *XPublicReader) Run(ctx context.Context) error { xr.cache = make(map[string]cachedMessages) xr.mu.Unlock() xr.fetchAll(ctx) - ticker.Reset(10 * time.Minute) + ticker.Reset(interval) } } } @@ -162,14 +176,13 @@ func (xr *XPublicReader) SetBaseCh(baseCh int) { func (xr *XPublicReader) fetchAll(ctx context.Context) { log.Printf("[x] fetch cycle started for %d accounts (instances: %v)", len(xr.accounts), xr.instances) start := time.Now() - var fetched, failed int + var fetched, failed, skipped int xr.mu.RLock() baseCh := xr.baseCh + cacheTTL := xr.cacheTTL xr.mu.RUnlock() - // Always set ChatType for all X accounts upfront, so channels show the X flag - // even if the Nitter fetch fails or the cache is still valid. 
for i := range xr.accounts { xr.feed.SetChatInfo(baseCh+i, protocol.ChatTypeX, false) } @@ -180,7 +193,8 @@ func (xr *XPublicReader) fetchAll(ctx context.Context) { xr.mu.RLock() cached, ok := xr.cache[account] xr.mu.RUnlock() - if ok && time.Since(cached.fetched) < xr.cacheTTL { + if ok && time.Since(cached.fetched) < cacheTTL { + skipped++ continue } @@ -210,7 +224,9 @@ func (xr *XPublicReader) fetchAll(ctx context.Context) { fetched++ log.Printf("[x] updated @%s: %d posts", account, len(msgs)) } - log.Printf("[x] fetch cycle done in %s: %d fetched, %d failed, %d total", time.Since(start).Round(time.Millisecond), fetched, failed, len(xr.accounts)) + log.Printf("[x] fetch cycle done in %s: %d fetched, %d failed, %d skipped, %d total", + time.Since(start).Round(time.Millisecond), fetched, failed, skipped, len(xr.accounts)) + xr.feed.AfterFetchCycle(ctx) } func (xr *XPublicReader) fetchAccount(ctx context.Context, username string) ([]protocol.Message, string, error) { diff --git a/internal/web/media.go b/internal/web/media.go index 75a5641..4cc4ced 100644 --- a/internal/web/media.go +++ b/internal/web/media.go @@ -68,22 +68,10 @@ func (s *Server) handleMediaGet(w http.ResponseWriter, r *http.Request) { } q := r.URL.Query() - ch64, err := strconv.ParseUint(q.Get("ch"), 10, 16) - if err != nil { - http.Error(w, "bad ch", http.StatusBadRequest) - return + source := strings.ToLower(strings.TrimSpace(q.Get("source"))) + if source == "" { + source = "slow" } - channel := uint16(ch64) - if !protocol.IsMediaChannel(channel) { - http.Error(w, "ch out of media range", http.StatusBadRequest) - return - } - blk64, err := strconv.ParseUint(q.Get("blk"), 10, 16) - if err != nil || blk64 == 0 { - http.Error(w, "bad blk", http.StatusBadRequest) - return - } - blockCount := uint16(blk64) const maxClaimedSize = 100 * 1024 * 1024 expectedSize, _ := strconv.ParseInt(q.Get("size"), 10, 64) @@ -102,6 +90,35 @@ func (s *Server) handleMediaGet(w http.ResponseWriter, r *http.Request) { 
expectedCRC = uint32(c) } + // Fast path: stream straight from the GitHub relay (or its server-side + // disk cache). On failure we return 502 instead of silently falling + // through to DNS — the client can then prompt the user before + // retrying with source=slow. + if source == "fast" { + if served := s.serveFromGitHubRelay(w, r, expectedSize, expectedCRC, q.Get("name"), q.Get("type")); served { + return + } + http.Error(w, "fast relay unavailable", http.StatusBadGateway) + return + } + + ch64, err := strconv.ParseUint(q.Get("ch"), 10, 16) + if err != nil { + http.Error(w, "bad ch", http.StatusBadRequest) + return + } + channel := uint16(ch64) + if !protocol.IsMediaChannel(channel) { + http.Error(w, "ch out of media range", http.StatusBadRequest) + return + } + blk64, err := strconv.ParseUint(q.Get("blk"), 10, 16) + if err != nil || blk64 == 0 { + http.Error(w, "bad blk", http.StatusBadRequest) + return + } + blockCount := uint16(blk64) + s.mu.RLock() fetcher := s.fetcher s.mu.RUnlock() diff --git a/internal/web/relay_info.go b/internal/web/relay_info.go new file mode 100644 index 0000000..8a83304 --- /dev/null +++ b/internal/web/relay_info.go @@ -0,0 +1,228 @@ +package web + +import ( + "context" + "errors" + "fmt" + "hash/crc32" + "io" + "net/http" + "strconv" + "sync" + "time" + + "github.com/sartoopjj/thefeed/internal/client" + "github.com/sartoopjj/thefeed/internal/protocol" +) + +// relayInfoTTL is how long the cached repo-discovery payload stays valid. +// Re-fetched after expiry, on profile switch, or after a download failure. +const relayInfoTTL = time.Hour + +// relayCache holds the most recent answer from RelayInfoChannel so we don't +// hit DNS for every fast-path media fetch. 
+type relayCache struct { + mu sync.Mutex + info client.RelayInfo + fetched time.Time + fetching bool + cond *sync.Cond +} + +func newRelayCache() *relayCache { + rc := &relayCache{} + rc.cond = sync.NewCond(&rc.mu) + return rc +} + +func (c *relayCache) invalidate() { + c.mu.Lock() + c.info = client.RelayInfo{} + c.fetched = time.Time{} + c.mu.Unlock() +} + +func (c *relayCache) get(ctx context.Context, fetcher *client.Fetcher) (client.RelayInfo, error) { + c.mu.Lock() + if !c.fetched.IsZero() && time.Since(c.fetched) < relayInfoTTL { + info := c.info + c.mu.Unlock() + return info, nil + } + for c.fetching { + c.cond.Wait() + if !c.fetched.IsZero() && time.Since(c.fetched) < relayInfoTTL { + info := c.info + c.mu.Unlock() + return info, nil + } + } + c.fetching = true + c.mu.Unlock() + + info, err := fetcher.FetchRelayInfo(ctx) + + c.mu.Lock() + defer c.mu.Unlock() + c.fetching = false + c.cond.Broadcast() + if err != nil { + return client.RelayInfo{}, err + } + c.info = info + c.fetched = time.Now() + return info, nil +} + +// serveFromGitHubRelay tries to stream the file from raw.githubusercontent.com +// Returns true if the request was fully handled (success or terminal error +// already written). Returns false to let the caller fall back to DNS. 
+func (s *Server) serveFromGitHubRelay(w http.ResponseWriter, r *http.Request, size int64, crc uint32, filename, mimeOverride string) bool { + if size <= 0 || crc == 0 { + return false + } + s.mu.RLock() + fetcher := s.fetcher + rc := s.relayInfo + cache := s.mediaCache + cfg := s.config + s.mu.RUnlock() + if fetcher == nil || rc == nil || cfg == nil || cfg.Domain == "" { + return false + } + + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + + info, err := rc.get(ctx, fetcher) + if err != nil || info.GitHubRepo == "" { + return false + } + if cfg.Key == "" { + return false + } + relayKey, err := protocol.DeriveRelayKey(cfg.Key) + if err != nil { + return false + } + domainSeg := protocol.RelayDomainSegment(cfg.Domain, cfg.Key) + objectSeg := protocol.RelayObjectName(size, crc, cfg.Key) + + // Disk cache short-circuit (same as DNS path) — we cache PLAINTEXT under + // (size, crc), so a hit doesn't need to decrypt. + if cache != nil { + if body, mime, ok := cache.Get(size, crc); ok { + servedMime := pickMime(mimeOverride, mime, body) + writeMediaHeaders(w, servedMime, size, filename, "HIT-relay") + if _, err := w.Write(body); err != nil { + s.addLog(fmt.Sprintf("relay: hit-cache write: %v", err)) + } + return true + } + } + + // Use api.github.com (a *.github.com host) instead of + // raw.githubusercontent.com — the latter is blocked in some countries + // where the api host still resolves. The Accept header asks for raw + // bytes instead of the default JSON envelope. Both path segments are + // HMAC'd with the passphrase so the URL itself doesn't leak the domain + // or which file is being requested. + url := fmt.Sprintf("https://api.github.com/repos/%s/contents/%s/%s", + info.GitHubRepo, domainSeg, objectSeg) + // The blob on disk is AES-256-GCM(nonce||ct||tag) over the plaintext. + // Cap the fetch at plaintext size + small overhead. 
+ const aeadOverhead = protocol.NonceSize + 16 // GCM tag is 16 bytes + encBody, _, err := fetchGitHubRaw(ctx, url, size+int64(aeadOverhead)) + if err != nil { + s.addLog(fmt.Sprintf("relay: fetch %s: %v", url, err)) + // Not handled — caller falls back to DNS. + rc.invalidate() // refresh next time in case the repo URL changed + return false + } + body, err := protocol.DecryptRelayBlob(relayKey, encBody) + if err != nil { + s.addLog(fmt.Sprintf("relay: decrypt %s: %v", url, err)) + return false + } + if int64(len(body)) != size || crc32.ChecksumIEEE(body) != crc { + s.addLog(fmt.Sprintf("relay: hash/size mismatch from %s", url)) + return false + } + mime := http.DetectContentType(body) + + servedMime := pickMime(mimeOverride, mime, body) + writeMediaHeaders(w, servedMime, size, filename, "MISS-relay") + if _, err := w.Write(body); err != nil { + s.addLog(fmt.Sprintf("relay: stream: %v", err)) + } + if cache != nil { + if err := cache.Put(size, crc, body, servedMime); err != nil { + s.addLog(fmt.Sprintf("relay: cache put %d_%08x: %v", size, crc, err)) + } else { + s.addLog(fmt.Sprintf("media cached (relay): %d bytes, crc=%08x, mime=%s", size, crc, servedMime)) + } + } + return true +} + +func fetchGitHubRaw(ctx context.Context, url string, expectedSize int64) ([]byte, string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, "", err + } + req.Header.Set("User-Agent", "thefeed-client") + // Ask the contents API for raw bytes; without this it returns a JSON + // envelope with the body base64-encoded inside. 
+ req.Header.Set("Accept", "application/vnd.github.raw") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, "", err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + return nil, "", fmt.Errorf("github raw: %s", resp.Status) + } + limit := expectedSize + if limit <= 0 { + limit = 100 * 1024 * 1024 // 100 MiB ceiling + } + body, err := io.ReadAll(io.LimitReader(resp.Body, limit+1)) + if err != nil { + return nil, "", err + } + if int64(len(body)) > limit { + return nil, "", errors.New("github raw: body exceeds expected size") + } + return body, resp.Header.Get("Content-Type"), nil +} + +func pickMime(override, fromCache string, sniff []byte) string { + if m := sanitizeMime(override); m != "" && m != "application/octet-stream" { + return m + } + if fromCache != "" { + if m := sanitizeMime(fromCache); m != "" { + return m + } + } + if sniff != nil { + if m := sanitizeMime(http.DetectContentType(sniff)); m != "" { + return m + } + } + return "application/octet-stream" +} + +func writeMediaHeaders(w http.ResponseWriter, mime string, size int64, filename, cacheTag string) { + w.Header().Set("Content-Type", mime) + if size > 0 { + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + } + w.Header().Set("Cache-Control", "private, max-age=86400") + if cacheTag != "" { + w.Header().Set("X-Cache", cacheTag) + } + if fn := sanitizeFilename(filename); fn != "" { + w.Header().Set("Content-Disposition", "inline; filename=\""+fn+"\"") + } +} diff --git a/internal/web/static/index.html b/internal/web/static/index.html index 88e6f6d..911008d 100644 --- a/internal/web/static/index.html +++ b/internal/web/static/index.html @@ -2591,6 +2591,9 @@ auto_update_toggle: 'به‌روزرسانی خودکار این کانال', auto_update_on: 'به‌روزرسانی خودکار برای @{name} روشن شد', auto_update_off: 'به‌روزرسانی خودکار برای @{name} خاموش شد', + media_relay_fallback: 'دانلود از مسیر سریع نشد. 
از مسیر کند (DNS) امتحان کنیم؟ توجه: ممکن است خیلی کند باشد.', + media_slow_only: 'رله گیتهاب در دسترس نیست. دانلود از مسیر DNS خیلی کند است. ادامه می‌دهی؟', + media_size_mismatch: 'سایز فایل با چیزی که سرور گفته بود نمی‌خونه', clear_cache: 'پاک کردن کش', clear_cache_btn: '🗑 پاک کردن کش', cache_cleared: 'کش پاک شد!', saved_resolvers_title: 'شروع سریع', saved_resolvers_msg: 'آخرین اسکن ({t}) نتیجه داد: {n} سرور DNS سالم پیدا شد. همین‌ها را استفاده کنیم یا دوباره اسکن کنیم؟', @@ -2755,6 +2758,9 @@ auto_update_toggle: 'Auto-update this channel', auto_update_on: 'Auto-update is now ON for @{name}', auto_update_off: 'Auto-update is now OFF for @{name}', + media_relay_fallback: 'Fast relay failed. Try the slow DNS path? Note: this can be very slow.', + media_slow_only: 'GitHub relay is unavailable for this file. The DNS path is very slow. Download anyway?', + media_size_mismatch: 'Downloaded size doesn\'t match the manifest', clear_cache: 'Clear Cache', clear_cache_btn: '🗑 Clear Cache', cache_cleared: 'Cache cleared!', saved_resolvers_title: 'Quick Start', saved_resolvers_msg: 'Last scan ({t}) found {n} healthy DNS servers. Use them now (no scan needed), or scan again to re-verify.', @@ -3262,13 +3268,30 @@ } async function clearCache() { try { await mediaWipeIDB(); } catch (e) { } - // Drop in-memory blobs too so the next render forces a fresh fetch. Object.keys(mediaBlobURLs).forEach(function (k) { try { URL.revokeObjectURL(mediaBlobURLs[k]); } catch (e) { } }); mediaBlobURLs = {}; mediaBlobs = {}; + // Clear last-seen tracking. Keep thefeed_lang / thefeed_theme / + // version-update flags. Profiles live server-side, untouched. 
+ try { + localStorage.removeItem('thefeed_seen_ids'); + localStorage.removeItem('thefeed_seen_hashes'); + var toRemove = []; + for (var i = 0; i < localStorage.length; i++) { + var k = localStorage.key(i); + if (k && k.indexOf('thefeed_seen_ts_') === 0) toRemove.push(k); + } + toRemove.forEach(function (k) { localStorage.removeItem(k); }); + } catch (e) { } + previousMsgIDs = {}; + previousContentHashes = {}; + autoFetchedChannels = {}; try { var r = await fetch('/api/cache/clear', { method: 'POST' }); var j = await r.json(); if (j.ok) { alert(t('cache_cleared')) } } catch (e) { } + // Server cache was just wiped; force a fresh DNS-backed refresh of the + // selected channel. Other channels auto-refresh on first open. + try { await doRefresh(false); } catch (e) { } } async function mediaWipeIDB() { @@ -3942,11 +3965,34 @@ } // ===== MESSAGES ===== + // Tracks channels we've already auto-fetched on first open so we don't + // re-trigger a refresh for genuinely empty channels. + var autoFetchedChannels = {}; async function loadMessages(chNum) { try { var r = await fetch('/api/messages/' + chNum); if (chNum !== selectedChannel) return; var data = await r.json(); if (chNum !== selectedChannel) return; renderMessages(data.messages || [], data.gaps || []); + // If the server has nothing cached for this channel and we haven't + // already kicked off a fetch this session, trigger one. Covers the + // post-clear / fresh-restart case where the on-disk cache is empty. 
+ if ((!data.messages || data.messages.length === 0) && !autoFetchedChannels[chNum] && !refreshingChannels[chNum]) { + autoFetchedChannels[chNum] = true; + refreshingChannels[chNum] = true; + var ch = channels[chNum - 1]; + if (ch) showChannelFetchProgress(chNum, ch.Name || ch.name || ''); + try { await fetch('/api/refresh?channel=' + chNum, { method: 'POST' }); } catch (e) { } + // Fail-safe: if SSE never delivers the 'channels' update (server + // refresh failed silently, transport dropped, etc.), clear the + // flag after 60s so the user can manually retry. + setTimeout(function () { + if (refreshingChannels[chNum]) { + delete refreshingChannels[chNum]; + var fb = document.getElementById('prog-fetch-ch-' + chNum); + if (fb) fb.remove(); + } + }, 60000); + } if (!refreshingChannels[chNum]) { var fetchBar = document.getElementById('prog-fetch-ch-' + chNum); if (fetchBar) fetchBar.remove(); } @@ -4114,16 +4160,11 @@ var card = cards[i]; var msgID = card.getAttribute('data-msg'); if (!msgID) continue; - var ch = parseInt(card.getAttribute('data-ch'), 10); - if (!ch || ch < 10000 || ch > 60000) continue; var entry = mediaBlobs[msgID]; if (entry) { mediaShowBlob(card, entry.url); continue; } - // A download from before the re-render is still running; rebuild the - // progress overlay so the user keeps seeing the bar instead of an - // already-clicked-but-now-empty action chip. if (mediaInflight[msgID]) { mediaShowProgress(card); continue; @@ -4132,34 +4173,61 @@ mediaShowQueued(card); continue; } - mediaRestoreFromCache(msgID); + // Disk-cache restore only makes sense for DNS-channel media because + // the cache is keyed off the DNS channel number; GH-only files are + // re-fetched on demand. + var ch = parseInt(card.getAttribute('data-ch'), 10); + if (ch >= 10000 && ch <= 60000) { + mediaRestoreFromCache(msgID); + } } } + // Relay slot indices — match protocol/media.go. 
+ var RELAY_DNS = 0, RELAY_GITHUB = 1; + function parseDownloadableMedia(text, tag) { var rest = text.substring(tag.length); var nl = rest.indexOf('\n'); var head = nl >= 0 ? rest.substring(0, nl) : rest; var caption = nl >= 0 ? rest.substring(nl + 1) : ''; head = head.trim(); - if (!head) { - return { downloadable: false, size: 0, channel: 0, blocks: 0, crc: '', caption: caption }; - } + var empty = { downloadable: false, dnsAvailable: false, githubAvailable: false, relays: [], size: 0, channel: 0, blocks: 0, crc: '', filename: '', caption: caption }; + if (!head) return empty; var parts = head.split(':'); if (parts.length < 5) { - return { downloadable: false, size: 0, channel: 0, blocks: 0, crc: '', caption: rest.replace(/^\n/, '') }; + empty.caption = rest.replace(/^\n/, ''); + return empty; } var size = parseInt(parts[0], 10); - var dl = parseInt(parts[1], 10); + // parts[1] is a comma-separated relay flag list, e.g. "1,0". + var flagParts = parts[1].split(','); + var relays = []; + var flagsOK = true; + for (var fi = 0; fi < flagParts.length; fi++) { + var f = flagParts[fi].trim(); + if (f !== '0' && f !== '1') { flagsOK = false; break; } + relays.push(f === '1'); + } + if (!flagsOK) { + empty.caption = rest.replace(/^\n/, ''); + return empty; + } var ch = parseInt(parts[2], 10); var blk = parseInt(parts[3], 10); var crc = parts[4].toLowerCase(); - if (isNaN(size) || isNaN(dl) || isNaN(ch) || isNaN(blk) || !/^[0-9a-f]+$/.test(crc)) { - return { downloadable: false, size: 0, channel: 0, blocks: 0, crc: '', filename: '', caption: rest.replace(/^\n/, '') }; + if (isNaN(size) || isNaN(ch) || isNaN(blk) || !/^[0-9a-f]+$/.test(crc)) { + empty.caption = rest.replace(/^\n/, ''); + return empty; } var filename = parts.length >= 6 ? 
parts.slice(5).join(':') : ''; + var dnsOK = !!relays[RELAY_DNS] && ch >= 10000 && ch <= 60000 && blk > 0; + var ghOK = !!relays[RELAY_GITHUB]; return { - downloadable: dl === 1 && ch >= 10000 && ch <= 60000 && blk > 0, + downloadable: dnsOK || ghOK, + dnsAvailable: dnsOK, + githubAvailable: ghOK, + relays: relays, size: size, channel: ch, blocks: blk, @@ -4223,7 +4291,9 @@ var dataAttrs = 'id="' + domID + '" data-tag="' + escAttr(tag) + '" data-ch="' + parsed.channel + '" data-blk="' + parsed.blocks + '" data-size="' + parsed.size + '" data-crc="' + parsed.crc - + '" data-fname="' + escAttr(parsed.filename || '') + '" data-msg="' + msgID + '"'; + + '" data-dns="' + (parsed.dnsAvailable ? '1' : '0') + '"' + + ' data-gh="' + (parsed.githubAvailable ? '1' : '0') + '"' + + ' data-fname="' + escAttr(parsed.filename || '') + '" data-msg="' + msgID + '"'; if (mediaIsImageTag(tag)) { var imageInner; @@ -4350,7 +4420,12 @@ } } - function mediaRunDownload(domID) { + // GH_MAX_RETRIES + GH_RETRY_DELAY_MS govern the fast-path retry budget + // before we ask the user whether to fall back to slow DNS. + var GH_MAX_RETRIES = 3, GH_RETRY_DELAY_MS = 800; + + async function mediaRunDownload(domID, opts) { + opts = opts || {}; var card = document.getElementById(domID); if (!card) { mediaPumpQueue(); return; } var msgID = card.getAttribute('data-msg'); @@ -4359,12 +4434,43 @@ var size = card.getAttribute('data-size'); var crc = card.getAttribute('data-crc'); var fname = card.getAttribute('data-fname') || ''; - var url = '/api/media/get?ch=' + encodeURIComponent(ch) + var ghAvail = card.getAttribute('data-gh') === '1'; + var dnsAvail = card.getAttribute('data-dns') === '1'; + // forceSource is set when restartWith() retries after a fallback prompt + // — in that case the user has already consented and we skip the + // slow-only confirmation. + var source = opts.forceSource || (ghAvail ? 
'fast' : 'slow'); + if (!opts.forceSource && source === 'slow' && dnsAvail) { + var ok = await showConfirmDialog( + t('media_slow_only') || 'GitHub relay is unavailable for this file. The DNS path is very slow. Download anyway?', + t('yes') || 'Yes', t('no') || 'No'); + if (!ok) { + mediaPumpQueue(); + return; + } + } + var baseUrl = '/api/media/get?ch=' + encodeURIComponent(ch) + '&blk=' + encodeURIComponent(blk) + '&size=' + encodeURIComponent(size) + '&crc=' + encodeURIComponent(crc) + (fname ? '&name=' + encodeURIComponent(fname) : ''); + var url = baseUrl + '&source=' + source; mediaActiveCount++; + var attempt = 0; + debugLog('media: download msg=' + msgID + ' source=' + source + ' size=' + size); + + // restartWith re-runs the download with a forced source. Pass it + // through so the prompt isn't shown again — user consent already + // happened in the fallback dialog. + function restartWith(newSource) { + try { stopPoll(); } catch (e) { } + try { xhr.abort(); } catch (e) { } + delete mediaInflight[msgID]; + mediaActiveCount = Math.max(0, mediaActiveCount - 1); + delete mediaProgressState[msgID]; + mediaRunDownload(domID, { forceSource: newSource }); + } + var xhr = new XMLHttpRequest(); xhr.responseType = 'blob'; xhr.open('GET', url); @@ -4378,11 +4484,20 @@ async function deliverBlob() { if (xhr.status >= 200 && xhr.status < 300) { var expectedCRC = parseInt(crc, 16); + var expectedSize = parseInt(size, 10); + // Hash + size verification — required by spec for any relay. 
+ if (xhr.response && !isNaN(expectedSize) && expectedSize > 0 && xhr.response.size !== expectedSize) { + await mediaForgetCache(card); + if (source === 'fast') { handleFastFailure(t('media_size_mismatch') || 'Size mismatch'); return; } + mediaShowError(card, t('media_size_mismatch') || 'Size mismatch'); + return; + } if (!isNaN(expectedCRC) && expectedCRC > 0) { try { var got = await blobCRC32(xhr.response); if (got !== expectedCRC) { await mediaForgetCache(card); + if (source === 'fast') { handleFastFailure(t('media_hash_mismatch') || 'Content hash mismatch'); return; } mediaShowError(card, t('media_hash_mismatch') || 'Content hash mismatch'); return; } @@ -4394,6 +4509,7 @@ mediaShowBlob(card, blobURL); mediaPersistBlob(msgID, card, xhr.response, mediaBlobs[msgID].mime); } else { + if (source === 'fast') { handleFastFailure(xhr.statusText || ('HTTP ' + xhr.status)); return; } mediaShowError(card, xhr.statusText || ('HTTP ' + xhr.status)); } } @@ -4407,6 +4523,7 @@ var totalSize = (xhr.response && xhr.response.size) ? xhr.response.size : (parseInt(card.getAttribute('data-size'), 10) || 0); if (totalSize > 0) mediaUpdateProgress(card, totalSize, totalSize); + debugLog('media: ok msg=' + msgID + ' source=' + source + ' served-by=' + (xhr.getResponseHeader('X-Cache') || '?')); } var elapsed = Date.now() - progressShownAt; if (progressShownAt > 0 && elapsed < MIN_PROGRESS_VISIBLE_MS) { @@ -4415,8 +4532,40 @@ deliverBlob().finally(finishSlot); } }; + async function handleFastFailure(reason) { + attempt++; + if (attempt < GH_MAX_RETRIES) { + // Quiet retry — leave the bar alone. + await new Promise(function (r) { setTimeout(r, GH_RETRY_DELAY_MS); }); + var retry = new XMLHttpRequest(); + retry.responseType = 'blob'; + retry.open('GET', url); + // Reuse the same handlers by swapping the underlying xhr. 
+ retry.onprogress = xhr.onprogress; + retry.onload = xhr.onload; + retry.onerror = xhr.onerror; + retry.onabort = xhr.onabort; + xhr = retry; + mediaInflight[msgID] = { xhr: xhr }; + xhr.send(); + return; + } + // Out of retries — ask the user before going to the slow path. + if (source === 'fast' && dnsAvail) { + var ok = await showConfirmDialog( + t('media_relay_fallback') || "Fast relay failed. Try the slow DNS path? Note: this can be very slow.", + t('yes') || 'Yes', t('no') || 'No'); + if (ok) { + restartWith('slow'); + return; + } + } + mediaShowError(card, reason || (t('media_failed') || 'Download failed')); + finishSlot(); + } xhr.onerror = function () { delete mediaInflight[msgID]; + if (source === 'fast') { handleFastFailure('network error'); return; } mediaShowError(card, 'network error'); finishSlot(); }; @@ -4426,7 +4575,14 @@ }; // Reset progress state for this download — both byte and block // counters live in mediaProgressState[msgID] and only ever go up. - mediaProgressState[msgID] = { loaded: 0, total: parseInt(size, 10) || 0, completed: 0, blocks: parseInt(blk, 10) || 0 }; + // For the fast (relay) path the block counter is meaningless, so + // blocks=0 and the text omits the K/N suffix. + mediaProgressState[msgID] = { + loaded: 0, + total: parseInt(size, 10) || 0, + completed: 0, + blocks: source === 'slow' ? (parseInt(blk, 10) || 0) : 0 + }; // Poll the server's per-download block counter so the user sees real // block-level progress (the Go side fetches one DNS block at a time @@ -4484,6 +4640,8 @@ + '' + ''; } + var msgIDImg = card.getAttribute('data-msg'); + if (msgIDImg && mediaProgressState[msgIDImg]) mediaRenderProgressState(card); return; } // File row: rebuild the info column from scratch so the fill always @@ -4501,6 +4659,8 @@ + '' + ''; card.innerHTML = newInner; + var msgID = card.getAttribute('data-msg'); + if (msgID && mediaProgressState[msgID]) mediaRenderProgressState(card); } // Byte-level update from xhr.onprogress. 
@@ -4947,6 +5107,10 @@ } // ===== LOG ===== + function debugLog(line) { + var el = document.getElementById('cfgDebug'); + if (el && el.checked) addLogLine(line); + } function addLogLine(line) { var el = document.getElementById('logPanel'); var div = document.createElement('div'); @@ -5142,12 +5306,10 @@ var info = document.getElementById('nextFetchInfoBtn'); if (!serverNextFetch) { el.textContent = ''; if (info) info.style.display = 'none'; return } if (info) { info.style.display = ''; info.title = t('next_fetch_info') } - var autoRefreshed = false; function tick() { var now = Math.floor(Date.now() / 1000), d = serverNextFetch - now; if (d <= 0) { el.textContent = ''; - if (!autoRefreshed) { autoRefreshed = true; setTimeout(function () { doAutoRefreshAfterCountdown() }, 3000) } return } var m = Math.floor(d / 60), s = d % 60; el.textContent = m + ':' + (s < 10 ? '0' : '') + s @@ -5177,27 +5339,6 @@ } catch (e) { } } - async function doAutoRefreshAfterCountdown() { - // Auto-triggered when server fetch countdown reaches 0 - // Clear metadata cache so we get fresh data - try { localStorage.removeItem(cacheKey()) } catch (e) { } - try { - // Reload channels (this also fetches /api/status and updates the timer) - await loadChannels(); - // If we have a selected channel, reload its messages too - if (selectedChannel > 0) await loadMessages(selectedChannel); - // If the timer still didn't show (server may not have refreshed yet), retry after a delay - if (!serverNextFetch || serverNextFetch <= Math.floor(Date.now() / 1000)) { - setTimeout(async function () { - try { - var sr = await fetch('/api/status'); var st = await sr.json(); - if (st.nextFetch && st.nextFetch > Math.floor(Date.now() / 1000)) { serverNextFetch = st.nextFetch; updateNextFetchDisplay() } - } catch (e) { } - }, 15000); - } - } catch (e) { } - } - // ===== SEND ===== async function sendMessage() { var input = document.getElementById('sendInput'); var text = input.value.trim(); diff --git 
a/internal/web/web.go b/internal/web/web.go index 62f1a12..4002013 100644 --- a/internal/web/web.go +++ b/internal/web/web.go @@ -163,6 +163,10 @@ type Server struct { dlMu sync.Mutex dlProgress map[string]*mediaDLProgress + // relayInfo caches the latest answer from RelayInfoChannel so the fast + // media path doesn't pay a DNS round trip per file. + relayInfo *relayCache + // mediaCache is a disk-backed store for downloaded media bytes so that // multiple devices on the same network share a single DNS-tunnelled // fetch. Entries expire after 7 days. @@ -202,6 +206,7 @@ func New(dataDir string, port int, host string, password string) (*Server, error scanner: scanner, mediaCache: mediaCache, dlProgress: make(map[string]*mediaDLProgress), + relayInfo: newRelayCache(), } if mediaCache != nil { @@ -2265,6 +2270,9 @@ func (s *Server) handleProfileSwitch(w http.ResponseWriter, r *http.Request) { s.config = &found.Config s.channels = nil s.messages = make(map[int][]protocol.Message) + if s.relayInfo != nil { + s.relayInfo.invalidate() + } s.lastMsgIDs = make(map[int]uint32) s.lastHashes = make(map[int]uint32) s.mu.Unlock() @@ -2601,6 +2609,14 @@ func (s *Server) handleClearCache(w http.ResponseWriter, r *http.Request) { if s.mediaCache != nil { mediaDeleted = s.mediaCache.Clear() } + // Reset in-memory message state too so refreshChannel's "no changes" + // guard doesn't skip the next fetch (prev IDs match what's on the + // server, but our cache is gone). 
+ s.mu.Lock() + s.messages = make(map[int][]protocol.Message) + s.lastMsgIDs = make(map[int]uint32) + s.lastHashes = make(map[int]uint32) + s.mu.Unlock() s.addLog(fmt.Sprintf("Cache cleared: %d message files, %d media files", deleted, mediaDeleted)) writeJSON(w, map[string]any{"ok": true, "deleted": deleted, "mediaDeleted": mediaDeleted}) } diff --git a/scripts/install.sh b/scripts/install.sh index f66b906..ed196b4 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -293,6 +293,89 @@ setup_config() { fi fi + # --- Media relays --- + # Each relay is independent: the same file can be served by DNS, GitHub, + # and any future relay simultaneously. Enabling a relay just gives + # clients another way to fetch the bytes. + echo "" + echo -e "${green}═══════════════════════════════════════${plain}" + echo -e "${green} Media relays${plain}" + echo -e "${green}═══════════════════════════════════════${plain}" + local cur_dns_enabled cur_dns_size cur_dns_ttl cur_dns_comp + local cur_gh_enabled cur_gh_token cur_gh_repo cur_gh_size cur_gh_ttl + if $is_update; then + cur_dns_enabled=$(env_get THEFEED_DNS_MEDIA_ENABLED) + cur_dns_size=$(env_get THEFEED_DNS_MEDIA_MAX_SIZE_KB) + cur_dns_ttl=$(env_get THEFEED_DNS_MEDIA_CACHE_TTL_MIN) + cur_dns_comp=$(env_get THEFEED_DNS_MEDIA_COMPRESSION) + cur_gh_enabled=$(env_get THEFEED_GITHUB_RELAY_ENABLED) + cur_gh_token=$(env_get THEFEED_GITHUB_RELAY_TOKEN) + cur_gh_repo=$(env_get THEFEED_GITHUB_RELAY_REPO) + cur_gh_size=$(env_get THEFEED_GITHUB_RELAY_MAX_SIZE_KB) + cur_gh_ttl=$(env_get THEFEED_GITHUB_RELAY_TTL_MIN) + fi + + # DNS relay (slow path, off by default). + echo "" + echo -e "${yellow}DNS relay${plain} — files served block-by-block over DNS. Slower, works" + echo -e " in censored networks. Default 100 KB cap." + local dns_default="N" dns_prompt="[y/N]" + if [[ "$cur_dns_enabled" == "1" ]]; then dns_default="Y" dns_prompt="[Y/n]"; fi + local dns_enabled_in="" + read -rp "Enable DNS relay? 
$dns_prompt: " dns_enabled_in + if [[ -z "$dns_enabled_in" ]]; then dns_enabled_in="$dns_default"; fi + local dns_enabled="0" + if [[ "$dns_enabled_in" == "y" || "$dns_enabled_in" == "Y" ]]; then dns_enabled="1"; fi + + local dns_max_size="${cur_dns_size:-100}" + local dns_ttl="${cur_dns_ttl:-600}" + local dns_comp="${cur_dns_comp:-gzip}" + if [[ "$dns_enabled" == "1" ]]; then + read -rp "DNS relay max file size in KB [${dns_max_size}]: " in + dns_max_size="${in:-$dns_max_size}" + read -rp "DNS relay TTL in minutes [${dns_ttl}]: " in + dns_ttl="${in:-$dns_ttl}" + read -rp "DNS relay compression (none|gzip|deflate) [${dns_comp}]: " in + dns_comp="${in:-$dns_comp}" + fi + + # GitHub relay (fast path, default off — needs a token). + echo "" + echo -e "${yellow}GitHub relay${plain} — files uploaded to a repo and pulled by clients over" + echo -e " plain HTTPS. Faster + bigger files; needs a personal access token." + local gh_default="N" gh_prompt="[y/N]" + if [[ "$cur_gh_enabled" == "1" ]]; then gh_default="Y"; gh_prompt="[Y/n]"; fi + local gh_enabled_in="" + read -rp "Enable GitHub relay? $gh_prompt: " gh_enabled_in + if [[ -z "$gh_enabled_in" ]]; then gh_enabled_in="$gh_default"; fi + local gh_enabled="0" + if [[ "$gh_enabled_in" == "y" || "$gh_enabled_in" == "Y" ]]; then gh_enabled="1"; fi + + local gh_token="" gh_repo="" gh_max_size="${cur_gh_size:-15360}" + local gh_ttl="${cur_gh_ttl:-10080}" + if [[ "$gh_enabled" == "1" ]]; then + if [[ -n "$cur_gh_token" ]]; then + read -rp "GitHub token (PAT, contents:write) [keep current]: " gh_token + gh_token="${gh_token:-$cur_gh_token}" + else + read -rp "GitHub token (PAT, contents:write): " gh_token + fi + while true; do + if [[ -n "$cur_gh_repo" ]]; then + read -rp "GitHub repo (owner/repo) [${cur_gh_repo}]: " gh_repo + gh_repo="${gh_repo:-$cur_gh_repo}" + else + read -rp "GitHub repo (owner/repo): " gh_repo + fi + if [[ "$gh_repo" =~ ^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$ ]]; then break; fi + echo -e "${red}Invalid repo. 
Format: owner/repo${plain}" + done + read -rp "GitHub relay max file size in KB [${gh_max_size}]: " in + gh_max_size="${in:-$gh_max_size}" + read -rp "GitHub relay TTL in minutes [${gh_ttl}]: " in + gh_ttl="${in:-$gh_ttl}" + fi + # --- Telegram mode --- local no_telegram="" echo "" @@ -330,6 +413,15 @@ TELEGRAM_API_HASH=${api_hash} TELEGRAM_PHONE=${phone} THEFEED_LISTEN=${listen_addr} THEFEED_NO_TELEGRAM=1 +THEFEED_DNS_MEDIA_ENABLED=${dns_enabled} +THEFEED_DNS_MEDIA_MAX_SIZE_KB=${dns_max_size} +THEFEED_DNS_MEDIA_CACHE_TTL_MIN=${dns_ttl} +THEFEED_DNS_MEDIA_COMPRESSION=${dns_comp} +THEFEED_GITHUB_RELAY_ENABLED=${gh_enabled} +THEFEED_GITHUB_RELAY_TOKEN=${gh_token} +THEFEED_GITHUB_RELAY_REPO=${gh_repo} +THEFEED_GITHUB_RELAY_MAX_SIZE_KB=${gh_max_size} +THEFEED_GITHUB_RELAY_TTL_MIN=${gh_ttl} ENVEOF chmod 600 "$DATA_DIR/thefeed.env" echo -e "${green}Config saved to ${DATA_DIR}/thefeed.env${plain}" @@ -390,6 +482,15 @@ TELEGRAM_API_ID=${api_id} TELEGRAM_API_HASH=${api_hash} TELEGRAM_PHONE=${phone} THEFEED_LISTEN=${listen_addr} +THEFEED_DNS_MEDIA_ENABLED=${dns_enabled} +THEFEED_DNS_MEDIA_MAX_SIZE_KB=${dns_max_size} +THEFEED_DNS_MEDIA_CACHE_TTL_MIN=${dns_ttl} +THEFEED_DNS_MEDIA_COMPRESSION=${dns_comp} +THEFEED_GITHUB_RELAY_ENABLED=${gh_enabled} +THEFEED_GITHUB_RELAY_TOKEN=${gh_token} +THEFEED_GITHUB_RELAY_REPO=${gh_repo} +THEFEED_GITHUB_RELAY_MAX_SIZE_KB=${gh_max_size} +THEFEED_GITHUB_RELAY_TTL_MIN=${gh_ttl} ENVEOF chmod 600 "$DATA_DIR/thefeed.env" echo -e "${green}Config saved to ${DATA_DIR}/thefeed.env${plain}" @@ -441,6 +542,8 @@ install_service() { if [[ "${THEFEED_ALLOW_MANAGE:-}" == "1" ]]; then extra_flags="${extra_flags} --allow-manage" fi + # All --dns-media-* and --github-relay-* settings come from THEFEED_* + # env vars, so the binary picks them up via EnvironmentFile alone. cat > "$SERVICE_FILE" <