mirror of
https://github.com/sartoopjj/thefeed.git
synced 2026-05-19 05:24:35 +03:00
feat: ✨ media download with DNS query
This commit is contained in:
@@ -83,7 +83,15 @@ jobs:
|
||||
DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)
|
||||
LDFLAGS="-s -w -X github.com/sartoopjj/thefeed/internal/version.Version=${VERSION} -X github.com/sartoopjj/thefeed/internal/version.Commit=${COMMIT} -X github.com/sartoopjj/thefeed/internal/version.Date=${DATE}"
|
||||
ext=""
|
||||
BUILD_MODE=""
|
||||
if [ "${{ matrix.goos }}" = "windows" ]; then ext=".exe"; fi
|
||||
# Modern Android requires PIE for executables launched via exec(),
|
||||
# and several heuristic AV engines (Kaspersky Boogr.gsh,
|
||||
# several VT vendors) flag non-PIE bundled binaries as suspicious.
|
||||
# Force PIE for the binaries that ship inside the APK.
|
||||
if [ "${{ matrix.goos }}" = "android" ] || [ "${{ matrix.android_arm }}" = "true" ]; then
|
||||
BUILD_MODE="-buildmode=pie"
|
||||
fi
|
||||
if [ "${{ matrix.goos }}" = "android" ] && [ "${{ matrix.goarch }}" = "arm64" ]; then
|
||||
out="build/thefeed-client-android-arm64"
|
||||
elif [ "${{ matrix.android_arm }}" = "true" ]; then
|
||||
@@ -91,7 +99,7 @@ jobs:
|
||||
else
|
||||
out="build/thefeed-client-${VERSION}-${{ matrix.goos }}-${{ matrix.goarch }}${ext}"
|
||||
fi
|
||||
go build -trimpath -ldflags="${LDFLAGS}" -o "$out" ./cmd/client
|
||||
go build -trimpath -buildvcs=false $BUILD_MODE -ldflags="${LDFLAGS}" -o "$out" ./cmd/client
|
||||
|
||||
- name: Compress with UPX
|
||||
if: (matrix.goos == 'linux' || matrix.goos == 'windows') && !matrix.android_arm
|
||||
@@ -123,11 +131,11 @@ jobs:
|
||||
- name: Stage Android client binary as JNI library
|
||||
run: |
|
||||
mkdir -p android/app/src/main/jniLibs/arm64-v8a
|
||||
# mkdir -p android/app/src/main/jniLibs/armeabi-v7a
|
||||
mkdir -p android/app/src/main/jniLibs/armeabi-v7a
|
||||
test -f artifacts/thefeed-client-android-arm64
|
||||
# test -f artifacts/thefeed-client-android-arm
|
||||
test -f artifacts/thefeed-client-android-arm
|
||||
cp artifacts/thefeed-client-android-arm64 android/app/src/main/jniLibs/arm64-v8a/libthefeed.so
|
||||
# cp artifacts/thefeed-client-android-arm android/app/src/main/jniLibs/armeabi-v7a/libthefeed.so
|
||||
cp artifacts/thefeed-client-android-arm android/app/src/main/jniLibs/armeabi-v7a/libthefeed.so
|
||||
|
||||
- name: Decode signing keystore
|
||||
env:
|
||||
@@ -164,8 +172,7 @@ jobs:
|
||||
|
||||
APK_DIR=app/build/outputs/apk/$BT
|
||||
cp "$APK_DIR"/app-arm64-v8a-${BT}.apk ../artifacts/thefeed-android-${VERSION}-arm64.apk
|
||||
# cp "$APK_DIR"/app-armeabi-v7a-${BT}.apk ../artifacts/thefeed-android-${VERSION}-arm.apk
|
||||
# cp "$APK_DIR"/app-universal-${BT}.apk ../artifacts/thefeed-android-${VERSION}-universal.apk
|
||||
cp "$APK_DIR"/app-armeabi-v7a-${BT}.apk ../artifacts/thefeed-android-${VERSION}-arm.apk
|
||||
|
||||
- name: Upload Android APK artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
@@ -173,8 +180,7 @@ jobs:
|
||||
name: thefeed-android-apk
|
||||
path: |
|
||||
artifacts/thefeed-android-*-arm64.apk
|
||||
# artifacts/thefeed-android-*-arm.apk
|
||||
# artifacts/thefeed-android-*-universal.apk
|
||||
artifacts/thefeed-android-*-arm.apk
|
||||
|
||||
release:
|
||||
needs: [build, android-apk]
|
||||
@@ -207,3 +213,4 @@ jobs:
|
||||
| FreeBSD | arm64 | [server-سرور](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-server-freebsd-arm64) / [client-کلاینت](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-client-${{ github.ref_name }}-freebsd-arm64) |
|
||||
| Windows | amd64 | [server-سرور](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-server-windows-amd64.exe) / [client-کلاینت](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-client-${{ github.ref_name }}-windows-amd64.exe) |
|
||||
| Android | arm64 (v8a) | [thefeed-android-${{ github.ref_name }}-arm64.apk - اندروید](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-android-${{ github.ref_name }}-arm64.apk) |
|
||||
| Android | arm (v7a) | [thefeed-android-${{ github.ref_name }}-arm.apk - اندروید (دستگاههای قدیمیتر)](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/thefeed-android-${{ github.ref_name }}-arm.apk) |
|
||||
|
||||
@@ -52,6 +52,30 @@ DNS-based feed reader for Telegram channels and public X accounts. Designed for
|
||||
|
||||
All communication is encrypted with AES-256 and transmitted via standard DNS TXT queries and responses. Traffic is designed to blend with normal DNS activity. Message data is compressed before encryption.
|
||||
|
||||
## Image and File Downloads
|
||||
|
||||
Messages with attached photos, files, GIFs, audio, and videos can be cached on the server and downloaded over the same encrypted DNS channel.
|
||||
|
||||
The server downloads each attached media file (deduped by upstream id and content hash), assigns it a slot in a reserved channel range (`10000`–`60000`), and splits the bytes into the same-sized blocks used elsewhere. The message text gains a small metadata header:
|
||||
|
||||
```
|
||||
[IMAGE]<size>:<dl>:<ch>:<blk>:<crc32>
|
||||
optional caption
|
||||
```
|
||||
|
||||
`<dl>=0` means the file exceeded the server's size cap and isn't cached. Old clients render the header as a regular caption line.
|
||||
|
||||
Block 0 of every cached file begins with a 16-byte protocol header — 4 bytes CRC32 of the (decompressed) content, 1 byte version, 1 byte compression, 10 bytes reserved for future fields. The client checks the CRC against the expected value from the message metadata before delivering any bytes, so a stale message pointing to a slot the server has since reused for a different file is rejected after a single block. The remaining bytes are decompressed per the compression byte. Downloads are cached on the client (IndexedDB, 7 days) and on the local thefeed-client server (`<dataDir>/media-cache/`, 7 days) so multiple devices behind one client share a single DNS-tunnelled fetch. Concurrent downloads are limited to one at a time; extra clicks are queued.
|
||||
|
||||
Server flags:
|
||||
|
||||
- `--no-media` — disable the feature.
|
||||
- `--media-max-size` (KB, default 100) — per-file size cap; `0` disables the cap.
|
||||
- `--media-cache-ttl` (minutes, default 600) — how long a cached media entry stays available.
|
||||
- `--media-compression` (default `gzip`) — `none`, `gzip`, or `deflate`. The compression byte is carried in the block-0 header so the client can decompress without prior knowledge.
|
||||
|
||||
The hourly DNS report includes `totalMediaQueries` and a `mediaCache` block (entries, bytes, hits, misses, evictions).
|
||||
|
||||
## Downloads
|
||||
|
||||
[Releases](https://github.com/sartoopjj/thefeed/releases)
|
||||
@@ -101,6 +125,22 @@ Update:
|
||||
```bash
|
||||
sudo bash -c "$(curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh)"
|
||||
```
|
||||
|
||||
Install a specific version (rollback, beta, or rc):
|
||||
```bash
|
||||
# Roll back to a known-good tag
|
||||
curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh | sudo bash -s -- --version v0.9.2
|
||||
|
||||
# Install the most recent pre-release (beta / rc)
|
||||
curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh | sudo bash -s -- --pre
|
||||
|
||||
# List recent releases (stable / pre-release labels)
|
||||
curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh | sudo bash -s -- --list
|
||||
```
|
||||
|
||||
Short forms: `-v <tag>` is the same as `--version <tag>`. The legacy positional form
|
||||
`sudo bash install.sh v1.0.0` still works.
|
||||
|
||||
Re-login: `curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh | sudo bash -s -- --login`
|
||||
Uninstall: `curl -Ls https://raw.githubusercontent.com/sartoopjj/thefeed/main/scripts/install.sh | sudo bash -s -- --uninstall`
|
||||
|
||||
@@ -310,6 +350,10 @@ Environment variables: `THEFEED_DOMAIN`, `THEFEED_KEY`, `THEFEED_MSG_LIMIT`, `TH
|
||||
| `--padding` | `32` | Max random padding bytes (0=disabled) |
|
||||
| `--msg-limit` | `15` | Maximum messages to fetch per Telegram channel |
|
||||
| `--allow-manage` | `false` | Allow remote send/channel management (default: disabled) |
|
||||
| `--no-media` | `false` | Disable downloading and serving image/file media |
|
||||
| `--media-max-size` | `100` | Per-file size cap for cached media in KB (0 = no cap) |
|
||||
| `--media-cache-ttl` | `600` | How long a cached media entry stays available, in minutes |
|
||||
| `--media-compression` | `gzip` | Compression for cached media: `none`, `gzip`, or `deflate` |
|
||||
| `--version` | | Show version and exit |
|
||||
|
||||
### Client
|
||||
|
||||
@@ -69,7 +69,7 @@ android {
|
||||
enable true
|
||||
reset()
|
||||
include 'arm64-v8a', 'armeabi-v7a'
|
||||
universalApk true
|
||||
universalApk false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,6 +37,10 @@ func main() {
|
||||
msgLimit := flag.Int("msg-limit", 15, "Maximum messages to fetch per Telegram channel")
|
||||
allowManage := flag.Bool("allow-manage", false, "Allow remote channel management and sending via DNS")
|
||||
debug := flag.Bool("debug", false, "Log every decoded DNS query")
|
||||
noMedia := flag.Bool("no-media", false, "Disable downloading and serving image/file media (clients see [TAG] only)")
|
||||
mediaMaxSizeKB := flag.Int("media-max-size", 100, "Per-file size cap for cached media in KB (0 = no cap)")
|
||||
mediaCacheTTLMin := flag.Int("media-cache-ttl", 600, "How long a cached media entry stays available, in minutes")
|
||||
mediaCompression := flag.String("media-compression", "gzip", "Compression for cached media: none|gzip|deflate")
|
||||
showVersion := flag.Bool("version", false, "Show version and exit")
|
||||
flag.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, "thefeed-server %s\n\nServes Telegram/X feed content over encrypted DNS for censorship-resistant access.\n\nUsage:\n thefeed-server [flags]\n\nFlags:\n", version.Version)
|
||||
@@ -152,6 +156,10 @@ func main() {
|
||||
NoTelegram: *noTelegram,
|
||||
AllowManage: *allowManage,
|
||||
Debug: *debug,
|
||||
NoMedia: *noMedia,
|
||||
MediaMaxSize: int64(*mediaMaxSizeKB) * 1024,
|
||||
MediaCacheTTL: *mediaCacheTTLMin,
|
||||
MediaCompression: *mediaCompression,
|
||||
Telegram: server.TelegramConfig{
|
||||
APIID: id,
|
||||
APIHash: *apiHash,
|
||||
|
||||
@@ -238,27 +238,95 @@ func (c *Cache) Cleanup() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// detectGaps finds places in a sorted message list where consecutive IDs differ
|
||||
// by more than 1. Gaps larger than 500 are ignored (natural Telegram numbering).
|
||||
// Returns nil when there are fewer than 10 messages (not enough history to judge).
|
||||
// detectGaps finds runs of missing IDs between consecutive messages. Album-
|
||||
// merged canonicals cover a contiguous span of sibling IDs (counted via
|
||||
// albumSpan), so absorbed siblings don't show up as fake gaps. Diffs > 500
|
||||
// are ignored (natural Telegram numbering jumps); under 10 messages we don't
|
||||
// have enough history to judge.
|
||||
func detectGaps(msgs []protocol.Message) []Gap {
|
||||
if len(msgs) < 10 {
|
||||
return nil
|
||||
}
|
||||
var gaps []Gap
|
||||
for i := 1; i < len(msgs); i++ {
|
||||
prev, cur := msgs[i-1].ID, msgs[i].ID
|
||||
if diff := cur - prev; diff > 1 && diff <= 500 {
|
||||
prev, cur := msgs[i-1], msgs[i]
|
||||
span := uint32(albumSpan(prev.Text))
|
||||
if span == 0 {
|
||||
span = 1
|
||||
}
|
||||
expectedNext := prev.ID + span
|
||||
if cur.ID <= expectedNext {
|
||||
continue
|
||||
}
|
||||
diff := cur.ID - expectedNext
|
||||
if diff > 500 {
|
||||
continue
|
||||
}
|
||||
gaps = append(gaps, Gap{
|
||||
AfterID: prev,
|
||||
BeforeID: cur,
|
||||
Count: int(diff - 1),
|
||||
AfterID: expectedNext - 1,
|
||||
BeforeID: cur.ID,
|
||||
Count: int(diff),
|
||||
})
|
||||
}
|
||||
}
|
||||
return gaps
|
||||
}
|
||||
|
||||
// mediaHeaderTags are the leading [TAG] markers extractMessages may stack
|
||||
// at the start of a canonical message body — one per absorbed album item.
|
||||
var mediaHeaderTags = []string{
|
||||
protocol.MediaImage,
|
||||
protocol.MediaVideo,
|
||||
protocol.MediaFile,
|
||||
protocol.MediaAudio,
|
||||
protocol.MediaSticker,
|
||||
protocol.MediaGIF,
|
||||
protocol.MediaLocation,
|
||||
protocol.MediaContact,
|
||||
}
|
||||
|
||||
// albumSpan counts the leading media-header lines in a canonical body — 0
|
||||
// for plain text, 1 for a single media item, N for an N-item album. A
|
||||
// leading [REPLY]... line is skipped first.
|
||||
func albumSpan(text string) int {
|
||||
if strings.HasPrefix(text, protocol.MediaReply) {
|
||||
nl := strings.IndexByte(text, '\n')
|
||||
if nl < 0 {
|
||||
return 0
|
||||
}
|
||||
text = text[nl+1:]
|
||||
}
|
||||
n := 0
|
||||
for _, line := range strings.Split(text, "\n") {
|
||||
if !isMediaHeaderLine(line) {
|
||||
break
|
||||
}
|
||||
n++
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// isMediaHeaderLine matches both the bare [TAG] form and the downloadable
|
||||
// "[TAG]<digit>..." form. Caption text that happens to start with "[IMAGE]"
|
||||
// is rejected because rest[0] won't be a digit.
|
||||
func isMediaHeaderLine(line string) bool {
|
||||
for _, tag := range mediaHeaderTags {
|
||||
if line == tag {
|
||||
return true
|
||||
}
|
||||
if !strings.HasPrefix(line, tag) {
|
||||
continue
|
||||
}
|
||||
rest := line[len(tag):]
|
||||
if rest == "" {
|
||||
return true
|
||||
}
|
||||
if rest[0] >= '0' && rest[0] <= '9' {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// channelPath returns the file path for a channel's cache, keyed by sanitised name.
|
||||
// Only letters, digits, hyphens, and underscores are kept; everything else becomes _.
|
||||
func (c *Cache) channelPath(channelName string) string {
|
||||
|
||||
@@ -221,6 +221,111 @@ func TestCacheGapDetection_NoGapWhenFewMessages(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheGapDetection_AlbumNoFalsePositive(t *testing.T) {
|
||||
cache, _ := NewCache(t.TempDir())
|
||||
|
||||
// 10 sequential messages where ID=5 is a 2-image album: it absorbs ID 6
|
||||
// (a real Telegram behaviour). The next message is ID 7. Without the
|
||||
// album-aware fix, the gap detector would flag a missing ID 6.
|
||||
msgs := []protocol.Message{
|
||||
{ID: 1, Timestamp: 1700000000, Text: "a"},
|
||||
{ID: 2, Timestamp: 1700000001, Text: "b"},
|
||||
{ID: 3, Timestamp: 1700000002, Text: "c"},
|
||||
{ID: 4, Timestamp: 1700000003, Text: "d"},
|
||||
{ID: 5, Timestamp: 1700000004, Text: "[IMAGE]100:0:0:0:abcd1234:img1.jpg\n[IMAGE]200:0:0:0:abcd5678:img2.jpg\nalbum caption"},
|
||||
// ID 6 is absorbed into the album above; the feed jumps to 7.
|
||||
{ID: 7, Timestamp: 1700000005, Text: "e"},
|
||||
{ID: 8, Timestamp: 1700000006, Text: "f"},
|
||||
{ID: 9, Timestamp: 1700000007, Text: "g"},
|
||||
{ID: 10, Timestamp: 1700000008, Text: "h"},
|
||||
{ID: 11, Timestamp: 1700000009, Text: "i"},
|
||||
}
|
||||
result, _ := cache.MergeAndPut("albumchan", msgs)
|
||||
|
||||
if len(result.Gaps) != 0 {
|
||||
t.Errorf("album-absorbed sibling should not be flagged as a gap, got %+v", result.Gaps)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheGapDetection_AlbumWithRealGap(t *testing.T) {
|
||||
cache, _ := NewCache(t.TempDir())
|
||||
|
||||
// 3-image album at ID=5 absorbs IDs 6,7. A real gap of IDs 8,9 follows
|
||||
// before ID=10. The detector should report a single 2-message gap.
|
||||
msgs := []protocol.Message{
|
||||
{ID: 1, Timestamp: 1700000000, Text: "a"},
|
||||
{ID: 2, Timestamp: 1700000001, Text: "b"},
|
||||
{ID: 3, Timestamp: 1700000002, Text: "c"},
|
||||
{ID: 4, Timestamp: 1700000003, Text: "d"},
|
||||
{ID: 5, Timestamp: 1700000004, Text: "[IMAGE]100:0:0:0:aaaaaaaa:1.jpg\n[IMAGE]200:0:0:0:bbbbbbbb:2.jpg\n[IMAGE]300:0:0:0:cccccccc:3.jpg\ncap"},
|
||||
// IDs 6,7 absorbed; IDs 8,9 truly missing; resume at 10.
|
||||
{ID: 10, Timestamp: 1700000010, Text: "e"},
|
||||
{ID: 11, Timestamp: 1700000011, Text: "f"},
|
||||
{ID: 12, Timestamp: 1700000012, Text: "g"},
|
||||
{ID: 13, Timestamp: 1700000013, Text: "h"},
|
||||
{ID: 14, Timestamp: 1700000014, Text: "i"},
|
||||
{ID: 15, Timestamp: 1700000015, Text: "j"},
|
||||
}
|
||||
result, _ := cache.MergeAndPut("albumgap", msgs)
|
||||
|
||||
if len(result.Gaps) != 1 {
|
||||
t.Fatalf("expected exactly one gap, got %+v", result.Gaps)
|
||||
}
|
||||
g := result.Gaps[0]
|
||||
if g.AfterID != 7 || g.BeforeID != 10 || g.Count != 2 {
|
||||
t.Errorf("gap = %+v, want AfterID=7 BeforeID=10 Count=2", g)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheGapDetection_AlbumWithReplyPrefix(t *testing.T) {
|
||||
cache, _ := NewCache(t.TempDir())
|
||||
|
||||
// [REPLY]:42 prefix before the media headers should still let albumSpan
|
||||
// count the headers correctly.
|
||||
msgs := []protocol.Message{
|
||||
{ID: 1, Timestamp: 1700000000, Text: "a"},
|
||||
{ID: 2, Timestamp: 1700000001, Text: "b"},
|
||||
{ID: 3, Timestamp: 1700000002, Text: "c"},
|
||||
{ID: 4, Timestamp: 1700000003, Text: "d"},
|
||||
{ID: 5, Timestamp: 1700000004, Text: "[REPLY]:42\n[IMAGE]100:0:0:0:aaaaaaaa:1.jpg\n[IMAGE]200:0:0:0:bbbbbbbb:2.jpg\nreplied caption"},
|
||||
// ID 6 absorbed.
|
||||
{ID: 7, Timestamp: 1700000005, Text: "e"},
|
||||
{ID: 8, Timestamp: 1700000006, Text: "f"},
|
||||
{ID: 9, Timestamp: 1700000007, Text: "g"},
|
||||
{ID: 10, Timestamp: 1700000008, Text: "h"},
|
||||
{ID: 11, Timestamp: 1700000009, Text: "i"},
|
||||
}
|
||||
result, _ := cache.MergeAndPut("replychan", msgs)
|
||||
|
||||
if len(result.Gaps) != 0 {
|
||||
t.Errorf("album with reply prefix should not produce false gaps, got %+v", result.Gaps)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlbumSpan(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
text string
|
||||
want int
|
||||
}{
|
||||
{"plain text", "hello world", 0},
|
||||
{"single image legacy", "[IMAGE]\ncaption", 1},
|
||||
{"single image downloadable", "[IMAGE]100:0:0:0:abcd1234:f.jpg\ncap", 1},
|
||||
{"two images", "[IMAGE]100:0:0:0:aa:1.jpg\n[IMAGE]200:0:0:0:bb:2.jpg\ncap", 2},
|
||||
{"three mixed", "[IMAGE]1:0:0:0:aa:a.jpg\n[VIDEO]2:0:0:0:bb:b.mp4\n[FILE]3:0:0:0:cc:c.pdf\nx", 3},
|
||||
{"with reply prefix", "[REPLY]:99\n[IMAGE]100:0:0:0:aa:1.jpg\n[IMAGE]200:0:0:0:bb:2.jpg\ncap", 2},
|
||||
{"reply only no media", "[REPLY]:99\nhello", 0},
|
||||
{"caption that mentions a tag", "look at this [IMAGE] thing", 0},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
if got := albumSpan(c.text); got != c.want {
|
||||
t.Errorf("albumSpan(%q) = %d, want %d", c.text, got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheGapDetection_LargeGapIgnored(t *testing.T) {
|
||||
cache, _ := NewCache(t.TempDir())
|
||||
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/flate"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// DecompressMediaReader wraps r per the given compression.
|
||||
func DecompressMediaReader(r io.Reader, compression protocol.MediaCompression) (io.ReadCloser, error) {
|
||||
switch compression {
|
||||
case protocol.MediaCompressionNone:
|
||||
return io.NopCloser(r), nil
|
||||
case protocol.MediaCompressionGzip:
|
||||
return gzip.NewReader(r)
|
||||
case protocol.MediaCompressionDeflate:
|
||||
return flate.NewReader(r), nil
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported media compression: %d", compression)
|
||||
}
|
||||
|
||||
func decompressMediaBytes(body []byte, compression protocol.MediaCompression) ([]byte, error) {
|
||||
rc, err := DecompressMediaReader(bytes.NewReader(body), compression)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rc.Close()
|
||||
return io.ReadAll(rc)
|
||||
}
|
||||
|
||||
// MediaProgress reports per-block progress (completed of total). May be
|
||||
// invoked from a background goroutine.
|
||||
type MediaProgress func(completed, total int)
|
||||
|
||||
// MediaBlockHeaderLen re-exports the protocol header length so callers in
|
||||
// the web layer don't have to import the protocol package twice.
|
||||
const MediaBlockHeaderLen = protocol.MediaBlockHeaderLen
|
||||
|
||||
// ErrMediaHashMismatch indicates the assembled bytes don't match the
|
||||
// expected CRC32. The caller must discard the returned bytes.
|
||||
var ErrMediaHashMismatch = fmt.Errorf("media content hash mismatch")
|
||||
|
||||
// mediaBlockOuterRetries is the per-block retry budget the media path adds
|
||||
// on top of FetchBlock's own internal retries. A ~200-block file can lose
|
||||
// individual blocks repeatedly; without this, one persistent bad block
|
||||
// kills the whole download even though FetchBlock would succeed on a
|
||||
// later attempt.
|
||||
const mediaBlockOuterRetries = 5
|
||||
|
||||
func (f *Fetcher) fetchMediaBlock(ctx context.Context, channel, block uint16) ([]byte, error) {
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < mediaBlockOuterRetries; attempt++ {
|
||||
if ctx.Err() != nil {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
data, err := f.FetchBlock(ctx, channel, block)
|
||||
if err == nil {
|
||||
return data, nil
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
lastErr = err
|
||||
}
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// FetchMedia returns the assembled bytes of a media blob served on a media
|
||||
// channel, optionally verifying expectedCRC32.
|
||||
func (f *Fetcher) FetchMedia(ctx context.Context, channel uint16, blockCount uint16, expectedCRC32 uint32, progress MediaProgress) ([]byte, error) {
|
||||
if !protocol.IsMediaChannel(channel) {
|
||||
return nil, fmt.Errorf("channel %d is outside media range", channel)
|
||||
}
|
||||
if blockCount == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type blockResult struct {
|
||||
idx int
|
||||
data []byte
|
||||
err error
|
||||
}
|
||||
|
||||
results := make(chan blockResult, blockCount)
|
||||
sem := make(chan struct{}, 5)
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < int(blockCount); i++ {
|
||||
wg.Add(1)
|
||||
go func(idx int) {
|
||||
defer wg.Done()
|
||||
select {
|
||||
case sem <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
results <- blockResult{idx: idx, err: ctx.Err()}
|
||||
return
|
||||
}
|
||||
defer func() { <-sem }()
|
||||
data, err := f.fetchMediaBlock(ctx, channel, uint16(idx))
|
||||
results <- blockResult{idx: idx, data: data, err: err}
|
||||
}(i)
|
||||
}
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(results)
|
||||
}()
|
||||
|
||||
ordered := make([][]byte, blockCount)
|
||||
completed := 0
|
||||
var progMu sync.Mutex
|
||||
for r := range results {
|
||||
if r.err != nil {
|
||||
if r.err == ctx.Err() {
|
||||
return nil, r.err
|
||||
}
|
||||
return nil, fmt.Errorf("media channel %d block %d: %w", channel, r.idx, r.err)
|
||||
}
|
||||
ordered[r.idx] = r.data
|
||||
completed++
|
||||
if progress != nil {
|
||||
progMu.Lock()
|
||||
progress(completed, int(blockCount))
|
||||
progMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
if len(ordered) == 0 || len(ordered[0]) < protocol.MediaBlockHeaderLen {
|
||||
return nil, fmt.Errorf("media channel %d: malformed block 0", channel)
|
||||
}
|
||||
header, err := protocol.DecodeMediaBlockHeader(ordered[0][:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("media channel %d: %w", channel, err)
|
||||
}
|
||||
if expectedCRC32 != 0 && header.CRC32 != expectedCRC32 {
|
||||
return nil, ErrMediaHashMismatch
|
||||
}
|
||||
|
||||
// Concatenate all block bytes after the header.
|
||||
total := len(ordered[0]) - protocol.MediaBlockHeaderLen
|
||||
for i := 1; i < len(ordered); i++ {
|
||||
total += len(ordered[i])
|
||||
}
|
||||
body := make([]byte, 0, total)
|
||||
body = append(body, ordered[0][protocol.MediaBlockHeaderLen:]...)
|
||||
for i := 1; i < len(ordered); i++ {
|
||||
body = append(body, ordered[i]...)
|
||||
}
|
||||
|
||||
// Decompress per the header.
|
||||
out, err := decompressMediaBytes(body, header.Compression)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decompress media channel %d: %w", channel, err)
|
||||
}
|
||||
if expectedCRC32 != 0 {
|
||||
if got := crc32.ChecksumIEEE(out); got != expectedCRC32 {
|
||||
return nil, ErrMediaHashMismatch
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// FetchMediaBlocksStream fetches blocks [startBlock, startBlock+count) and
|
||||
// writes each block's raw bytes to w in order as soon as they become
|
||||
// contiguous. No header parsing; callers slice off the protocol header
|
||||
// themselves and decompress as appropriate. Cancelling ctx aborts both
|
||||
// in-flight DNS queries and pending writes.
|
||||
func (f *Fetcher) FetchMediaBlocksStream(ctx context.Context, channel, startBlock, count uint16, w io.Writer, progress MediaProgress) error {
|
||||
if !protocol.IsMediaChannel(channel) {
|
||||
return fmt.Errorf("channel %d is outside media range", channel)
|
||||
}
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
type blockResult struct {
|
||||
idx int
|
||||
data []byte
|
||||
err error
|
||||
}
|
||||
results := make(chan blockResult, count)
|
||||
sem := make(chan struct{}, 5)
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < int(count); i++ {
|
||||
wg.Add(1)
|
||||
go func(idx int) {
|
||||
defer wg.Done()
|
||||
select {
|
||||
case sem <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
results <- blockResult{idx: idx, err: ctx.Err()}
|
||||
return
|
||||
}
|
||||
defer func() { <-sem }()
|
||||
data, err := f.fetchMediaBlock(ctx, channel, uint16(int(startBlock)+idx))
|
||||
results <- blockResult{idx: idx, data: data, err: err}
|
||||
}(i)
|
||||
}
|
||||
go func() { wg.Wait(); close(results) }()
|
||||
|
||||
pending := make(map[int][]byte)
|
||||
next := 0
|
||||
completed := 0
|
||||
for r := range results {
|
||||
if r.err != nil {
|
||||
if r.err == ctx.Err() {
|
||||
return r.err
|
||||
}
|
||||
return fmt.Errorf("media channel %d block %d: %w", channel, int(startBlock)+r.idx, r.err)
|
||||
}
|
||||
pending[r.idx] = r.data
|
||||
for {
|
||||
payload, ok := pending[next]
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
if _, werr := w.Write(payload); werr != nil {
|
||||
return werr
|
||||
}
|
||||
if flusher, ok := w.(interface{ Flush() }); ok {
|
||||
flusher.Flush()
|
||||
}
|
||||
next++
|
||||
}
|
||||
completed++
|
||||
if progress != nil {
|
||||
progress(completed, int(count))
|
||||
}
|
||||
}
|
||||
if next != int(count) {
|
||||
return fmt.Errorf("media channel %d: incomplete (%d / %d)", channel, next, count)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,207 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/flate"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"hash/crc32"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// withMediaHeader prepends the protocol media block header to body. The
|
||||
// CRC32 is computed over the DECOMPRESSED bytes the caller passes in, but
|
||||
// `body` itself is what the server would have produced after compressing —
|
||||
// which for compression=none is just the bytes themselves.
|
||||
func withMediaHeader(crc uint32, body []byte, compression protocol.MediaCompression) []byte {
|
||||
hdr := protocol.EncodeMediaBlockHeader(protocol.MediaBlockHeader{
|
||||
CRC32: crc,
|
||||
Version: protocol.MediaHeaderVersion,
|
||||
Compression: compression,
|
||||
})
|
||||
out := make([]byte, 0, len(hdr)+len(body))
|
||||
out = append(out, hdr...)
|
||||
out = append(out, body...)
|
||||
return out
|
||||
}
|
||||
|
||||
// gzipBytes returns b compressed as a complete gzip stream at
// gzip.BestCompression. Any failure creating the writer, writing, or closing
// it fails the test immediately.
func gzipBytes(t *testing.T, b []byte) []byte {
	t.Helper()
	var buf bytes.Buffer
	zw, err := gzip.NewWriterLevel(&buf, gzip.BestCompression)
	if err != nil {
		t.Fatalf("gzip writer: %v", err)
	}
	if _, err := zw.Write(b); err != nil {
		t.Fatalf("gzip: %v", err)
	}
	// Close flushes the remaining data and writes the gzip footer.
	if err := zw.Close(); err != nil {
		t.Fatalf("gzip close: %v", err)
	}
	return buf.Bytes()
}
|
||||
|
||||
// blockMockExchange wires the fetcher's exchangeFn so each (channel, block)
|
||||
// pair returns the matching slice from blocks.
|
||||
func blockMockExchange(f *Fetcher, want uint16, blocks [][]byte) func(context.Context, *dns.Msg, string) (*dns.Msg, time.Duration, error) {
|
||||
return func(ctx context.Context, m *dns.Msg, _ string) (*dns.Msg, time.Duration, error) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
ch, blk, err := protocol.DecodeQuery(f.queryKey, m.Question[0].Name, f.domain)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if ch != want {
|
||||
return nil, 0, errFakeNotFound{}
|
||||
}
|
||||
if int(blk) >= len(blocks) {
|
||||
return nil, 0, errFakeNotFound{}
|
||||
}
|
||||
encoded, encErr := protocol.EncodeResponse(f.responseKey, blocks[int(blk)], 0)
|
||||
if encErr != nil {
|
||||
return nil, 0, encErr
|
||||
}
|
||||
resp := new(dns.Msg)
|
||||
resp.SetReply(m)
|
||||
resp.Rcode = dns.RcodeSuccess
|
||||
resp.Answer = []dns.RR{&dns.TXT{
|
||||
Hdr: dns.RR_Header{Name: m.Question[0].Name, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 0},
|
||||
Txt: []string{encoded},
|
||||
}}
|
||||
return resp, time.Millisecond, nil
|
||||
}
|
||||
}
|
||||
|
||||
type errFakeNotFound struct{}
|
||||
|
||||
func (errFakeNotFound) Error() string { return "fake nxdomain" }
|
||||
|
||||
func TestFetchMediaUncompressed(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
original := make([]byte, 1500)
|
||||
if _, err := rand.Read(original); err != nil {
|
||||
t.Fatalf("rand: %v", err)
|
||||
}
|
||||
crc := crc32.ChecksumIEEE(original)
|
||||
blocks := protocol.SplitIntoBlocks(withMediaHeader(crc, original, protocol.MediaCompressionNone))
|
||||
|
||||
channel := protocol.MediaChannelStart + 7
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
|
||||
out, err := f.FetchMedia(context.Background(), channel, uint16(len(blocks)), crc, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("FetchMedia: %v", err)
|
||||
}
|
||||
if !bytes.Equal(out, original) {
|
||||
t.Fatalf("decompressed output differs from original")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaDeflate(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
original := bytes.Repeat([]byte("xy "), 250)
|
||||
crc := crc32.ChecksumIEEE(original)
|
||||
var buf bytes.Buffer
|
||||
zw, _ := flate.NewWriter(&buf, flate.BestCompression)
|
||||
zw.Write(original)
|
||||
zw.Close()
|
||||
blocks := protocol.SplitIntoBlocks(withMediaHeader(crc, buf.Bytes(), protocol.MediaCompressionDeflate))
|
||||
|
||||
channel := protocol.MediaChannelStart + 9
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
out, err := f.FetchMedia(context.Background(), channel, uint16(len(blocks)), crc, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("FetchMedia: %v", err)
|
||||
}
|
||||
if !bytes.Equal(out, original) {
|
||||
t.Fatalf("decompressed differs from original")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaGzip(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
original := bytes.Repeat([]byte("abc123 "), 200) // compressible
|
||||
crc := crc32.ChecksumIEEE(original)
|
||||
body := gzipBytes(t, original)
|
||||
blocks := protocol.SplitIntoBlocks(withMediaHeader(crc, body, protocol.MediaCompressionGzip))
|
||||
|
||||
channel := protocol.MediaChannelStart + 8
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
|
||||
out, err := f.FetchMedia(context.Background(), channel, uint16(len(blocks)), crc, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("FetchMedia: %v", err)
|
||||
}
|
||||
if !bytes.Equal(out, original) {
|
||||
t.Fatalf("decompressed output differs from original")
|
||||
}
|
||||
if len(body) >= len(original) {
|
||||
t.Fatalf("compressed body should be smaller than original (got %d vs %d)", len(body), len(original))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaRejectsNonMediaChannel(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
_, err := f.FetchMedia(context.Background(), 1, 1, 0, nil)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for non-media channel")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaRejectsBadHash(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
original := []byte("hello hash mismatch")
|
||||
crc := crc32.ChecksumIEEE(original)
|
||||
blocks := [][]byte{withMediaHeader(crc, original, protocol.MediaCompressionNone)}
|
||||
channel := protocol.MediaChannelStart + 1
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
|
||||
_, err := f.FetchMedia(context.Background(), channel, 1, 0xDEADBEEF, nil)
|
||||
if err != ErrMediaHashMismatch {
|
||||
t.Fatalf("err = %v, want ErrMediaHashMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaBlocksStreamWritesInOrder(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
blocks := [][]byte{
|
||||
[]byte("alpha"),
|
||||
[]byte("beta"),
|
||||
[]byte("gamma"),
|
||||
}
|
||||
channel := protocol.MediaChannelStart + 12
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
|
||||
var got bytes.Buffer
|
||||
if err := f.FetchMediaBlocksStream(context.Background(), channel, 0, 3, &got, nil); err != nil {
|
||||
t.Fatalf("FetchMediaBlocksStream: %v", err)
|
||||
}
|
||||
want := append(append(append([]byte{}, blocks[0]...), blocks[1]...), blocks[2]...)
|
||||
if !bytes.Equal(got.Bytes(), want) {
|
||||
t.Fatalf("got %q, want %q", got.Bytes(), want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchMediaBlocksStreamPartialRange(t *testing.T) {
|
||||
f := newTestFetcher(t, []string{"1.1.1.1:53"})
|
||||
blocks := [][]byte{
|
||||
[]byte("first-block"),
|
||||
[]byte("second-block"),
|
||||
[]byte("third-block"),
|
||||
}
|
||||
channel := protocol.MediaChannelStart + 13
|
||||
f.exchangeFn = blockMockExchange(f, channel, blocks)
|
||||
|
||||
var got bytes.Buffer
|
||||
if err := f.FetchMediaBlocksStream(context.Background(), channel, 1, 2, &got, nil); err != nil {
|
||||
t.Fatalf("FetchMediaBlocksStream: %v", err)
|
||||
}
|
||||
want := append(append([]byte{}, blocks[1]...), blocks[2]...)
|
||||
if !bytes.Equal(got.Bytes(), want) {
|
||||
t.Fatalf("got %q, want %q", got.Bytes(), want)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,229 @@
|
||||
package protocol
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// MediaMeta is the structured form of the metadata row that follows a media
// tag at the start of a feed message body.
//
// On the wire the row sits directly after the tag and before any caption:
//
//	[IMAGE]<size>:<dl>:<ch>:<blk>:<crc32hex>[:<filename>]
//	caption goes here on the next line(s)
//
// <filename> is optional. When present it is a suggested filename that has
// been passed through SanitiseMediaFilename. Clients that split on ':' and
// read only the first five fields are unaffected by the trailing field.
type MediaMeta struct {
	// Tag is the media tag prefix, e.g. MediaImage, MediaVideo, MediaFile.
	Tag string
	// Size is the <size> field of the metadata row.
	Size int64
	// Downloadable mirrors the <dl> field (1 = true, 0 = false).
	Downloadable bool
	// Channel is the <ch> field: the channel number carrying the blob.
	Channel uint16
	// Blocks is the <blk> field: the number of blocks on that channel.
	Blocks uint16
	// CRC32 is the <crc32hex> field, rendered as 8 hex digits on the wire.
	CRC32 uint32
	// Filename is the optional suggested filename (empty when absent).
	Filename string
}
|
||||
|
||||
// String renders the metadata in the wire format documented above, including
|
||||
// the leading tag and trailing newline that separates the metadata row from
|
||||
// any caption.
|
||||
func (m MediaMeta) String() string {
|
||||
dl := 0
|
||||
if m.Downloadable {
|
||||
dl = 1
|
||||
}
|
||||
if fn := SanitiseMediaFilename(m.Filename); fn != "" {
|
||||
return fmt.Sprintf("%s%d:%d:%d:%d:%08x:%s\n",
|
||||
m.Tag, m.Size, dl, m.Channel, m.Blocks, m.CRC32, fn)
|
||||
}
|
||||
return fmt.Sprintf("%s%d:%d:%d:%d:%08x\n",
|
||||
m.Tag, m.Size, dl, m.Channel, m.Blocks, m.CRC32)
|
||||
}
|
||||
|
||||
// SanitiseMediaFilename returns a filename safe to embed in the wire
|
||||
// metadata line. The output uses a restricted alphabet ([A-Za-z0-9._-]) so
|
||||
// no path separator, colon, newline, or control char can ever survive.
|
||||
// When the input is too long the base name is replaced with a short
|
||||
// hash-derived id but the extension is preserved so other OSes still
|
||||
// recognise the file type.
|
||||
func SanitiseMediaFilename(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
if i := strings.LastIndexAny(s, `/\`); i >= 0 {
|
||||
s = s[i+1:]
|
||||
}
|
||||
cleaned := filterFilenameRunes(s)
|
||||
if cleaned == "" || cleaned == "." || cleaned == ".." {
|
||||
return ""
|
||||
}
|
||||
|
||||
const maxBase = 24
|
||||
const maxExt = 8
|
||||
|
||||
base, ext := splitFilenameExt(cleaned)
|
||||
if len(ext) > maxExt {
|
||||
ext = ext[:maxExt]
|
||||
}
|
||||
if len(base) > maxBase {
|
||||
h := fnv.New64a()
|
||||
_, _ = h.Write([]byte(cleaned))
|
||||
base = "media-" + hex.EncodeToString(h.Sum(nil))[:8]
|
||||
}
|
||||
if base == "" || base == "." {
|
||||
base = "media"
|
||||
}
|
||||
if ext != "" {
|
||||
return base + "." + ext
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// filterFilenameRunes returns s with every rune outside the set
// [A-Za-z0-9._-] removed. Invalid UTF-8 bytes are removed as well.
func filterFilenameRunes(s string) string {
	keep := func(r rune) rune {
		switch {
		case '0' <= r && r <= '9', 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
			return r
		case r == '.' || r == '_' || r == '-':
			return r
		}
		// A negative result tells strings.Map to drop the rune.
		return -1
	}
	return strings.Map(keep, s)
}
|
||||
|
||||
// splitFilenameExt splits s at its last '.' into the part before the dot and
// the part after it. When s has no dot, or the last dot is the final byte,
// s is returned whole with an empty extension.
func splitFilenameExt(s string) (base, ext string) {
	dot := strings.LastIndexByte(s, '.')
	if dot < 0 || dot >= len(s)-1 {
		return s, ""
	}
	return s[:dot], s[dot+1:]
}
|
||||
|
||||
// EncodeMediaText prepends the metadata line to an optional caption and
|
||||
// returns the combined message text. A nil/empty caption yields just the tag
|
||||
// + metadata + trailing newline-less string (the caption split is by the
|
||||
// metadata line's trailing \n, so an empty caption simply has no extra body).
|
||||
func EncodeMediaText(meta MediaMeta, caption string) string {
|
||||
header := meta.String()
|
||||
if caption == "" {
|
||||
// Drop the trailing newline so the message text doesn't end with a
|
||||
// blank line for caption-less media.
|
||||
return strings.TrimSuffix(header, "\n")
|
||||
}
|
||||
return header + caption
|
||||
}
|
||||
|
||||
// ParseMediaText parses a message body that begins with a known media tag.
|
||||
// On success it returns the metadata and the remaining caption (which may be
|
||||
// empty). When the body uses the legacy "[TAG]\ncaption" form (no metadata
|
||||
// suffix), ParseMediaText returns ok=true with Downloadable=false and
|
||||
// Channel=0 — the caller can treat it as a non-downloadable placeholder
|
||||
// exactly like before.
|
||||
//
|
||||
// Unknown tags return ok=false. Malformed metadata for a known tag also
|
||||
// returns ok=false so the caller falls back to legacy display.
|
||||
func ParseMediaText(body string) (meta MediaMeta, caption string, ok bool) {
|
||||
tag, rest, found := splitKnownMediaTag(body)
|
||||
if !found {
|
||||
return MediaMeta{}, body, false
|
||||
}
|
||||
meta.Tag = tag
|
||||
|
||||
// The bit between the tag and the first newline is the metadata payload.
|
||||
nl := strings.IndexByte(rest, '\n')
|
||||
var metaLine string
|
||||
if nl < 0 {
|
||||
metaLine = rest
|
||||
caption = ""
|
||||
} else {
|
||||
metaLine = rest[:nl]
|
||||
caption = rest[nl+1:]
|
||||
}
|
||||
metaLine = strings.TrimSpace(metaLine)
|
||||
|
||||
if metaLine == "" {
|
||||
// Legacy [TAG]\ncaption — no per-file metadata. Treat as not-downloadable.
|
||||
return MediaMeta{Tag: tag}, caption, true
|
||||
}
|
||||
|
||||
parts := strings.Split(metaLine, ":")
|
||||
if len(parts) < 5 {
|
||||
// Looks like a caption line that happens to start with this tag (e.g.
|
||||
// "[IMAGE]nice photo"). Don't claim a structured parse — return the
|
||||
// whole `rest` as caption so the message still renders.
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
|
||||
size, err := strconv.ParseInt(parts[0], 10, 64)
|
||||
if err != nil || size < 0 {
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
dl, err := strconv.Atoi(parts[1])
|
||||
if err != nil || (dl != 0 && dl != 1) {
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
ch, err := strconv.ParseUint(parts[2], 10, 16)
|
||||
if err != nil {
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
blk, err := strconv.ParseUint(parts[3], 10, 16)
|
||||
if err != nil {
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
crc, err := strconv.ParseUint(parts[4], 16, 32)
|
||||
if err != nil {
|
||||
return MediaMeta{Tag: tag}, rest, true
|
||||
}
|
||||
// Reject any channel claimed inside a parseable metadata line that falls
|
||||
// outside the reserved media range — that can only be a malformed message
|
||||
// or a tampering attempt; refuse to surface it as downloadable.
|
||||
channel := uint16(ch)
|
||||
downloadable := dl == 1
|
||||
if downloadable && (!IsMediaChannel(channel) || blk == 0) {
|
||||
downloadable = false
|
||||
}
|
||||
|
||||
meta.Size = size
|
||||
meta.Downloadable = downloadable
|
||||
meta.Channel = channel
|
||||
meta.Blocks = uint16(blk)
|
||||
meta.CRC32 = uint32(crc)
|
||||
if len(parts) >= 6 {
|
||||
// SanitiseMediaFilename strips the field separator, so we can't
|
||||
// reach this point with a colon inside the filename. Take parts[5]
|
||||
// directly and re-sanitise defensively.
|
||||
meta.Filename = SanitiseMediaFilename(parts[5])
|
||||
}
|
||||
return meta, caption, true
|
||||
}
|
||||
|
||||
// knownMediaTags are the message text prefixes that mark a downloadable media
|
||||
// attachment. Order matters only for prefix matching; longer/more-specific
|
||||
// tags are not currently aliased so the order is alphabetical for clarity.
|
||||
var knownMediaTags = []string{
|
||||
MediaAudio,
|
||||
MediaFile,
|
||||
MediaGIF,
|
||||
MediaImage,
|
||||
MediaSticker,
|
||||
MediaVideo,
|
||||
}
|
||||
|
||||
// splitKnownMediaTag returns the matched tag and the remainder of the body
|
||||
// when body starts with one of knownMediaTags.
|
||||
func splitKnownMediaTag(body string) (tag, rest string, ok bool) {
|
||||
for _, t := range knownMediaTags {
|
||||
if strings.HasPrefix(body, t) {
|
||||
return t, body[len(t):], true
|
||||
}
|
||||
}
|
||||
return "", body, false
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package protocol
|
||||
|
||||
import (
	"encoding/binary"
	"fmt"
	"strings"
)
|
||||
|
||||
// MediaCompression identifies the compression method applied to a cached
// media file's bytes before they are split into DNS blocks. The value is
// stored as a single byte in the media block header.
type MediaCompression byte

// Supported compression methods. The numeric values are part of the wire
// format and are therefore spelled out explicitly.
const (
	// MediaCompressionNone marks bytes stored without compression.
	MediaCompressionNone MediaCompression = 0
	// MediaCompressionGzip marks a gzip-compressed body.
	MediaCompressionGzip MediaCompression = 1
	// MediaCompressionDeflate marks a raw DEFLATE-compressed body.
	MediaCompressionDeflate MediaCompression = 2
)
|
||||
|
||||
// MediaHeaderVersion is the header version written into, and required from,
// every media block header. It is bumped only when the layout changes
// incompatibly; until then new fields go into the reserved bytes.
const MediaHeaderVersion = uint8(1)
|
||||
|
||||
// MediaBlockHeaderLen is the fixed size, in bytes, of the metadata prefix
// the server prepends to a cached media file's bytes before splitting them
// into blocks; block 0 of every media channel starts with it.
//
// Layout (multi-byte values are big-endian):
//
//	[0:4]  CRC32 (IEEE) of the DECOMPRESSED file content
//	[4]    header version (currently 1)
//	[5]    compression byte (MediaCompression*)
//	[6:16] reserved, zero-filled; room for future fields without bumping
//	       the version byte
//
// The value is written as the sum of those four field widths.
const MediaBlockHeaderLen = 4 + 1 + 1 + 10
|
||||
|
||||
// MediaBlockHeader is the parsed form of a media-channel block-0 header.
|
||||
type MediaBlockHeader struct {
|
||||
CRC32 uint32
|
||||
Version uint8
|
||||
Compression MediaCompression
|
||||
}
|
||||
|
||||
// EncodeMediaBlockHeader writes the binary header into a fresh slice of
|
||||
// length MediaBlockHeaderLen. Reserved bytes are zero-padded.
|
||||
func EncodeMediaBlockHeader(h MediaBlockHeader) []byte {
|
||||
buf := make([]byte, MediaBlockHeaderLen)
|
||||
binary.BigEndian.PutUint32(buf[0:4], h.CRC32)
|
||||
if h.Version == 0 {
|
||||
h.Version = MediaHeaderVersion
|
||||
}
|
||||
buf[4] = h.Version
|
||||
buf[5] = byte(h.Compression)
|
||||
return buf
|
||||
}
|
||||
|
||||
// DecodeMediaBlockHeader parses the first MediaBlockHeaderLen bytes of a
|
||||
// media block. Errors on truncation or unknown header version.
|
||||
func DecodeMediaBlockHeader(b []byte) (MediaBlockHeader, error) {
|
||||
if len(b) < MediaBlockHeaderLen {
|
||||
return MediaBlockHeader{}, fmt.Errorf("media block header truncated: have %d bytes, need %d", len(b), MediaBlockHeaderLen)
|
||||
}
|
||||
h := MediaBlockHeader{
|
||||
CRC32: binary.BigEndian.Uint32(b[0:4]),
|
||||
Version: b[4],
|
||||
Compression: MediaCompression(b[5]),
|
||||
}
|
||||
if h.Version != MediaHeaderVersion {
|
||||
return MediaBlockHeader{}, fmt.Errorf("media block header version %d not supported (want %d)", h.Version, MediaHeaderVersion)
|
||||
}
|
||||
switch h.Compression {
|
||||
case MediaCompressionNone, MediaCompressionGzip, MediaCompressionDeflate:
|
||||
default:
|
||||
return MediaBlockHeader{}, fmt.Errorf("media block header: unknown compression %d", h.Compression)
|
||||
}
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// ParseMediaCompressionName returns the MediaCompression matching one of
|
||||
// "none", "gzip", "deflate" (case-insensitive). Used by the CLI flag to
|
||||
// translate user input.
|
||||
func ParseMediaCompressionName(s string) (MediaCompression, error) {
|
||||
switch s {
|
||||
case "", "none":
|
||||
return MediaCompressionNone, nil
|
||||
case "gzip":
|
||||
return MediaCompressionGzip, nil
|
||||
case "deflate":
|
||||
return MediaCompressionDeflate, nil
|
||||
}
|
||||
return 0, fmt.Errorf("unknown media compression %q", s)
|
||||
}
|
||||
|
||||
// String returns the canonical name of the compression value.
|
||||
func (c MediaCompression) String() string {
|
||||
switch c {
|
||||
case MediaCompressionNone:
|
||||
return "none"
|
||||
case MediaCompressionGzip:
|
||||
return "gzip"
|
||||
case MediaCompressionDeflate:
|
||||
return "deflate"
|
||||
}
|
||||
return fmt.Sprintf("unknown(%d)", byte(c))
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
package protocol
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEncodeDecodeMediaBlockHeader(t *testing.T) {
|
||||
cases := []MediaBlockHeader{
|
||||
{CRC32: 0x01020304, Version: MediaHeaderVersion, Compression: MediaCompressionNone},
|
||||
{CRC32: 0xdeadbeef, Version: MediaHeaderVersion, Compression: MediaCompressionGzip},
|
||||
{CRC32: 0, Version: MediaHeaderVersion, Compression: MediaCompressionDeflate},
|
||||
}
|
||||
for _, h := range cases {
|
||||
buf := EncodeMediaBlockHeader(h)
|
||||
if len(buf) != MediaBlockHeaderLen {
|
||||
t.Fatalf("encoded length = %d, want %d", len(buf), MediaBlockHeaderLen)
|
||||
}
|
||||
// Reserved bytes must be zero for forward compatibility.
|
||||
if !bytes.Equal(buf[6:], make([]byte, MediaBlockHeaderLen-6)) {
|
||||
t.Fatalf("reserved bytes not zero: %x", buf[6:])
|
||||
}
|
||||
got, err := DecodeMediaBlockHeader(buf)
|
||||
if err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if got != h {
|
||||
t.Fatalf("round-trip: got %+v, want %+v", got, h)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMediaBlockHeaderRejectsBadVersion(t *testing.T) {
|
||||
buf := EncodeMediaBlockHeader(MediaBlockHeader{CRC32: 1, Version: MediaHeaderVersion, Compression: MediaCompressionNone})
|
||||
buf[4] = 9 // bogus version
|
||||
_, err := DecodeMediaBlockHeader(buf)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown version")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMediaBlockHeaderRejectsBadCompression(t *testing.T) {
|
||||
buf := EncodeMediaBlockHeader(MediaBlockHeader{Version: MediaHeaderVersion})
|
||||
buf[5] = 99
|
||||
_, err := DecodeMediaBlockHeader(buf)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown compression")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMediaBlockHeaderRejectsTruncated(t *testing.T) {
|
||||
_, err := DecodeMediaBlockHeader(make([]byte, MediaBlockHeaderLen-1))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for truncated header")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMediaCompressionName(t *testing.T) {
|
||||
cases := map[string]MediaCompression{
|
||||
"": MediaCompressionNone,
|
||||
"none": MediaCompressionNone,
|
||||
"gzip": MediaCompressionGzip,
|
||||
"deflate": MediaCompressionDeflate,
|
||||
}
|
||||
for in, want := range cases {
|
||||
got, err := ParseMediaCompressionName(in)
|
||||
if err != nil {
|
||||
t.Errorf("ParseMediaCompressionName(%q): %v", in, err)
|
||||
continue
|
||||
}
|
||||
if got != want {
|
||||
t.Errorf("ParseMediaCompressionName(%q) = %v, want %v", in, got, want)
|
||||
}
|
||||
}
|
||||
if _, err := ParseMediaCompressionName("brotli"); err == nil {
|
||||
t.Fatal("expected error for unknown compression name")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,232 @@
|
||||
package protocol
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEncodeMediaTextRoundTrip(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
meta MediaMeta
|
||||
caption string
|
||||
}{
|
||||
{
|
||||
name: "image with caption",
|
||||
meta: MediaMeta{
|
||||
Tag: MediaImage,
|
||||
Size: 123456,
|
||||
Downloadable: true,
|
||||
Channel: 12345,
|
||||
Blocks: 42,
|
||||
CRC32: 0xabcdef01,
|
||||
},
|
||||
caption: "hello world\nmulti-line",
|
||||
},
|
||||
{
|
||||
name: "file with filename",
|
||||
meta: MediaMeta{
|
||||
Tag: MediaFile,
|
||||
Size: 800,
|
||||
Downloadable: true,
|
||||
Channel: MediaChannelStart,
|
||||
Blocks: 2,
|
||||
CRC32: 0,
|
||||
Filename: "report.zip",
|
||||
},
|
||||
caption: "",
|
||||
},
|
||||
{
|
||||
name: "filename strips path traversal",
|
||||
meta: MediaMeta{
|
||||
Tag: MediaFile,
|
||||
Size: 100,
|
||||
Downloadable: true,
|
||||
Channel: MediaChannelStart + 1,
|
||||
Blocks: 1,
|
||||
CRC32: 0xdeadbeef,
|
||||
// Server-side sanitisation strips dirs, control chars, and ":"
|
||||
// before the metadata reaches the wire — so a parsed filename
|
||||
// is never going to contain any of those.
|
||||
Filename: "/tmp/../etc/passwd:bad\nname",
|
||||
},
|
||||
caption: "",
|
||||
},
|
||||
{
|
||||
name: "non-downloadable image",
|
||||
meta: MediaMeta{
|
||||
Tag: MediaImage,
|
||||
Size: 50_000_000,
|
||||
Downloadable: false,
|
||||
Channel: 0,
|
||||
Blocks: 0,
|
||||
CRC32: 0xdeadbeef,
|
||||
},
|
||||
caption: "too big",
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
body := EncodeMediaText(tc.meta, tc.caption)
|
||||
meta, caption, ok := ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText returned ok=false for body %q", body)
|
||||
}
|
||||
if caption != tc.caption {
|
||||
t.Fatalf("caption = %q, want %q", caption, tc.caption)
|
||||
}
|
||||
if meta.Tag != tc.meta.Tag {
|
||||
t.Fatalf("Tag = %q, want %q", meta.Tag, tc.meta.Tag)
|
||||
}
|
||||
if meta.Size != tc.meta.Size {
|
||||
t.Fatalf("Size = %d, want %d", meta.Size, tc.meta.Size)
|
||||
}
|
||||
if meta.Downloadable != tc.meta.Downloadable {
|
||||
t.Fatalf("Downloadable = %v, want %v", meta.Downloadable, tc.meta.Downloadable)
|
||||
}
|
||||
if meta.Channel != tc.meta.Channel {
|
||||
t.Fatalf("Channel = %d, want %d", meta.Channel, tc.meta.Channel)
|
||||
}
|
||||
if meta.Blocks != tc.meta.Blocks {
|
||||
t.Fatalf("Blocks = %d, want %d", meta.Blocks, tc.meta.Blocks)
|
||||
}
|
||||
if meta.CRC32 != tc.meta.CRC32 {
|
||||
t.Fatalf("CRC32 = %x, want %x", meta.CRC32, tc.meta.CRC32)
|
||||
}
|
||||
wantFilename := SanitiseMediaFilename(tc.meta.Filename)
|
||||
if meta.Filename != wantFilename {
|
||||
t.Fatalf("Filename = %q, want %q", meta.Filename, wantFilename)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitiseMediaFilename(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"": "",
|
||||
"report.zip": "report.zip",
|
||||
"path/to/report.zip": "report.zip",
|
||||
"..": "",
|
||||
"a:b\nc.txt": "abc.txt",
|
||||
"hello": "hello",
|
||||
"WeIrD-Name_v2.tar.gz": "WeIrD-Name_v2.tar.gz",
|
||||
"\xff\xfe.txt": "media.txt",
|
||||
"\u062d\u0645\u0644\u0647.zip": "media.zip",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := SanitiseMediaFilename(in); got != want {
|
||||
t.Errorf("SanitiseMediaFilename(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitiseMediaFilenameLongName(t *testing.T) {
|
||||
long := strings.Repeat("abc", 50) + ".zip"
|
||||
got := SanitiseMediaFilename(long)
|
||||
if !strings.HasPrefix(got, "media-") || !strings.HasSuffix(got, ".zip") {
|
||||
t.Fatalf("long filename = %q, want media-<hash>.zip", got)
|
||||
}
|
||||
if len(got) > 6+8+1+3 {
|
||||
t.Fatalf("long filename too long: %q", got)
|
||||
}
|
||||
if again := SanitiseMediaFilename(long); again != got {
|
||||
t.Fatalf("non-deterministic: %q vs %q", got, again)
|
||||
}
|
||||
}
|
||||
|
||||
// Backward compat: legacy "[IMAGE]\ncaption" must still parse cleanly with
|
||||
// caption preserved and Downloadable=false.
|
||||
func TestParseMediaTextLegacy(t *testing.T) {
|
||||
body := "[IMAGE]\nlook at this"
|
||||
meta, caption, ok := ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText ok=false on legacy body")
|
||||
}
|
||||
if meta.Tag != MediaImage {
|
||||
t.Fatalf("Tag = %q, want %q", meta.Tag, MediaImage)
|
||||
}
|
||||
if meta.Downloadable {
|
||||
t.Fatalf("Downloadable should be false on legacy body")
|
||||
}
|
||||
if caption != "look at this" {
|
||||
t.Fatalf("caption = %q, want %q", caption, "look at this")
|
||||
}
|
||||
}
|
||||
|
||||
// Backward compat: legacy [IMAGE] with no caption.
|
||||
func TestParseMediaTextLegacyNoCaption(t *testing.T) {
|
||||
for _, body := range []string{"[IMAGE]", "[IMAGE]\n"} {
|
||||
meta, caption, ok := ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ok=false on %q", body)
|
||||
}
|
||||
if meta.Tag != MediaImage {
|
||||
t.Fatalf("Tag = %q, want [IMAGE]", meta.Tag)
|
||||
}
|
||||
if meta.Downloadable {
|
||||
t.Fatalf("legacy body should not be downloadable")
|
||||
}
|
||||
if caption != "" {
|
||||
t.Fatalf("caption = %q, want empty", caption)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A normal caption that happens to lead with a media tag should not be
|
||||
// misparsed as downloadable metadata.
|
||||
func TestParseMediaTextHumanCaption(t *testing.T) {
|
||||
body := "[IMAGE]nice picture\nrest of post"
|
||||
meta, caption, ok := ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ok=false on caption-leading body")
|
||||
}
|
||||
if meta.Downloadable {
|
||||
t.Fatalf("downloadable should be false for a human caption")
|
||||
}
|
||||
if meta.Channel != 0 {
|
||||
t.Fatalf("channel should be 0 for non-metadata body, got %d", meta.Channel)
|
||||
}
|
||||
want := "nice picture\nrest of post"
|
||||
if caption != want {
|
||||
t.Fatalf("caption = %q, want %q", caption, want)
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown tag → ok=false.
|
||||
func TestParseMediaTextUnknownTag(t *testing.T) {
|
||||
_, _, ok := ParseMediaText("not a tag")
|
||||
if ok {
|
||||
t.Fatalf("ok=true for non-tag body")
|
||||
}
|
||||
}
|
||||
|
||||
// A metadata line that names a channel outside the media range must NOT be
|
||||
// surfaced as downloadable.
|
||||
func TestParseMediaTextRejectsOutOfRangeChannel(t *testing.T) {
|
||||
body := "[IMAGE]100:1:5:200:00000000\ncaption"
|
||||
meta, _, ok := ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ok=false on otherwise-valid metadata")
|
||||
}
|
||||
if meta.Downloadable {
|
||||
t.Fatalf("Downloadable should be false for channel %d outside media range", meta.Channel)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsMediaChannel(t *testing.T) {
|
||||
checks := map[uint16]bool{
|
||||
0: false,
|
||||
1: false,
|
||||
MediaChannelStart - 1: false,
|
||||
MediaChannelStart: true,
|
||||
MediaChannelStart + 100: true,
|
||||
MediaChannelEnd: true,
|
||||
MediaChannelEnd + 1: false,
|
||||
65535: false,
|
||||
}
|
||||
for ch, want := range checks {
|
||||
if got := IsMediaChannel(ch); got != want {
|
||||
t.Errorf("IsMediaChannel(%d) = %v, want %v", ch, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,12 @@ const (
|
||||
// DefaultBlockPayload is kept for compatibility; equals MaxBlockPayload.
|
||||
DefaultBlockPayload = MaxBlockPayload
|
||||
|
||||
// MediaBlockPayload is the fixed payload size used for media (image/file)
|
||||
// blocks. Media blocks are raw binary, and using a fixed size simplifies
|
||||
// both server-side block boundaries and client-side range/resume math.
|
||||
// Tuned for safe DNS UDP response after AES-GCM + base64 + padding.
|
||||
MediaBlockPayload = MaxBlockPayload
|
||||
|
||||
// DefaultMaxPadding is the default random padding added to responses to vary DNS response size.
|
||||
DefaultMaxPadding = 32
|
||||
|
||||
@@ -29,6 +35,14 @@ const (
|
||||
// MetadataChannel is the special channel number for server metadata.
|
||||
MetadataChannel = 0
|
||||
|
||||
// MediaChannelStart and MediaChannelEnd bound the channel-number range
|
||||
// reserved for cached binary media (images, files, ...). Each cached file
|
||||
// occupies one channel; bytes are split into raw blocks served via the
|
||||
// usual DNS TXT path. The range is well above typical feed channel counts
|
||||
// and well below the special control channels at the top of uint16 space.
|
||||
MediaChannelStart uint16 = 10000
|
||||
MediaChannelEnd uint16 = 60000 // inclusive
|
||||
|
||||
// MarkerSize is the random marker in metadata to verify data freshness.
|
||||
MarkerSize = 3
|
||||
|
||||
@@ -46,6 +60,14 @@ const (
|
||||
MsgContentHashSize = 4
|
||||
)
|
||||
|
||||
// IsMediaChannel reports whether ch falls inside the reserved media-blob
|
||||
// channel range. Media channels are not enumerated in Metadata; the client
|
||||
// learns each (channel, blocks, hash) tuple from the corresponding feed
|
||||
// message text via [TAG]<size>:<dl>:<ch>:<blk>:<crc32hex>.
|
||||
func IsMediaChannel(ch uint16) bool {
|
||||
return ch >= MediaChannelStart && ch <= MediaChannelEnd
|
||||
}
|
||||
|
||||
// Media placeholder strings for non-text content.
|
||||
const (
|
||||
MediaImage = "[IMAGE]"
|
||||
|
||||
@@ -58,6 +58,7 @@ type hourlyFetchReport struct {
|
||||
totalQueries int64
|
||||
metadataQueries int64
|
||||
versionQueries int64
|
||||
mediaQueries int64 // queries that landed in the media-blob channel range
|
||||
perChannel map[uint16]*channelFetchStats
|
||||
perResolver map[string]int64
|
||||
}
|
||||
@@ -696,6 +697,13 @@ func recordReportQuery(rep *hourlyFetchReport, event reportEvent) {
|
||||
rep.versionQueries++
|
||||
return
|
||||
}
|
||||
if protocol.IsMediaChannel(channel) {
|
||||
// We don't fan out per-media-channel stats — the channel-id is just
|
||||
// a transient slot, and 50K possible ids would explode the report.
|
||||
// Total media-query volume is enough for the operator's purposes.
|
||||
rep.mediaQueries++
|
||||
return
|
||||
}
|
||||
|
||||
stats := rep.perChannel[channel]
|
||||
if stats == nil {
|
||||
@@ -769,10 +777,14 @@ func (s *DNSServer) emitHourlyReport(rep *hourlyFetchReport, final bool) {
|
||||
"totalDnsQueries": rep.totalQueries,
|
||||
"totalMetadataQueries": rep.metadataQueries,
|
||||
"totalVersionQueries": rep.versionQueries,
|
||||
"totalMediaQueries": rep.mediaQueries,
|
||||
"channels": entries,
|
||||
"topResolvers": resolvers,
|
||||
"finalFlush": final,
|
||||
}
|
||||
if mediaCache := s.feed.MediaCache(); mediaCache != nil {
|
||||
payload["mediaCache"] = mediaCache.Stats()
|
||||
}
|
||||
b, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
log.Printf("[dns_hourly] marshal error: %v", err)
|
||||
|
||||
@@ -27,6 +27,13 @@ type Feed struct {
|
||||
telegramLoggedIn bool
|
||||
nextFetch uint32
|
||||
latestVersion string
|
||||
|
||||
// media holds binary blobs (images, files, ...) on a separate set of
|
||||
// channel numbers in the [MediaChannelStart, MediaChannelEnd] range. It
|
||||
// may be nil when media downloads are disabled — Feed.GetBlock then
|
||||
// rejects queries to media channels with a not-found error, mirroring
|
||||
// pre-feature behaviour.
|
||||
media *MediaCache
|
||||
}
|
||||
|
||||
// NewFeed creates a new Feed with the given channel names.
|
||||
@@ -88,6 +95,16 @@ func (f *Feed) GetBlock(channel, block int) ([]byte, error) {
|
||||
if channel == int(protocol.TitlesChannel) {
|
||||
return f.getTitlesBlock(block)
|
||||
}
|
||||
// Channel sits in the binary media range — delegate to MediaCache. We
|
||||
// drop the read lock first because MediaCache uses its own lock and we
|
||||
// don't want to hold f.mu across that path.
|
||||
if channel >= 0 && channel <= 0xFFFF && protocol.IsMediaChannel(uint16(channel)) {
|
||||
media := f.media
|
||||
if media == nil {
|
||||
return nil, fmt.Errorf("media channel %d not configured", channel)
|
||||
}
|
||||
return media.GetBlock(uint16(channel), uint16(block))
|
||||
}
|
||||
|
||||
ch, ok := f.blocks[channel]
|
||||
if !ok {
|
||||
@@ -99,6 +116,22 @@ func (f *Feed) GetBlock(channel, block int) ([]byte, error) {
|
||||
return ch[block], nil
|
||||
}
|
||||
|
||||
// SetMediaCache attaches a MediaCache to this Feed. Pass nil to disable
|
||||
// media serving (the default for backward compat). Safe to call once at
|
||||
// startup before any DNS query is served.
|
||||
func (f *Feed) SetMediaCache(c *MediaCache) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.media = c
|
||||
}
|
||||
|
||||
// MediaCache returns the configured MediaCache or nil.
|
||||
func (f *Feed) MediaCache() *MediaCache {
|
||||
f.mu.RLock()
|
||||
defer f.mu.RUnlock()
|
||||
return f.media
|
||||
}
|
||||
|
||||
func (f *Feed) getVersionBlock(block int) ([]byte, error) {
|
||||
blocks := f.versionBlocks
|
||||
if len(blocks) == 0 {
|
||||
|
||||
@@ -0,0 +1,524 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/flate"
|
||||
"compress/gzip"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// MediaCache stores binary media blobs (images, files, ...) keyed by an
|
||||
// upstream-stable identifier (Telegram file_id, image URL, ...). Each entry
|
||||
// occupies one channel number drawn from the [MediaChannelStart, MediaChannelEnd]
|
||||
// range, plus a precomputed list of fixed-size raw blocks served via the
|
||||
// regular DNS TXT path.
|
||||
//
|
||||
// The cache is safe for concurrent use. Hot-path operations (Store, GetBlock)
|
||||
// are O(log n) at worst and typically O(1) with the help of two side maps.
|
||||
type MediaCache struct {
|
||||
maxFileBytes int64
|
||||
ttl time.Duration
|
||||
compression protocol.MediaCompression
|
||||
|
||||
// Logger receives an info line per cache event when set (Store hits/misses,
|
||||
// evictions). The default is a silent no-op so tests don't print noise.
|
||||
logf func(format string, args ...interface{})
|
||||
|
||||
mu sync.RWMutex
|
||||
byKey map[string]*mediaEntry // upstream key (file_id / URL) → entry
|
||||
byChannel map[uint16]*mediaEntry // assigned channel → entry
|
||||
byHash map[uint32]*mediaEntry // CRC32(content) → entry, for cross-key dedup
|
||||
nextChannel uint16 // round-robin allocation hint
|
||||
|
||||
// Counters surfaced via Stats(); written with atomics so reads from the
|
||||
// hourly reporter don't have to acquire mu.
|
||||
storeHits uint64
|
||||
storeMisses uint64
|
||||
storeRejected uint64 // file too large
|
||||
queryCount uint64 // total media block queries served
|
||||
evictionCount uint64
|
||||
currentEntries int64 // live entry count
|
||||
currentBytes int64 // sum of file sizes currently cached
|
||||
}
|
||||
|
||||
// mediaEntry is one cached blob together with the bookkeeping needed to serve
// and expire it. Mutable fields are guarded by the owning MediaCache's mu.
type mediaEntry struct {
	// Identity.
	channel  uint16   // media channel this entry is served on
	cacheKey string   // primary upstream id this entry was first stored under
	aliases  []string // additional keys (different upstream ids, same content)

	// Descriptive metadata.
	tag      string // protocol media tag (MediaImage, MediaFile, ...)
	mimeType string
	filename string

	// Payload.
	size   int64    // length of the original content in bytes
	crc32  uint32   // CRC32 (IEEE) of the original content
	blocks [][]byte // encoded blocks, header included

	// Lifetime.
	expiresAt time.Time
	// inflight marks an entry that must not be reaped by the sweep or the
	// allocator; it is meant for entries still being downloaded by a
	// goroutine that hasn't installed them yet.
	inflight bool
}
|
||||
|
||||
// MediaCacheConfig configures a new MediaCache.
|
||||
type MediaCacheConfig struct {
|
||||
// MaxFileBytes is the largest individual file the cache will accept.
|
||||
// Files larger than this are rejected by Store with ErrTooLarge.
|
||||
MaxFileBytes int64
|
||||
// TTL is how long an entry stays cached after its last refresh.
|
||||
TTL time.Duration
|
||||
// Compression is the wire-format compression used for media blocks.
|
||||
// Defaults to MediaCompressionNone when zero.
|
||||
Compression protocol.MediaCompression
|
||||
// Logf receives info-level cache events. Optional.
|
||||
Logf func(format string, args ...interface{})
|
||||
}
|
||||
|
||||
// Sentinel errors returned by MediaCache.Store.
var (
	// ErrTooLarge reports that the content exceeds MaxFileBytes.
	ErrTooLarge = errors.New("media file exceeds configured max-size")

	// ErrCacheFull reports that no media channel slot could be obtained. In
	// practice this requires either MediaChannelEnd-Start+1 simultaneously
	// pinned files or a TTL too generous for the workload.
	ErrCacheFull = errors.New("no free media channel slot")
)
|
||||
|
||||
// NewMediaCache constructs a cache with the given configuration. A zero
|
||||
// MaxFileBytes disables the size cap; a zero TTL means entries never expire
|
||||
// (not recommended in production).
|
||||
func NewMediaCache(cfg MediaCacheConfig) *MediaCache {
|
||||
logf := cfg.Logf
|
||||
if logf == nil {
|
||||
logf = func(string, ...interface{}) {}
|
||||
}
|
||||
return &MediaCache{
|
||||
maxFileBytes: cfg.MaxFileBytes,
|
||||
ttl: cfg.TTL,
|
||||
compression: cfg.Compression,
|
||||
logf: logf,
|
||||
byKey: make(map[string]*mediaEntry),
|
||||
byChannel: make(map[uint16]*mediaEntry),
|
||||
byHash: make(map[uint32]*mediaEntry),
|
||||
nextChannel: protocol.MediaChannelStart,
|
||||
}
|
||||
}
|
||||
|
||||
// Store inserts (or refreshes) a media blob into the cache and returns
|
||||
// metadata that the caller can embed in a feed message.
|
||||
//
|
||||
// cacheKey is an upstream-stable identifier (e.g. Telegram file_id, image
|
||||
// URL). When the same key is stored again, the existing entry's TTL is
|
||||
// refreshed and the same channel/blocks are returned without copying the
|
||||
// contents — callers should rely on this for the "fetch every 10 min"
|
||||
// duplicate-handling case described in the design.
|
||||
//
|
||||
// tag is the protocol media tag (MediaImage, MediaFile, ...); mimeType and
|
||||
// filename are optional and stored for the HTTP layer to surface to the
|
||||
// client. content is the raw file bytes; the caller may pass a slice it
|
||||
// continues to use after the call (Store copies into block-sized chunks).
|
||||
func (c *MediaCache) Store(cacheKey, tag string, content []byte, mimeType, filename string) (protocol.MediaMeta, error) {
|
||||
if cacheKey == "" {
|
||||
return protocol.MediaMeta{}, errors.New("media: empty cache key")
|
||||
}
|
||||
if tag == "" {
|
||||
tag = protocol.MediaFile
|
||||
}
|
||||
size := int64(len(content))
|
||||
if c.maxFileBytes > 0 && size > c.maxFileBytes {
|
||||
atomic.AddUint64(&c.storeRejected, 1)
|
||||
return protocol.MediaMeta{
|
||||
Tag: tag,
|
||||
Size: size,
|
||||
Downloadable: false,
|
||||
}, ErrTooLarge
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
hash := crc32.ChecksumIEEE(content)
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
if existing, ok := c.byKey[cacheKey]; ok && existing.crc32 == hash {
|
||||
// Same upstream id and same content — just refresh the TTL.
|
||||
existing.expiresAt = c.expiry(now)
|
||||
atomic.AddUint64(&c.storeHits, 1)
|
||||
c.logf("media: refresh tag=%s key=%s ch=%d size=%d", tag, cacheKey, existing.channel, existing.size)
|
||||
return c.metaForLocked(existing), nil
|
||||
}
|
||||
|
||||
// Cross-key content match: a different upstream id pointed at exactly
|
||||
// the same bytes. Bind the new cache key to the existing entry so any
|
||||
// future Lookup under either key works, and refresh the TTL. This is
|
||||
// the case the spec asks for: "same media → just reset TTL, don't take
|
||||
// a new channel slot".
|
||||
if existing, ok := c.byHash[hash]; ok {
|
||||
existing.expiresAt = c.expiry(now)
|
||||
if cacheKey != existing.cacheKey {
|
||||
alreadyAliased := false
|
||||
for _, a := range existing.aliases {
|
||||
if a == cacheKey {
|
||||
alreadyAliased = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !alreadyAliased {
|
||||
existing.aliases = append(existing.aliases, cacheKey)
|
||||
}
|
||||
}
|
||||
c.byKey[cacheKey] = existing
|
||||
atomic.AddUint64(&c.storeHits, 1)
|
||||
c.logf("media: dedup tag=%s key=%s ch=%d size=%d (hash match)", tag, cacheKey, existing.channel, existing.size)
|
||||
return c.metaForLocked(existing), nil
|
||||
}
|
||||
|
||||
// Either a new key, or the same key carries different bytes (a Telegram
|
||||
// edit, a re-upload). Allocate a fresh channel and replace.
|
||||
if existing, ok := c.byKey[cacheKey]; ok {
|
||||
c.dropEntryLocked(existing)
|
||||
}
|
||||
|
||||
// Opportunistic sweep before we allocate. Without this, expired entries
|
||||
// that don't sit on the allocator's linear-scan path (i.e. ones below
|
||||
// nextChannel) accumulate until the periodic sweep runs. That breaks
|
||||
// the "TTL is the upper bound on how long a slot stays cached" promise
|
||||
// across burst-store workloads with small TTLs. The cost is O(n) over
|
||||
// active entries; n is capped by the media-channel range.
|
||||
c.sweepExpiredLocked(now)
|
||||
|
||||
channel, err := c.allocateChannelLocked(now)
|
||||
if err != nil {
|
||||
return protocol.MediaMeta{}, err
|
||||
}
|
||||
|
||||
blocks, encErr := splitMediaBlocks(hash, content, c.compression)
|
||||
if encErr != nil {
|
||||
return protocol.MediaMeta{}, encErr
|
||||
}
|
||||
if size > 0 {
|
||||
var compressedBody int
|
||||
for _, b := range blocks {
|
||||
compressedBody += len(b)
|
||||
}
|
||||
compressedBody -= protocol.MediaBlockHeaderLen
|
||||
if compressedBody < 0 {
|
||||
compressedBody = 0
|
||||
}
|
||||
var savedPct int
|
||||
if c.compression != protocol.MediaCompressionNone && size > 0 {
|
||||
savedPct = int((size - int64(compressedBody)) * 100 / size)
|
||||
}
|
||||
c.logf("media: compress=%s key=%s orig=%d body=%d saved=%d%%", c.compression, cacheKey, size, compressedBody, savedPct)
|
||||
}
|
||||
entry := &mediaEntry{
|
||||
channel: channel,
|
||||
cacheKey: cacheKey,
|
||||
mimeType: mimeType,
|
||||
filename: protocol.SanitiseMediaFilename(filename),
|
||||
tag: tag,
|
||||
size: size,
|
||||
crc32: hash,
|
||||
blocks: blocks,
|
||||
expiresAt: c.expiry(now),
|
||||
}
|
||||
c.byKey[cacheKey] = entry
|
||||
c.byChannel[channel] = entry
|
||||
c.byHash[hash] = entry
|
||||
atomic.AddUint64(&c.storeMisses, 1)
|
||||
atomic.AddInt64(&c.currentEntries, 1)
|
||||
atomic.AddInt64(&c.currentBytes, size)
|
||||
c.logf("media: store tag=%s key=%s ch=%d size=%d blocks=%d", tag, cacheKey, channel, size, len(blocks))
|
||||
|
||||
return c.metaForLocked(entry), nil
|
||||
}
|
||||
|
||||
// LookupByChannel returns the cached entry's transport metadata (mime,
|
||||
// filename) for a serving channel. Returns ok=false if no entry is mapped.
|
||||
// Used by the HTTP layer to pick a sensible Content-Type/Content-Disposition
|
||||
// for clients that didn't provide one in the query string.
|
||||
func (c *MediaCache) LookupByChannel(channel uint16) (mime, filename string, ok bool) {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
entry, found := c.byChannel[channel]
|
||||
if !found {
|
||||
return "", "", false
|
||||
}
|
||||
return entry.mimeType, entry.filename, true
|
||||
}
|
||||
|
||||
// Lookup returns the metadata for an entry by cache key, refreshing TTL on
|
||||
// hit. Returns ok=false if not present.
|
||||
func (c *MediaCache) Lookup(cacheKey string) (protocol.MediaMeta, bool) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
entry, ok := c.byKey[cacheKey]
|
||||
if !ok {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
entry.expiresAt = c.expiry(time.Now())
|
||||
return c.metaForLocked(entry), true
|
||||
}
|
||||
|
||||
// GetBlock returns one block of cached media for serving over DNS. Returns an
|
||||
// error if the channel isn't a media channel, the entry has expired, or the
|
||||
// block index is out of range. Increments the served-query counter.
|
||||
func (c *MediaCache) GetBlock(channel, block uint16) ([]byte, error) {
|
||||
if !protocol.IsMediaChannel(channel) {
|
||||
return nil, fmt.Errorf("channel %d is outside media range", channel)
|
||||
}
|
||||
atomic.AddUint64(&c.queryCount, 1)
|
||||
|
||||
c.mu.RLock()
|
||||
entry, ok := c.byChannel[channel]
|
||||
c.mu.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("media channel %d not found", channel)
|
||||
}
|
||||
if int(block) >= len(entry.blocks) {
|
||||
return nil, fmt.Errorf("media block %d out of range (%d blocks)", block, len(entry.blocks))
|
||||
}
|
||||
// Reading a block extends the entry lifetime — clients in the middle of
|
||||
// downloading shouldn't have the cache rug pulled mid-transfer.
|
||||
c.mu.Lock()
|
||||
entry.expiresAt = c.expiry(time.Now())
|
||||
c.mu.Unlock()
|
||||
return entry.blocks[block], nil
|
||||
}
|
||||
|
||||
// Sweep evicts entries whose TTL has elapsed. Returns the number evicted.
|
||||
// Safe to call from a periodic goroutine.
|
||||
func (c *MediaCache) Sweep() int {
|
||||
if c.ttl <= 0 {
|
||||
return 0
|
||||
}
|
||||
now := time.Now()
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
n := c.sweepExpiredLocked(now)
|
||||
if n > 0 {
|
||||
c.logf("media: sweep evicted=%d remaining=%d", n, len(c.byChannel))
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// sweepExpiredLocked is the shared implementation behind both the periodic
|
||||
// Sweep and the opportunistic per-Store sweep. Caller must hold c.mu.
|
||||
// It returns the number of entries evicted.
|
||||
func (c *MediaCache) sweepExpiredLocked(now time.Time) int {
|
||||
if c.ttl <= 0 {
|
||||
return 0
|
||||
}
|
||||
var expired []*mediaEntry
|
||||
for _, entry := range c.byChannel {
|
||||
if entry.inflight {
|
||||
continue
|
||||
}
|
||||
if now.After(entry.expiresAt) {
|
||||
expired = append(expired, entry)
|
||||
}
|
||||
}
|
||||
for _, entry := range expired {
|
||||
c.dropEntryLocked(entry)
|
||||
}
|
||||
return len(expired)
|
||||
}
|
||||
|
||||
// MediaCacheStats is a point-in-time copy of a MediaCache's counters and
// configuration, as produced by Stats.
type MediaCacheStats struct {
	Entries       int64  `json:"entries"`       // live entry count
	Bytes         int64  `json:"bytes"`         // sum of cached file sizes
	Queries       uint64 `json:"queries"`       // media block queries served
	StoreHits     uint64 `json:"storeHits"`     // Store calls resolved by refresh/dedup
	StoreMisses   uint64 `json:"storeMisses"`   // Store calls that created an entry
	StoreRejected uint64 `json:"storeRejected"` // Store calls rejected as too large
	Evictions     uint64 `json:"evictions"`     // entries dropped from the cache
	MaxFileBytes  int64  `json:"maxFileBytes"`  // configured per-file size cap
	TTLSeconds    int64  `json:"ttlSeconds"`    // configured TTL, whole seconds
}
|
||||
|
||||
// Stats returns a snapshot of cache counters. Lock-free for the per-counter
|
||||
// fields; Entries and Bytes are also atomic.
|
||||
func (c *MediaCache) Stats() MediaCacheStats {
|
||||
return MediaCacheStats{
|
||||
Entries: atomic.LoadInt64(&c.currentEntries),
|
||||
Bytes: atomic.LoadInt64(&c.currentBytes),
|
||||
Queries: atomic.LoadUint64(&c.queryCount),
|
||||
StoreHits: atomic.LoadUint64(&c.storeHits),
|
||||
StoreMisses: atomic.LoadUint64(&c.storeMisses),
|
||||
StoreRejected: atomic.LoadUint64(&c.storeRejected),
|
||||
Evictions: atomic.LoadUint64(&c.evictionCount),
|
||||
MaxFileBytes: c.maxFileBytes,
|
||||
TTLSeconds: int64(c.ttl / time.Second),
|
||||
}
|
||||
}
|
||||
|
||||
// allocateChannelLocked finds a free channel in the media range, evicting
|
||||
// expired entries on the way. Caller must hold c.mu.
|
||||
func (c *MediaCache) allocateChannelLocked(now time.Time) (uint16, error) {
|
||||
rangeSize := int(protocol.MediaChannelEnd) - int(protocol.MediaChannelStart) + 1
|
||||
start := c.nextChannel
|
||||
if start < protocol.MediaChannelStart || start > protocol.MediaChannelEnd {
|
||||
start = protocol.MediaChannelStart
|
||||
}
|
||||
cur := start
|
||||
for i := 0; i < rangeSize; i++ {
|
||||
entry, taken := c.byChannel[cur]
|
||||
if !taken {
|
||||
c.advanceNextLocked(cur)
|
||||
return cur, nil
|
||||
}
|
||||
if !entry.inflight && c.ttl > 0 && now.After(entry.expiresAt) {
|
||||
c.dropEntryLocked(entry)
|
||||
c.advanceNextLocked(cur)
|
||||
return cur, nil
|
||||
}
|
||||
// Step to next slot, wrap when we hit the end of the range.
|
||||
if cur == protocol.MediaChannelEnd {
|
||||
cur = protocol.MediaChannelStart
|
||||
} else {
|
||||
cur++
|
||||
}
|
||||
}
|
||||
// Range fully occupied with non-expired entries — evict the oldest one as
|
||||
// a last resort, so the cache never hard-fails under steady-state
|
||||
// pressure with reasonable configs.
|
||||
var oldest *mediaEntry
|
||||
for _, entry := range c.byChannel {
|
||||
if entry.inflight {
|
||||
continue
|
||||
}
|
||||
if oldest == nil || entry.expiresAt.Before(oldest.expiresAt) {
|
||||
oldest = entry
|
||||
}
|
||||
}
|
||||
if oldest == nil {
|
||||
return 0, ErrCacheFull
|
||||
}
|
||||
freed := oldest.channel
|
||||
c.dropEntryLocked(oldest)
|
||||
c.advanceNextLocked(freed)
|
||||
return freed, nil
|
||||
}
|
||||
|
||||
func (c *MediaCache) advanceNextLocked(used uint16) {
|
||||
if used == protocol.MediaChannelEnd {
|
||||
c.nextChannel = protocol.MediaChannelStart
|
||||
} else {
|
||||
c.nextChannel = used + 1
|
||||
}
|
||||
}
|
||||
|
||||
func (c *MediaCache) dropEntryLocked(entry *mediaEntry) {
|
||||
delete(c.byChannel, entry.channel)
|
||||
delete(c.byKey, entry.cacheKey)
|
||||
for _, alias := range entry.aliases {
|
||||
// Only delete an alias if it still resolves to this entry; a later
|
||||
// store under the same key may have rebound it elsewhere.
|
||||
if c.byKey[alias] == entry {
|
||||
delete(c.byKey, alias)
|
||||
}
|
||||
}
|
||||
if c.byHash[entry.crc32] == entry {
|
||||
delete(c.byHash, entry.crc32)
|
||||
}
|
||||
atomic.AddInt64(&c.currentEntries, -1)
|
||||
atomic.AddInt64(&c.currentBytes, -entry.size)
|
||||
atomic.AddUint64(&c.evictionCount, 1)
|
||||
}
|
||||
|
||||
func (c *MediaCache) expiry(now time.Time) time.Time {
|
||||
if c.ttl <= 0 {
|
||||
// "Never" — represented as far future so all comparisons act as expected.
|
||||
return time.Unix(1<<62, 0)
|
||||
}
|
||||
return now.Add(c.ttl)
|
||||
}
|
||||
|
||||
func (c *MediaCache) metaForLocked(entry *mediaEntry) protocol.MediaMeta {
|
||||
return protocol.MediaMeta{
|
||||
Tag: entry.tag,
|
||||
Size: entry.size,
|
||||
Downloadable: true,
|
||||
Channel: entry.channel,
|
||||
Blocks: uint16(len(entry.blocks)),
|
||||
CRC32: entry.crc32,
|
||||
Filename: entry.filename,
|
||||
}
|
||||
}
|
||||
|
||||
// splitMediaBlocks compresses the content (when compression != none),
|
||||
// prepends the protocol media header, then splits the result into
|
||||
// randomly-sized blocks. The CRC32 carried in the header is over the
|
||||
// DECOMPRESSED bytes so the client can verify integrity after
|
||||
// decompression. Uniform sizing is avoided to match the anti-DPI strategy
|
||||
// used for feed-message blocks.
|
||||
func splitMediaBlocks(crc32Hash uint32, content []byte, compression protocol.MediaCompression) ([][]byte, error) {
|
||||
body, err := compressMediaBytes(content, compression)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
header := protocol.EncodeMediaBlockHeader(protocol.MediaBlockHeader{
|
||||
CRC32: crc32Hash,
|
||||
Version: protocol.MediaHeaderVersion,
|
||||
Compression: compression,
|
||||
})
|
||||
full := make([]byte, 0, len(header)+len(body))
|
||||
full = append(full, header...)
|
||||
full = append(full, body...)
|
||||
return protocol.SplitIntoBlocks(full), nil
|
||||
}
|
||||
|
||||
func compressMediaBytes(content []byte, compression protocol.MediaCompression) ([]byte, error) {
|
||||
switch compression {
|
||||
case protocol.MediaCompressionNone:
|
||||
return content, nil
|
||||
case protocol.MediaCompressionGzip:
|
||||
var buf bytes.Buffer
|
||||
zw, err := gzip.NewWriterLevel(&buf, gzip.BestCompression)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := zw.Write(content); err != nil {
|
||||
zw.Close()
|
||||
return nil, err
|
||||
}
|
||||
if err := zw.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
case protocol.MediaCompressionDeflate:
|
||||
var buf bytes.Buffer
|
||||
zw, err := flate.NewWriter(&buf, flate.BestCompression)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := zw.Write(content); err != nil {
|
||||
zw.Close()
|
||||
return nil, err
|
||||
}
|
||||
if err := zw.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported media compression: %d", compression)
|
||||
}
|
||||
|
||||
// DecompressMediaBytes is the inverse of compressMediaBytes; exposed for
|
||||
// the HTTP layer (which receives a stream of compressed bytes after the
|
||||
// header is stripped) and tests.
|
||||
func DecompressMediaBytes(r io.Reader, compression protocol.MediaCompression) (io.ReadCloser, error) {
|
||||
switch compression {
|
||||
case protocol.MediaCompressionNone:
|
||||
return io.NopCloser(r), nil
|
||||
case protocol.MediaCompressionGzip:
|
||||
return gzip.NewReader(r)
|
||||
case protocol.MediaCompressionDeflate:
|
||||
return flate.NewReader(r), nil
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported media compression: %d", compression)
|
||||
}
|
||||
@@ -0,0 +1,148 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// httpMediaClient is the shared HTTP client used to fetch media URLs that
// the public-Telegram and X readers extract. The timeout is kept relatively
// short so a slow media download cannot stall the rest of a fetch cycle.
var httpMediaClient = &http.Client{
	Timeout: 60 * time.Second,
	// Redirect policy: give up on long redirect chains and refuse any hop
	// whose scheme is not http(s). Ordinary 301/302 hops to a regional CDN
	// host are allowed.
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		switch {
		case len(via) > 5:
			return errors.New("too many redirects")
		case req.URL.Scheme != "http" && req.URL.Scheme != "https":
			return fmt.Errorf("disallowed redirect scheme %q", req.URL.Scheme)
		}
		return nil
	},
}
|
||||
|
||||
// allowedMediaSchemes lists the URL schemes downloadHTTPMedia is willing to
// fetch; a scheme that is absent from the map is rejected.
var allowedMediaSchemes = map[string]bool{"http": true, "https": true}
|
||||
|
||||
// downloadHTTPMedia fetches the bytes at rawURL and stores them in cache,
|
||||
// using the URL itself as the cache key (so refreshing the same channel
|
||||
// every 10 min just bumps TTL on hit).
|
||||
//
|
||||
// It enforces the configured max-size both up-front (Content-Length) and on
|
||||
// the wire (LimitReader) so a server lying about size can't blow past the
|
||||
// limit. URLs are validated against allowedMediaSchemes; private-network
|
||||
// targets are not blocked here because callers (PublicReader, XPublicReader)
|
||||
// only pass URLs scraped from Telegram/Nitter responses.
|
||||
func downloadHTTPMedia(ctx context.Context, cache *MediaCache, tag, rawURL string) (protocol.MediaMeta, bool) {
|
||||
if cache == nil || rawURL == "" {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
parsed, err := url.Parse(rawURL)
|
||||
if err != nil || !allowedMediaSchemes[parsed.Scheme] {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
// Cache key is the canonical URL — image-link rotation on the upstream
|
||||
// side will create a fresh entry, but identical URLs across fetches will
|
||||
// just refresh TTL.
|
||||
cacheKey := tag + ":url:" + parsed.String()
|
||||
if meta, ok := cache.Lookup(cacheKey); ok {
|
||||
return meta, true
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, parsed.String(), nil)
|
||||
if err != nil {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; thefeed/1.0)")
|
||||
req.Header.Set("Accept", "image/*, application/octet-stream;q=0.9, */*;q=0.5")
|
||||
|
||||
resp, err := httpMediaClient.Do(req)
|
||||
if err != nil {
|
||||
logfMedia("[media-http] %s: request failed: %v", parsed.String(), err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
// Defense in depth: reject HTML/XHTML responses outright. Telegram's
|
||||
// public web view sometimes redirects "file" links to the channel page
|
||||
// itself; without this check we'd happily cache the channel's HTML as
|
||||
// the user's downloadable file.
|
||||
ctype := strings.ToLower(strings.TrimSpace(strings.Split(resp.Header.Get("Content-Type"), ";")[0]))
|
||||
if ctype == "text/html" || ctype == "application/xhtml+xml" {
|
||||
logfMedia("[media-http] %s: refusing HTML response (got %s)", parsed.String(), ctype)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
maxBytes := cache.maxFileBytes
|
||||
if maxBytes > 0 && resp.ContentLength > 0 && resp.ContentLength > maxBytes {
|
||||
size := resp.ContentLength
|
||||
return protocol.MediaMeta{
|
||||
Tag: tag,
|
||||
Size: size,
|
||||
Downloadable: false,
|
||||
}, true
|
||||
}
|
||||
|
||||
limit := int64(-1)
|
||||
if maxBytes > 0 {
|
||||
limit = maxBytes + 1 // +1 to detect overflow vs exact match
|
||||
}
|
||||
var body io.Reader = resp.Body
|
||||
if limit > 0 {
|
||||
body = io.LimitReader(resp.Body, limit)
|
||||
}
|
||||
bytes, err := io.ReadAll(body)
|
||||
if err != nil {
|
||||
logfMedia("[media-http] %s: read failed: %v", parsed.String(), err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
if maxBytes > 0 && int64(len(bytes)) > maxBytes {
|
||||
return protocol.MediaMeta{
|
||||
Tag: tag,
|
||||
Size: int64(len(bytes)),
|
||||
Downloadable: false,
|
||||
}, true
|
||||
}
|
||||
|
||||
meta, err := cache.Store(cacheKey, tag, bytes, resp.Header.Get("Content-Type"), urlBaseName(parsed))
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrTooLarge) {
|
||||
return meta, true
|
||||
}
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
return meta, true
|
||||
}
|
||||
|
||||
// urlBaseName derives a best-effort filename from u for the HTTP layer's
// Content-Disposition header: the last segment of u.Path, cut at the first
// '?' if one is present. It returns "" for a nil URL or when the path has no
// usable final segment.
func urlBaseName(u *url.URL) string {
	if u == nil {
		return ""
	}
	name := path.Base(u.Path)
	switch name {
	case "", "/", ".":
		return ""
	}
	if cut := strings.IndexByte(name, '?'); cut >= 0 {
		return name[:cut]
	}
	return name
}
|
||||
@@ -0,0 +1,298 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// TestApplyHTTPMediaSourcesEndToEnd wires a fake upstream HTTP image server,
|
||||
// runs applyHTTPMediaSources against it, and verifies the message body now
|
||||
// carries downloadable metadata that ParseMediaText can read back. Then it
|
||||
// fetches a block out of the resulting MediaCache to confirm the bytes were
|
||||
// stored correctly.
|
||||
func TestApplyHTTPMediaSourcesEndToEnd(t *testing.T) {
|
||||
imageBytes := []byte("fake-image-bytes-payload-1234567890")
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "image/png")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write(imageBytes)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour})
|
||||
|
||||
msgs := []protocol.Message{
|
||||
{ID: 100, Timestamp: 1, Text: protocol.MediaImage + "\nhello"},
|
||||
}
|
||||
sources := []mediaSource{{tag: protocol.MediaImage, url: srv.URL + "/photo.png"}}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
|
||||
meta, caption, ok := protocol.ParseMediaText(msgs[0].Text)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText ok=false on rewritten message: %q", msgs[0].Text)
|
||||
}
|
||||
if !meta.Downloadable {
|
||||
t.Fatalf("expected downloadable meta, got %+v (text=%q)", meta, msgs[0].Text)
|
||||
}
|
||||
if meta.Tag != protocol.MediaImage {
|
||||
t.Fatalf("Tag = %q, want %q", meta.Tag, protocol.MediaImage)
|
||||
}
|
||||
if meta.Size != int64(len(imageBytes)) {
|
||||
t.Fatalf("Size = %d, want %d", meta.Size, len(imageBytes))
|
||||
}
|
||||
if caption != "hello" {
|
||||
t.Fatalf("caption = %q, want %q", caption, "hello")
|
||||
}
|
||||
|
||||
// Block 0 starts with the 4-byte CRC32 prefix; subsequent blocks are
|
||||
// raw content.
|
||||
var got []byte
|
||||
for blk := uint16(0); blk < meta.Blocks; blk++ {
|
||||
b, err := cache.GetBlock(meta.Channel, blk)
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlock(%d, %d): %v", meta.Channel, blk, err)
|
||||
}
|
||||
got = append(got, b...)
|
||||
}
|
||||
if len(got) < protocol.MediaBlockHeaderLen {
|
||||
t.Fatalf("block 0 too short: %d", len(got))
|
||||
}
|
||||
hdr, err := protocol.DecodeMediaBlockHeader(got[:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMediaBlockHeader: %v", err)
|
||||
}
|
||||
if hdr.CRC32 != meta.CRC32 {
|
||||
t.Fatalf("header CRC = %x, want %x", hdr.CRC32, meta.CRC32)
|
||||
}
|
||||
if string(got[protocol.MediaBlockHeaderLen:]) != string(imageBytes) {
|
||||
t.Fatalf("reassembled bytes differ:\n got: %q\n want: %q", got[protocol.MediaBlockHeaderLen:], imageBytes)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyHTTPMediaSourcesGzipRoundTrip: with --media-compression=gzip,
|
||||
// a successful upstream fetch lands compressed blocks in the cache. A
|
||||
// client decompressing the assembled blocks recovers the original bytes
|
||||
// verbatim and the embedded CRC32 matches.
|
||||
func TestApplyHTTPMediaSourcesGzipRoundTrip(t *testing.T) {
|
||||
imageBytes := bytes.Repeat([]byte("compressible-stripe "), 300)
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "image/png")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write(imageBytes)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
cache := NewMediaCache(MediaCacheConfig{
|
||||
MaxFileBytes: 1 << 20,
|
||||
TTL: time.Hour,
|
||||
Compression: protocol.MediaCompressionGzip,
|
||||
})
|
||||
msgs := []protocol.Message{{ID: 100, Timestamp: 1, Text: protocol.MediaImage + "\n"}}
|
||||
sources := []mediaSource{{tag: protocol.MediaImage, url: srv.URL + "/big.png"}}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
|
||||
meta, _, ok := protocol.ParseMediaText(msgs[0].Text)
|
||||
if !ok || !meta.Downloadable {
|
||||
t.Fatalf("expected downloadable meta, got %+v", meta)
|
||||
}
|
||||
|
||||
var got []byte
|
||||
for blk := uint16(0); blk < meta.Blocks; blk++ {
|
||||
b, err := cache.GetBlock(meta.Channel, blk)
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlock: %v", err)
|
||||
}
|
||||
got = append(got, b...)
|
||||
}
|
||||
hdr, err := protocol.DecodeMediaBlockHeader(got[:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMediaBlockHeader: %v", err)
|
||||
}
|
||||
if hdr.Compression != protocol.MediaCompressionGzip {
|
||||
t.Fatalf("compression = %v, want gzip", hdr.Compression)
|
||||
}
|
||||
if hdr.CRC32 != meta.CRC32 {
|
||||
t.Fatalf("header CRC = %x, want %x", hdr.CRC32, meta.CRC32)
|
||||
}
|
||||
rc, err := DecompressMediaBytes(bytes.NewReader(got[protocol.MediaBlockHeaderLen:]), hdr.Compression)
|
||||
if err != nil {
|
||||
t.Fatalf("decompress: %v", err)
|
||||
}
|
||||
defer rc.Close()
|
||||
out, err := io.ReadAll(rc)
|
||||
if err != nil {
|
||||
t.Fatalf("read all: %v", err)
|
||||
}
|
||||
if !bytes.Equal(out, imageBytes) {
|
||||
t.Fatalf("decompressed differs from upstream")
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyHTTPMediaSourcesAlbum: when src.extraURLs is populated (public-mode
|
||||
// album), every URL is fetched and the canonical body is rebuilt with N
|
||||
// stacked downloadable headers + the original caption. The frontend then
|
||||
// renders an N-card album.
|
||||
func TestApplyHTTPMediaSourcesAlbum(t *testing.T) {
|
||||
images := [][]byte{
|
||||
[]byte("first-image-bytes-XXXXXX"),
|
||||
[]byte("second-image-bytes-YYYYY"),
|
||||
[]byte("third-image-bytes-ZZZZZZ"),
|
||||
}
|
||||
served := 0
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
// Path looks like /img-N.jpg → pick the matching slice.
|
||||
switch r.URL.Path {
|
||||
case "/img1.jpg":
|
||||
w.Write(images[0])
|
||||
case "/img2.jpg":
|
||||
w.Write(images[1])
|
||||
case "/img3.jpg":
|
||||
w.Write(images[2])
|
||||
}
|
||||
served++
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour})
|
||||
|
||||
// Mirror what parsePublicMessagesWithMedia produces for a 3-image album:
|
||||
// stacked [IMAGE] headers + caption, plus an extraURLs slice on the source.
|
||||
body := protocol.MediaImage + "\n" + protocol.MediaImage + "\n" + protocol.MediaImage + "\nalbum caption"
|
||||
msgs := []protocol.Message{{ID: 5, Timestamp: 1, Text: body}}
|
||||
sources := []mediaSource{{
|
||||
tag: protocol.MediaImage,
|
||||
url: srv.URL + "/img1.jpg",
|
||||
extraURLs: []string{srv.URL + "/img2.jpg", srv.URL + "/img3.jpg"},
|
||||
}}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
|
||||
if served != 3 {
|
||||
t.Errorf("served = %d, want 3 upstream fetches", served)
|
||||
}
|
||||
|
||||
// Rewritten body must have exactly 3 [IMAGE]<size>:1:... headers and
|
||||
// the original caption preserved on the trailing line.
|
||||
got := msgs[0].Text
|
||||
headerCount := strings.Count(got, protocol.MediaImage)
|
||||
if headerCount != 3 {
|
||||
t.Fatalf("header count = %d, want 3 (text=%q)", headerCount, got)
|
||||
}
|
||||
if !strings.HasSuffix(got, "\nalbum caption") {
|
||||
t.Errorf("caption not preserved: %q", got)
|
||||
}
|
||||
|
||||
// Each header must round-trip through ParseMediaText with downloadable=true.
|
||||
rest := got
|
||||
for i := 0; i < 3; i++ {
|
||||
meta, c, ok := protocol.ParseMediaText(rest)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText #%d ok=false on %q", i, rest)
|
||||
}
|
||||
if !meta.Downloadable {
|
||||
t.Errorf("header #%d not downloadable: %+v", i, meta)
|
||||
}
|
||||
if int(meta.Size) != len(images[i]) {
|
||||
t.Errorf("header #%d size = %d, want %d", i, meta.Size, len(images[i]))
|
||||
}
|
||||
rest = c
|
||||
}
|
||||
if rest != "album caption" {
|
||||
t.Errorf("trailing caption = %q, want %q", rest, "album caption")
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyHTTPMediaSourcesAlbumPartialFailure: when one upstream fetch
|
||||
// fails we still emit a placeholder [TAG] for that slot so the album's
|
||||
// ID-span (= number of leading headers) is preserved. The remaining items
|
||||
// stay downloadable.
|
||||
func TestApplyHTTPMediaSourcesAlbumPartialFailure(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/broken.jpg" {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "image/jpeg")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("ok-image"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 1 << 20, TTL: time.Hour})
|
||||
|
||||
body := protocol.MediaImage + "\n" + protocol.MediaImage + "\ncap"
|
||||
msgs := []protocol.Message{{ID: 5, Timestamp: 1, Text: body}}
|
||||
sources := []mediaSource{{
|
||||
tag: protocol.MediaImage,
|
||||
url: srv.URL + "/ok.jpg",
|
||||
extraURLs: []string{srv.URL + "/broken.jpg"},
|
||||
}}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
|
||||
got := msgs[0].Text
|
||||
if c := strings.Count(got, protocol.MediaImage); c != 2 {
|
||||
t.Errorf("header count = %d, want 2 (text=%q)", c, got)
|
||||
}
|
||||
// First should be downloadable; last line is the broken-fallback bare tag
|
||||
// followed by the caption.
|
||||
if !strings.HasSuffix(got, "\n"+protocol.MediaImage+"\ncap") {
|
||||
t.Errorf("expected placeholder + caption tail, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyHTTPMediaSourcesRejectsOversize: a too-large file leaves the
|
||||
// message text untouched but still records the entry as "metadata only" with
|
||||
// downloadable=false so the UI can show the size without offering the button.
|
||||
func TestApplyHTTPMediaSourcesRejectsOversize(t *testing.T) {
|
||||
bigBody := strings.Repeat("X", 1024)
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Length", "1024")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(bigBody))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
cache := NewMediaCache(MediaCacheConfig{MaxFileBytes: 100, TTL: time.Hour})
|
||||
msgs := []protocol.Message{{ID: 1, Timestamp: 1, Text: protocol.MediaImage + "\ncap"}}
|
||||
sources := []mediaSource{{tag: protocol.MediaImage, url: srv.URL + "/big.jpg"}}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
|
||||
meta, _, ok := protocol.ParseMediaText(msgs[0].Text)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText ok=false")
|
||||
}
|
||||
if meta.Downloadable {
|
||||
t.Fatalf("oversized file should not be downloadable; got meta=%+v", meta)
|
||||
}
|
||||
if meta.Size != int64(len(bigBody)) {
|
||||
t.Fatalf("Size = %d, want %d (server should still surface the size)", meta.Size, len(bigBody))
|
||||
}
|
||||
stats := cache.Stats()
|
||||
if stats.Entries != 0 {
|
||||
t.Fatalf("oversized file should not occupy a cache slot, got entries=%d", stats.Entries)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// mediaDebugLogs is the on/off switch for verbose media-cache logging. It is
// an atomic.Bool so logging goroutines can read it without taking a mutex.
// It is toggled through SetMediaDebugLogs (per the original note, Server.Run
// does this from the --debug flag at startup).
var mediaDebugLogs atomic.Bool

// SetMediaDebugLogs turns the media debug log channel on (true) or off (false).
func SetMediaDebugLogs(enabled bool) {
	mediaDebugLogs.Store(enabled)
}

// logfMedia emits a "[media-debug] "-prefixed line through the standard
// logger, but only while media debug logging is enabled; otherwise it is a
// no-op. It is meant for the chatty per-store / per-cache-hit output — errors
// that operators should always see belong in a direct log.Printf call instead.
func logfMedia(format string, args ...any) {
	if mediaDebugLogs.Load() {
		log.Printf("[media-debug] "+format, args...)
	}
}
|
||||
@@ -0,0 +1,321 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/gotd/td/tg"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// telegramMediaDownloadChunk is the Limit sent with every UploadGetFile RPC,
// i.e. the largest number of bytes requested per call. MTProto requires Limit
// to be a multiple of 4KB and ≤ 1MB; 256KB balances the number of API calls
// against the memory held per chunk.
const telegramMediaDownloadChunk = 256 << 10
|
||||
|
||||
// telegramMediaPhotoSizeOrder lists Telegram photo size type codes from
// smallest to largest. The downloader picks the smallest *usable* size — for
// a DNS-tunnelled feed, bandwidth is precious and a thumbnail is usually
// enough for the user to decide whether to look at the original. The order
// is used as a tie-break / fallback when declared byte sizes are equal or
// unknown. The "stripped" placeholder type is filtered out separately
// because it is not a real renderable image.
//
// Dimensions follow Telegram's "image thumbnail types" table:
//
//	s — box,  up to 100x100
//	a — crop, 160x160
//	m — box,  up to 320x320
//	b — crop, 320x320
//	c — crop, 640x640
//	x — box,  up to 800x800
//	y — box,  up to 1280x1280
//	d — crop, 1280x1280
//	w — box,  up to 2560x2560
var telegramMediaPhotoSizeOrder = []string{"s", "a", "m", "b", "c", "x", "y", "d", "w"}
|
||||
|
||||
// downloadTelegramMedia fetches and caches media for a Telegram message. It
|
||||
// returns the metadata that should be embedded in the message body, or an
|
||||
// empty MediaMeta with ok=false to fall through to the legacy [TAG] path.
|
||||
//
|
||||
// The function is best-effort: any error (download failure, oversized file,
|
||||
// missing download API) is logged once and the message is returned without
|
||||
// downloadable metadata so the rest of the feed isn't blocked. The caller
|
||||
// is responsible for substituting EncodeMediaText into the message body.
|
||||
func (tr *TelegramReader) downloadTelegramMedia(ctx context.Context, api *tg.Client, msg *tg.Message) (protocol.MediaMeta, bool) {
|
||||
if api == nil || msg == nil || msg.Media == nil {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
cache := tr.feed.MediaCache()
|
||||
if cache == nil {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
switch m := msg.Media.(type) {
|
||||
case *tg.MessageMediaPhoto:
|
||||
photo, ok := m.Photo.(*tg.Photo)
|
||||
if !ok {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
return tr.downloadTelegramPhoto(ctx, api, cache, photo)
|
||||
case *tg.MessageMediaDocument:
|
||||
doc, ok := m.Document.(*tg.Document)
|
||||
if !ok {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
return tr.downloadTelegramDocument(ctx, api, cache, doc)
|
||||
}
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
func (tr *TelegramReader) downloadTelegramPhoto(ctx context.Context, api *tg.Client, cache *MediaCache, photo *tg.Photo) (protocol.MediaMeta, bool) {
|
||||
cacheKey := "tg-photo:" + strconv.FormatInt(photo.ID, 10)
|
||||
|
||||
// Hit the cache before doing any I/O — exact dedup, no bytes transferred.
|
||||
if meta, ok := cache.Lookup(cacheKey); ok {
|
||||
return meta, true
|
||||
}
|
||||
|
||||
bestType, bestBytes := pickSmallestPhotoSize(photo.Sizes)
|
||||
if bestType == "" {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
// Honour the configured max-size early so we don't even open the RPC for
|
||||
// objects we'll just throw away.
|
||||
if maxBytes := cache.maxFileBytes; maxBytes > 0 && bestBytes > maxBytes {
|
||||
return protocol.MediaMeta{
|
||||
Tag: protocol.MediaImage,
|
||||
Size: bestBytes,
|
||||
Downloadable: false,
|
||||
}, true
|
||||
}
|
||||
|
||||
loc := &tg.InputPhotoFileLocation{
|
||||
ID: photo.ID,
|
||||
AccessHash: photo.AccessHash,
|
||||
FileReference: photo.FileReference,
|
||||
ThumbSize: bestType,
|
||||
}
|
||||
bytes, err := tr.downloadTelegramFile(ctx, api, loc, bestBytes)
|
||||
if err != nil {
|
||||
// Transient fetch error (network, FILE_REFERENCE_EXPIRED, etc.).
|
||||
// We don't mark the message as non-downloadable in that case —
|
||||
// "non-downloadable" means "the file exists but the server chose
|
||||
// not to cache it" (i.e. oversized). Falling through to legacy
|
||||
// keeps the UI honest, and the next 10-min refresh cycle re-tries.
|
||||
tr.logMediaError("photo", photo.ID, err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
meta, err := cache.Store(cacheKey, protocol.MediaImage, bytes, "image/jpeg", "")
|
||||
if err != nil {
|
||||
// ErrTooLarge is reported as non-downloadable; any other store error
|
||||
// is just dropped to legacy.
|
||||
if errors.Is(err, ErrTooLarge) {
|
||||
return meta, true
|
||||
}
|
||||
tr.logMediaError("photo", photo.ID, err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
return meta, true
|
||||
}
|
||||
|
||||
func (tr *TelegramReader) downloadTelegramDocument(ctx context.Context, api *tg.Client, cache *MediaCache, doc *tg.Document) (protocol.MediaMeta, bool) {
|
||||
cacheKey := "tg-doc:" + strconv.FormatInt(doc.ID, 10)
|
||||
if meta, ok := cache.Lookup(cacheKey); ok {
|
||||
return meta, true
|
||||
}
|
||||
|
||||
tag, filename := classifyDocumentTagAndName(doc)
|
||||
if tag == protocol.MediaSticker {
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
if maxBytes := cache.maxFileBytes; maxBytes > 0 && doc.Size > maxBytes {
|
||||
return protocol.MediaMeta{
|
||||
Tag: tag,
|
||||
Size: doc.Size,
|
||||
Downloadable: false,
|
||||
}, true
|
||||
}
|
||||
|
||||
loc := &tg.InputDocumentFileLocation{
|
||||
ID: doc.ID,
|
||||
AccessHash: doc.AccessHash,
|
||||
FileReference: doc.FileReference,
|
||||
ThumbSize: "",
|
||||
}
|
||||
bytes, err := tr.downloadTelegramFile(ctx, api, loc, doc.Size)
|
||||
if err != nil {
|
||||
// See note in downloadTelegramPhoto: transient fetch errors should
|
||||
// not be surfaced as "non-downloadable", they should fall through
|
||||
// to legacy [TAG]\ncaption rendering and let the next refresh retry.
|
||||
tr.logMediaError("doc", doc.ID, err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
|
||||
meta, err := cache.Store(cacheKey, tag, bytes, doc.MimeType, filename)
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrTooLarge) {
|
||||
return meta, true
|
||||
}
|
||||
tr.logMediaError("doc", doc.ID, err)
|
||||
return protocol.MediaMeta{}, false
|
||||
}
|
||||
return meta, true
|
||||
}
|
||||
|
||||
// downloadTelegramFile downloads `expectedSize` bytes (or all available bytes
|
||||
// when expectedSize <= 0) from the given Telegram file location. It enforces
|
||||
// the configured max-size cap defensively so a file that lies about its size
|
||||
// still can't blow past the limit on the wire.
|
||||
func (tr *TelegramReader) downloadTelegramFile(ctx context.Context, api *tg.Client, loc tg.InputFileLocationClass, expectedSize int64) ([]byte, error) {
|
||||
cache := tr.feed.MediaCache()
|
||||
maxBytes := int64(0)
|
||||
if cache != nil {
|
||||
maxBytes = cache.maxFileBytes
|
||||
}
|
||||
|
||||
var (
|
||||
out []byte
|
||||
offset int64
|
||||
)
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
req := &tg.UploadGetFileRequest{
|
||||
Location: loc,
|
||||
Offset: offset,
|
||||
Limit: telegramMediaDownloadChunk,
|
||||
}
|
||||
res, err := api.UploadGetFile(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("upload.getFile offset=%d: %w", offset, err)
|
||||
}
|
||||
fileRes, ok := res.(*tg.UploadFile)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unexpected upload response type %T", res)
|
||||
}
|
||||
if len(fileRes.Bytes) == 0 {
|
||||
break
|
||||
}
|
||||
out = append(out, fileRes.Bytes...)
|
||||
offset += int64(len(fileRes.Bytes))
|
||||
|
||||
// Hard guard against runaway downloads.
|
||||
if maxBytes > 0 && int64(len(out)) > maxBytes {
|
||||
return nil, fmt.Errorf("download exceeded configured max-size (%d > %d)", len(out), maxBytes)
|
||||
}
|
||||
|
||||
// We consider the transfer complete when the server returned less than
|
||||
// the requested chunk (canonical EOF) or we've reached the expected size.
|
||||
if len(fileRes.Bytes) < telegramMediaDownloadChunk {
|
||||
break
|
||||
}
|
||||
if expectedSize > 0 && int64(len(out)) >= expectedSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// pickSmallestPhotoSize returns the smallest usable size in a Telegram
|
||||
// Photo as (type-code, byte-size). DNS-tunnelled bandwidth is precious, so
|
||||
// we prefer a small chat-preview thumbnail over the full-resolution
|
||||
// original whenever Telegram offers both. Returns empty type when no usable
|
||||
// size is available (e.g. only stripped placeholder thumbs).
|
||||
func pickSmallestPhotoSize(sizes []tg.PhotoSizeClass) (string, int64) {
|
||||
type candidate struct {
|
||||
typ string
|
||||
size int64
|
||||
}
|
||||
var pool []candidate
|
||||
add := func(typ string, size int64) {
|
||||
if typ == "" {
|
||||
return
|
||||
}
|
||||
pool = append(pool, candidate{typ: typ, size: size})
|
||||
}
|
||||
for _, s := range sizes {
|
||||
switch v := s.(type) {
|
||||
case *tg.PhotoSize:
|
||||
add(v.Type, int64(v.Size))
|
||||
case *tg.PhotoCachedSize:
|
||||
add(v.Type, int64(len(v.Bytes)))
|
||||
case *tg.PhotoSizeProgressive:
|
||||
// Progressive carries a slice of progressive sizes; the FIRST
|
||||
// element is the smallest progressive prefix the server can
|
||||
// stream, which suits "smallest usable" perfectly.
|
||||
if len(v.Sizes) > 0 {
|
||||
add(v.Type, int64(v.Sizes[0]))
|
||||
} else {
|
||||
add(v.Type, 0)
|
||||
}
|
||||
case *tg.PhotoStrippedSize:
|
||||
// Stripped sizes are tiny placeholder thumbs — skip.
|
||||
}
|
||||
}
|
||||
if len(pool) == 0 {
|
||||
return "", 0
|
||||
}
|
||||
|
||||
// Prefer the entry with the smallest declared byte size; break ties
|
||||
// using the type-code preference order (smallest first). When the
|
||||
// declared size is 0 (unknown), the type code alone decides the order.
|
||||
rank := make(map[string]int, len(telegramMediaPhotoSizeOrder))
|
||||
for i, t := range telegramMediaPhotoSizeOrder {
|
||||
rank[t] = i
|
||||
}
|
||||
bestIdx := -1
|
||||
for i, c := range pool {
|
||||
if bestIdx < 0 {
|
||||
bestIdx = i
|
||||
continue
|
||||
}
|
||||
b := pool[bestIdx]
|
||||
// Prefer a strictly smaller known size.
|
||||
if c.size > 0 && b.size > 0 {
|
||||
if c.size < b.size {
|
||||
bestIdx = i
|
||||
continue
|
||||
}
|
||||
if c.size == b.size && rank[c.typ] < rank[b.typ] {
|
||||
bestIdx = i
|
||||
}
|
||||
continue
|
||||
}
|
||||
// One of them has unknown size — fall back to type-code rank.
|
||||
if rank[c.typ] < rank[b.typ] {
|
||||
bestIdx = i
|
||||
}
|
||||
}
|
||||
chosen := pool[bestIdx]
|
||||
return chosen.typ, chosen.size
|
||||
}
|
||||
|
||||
// classifyDocumentTagAndName returns the protocol media tag and best-effort
|
||||
// filename for a Telegram Document. The tag mirrors classifyDocument's logic
|
||||
// but also exposes the filename attribute so the HTTP layer can offer a
|
||||
// reasonable Content-Disposition.
|
||||
func classifyDocumentTagAndName(doc *tg.Document) (string, string) {
|
||||
tag := protocol.MediaFile
|
||||
filename := ""
|
||||
for _, attr := range doc.Attributes {
|
||||
switch a := attr.(type) {
|
||||
case *tg.DocumentAttributeVideo:
|
||||
tag = protocol.MediaVideo
|
||||
case *tg.DocumentAttributeAudio:
|
||||
tag = protocol.MediaAudio
|
||||
case *tg.DocumentAttributeSticker:
|
||||
tag = protocol.MediaSticker
|
||||
case *tg.DocumentAttributeAnimated:
|
||||
tag = protocol.MediaGIF
|
||||
case *tg.DocumentAttributeFilename:
|
||||
filename = a.FileName
|
||||
}
|
||||
}
|
||||
return tag, filename
|
||||
}
|
||||
|
||||
func (tr *TelegramReader) logMediaError(kind string, id int64, err error) {
|
||||
// Best-effort log; the receiver's package log is fine for now.
|
||||
logfMedia("[telegram] media %s id=%d download failed: %v", kind, id, err)
|
||||
}
|
||||
@@ -0,0 +1,307 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"hash/crc32"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
func newTestCache(maxBytes int64, ttl time.Duration) *MediaCache {
|
||||
return NewMediaCache(MediaCacheConfig{MaxFileBytes: maxBytes, TTL: ttl})
|
||||
}
|
||||
|
||||
func TestMediaCacheStoreAndGetBlock(t *testing.T) {
|
||||
cache := newTestCache(1<<20, time.Hour)
|
||||
content := bytes.Repeat([]byte("ab"), 1000) // 2000 bytes — multiple blocks
|
||||
|
||||
meta, err := cache.Store("key1", protocol.MediaImage, content, "image/jpeg", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Store: %v", err)
|
||||
}
|
||||
if !meta.Downloadable {
|
||||
t.Fatalf("Downloadable = false, want true")
|
||||
}
|
||||
if !protocol.IsMediaChannel(meta.Channel) {
|
||||
t.Fatalf("Channel %d not in media range", meta.Channel)
|
||||
}
|
||||
if meta.Size != int64(len(content)) {
|
||||
t.Fatalf("Size = %d, want %d", meta.Size, len(content))
|
||||
}
|
||||
if meta.CRC32 != crc32.ChecksumIEEE(content) {
|
||||
t.Fatalf("CRC32 mismatch")
|
||||
}
|
||||
if meta.Blocks == 0 {
|
||||
t.Fatalf("Blocks should be > 0")
|
||||
}
|
||||
|
||||
// Reassemble: block 0 begins with the protocol media header, then comes
|
||||
// the (compression-default = none) bytes which equal the original.
|
||||
var got []byte
|
||||
for blk := uint16(0); blk < meta.Blocks; blk++ {
|
||||
b, err := cache.GetBlock(meta.Channel, blk)
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlock(%d, %d): %v", meta.Channel, blk, err)
|
||||
}
|
||||
got = append(got, b...)
|
||||
}
|
||||
if len(got) < protocol.MediaBlockHeaderLen {
|
||||
t.Fatalf("assembled bytes too short: %d", len(got))
|
||||
}
|
||||
hdr, err := protocol.DecodeMediaBlockHeader(got[:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMediaBlockHeader: %v", err)
|
||||
}
|
||||
if hdr.CRC32 != meta.CRC32 {
|
||||
t.Fatalf("header CRC = %x, want %x", hdr.CRC32, meta.CRC32)
|
||||
}
|
||||
if hdr.Compression != protocol.MediaCompressionNone {
|
||||
t.Fatalf("header compression = %v, want none", hdr.Compression)
|
||||
}
|
||||
if !bytes.Equal(got[protocol.MediaBlockHeaderLen:], content) {
|
||||
t.Fatalf("reassembled bytes differ: got %d, want %d", len(got)-protocol.MediaBlockHeaderLen, len(content))
|
||||
}
|
||||
}
|
||||
|
||||
// TestMediaCacheStoreGzip exercises the compressed wire path: bytes after
|
||||
// the header are gzip-compressed and DecompressMediaBytes reproduces the
|
||||
// original.
|
||||
func TestMediaCacheStoreGzip(t *testing.T) {
|
||||
cache := NewMediaCache(MediaCacheConfig{
|
||||
MaxFileBytes: 1 << 20,
|
||||
TTL: time.Hour,
|
||||
Compression: protocol.MediaCompressionGzip,
|
||||
})
|
||||
content := bytes.Repeat([]byte("compress-me "), 200)
|
||||
|
||||
meta, err := cache.Store("gz", protocol.MediaFile, content, "text/plain", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Store: %v", err)
|
||||
}
|
||||
|
||||
var got []byte
|
||||
for blk := uint16(0); blk < meta.Blocks; blk++ {
|
||||
b, err := cache.GetBlock(meta.Channel, blk)
|
||||
if err != nil {
|
||||
t.Fatalf("GetBlock(%d, %d): %v", meta.Channel, blk, err)
|
||||
}
|
||||
got = append(got, b...)
|
||||
}
|
||||
hdr, err := protocol.DecodeMediaBlockHeader(got[:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMediaBlockHeader: %v", err)
|
||||
}
|
||||
if hdr.Compression != protocol.MediaCompressionGzip {
|
||||
t.Fatalf("compression = %v, want gzip", hdr.Compression)
|
||||
}
|
||||
body, err := DecompressMediaBytes(bytes.NewReader(got[protocol.MediaBlockHeaderLen:]), hdr.Compression)
|
||||
if err != nil {
|
||||
t.Fatalf("decompress: %v", err)
|
||||
}
|
||||
defer body.Close()
|
||||
decompressed := new(bytes.Buffer)
|
||||
if _, err := decompressed.ReadFrom(body); err != nil {
|
||||
t.Fatalf("read decompressed: %v", err)
|
||||
}
|
||||
if !bytes.Equal(decompressed.Bytes(), content) {
|
||||
t.Fatalf("decompressed differs from original")
|
||||
}
|
||||
if crc32.ChecksumIEEE(decompressed.Bytes()) != hdr.CRC32 {
|
||||
t.Fatalf("header CRC %x doesn't match decompressed CRC %x", hdr.CRC32, crc32.ChecksumIEEE(decompressed.Bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
// Storing the same key with the same content should refresh TTL but reuse
|
||||
// the existing channel — this is the "every 10 min refresh" deduplication
|
||||
// path called out in the spec.
|
||||
func TestMediaCacheDedup(t *testing.T) {
|
||||
cache := newTestCache(0, time.Hour)
|
||||
content := []byte("hello")
|
||||
meta1, err := cache.Store("dup", protocol.MediaImage, content, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("first Store: %v", err)
|
||||
}
|
||||
stats1 := cache.Stats()
|
||||
meta2, err := cache.Store("dup", protocol.MediaImage, content, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("second Store: %v", err)
|
||||
}
|
||||
if meta1.Channel != meta2.Channel {
|
||||
t.Fatalf("dedup: channel changed (%d → %d)", meta1.Channel, meta2.Channel)
|
||||
}
|
||||
stats2 := cache.Stats()
|
||||
if stats2.StoreHits != stats1.StoreHits+1 {
|
||||
t.Fatalf("StoreHits did not increment: %d → %d", stats1.StoreHits, stats2.StoreHits)
|
||||
}
|
||||
if stats2.StoreMisses != stats1.StoreMisses {
|
||||
t.Fatalf("StoreMisses changed unexpectedly")
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-key dedup: identical bytes arriving under a different upstream id
|
||||
// must reuse the existing cache slot, refresh the TTL, and not consume a
|
||||
// fresh channel — this is the behaviour the spec calls out.
|
||||
func TestMediaCacheCrossKeyDedup(t *testing.T) {
|
||||
cache := newTestCache(0, time.Hour)
|
||||
content := []byte("the same bytes under different keys")
|
||||
m1, err := cache.Store("key-A", protocol.MediaImage, content, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("first Store: %v", err)
|
||||
}
|
||||
statsBefore := cache.Stats()
|
||||
|
||||
m2, err := cache.Store("key-B-different", protocol.MediaImage, content, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("second Store: %v", err)
|
||||
}
|
||||
if m1.Channel != m2.Channel {
|
||||
t.Fatalf("cross-key dedup: channel changed (%d -> %d)", m1.Channel, m2.Channel)
|
||||
}
|
||||
statsAfter := cache.Stats()
|
||||
if statsAfter.Entries != statsBefore.Entries {
|
||||
t.Fatalf("cross-key dedup: entries grew %d -> %d (should reuse slot)", statsBefore.Entries, statsAfter.Entries)
|
||||
}
|
||||
if statsAfter.StoreHits != statsBefore.StoreHits+1 {
|
||||
t.Fatalf("StoreHits should have incremented")
|
||||
}
|
||||
|
||||
// Lookup under either key returns the same entry.
|
||||
if meta, ok := cache.Lookup("key-A"); !ok || meta.Channel != m1.Channel {
|
||||
t.Fatalf("Lookup(key-A) failed: ok=%v meta=%+v", ok, meta)
|
||||
}
|
||||
if meta, ok := cache.Lookup("key-B-different"); !ok || meta.Channel != m1.Channel {
|
||||
t.Fatalf("Lookup(key-B-different) failed: ok=%v meta=%+v", ok, meta)
|
||||
}
|
||||
}
|
||||
|
||||
// Same key with different bytes (e.g. a Telegram edit) must replace the
|
||||
// stored content and produce a new channel.
|
||||
func TestMediaCacheKeyReplaceOnContentChange(t *testing.T) {
|
||||
cache := newTestCache(0, time.Hour)
|
||||
first := []byte("first content")
|
||||
second := []byte("second content (different)")
|
||||
|
||||
m1, err := cache.Store("k", protocol.MediaImage, first, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("first Store: %v", err)
|
||||
}
|
||||
m2, err := cache.Store("k", protocol.MediaImage, second, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("second Store: %v", err)
|
||||
}
|
||||
if m1.CRC32 == m2.CRC32 {
|
||||
t.Fatalf("CRC32 should differ for different content")
|
||||
}
|
||||
// Verify GetBlock on m1.Channel either succeeds with NEW bytes (channel
|
||||
// reuse) or fails entirely — never returns the OLD bytes. Block 0
|
||||
// begins with the protocol header whose CRC field identifies which
|
||||
// content the slot is currently serving.
|
||||
if blk, err := cache.GetBlock(m1.Channel, 0); err == nil {
|
||||
if len(blk) >= protocol.MediaBlockHeaderLen {
|
||||
if hdr, err := protocol.DecodeMediaBlockHeader(blk[:protocol.MediaBlockHeaderLen]); err == nil && hdr.CRC32 == m1.CRC32 {
|
||||
t.Fatalf("GetBlock returned stale (first) bytes after content change")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMediaCacheRejectsOversizeFile(t *testing.T) {
|
||||
cache := newTestCache(100, time.Hour)
|
||||
_, err := cache.Store("big", protocol.MediaFile, bytes.Repeat([]byte("x"), 200), "", "")
|
||||
if !errors.Is(err, ErrTooLarge) {
|
||||
t.Fatalf("err = %v, want ErrTooLarge", err)
|
||||
}
|
||||
stats := cache.Stats()
|
||||
if stats.StoreRejected != 1 {
|
||||
t.Fatalf("StoreRejected = %d, want 1", stats.StoreRejected)
|
||||
}
|
||||
if stats.Entries != 0 {
|
||||
t.Fatalf("Entries = %d, want 0", stats.Entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMediaCacheGetBlockOutOfRange(t *testing.T) {
|
||||
cache := newTestCache(0, time.Hour)
|
||||
_, err := cache.GetBlock(protocol.MediaChannelStart, 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for unknown channel")
|
||||
}
|
||||
_, err = cache.GetBlock(0, 0)
|
||||
if err == nil || !strings.Contains(err.Error(), "outside media range") {
|
||||
t.Fatalf("expected media-range error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMediaCacheSweepEvictsExpired(t *testing.T) {
|
||||
cache := newTestCache(0, 10*time.Millisecond)
|
||||
_, err := cache.Store("k", protocol.MediaFile, []byte("data"), "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Store: %v", err)
|
||||
}
|
||||
if cache.Stats().Entries != 1 {
|
||||
t.Fatalf("Entries = %d, want 1", cache.Stats().Entries)
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
if n := cache.Sweep(); n != 1 {
|
||||
t.Fatalf("Sweep evicted %d, want 1", n)
|
||||
}
|
||||
if cache.Stats().Entries != 0 {
|
||||
t.Fatalf("Entries after sweep = %d, want 0", cache.Stats().Entries)
|
||||
}
|
||||
}
|
||||
|
||||
// Allocator: when the next-hint slot is taken but expired, that slot is
|
||||
// reclaimed instead of skipped.
|
||||
func TestMediaCacheReclaimsExpiredSlot(t *testing.T) {
|
||||
cache := newTestCache(0, 10*time.Millisecond)
|
||||
m1, err := cache.Store("a", protocol.MediaFile, []byte("aaa"), "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Store a: %v", err)
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
// Force the allocator's nextChannel back to m1.Channel by storing keys
|
||||
// until we wrap is impractical, but we know the next hint is m1.Channel+1.
|
||||
// Triggering a Store with the expired slot in the way of the linear scan
|
||||
// proves it's reclaimed and the new entry fits.
|
||||
m2, err := cache.Store("b", protocol.MediaFile, []byte("bbb"), "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("Store b: %v", err)
|
||||
}
|
||||
if m2.Channel == m1.Channel {
|
||||
t.Logf("note: reused expired slot at ch %d (expected when nextChannel wraps)", m2.Channel)
|
||||
}
|
||||
stats := cache.Stats()
|
||||
if stats.Entries != 1 {
|
||||
t.Fatalf("Entries = %d, want 1 (the old expired entry should be gone)", stats.Entries)
|
||||
}
|
||||
}
|
||||
|
||||
// Round-trip with the wire-format encoder: a cache entry's metadata, when
|
||||
// embedded in a message, can be parsed back to recover the same channel and
|
||||
// hash a client would download.
|
||||
func TestMediaCacheMetadataRoundTrip(t *testing.T) {
|
||||
cache := newTestCache(0, time.Hour)
|
||||
content := []byte("round trip content")
|
||||
meta, err := cache.Store("rt", protocol.MediaImage, content, "image/png", "pic.png")
|
||||
if err != nil {
|
||||
t.Fatalf("Store: %v", err)
|
||||
}
|
||||
body := protocol.EncodeMediaText(meta, "look at this")
|
||||
parsed, caption, ok := protocol.ParseMediaText(body)
|
||||
if !ok {
|
||||
t.Fatalf("ParseMediaText ok=false")
|
||||
}
|
||||
if parsed.Channel != meta.Channel {
|
||||
t.Fatalf("Channel: parsed %d, stored %d", parsed.Channel, meta.Channel)
|
||||
}
|
||||
if parsed.CRC32 != meta.CRC32 {
|
||||
t.Fatalf("CRC32 mismatch")
|
||||
}
|
||||
if caption != "look at this" {
|
||||
t.Fatalf("caption = %q", caption)
|
||||
}
|
||||
}
|
||||
+172
-9
@@ -172,13 +172,79 @@ func (pr *PublicReader) fetchChannel(ctx context.Context, username string) ([]pr
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
msgs, err := parsePublicMessages(body)
|
||||
msgs, sources, err := parsePublicMessagesWithMedia(body)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
// If the server has a configured media cache, fetch each scraped image
|
||||
// URL and rewrite the corresponding message text to embed downloadable
|
||||
// metadata. Failures here are best-effort: messages keep their legacy
|
||||
// "[IMAGE]\ncaption" body when downloads don't succeed.
|
||||
if cache := pr.feed.MediaCache(); cache != nil {
|
||||
applyHTTPMediaSources(ctx, cache, msgs, sources)
|
||||
}
|
||||
return msgs, extractChannelTitle(body), nil
|
||||
}
|
||||
|
||||
// applyHTTPMediaSources downloads each src.url (+ extraURLs for albums) and
|
||||
// rewrites the matching message body with N stacked downloadable metadata
|
||||
// lines. Failed downloads emit a bare [TAG] so the album's ID span is
|
||||
// preserved.
|
||||
func applyHTTPMediaSources(ctx context.Context, cache *MediaCache, msgs []protocol.Message, sources []mediaSource) {
|
||||
for i := range msgs {
|
||||
if i >= len(sources) {
|
||||
break
|
||||
}
|
||||
src := sources[i]
|
||||
if src.url == "" || src.tag == "" {
|
||||
continue
|
||||
}
|
||||
// Strip every leading [TAG] header so we can re-emit clean metadata
|
||||
// (ParseMediaText only peels one tag per call).
|
||||
body := msgs[i].Text
|
||||
for {
|
||||
_, rest, parsed := protocol.ParseMediaText(body)
|
||||
if !parsed {
|
||||
break
|
||||
}
|
||||
body = rest
|
||||
}
|
||||
caption := body
|
||||
|
||||
urls := append([]string{src.url}, src.extraURLs...)
|
||||
var encoded strings.Builder
|
||||
downloaded := 0
|
||||
for j, u := range urls {
|
||||
meta, ok := downloadHTTPMedia(ctx, cache, src.tag, u)
|
||||
if j > 0 {
|
||||
encoded.WriteByte('\n')
|
||||
}
|
||||
if !ok {
|
||||
encoded.WriteString(src.tag)
|
||||
continue
|
||||
}
|
||||
downloaded++
|
||||
encoded.WriteString(strings.TrimSuffix(meta.String(), "\n"))
|
||||
}
|
||||
if downloaded == 0 {
|
||||
continue
|
||||
}
|
||||
newText := encoded.String()
|
||||
if caption != "" {
|
||||
newText += "\n" + caption
|
||||
}
|
||||
msgs[i].Text = newText
|
||||
}
|
||||
}
|
||||
|
||||
// mediaSource describes the downloadable media attached to one scraped
// message. A zero value means the message has nothing to download.
type mediaSource struct {
	// tag is the media tag to emit for the message; url is the first (or
	// only) media URL.
	tag, url string
	// extraURLs lists the remaining album items, in page order.
	extraURLs []string
}
|
||||
|
||||
// extractChannelTitle parses the channel display name from the Telegram public page.
|
||||
func extractChannelTitle(body []byte) string {
|
||||
doc, err := html.Parse(strings.NewReader(string(body)))
|
||||
@@ -219,12 +285,27 @@ func mergeMessages(old, new []protocol.Message) []protocol.Message {
|
||||
}
|
||||
|
||||
func parsePublicMessages(body []byte) ([]protocol.Message, error) {
|
||||
msgs, _, err := parsePublicMessagesWithMedia(body)
|
||||
return msgs, err
|
||||
}
|
||||
|
||||
// parsePublicMessagesWithMedia is identical to parsePublicMessages but also
|
||||
// returns a per-message media descriptor — same length and ordering as the
|
||||
// returned messages — that callers can use to fetch the underlying photo or
|
||||
// document over HTTP and rewrite the message body. The legacy behaviour
|
||||
// (returning just messages) is preserved by parsePublicMessages above for
|
||||
// existing tests and pre-feature callers.
|
||||
func parsePublicMessagesWithMedia(body []byte) ([]protocol.Message, []mediaSource, error) {
|
||||
doc, err := html.Parse(strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse html: %w", err)
|
||||
return nil, nil, fmt.Errorf("parse html: %w", err)
|
||||
}
|
||||
|
||||
var collected []publicMessage
|
||||
type collectedMsg struct {
|
||||
msg publicMessage
|
||||
src mediaSource
|
||||
}
|
||||
var collected []collectedMsg
|
||||
visitNodes(doc, func(n *html.Node) {
|
||||
post := attrValue(n, "data-post")
|
||||
if post == "" {
|
||||
@@ -235,17 +316,42 @@ func parsePublicMessages(body []byte) ([]protocol.Message, error) {
|
||||
return
|
||||
}
|
||||
text := strings.TrimSpace(extractMessageText(findMessageBodyNode(n)))
|
||||
var src mediaSource
|
||||
mediaPrefix := ""
|
||||
switch {
|
||||
case findFirstByClass(n, "tgme_widget_message_photo_wrap") != nil:
|
||||
// Albums share one data-post block with N nested photo wraps.
|
||||
// Stack N [IMAGE] headers so the client-side gap detector
|
||||
// (albumSpan) doesn't flag the absorbed sibling IDs as missing.
|
||||
photoWraps := findAllByClass(n, "tgme_widget_message_photo_wrap")
|
||||
if len(photoWraps) > 1 {
|
||||
headers := make([]string, len(photoWraps))
|
||||
for i := range headers {
|
||||
headers[i] = protocol.MediaImage
|
||||
}
|
||||
mediaPrefix = strings.Join(headers, "\n")
|
||||
} else {
|
||||
mediaPrefix = protocol.MediaImage
|
||||
}
|
||||
src = mediaSource{tag: protocol.MediaImage, url: extractBackgroundImageURL(photoWraps[0])}
|
||||
for i := 1; i < len(photoWraps); i++ {
|
||||
if u := extractBackgroundImageURL(photoWraps[i]); u != "" {
|
||||
src.extraURLs = append(src.extraURLs, u)
|
||||
}
|
||||
}
|
||||
case findFirstByClass(n, "tgme_widget_message_video_player") != nil ||
|
||||
findFirstByClass(n, "tgme_widget_message_roundvideo_player") != nil:
|
||||
mediaPrefix = protocol.MediaVideo
|
||||
// t.me/s/ does not serve real video bytes — the player anchor links
|
||||
// to the channel page itself. Don't try to download.
|
||||
case findFirstByClass(n, "tgme_widget_message_sticker_wrap") != nil:
|
||||
mediaPrefix = protocol.MediaSticker
|
||||
// Stickers are emitted as the legacy tag only; we don't cache or
|
||||
// serve their bytes (animated/.tgs variants don't render inline
|
||||
// in the browser anyway).
|
||||
case findFirstByClass(n, "tgme_widget_message_voice") != nil:
|
||||
mediaPrefix = protocol.MediaAudio
|
||||
// Public web view doesn't expose voice file bytes either.
|
||||
case findFirstByClass(n, "tgme_widget_message_poll") != nil:
|
||||
mediaPrefix = protocol.MediaPoll
|
||||
pollBody := extractPollData(n)
|
||||
@@ -264,6 +370,10 @@ func parsePublicMessages(body []byte) ([]protocol.Message, error) {
|
||||
mediaPrefix = protocol.MediaContact
|
||||
case findFirstByClass(n, "tgme_widget_message_document_wrap") != nil:
|
||||
mediaPrefix = protocol.MediaFile
|
||||
// In t.me/s/ the document link is a "view in Telegram" page link,
|
||||
// not the file CDN — fetching it would download the channel HTML.
|
||||
// Skip; documents are downloadable only when the server runs with
|
||||
// a Telegram login (gotd UploadGetFile path).
|
||||
case findFirstByClass(n, "message_media_not_supported") != nil:
|
||||
// Telegram shows "Please open Telegram to view this post" for
|
||||
// content the public web view can't render: polls/quizzes, but
|
||||
@@ -294,26 +404,31 @@ func parsePublicMessages(body []byte) ([]protocol.Message, error) {
|
||||
text = protocol.MediaReply + "\n" + text
|
||||
}
|
||||
}
|
||||
collected = append(collected, publicMessage{
|
||||
collected = append(collected, collectedMsg{
|
||||
msg: publicMessage{
|
||||
id: id,
|
||||
timestamp: extractMessageTimestamp(n),
|
||||
text: text,
|
||||
},
|
||||
src: src,
|
||||
})
|
||||
})
|
||||
|
||||
if len(collected) == 0 {
|
||||
return nil, fmt.Errorf("no public messages found")
|
||||
return nil, nil, fmt.Errorf("no public messages found")
|
||||
}
|
||||
|
||||
sort.Slice(collected, func(i, j int) bool {
|
||||
return collected[i].id > collected[j].id
|
||||
return collected[i].msg.id > collected[j].msg.id
|
||||
})
|
||||
|
||||
msgs := make([]protocol.Message, 0, len(collected))
|
||||
for _, msg := range collected {
|
||||
msgs = append(msgs, protocol.Message{ID: msg.id, Timestamp: msg.timestamp, Text: msg.text})
|
||||
sources := make([]mediaSource, 0, len(collected))
|
||||
for _, c := range collected {
|
||||
msgs = append(msgs, protocol.Message{ID: c.msg.id, Timestamp: c.msg.timestamp, Text: c.msg.text})
|
||||
sources = append(sources, c.src)
|
||||
}
|
||||
return msgs, nil
|
||||
return msgs, sources, nil
|
||||
}
|
||||
|
||||
func visitNodes(n *html.Node, fn func(*html.Node)) {
|
||||
@@ -339,6 +454,17 @@ func findFirstByClass(n *html.Node, class string) *html.Node {
|
||||
return found
|
||||
}
|
||||
|
||||
// findAllByClass returns every descendant of n that carries the given class.
|
||||
func findAllByClass(n *html.Node, class string) []*html.Node {
|
||||
var found []*html.Node
|
||||
visitNodes(n, func(cur *html.Node) {
|
||||
if hasClass(cur, class) {
|
||||
found = append(found, cur)
|
||||
}
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
func hasClass(n *html.Node, class string) bool {
|
||||
if n == nil || n.Type != html.ElementNode {
|
||||
return false
|
||||
@@ -578,3 +704,40 @@ func extractReplyID(replyNode *html.Node) uint32 {
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// extractBackgroundImageURL pulls the URL out of an inline
|
||||
// `style="background-image:url('...')"` attribute. Telegram's public photo
|
||||
// widget uses this pattern to render thumbnails — the URL points to the
|
||||
// CDN-hosted image and is the source we want to download. Returns an empty
|
||||
// string when the pattern is not present.
|
||||
func extractBackgroundImageURL(n *html.Node) string {
|
||||
if n == nil {
|
||||
return ""
|
||||
}
|
||||
style := attrValue(n, "style")
|
||||
if style == "" {
|
||||
return ""
|
||||
}
|
||||
idx := strings.Index(style, "background-image")
|
||||
if idx < 0 {
|
||||
return ""
|
||||
}
|
||||
// Find url(...) after the property name.
|
||||
rest := style[idx:]
|
||||
open := strings.Index(rest, "url(")
|
||||
if open < 0 {
|
||||
return ""
|
||||
}
|
||||
rest = rest[open+len("url("):]
|
||||
close := strings.IndexByte(rest, ')')
|
||||
if close < 0 {
|
||||
return ""
|
||||
}
|
||||
raw := strings.TrimSpace(rest[:close])
|
||||
raw = strings.TrimPrefix(raw, "'")
|
||||
raw = strings.TrimSuffix(raw, "'")
|
||||
raw = strings.TrimPrefix(raw, "\"")
|
||||
raw = strings.TrimSuffix(raw, "\"")
|
||||
return raw
|
||||
}
|
||||
|
||||
|
||||
@@ -104,6 +104,76 @@ func TestMergeMessages(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePublicMessagesAlbumStacksHeaders(t *testing.T) {
|
||||
// Album = one data-post with N nested photo wraps. We must emit N
|
||||
// stacked [IMAGE] headers so albumSpan suppresses the absorbed-sibling
|
||||
// "1 missed" gap.
|
||||
body := []byte(`
|
||||
<html><body>
|
||||
<div class="tgme_widget_message" data-post="testchan/210">
|
||||
<a class="tgme_widget_message_date"><time datetime="2026-04-10T12:00:00+00:00"></time></a>
|
||||
<a class="tgme_widget_message_photo_wrap" style="background-image:url('https://cdn.telegram.org/img1.jpg')"></a>
|
||||
<a class="tgme_widget_message_photo_wrap" style="background-image:url('https://cdn.telegram.org/img2.jpg')"></a>
|
||||
<a class="tgme_widget_message_photo_wrap" style="background-image:url('https://cdn.telegram.org/img3.jpg')"></a>
|
||||
<div class="tgme_widget_message_text">album caption</div>
|
||||
</div>
|
||||
</body></html>
|
||||
`)
|
||||
|
||||
msgs, sources, err := parsePublicMessagesWithMedia(body)
|
||||
if err != nil {
|
||||
t.Fatalf("parsePublicMessagesWithMedia: %v", err)
|
||||
}
|
||||
if len(msgs) != 1 {
|
||||
t.Fatalf("len(msgs) = %d, want 1", len(msgs))
|
||||
}
|
||||
wantText := "[IMAGE]\n[IMAGE]\n[IMAGE]\nalbum caption"
|
||||
if msgs[0].Text != wantText {
|
||||
t.Fatalf("msgs[0].Text = %q, want %q", msgs[0].Text, wantText)
|
||||
}
|
||||
if len(sources) != 1 {
|
||||
t.Fatalf("len(sources) = %d, want 1", len(sources))
|
||||
}
|
||||
src := sources[0]
|
||||
if src.tag != protocol.MediaImage {
|
||||
t.Errorf("src.tag = %q, want %q", src.tag, protocol.MediaImage)
|
||||
}
|
||||
if src.url != "https://cdn.telegram.org/img1.jpg" {
|
||||
t.Errorf("src.url = %q, want first photo URL", src.url)
|
||||
}
|
||||
if len(src.extraURLs) != 2 ||
|
||||
src.extraURLs[0] != "https://cdn.telegram.org/img2.jpg" ||
|
||||
src.extraURLs[1] != "https://cdn.telegram.org/img3.jpg" {
|
||||
t.Errorf("src.extraURLs = %v, want [img2, img3]", src.extraURLs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePublicMessagesSinglePhotoUnchanged(t *testing.T) {
|
||||
// Single photo: one [IMAGE] header, no extraURLs.
|
||||
body := []byte(`
|
||||
<html><body>
|
||||
<div class="tgme_widget_message" data-post="testchan/220">
|
||||
<a class="tgme_widget_message_photo_wrap" style="background-image:url('https://cdn.telegram.org/single.jpg')"></a>
|
||||
<div class="tgme_widget_message_text">just one</div>
|
||||
</div>
|
||||
</body></html>
|
||||
`)
|
||||
msgs, sources, err := parsePublicMessagesWithMedia(body)
|
||||
if err != nil {
|
||||
t.Fatalf("parsePublicMessagesWithMedia: %v", err)
|
||||
}
|
||||
if len(msgs) != 1 {
|
||||
t.Fatalf("len(msgs) = %d, want 1", len(msgs))
|
||||
}
|
||||
wantText := "[IMAGE]\njust one"
|
||||
if msgs[0].Text != wantText {
|
||||
t.Fatalf("msgs[0].Text = %q, want %q", msgs[0].Text, wantText)
|
||||
}
|
||||
if sources[0].url != "https://cdn.telegram.org/single.jpg" || len(sources[0].extraURLs) != 0 {
|
||||
t.Errorf("source = %+v, want url=single, extraURLs empty", sources[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePublicMessagesReplyPreviewUsesMainBody(t *testing.T) {
|
||||
body := []byte(`
|
||||
<html><body>
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
@@ -24,6 +25,20 @@ type Config struct {
|
||||
NoTelegram bool // if true, fetch public channels without Telegram login
|
||||
AllowManage bool // if true, remote channel management and sending via DNS is allowed
|
||||
Debug bool // if true, log every decoded DNS query
|
||||
// NoMedia disables downloading and serving image/file media. When set, the
|
||||
// server emits the legacy [TAG]\ncaption form for media messages so old
|
||||
// clients keep working unchanged.
|
||||
NoMedia bool
|
||||
// MediaMaxSize is the per-file cap in bytes for cached media. 0 means no
|
||||
// cap (not recommended in production).
|
||||
MediaMaxSize int64
|
||||
// MediaCacheTTL is the cache lifetime in minutes for a single entry. The
|
||||
// effective TTL is reset whenever the same upstream id is fetched again.
|
||||
MediaCacheTTL int
|
||||
// MediaCompression names the compression applied to cached media bytes
|
||||
// before they're split into DNS blocks. One of "none", "gzip",
|
||||
// "deflate". Empty defaults to "gzip".
|
||||
MediaCompression string
|
||||
Telegram TelegramConfig
|
||||
}
|
||||
|
||||
@@ -64,6 +79,39 @@ func (s *Server) Run(ctx context.Context) error {
|
||||
return fmt.Errorf("derive keys: %w", err)
|
||||
}
|
||||
|
||||
SetMediaDebugLogs(s.cfg.Debug)
|
||||
|
||||
// Configure media cache before any reader starts so the very first fetch
|
||||
// cycle can populate it. When --no-media is set we leave Feed.media as
|
||||
// nil; the readers fall through to the legacy [TAG]\ncaption form, and
|
||||
// Feed.GetBlock rejects media-channel queries with not-found.
|
||||
if !s.cfg.NoMedia {
|
||||
ttlMin := s.cfg.MediaCacheTTL
|
||||
if ttlMin <= 0 {
|
||||
ttlMin = 600
|
||||
}
|
||||
ttl := time.Duration(ttlMin) * time.Minute
|
||||
compName := s.cfg.MediaCompression
|
||||
if compName == "" {
|
||||
compName = "gzip"
|
||||
}
|
||||
compression, err := protocol.ParseMediaCompressionName(compName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("--media-compression: %w", err)
|
||||
}
|
||||
mediaCache := NewMediaCache(MediaCacheConfig{
|
||||
MaxFileBytes: s.cfg.MediaMaxSize,
|
||||
TTL: ttl,
|
||||
Compression: compression,
|
||||
Logf: logfMedia,
|
||||
})
|
||||
s.feed.SetMediaCache(mediaCache)
|
||||
log.Printf("[server] media cache enabled: max-size=%d bytes, ttl=%s, compression=%s", s.cfg.MediaMaxSize, ttl, compression)
|
||||
go s.runMediaSweep(ctx, mediaCache, ttl)
|
||||
} else {
|
||||
log.Println("[server] media cache disabled (--no-media)")
|
||||
}
|
||||
|
||||
go startLatestVersionTracker(ctx, s.feed)
|
||||
var channelCtl channelRefresher
|
||||
|
||||
@@ -180,3 +228,29 @@ func prefixXAccounts(accounts []string) []string {
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// runMediaSweep periodically evicts expired entries from the cache. The
|
||||
// interval is min(ttl/4, 5min) so we don't waste cycles on long-TTL configs
|
||||
// while still reclaiming slots in time under steady-state churn.
|
||||
func (s *Server) runMediaSweep(ctx context.Context, cache *MediaCache, ttl time.Duration) {
|
||||
if cache == nil {
|
||||
return
|
||||
}
|
||||
interval := ttl / 4
|
||||
if interval <= 0 || interval > 5*time.Minute {
|
||||
interval = 5 * time.Minute
|
||||
}
|
||||
if interval < 30*time.Second {
|
||||
interval = 30 * time.Second
|
||||
}
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
cache.Sweep()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+91
-11
@@ -253,7 +253,7 @@ func (tr *TelegramReader) fetchAll(ctx context.Context, api *tg.Client) {
|
||||
}
|
||||
|
||||
userNames := buildUserMap(hist)
|
||||
msgs, err := tr.extractMessages(hist, rp.chatType, userNames)
|
||||
msgs, err := tr.extractMessages(ctx, api, hist, rp.chatType, userNames)
|
||||
if err != nil {
|
||||
log.Printf("[telegram] fetch %s: extract messages failed: %v", username, err)
|
||||
failed++
|
||||
@@ -334,7 +334,7 @@ func (tr *TelegramReader) fetchChannel(ctx context.Context, api *tg.Client, user
|
||||
}
|
||||
|
||||
userNames := buildUserMap(hist)
|
||||
return tr.extractMessages(hist, protocol.ChatTypeChannel, userNames)
|
||||
return tr.extractMessages(ctx, api, hist, protocol.ChatTypeChannel, userNames)
|
||||
}
|
||||
|
||||
// buildUserMap extracts a user ID → display name map from a history response.
|
||||
@@ -366,7 +366,7 @@ func buildUserMap(hist tg.MessagesMessagesClass) map[int64]string {
|
||||
return m
|
||||
}
|
||||
|
||||
func (tr *TelegramReader) extractMessages(hist tg.MessagesMessagesClass, chatType protocol.ChatType, userNames map[int64]string) ([]protocol.Message, error) {
|
||||
func (tr *TelegramReader) extractMessages(ctx context.Context, api *tg.Client, hist tg.MessagesMessagesClass, chatType protocol.ChatType, userNames map[int64]string) ([]protocol.Message, error) {
|
||||
var tgMsgs []tg.MessageClass
|
||||
|
||||
switch h := hist.(type) {
|
||||
@@ -380,21 +380,67 @@ func (tr *TelegramReader) extractMessages(hist tg.MessagesMessagesClass, chatTyp
|
||||
return nil, fmt.Errorf("unexpected messages type: %T", hist)
|
||||
}
|
||||
|
||||
var msgs []protocol.Message
|
||||
// Album-aware grouping: Telegram delivers an album as N separate
|
||||
// messages sharing the same GroupedID. We merge them into one feed
|
||||
// message that carries every album item's media header and the album's
|
||||
// single caption, with the lowest message ID as the canonical post id.
|
||||
type album struct {
|
||||
canonical *tg.Message
|
||||
headers []string
|
||||
caption string
|
||||
}
|
||||
groups := map[int64]*album{}
|
||||
var order []int64
|
||||
var nextSingleID int64 = -1 // sentinel keys for non-grouped messages
|
||||
|
||||
for _, raw := range tgMsgs {
|
||||
msg, ok := raw.(*tg.Message)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
header, caption := tr.extractMediaHeaderAndCaption(ctx, api, msg)
|
||||
if header == "" && caption == "" {
|
||||
continue
|
||||
}
|
||||
gid := msg.GroupedID
|
||||
if gid == 0 {
|
||||
gid = nextSingleID
|
||||
nextSingleID--
|
||||
}
|
||||
g, exists := groups[gid]
|
||||
if !exists {
|
||||
g = &album{canonical: msg}
|
||||
groups[gid] = g
|
||||
order = append(order, gid)
|
||||
}
|
||||
if header != "" {
|
||||
g.headers = append(g.headers, header)
|
||||
}
|
||||
if caption != "" && g.caption == "" {
|
||||
g.caption = caption
|
||||
}
|
||||
// Keep the canonical pointer at the lowest-id message so reply,
|
||||
// timestamp, and ordering stay stable across album items.
|
||||
if msg.ID < g.canonical.ID {
|
||||
g.canonical = msg
|
||||
}
|
||||
}
|
||||
|
||||
text := tr.extractText(msg)
|
||||
msgs := make([]protocol.Message, 0, len(order))
|
||||
for _, gid := range order {
|
||||
g := groups[gid]
|
||||
text := strings.Join(g.headers, "\n")
|
||||
if text != "" && g.caption != "" {
|
||||
text += "\n" + g.caption
|
||||
} else if text == "" {
|
||||
text = g.caption
|
||||
}
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// For private chats, prefix with the sender's name.
|
||||
if chatType == protocol.ChatTypePrivate {
|
||||
if fromID, ok := msg.GetFromID(); ok {
|
||||
if fromID, ok := g.canonical.GetFromID(); ok {
|
||||
if pu, ok := fromID.(*tg.PeerUser); ok {
|
||||
if name, ok := userNames[pu.UserID]; ok {
|
||||
text = name + ": " + text
|
||||
@@ -403,8 +449,7 @@ func (tr *TelegramReader) extractMessages(hist tg.MessagesMessagesClass, chatTyp
|
||||
}
|
||||
}
|
||||
|
||||
// Mark messages that are replies (include reply-to message ID).
|
||||
if replyTo, hasReply := msg.GetReplyTo(); hasReply {
|
||||
if replyTo, hasReply := g.canonical.GetReplyTo(); hasReply {
|
||||
if rh, ok := replyTo.(*tg.MessageReplyHeader); ok {
|
||||
if rid, hasID := rh.GetReplyToMsgID(); hasID {
|
||||
text = fmt.Sprintf("%s:%d\n%s", protocol.MediaReply, rid, text)
|
||||
@@ -417,8 +462,8 @@ func (tr *TelegramReader) extractMessages(hist tg.MessagesMessagesClass, chatTyp
|
||||
}
|
||||
|
||||
msgs = append(msgs, protocol.Message{
|
||||
ID: uint32(msg.ID),
|
||||
Timestamp: uint32(msg.Date),
|
||||
ID: uint32(g.canonical.ID),
|
||||
Timestamp: uint32(g.canonical.Date),
|
||||
Text: text,
|
||||
})
|
||||
}
|
||||
@@ -426,6 +471,41 @@ func (tr *TelegramReader) extractMessages(hist tg.MessagesMessagesClass, chatTyp
|
||||
return msgs, nil
|
||||
}
|
||||
|
||||
// extractMediaHeaderAndCaption returns the [TAG]<meta> header line (if any)
|
||||
// and the human caption for a single Telegram message. Used by the album
|
||||
// merger to combine N messages into one feed message with multiple headers.
|
||||
// Polls remain inline because they're never grouped into albums.
|
||||
func (tr *TelegramReader) extractMediaHeaderAndCaption(ctx context.Context, api *tg.Client, msg *tg.Message) (header, caption string) {
|
||||
caption = applyTextURLEntities(msg.Message, msg.Entities)
|
||||
if msg.Media == nil {
|
||||
return "", caption
|
||||
}
|
||||
switch m := msg.Media.(type) {
|
||||
case *tg.MessageMediaPhoto, *tg.MessageMediaDocument:
|
||||
if meta, ok := tr.downloadTelegramMedia(ctx, api, msg); ok {
|
||||
header = strings.TrimSuffix(meta.String(), "\n")
|
||||
return header, caption
|
||||
}
|
||||
// Non-downloadable image/doc: fall back to legacy [TAG] tag only.
|
||||
if _, ok := m.(*tg.MessageMediaPhoto); ok {
|
||||
return protocol.MediaImage, caption
|
||||
}
|
||||
if d, ok := m.(*tg.MessageMediaDocument); ok {
|
||||
return tr.classifyDocument(d), caption
|
||||
}
|
||||
case *tg.MessageMediaGeo, *tg.MessageMediaGeoLive, *tg.MessageMediaVenue:
|
||||
return protocol.MediaLocation, caption
|
||||
case *tg.MessageMediaContact:
|
||||
return protocol.MediaContact, caption
|
||||
case *tg.MessageMediaPoll:
|
||||
// Polls render with a synthesised body that's not a normal caption;
|
||||
// keep the legacy single-message behaviour by returning the whole
|
||||
// payload as the "caption" with no header.
|
||||
return "", tr.extractText(msg)
|
||||
}
|
||||
return "", caption
|
||||
}
|
||||
|
||||
func (tr *TelegramReader) extractText(msg *tg.Message) string {
|
||||
text := applyTextURLEntities(msg.Message, msg.Entities)
|
||||
|
||||
|
||||
@@ -247,7 +247,7 @@ func (xr *XPublicReader) fetchAccount(ctx context.Context, username string) ([]p
|
||||
continue
|
||||
}
|
||||
|
||||
msgs, title, err := parseXRSSMessages(body, username)
|
||||
msgs, sources, title, err := parseXRSSMessagesWithMedia(body, username)
|
||||
if err != nil {
|
||||
log.Printf("[x] @%s: instance %s: parse error: %v", username, instance, err)
|
||||
lastErr = fmt.Errorf("%s: %w", instance, err)
|
||||
@@ -255,9 +255,15 @@ func (xr *XPublicReader) fetchAccount(ctx context.Context, username string) ([]p
|
||||
}
|
||||
// Filter out garbled messages (invalid UTF-8 or mostly non-printable).
|
||||
cleaned := msgs[:0]
|
||||
for _, m := range msgs {
|
||||
cleanedSources := sources[:0]
|
||||
for i, m := range msgs {
|
||||
if isReadableText(m.Text) {
|
||||
cleaned = append(cleaned, m)
|
||||
if i < len(sources) {
|
||||
cleanedSources = append(cleanedSources, sources[i])
|
||||
} else {
|
||||
cleanedSources = append(cleanedSources, mediaSource{})
|
||||
}
|
||||
} else {
|
||||
log.Printf("[x] @%s: skipping garbled message ID=%d (len=%d)", username, m.ID, len(m.Text))
|
||||
}
|
||||
@@ -266,6 +272,14 @@ func (xr *XPublicReader) fetchAccount(ctx context.Context, username string) ([]p
|
||||
lastErr = fmt.Errorf("%s: all %d messages were garbled", instance, len(msgs))
|
||||
continue
|
||||
}
|
||||
// Run image downloads when a media cache is attached. Each Nitter
|
||||
// item carries an image URL we extracted from the description; for
|
||||
// non-image media types we have no public URL to fetch on X, so the
|
||||
// downstream rendering simply falls back to the legacy [TAG]\ncaption
|
||||
// form for those.
|
||||
if cache := xr.feed.MediaCache(); cache != nil && len(cleanedSources) > 0 {
|
||||
applyHTTPMediaSources(ctx, cache, cleaned, cleanedSources)
|
||||
}
|
||||
return cleaned, title, nil
|
||||
}
|
||||
if lastErr == nil {
|
||||
@@ -291,19 +305,32 @@ type xRSSItem struct {
|
||||
}
|
||||
|
||||
func parseXRSSMessages(body []byte, feedUser string) ([]protocol.Message, string, error) {
|
||||
msgs, _, title, err := parseXRSSMessagesWithMedia(body, feedUser)
|
||||
return msgs, title, err
|
||||
}
|
||||
|
||||
// parseXRSSMessagesWithMedia parses a Nitter RSS feed and additionally
|
||||
// returns one mediaSource per parsed message — same length and order — so
|
||||
// the caller can run HTTP downloads against the extracted image URLs and
|
||||
// rewrite messages to use the [IMAGE]<size>:<dl>:<ch>:<blk>:<crc32> form.
|
||||
// X posts on Nitter can contain multiple images per status; we only surface
|
||||
// the *first* one for now, which keeps the download pipeline simple and
|
||||
// matches what the legacy text rendering shows.
|
||||
func parseXRSSMessagesWithMedia(body []byte, feedUser string) ([]protocol.Message, []mediaSource, string, error) {
|
||||
body = sanitizeUTF8(body)
|
||||
var feed xRSS
|
||||
if err := xml.Unmarshal(body, &feed); err != nil {
|
||||
return nil, "", fmt.Errorf("parse rss: %w", err)
|
||||
return nil, nil, "", fmt.Errorf("parse rss: %w", err)
|
||||
}
|
||||
if len(feed.Channel.Items) == 0 {
|
||||
return nil, "", fmt.Errorf("empty rss feed")
|
||||
return nil, nil, "", fmt.Errorf("empty rss feed")
|
||||
}
|
||||
|
||||
title := strings.TrimSpace(feed.Channel.Title)
|
||||
|
||||
feedUserLower := strings.ToLower(strings.TrimPrefix(feedUser, "@"))
|
||||
msgs := make([]protocol.Message, 0, len(feed.Channel.Items))
|
||||
sources := make([]mediaSource, 0, len(feed.Channel.Items))
|
||||
for _, item := range feed.Channel.Items {
|
||||
id, err := extractXStatusID(item.GUID, item.Link)
|
||||
if err != nil {
|
||||
@@ -329,12 +356,60 @@ func parseXRSSMessages(body []byte, feedUser string) ([]protocol.Message, string
|
||||
}
|
||||
}
|
||||
|
||||
// Best-effort image extraction from the description / encoded HTML.
|
||||
src := mediaSource{}
|
||||
if u := extractFirstImgSrc(item.Description); u != "" {
|
||||
src = mediaSource{tag: protocol.MediaImage, url: u}
|
||||
} else if u := extractFirstImgSrc(item.Encoded); u != "" {
|
||||
src = mediaSource{tag: protocol.MediaImage, url: u}
|
||||
}
|
||||
|
||||
msgs = append(msgs, protocol.Message{ID: id, Timestamp: ts, Text: text})
|
||||
sources = append(sources, src)
|
||||
}
|
||||
if len(msgs) == 0 {
|
||||
return nil, "", fmt.Errorf("no parseable posts")
|
||||
return nil, nil, "", fmt.Errorf("no parseable posts")
|
||||
}
|
||||
return msgs, title, nil
|
||||
return msgs, sources, title, nil
|
||||
}
|
||||
|
||||
// extractFirstImgSrc scans an HTML fragment for the first <img> tag that has
// a src attribute and returns that attribute's raw value (quoted or bare).
// It returns "" when no such tag exists or the value is unterminated. The
// value is returned verbatim; HTML entities are not decoded.
//
// Matching is ASCII case-insensitive and works on the original bytes. The
// input is never lowercased first, because strings.ToLower can change the
// byte length of non-ASCII text and invalidate offsets. Only attributes
// literally named "src" inside the <img> tag are considered, so data-src and
// src attributes of later tags are ignored.
func extractFirstImgSrc(htmlFrag string) string {
	for pos := 0; pos < len(htmlFrag); {
		lt := strings.IndexByte(htmlFrag[pos:], '<')
		if lt < 0 {
			return ""
		}
		pos += lt + 1 // first byte after '<'

		// Need "img" plus one whitespace byte to call it an <img> tag.
		if len(htmlFrag)-pos < 4 ||
			!strings.EqualFold(htmlFrag[pos:pos+3], "img") ||
			!isHTMLSpaceByte(htmlFrag[pos+3]) {
			continue
		}
		if src, ok := imgTagSrc(htmlFrag[pos+4:]); ok {
			return src
		}
		// This <img> had no usable src; keep looking for another one.
	}
	return ""
}

// imgTagSrc walks the attribute list that follows "<img " and returns the
// value of the src attribute, or false if the tag ends (or is malformed)
// before one is found.
func imgTagSrc(tag string) (string, bool) {
	i := 0
	for i < len(tag) {
		// Skip separators between attributes, including the '/' of "/>".
		for i < len(tag) && (isHTMLSpaceByte(tag[i]) || tag[i] == '/') {
			i++
		}
		if i >= len(tag) || tag[i] == '>' {
			return "", false
		}

		nameStart := i
		for i < len(tag) && tag[i] != '=' && tag[i] != '>' && tag[i] != '/' && !isHTMLSpaceByte(tag[i]) {
			i++
		}
		name := tag[nameStart:i]

		for i < len(tag) && isHTMLSpaceByte(tag[i]) {
			i++
		}
		if i >= len(tag) || tag[i] != '=' {
			continue // boolean attribute without a value
		}
		i++ // consume '='
		for i < len(tag) && isHTMLSpaceByte(tag[i]) {
			i++
		}
		if i >= len(tag) {
			return "", false
		}

		var value string
		if quote := tag[i]; quote == '"' || quote == '\'' {
			end := strings.IndexByte(tag[i+1:], quote)
			if end < 0 {
				return "", false // unterminated quoted value
			}
			value = tag[i+1 : i+1+end]
			i += end + 2
		} else {
			// Bare value: runs until whitespace or the end of the tag.
			start := i
			for i < len(tag) && tag[i] != '>' && !isHTMLSpaceByte(tag[i]) {
				i++
			}
			if i >= len(tag) {
				return "", false // unterminated bare value
			}
			value = tag[start:i]
		}

		// The length check keeps non-ASCII fold equivalents from matching.
		if len(name) == 3 && strings.EqualFold(name, "src") {
			return value, true
		}
	}
	return "", false
}

// isHTMLSpaceByte reports whether b is an ASCII whitespace byte that can
// separate HTML attributes.
func isHTMLSpaceByte(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}
|
||||
|
||||
// extractLinkUsername extracts the username from a Nitter/X status URL.
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNormaliseAutoUpdateList(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in []string
|
||||
want []string
|
||||
}{
|
||||
{"nil", nil, []string{}},
|
||||
{"empty", []string{}, []string{}},
|
||||
{"strip @", []string{"@one", "two"}, []string{"one", "two"}},
|
||||
{"trim whitespace", []string{" one ", "\ttwo\n"}, []string{"one", "two"}},
|
||||
{"drop empties", []string{"one", "", " ", "@", "two"}, []string{"one", "two"}},
|
||||
{"dedupe preserves order", []string{"a", "b", "@a", "c", "b"}, []string{"a", "b", "c"}},
|
||||
{"dedupe across @ form", []string{"@chan", "chan"}, []string{"chan"}},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := normaliseAutoUpdateList(c.in)
|
||||
if got == nil {
|
||||
got = []string{}
|
||||
}
|
||||
if !reflect.DeepEqual(got, c.want) {
|
||||
t.Errorf("normaliseAutoUpdateList(%v) = %v, want %v", c.in, got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,356 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"unicode"
|
||||
|
||||
"github.com/sartoopjj/thefeed/internal/client"
|
||||
"github.com/sartoopjj/thefeed/internal/protocol"
|
||||
)
|
||||
|
||||
// mediaDLProgress holds the block counters for one in-flight media
// download. handleMediaProgress reads both fields with atomic loads and
// reports them as "completed" and "total".
type mediaDLProgress struct {
	completed, total int32
}
|
||||
|
||||
// mediaProgressKey builds the lookup key for a download's progress entry:
// the decimal channel, the decimal block count and the CRC as eight
// zero-padded lowercase hex digits, joined with ':'.
func mediaProgressKey(channel uint16, blocks uint16, crc uint32) string {
	parts := [...]string{
		strconv.FormatUint(uint64(channel), 10),
		strconv.FormatUint(uint64(blocks), 10),
		fmt.Sprintf("%08x", crc),
	}
	return strings.Join(parts[:], ":")
}
|
||||
|
||||
func (s *Server) handleMediaProgress(w http.ResponseWriter, r *http.Request) {
|
||||
q := r.URL.Query()
|
||||
ch64, _ := strconv.ParseUint(q.Get("ch"), 10, 16)
|
||||
blk64, _ := strconv.ParseUint(q.Get("blk"), 10, 16)
|
||||
crc64, _ := strconv.ParseUint(strings.TrimSpace(q.Get("crc")), 16, 32)
|
||||
key := mediaProgressKey(uint16(ch64), uint16(blk64), uint32(crc64))
|
||||
|
||||
s.dlMu.Lock()
|
||||
prog := s.dlProgress[key]
|
||||
s.dlMu.Unlock()
|
||||
if prog == nil {
|
||||
writeJSON(w, map[string]any{"active": false, "completed": 0, "total": int(blk64)})
|
||||
return
|
||||
}
|
||||
writeJSON(w, map[string]any{
|
||||
"active": true,
|
||||
"completed": int(atomic.LoadInt32(&prog.completed)),
|
||||
"total": int(atomic.LoadInt32(&prog.total)),
|
||||
})
|
||||
}
|
||||
|
||||
// handleMediaGet streams a media blob assembled from the
|
||||
// (channel, blocks, crc) tuple embedded in a message's text.
|
||||
//
|
||||
// Query string:
|
||||
//
|
||||
// ch=<uint16> media channel number (10000..60000)
|
||||
// blk=<uint16> total block count
|
||||
// size=<bytes> expected file size (Content-Length)
|
||||
// crc=<hex8> expected CRC32 of full body
|
||||
// name=<filename> optional filename for Content-Disposition
|
||||
// type=<mime> optional mime type override; sanitized
|
||||
func (s *Server) handleMediaGet(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
q := r.URL.Query()
|
||||
ch64, err := strconv.ParseUint(q.Get("ch"), 10, 16)
|
||||
if err != nil {
|
||||
http.Error(w, "bad ch", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
channel := uint16(ch64)
|
||||
if !protocol.IsMediaChannel(channel) {
|
||||
http.Error(w, "ch out of media range", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
blk64, err := strconv.ParseUint(q.Get("blk"), 10, 16)
|
||||
if err != nil || blk64 == 0 {
|
||||
http.Error(w, "bad blk", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
blockCount := uint16(blk64)
|
||||
|
||||
const maxClaimedSize = 100 * 1024 * 1024
|
||||
expectedSize, _ := strconv.ParseInt(q.Get("size"), 10, 64)
|
||||
if expectedSize < 0 || expectedSize > maxClaimedSize {
|
||||
http.Error(w, "bad size", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
expectedCRC := uint32(0)
|
||||
if v := strings.TrimSpace(q.Get("crc")); v != "" {
|
||||
c, err := strconv.ParseUint(v, 16, 32)
|
||||
if err != nil {
|
||||
http.Error(w, "bad crc", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
expectedCRC = uint32(c)
|
||||
}
|
||||
|
||||
s.mu.RLock()
|
||||
fetcher := s.fetcher
|
||||
s.mu.RUnlock()
|
||||
if fetcher == nil {
|
||||
http.Error(w, "fetcher not configured", http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
ctx := r.Context()
|
||||
|
||||
// Disk-cache hit: serve directly without ever talking to DNS.
|
||||
if s.mediaCache != nil && expectedCRC != 0 && expectedSize > 0 {
|
||||
if body, mime, ok := s.mediaCache.Get(expectedSize, expectedCRC); ok {
|
||||
servedMime := sanitizeMime(q.Get("type"))
|
||||
if servedMime == "application/octet-stream" {
|
||||
if mime != "" {
|
||||
servedMime = sanitizeMime(mime)
|
||||
} else if sniffed := http.DetectContentType(body); sniffed != "" {
|
||||
servedMime = sanitizeMime(sniffed)
|
||||
}
|
||||
}
|
||||
w.Header().Set("Content-Type", servedMime)
|
||||
w.Header().Set("Content-Length", strconv.Itoa(len(body)))
|
||||
w.Header().Set("Cache-Control", "private, max-age=86400")
|
||||
w.Header().Set("X-Total-Blocks", strconv.Itoa(int(blockCount)))
|
||||
w.Header().Set("X-Cache", "HIT")
|
||||
if filename := sanitizeFilename(q.Get("name")); filename != "" {
|
||||
w.Header().Set("Content-Disposition", "inline; filename=\""+filename+"\"")
|
||||
}
|
||||
if _, err := w.Write(body); err != nil {
|
||||
s.addLog(fmt.Sprintf("media disk-cache write failed: %v", err))
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch block 0 synchronously: it carries the protocol header (CRC32,
|
||||
// version, compression). We need that before we can decompress and
|
||||
// before we can sniff Content-Type from the decompressed body.
|
||||
firstBlock, err := fetcher.FetchBlock(ctx, channel, 0)
|
||||
if err != nil {
|
||||
if ctx.Err() != nil {
|
||||
http.Error(w, "fetch cancelled", 499)
|
||||
return
|
||||
}
|
||||
http.Error(w, fmt.Sprintf("fetch media: %v", err), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
if len(firstBlock) < protocol.MediaBlockHeaderLen {
|
||||
http.Error(w, "malformed block 0", http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
header, err := protocol.DecodeMediaBlockHeader(firstBlock[:protocol.MediaBlockHeaderLen])
|
||||
if err != nil {
|
||||
http.Error(w, "malformed block 0", http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
if expectedCRC != 0 && header.CRC32 != expectedCRC {
|
||||
http.Error(w, "content hash mismatch", http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
firstCompressed := firstBlock[protocol.MediaBlockHeaderLen:]
|
||||
|
||||
// Register this download so /api/media/progress can report block
|
||||
// progress as the client polls. Block 0 is already fetched.
|
||||
progKey := mediaProgressKey(channel, blockCount, expectedCRC)
|
||||
prog := &mediaDLProgress{total: int32(blockCount), completed: 1}
|
||||
s.dlMu.Lock()
|
||||
s.dlProgress[progKey] = prog
|
||||
s.dlMu.Unlock()
|
||||
defer func() {
|
||||
s.dlMu.Lock()
|
||||
delete(s.dlProgress, progKey)
|
||||
s.dlMu.Unlock()
|
||||
}()
|
||||
|
||||
// Pipe compressed bytes (block-0 payload + later blocks) into a
|
||||
// decompressor reader. Fed by a goroutine; consumed below for sniffing
|
||||
// and for streaming to the HTTP response.
|
||||
pipeR, pipeW := io.Pipe()
|
||||
go func() {
|
||||
var pipeErr error
|
||||
defer func() { pipeW.CloseWithError(pipeErr) }()
|
||||
if _, err := pipeW.Write(firstCompressed); err != nil {
|
||||
pipeErr = err
|
||||
return
|
||||
}
|
||||
if blockCount > 1 {
|
||||
progressCB := func(done, _ int) {
|
||||
// done counts blocks 1..N-1; add 1 for block 0 already fetched.
|
||||
atomic.StoreInt32(&prog.completed, int32(done+1))
|
||||
}
|
||||
pipeErr = fetcher.FetchMediaBlocksStream(ctx, channel, 1, blockCount-1, pipeW, progressCB)
|
||||
}
|
||||
}()
|
||||
|
||||
body, err := client.DecompressMediaReader(pipeR, header.Compression)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("decompress: %v", err), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
defer body.Close()
|
||||
|
||||
// Tee decompressed bytes into a buffer so we can persist them to the
|
||||
// disk cache after a successful response.
|
||||
var teeBuf *bytes.Buffer
|
||||
if s.mediaCache != nil && expectedCRC != 0 && expectedSize > 0 && expectedSize <= mediaCacheMaxFileExt {
|
||||
teeBuf = bytes.NewBuffer(make([]byte, 0, expectedSize))
|
||||
}
|
||||
|
||||
// Sniff Content-Type from the first decompressed bytes before flushing
|
||||
// headers — once Content-Type goes out we can't change it.
|
||||
const sniffSize = 512
|
||||
sniff := make([]byte, sniffSize)
|
||||
n, err := io.ReadFull(body, sniff)
|
||||
if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
|
||||
http.Error(w, fmt.Sprintf("read media: %v", err), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
sniff = sniff[:n]
|
||||
|
||||
mime := sanitizeMime(q.Get("type"))
|
||||
if mime == "application/octet-stream" {
|
||||
if got := http.DetectContentType(sniff); got != "" {
|
||||
mime = sanitizeMime(got)
|
||||
}
|
||||
}
|
||||
filename := sanitizeFilename(q.Get("name"))
|
||||
|
||||
w.Header().Set("Content-Type", mime)
|
||||
if expectedSize > 0 {
|
||||
w.Header().Set("Content-Length", strconv.FormatInt(expectedSize, 10))
|
||||
}
|
||||
w.Header().Set("Cache-Control", "private, max-age=86400")
|
||||
w.Header().Set("X-Total-Blocks", strconv.Itoa(int(blockCount)))
|
||||
w.Header().Set("X-Cache", "MISS")
|
||||
w.Header().Set("X-Media-Compression", header.Compression.String())
|
||||
if filename != "" {
|
||||
w.Header().Set("Content-Disposition", "inline; filename=\""+filename+"\"")
|
||||
}
|
||||
flusher, _ := w.(http.Flusher)
|
||||
|
||||
if teeBuf != nil {
|
||||
teeBuf.Write(sniff)
|
||||
}
|
||||
if _, err := w.Write(sniff); err != nil {
|
||||
s.addLog(fmt.Sprintf("media write head failed: %v", err))
|
||||
return
|
||||
}
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
|
||||
dst := io.Writer(&flushAfterEachWriter{w: w, flusher: flusher})
|
||||
if teeBuf != nil {
|
||||
dst = io.MultiWriter(dst, teeBuf)
|
||||
}
|
||||
// Small buffer so the browser sees many small chunks instead of one big
|
||||
// one — the xhr onprogress event fires per chunk, which is what drives
|
||||
// the smooth K/N block counter on the client.
|
||||
buf := make([]byte, 2048)
|
||||
if _, err := io.CopyBuffer(dst, body, buf); err != nil {
|
||||
s.addLog(fmt.Sprintf("media stream failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if teeBuf == nil {
|
||||
s.addLog(fmt.Sprintf("media disk-cache skipped: size=%d crc=%x mediaCache=%v", expectedSize, expectedCRC, s.mediaCache != nil))
|
||||
} else if expectedSize > 0 && int64(teeBuf.Len()) != expectedSize {
|
||||
s.addLog(fmt.Sprintf("media disk-cache skipped: tee=%d expected=%d (truncated stream)", teeBuf.Len(), expectedSize))
|
||||
} else {
|
||||
if err := s.mediaCache.Put(int64(teeBuf.Len()), expectedCRC, teeBuf.Bytes(), mime); err != nil {
|
||||
s.addLog(fmt.Sprintf("media disk-cache put failed: %v", err))
|
||||
} else {
|
||||
s.addLog(fmt.Sprintf("media cached: %d bytes, crc=%08x, mime=%s", teeBuf.Len(), expectedCRC, mime))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flushAfterEachWriter is an io.Writer over an http.ResponseWriter that
// flushes after every successful write, so each chunk copied into it is
// pushed to the client immediately.
type flushAfterEachWriter struct {
	w       http.ResponseWriter
	flusher http.Flusher // nil when the ResponseWriter cannot flush
}

// Write forwards p to the wrapped ResponseWriter and flushes when the write
// succeeded. The byte count and error of the underlying write are returned
// unchanged.
func (fw *flushAfterEachWriter) Write(p []byte) (int, error) {
	written, werr := fw.w.Write(p)
	if werr != nil {
		return written, werr
	}
	fw.Flush()
	return written, nil
}

// Flush flushes the wrapped writer; it is a no-op without a flusher.
func (fw *flushAfterEachWriter) Flush() {
	if fw.flusher == nil {
		return
	}
	fw.flusher.Flush()
}
|
||||
|
||||
// sanitizeMime returns a "type/subtype" MIME string that is safe to place
// in a Content-Type header, or "application/octet-stream" when the input is
// empty, malformed or blocked.
//
// Any parameters after ';' are dropped. The remaining value must contain
// exactly one '/', with a non-empty part on each side, and consist only of
// ASCII letters, ASCII digits and the characters '-', '+', '.'. HTML, XHTML
// and SVG types are rejected (compared case-insensitively) because a
// browser can execute script from them.
func sanitizeMime(s string) string {
	const fallback = "application/octet-stream"

	s = strings.TrimSpace(s)
	if s == "" {
		return fallback
	}
	if i := strings.IndexByte(s, ';'); i >= 0 {
		s = strings.TrimSpace(s[:i])
	}
	slash := strings.IndexByte(s, '/')
	if slash <= 0 || slash == len(s)-1 || strings.Count(s, "/") != 1 {
		return fallback
	}
	for _, r := range s {
		switch r {
		case '/', '-', '+', '.':
			continue
		}
		// unicode.IsLetter/IsDigit alone would also admit non-ASCII letters
		// and digits, which are not valid in a media type name.
		if r > unicode.MaxASCII || (!unicode.IsLetter(r) && !unicode.IsDigit(r)) {
			return fallback
		}
	}
	switch strings.ToLower(s) {
	case "text/html", "application/xhtml+xml", "image/svg+xml":
		return fallback
	}
	return s
}
|
||||
|
||||
// sanitizeFilename reduces a client-supplied name to something safe to
// embed in a quoted Content-Disposition filename.
//
// It keeps only the part after the last '/' or '\', drops control
// characters (below 0x20 and 0x7F), double quotes and backslashes, and
// limits the result to 200 bytes without splitting a multi-byte UTF-8
// character. An empty input, or a name that reduces to "" or "..", yields
// "".
func sanitizeFilename(s string) string {
	const maxLen = 200

	s = strings.TrimSpace(s)
	if s == "" {
		return ""
	}
	if i := strings.LastIndexAny(s, `/\`); i >= 0 {
		s = s[i+1:]
	}
	if s == "" || s == ".." {
		return ""
	}
	var b strings.Builder
	for _, r := range s {
		if r < 0x20 || r == 0x7F || r == '"' || r == '\\' {
			continue
		}
		b.WriteRune(r)
	}
	out := b.String()
	if len(out) > maxLen {
		// out is valid UTF-8 (built with WriteRune). Step back over
		// continuation bytes (10xxxxxx) so the cut lands on a rune start.
		cut := maxLen
		for cut > 0 && out[cut]&0xC0 == 0x80 {
			cut--
		}
		out = out[:cut]
	}
	return out
}
|
||||
@@ -0,0 +1,179 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Limits and naming for the on-disk media cache.
const (
	// mediaCacheFileExt is the file name suffix of every cache entry.
	mediaCacheFileExt = ".cache"
	// mediaCacheMaxMime is the longest mime string, in bytes, stored in or
	// accepted from a cache entry.
	mediaCacheMaxMime = 200
	// mediaCacheMaxFileExt is the hard cap on a cached body: 64 MiB.
	mediaCacheMaxFileExt = 64 << 20
)
|
||||
|
||||
// mediaDiskCache keeps downloaded media blobs on disk so that several
// devices talking to the same client/server pay for a DNS-tunnelled fetch
// only once. Entries are content-addressed by (size, crc32) and expire ttl
// after their file mtime.
//
// Each entry is one file named
//
//	<size>_<crc8hex>.cache
//
// laid out as:
//
//	2 bytes BE — mime length
//	N bytes    — mime utf8
//	rest       — raw file bytes
type mediaDiskCache struct {
	dir string        // directory holding the entry files
	ttl time.Duration // entry lifetime; values <= 0 disable expiry
	mu  sync.Mutex    // held by Put, Cleanup and Clear
}
|
||||
|
||||
func newMediaDiskCache(dir string, ttl time.Duration) (*mediaDiskCache, error) {
|
||||
if dir == "" {
|
||||
return nil, errors.New("media cache dir is empty")
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &mediaDiskCache{dir: dir, ttl: ttl}, nil
|
||||
}
|
||||
|
||||
func (c *mediaDiskCache) keyFile(size int64, crc uint32) string {
|
||||
return filepath.Join(c.dir, fmt.Sprintf("%d_%08x%s", size, crc, mediaCacheFileExt))
|
||||
}
|
||||
|
||||
// Get returns the cached body and mime type if present and not expired.
|
||||
// Touching mtime on hit so the entry stays alive while it's in use.
|
||||
func (c *mediaDiskCache) Get(size int64, crc uint32) (body []byte, mime string, ok bool) {
|
||||
if size <= 0 || crc == 0 {
|
||||
return nil, "", false
|
||||
}
|
||||
path := c.keyFile(size, crc)
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, "", false
|
||||
}
|
||||
if c.ttl > 0 && time.Since(info.ModTime()) > c.ttl {
|
||||
_ = os.Remove(path)
|
||||
return nil, "", false
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil || len(data) < 2 {
|
||||
return nil, "", false
|
||||
}
|
||||
mimeLen := int(binary.BigEndian.Uint16(data[:2]))
|
||||
if mimeLen > mediaCacheMaxMime || 2+mimeLen > len(data) {
|
||||
return nil, "", false
|
||||
}
|
||||
mime = string(data[2 : 2+mimeLen])
|
||||
body = data[2+mimeLen:]
|
||||
if int64(len(body)) != size {
|
||||
// Corrupt or partial write — treat as miss.
|
||||
return nil, "", false
|
||||
}
|
||||
_ = os.Chtimes(path, time.Now(), time.Now())
|
||||
return body, mime, true
|
||||
}
|
||||
|
||||
// Put writes the body+mime atomically to the cache.
|
||||
func (c *mediaDiskCache) Put(size int64, crc uint32, body []byte, mime string) error {
|
||||
if size <= 0 || crc == 0 || int64(len(body)) != size {
|
||||
return errors.New("media cache: invalid put")
|
||||
}
|
||||
if len(body) > mediaCacheMaxFileExt {
|
||||
return errors.New("media cache: body too large")
|
||||
}
|
||||
if len(mime) > mediaCacheMaxMime {
|
||||
mime = mime[:mediaCacheMaxMime]
|
||||
}
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
path := c.keyFile(size, crc)
|
||||
tmp := path + ".tmp"
|
||||
f, err := os.Create(tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
header := make([]byte, 2)
|
||||
binary.BigEndian.PutUint16(header, uint16(len(mime)))
|
||||
if _, err := f.Write(header); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
if _, err := f.Write([]byte(mime)); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
if _, err := f.Write(body); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmp, path)
|
||||
}
|
||||
|
||||
// Cleanup removes entries older than ttl. Returns the count removed.
|
||||
func (c *mediaDiskCache) Cleanup() int {
|
||||
if c.ttl <= 0 {
|
||||
return 0
|
||||
}
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
entries, err := os.ReadDir(c.dir)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
now := time.Now()
|
||||
removed := 0
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), mediaCacheFileExt) {
|
||||
continue
|
||||
}
|
||||
info, err := e.Info()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if now.Sub(info.ModTime()) > c.ttl {
|
||||
if os.Remove(filepath.Join(c.dir, e.Name())) == nil {
|
||||
removed++
|
||||
}
|
||||
}
|
||||
}
|
||||
return removed
|
||||
}
|
||||
|
||||
// Clear deletes every cached entry. Returns the count removed.
|
||||
func (c *mediaDiskCache) Clear() int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
entries, err := os.ReadDir(c.dir)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
removed := 0
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), mediaCacheFileExt) {
|
||||
continue
|
||||
}
|
||||
if os.Remove(filepath.Join(c.dir, e.Name())) == nil {
|
||||
removed++
|
||||
}
|
||||
}
|
||||
return removed
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
package web
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSanitizeMime(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"": "application/octet-stream",
|
||||
"image/jpeg": "image/jpeg",
|
||||
"image/png; charset=utf-8": "image/png",
|
||||
"text/html": "application/octet-stream", // blocked
|
||||
"application/xhtml+xml": "application/octet-stream", // blocked
|
||||
"image/svg+xml": "application/octet-stream", // blocked (XSS via SVG)
|
||||
"image/jpeg<script>": "application/octet-stream", // bad chars
|
||||
"weird": "application/octet-stream", // no slash
|
||||
"/leading": "application/octet-stream",
|
||||
"trailing/": "application/octet-stream",
|
||||
"application/vnd.api+json": "application/vnd.api+json",
|
||||
"image/webp": "image/webp",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := sanitizeMime(in); got != want {
|
||||
t.Errorf("sanitizeMime(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeFilename(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"": "",
|
||||
"foo.png": "foo.png",
|
||||
"../../etc/passwd": "passwd",
|
||||
"foo/bar/baz.txt": "baz.txt",
|
||||
"weird\nname.txt": "weirdname.txt",
|
||||
`bad"quote"name`: "badquotename",
|
||||
"..": "",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := sanitizeFilename(in); got != want {
|
||||
t.Errorf("sanitizeFilename(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
+1479
-35
File diff suppressed because it is too large
Load Diff
+368
-10
@@ -49,12 +49,31 @@ type Config struct {
|
||||
}
|
||||
|
||||
// Profile wraps a Config with a user-chosen nickname and a unique ID.
|
||||
// AutoUpdate is the per-profile list of channel usernames the auto-update
|
||||
// goroutine should refresh; AutoUpdateInterval (seconds, 0 → default) sets
|
||||
// the cadence.
|
||||
type Profile struct {
|
||||
ID string `json:"id"`
|
||||
Nickname string `json:"nickname"`
|
||||
Config Config `json:"config"`
|
||||
AutoUpdate []string `json:"autoUpdate,omitempty"`
|
||||
AutoUpdateInterval int `json:"autoUpdateInterval,omitempty"`
|
||||
}
|
||||
|
||||
// Timing knobs of the auto-update loop.
const (
	// minAutoUpdateInterval is the lower bound on the tick period: never
	// faster than once per minute, whatever the user configures. The DNS
	// path is expensive and the server's own fetch cycle is much longer.
	minAutoUpdateInterval = time.Minute
	// serverFetchSettleDelay is added after nextFetch before asking the
	// server for fresh data, so it has time to process the upstream
	// Telegram fetch and expose a coherent metadata snapshot.
	serverFetchSettleDelay = 30 * time.Second
	// autoUpdateStartupDelay postpones the first tick so the initial
	// metadata and resolver checks can land before polling starts.
	autoUpdateStartupDelay = 30 * time.Second
)
|
||||
|
||||
// SavedResolverScore stores persistent resolver performance data.
|
||||
type SavedResolverScore struct {
|
||||
Success int64 `json:"success"`
|
||||
@@ -137,6 +156,17 @@ type Server struct {
|
||||
titlesMu sync.Mutex
|
||||
titlesLoading bool
|
||||
titlesBackoffUntil time.Time
|
||||
|
||||
// dlMu guards dlProgress. Active media downloads register their block
|
||||
// counter here so the frontend can poll /api/media/progress and show
|
||||
// per-block updates instead of waiting for byte chunks.
|
||||
dlMu sync.Mutex
|
||||
dlProgress map[string]*mediaDLProgress
|
||||
|
||||
// mediaCache is a disk-backed store for downloaded media bytes so that
|
||||
// multiple devices on the same network share a single DNS-tunnelled
|
||||
// fetch. Entries expire after 7 days.
|
||||
mediaCache *mediaDiskCache
|
||||
}
|
||||
|
||||
// New creates a new web server.
|
||||
@@ -154,6 +184,11 @@ func New(dataDir string, port int, host string, password string) (*Server, error
|
||||
|
||||
scanner := client.NewResolverScanner()
|
||||
|
||||
mediaCache, mcErr := newMediaDiskCache(filepath.Join(dataDir, "media-cache"), 7*24*time.Hour)
|
||||
if mcErr != nil {
|
||||
log.Printf("Warning: media disk cache disabled: %v", mcErr)
|
||||
}
|
||||
|
||||
s := &Server{
|
||||
dataDir: dataDir,
|
||||
port: port,
|
||||
@@ -165,6 +200,13 @@ func New(dataDir string, port int, host string, password string) (*Server, error
|
||||
lastMsgIDs: make(map[int]uint32),
|
||||
lastHashes: make(map[int]uint32),
|
||||
scanner: scanner,
|
||||
mediaCache: mediaCache,
|
||||
dlProgress: make(map[string]*mediaDLProgress),
|
||||
}
|
||||
|
||||
if mediaCache != nil {
|
||||
go mediaCache.Cleanup()
|
||||
go s.runMediaCacheSweep()
|
||||
}
|
||||
|
||||
// Migrate per-profile resolvers into the shared bank on first run.
|
||||
@@ -213,6 +255,8 @@ func (s *Server) Run() error {
|
||||
mux.HandleFunc("/api/events", s.handleSSE)
|
||||
mux.HandleFunc("/api/profiles", s.handleProfiles)
|
||||
mux.HandleFunc("/api/profiles/switch", s.handleProfileSwitch)
|
||||
mux.HandleFunc("/api/auto-update", s.handleAutoUpdate)
|
||||
mux.HandleFunc("/api/auto-update/toggle", s.handleAutoUpdateToggle)
|
||||
mux.HandleFunc("/api/settings", s.handleSettings)
|
||||
mux.HandleFunc("/api/version-check", s.handleVersionCheck)
|
||||
mux.HandleFunc("/api/cache/clear", s.handleClearCache)
|
||||
@@ -230,6 +274,11 @@ func (s *Server) Run() error {
|
||||
mux.HandleFunc("/api/scanner/progress", s.handleScannerProgress)
|
||||
mux.HandleFunc("/api/scanner/apply", s.handleScannerApply)
|
||||
mux.HandleFunc("/api/scanner/presets", s.handleScannerPresets)
|
||||
// Media (image/file) downloader: assembles a binary blob from a media
|
||||
// channel and streams it back. See internal/web/media.go for the param
|
||||
// contract.
|
||||
mux.HandleFunc("/api/media/get", s.handleMediaGet)
|
||||
mux.HandleFunc("/api/media/progress", s.handleMediaProgress)
|
||||
mux.HandleFunc("/", s.handleIndex)
|
||||
|
||||
// Listen on the specified host (default 127.0.0.1)
|
||||
@@ -782,9 +831,140 @@ func (s *Server) initFetcher() error {
|
||||
s.fetcher = fetcher
|
||||
s.cache = cache
|
||||
go cache.Cleanup() // remove channel files not updated in 7 days
|
||||
|
||||
// Goroutine dies with fetcherCtx, so a profile switch / config change
|
||||
// stops it cleanly.
|
||||
go s.runAutoUpdateLoop(ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
// runAutoUpdateLoop refreshes the active profile's AutoUpdate channels on a
|
||||
// schedule that follows the server's own fetch cycle — there's no point
|
||||
// polling more often than the server actually pulls fresh data from
|
||||
// Telegram. User-set Profile.AutoUpdateInterval is honoured if it's >= the
|
||||
// 60s floor; otherwise we align with nextFetch + settle delay.
|
||||
func (s *Server) runAutoUpdateLoop(ctx context.Context) {
|
||||
select {
|
||||
case <-time.After(autoUpdateStartupDelay):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
var lastTick time.Time
|
||||
for {
|
||||
wait := s.nextAutoUpdateWait(lastTick)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(wait):
|
||||
}
|
||||
if !s.canAutoUpdate() {
|
||||
continue
|
||||
}
|
||||
s.tickAutoUpdate()
|
||||
lastTick = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
// nextAutoUpdateWait returns how long to sleep before the next tick. Honours
|
||||
// user override when set sensibly; otherwise sleeps until just after the
|
||||
// server's next Telegram fetch so we always pull just-refreshed data.
|
||||
func (s *Server) nextAutoUpdateWait(lastTick time.Time) time.Duration {
|
||||
pl, _ := s.loadProfiles()
|
||||
if pl != nil && pl.Active != "" {
|
||||
for _, p := range pl.Profiles {
|
||||
if p.ID != pl.Active {
|
||||
continue
|
||||
}
|
||||
if p.AutoUpdateInterval > 0 {
|
||||
user := time.Duration(p.AutoUpdateInterval) * time.Second
|
||||
if user < minAutoUpdateInterval {
|
||||
user = minAutoUpdateInterval
|
||||
}
|
||||
return user
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
s.mu.RLock()
|
||||
nf := s.nextFetch
|
||||
s.mu.RUnlock()
|
||||
if nf == 0 {
|
||||
return minAutoUpdateInterval
|
||||
}
|
||||
target := time.Unix(int64(nf), 0).Add(serverFetchSettleDelay)
|
||||
delay := time.Until(target)
|
||||
if delay < minAutoUpdateInterval {
|
||||
delay = minAutoUpdateInterval
|
||||
}
|
||||
if !lastTick.IsZero() {
|
||||
if since := time.Since(lastTick); since < minAutoUpdateInterval {
|
||||
if rem := minAutoUpdateInterval - since; rem > delay {
|
||||
delay = rem
|
||||
}
|
||||
}
|
||||
}
|
||||
return delay
|
||||
}
|
||||
|
||||
// canAutoUpdate returns false when we should skip a tick: server hasn't
|
||||
// produced metadata yet (channel list empty), or the resolver scanner is
|
||||
// busy (it'd race with our DNS fetches), or there's no fetcher.
|
||||
func (s *Server) canAutoUpdate() bool {
|
||||
s.mu.RLock()
|
||||
channels := s.channels
|
||||
fetcher := s.fetcher
|
||||
scanner := s.scanner
|
||||
s.mu.RUnlock()
|
||||
if fetcher == nil || len(channels) == 0 {
|
||||
return false
|
||||
}
|
||||
if scanner != nil {
|
||||
switch scanner.State() {
|
||||
case client.ScannerRunning, client.ScannerPaused:
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *Server) tickAutoUpdate() {
|
||||
pl, err := s.loadProfiles()
|
||||
if err != nil || pl == nil || pl.Active == "" {
|
||||
return
|
||||
}
|
||||
var watch []string
|
||||
for _, p := range pl.Profiles {
|
||||
if p.ID == pl.Active {
|
||||
watch = p.AutoUpdate
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(watch) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
s.mu.RLock()
|
||||
channels := s.channels
|
||||
s.mu.RUnlock()
|
||||
if len(channels) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
wantSet := make(map[string]bool, len(watch))
|
||||
for _, name := range watch {
|
||||
wantSet[strings.TrimPrefix(strings.TrimSpace(name), "@")] = true
|
||||
}
|
||||
|
||||
for i, ch := range channels {
|
||||
if !wantSet[ch.Name] {
|
||||
continue
|
||||
}
|
||||
go s.refreshChannel(i + 1) // 1-indexed
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) checkLatestVersion(ctx context.Context) (string, error) {
|
||||
s.mu.RLock()
|
||||
cfg := s.config
|
||||
@@ -1961,6 +2141,10 @@ func (s *Server) handleProfiles(w http.ResponseWriter, r *http.Request) {
|
||||
addToBank(pl, req.Profile.Config.Resolvers)
|
||||
req.Profile.Config.Resolvers = nil
|
||||
}
|
||||
// Carry over fields the edit-profile UI doesn't manage so
|
||||
// they don't get wiped on save (auto-update list etc.).
|
||||
req.Profile.AutoUpdate = p.AutoUpdate
|
||||
req.Profile.AutoUpdateInterval = p.AutoUpdateInterval
|
||||
pl.Profiles[i] = req.Profile
|
||||
if p.ID == pl.Active {
|
||||
needsReinit = true
|
||||
@@ -2097,6 +2281,164 @@ func (s *Server) handleProfileSwitch(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, map[string]any{"ok": true})
|
||||
}
|
||||
|
||||
// handleAutoUpdate exposes the active profile's auto-update channel list.
|
||||
// GET → {channels, intervalSeconds, defaultIntervalSeconds}.
|
||||
// POST {channels, intervalSeconds?} replaces both. Names are stripped and
|
||||
// dedup'd before saving.
|
||||
func (s *Server) handleAutoUpdate(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.Method {
|
||||
case http.MethodGet:
|
||||
pl, _ := s.loadProfiles()
|
||||
channels := []string{}
|
||||
interval := 0
|
||||
if pl != nil && pl.Active != "" {
|
||||
for _, p := range pl.Profiles {
|
||||
if p.ID == pl.Active {
|
||||
if p.AutoUpdate != nil {
|
||||
channels = p.AutoUpdate
|
||||
}
|
||||
interval = p.AutoUpdateInterval
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
writeJSON(w, map[string]any{
|
||||
"channels": channels,
|
||||
"intervalSeconds": interval,
|
||||
"defaultIntervalSeconds": int(minAutoUpdateInterval / time.Second),
|
||||
})
|
||||
|
||||
case http.MethodPost:
|
||||
var req struct {
|
||||
Channels []string `json:"channels"`
|
||||
IntervalSeconds *int `json:"intervalSeconds,omitempty"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(w, "invalid JSON", 400)
|
||||
return
|
||||
}
|
||||
pl, err := s.loadProfiles()
|
||||
if err != nil || pl == nil || pl.Active == "" {
|
||||
http.Error(w, "no active profile", 400)
|
||||
return
|
||||
}
|
||||
idx := -1
|
||||
for i, p := range pl.Profiles {
|
||||
if p.ID == pl.Active {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if idx < 0 {
|
||||
http.Error(w, "active profile not found", 400)
|
||||
return
|
||||
}
|
||||
pl.Profiles[idx].AutoUpdate = normaliseAutoUpdateList(req.Channels)
|
||||
if req.IntervalSeconds != nil {
|
||||
v := *req.IntervalSeconds
|
||||
if v < 0 {
|
||||
v = 0
|
||||
}
|
||||
minSec := int(minAutoUpdateInterval / time.Second)
|
||||
if v > 0 && v < minSec {
|
||||
v = minSec // floor: never poll faster than the server fetches
|
||||
}
|
||||
pl.Profiles[idx].AutoUpdateInterval = v
|
||||
}
|
||||
if err := s.saveProfiles(pl); err != nil {
|
||||
http.Error(w, fmt.Sprintf("save: %v", err), 500)
|
||||
return
|
||||
}
|
||||
writeJSON(w, map[string]any{
|
||||
"ok": true,
|
||||
"channels": pl.Profiles[idx].AutoUpdate,
|
||||
"intervalSeconds": pl.Profiles[idx].AutoUpdateInterval,
|
||||
})
|
||||
|
||||
default:
|
||||
http.Error(w, "method not allowed", 405)
|
||||
}
|
||||
}
|
||||
|
||||
// handleAutoUpdateToggle flips one channel's membership. Body {channel}.
|
||||
// Returns {enabled, channels}.
|
||||
func (s *Server) handleAutoUpdateToggle(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", 405)
|
||||
return
|
||||
}
|
||||
var req struct {
|
||||
Channel string `json:"channel"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
http.Error(w, "invalid JSON", 400)
|
||||
return
|
||||
}
|
||||
name := strings.TrimPrefix(strings.TrimSpace(req.Channel), "@")
|
||||
if name == "" {
|
||||
http.Error(w, "channel required", 400)
|
||||
return
|
||||
}
|
||||
pl, err := s.loadProfiles()
|
||||
if err != nil || pl == nil || pl.Active == "" {
|
||||
http.Error(w, "no active profile", 400)
|
||||
return
|
||||
}
|
||||
idx := -1
|
||||
for i, p := range pl.Profiles {
|
||||
if p.ID == pl.Active {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if idx < 0 {
|
||||
http.Error(w, "active profile not found", 400)
|
||||
return
|
||||
}
|
||||
current := pl.Profiles[idx].AutoUpdate
|
||||
on := false
|
||||
hit := -1
|
||||
for i, n := range current {
|
||||
if strings.TrimPrefix(strings.TrimSpace(n), "@") == name {
|
||||
hit = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if hit >= 0 {
|
||||
current = append(current[:hit], current[hit+1:]...)
|
||||
} else {
|
||||
current = append(current, name)
|
||||
on = true
|
||||
}
|
||||
pl.Profiles[idx].AutoUpdate = normaliseAutoUpdateList(current)
|
||||
if err := s.saveProfiles(pl); err != nil {
|
||||
http.Error(w, fmt.Sprintf("save: %v", err), 500)
|
||||
return
|
||||
}
|
||||
writeJSON(w, map[string]any{
|
||||
"ok": true,
|
||||
"channel": name,
|
||||
"enabled": on,
|
||||
"channels": pl.Profiles[idx].AutoUpdate,
|
||||
})
|
||||
}
|
||||
|
||||
// normaliseAutoUpdateList returns a cleaned copy of in: every name has
// surrounding whitespace and then one leading "@" removed, empty names are
// dropped, and repeated names are kept only at their first position. The
// result is never nil.
func normaliseAutoUpdateList(in []string) []string {
	result := make([]string, 0, len(in))
	known := make(map[string]bool, len(in))
	for i := range in {
		cleaned := strings.TrimPrefix(strings.TrimSpace(in[i]), "@")
		if cleaned != "" && !known[cleaned] {
			known[cleaned] = true
			result = append(result, cleaned)
		}
	}
	return result
}
|
||||
|
||||
// handleSettings manages user preferences (font size etc.).
|
||||
func (s *Server) handleSettings(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.Method {
|
||||
@@ -2223,26 +2565,42 @@ func (s *Server) handleVersionCheck(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, map[string]any{"ok": true, "latestVersion": v})
|
||||
}
|
||||
|
||||
// handleClearCache deletes all files in the cache directory.
|
||||
// runMediaCacheSweep evicts expired media-cache entries every hour for the
|
||||
// lifetime of the process.
|
||||
func (s *Server) runMediaCacheSweep() {
|
||||
ticker := time.NewTicker(1 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
if s.mediaCache == nil {
|
||||
return
|
||||
}
|
||||
s.mediaCache.Cleanup()
|
||||
}
|
||||
}
|
||||
|
||||
// handleClearCache wipes both the per-channel message cache and the
|
||||
// downloaded-media disk cache.
|
||||
func (s *Server) handleClearCache(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", 405)
|
||||
return
|
||||
}
|
||||
cacheDir := filepath.Join(s.dataDir, "cache")
|
||||
entries, err := os.ReadDir(cacheDir)
|
||||
if err != nil {
|
||||
writeJSON(w, map[string]any{"ok": true, "deleted": 0})
|
||||
return
|
||||
}
|
||||
deleted := 0
|
||||
cacheDir := filepath.Join(s.dataDir, "cache")
|
||||
if entries, err := os.ReadDir(cacheDir); err == nil {
|
||||
for _, e := range entries {
|
||||
if !e.IsDir() {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
if os.Remove(filepath.Join(cacheDir, e.Name())) == nil {
|
||||
deleted++
|
||||
}
|
||||
}
|
||||
}
|
||||
s.addLog(fmt.Sprintf("Cache cleared: %d files deleted", deleted))
|
||||
writeJSON(w, map[string]any{"ok": true, "deleted": deleted})
|
||||
mediaDeleted := 0
|
||||
if s.mediaCache != nil {
|
||||
mediaDeleted = s.mediaCache.Clear()
|
||||
}
|
||||
s.addLog(fmt.Sprintf("Cache cleared: %d message files, %d media files", deleted, mediaDeleted))
|
||||
writeJSON(w, map[string]any{"ok": true, "deleted": deleted, "mediaDeleted": mediaDeleted})
|
||||
}
|
||||
|
||||
+121
-14
@@ -75,6 +75,48 @@ get_latest_version() {
|
||||
echo "$version"
|
||||
}
|
||||
|
||||
# _fetch_releases prints the raw JSON returned by the GitHub "list releases"
# endpoint for ${GITHUB_REPO} (up to 20 entries). If the first request comes
# back with an empty body, it is retried once forcing IPv4 (curl -4).
# Output is whatever curl returned; it is empty when both attempts fail.
_fetch_releases() {
    local url="https://api.github.com/repos/${GITHUB_REPO}/releases?per_page=20"
    local body
    body=$(curl -Ls "$url")
    if [[ -z "$body" ]]; then
        body=$(curl -4 -Ls "$url")
    fi
    printf '%s\n' "$body"
}
|
||||
|
||||
# _split_releases flattens the GitHub releases JSON (pretty-printed or
# minified) onto a single line and then starts a new line before every "{".
# The result is one line per JSON object fragment, not strictly one line per
# release: nested objects open their own lines too. Callers pick out the
# lines that contain "tag_name".
# NOTE(review): "\n" in the sed replacement relies on GNU sed - confirm that
# every supported target ships it.
_split_releases() {
    _fetch_releases | tr -d '\n' | sed 's/{/\n{/g'
}
|
||||
|
||||
# get_latest_prerelease prints the tag of the first release in the list
# emitted by _split_releases that is flagged as a pre-release, or nothing
# when there is none.
#
# The "prerelease" match tolerates whitespace after the colon, so both
# minified ("prerelease":true) and pretty-printed ("prerelease": true) JSON
# are recognised. sed runs with -n so a line without a parsable tag_name
# yields no output instead of being echoed back as a bogus version.
get_latest_prerelease() {
    _split_releases |
        grep -E '"prerelease":[[:space:]]*true' |
        head -1 |
        sed -nE 's/.*"tag_name":[[:space:]]*"([^"]+)".*/\1/p'
}
|
||||
|
||||
# list_versions prints one row per release found in the output of
# _split_releases: the tag followed by "[pre-release]" or "[stable]", then a
# short hint on how to install a specific tag.
#
# Only lines carrying a parsable "tag_name" produce a row. The pre-release
# check allows whitespace after the colon so pretty-printed JSON
# ("prerelease": true) is labelled correctly as well as minified JSON.
list_versions() {
    echo -e "${green}Recent thefeed releases (most recent first):${plain}"
    local line tag label
    local tag_re='"tag_name":[[:space:]]*"([^"]+)"'
    local pre_re='"prerelease":[[:space:]]*true'
    while IFS= read -r line; do
        # Skip fragments that do not describe a release.
        [[ "$line" =~ $tag_re ]] || continue
        tag="${BASH_REMATCH[1]}"
        if [[ "$line" =~ $pre_re ]]; then
            label="[pre-release]"
        else
            label="[stable]"
        fi
        printf " %-15s %s\n" "$tag" "$label"
    done < <(_split_releases)
    echo ""
    echo -e "Install one with: ${blue}sudo bash install.sh --version <tag>${plain}"
    echo -e "Or: ${blue}sudo bash install.sh <tag>${plain} (positional)"
}
|
||||
|
||||
download_binary() {
|
||||
local version="$1"
|
||||
local arch_name
|
||||
@@ -484,16 +526,29 @@ show_usage() {
|
||||
|
||||
install_thefeed() {
|
||||
local version="$1"
|
||||
local channel="${2:-stable}" # "stable" or "pre"
|
||||
|
||||
# Get version
|
||||
if [[ -z "$version" ]]; then
|
||||
if [[ "$channel" == "pre" ]]; then
|
||||
version=$(get_latest_prerelease)
|
||||
if [[ -z "$version" ]]; then
|
||||
echo -e "${red}No pre-release found on GitHub${plain}"
|
||||
echo -e "${yellow}Run: bash install.sh --list to see available versions${plain}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${yellow}Channel:${plain} ${blue}pre-release${plain}"
|
||||
else
|
||||
version=$(get_latest_version)
|
||||
if [[ -z "$version" ]]; then
|
||||
echo -e "${red}Failed to fetch latest version from GitHub${plain}"
|
||||
echo -e "${yellow}Please check your network or specify a version: bash install.sh v1.0.0${plain}"
|
||||
echo -e "${yellow}Please check your network or specify a version: bash install.sh --version v1.0.0${plain}"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [[ "$version" =~ ^[0-9] ]]; then
|
||||
version="v${version}"
|
||||
fi
|
||||
echo -e "Version: ${green}${version}${plain}"
|
||||
|
||||
# Check current version
|
||||
@@ -586,36 +641,88 @@ show_help() {
|
||||
echo -e "Usage: bash $0 [OPTION]"
|
||||
echo ""
|
||||
echo -e "Options:"
|
||||
echo -e " ${green}(no args)${plain} Install or update to latest version"
|
||||
echo -e " ${green}v1.0.0${plain} Install specific version"
|
||||
echo -e " ${green}(no args)${plain} Install or update to latest stable version"
|
||||
echo -e " ${green}--version <tag>${plain} Install a specific version (rollback, beta, rc)"
|
||||
echo -e " ${green}-v <tag>${plain} Short form of --version"
|
||||
echo -e " ${green}<tag>${plain} Positional form, e.g. bash install.sh v1.0.0"
|
||||
echo -e " ${green}--pre${plain} Install the latest pre-release (beta/rc)"
|
||||
echo -e " ${green}--list${plain} List recent releases with stable/pre labels"
|
||||
echo -e " ${green}--login${plain} Re-authenticate with Telegram"
|
||||
echo -e " ${green}--uninstall${plain} Remove thefeed"
|
||||
echo -e " ${green}--help${plain} Show this help"
|
||||
echo ""
|
||||
echo -e "Examples:"
|
||||
echo -e " Roll back: ${blue}sudo bash install.sh --version v0.9.2${plain}"
|
||||
echo -e " Install beta: ${blue}sudo bash install.sh --pre${plain}"
|
||||
echo -e " Specific tag: ${blue}sudo bash install.sh --version v1.2.0-rc1${plain}"
|
||||
echo -e " See available: ${blue}sudo bash install.sh --list${plain}"
|
||||
echo ""
|
||||
echo -e "No-Telegram mode (recommended for most users):"
|
||||
echo -e " Reads public Telegram channels without needing Telegram credentials."
|
||||
echo -e " Safer because no phone number or API keys are stored on the server."
|
||||
echo ""
|
||||
echo -e "Quick commands:"
|
||||
echo -e " Install/Update: ${blue}curl -Ls https://raw.githubusercontent.com/${GITHUB_REPO}/main/scripts/install.sh | sudo bash${plain}"
|
||||
echo -e " Install beta: ${blue}curl -Ls https://raw.githubusercontent.com/${GITHUB_REPO}/main/scripts/install.sh | sudo bash -s -- --pre${plain}"
|
||||
echo -e " Roll back: ${blue}curl -Ls https://raw.githubusercontent.com/${GITHUB_REPO}/main/scripts/install.sh | sudo bash -s -- --version v0.9.2${plain}"
|
||||
echo -e " Uninstall: ${blue}curl -Ls https://raw.githubusercontent.com/${GITHUB_REPO}/main/scripts/install.sh | sudo bash -s -- --uninstall${plain}"
|
||||
}
|
||||
|
||||
# Main
echo -e "${green}Running thefeed installer...${plain}"

# Flags: --version <tag> / -v <tag> / --version=<tag> / positional <tag>,
# --pre, --list, --login, --uninstall, --help. No args = latest stable.
#
# Parsing only records what was asked for; the chosen action runs once, after
# every argument has been read. When several action flags are given the last
# one wins. "--" stops parsing and anything after it is ignored.
REQUEST_VERSION=""
REQUEST_CHANNEL="stable"
ACTION="install"

while [[ $# -gt 0 ]]; do
    case "$1" in
    --help|-h)
        ACTION="help"; shift ;;
    --login)
        ACTION="login"; shift ;;
    --uninstall)
        ACTION="uninstall"; shift ;;
    --list)
        ACTION="list"; shift ;;
    --pre|--prerelease|--beta)
        REQUEST_CHANNEL="pre"; shift ;;
    --version|-v)
        shift
        if [[ -z "${1:-}" ]]; then
            echo -e "${red}--version requires a tag argument (e.g. --version v1.0.0)${plain}"
            exit 1
        fi
        REQUEST_VERSION="$1"; shift ;;
    --version=*)
        # Reject an empty value so "--version=" cannot silently fall back
        # to installing the latest release.
        if [[ -z "${1#*=}" ]]; then
            echo -e "${red}--version requires a tag argument (e.g. --version v1.0.0)${plain}"
            exit 1
        fi
        REQUEST_VERSION="${1#*=}"; shift ;;
    --)
        shift; break ;;
    -*)
        echo -e "${red}Unknown flag: $1${plain}"
        echo -e "Run ${blue}bash $0 --help${plain} for usage"
        exit 1 ;;
    *)
        # Positional tag, e.g. bash install.sh v1.0.0
        # Only the first positional is used; an explicit --version given
        # earlier takes precedence.
        if [[ -z "$REQUEST_VERSION" ]]; then
            REQUEST_VERSION="$1"
        fi
        shift ;;
    esac
done

case "$ACTION" in
help)
    show_help ;;
login)
    login_only ;;
uninstall)
    uninstall_thefeed ;;
list)
    list_versions ;;
install)
    install_base
    install_thefeed "$REQUEST_VERSION" "$REQUEST_CHANNEL" ;;
esac
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// createDefaultProfile spins up a dummy "active" profile so the auto-update
|
||||
// endpoints have somewhere to write. Returns the resulting profile id.
|
||||
func createDefaultProfile(t *testing.T, base string) string {
|
||||
t.Helper()
|
||||
body := `{"action":"create","profile":{"id":"","nickname":"AU","config":{"domain":"au.example","key":"k","resolvers":["127.0.0.1:9999"],"queryMode":"single","rateLimit":0}}}`
|
||||
resp := postJSON(t, base+"/api/profiles", body)
|
||||
resp.Body.Close()
|
||||
m := decodeJSON(t, getJSON(t, base+"/api/profiles"))
|
||||
profs, ok := m["profiles"].([]any)
|
||||
if !ok || len(profs) == 0 {
|
||||
t.Fatalf("profile not created, got %v", m["profiles"])
|
||||
}
|
||||
return profs[0].(map[string]any)["id"].(string)
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_GetEmpty(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
resp := getJSON(t, base+"/api/auto-update")
|
||||
if resp.StatusCode != 200 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("expected 200, got %d body=%s", resp.StatusCode, body)
|
||||
}
|
||||
m := decodeJSON(t, resp)
|
||||
chans, _ := m["channels"].([]any)
|
||||
if len(chans) != 0 {
|
||||
t.Errorf("expected empty channels, got %v", chans)
|
||||
}
|
||||
if d, _ := m["defaultIntervalSeconds"].(float64); int(d) != 60 {
|
||||
t.Errorf("defaultIntervalSeconds = %v, want 60", m["defaultIntervalSeconds"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_ToggleAddsAndRemoves(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
// First toggle: add.
|
||||
resp := postJSON(t, base+"/api/auto-update/toggle", `{"channel":"thefeed1"}`)
|
||||
if resp.StatusCode != 200 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("toggle add: %d body=%s", resp.StatusCode, body)
|
||||
}
|
||||
m := decodeJSON(t, resp)
|
||||
if m["enabled"] != true {
|
||||
t.Errorf("first toggle should set enabled=true, got %v", m["enabled"])
|
||||
}
|
||||
chans := m["channels"].([]any)
|
||||
if len(chans) != 1 || chans[0] != "thefeed1" {
|
||||
t.Errorf("channels after add = %v, want [thefeed1]", chans)
|
||||
}
|
||||
|
||||
// Second toggle: remove.
|
||||
resp2 := postJSON(t, base+"/api/auto-update/toggle", `{"channel":"thefeed1"}`)
|
||||
m2 := decodeJSON(t, resp2)
|
||||
if m2["enabled"] != false {
|
||||
t.Errorf("second toggle should set enabled=false, got %v", m2["enabled"])
|
||||
}
|
||||
chans2 := m2["channels"]
|
||||
if list, ok := chans2.([]any); !ok || len(list) != 0 {
|
||||
t.Errorf("channels after remove = %v, want []", chans2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_TogglesAtSign(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
// Add with "@chan" — server must store stripped form.
|
||||
postJSON(t, base+"/api/auto-update/toggle", `{"channel":"@chan"}`).Body.Close()
|
||||
|
||||
m := decodeJSON(t, getJSON(t, base+"/api/auto-update"))
|
||||
chans := m["channels"].([]any)
|
||||
if len(chans) != 1 || chans[0] != "chan" {
|
||||
t.Errorf("channels = %v, want [chan]", chans)
|
||||
}
|
||||
|
||||
// Toggle with bare form should remove the same entry.
|
||||
resp := postJSON(t, base+"/api/auto-update/toggle", `{"channel":"chan"}`)
|
||||
m2 := decodeJSON(t, resp)
|
||||
if m2["enabled"] != false {
|
||||
t.Errorf("expected enabled=false, got %v", m2["enabled"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_PostReplacesList(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
// POST with normalisation cases: leading @, dupes, whitespace.
|
||||
body := `{"channels":["@a","b","@a"," c ",""],"intervalSeconds":120}`
|
||||
resp := postJSON(t, base+"/api/auto-update", body)
|
||||
if resp.StatusCode != 200 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("POST: %d body=%s", resp.StatusCode, body)
|
||||
}
|
||||
m := decodeJSON(t, resp)
|
||||
chans := m["channels"].([]any)
|
||||
want := []string{"a", "b", "c"}
|
||||
if len(chans) != len(want) {
|
||||
t.Fatalf("channels len = %d, want %d (%v)", len(chans), len(want), chans)
|
||||
}
|
||||
for i, w := range want {
|
||||
if chans[i] != w {
|
||||
t.Errorf("channels[%d] = %v, want %q", i, chans[i], w)
|
||||
}
|
||||
}
|
||||
if iv, _ := m["intervalSeconds"].(float64); int(iv) != 120 {
|
||||
t.Errorf("intervalSeconds = %v, want 120", m["intervalSeconds"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_IntervalFloor(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
// Anything <60s gets bumped to the 60s floor; 0 stays 0 (means
|
||||
// "follow the server's nextFetch cadence with the built-in default").
|
||||
resp := postJSON(t, base+"/api/auto-update", `{"channels":["x"],"intervalSeconds":5}`)
|
||||
m := decodeJSON(t, resp)
|
||||
if iv, _ := m["intervalSeconds"].(float64); int(iv) != 60 {
|
||||
t.Errorf("intervalSeconds floor = %v, want 60", m["intervalSeconds"])
|
||||
}
|
||||
|
||||
resp2 := postJSON(t, base+"/api/auto-update", `{"channels":["x"],"intervalSeconds":0}`)
|
||||
m2 := decodeJSON(t, resp2)
|
||||
if iv, _ := m2["intervalSeconds"].(float64); int(iv) != 0 {
|
||||
t.Errorf("intervalSeconds zero = %v, want 0 (default)", m2["intervalSeconds"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_NoActiveProfile(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
|
||||
// No profile created → no active profile → POST should fail with 400.
|
||||
resp := postJSON(t, base+"/api/auto-update/toggle", `{"channel":"x"}`)
|
||||
if resp.StatusCode != 400 {
|
||||
t.Fatalf("toggle without profile: expected 400, got %d", resp.StatusCode)
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
// GET should succeed and return empty channels.
|
||||
resp2 := getJSON(t, base+"/api/auto-update")
|
||||
if resp2.StatusCode != 200 {
|
||||
t.Fatalf("GET without profile: expected 200, got %d", resp2.StatusCode)
|
||||
}
|
||||
m := decodeJSON(t, resp2)
|
||||
chans, _ := m["channels"].([]any)
|
||||
if len(chans) != 0 {
|
||||
t.Errorf("expected empty channels with no profile, got %v", chans)
|
||||
}
|
||||
}
|
||||
|
||||
func TestE2E_AutoUpdate_PersistsAcrossGets(t *testing.T) {
|
||||
base, _ := startWebServer(t)
|
||||
createDefaultProfile(t, base)
|
||||
|
||||
postJSON(t, base+"/api/auto-update", `{"channels":["alpha","beta"]}`).Body.Close()
|
||||
|
||||
// Fresh GET should return the same list.
|
||||
m := decodeJSON(t, getJSON(t, base+"/api/auto-update"))
|
||||
chans := m["channels"].([]any)
|
||||
if len(chans) != 2 || chans[0] != "alpha" || chans[1] != "beta" {
|
||||
t.Errorf("channels persisted = %v, want [alpha beta]", chans)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user