mirror of
https://github.com/sartoopjj/thefeed.git
synced 2026-05-18 05:24:36 +03:00
467 lines
13 KiB
Go
467 lines
13 KiB
Go
// Optional, removable backup feed. Keep this file isolated from the rest
|
|
// of the web package so deleting it (plus the routes / clear-cache wiring
|
|
// in web.go and the BEGIN/END telemirror block in static/index.html) is
|
|
// enough to drop the feature.
|
|
package web
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/sartoopjj/thefeed/internal/telemirror"
|
|
)
|
|
|
|
type telemirrorHub struct {
|
|
client *telemirror.Client
|
|
cache *telemirror.Cache
|
|
store *telemirror.Store
|
|
imgs *telemirror.ImageCache
|
|
|
|
// onUpdate fires after a successful refresh so the SSE channel can
|
|
// nudge the frontend to re-fetch. nil-safe.
|
|
onUpdate func(username string)
|
|
|
|
mu sync.Mutex
|
|
refreshing map[string]chan struct{}
|
|
}
|
|
|
|
func newTelemirrorHub(dataDir string, onUpdate func(string)) *telemirrorHub {
|
|
tmDir := filepath.Join(dataDir, "telemirror")
|
|
return &telemirrorHub{
|
|
client: telemirror.NewClient(),
|
|
cache: telemirror.NewCache(tmDir),
|
|
store: telemirror.NewStore(dataDir),
|
|
imgs: telemirror.NewImageCache(filepath.Join(tmDir, "images")),
|
|
onUpdate: onUpdate,
|
|
refreshing: make(map[string]chan struct{}),
|
|
}
|
|
}
|
|
|
|
func (h *telemirrorHub) handleChannels(w http.ResponseWriter, r *http.Request) {
|
|
switch r.Method {
|
|
case http.MethodGet:
|
|
list := h.store.List()
|
|
type entry struct {
|
|
Username string `json:"username"`
|
|
Pinned bool `json:"pinned"`
|
|
}
|
|
out := make([]entry, 0, len(list))
|
|
for _, u := range list {
|
|
out = append(out, entry{Username: u, Pinned: telemirror.IsDefault(u)})
|
|
}
|
|
writeJSON(w, map[string]any{"channels": out})
|
|
|
|
case http.MethodPost:
|
|
var req struct {
|
|
Username string `json:"username"`
|
|
Action string `json:"action"`
|
|
}
|
|
if err := json.NewDecoder(io.LimitReader(r.Body, 1024)).Decode(&req); err != nil {
|
|
http.Error(w, "invalid JSON", 400)
|
|
return
|
|
}
|
|
switch req.Action {
|
|
case "add":
|
|
if err := h.store.Add(req.Username); err != nil {
|
|
http.Error(w, err.Error(), 400)
|
|
return
|
|
}
|
|
case "remove":
|
|
if err := h.store.Remove(req.Username); err != nil {
|
|
http.Error(w, err.Error(), 400)
|
|
return
|
|
}
|
|
default:
|
|
http.Error(w, "unknown action", 400)
|
|
return
|
|
}
|
|
writeJSON(w, map[string]any{"ok": true})
|
|
|
|
default:
|
|
http.Error(w, "method not allowed", 405)
|
|
}
|
|
}
|
|
|
|
// handleChannel serves /api/telemirror/channel/<username>[?refresh=1].
|
|
// Stale-while-revalidate: serve cached content immediately, refresh in
|
|
// the background when it's older than FreshTTL.
|
|
func (h *telemirrorHub) handleChannel(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method != http.MethodGet {
|
|
http.Error(w, "method not allowed", 405)
|
|
return
|
|
}
|
|
username := telemirror.SanitizeUsername(strings.TrimPrefix(r.URL.Path, "/api/telemirror/channel/"))
|
|
if username == "" {
|
|
http.Error(w, "missing username", 400)
|
|
return
|
|
}
|
|
forceRefresh := r.URL.Query().Get("refresh") == "1"
|
|
|
|
cached, fresh := h.cache.Get(username)
|
|
if cached != nil && fresh && !forceRefresh {
|
|
writeJSON(w, rewriteImageURLs(cached))
|
|
return
|
|
}
|
|
if cached != nil && !forceRefresh {
|
|
go func() { _, _ = h.refresh(username) }()
|
|
writeJSON(w, rewriteImageURLs(cached))
|
|
return
|
|
}
|
|
|
|
// forceRefresh + cache hit: trigger refresh in background but cap
|
|
// how long we make the user wait. On flaky networks fronting can
|
|
// take minutes — we'd rather return cached than time out. The
|
|
// background fetch keeps running (h.refresh coalesces concurrent
|
|
// callers), so the next request sees fresh data.
|
|
if forceRefresh && cached != nil {
|
|
ch := make(chan *telemirror.FetchResult, 1)
|
|
go func() {
|
|
res, err := h.refresh(username)
|
|
if err != nil || res == nil {
|
|
ch <- nil
|
|
return
|
|
}
|
|
ch <- res
|
|
}()
|
|
select {
|
|
case res := <-ch:
|
|
if res != nil {
|
|
writeJSON(w, rewriteImageURLs(res))
|
|
return
|
|
}
|
|
case <-time.After(15 * time.Second):
|
|
}
|
|
writeJSON(w, rewriteImageURLs(cached))
|
|
return
|
|
}
|
|
|
|
// No cache yet — must wait for the first fetch.
|
|
res, err := h.refresh(username)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), 502)
|
|
return
|
|
}
|
|
writeJSON(w, rewriteImageURLs(res))
|
|
}
|
|
|
|
// rewriteImageURLs points Channel.Photo at /api/telemirror/avatar/<username>
|
|
// (stable across restarts) and Media.Thumb at the URL-hash img proxy.
|
|
// Media.URL is left alone — it's a post permalink, not image bytes.
|
|
func rewriteImageURLs(in *telemirror.FetchResult) *telemirror.FetchResult {
|
|
if in == nil {
|
|
return nil
|
|
}
|
|
cp := *in
|
|
if u := telemirror.SanitizeUsername(cp.Channel.Username); u != "" {
|
|
cp.Channel.Photo = "/api/telemirror/avatar/" + strings.ToLower(u)
|
|
} else {
|
|
cp.Channel.Photo = proxyImgURL(cp.Channel.Photo)
|
|
}
|
|
cp.Posts = make([]telemirror.Post, len(in.Posts))
|
|
for i, p := range in.Posts {
|
|
p.Media = append([]telemirror.Media(nil), p.Media...)
|
|
for j := range p.Media {
|
|
p.Media[j].Thumb = proxyImgURL(p.Media[j].Thumb)
|
|
}
|
|
cp.Posts[i] = p
|
|
}
|
|
return &cp
|
|
}
|
|
|
|
// proxyImgURL routes an image URL through our /api/telemirror/img
|
|
// endpoint, applying the translate.goog hostname rewrite when the
|
|
// source is a Telegram CDN. Google's Translate proxy doesn't rewrite
|
|
// inline background-image URLs, so the parser hands us the original
|
|
// cdn4.telegram.org / cdn4.telesco.pe URLs and we have to translate
|
|
// them ourselves before fetching through fronting.
|
|
func proxyImgURL(raw string) string {
|
|
rewritten := translateGoogify(raw)
|
|
if rewritten == "" {
|
|
return raw
|
|
}
|
|
return "/api/telemirror/img?u=" + url.QueryEscape(rewritten)
|
|
}
|
|
|
|
// translateGoogify maps a CDN URL to its Google-Translate-proxied form.
|
|
// Returns "" if the URL is something we shouldn't proxy at all.
|
|
//
|
|
// Examples:
|
|
//
|
|
// https://cdn4.telegram.org/file/abc.jpg
|
|
// → https://cdn4-telegram-org.translate.goog/file/abc.jpg
|
|
// https://cdn1.telesco.pe/file/x.jpg
|
|
// → https://cdn1-telesco-pe.translate.goog/file/x.jpg
|
|
// https://cdn4-telegram-org.translate.goog/file/abc.jpg
|
|
// → unchanged (already on translate.goog)
|
|
func translateGoogify(raw string) string {
|
|
if raw == "" || !strings.HasPrefix(raw, "https://") {
|
|
return ""
|
|
}
|
|
u, err := url.Parse(raw)
|
|
if err != nil || u.Host == "" {
|
|
return ""
|
|
}
|
|
host := strings.ToLower(u.Host)
|
|
switch {
|
|
case strings.HasSuffix(host, ".translate.goog"):
|
|
// Already on translate.goog — proxy directly.
|
|
return raw
|
|
case strings.HasSuffix(host, ".telegram.org"),
|
|
strings.HasSuffix(host, ".telesco.pe"),
|
|
host == "t.me":
|
|
// Replace every "." with "-" and append ".translate.goog".
|
|
// Same scheme Google's proxy uses.
|
|
u.Host = strings.ReplaceAll(host, ".", "-") + ".translate.goog"
|
|
return u.String()
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isProxiableHost — only the translate.goog form. Used by the /api/telemirror/img
|
|
// handler to validate the `u` query parameter, after the JSON
|
|
// rewrite has converted Telegram CDN URLs to their .translate.goog equivalent.
|
|
func isProxiableHost(rawURL string) bool {
|
|
u, err := url.Parse(rawURL)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
host := strings.ToLower(u.Host)
|
|
return strings.HasSuffix(host, ".translate.goog")
|
|
}
|
|
|
|
// handleImg proxies an image URL through fronting, persisting the
|
|
// response under sha256(u) for subsequent hits.
|
|
func (h *telemirrorHub) handleImg(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method != http.MethodGet {
|
|
http.Error(w, "method not allowed", 405)
|
|
return
|
|
}
|
|
raw := r.URL.Query().Get("u")
|
|
if raw == "" {
|
|
http.Error(w, "missing u", 400)
|
|
return
|
|
}
|
|
if !isProxiableHost(raw) {
|
|
http.Error(w, "host not allowed", 400)
|
|
return
|
|
}
|
|
if body, ctype, ok := h.imgs.Get(raw); ok {
|
|
writeTelemirrorImg(w, body, ctype)
|
|
return
|
|
}
|
|
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
|
defer cancel()
|
|
body, ctype, err := h.client.FetchURL(ctx, raw)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), 502)
|
|
return
|
|
}
|
|
// Reject HTML/error pages so the browser's onerror fires cleanly.
|
|
low := strings.ToLower(ctype)
|
|
if !strings.HasPrefix(low, "image/") && !strings.HasPrefix(low, "video/") && !strings.HasPrefix(low, "audio/") && low != "" {
|
|
http.Error(w, "upstream not an image: "+ctype, 502)
|
|
return
|
|
}
|
|
if ctype == "" {
|
|
ctype = "application/octet-stream"
|
|
}
|
|
_ = h.imgs.Put(raw, ctype, body)
|
|
writeTelemirrorImg(w, body, ctype)
|
|
}
|
|
|
|
func writeTelemirrorImg(w http.ResponseWriter, body []byte, ctype string) {
|
|
if ctype == "" {
|
|
ctype = "application/octet-stream"
|
|
}
|
|
w.Header().Set("Content-Type", ctype)
|
|
w.Header().Set("Cache-Control", "private, max-age=3600")
|
|
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(body)))
|
|
_, _ = w.Write(body)
|
|
}
|
|
|
|
// refresh fetches and parses a channel, coalescing concurrent calls for
|
|
// the same username so we don't hit the upstream more than once at a time.
|
|
func (h *telemirrorHub) refresh(username string) (*telemirror.FetchResult, error) {
|
|
username = strings.ToLower(telemirror.SanitizeUsername(username))
|
|
if username == "" {
|
|
return nil, telemirror.ErrEmptyUsername
|
|
}
|
|
|
|
h.mu.Lock()
|
|
if ch, ok := h.refreshing[username]; ok {
|
|
h.mu.Unlock()
|
|
<-ch
|
|
if r, _ := h.cache.Get(username); r != nil {
|
|
return r, nil
|
|
}
|
|
return nil, fmt.Errorf("telemirror: concurrent refresh did not produce a result")
|
|
}
|
|
done := make(chan struct{})
|
|
h.refreshing[username] = done
|
|
h.mu.Unlock()
|
|
|
|
defer func() {
|
|
h.mu.Lock()
|
|
delete(h.refreshing, username)
|
|
h.mu.Unlock()
|
|
close(done)
|
|
}()
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
|
defer cancel()
|
|
|
|
body, err := h.client.FetchHTML(ctx, username)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
chInfo, posts, err := telemirror.ParseHTML(body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Reject "successful" responses that have no widget content — usually
|
|
// a captcha / rate-limit / soft-error page returned with status 200.
|
|
if len(posts) == 0 && chInfo.Title == "" && chInfo.Description == "" {
|
|
return nil, fmt.Errorf("telemirror: empty widget for %q", username)
|
|
}
|
|
if chInfo.Username == "" {
|
|
chInfo.Username = username
|
|
}
|
|
res := &telemirror.FetchResult{Channel: *chInfo, Posts: posts}
|
|
_ = h.cache.Put(username, res)
|
|
if h.onUpdate != nil {
|
|
h.onUpdate(username)
|
|
}
|
|
go h.ensureAvatarCached(chInfo.Username, chInfo.Photo)
|
|
return res, nil
|
|
}
|
|
|
|
// ensureAvatarCached downloads and persists the avatar under username
|
|
// only when no entry is on disk yet.
|
|
func (h *telemirrorHub) ensureAvatarCached(username, photoURL string) {
|
|
username = strings.ToLower(telemirror.SanitizeUsername(username))
|
|
if username == "" || photoURL == "" {
|
|
return
|
|
}
|
|
if _, _, ok := h.imgs.GetByKey(username); ok {
|
|
return
|
|
}
|
|
proxyURL := translateGoogify(photoURL)
|
|
if proxyURL == "" {
|
|
return
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
|
defer cancel()
|
|
body, ctype, err := h.client.FetchURL(ctx, proxyURL)
|
|
if err != nil {
|
|
return
|
|
}
|
|
low := strings.ToLower(ctype)
|
|
if !strings.HasPrefix(low, "image/") && low != "" {
|
|
return
|
|
}
|
|
if ctype == "" {
|
|
ctype = "image/jpeg"
|
|
}
|
|
_ = h.imgs.PutByKey(username, ctype, body)
|
|
}
|
|
|
|
// handleAvatar serves /api/telemirror/avatar/<username> from disk;
|
|
// on miss, recovers the URL from the cached channel JSON, fetches,
|
|
// persists, and serves.
|
|
func (h *telemirrorHub) handleAvatar(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method != http.MethodGet {
|
|
http.Error(w, "method not allowed", 405)
|
|
return
|
|
}
|
|
username := strings.ToLower(telemirror.SanitizeUsername(
|
|
strings.TrimPrefix(r.URL.Path, "/api/telemirror/avatar/")))
|
|
if username == "" {
|
|
http.Error(w, "missing username", 400)
|
|
return
|
|
}
|
|
if body, ctype, ok := h.imgs.GetByKey(username); ok {
|
|
writeTelemirrorImg(w, body, ctype)
|
|
return
|
|
}
|
|
cached, _ := h.cache.Get(username)
|
|
if cached == nil || cached.Channel.Photo == "" {
|
|
http.Error(w, "no cached avatar", 404)
|
|
return
|
|
}
|
|
proxyURL := translateGoogify(cached.Channel.Photo)
|
|
if proxyURL == "" {
|
|
http.Error(w, "no cached avatar", 404)
|
|
return
|
|
}
|
|
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
|
defer cancel()
|
|
body, ctype, err := h.client.FetchURL(ctx, proxyURL)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), 502)
|
|
return
|
|
}
|
|
low := strings.ToLower(ctype)
|
|
if !strings.HasPrefix(low, "image/") && low != "" {
|
|
http.Error(w, "upstream not an image: "+ctype, 502)
|
|
return
|
|
}
|
|
if ctype == "" {
|
|
ctype = "image/jpeg"
|
|
}
|
|
_ = h.imgs.PutByKey(username, ctype, body)
|
|
writeTelemirrorImg(w, body, ctype)
|
|
}
|
|
|
|
func (h *telemirrorHub) ClearCache() {
|
|
h.cache.Clear()
|
|
h.imgs.Clear()
|
|
}
|
|
|
|
// handleOlder serves /api/telemirror/older/<username>?before=<id>.
|
|
// Fetches a fresh widget filtered to posts older than the given id.
|
|
// Not cached — every call hits upstream because pagination data
|
|
// otherwise grows unbounded per channel.
|
|
func (h *telemirrorHub) handleOlder(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method != http.MethodGet {
|
|
http.Error(w, "method not allowed", 405)
|
|
return
|
|
}
|
|
username := strings.ToLower(telemirror.SanitizeUsername(
|
|
strings.TrimPrefix(r.URL.Path, "/api/telemirror/older/")))
|
|
if username == "" {
|
|
http.Error(w, "missing username", 400)
|
|
return
|
|
}
|
|
beforeID, _ := strconv.Atoi(r.URL.Query().Get("before"))
|
|
if beforeID <= 0 {
|
|
http.Error(w, "missing before", 400)
|
|
return
|
|
}
|
|
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
|
defer cancel()
|
|
body, err := h.client.FetchHTMLBefore(ctx, username, beforeID)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), 502)
|
|
return
|
|
}
|
|
chInfo, posts, err := telemirror.ParseHTML(body)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), 502)
|
|
return
|
|
}
|
|
if chInfo.Username == "" {
|
|
chInfo.Username = username
|
|
}
|
|
res := &telemirror.FetchResult{Channel: *chInfo, Posts: posts}
|
|
writeJSON(w, rewriteImageURLs(res))
|
|
}
|