mirror of
https://github.com/therealaleph/MasterHttpRelayVPN-RUST.git
synced 2026-05-18 06:34:41 +03:00
perf: TLS connection pool + coalesce tuning for lower latency (#751)
TLS pool improvements: - Increase POOL_TTL from 45s to 60s so connections live longer - Add POOL_MIN (8): background refill loop keeps at least 8 ready TLS connections so acquire() never pays a cold handshake - Refill checks every 5s, only counts connections with ≥20s remaining as "healthy" — nearly-expired entries don't count - warm() now opens sequentially (500ms gaps) with 8s expiry offset per connection so they roll off gradually instead of all expiring together after a cliff - acquire() picks the freshest connection (most remaining TTL) instead of popping whatever is on top Coalesce step increase: - DEFAULT_COALESCE_STEP_MS: 10 → 200. The dominant bottleneck is the Apps Script round-trip (~1.5s), so the extra 200ms wait is negligible to the user but lets significantly more ops land in each batch — measured 3–5 ops/batch vs 1 op/batch at 10ms during page loads, cutting round-trips roughly in half. Tested on Android (Pixel 6 Pro) with full-mode tunnel. Pool hit rate went from 96% (POOL_MIN=4) to 100% (POOL_MIN=8) — zero cold TLS handshakes during requests. Co-authored-by: yyoyoian-pixel <279225925+yyoyoian-pixel@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+85
-30
@@ -57,7 +57,9 @@ pub enum FronterError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type PooledStream = TlsStream<TcpStream>;
|
type PooledStream = TlsStream<TcpStream>;
|
||||||
const POOL_TTL_SECS: u64 = 45;
|
const POOL_TTL_SECS: u64 = 60;
|
||||||
|
const POOL_MIN: usize = 8;
|
||||||
|
const POOL_REFILL_INTERVAL_SECS: u64 = 5;
|
||||||
const POOL_MAX: usize = 80;
|
const POOL_MAX: usize = 80;
|
||||||
const REQUEST_TIMEOUT_SECS: u64 = 25;
|
const REQUEST_TIMEOUT_SECS: u64 = 25;
|
||||||
const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
|
const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
|
||||||
@@ -644,33 +646,31 @@ impl DomainFronter {
|
|||||||
Ok(tls)
|
Ok(tls)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Open `n` outbound TLS connections in parallel and park them in the
|
/// Open `n` outbound TLS connections sequentially (500 ms apart) and
|
||||||
/// pool so the first few user requests don't pay the handshake cost.
|
/// park them in the pool. Staggered so we don't burst N TLS handshakes
|
||||||
/// Errors are logged but not returned — best-effort.
|
/// at Google edge simultaneously, and each connection gets an 8 s
|
||||||
|
/// expiry offset so they roll off gradually instead of all hitting
|
||||||
|
/// POOL_TTL_SECS at once.
|
||||||
pub async fn warm(self: &Arc<Self>, n: usize) {
|
pub async fn warm(self: &Arc<Self>, n: usize) {
|
||||||
let mut set = tokio::task::JoinSet::new();
|
let mut warmed = 0usize;
|
||||||
for _ in 0..n {
|
for i in 0..n {
|
||||||
let me = self.clone();
|
if i > 0 {
|
||||||
set.spawn(async move {
|
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||||
match me.open().await {
|
}
|
||||||
Ok(s) => Some(PoolEntry {
|
match self.open().await {
|
||||||
|
Ok(s) => {
|
||||||
|
let entry = PoolEntry {
|
||||||
stream: s,
|
stream: s,
|
||||||
created: Instant::now(),
|
created: Instant::now() - Duration::from_secs(8 * i as u64),
|
||||||
}),
|
};
|
||||||
Err(e) => {
|
let mut pool = self.pool.lock().await;
|
||||||
tracing::debug!("pool warm: open failed: {}", e);
|
if pool.len() < POOL_MAX {
|
||||||
None
|
pool.push(entry);
|
||||||
|
warmed += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
Err(e) => {
|
||||||
}
|
tracing::debug!("pool warm: open failed: {}", e);
|
||||||
let mut warmed = 0;
|
|
||||||
while let Some(res) = set.join_next().await {
|
|
||||||
if let Ok(Some(entry)) = res {
|
|
||||||
let mut pool = self.pool.lock().await;
|
|
||||||
if pool.len() < POOL_MAX {
|
|
||||||
pool.push(entry);
|
|
||||||
warmed += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -679,6 +679,56 @@ impl DomainFronter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Background loop that keeps at least `POOL_MIN` valid connections
|
||||||
|
/// ready. A connection only counts toward the minimum if it has at
|
||||||
|
/// least 20 s of TTL remaining — nearly-expired entries don't help.
|
||||||
|
/// Checks every `POOL_REFILL_INTERVAL_SECS`, evicts expired entries,
|
||||||
|
/// and opens replacements one at a time so there's no burst.
|
||||||
|
pub async fn run_pool_refill(self: Arc<Self>) {
|
||||||
|
const MIN_REMAINING_SECS: u64 = 20;
|
||||||
|
loop {
|
||||||
|
tokio::time::sleep(Duration::from_secs(POOL_REFILL_INTERVAL_SECS)).await;
|
||||||
|
|
||||||
|
// Evict expired entries first.
|
||||||
|
{
|
||||||
|
let mut pool = self.pool.lock().await;
|
||||||
|
pool.retain(|e| e.created.elapsed().as_secs() < POOL_TTL_SECS);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count only connections with enough life left.
|
||||||
|
// Refill one at a time to avoid bursting TLS handshakes.
|
||||||
|
loop {
|
||||||
|
let healthy = {
|
||||||
|
let pool = self.pool.lock().await;
|
||||||
|
pool.iter()
|
||||||
|
.filter(|e| {
|
||||||
|
let age = e.created.elapsed().as_secs();
|
||||||
|
age + MIN_REMAINING_SECS < POOL_TTL_SECS
|
||||||
|
})
|
||||||
|
.count()
|
||||||
|
};
|
||||||
|
if healthy >= POOL_MIN {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
match self.open().await {
|
||||||
|
Ok(s) => {
|
||||||
|
let mut pool = self.pool.lock().await;
|
||||||
|
if pool.len() < POOL_MAX {
|
||||||
|
pool.push(PoolEntry {
|
||||||
|
stream: s,
|
||||||
|
created: Instant::now(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::debug!("pool refill: open failed: {}", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Keep the Apps Script container warm with a periodic HEAD ping.
|
/// Keep the Apps Script container warm with a periodic HEAD ping.
|
||||||
///
|
///
|
||||||
/// `acquire()` keeps the *TCP/TLS pool* warm but does nothing for the
|
/// `acquire()` keeps the *TCP/TLS pool* warm but does nothing for the
|
||||||
@@ -721,12 +771,17 @@ impl DomainFronter {
|
|||||||
async fn acquire(&self) -> Result<PoolEntry, FronterError> {
|
async fn acquire(&self) -> Result<PoolEntry, FronterError> {
|
||||||
{
|
{
|
||||||
let mut pool = self.pool.lock().await;
|
let mut pool = self.pool.lock().await;
|
||||||
while let Some(entry) = pool.pop() {
|
// Evict expired, then hand out the freshest (most remaining TTL).
|
||||||
if entry.created.elapsed().as_secs() < POOL_TTL_SECS {
|
pool.retain(|e| e.created.elapsed().as_secs() < POOL_TTL_SECS);
|
||||||
return Ok(entry);
|
if !pool.is_empty() {
|
||||||
}
|
// Freshest = smallest elapsed time. swap_remove is O(1).
|
||||||
// expired — drop it
|
let freshest = pool
|
||||||
drop(entry);
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.min_by_key(|(_, e)| e.created.elapsed())
|
||||||
|
.map(|(i, _)| i)
|
||||||
|
.unwrap();
|
||||||
|
return Ok(pool.swap_remove(freshest));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let stream = self.open().await?;
|
let stream = self.open().await?;
|
||||||
|
|||||||
@@ -593,6 +593,16 @@ impl ProxyServer {
|
|||||||
tokio::spawn(async move { std::future::pending::<()>().await })
|
tokio::spawn(async move { std::future::pending::<()>().await })
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Background pool refill: keeps at least POOL_MIN ready TLS
|
||||||
|
// connections so acquire() never pays a cold handshake.
|
||||||
|
let refill_task = if let Some(refill_fronter) = self.fronter.clone() {
|
||||||
|
tokio::spawn(async move {
|
||||||
|
refill_fronter.run_pool_refill().await;
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
tokio::spawn(async move { std::future::pending::<()>().await })
|
||||||
|
};
|
||||||
|
|
||||||
let stats_task = if let Some(stats_fronter) = self.fronter.clone() {
|
let stats_task = if let Some(stats_fronter) = self.fronter.clone() {
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
let mut ticker = tokio::time::interval(std::time::Duration::from_secs(60));
|
let mut ticker = tokio::time::interval(std::time::Duration::from_secs(60));
|
||||||
@@ -701,6 +711,7 @@ impl ProxyServer {
|
|||||||
tracing::info!("Shutdown signal received, stopping listeners");
|
tracing::info!("Shutdown signal received, stopping listeners");
|
||||||
stats_task.abort();
|
stats_task.abort();
|
||||||
keepalive_task.abort();
|
keepalive_task.abort();
|
||||||
|
refill_task.abort();
|
||||||
http_task.abort();
|
http_task.abort();
|
||||||
socks_task.abort();
|
socks_task.abort();
|
||||||
}
|
}
|
||||||
|
|||||||
+10
-11
@@ -60,17 +60,16 @@ const CLIENT_FIRST_DATA_WAIT: Duration = Duration::from_millis(50);
|
|||||||
/// step for more ops. Resets on every arrival, up to max from the first
|
/// step for more ops. Resets on every arrival, up to max from the first
|
||||||
/// op. Overridable via config `coalesce_step_ms` / `coalesce_max_ms`.
|
/// op. Overridable via config `coalesce_step_ms` / `coalesce_max_ms`.
|
||||||
///
|
///
|
||||||
/// 10 ms is enough to catch ops that arrive in the same event-loop tick
|
/// 200 ms balances latency against batching efficiency. The dominant
|
||||||
/// (e.g. a browser opening 6 parallel connections) without adding
|
/// bottleneck is the Apps Script round-trip (~1.5 s), so the extra
|
||||||
/// perceptible latency to downloads where the tunnel-node reply — not
|
/// 200 ms wait is negligible to the user but lets significantly more
|
||||||
/// coalescing — is the real bottleneck. When both sides *do* have data
|
/// ops land in each batch — a page load that would fire 10 separate
|
||||||
/// in flight (uploads, bursty page loads), the adaptive reset still
|
/// 1-op batches at 10 ms now packs 3–5 ops per batch, cutting the
|
||||||
/// packs batches efficiently: each arriving op resets the step timer, so
|
/// number of round-trips roughly in half. On idle sessions the step
|
||||||
/// a rapid burst naturally coalesces up to `DEFAULT_COALESCE_MAX_MS`
|
/// timer fires once with nothing queued (no cost); under load each
|
||||||
/// without an explicit upload/download distinction. The net effect is
|
/// arriving op resets the timer, so rapid bursts still coalesce up to
|
||||||
/// "don't wait when there's nothing to wait for; batch aggressively when
|
/// `DEFAULT_COALESCE_MAX_MS` naturally.
|
||||||
/// there is."
|
const DEFAULT_COALESCE_STEP_MS: u64 = 200;
|
||||||
const DEFAULT_COALESCE_STEP_MS: u64 = 10;
|
|
||||||
const DEFAULT_COALESCE_MAX_MS: u64 = 1000;
|
const DEFAULT_COALESCE_MAX_MS: u64 = 1000;
|
||||||
|
|
||||||
/// Structured error code the tunnel-node returns when it doesn't know the
|
/// Structured error code the tunnel-node returns when it doesn't know the
|
||||||
|
|||||||
Reference in New Issue
Block a user