mirror of
https://github.com/therealaleph/MasterHttpRelayVPN-RUST.git
synced 2026-05-18 23:54:48 +03:00
fix(android): tighten VPN session lifecycle reliability
This commit is contained in:
@@ -42,11 +42,18 @@ class MhrvApp : Application() {
|
|||||||
)
|
)
|
||||||
val previous = Thread.getDefaultUncaughtExceptionHandler()
|
val previous = Thread.getDefaultUncaughtExceptionHandler()
|
||||||
Thread.setDefaultUncaughtExceptionHandler { thread, throwable ->
|
Thread.setDefaultUncaughtExceptionHandler { thread, throwable ->
|
||||||
|
// Log.e itself can throw on extreme conditions (logd dead,
|
||||||
|
// OOM allocating the formatted message). If we let that
|
||||||
|
// bubble up, we'd recurse into our own handler with a
|
||||||
|
// half-handled original exception; swallow it so the
|
||||||
|
// previous handler still fires with the real failure.
|
||||||
|
try {
|
||||||
Log.e(
|
Log.e(
|
||||||
CRASH_TAG,
|
CRASH_TAG,
|
||||||
"uncaught on thread=${thread.name} (id=${thread.id}): ${throwable.message}",
|
"uncaught on thread=${thread.name} (id=${thread.id}): ${throwable.message}",
|
||||||
throwable,
|
throwable,
|
||||||
)
|
)
|
||||||
|
} catch (_: Throwable) { }
|
||||||
// Let the default handler still terminate the process and
|
// Let the default handler still terminate the process and
|
||||||
// show the system "app closed" dialog — we just wanted to
|
// show the system "app closed" dialog — we just wanted to
|
||||||
// get a log line out the door first.
|
// get a log line out the door first.
|
||||||
|
|||||||
@@ -242,13 +242,14 @@ class MhrvVpnService : VpnService() {
|
|||||||
tun = parcelFd
|
tun = parcelFd
|
||||||
|
|
||||||
// 3) Start tun2proxy on a worker thread. It blocks until stop() or
|
// 3) Start tun2proxy on a worker thread. It blocks until stop() or
|
||||||
// shutdown. We detach the fd so ownership transfers cleanly; the
|
// shutdown. We detach the fd so ownership transfers cleanly to
|
||||||
// ParcelFileDescriptor (`tun`) still holds a reference, so closing
|
// tun2proxy (closeFdOnDrop = true closes it on return from run()).
|
||||||
// it at teardown reliably tears down the TUN even if tun2proxy
|
// The ParcelFileDescriptor (`tun`) we keep is post-detach — its
|
||||||
// doesn't cleanly exit.
|
// own close() is a no-op for the underlying fd, so the worker is
|
||||||
|
// the sole owner once it's running.
|
||||||
val detachedFd = parcelFd.detachFd()
|
val detachedFd = parcelFd.detachFd()
|
||||||
tun2proxyRunning.set(true)
|
tun2proxyRunning.set(true)
|
||||||
tun2proxyThread = Thread({
|
val worker = Thread({
|
||||||
try {
|
try {
|
||||||
val rc = Tun2proxy.run(
|
val rc = Tun2proxy.run(
|
||||||
"socks5://127.0.0.1:$socks5Port",
|
"socks5://127.0.0.1:$socks5Port",
|
||||||
@@ -264,7 +265,29 @@ class MhrvVpnService : VpnService() {
|
|||||||
} finally {
|
} finally {
|
||||||
tun2proxyRunning.set(false)
|
tun2proxyRunning.set(false)
|
||||||
}
|
}
|
||||||
}, "tun2proxy").apply { start() }
|
}, "tun2proxy")
|
||||||
|
try {
|
||||||
|
worker.start()
|
||||||
|
tun2proxyThread = worker
|
||||||
|
} catch (t: Throwable) {
|
||||||
|
// Thread.start can throw OutOfMemoryError under extreme memory
|
||||||
|
// pressure. The fd we just detached has no owner — without an
|
||||||
|
// explicit close it leaks for the life of the process. Adopt
|
||||||
|
// it into a fresh ParcelFileDescriptor purely so we can call
|
||||||
|
// close() on it.
|
||||||
|
Log.e(TAG, "tun2proxy thread start failed: ${t.message}", t)
|
||||||
|
tun2proxyRunning.set(false)
|
||||||
|
try {
|
||||||
|
ParcelFileDescriptor.adoptFd(detachedFd).close()
|
||||||
|
} catch (closeErr: Throwable) {
|
||||||
|
Log.w(TAG, "adoptFd($detachedFd).close failed: ${closeErr.message}")
|
||||||
|
}
|
||||||
|
Native.stopProxy(proxyHandle)
|
||||||
|
proxyHandle = 0L
|
||||||
|
try { stopForeground(STOP_FOREGROUND_REMOVE) } catch (_: Throwable) {}
|
||||||
|
stopSelf()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// (startForeground was already called at the top of this method
|
// (startForeground was already called at the top of this method
|
||||||
// to satisfy Android 8+'s foreground-service contract — see the
|
// to satisfy Android 8+'s foreground-service contract — see the
|
||||||
@@ -291,12 +314,23 @@ class MhrvVpnService : VpnService() {
|
|||||||
* tun2proxy still forwarding packets into a half-dead Rust runtime
|
* tun2proxy still forwarding packets into a half-dead Rust runtime
|
||||||
* while the runtime is force-aborting its tasks — that's the scenario
|
* while the runtime is force-aborting its tasks — that's the scenario
|
||||||
* that manifested as "Stop crashes the app" when there were in-flight
|
* that manifested as "Stop crashes the app" when there were in-flight
|
||||||
* relay requests piled up against a dead Apps Script deployment. The
|
* relay requests piled up against a dead Apps Script deployment.
|
||||||
* correct order is:
|
*
|
||||||
* 1. Signal tun2proxy to stop (cooperative).
|
* Steps, with the bound on each one called out so a hung native call
|
||||||
* 2. Close the TUN fd — forces tun2proxy's read() to return EBADF.
|
* cannot stall the whole teardown thread:
|
||||||
* 3. Join the tun2proxy thread (now it really will exit).
|
* 1. Signal tun2proxy to stop (cooperative). Bounded by a 2s
|
||||||
* 4. Shut down the Rust proxy runtime (nothing left to forward to).
|
* side-thread join — if the JNI call hangs we proceed anyway.
|
||||||
|
* 2. Drop our `ParcelFileDescriptor` reference. Because we already
|
||||||
|
* called detachFd() at startup, this is a no-op for the
|
||||||
|
* underlying fd — the worker (closeFdOnDrop=true) owns it.
|
||||||
|
* We keep the call only so the PROXY_ONLY / failed-establish
|
||||||
|
* paths still null out the field cleanly.
|
||||||
|
* 3. Join the tun2proxy thread, bounded at 4s. If the worker is
|
||||||
|
* stuck we log and move on — the runtime shutdown below will
|
||||||
|
* knock the rest of the world over.
|
||||||
|
* 4. Shut down the Rust proxy runtime, bounded by `rt.shutdown_timeout`
|
||||||
|
* on the Rust side (5s). This is the hard backstop: the listener
|
||||||
|
* socket is released here regardless of what the worker is doing.
|
||||||
*/
|
*/
|
||||||
private fun teardown() {
|
private fun teardown() {
|
||||||
// Idempotency guard. Without this, onDestroy racing the
|
// Idempotency guard. Without this, onDestroy racing the
|
||||||
@@ -315,17 +349,29 @@ class MhrvVpnService : VpnService() {
|
|||||||
"(tun2proxy running=${tun2proxyRunning.get()}, proxyHandle=$proxyHandle)",
|
"(tun2proxy running=${tun2proxyRunning.get()}, proxyHandle=$proxyHandle)",
|
||||||
)
|
)
|
||||||
|
|
||||||
// 1. Cooperative stop signal.
|
// 1. Cooperative stop signal — bounded so a hung Rust call cannot
|
||||||
|
// stall the entire teardown thread. We've never observed
|
||||||
|
// Tun2proxy.stop() block in practice, but the contract isn't
|
||||||
|
// documented as bounded and the rest of teardown already takes
|
||||||
|
// care to be timeout-bounded; this closes the gap.
|
||||||
if (tun2proxyRunning.get()) {
|
if (tun2proxyRunning.get()) {
|
||||||
|
val stopper = Thread({
|
||||||
try { Tun2proxy.stop() } catch (t: Throwable) {
|
try { Tun2proxy.stop() } catch (t: Throwable) {
|
||||||
Log.w(TAG, "Tun2proxy.stop: ${t.message}")
|
Log.w(TAG, "Tun2proxy.stop: ${t.message}")
|
||||||
}
|
}
|
||||||
|
}, "mhrv-tun2proxy-stop").apply { start() }
|
||||||
|
try { stopper.join(2_000) } catch (_: InterruptedException) {}
|
||||||
|
if (stopper.isAlive) {
|
||||||
|
Log.w(TAG, "Tun2proxy.stop did not return within 2s — proceeding")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Close the TUN fd. Since we called detachFd earlier the
|
// 2. Drop our PFD reference. detachFd at startup means this
|
||||||
// ParcelFileDescriptor no longer owns the fd and close() here
|
// close() is a no-op for the underlying fd — tun2proxy owns
|
||||||
// is a no-op; the real fd is owned by tun2proxy (closeFdOnDrop
|
// it (closeFdOnDrop = true) and closes it on return from
|
||||||
// = true), which closes it on return from run().
|
// run(). The call is kept only to null the field cleanly on
|
||||||
|
// paths that never reached detachFd (PROXY_ONLY, or an
|
||||||
|
// establish() that failed mid-builder).
|
||||||
try { tun?.close() } catch (t: Throwable) {
|
try { tun?.close() } catch (t: Throwable) {
|
||||||
Log.w(TAG, "tun.close: ${t.message}")
|
Log.w(TAG, "tun.close: ${t.message}")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,8 +46,10 @@ import com.therealaleph.mhrv.ui.theme.ErrRed
|
|||||||
import com.therealaleph.mhrv.ui.theme.OkGreen
|
import com.therealaleph.mhrv.ui.theme.OkGreen
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.delay
|
import kotlinx.coroutines.delay
|
||||||
|
import kotlinx.coroutines.flow.first
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
import kotlinx.coroutines.withContext
|
import kotlinx.coroutines.withContext
|
||||||
|
import kotlinx.coroutines.withTimeoutOrNull
|
||||||
import org.json.JSONObject
|
import org.json.JSONObject
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -122,18 +124,31 @@ fun HomeScreen(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cooldown on Start/Stop. Rapid taps during a VPN transition trigger
|
// Gate Start/Stop on the service's actual state transition rather
|
||||||
// an emulator-specific EGL renderer crash
|
// than a fixed timer. The previous 2s cooldown was shorter than the
|
||||||
// (F OpenGLRenderer: EGL_NOT_INITIALIZED during rendering) — the
|
// worst-case teardown (Tun2proxy.stop + 4s join + 5s rt.shutdown_timeout
|
||||||
// service survives, but the Compose UI process dies and the app
|
// ≈ 9s on the slowest path), which let the user fire a fresh Connect
|
||||||
// appears to close. On real hardware this is rare, but debouncing
|
// while the previous Stop's native cleanup was still releasing the
|
||||||
// is useful UX anyway: neither start nor stop is truly instant,
|
// listener port — the new startProxy then failed with "Address already
|
||||||
// and the user gets no feedback if they tap while one is in flight.
|
// in use".
|
||||||
var transitionCooldown by remember { mutableStateOf(false) }
|
//
|
||||||
LaunchedEffect(transitionCooldown) {
|
// `awaitingRunning` holds the value we expect VpnState.isRunning to
|
||||||
if (transitionCooldown) {
|
// settle on after the user's action; null means "no transition in
|
||||||
delay(2000)
|
// flight". The LaunchedEffect below suspends on the StateFlow until
|
||||||
transitionCooldown = false
|
// the predicate matches, with a 12s backstop in case the service
|
||||||
|
// failed before flipping the flag (e.g., establish() returned null).
|
||||||
|
// Side benefit: this also debounces the rapid-tap EGL renderer crash
|
||||||
|
// the old timer was guarding against.
|
||||||
|
var awaitingRunning by remember { mutableStateOf<Boolean?>(null) }
|
||||||
|
val transitioning = awaitingRunning != null
|
||||||
|
LaunchedEffect(awaitingRunning) {
|
||||||
|
val target = awaitingRunning ?: return@LaunchedEffect
|
||||||
|
try {
|
||||||
|
withTimeoutOrNull(12_000) {
|
||||||
|
VpnState.isRunning.first { it == target }
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
awaitingRunning = null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -373,10 +388,11 @@ fun HomeScreen(
|
|||||||
val isVpnRunning by VpnState.isRunning.collectAsState()
|
val isVpnRunning by VpnState.isRunning.collectAsState()
|
||||||
Button(
|
Button(
|
||||||
onClick = {
|
onClick = {
|
||||||
transitionCooldown = true
|
|
||||||
if (isVpnRunning) {
|
if (isVpnRunning) {
|
||||||
|
awaitingRunning = false
|
||||||
onStop()
|
onStop()
|
||||||
} else {
|
} else {
|
||||||
|
awaitingRunning = true
|
||||||
// Connect flow: auto-resolve google_ip so we don't
|
// Connect flow: auto-resolve google_ip so we don't
|
||||||
// hand the proxy a stale anycast target; repair
|
// hand the proxy a stale anycast target; repair
|
||||||
// front_domain if it got corrupted into an IP
|
// front_domain if it got corrupted into an IP
|
||||||
@@ -418,7 +434,7 @@ fun HomeScreen(
|
|||||||
},
|
},
|
||||||
enabled = (isVpnRunning ||
|
enabled = (isVpnRunning ||
|
||||||
cfg.mode == Mode.GOOGLE_ONLY ||
|
cfg.mode == Mode.GOOGLE_ONLY ||
|
||||||
(cfg.hasDeploymentId && cfg.authKey.isNotBlank())) && !transitionCooldown,
|
(cfg.hasDeploymentId && cfg.authKey.isNotBlank())) && !transitioning,
|
||||||
colors = ButtonDefaults.buttonColors(
|
colors = ButtonDefaults.buttonColors(
|
||||||
containerColor = if (isVpnRunning) ErrRed else OkGreen,
|
containerColor = if (isVpnRunning) ErrRed else OkGreen,
|
||||||
contentColor = androidx.compose.ui.graphics.Color.White,
|
contentColor = androidx.compose.ui.graphics.Color.White,
|
||||||
@@ -430,7 +446,7 @@ fun HomeScreen(
|
|||||||
) {
|
) {
|
||||||
Text(
|
Text(
|
||||||
when {
|
when {
|
||||||
transitionCooldown -> "…"
|
transitioning -> "…"
|
||||||
isVpnRunning -> stringResource(R.string.btn_disconnect)
|
isVpnRunning -> stringResource(R.string.btn_disconnect)
|
||||||
else -> stringResource(R.string.btn_connect)
|
else -> stringResource(R.string.btn_connect)
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user