From eec7e2965ce9f0cea9d659b4c2c579abeac33df3 Mon Sep 17 00:00:00 2001 From: Sentinel-Bluebuilder Date: Mon, 22 Jun 2026 12:27:54 -0700 Subject: [PATCH] Fix Linux WireGuard full-tunnel reconnect: clear stale wgsent0 interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumer-app context (x402 AI-agent dVPN end-to-end test, 2026-06-22): an agent connecting via the JS WireGuard full-tunnel path on Linux hit `wg-quick: 'wgsent0' already exists` on reconnect after a crashed run. `wg-quick down wgsent0` did not remove it, so reconnect was wedged until a manual teardown. The tester's workaround was to pass splitIPs: ['0.0.0.0/0','::/0']; this fixes the underlying defect so the workaround is no longer required for reconnect. Root cause: the Linux install pre-down ran only `wg-quick down <name>`. wg-quick resolves a bare name against /etc/wireguard/<name>.conf, but the SDK writes the WireGuard config to a TEMP dir (os.tmpdir()/sentinel-wg). So the down found no matching config, did nothing, and the stale kernel interface from the crashed run survived — blocking the next `wg-quick up`. Fix: new teardownLinuxInterface(name, confPath) tears a leftover interface down in three escalating steps — 1. `wg-quick down <confPath>` (covers the temp-dir conf case) 2. `wg-quick down <name>` (covers /etc/wireguard installs) 3. `ip link delete dev <name>` (last resort: raw kernel interface that wg-quick can no longer map back to a config) Each step is best-effort and never throws. Used by both the install pre-down and emergencyCleanupSync() so exit-handler cleanup also clears a wedged interface. emergencyCleanupSync's hardcoded /tmp path is replaced with os.tmpdir() to match where configs are actually written. Doc (same diff): ai-path/FAILURES.md gains TUNNEL entry T17 documenting the failure, root cause, fix, and prevention rule. 
--- ai-path/FAILURES.md | 1 + wireguard.js | 49 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/ai-path/FAILURES.md b/ai-path/FAILURES.md index a790026..3989a54 100644 --- a/ai-path/FAILURES.md +++ b/ai-path/FAILURES.md @@ -121,6 +121,7 @@ | T14 | Config ACL race -- key readable before ACL set | Config file (with private key) world-readable in `ProgramData` between write and ACL set | File written FIRST, ACL set SECOND; failure in ACL leaves file exposed | Create directory with restrictive ACL first, then write file | Set restrictive permissions on directory before writing sensitive files | | T15 | Full tunnel + Handshake DNS = 0-speed (44 nodes) | WireGuard tunnel connects but speed test returns exactly 0 Mbps | Full tunnel routes DNS through VPN; Handshake DNS (103.196.38.38) unreachable through many nodes | Pre-resolve all speed test hostnames BEFORE tunnel installation; use resolved IPs | Always pre-resolve DNS before installing full tunnel | | T16 | V2Ray port TIME_WAIT kills fallback | V2Ray fallback to different outbound fails because SOCKS5 port still in TIME_WAIT | All outbound configs used same SOCKS5 port; Windows TIME_WAIT is ~120s | Each outbound gets incrementing port: `basePort + idx` | Never reuse same port across V2Ray fallback attempts | +| T17 | Linux full-tunnel reconnect fails with `'wgsent0' already exists` | `wg-quick up` aborts on reconnect after a crash; `wg-quick down wgsent0` did not remove the temp-dir-config interface | Pre-down used only the bare interface name, which `wg-quick` resolves to `/etc/wireguard/wgsent0.conf` -- but the SDK writes the conf to a temp dir, so the down was a no-op and the stale kernel interface survived | `teardownLinuxInterface()` tears down by the actual conf path first, then by name, then `ip link delete dev wgsent0` as a last resort (called from both install pre-down and `emergencyCleanupSync`) | On Linux always tear down a stale wg interface by 
its real conf path and fall back to `ip link delete`; never assume the conf lives under /etc/wireguard | ### WALLET diff --git a/wireguard.js b/wireguard.js index 553e1cd..9753fc0 100644 --- a/wireguard.js +++ b/wireguard.js @@ -107,14 +107,11 @@ export function emergencyCleanupSync() { } catch {} // sc query may fail — no services installed } - // Linux/macOS: use wg-quick down for known tunnel configs + // Linux/macOS: tear down known tunnel configs (and any wedged kernel interface) if (WG_QUICK && process.platform !== 'win32') { for (const name of ['wgsent0', activeTunnelName].filter(Boolean)) { - // Try wg-quick down with the tunnel name or config path - const confPath = activeTunnelConf || `/tmp/sentinel-wg/${name}.conf`; - try { execFileSync(WG_QUICK, ['down', confPath], { timeout: 10_000, stdio: 'pipe' }); } catch {} // conf may not exist - // Also try by interface name directly - try { execFileSync(WG_QUICK, ['down', name], { timeout: 10_000, stdio: 'pipe' }); } catch {} // interface may not exist + const confPath = activeTunnelConf || `${os.tmpdir()}/sentinel-wg/${name}.conf`; + teardownLinuxInterface(name, confPath); } } @@ -162,6 +159,36 @@ function runWgCommand(args, timeoutMs = 30_000) { } } +// ─── Linux stale-interface teardown ─────────────────────────────────────────── +/** + * Best-effort removal of a leftover WireGuard interface on Linux/macOS. + * Tries, in order: wg-quick down by conf path, wg-quick down by bare name, then a + * raw `ip link delete` to clear a wedged kernel interface that wg-quick can't see. + * Each step is independent and must not block the next — a no-op is expected when + * there is nothing to remove. Does NOT throw. + * + * Why all three: a clean run is removed by wg-quick. But a crashed run leaves the + * interface up with its config in a TEMP dir; `wg-quick down <name>` looks for + * /etc/wireguard/<name>.conf (absent) and fails, so the interface survives and the + * next `wg-quick up` dies with "<name> already exists". 
Passing the real conf path + * fixes that; the `ip link delete` fallback handles the case where even the temp + * conf is gone but the kernel interface lingers. + */ +function teardownLinuxInterface(name, confPath) { + if (!WG_QUICK) return; + // 1. By the actual conf path (covers temp-dir configs from a crashed run). + if (confPath && existsSync(confPath)) { + try { execFileSync(WG_QUICK, ['down', confPath], { timeout: 10_000, stdio: 'pipe' }); } catch {} // conf may not be up + } + // 2. By bare interface name (covers configs installed under /etc/wireguard). + try { execFileSync(WG_QUICK, ['down', name], { timeout: 10_000, stdio: 'pipe' }); } catch {} // interface may not exist + // 3. Raw kernel interface delete — last resort for an interface wg-quick can't map + // back to a config (e.g. temp conf already deleted). `ip` is Linux-only. + if (process.platform === 'linux') { + try { execFileSync('ip', ['link', 'delete', 'dev', name], { timeout: 5_000, stdio: 'pipe' }); } catch {} // interface may not exist + } +} + // ─── Install tunnel ─────────────────────────────────────────────────────────── /** * Install and activate a WireGuard tunnel. @@ -204,8 +231,14 @@ export async function installWgTunnel(confPath) { return activeTunnelName; } else if (WG_QUICK) { - // Force-remove any existing tunnel with this name before installing (prevents "already exists" on Linux) - try { execFileSync(WG_QUICK, ['down', name], { timeout: 10_000, stdio: 'pipe' }); } catch {} + // Force-remove any leftover interface with this name before installing, otherwise + // `wg-quick up` aborts with "wgsent0 already exists" (Linux full-tunnel regression). + // A crashed/killed previous run leaves the kernel interface AND a temp-dir conf behind; + // `wg-quick down <name>` alone does NOT cover the temp-dir case because it resolves the + // bare name against /etc/wireguard/<name>.conf, which is empty here. 
Tear down by the + // actual conf path first, then by name, then delete the raw kernel interface as a + // last resort so a wedged interface from a prior crash can never block reconnect. + teardownLinuxInterface(name, confPath); await sleep(500); execFileSync(WG_QUICK, ['up', confPath], { timeout: 30_000, stdio: 'inherit' }); activeTunnelConf = confPath;