From 27a6f4377cab8c339b9e8fc01689e3a246324d04 Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 7 Apr 2026 14:57:15 +0100 Subject: [PATCH] feat(cli): background the VPN daemon with re-exec + clean shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `sunbeam connect` now fork-execs itself with a hidden `__vpn-daemon` subcommand instead of running the daemon in-process. The user-facing command spawns the child detached (stdio → log file, setsid for no controlling TTY), polls the IPC socket until the daemon reaches Running, prints a one-line status, and exits. The user gets back to their shell immediately. - src/cli.rs: `Connect { foreground }` instead of unit. Add hidden `__vpn-daemon` Verb that the spawned child runs. - src/vpn_cmds.rs: split into spawn_background_daemon (default path) and run_daemon_foreground (used by both `connect --foreground` and `__vpn-daemon`). Detached child uses pre_exec(setsid) and inherits --context from the parent so it resolves the same VPN config. Refuses to start if a daemon is already running on the control socket; cleans up stale socket files. Switches the proxy bind from 16443 (sienna's existing SSH tunnel uses it) to 16579. - sunbeam-net/src/daemon/lifecycle: add a SocketGuard RAII type so the IPC control socket is unlinked when the daemon exits, regardless of shutdown path. Otherwise `vpn status` after a clean disconnect would see a stale socket and report an error. End-to-end smoke test against the docker stack: $ sunbeam connect ==> VPN daemon spawned (pid 90072, ...) Connected (100.64.0.154, fd7a:115c:a1e0::9a) — 2 peers visible $ sunbeam vpn status VPN: running addresses: 100.64.0.154, fd7a:115c:a1e0::9a peers: 2 derp home: region 0 $ sunbeam disconnect ==> Asking VPN daemon to stop... Daemon acknowledged shutdown. 
$ sunbeam vpn status VPN: not running --- Cargo.lock | 5 +- Cargo.toml | 1 + src/cli.rs | 20 ++- src/vpn_cmds.rs | 209 +++++++++++++++++++++------- sunbeam-net/src/daemon/lifecycle.rs | 25 ++++ 5 files changed, 208 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56e4ff39..481bf81a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2331,9 +2331,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.183" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libm" @@ -4357,6 +4357,7 @@ dependencies = [ "k8s-openapi", "kube", "lettre", + "libc", "pkcs1", "pkcs8", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index f051168e..cd637bbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,6 +84,7 @@ wfe-yaml = { version = "1.6.3", registry = "sunbeam" } async-trait = "0.1" hostname = "0.4.2" whoami = "2.1.1" +libc = "0.2.184" [dev-dependencies] wiremock = "0.6" diff --git a/src/cli.rs b/src/cli.rs index b1a3ff01..74b063f7 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -173,8 +173,12 @@ pub enum Verb { service: String, }, - /// Connect to the cluster VPN (foreground; Ctrl-C to disconnect). - Connect, + /// Connect to the cluster VPN (spawns a background daemon). + Connect { + /// Run the daemon in the foreground instead of detaching. + #[arg(long)] + foreground: bool, + }, /// Disconnect from the cluster VPN. Disconnect, @@ -185,6 +189,12 @@ pub enum Verb { action: VpnAction, }, + /// Internal: run the VPN daemon in the foreground. Used by `connect` + /// when it spawns itself as a background process. Not part of the + /// public CLI surface. + #[command(name = "__vpn-daemon", hide = true)] + VpnDaemon, + /// Self-update from latest mainline commit. 
Update, @@ -1514,7 +1524,9 @@ pub async fn dispatch() -> Result<()> { crate::service_cmds::cmd_shell(&service).await } - Some(Verb::Connect) => crate::vpn_cmds::cmd_connect().await, + Some(Verb::Connect { foreground }) => { + crate::vpn_cmds::cmd_connect(foreground).await + } Some(Verb::Disconnect) => crate::vpn_cmds::cmd_disconnect().await, @@ -1522,6 +1534,8 @@ pub async fn dispatch() -> Result<()> { VpnAction::Status => crate::vpn_cmds::cmd_vpn_status().await, }, + Some(Verb::VpnDaemon) => crate::vpn_cmds::cmd_vpn_daemon().await, + Some(Verb::Update) => crate::update::cmd_update().await, Some(Verb::Version) => { diff --git a/src/vpn_cmds.rs b/src/vpn_cmds.rs index 8cbe4363..a50bda5a 100644 --- a/src/vpn_cmds.rs +++ b/src/vpn_cmds.rs @@ -1,24 +1,28 @@ //! `sunbeam connect` / `sunbeam disconnect` / `sunbeam vpn ...` //! -//! These commands wrap the `sunbeam-net` daemon and run it in the foreground -//! of the CLI process. We don't currently background it as a separate -//! daemon process — running in the foreground keeps the lifecycle simple -//! and is the right shape for the typical workflow ("connect, do work, -//! disconnect with ^C"). +//! `sunbeam connect` re-execs the current binary with a hidden +//! `__vpn-daemon` subcommand and detaches it (stdio → /dev/null + a log +//! file). The detached child runs the actual `sunbeam-net` daemon and +//! listens on the IPC control socket. The user-facing process polls the +//! socket until the daemon reaches Running, prints status, and exits. +//! +//! This shape avoids forking from inside the tokio runtime. use crate::config::active_context; use crate::error::{Result, SunbeamError}; use crate::output::{ok, step, warn}; use std::path::PathBuf; -/// Run `sunbeam connect` — start the VPN daemon and block until shutdown. -pub async fn cmd_connect() -> Result<()> { +/// Run `sunbeam connect`. +/// +/// Default mode spawns a backgrounded daemon and returns once it reaches +/// Running. 
With `--foreground`, runs the daemon in-process and blocks +/// until SIGINT or SIGTERM. +pub async fn cmd_connect(foreground: bool) -> Result<()> { let ctx = active_context(); - if ctx.vpn_url.is_empty() { return Err(SunbeamError::Other( - "no VPN configured for this context — set vpn-url and vpn-auth-key in config" - .into(), + "no VPN configured for this context — set vpn-url and vpn-auth-key in config".into(), )); } if ctx.vpn_auth_key.is_empty() { @@ -29,14 +33,131 @@ pub async fn cmd_connect() -> Result<()> { let state_dir = vpn_state_dir()?; std::fs::create_dir_all(&state_dir).map_err(|e| { - SunbeamError::Other(format!( - "create vpn state dir {}: {e}", - state_dir.display() - )) + SunbeamError::Other(format!("create vpn state dir {}: {e}", state_dir.display())) + })?; + + if foreground { + return run_daemon_foreground().await; + } + + spawn_background_daemon(&state_dir).await +} + +/// Spawn a detached daemon child and wait for it to reach Running. +async fn spawn_background_daemon(state_dir: &std::path::Path) -> Result<()> { + // Refuse to start a second daemon if one is already running. + let socket = state_dir.join("daemon.sock"); + let probe = sunbeam_net::IpcClient::new(&socket); + if probe.socket_exists() { + if let Ok(status) = probe.status().await { + warn(&format!( + "VPN daemon already running ({status}). Use `sunbeam disconnect` first." + )); + return Ok(()); + } + // Stale socket — clean it up so the new daemon can rebind. + let _ = std::fs::remove_file(&socket); + } + + // Re-exec ourselves with the hidden subcommand. 
+ let exe = std::env::current_exe() + .map_err(|e| SunbeamError::Other(format!("locate current_exe: {e}")))?; + let log_path = state_dir.join("daemon.log"); + let log = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(&log_path) + .map_err(|e| SunbeamError::Other(format!("open daemon log: {e}")))?; + let log_err = log + .try_clone() + .map_err(|e| SunbeamError::Other(format!("dup daemon log fd: {e}")))?; + + let mut cmd = std::process::Command::new(&exe); + // --context is a top-level flag, must precede the subcommand. + let cfg = crate::config::load_config(); + if !cfg.current_context.is_empty() { + cmd.arg("--context").arg(&cfg.current_context); + } + cmd.arg("__vpn-daemon"); + cmd.stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::from(log)) + .stderr(std::process::Stdio::from(log_err)); + + // Detach from the controlling terminal so closing the parent shell + // doesn't SIGHUP the daemon. + use std::os::unix::process::CommandExt; + unsafe { + cmd.pre_exec(|| { + // Become a session leader so the child has no controlling TTY. + libc::setsid(); + Ok(()) + }); + } + + let child = cmd + .spawn() + .map_err(|e| SunbeamError::Other(format!("spawn daemon: {e}")))?; + + step(&format!( + "VPN daemon spawned (pid {}, logs at {})", + child.id(), + log_path.display() + )); + + // Poll the IPC socket until the daemon reaches Running. + let client = sunbeam_net::IpcClient::new(&socket); + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30); + loop { + if std::time::Instant::now() > deadline { + warn( + "VPN daemon did not reach Running state within 30s — \ + check the daemon log for details", + ); + return Ok(()); + } + if !client.socket_exists() { + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + continue; + } + match client.status().await { + Ok(sunbeam_net::DaemonStatus::Running { + addresses, + peer_count, + .. 
+ }) => { + let addrs: Vec<String> = addresses.iter().map(|a| a.to_string()).collect(); + ok(&format!( + "Connected ({}) — {} peers visible", + addrs.join(", "), + peer_count + )); + return Ok(()); + } + Ok(sunbeam_net::DaemonStatus::Error { message }) => { + return Err(SunbeamError::Other(format!("VPN daemon error: {message}"))); + } + // Still starting / connecting / registering — keep polling. + Ok(_) | Err(_) => { + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + } + } +} + +/// The hidden `__vpn-daemon` subcommand entry point. +pub async fn cmd_vpn_daemon() -> Result<()> { + run_daemon_foreground().await +} + +/// Build VpnConfig from the active context, start the daemon, and block +/// until SIGINT/SIGTERM or an IPC `Stop` request brings it down. +async fn run_daemon_foreground() -> Result<()> { + let ctx = active_context(); + let state_dir = vpn_state_dir()?; + std::fs::create_dir_all(&state_dir).map_err(|e| { + SunbeamError::Other(format!("create vpn state dir {}: {e}", state_dir.display())) })?; - // Build the netmap label as "<user>@<host>" so multiple workstations - // for the same human are distinguishable in `headscale nodes list`. let user = whoami::username().unwrap_or_else(|_| "unknown".to_string()); let host = hostname::get() .ok() @@ -48,11 +169,11 @@ pub async fn cmd_connect() -> Result<()> { coordination_url: ctx.vpn_url.clone(), auth_key: ctx.vpn_auth_key.clone(), state_dir: state_dir.clone(), - // Bind the local k8s proxy on a fixed port the rest of the CLI can - // discover via context (or via IPC, eventually). - proxy_bind: "127.0.0.1:16443".parse().expect("static addr"), - // Default cluster API target — TODO: derive from netmap once we - // know which peer hosts the k8s API. + // Bind the local k8s proxy on 16579 — far enough away from common + // conflicts (6443 = kube API, 16443 = sienna's SSH tunnel) that we + // shouldn't collide on dev machines. TODO: make this configurable + // and discoverable via IPC. 
+ proxy_bind: "127.0.0.1:16579".parse().expect("static addr"), cluster_api_addr: "100.64.0.1".parse().expect("static addr"), cluster_api_port: 6443, control_socket: state_dir.join("daemon.sock"), @@ -65,39 +186,33 @@ pub async fn cmd_connect() -> Result<()> { .await .map_err(|e| SunbeamError::Other(format!("daemon start: {e}")))?; - // Block until the daemon reaches Running, then sit on it until SIGINT. - let mut ready = false; - for _ in 0..60 { - match handle.current_status() { - sunbeam_net::DaemonStatus::Running { addresses, peer_count, .. } => { - let addrs: Vec<String> = addresses.iter().map(|a| a.to_string()).collect(); - ok(&format!( - "Connected ({}) — {} peers visible", - addrs.join(", "), - peer_count - )); - ready = true; + // Wait for either Ctrl-C, SIGTERM, or the daemon stopping itself + // (e.g. via an IPC `Stop` request). + let ctrl_c = tokio::signal::ctrl_c(); + tokio::pin!(ctrl_c); + let mut sigterm = + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .map_err(|e| SunbeamError::Other(format!("install SIGTERM handler: {e}")))?; + + loop { + tokio::select! 
{ + biased; + _ = &mut ctrl_c => { + step("Interrupt — disconnecting..."); break; } - sunbeam_net::DaemonStatus::Reconnecting { attempt } => { - warn(&format!("Reconnecting (attempt {attempt})...")); + _ = sigterm.recv() => { + step("SIGTERM — disconnecting..."); + break; } - sunbeam_net::DaemonStatus::Error { ref message } => { - return Err(SunbeamError::Other(format!("VPN error: {message}"))); + _ = tokio::time::sleep(std::time::Duration::from_millis(500)) => { + if matches!(handle.current_status(), sunbeam_net::DaemonStatus::Stopped) { + break; + } } - _ => {} } - tokio::time::sleep(std::time::Duration::from_millis(500)).await; - } - if !ready { - warn("VPN daemon did not reach Running state within 30s — continuing anyway"); } - println!("Press Ctrl-C to disconnect."); - tokio::signal::ctrl_c() - .await - .map_err(|e| SunbeamError::Other(format!("install signal handler: {e}")))?; - step("Disconnecting..."); handle .shutdown() .await diff --git a/sunbeam-net/src/daemon/lifecycle.rs b/sunbeam-net/src/daemon/lifecycle.rs index 12f6e2a8..805bb714 100644 --- a/sunbeam-net/src/daemon/lifecycle.rs +++ b/sunbeam-net/src/daemon/lifecycle.rs @@ -42,6 +42,11 @@ async fn run_daemon_loop( status: Arc>, shutdown: tokio_util::sync::CancellationToken, ) -> crate::Result<()> { + // Make sure the IPC control socket is cleaned up no matter how the + // daemon exits — otherwise `sunbeam vpn status` after a clean shutdown + // would see a stale socket file and report "stale socket". + let _socket_guard = SocketGuard::new(config.control_socket.clone()); + let keys = crate::keys::NodeKeys::load_or_generate(&config.state_dir)?; let mut attempt: u32 = 0; let max_backoff = Duration::from_secs(60); @@ -86,6 +91,26 @@ enum SessionExit { Disconnected, } +/// RAII guard that removes a Unix socket file when dropped. 
Used by +/// `run_daemon_loop` to make sure the IPC control socket is cleaned up +/// when the daemon exits, regardless of whether shutdown was triggered +/// via DaemonHandle, IPC Stop, signal, or panic. +struct SocketGuard { + path: std::path::PathBuf, +} + +impl SocketGuard { + fn new(path: std::path::PathBuf) -> Self { + Self { path } + } +} + +impl Drop for SocketGuard { + fn drop(&mut self) { + let _ = std::fs::remove_file(&self.path); + } +} + /// Run a single VPN session. Returns when the session ends (error or shutdown). async fn run_session( config: &VpnConfig,