feat(cli): background the VPN daemon with re-exec + clean shutdown
`sunbeam connect` now fork-execs itself with a hidden `__vpn-daemon`
subcommand instead of running the daemon in-process. The user-facing
command spawns the child detached (stdin → /dev/null, stdout/stderr →
log file, setsid for no controlling TTY), polls the IPC socket until
the daemon reaches Running, prints a one-line status, and exits. The
user gets back to their shell immediately.
- src/cli.rs: `Connect { foreground }` instead of a unit variant. Add a
hidden `__vpn-daemon` Verb that the spawned child runs.
- src/vpn_cmds.rs: split into spawn_background_daemon (default path)
and run_daemon_foreground (used by both `connect --foreground` and
`__vpn-daemon`). Detached child uses pre_exec(setsid) and inherits
--context from the parent so it resolves the same VPN config.
Refuses to start if a daemon is already running on the control
socket; cleans up stale socket files. Switches the proxy bind from
16443 (sienna's existing SSH tunnel uses it) to 16579.
- sunbeam-net/src/daemon/lifecycle: add a SocketGuard RAII type so the
IPC control socket is unlinked when the daemon exits, regardless of
shutdown path. Otherwise `vpn status` after a clean disconnect would
see a stale socket and report an error.
End-to-end smoke test against the docker stack:
$ sunbeam connect
==> VPN daemon spawned (pid 90072, ...)
Connected (100.64.0.154, fd7a:115c:a1e0::9a) — 2 peers visible
$ sunbeam vpn status
VPN: running
addresses: 100.64.0.154, fd7a:115c:a1e0::9a
peers: 2
derp home: region 0
$ sunbeam disconnect
==> Asking VPN daemon to stop...
Daemon acknowledged shutdown.
$ sunbeam vpn status
VPN: not running
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -2331,9 +2331,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.183"
|
version = "0.2.184"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libm"
|
name = "libm"
|
||||||
@@ -4357,6 +4357,7 @@ dependencies = [
|
|||||||
"k8s-openapi",
|
"k8s-openapi",
|
||||||
"kube",
|
"kube",
|
||||||
"lettre",
|
"lettre",
|
||||||
|
"libc",
|
||||||
"pkcs1",
|
"pkcs1",
|
||||||
"pkcs8",
|
"pkcs8",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
|
|||||||
@@ -84,6 +84,7 @@ wfe-yaml = { version = "1.6.3", registry = "sunbeam" }
|
|||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
hostname = "0.4.2"
|
hostname = "0.4.2"
|
||||||
whoami = "2.1.1"
|
whoami = "2.1.1"
|
||||||
|
libc = "0.2.184"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
wiremock = "0.6"
|
wiremock = "0.6"
|
||||||
|
|||||||
20
src/cli.rs
20
src/cli.rs
@@ -173,8 +173,12 @@ pub enum Verb {
|
|||||||
service: String,
|
service: String,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Connect to the cluster VPN (foreground; Ctrl-C to disconnect).
|
/// Connect to the cluster VPN (spawns a background daemon).
|
||||||
Connect,
|
Connect {
|
||||||
|
/// Run the daemon in the foreground instead of detaching.
|
||||||
|
#[arg(long)]
|
||||||
|
foreground: bool,
|
||||||
|
},
|
||||||
|
|
||||||
/// Disconnect from the cluster VPN.
|
/// Disconnect from the cluster VPN.
|
||||||
Disconnect,
|
Disconnect,
|
||||||
@@ -185,6 +189,12 @@ pub enum Verb {
|
|||||||
action: VpnAction,
|
action: VpnAction,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Internal: run the VPN daemon in the foreground. Used by `connect`
|
||||||
|
/// when it spawns itself as a background process. Not part of the
|
||||||
|
/// public CLI surface.
|
||||||
|
#[command(name = "__vpn-daemon", hide = true)]
|
||||||
|
VpnDaemon,
|
||||||
|
|
||||||
/// Self-update from latest mainline commit.
|
/// Self-update from latest mainline commit.
|
||||||
Update,
|
Update,
|
||||||
|
|
||||||
@@ -1514,7 +1524,9 @@ pub async fn dispatch() -> Result<()> {
|
|||||||
crate::service_cmds::cmd_shell(&service).await
|
crate::service_cmds::cmd_shell(&service).await
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(Verb::Connect) => crate::vpn_cmds::cmd_connect().await,
|
Some(Verb::Connect { foreground }) => {
|
||||||
|
crate::vpn_cmds::cmd_connect(foreground).await
|
||||||
|
}
|
||||||
|
|
||||||
Some(Verb::Disconnect) => crate::vpn_cmds::cmd_disconnect().await,
|
Some(Verb::Disconnect) => crate::vpn_cmds::cmd_disconnect().await,
|
||||||
|
|
||||||
@@ -1522,6 +1534,8 @@ pub async fn dispatch() -> Result<()> {
|
|||||||
VpnAction::Status => crate::vpn_cmds::cmd_vpn_status().await,
|
VpnAction::Status => crate::vpn_cmds::cmd_vpn_status().await,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
Some(Verb::VpnDaemon) => crate::vpn_cmds::cmd_vpn_daemon().await,
|
||||||
|
|
||||||
Some(Verb::Update) => crate::update::cmd_update().await,
|
Some(Verb::Update) => crate::update::cmd_update().await,
|
||||||
|
|
||||||
Some(Verb::Version) => {
|
Some(Verb::Version) => {
|
||||||
|
|||||||
209
src/vpn_cmds.rs
209
src/vpn_cmds.rs
@@ -1,24 +1,28 @@
|
|||||||
//! `sunbeam connect` / `sunbeam disconnect` / `sunbeam vpn ...`
|
//! `sunbeam connect` / `sunbeam disconnect` / `sunbeam vpn ...`
|
||||||
//!
|
//!
|
||||||
//! These commands wrap the `sunbeam-net` daemon and run it in the foreground
|
//! `sunbeam connect` re-execs the current binary with a hidden
|
||||||
//! of the CLI process. We don't currently background it as a separate
|
//! `__vpn-daemon` subcommand and detaches it (stdio → /dev/null + a log
|
||||||
//! daemon process — running in the foreground keeps the lifecycle simple
|
//! file). The detached child runs the actual `sunbeam-net` daemon and
|
||||||
//! and is the right shape for the typical workflow ("connect, do work,
|
//! listens on the IPC control socket. The user-facing process polls the
|
||||||
//! disconnect with ^C").
|
//! socket until the daemon reaches Running, prints status, and exits.
|
||||||
|
//!
|
||||||
|
//! This shape avoids forking from inside the tokio runtime.
|
||||||
|
|
||||||
use crate::config::active_context;
|
use crate::config::active_context;
|
||||||
use crate::error::{Result, SunbeamError};
|
use crate::error::{Result, SunbeamError};
|
||||||
use crate::output::{ok, step, warn};
|
use crate::output::{ok, step, warn};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
/// Run `sunbeam connect` — start the VPN daemon and block until shutdown.
|
/// Run `sunbeam connect`.
|
||||||
pub async fn cmd_connect() -> Result<()> {
|
///
|
||||||
|
/// Default mode spawns a backgrounded daemon and returns once it reaches
|
||||||
|
/// Running. With `--foreground`, runs the daemon in-process and blocks
|
||||||
|
/// until SIGINT or SIGTERM.
|
||||||
|
pub async fn cmd_connect(foreground: bool) -> Result<()> {
|
||||||
let ctx = active_context();
|
let ctx = active_context();
|
||||||
|
|
||||||
if ctx.vpn_url.is_empty() {
|
if ctx.vpn_url.is_empty() {
|
||||||
return Err(SunbeamError::Other(
|
return Err(SunbeamError::Other(
|
||||||
"no VPN configured for this context — set vpn-url and vpn-auth-key in config"
|
"no VPN configured for this context — set vpn-url and vpn-auth-key in config".into(),
|
||||||
.into(),
|
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
if ctx.vpn_auth_key.is_empty() {
|
if ctx.vpn_auth_key.is_empty() {
|
||||||
@@ -29,14 +33,131 @@ pub async fn cmd_connect() -> Result<()> {
|
|||||||
|
|
||||||
let state_dir = vpn_state_dir()?;
|
let state_dir = vpn_state_dir()?;
|
||||||
std::fs::create_dir_all(&state_dir).map_err(|e| {
|
std::fs::create_dir_all(&state_dir).map_err(|e| {
|
||||||
SunbeamError::Other(format!(
|
SunbeamError::Other(format!("create vpn state dir {}: {e}", state_dir.display()))
|
||||||
"create vpn state dir {}: {e}",
|
})?;
|
||||||
state_dir.display()
|
|
||||||
))
|
if foreground {
|
||||||
|
return run_daemon_foreground().await;
|
||||||
|
}
|
||||||
|
|
||||||
|
spawn_background_daemon(&state_dir).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spawn a detached daemon child and wait for it to reach Running.
|
||||||
|
async fn spawn_background_daemon(state_dir: &std::path::Path) -> Result<()> {
|
||||||
|
// Refuse to start a second daemon if one is already running.
|
||||||
|
let socket = state_dir.join("daemon.sock");
|
||||||
|
let probe = sunbeam_net::IpcClient::new(&socket);
|
||||||
|
if probe.socket_exists() {
|
||||||
|
if let Ok(status) = probe.status().await {
|
||||||
|
warn(&format!(
|
||||||
|
"VPN daemon already running ({status}). Use `sunbeam disconnect` first."
|
||||||
|
));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
// Stale socket — clean it up so the new daemon can rebind.
|
||||||
|
let _ = std::fs::remove_file(&socket);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-exec ourselves with the hidden subcommand.
|
||||||
|
let exe = std::env::current_exe()
|
||||||
|
.map_err(|e| SunbeamError::Other(format!("locate current_exe: {e}")))?;
|
||||||
|
let log_path = state_dir.join("daemon.log");
|
||||||
|
let log = std::fs::OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open(&log_path)
|
||||||
|
.map_err(|e| SunbeamError::Other(format!("open daemon log: {e}")))?;
|
||||||
|
let log_err = log
|
||||||
|
.try_clone()
|
||||||
|
.map_err(|e| SunbeamError::Other(format!("dup daemon log fd: {e}")))?;
|
||||||
|
|
||||||
|
let mut cmd = std::process::Command::new(&exe);
|
||||||
|
// --context is a top-level flag, must precede the subcommand.
|
||||||
|
let cfg = crate::config::load_config();
|
||||||
|
if !cfg.current_context.is_empty() {
|
||||||
|
cmd.arg("--context").arg(&cfg.current_context);
|
||||||
|
}
|
||||||
|
cmd.arg("__vpn-daemon");
|
||||||
|
cmd.stdin(std::process::Stdio::null())
|
||||||
|
.stdout(std::process::Stdio::from(log))
|
||||||
|
.stderr(std::process::Stdio::from(log_err));
|
||||||
|
|
||||||
|
// Detach from the controlling terminal so closing the parent shell
|
||||||
|
// doesn't SIGHUP the daemon.
|
||||||
|
use std::os::unix::process::CommandExt;
|
||||||
|
unsafe {
|
||||||
|
cmd.pre_exec(|| {
|
||||||
|
// Become a session leader so the child has no controlling TTY.
|
||||||
|
libc::setsid();
|
||||||
|
Ok(())
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let child = cmd
|
||||||
|
.spawn()
|
||||||
|
.map_err(|e| SunbeamError::Other(format!("spawn daemon: {e}")))?;
|
||||||
|
|
||||||
|
step(&format!(
|
||||||
|
"VPN daemon spawned (pid {}, logs at {})",
|
||||||
|
child.id(),
|
||||||
|
log_path.display()
|
||||||
|
));
|
||||||
|
|
||||||
|
// Poll the IPC socket until the daemon reaches Running.
|
||||||
|
let client = sunbeam_net::IpcClient::new(&socket);
|
||||||
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
|
||||||
|
loop {
|
||||||
|
if std::time::Instant::now() > deadline {
|
||||||
|
warn(
|
||||||
|
"VPN daemon did not reach Running state within 30s — \
|
||||||
|
check the daemon log for details",
|
||||||
|
);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if !client.socket_exists() {
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match client.status().await {
|
||||||
|
Ok(sunbeam_net::DaemonStatus::Running {
|
||||||
|
addresses,
|
||||||
|
peer_count,
|
||||||
|
..
|
||||||
|
}) => {
|
||||||
|
let addrs: Vec<String> = addresses.iter().map(|a| a.to_string()).collect();
|
||||||
|
ok(&format!(
|
||||||
|
"Connected ({}) — {} peers visible",
|
||||||
|
addrs.join(", "),
|
||||||
|
peer_count
|
||||||
|
));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Ok(sunbeam_net::DaemonStatus::Error { message }) => {
|
||||||
|
return Err(SunbeamError::Other(format!("VPN daemon error: {message}")));
|
||||||
|
}
|
||||||
|
// Still starting / connecting / registering — keep polling.
|
||||||
|
Ok(_) | Err(_) => {
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The hidden `__vpn-daemon` subcommand entry point.
|
||||||
|
pub async fn cmd_vpn_daemon() -> Result<()> {
|
||||||
|
run_daemon_foreground().await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build VpnConfig from the active context, start the daemon, and block
|
||||||
|
/// until SIGINT/SIGTERM or an IPC `Stop` request brings it down.
|
||||||
|
async fn run_daemon_foreground() -> Result<()> {
|
||||||
|
let ctx = active_context();
|
||||||
|
let state_dir = vpn_state_dir()?;
|
||||||
|
std::fs::create_dir_all(&state_dir).map_err(|e| {
|
||||||
|
SunbeamError::Other(format!("create vpn state dir {}: {e}", state_dir.display()))
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Build the netmap label as "<user>@<host>" so multiple workstations
|
|
||||||
// for the same human are distinguishable in `headscale nodes list`.
|
|
||||||
let user = whoami::username().unwrap_or_else(|_| "unknown".to_string());
|
let user = whoami::username().unwrap_or_else(|_| "unknown".to_string());
|
||||||
let host = hostname::get()
|
let host = hostname::get()
|
||||||
.ok()
|
.ok()
|
||||||
@@ -48,11 +169,11 @@ pub async fn cmd_connect() -> Result<()> {
|
|||||||
coordination_url: ctx.vpn_url.clone(),
|
coordination_url: ctx.vpn_url.clone(),
|
||||||
auth_key: ctx.vpn_auth_key.clone(),
|
auth_key: ctx.vpn_auth_key.clone(),
|
||||||
state_dir: state_dir.clone(),
|
state_dir: state_dir.clone(),
|
||||||
// Bind the local k8s proxy on a fixed port the rest of the CLI can
|
// Bind the local k8s proxy on 16579 — far enough away from common
|
||||||
// discover via context (or via IPC, eventually).
|
// conflicts (6443 = kube API, 16443 = sienna's SSH tunnel) that we
|
||||||
proxy_bind: "127.0.0.1:16443".parse().expect("static addr"),
|
// shouldn't collide on dev machines. TODO: make this configurable
|
||||||
// Default cluster API target — TODO: derive from netmap once we
|
// and discoverable via IPC.
|
||||||
// know which peer hosts the k8s API.
|
proxy_bind: "127.0.0.1:16579".parse().expect("static addr"),
|
||||||
cluster_api_addr: "100.64.0.1".parse().expect("static addr"),
|
cluster_api_addr: "100.64.0.1".parse().expect("static addr"),
|
||||||
cluster_api_port: 6443,
|
cluster_api_port: 6443,
|
||||||
control_socket: state_dir.join("daemon.sock"),
|
control_socket: state_dir.join("daemon.sock"),
|
||||||
@@ -65,39 +186,33 @@ pub async fn cmd_connect() -> Result<()> {
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| SunbeamError::Other(format!("daemon start: {e}")))?;
|
.map_err(|e| SunbeamError::Other(format!("daemon start: {e}")))?;
|
||||||
|
|
||||||
// Block until the daemon reaches Running, then sit on it until SIGINT.
|
// Wait for either Ctrl-C, SIGTERM, or the daemon stopping itself
|
||||||
let mut ready = false;
|
// (e.g. via an IPC `Stop` request).
|
||||||
for _ in 0..60 {
|
let ctrl_c = tokio::signal::ctrl_c();
|
||||||
match handle.current_status() {
|
tokio::pin!(ctrl_c);
|
||||||
sunbeam_net::DaemonStatus::Running { addresses, peer_count, .. } => {
|
let mut sigterm =
|
||||||
let addrs: Vec<String> = addresses.iter().map(|a| a.to_string()).collect();
|
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
|
||||||
ok(&format!(
|
.map_err(|e| SunbeamError::Other(format!("install SIGTERM handler: {e}")))?;
|
||||||
"Connected ({}) — {} peers visible",
|
|
||||||
addrs.join(", "),
|
loop {
|
||||||
peer_count
|
tokio::select! {
|
||||||
));
|
biased;
|
||||||
ready = true;
|
_ = &mut ctrl_c => {
|
||||||
|
step("Interrupt — disconnecting...");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
sunbeam_net::DaemonStatus::Reconnecting { attempt } => {
|
_ = sigterm.recv() => {
|
||||||
warn(&format!("Reconnecting (attempt {attempt})..."));
|
step("SIGTERM — disconnecting...");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
sunbeam_net::DaemonStatus::Error { ref message } => {
|
_ = tokio::time::sleep(std::time::Duration::from_millis(500)) => {
|
||||||
return Err(SunbeamError::Other(format!("VPN error: {message}")));
|
if matches!(handle.current_status(), sunbeam_net::DaemonStatus::Stopped) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
|
||||||
}
|
}
|
||||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
|
||||||
}
|
|
||||||
if !ready {
|
|
||||||
warn("VPN daemon did not reach Running state within 30s — continuing anyway");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("Press Ctrl-C to disconnect.");
|
|
||||||
tokio::signal::ctrl_c()
|
|
||||||
.await
|
|
||||||
.map_err(|e| SunbeamError::Other(format!("install signal handler: {e}")))?;
|
|
||||||
step("Disconnecting...");
|
|
||||||
handle
|
handle
|
||||||
.shutdown()
|
.shutdown()
|
||||||
.await
|
.await
|
||||||
|
|||||||
@@ -42,6 +42,11 @@ async fn run_daemon_loop(
|
|||||||
status: Arc<RwLock<DaemonStatus>>,
|
status: Arc<RwLock<DaemonStatus>>,
|
||||||
shutdown: tokio_util::sync::CancellationToken,
|
shutdown: tokio_util::sync::CancellationToken,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
|
// Make sure the IPC control socket is cleaned up no matter how the
|
||||||
|
// daemon exits — otherwise `sunbeam vpn status` after a clean shutdown
|
||||||
|
// would see a stale socket file and report "stale socket".
|
||||||
|
let _socket_guard = SocketGuard::new(config.control_socket.clone());
|
||||||
|
|
||||||
let keys = crate::keys::NodeKeys::load_or_generate(&config.state_dir)?;
|
let keys = crate::keys::NodeKeys::load_or_generate(&config.state_dir)?;
|
||||||
let mut attempt: u32 = 0;
|
let mut attempt: u32 = 0;
|
||||||
let max_backoff = Duration::from_secs(60);
|
let max_backoff = Duration::from_secs(60);
|
||||||
@@ -86,6 +91,26 @@ enum SessionExit {
|
|||||||
Disconnected,
|
Disconnected,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// RAII guard that removes a Unix socket file when dropped. Used by
|
||||||
|
/// `run_daemon_loop` to make sure the IPC control socket is cleaned up
|
||||||
|
/// when the daemon exits, regardless of whether shutdown was triggered
|
||||||
|
/// via DaemonHandle, IPC Stop, signal, or panic.
|
||||||
|
struct SocketGuard {
|
||||||
|
path: std::path::PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SocketGuard {
|
||||||
|
fn new(path: std::path::PathBuf) -> Self {
|
||||||
|
Self { path }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for SocketGuard {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let _ = std::fs::remove_file(&self.path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Run a single VPN session. Returns when the session ends (error or shutdown).
|
/// Run a single VPN session. Returns when the session ends (error or shutdown).
|
||||||
async fn run_session(
|
async fn run_session(
|
||||||
config: &VpnConfig,
|
config: &VpnConfig,
|
||||||
|
|||||||
Reference in New Issue
Block a user