feat(net): derive cluster API target from netmap by hostname

Adds an optional `cluster_api_host` field to VpnConfig. When set, the
daemon resolves it against the netmap's peer list once the first
netmap arrives and uses that peer's tailnet IP as the proxy backend,
overriding the static `cluster_api_addr`. Falls back to the static
addr if the hostname doesn't match any peer.

The resolver tries hostname first, then peer name (FQDN), then a
prefix match against name. Picks v4 over v6 from the peer's address
list.

- sunbeam-net/src/config.rs: new `cluster_api_host: Option<String>`
- sunbeam-net/src/daemon/lifecycle.rs: resolve_peer_ip helper +
  resolution at proxy bind time
- sunbeam-net/tests/integration.rs: pass cluster_api_host: None in
  the existing VpnConfig literals
- src/config.rs: new context field `vpn-cluster-host`
- src/vpn_cmds.rs: thread it from context → VpnConfig
This commit is contained in:
2026-04-07 15:00:30 +01:00
parent 27a6f4377c
commit e934eb45dc
5 changed files with 76 additions and 5 deletions

View File

@@ -65,6 +65,13 @@ pub struct Context {
/// Stored in plain text — keep this file readable only by the user. /// Stored in plain text — keep this file readable only by the user.
#[serde(default, rename = "vpn-auth-key", skip_serializing_if = "String::is_empty")] #[serde(default, rename = "vpn-auth-key", skip_serializing_if = "String::is_empty")]
pub vpn_auth_key: String, pub vpn_auth_key: String,
/// Hostname of the cluster API server peer to look up in the netmap.
/// When set, the VPN daemon resolves this against the netmap's peer
/// list and proxies k8s API traffic to that peer's tailnet IP. When
/// empty, the daemon falls back to the static `cluster_api_addr`.
#[serde(default, rename = "vpn-cluster-host", skip_serializing_if = "String::is_empty")]
pub vpn_cluster_host: String,
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@@ -171,11 +171,19 @@ async fn run_daemon_foreground() -> Result<()> {
state_dir: state_dir.clone(), state_dir: state_dir.clone(),
// Bind the local k8s proxy on 16579 — far enough away from common // Bind the local k8s proxy on 16579 — far enough away from common
// conflicts (6443 = kube API, 16443 = sienna's SSH tunnel) that we // conflicts (6443 = kube API, 16443 = sienna's SSH tunnel) that we
// shouldn't collide on dev machines. TODO: make this configurable // shouldn't collide on dev machines. TODO: make this configurable.
// and discoverable via IPC.
proxy_bind: "127.0.0.1:16579".parse().expect("static addr"), proxy_bind: "127.0.0.1:16579".parse().expect("static addr"),
// Static fallback if the netmap doesn't have the named host.
cluster_api_addr: "100.64.0.1".parse().expect("static addr"), cluster_api_addr: "100.64.0.1".parse().expect("static addr"),
cluster_api_port: 6443, cluster_api_port: 6443,
// If the user set vpn-cluster-host in their context config, the
// daemon resolves it from the netmap and uses that peer's
// tailnet IP for the proxy backend.
cluster_api_host: if ctx.vpn_cluster_host.is_empty() {
None
} else {
Some(ctx.vpn_cluster_host.clone())
},
control_socket: state_dir.join("daemon.sock"), control_socket: state_dir.join("daemon.sock"),
hostname, hostname,
server_public_key: None, server_public_key: None,

View File

@@ -12,10 +12,18 @@ pub struct VpnConfig {
pub state_dir: PathBuf, pub state_dir: PathBuf,
/// Address to bind the SOCKS/TCP proxy on. /// Address to bind the SOCKS/TCP proxy on.
pub proxy_bind: SocketAddr, pub proxy_bind: SocketAddr,
/// Cluster API server IP (inside the VPN). /// Cluster API server IP (inside the VPN). Used as a fallback when
/// `cluster_api_host` is None or doesn't resolve from the netmap.
pub cluster_api_addr: IpAddr, pub cluster_api_addr: IpAddr,
/// Cluster API server port. /// Cluster API server port.
pub cluster_api_port: u16, pub cluster_api_port: u16,
/// Optional peer hostname (or hostname prefix) to look up in the
/// netmap for the cluster API server's tailnet IP. When set, the
/// daemon resolves this from the first netmap and overrides
/// `cluster_api_addr` with the matching peer's address. Set this
/// when the cluster API runs on a node whose tailnet IP isn't
/// known statically.
pub cluster_api_host: Option<String>,
/// Path for the daemon control socket. /// Path for the daemon control socket.
pub control_socket: PathBuf, pub control_socket: PathBuf,
/// Hostname to register with the coordination server. /// Hostname to register with the coordination server.

View File

@@ -172,11 +172,29 @@ async fn run_session(
let (engine, channels) = NetworkEngine::new(smoltcp_ip, 10)?; let (engine, channels) = NetworkEngine::new(smoltcp_ip, 10)?;
// 7. Start TCP proxy that routes through the engine // 7. Start TCP proxy that routes through the engine. If the user
// configured cluster_api_host, look it up in the netmap and use
// that peer's tailnet IP instead of the static cluster_api_addr.
let cancel = tokio_util::sync::CancellationToken::new(); let cancel = tokio_util::sync::CancellationToken::new();
let proxy_cmd_tx = channels.cmd_tx.clone(); let proxy_cmd_tx = channels.cmd_tx.clone();
let proxy_bind = config.proxy_bind; let proxy_bind = config.proxy_bind;
let cluster_addr = std::net::SocketAddr::new(config.cluster_api_addr, config.cluster_api_port); let resolved_addr = config
.cluster_api_host
.as_deref()
.and_then(|host| resolve_peer_ip(host, &peers))
.unwrap_or(config.cluster_api_addr);
if let Some(ref host) = config.cluster_api_host {
if resolved_addr == config.cluster_api_addr {
tracing::warn!(
"cluster_api_host '{host}' did not match any netmap peer; \
falling back to static cluster_api_addr {}",
config.cluster_api_addr
);
} else {
tracing::info!("resolved cluster_api_host '{host}' → {resolved_addr}");
}
}
let cluster_addr = std::net::SocketAddr::new(resolved_addr, config.cluster_api_port);
// Proxy listener task: accepts local connections and sends them to the engine // Proxy listener task: accepts local connections and sends them to the engine
let proxy_cancel = cancel.clone(); let proxy_cancel = cancel.clone();
@@ -578,6 +596,31 @@ async fn run_derp_loop(
} }
} }
/// Look up a peer's tailnet IP from the netmap by hostname.
///
/// Tries (in order): exact hostname match, exact `name` (FQDN) match,
/// then a prefix match against `name`. All comparisons are ASCII
/// case-insensitive, since DNS hostnames compare case-insensitively
/// (RFC 4343) and the host comes from user-edited config. Returns the
/// first IPv4 address from the peer's `addresses` list (entries may
/// carry a CIDR suffix, e.g. "100.64.0.2/32", which is stripped),
/// falling back to IPv6 only if there are no v4 entries.
fn resolve_peer_ip(host: &str, peers: &[crate::proto::types::Node]) -> Option<IpAddr> {
    // Case-insensitive prefix test. `get` (rather than slicing) avoids
    // a panic if `host.len()` lands inside a multi-byte char of `name`.
    let name_has_prefix = |name: &str| {
        name.get(..host.len())
            .map_or(false, |pre| pre.eq_ignore_ascii_case(host))
    };
    let matched = peers
        .iter()
        .find(|p| p.hostinfo.hostname.eq_ignore_ascii_case(host))
        .or_else(|| peers.iter().find(|p| p.name.eq_ignore_ascii_case(host)))
        .or_else(|| peers.iter().find(|p| name_has_prefix(&p.name)))?;
    // Single pass over the addresses: return the first v4 immediately,
    // remembering the first parseable address as a v6 fallback.
    let mut fallback: Option<IpAddr> = None;
    for entry in &matched.addresses {
        // Strip an optional "/prefix" suffix before parsing; entries
        // without one parse as-is (split always yields a first piece).
        if let Some(ip) = entry
            .split('/')
            .next()
            .and_then(|s| s.parse::<IpAddr>().ok())
        {
            if ip.is_ipv4() {
                return Some(ip);
            }
            fallback.get_or_insert(ip);
        }
    }
    fallback
}
/// Pick the first DERP node from the map (any region, any node). /// Pick the first DERP node from the map (any region, any node).
fn pick_derp_node(derp_map: &DerpMap) -> Option<(String, u16)> { fn pick_derp_node(derp_map: &DerpMap) -> Option<(String, u16)> {
derp_map derp_map
@@ -684,6 +727,7 @@ mod tests {
proxy_bind: "127.0.0.1:0".parse().unwrap(), proxy_bind: "127.0.0.1:0".parse().unwrap(),
cluster_api_addr: "10.0.0.1".parse().unwrap(), cluster_api_addr: "10.0.0.1".parse().unwrap(),
cluster_api_port: 6443, cluster_api_port: 6443,
cluster_api_host: None,
control_socket: dir.path().join("test.sock"), control_socket: dir.path().join("test.sock"),
hostname: "test-node".to_string(), hostname: "test-node".to_string(),
server_public_key: Some([0xaa; 32]), server_public_key: Some([0xaa; 32]),

View File

@@ -34,6 +34,7 @@ async fn test_register_and_receive_netmap() {
proxy_bind: "127.0.0.1:0".parse().unwrap(), proxy_bind: "127.0.0.1:0".parse().unwrap(),
cluster_api_addr: "127.0.0.1".parse().unwrap(), cluster_api_addr: "127.0.0.1".parse().unwrap(),
cluster_api_port: 6443, cluster_api_port: 6443,
cluster_api_host: None,
control_socket: state_dir.path().join("test.sock"), control_socket: state_dir.path().join("test.sock"),
hostname: "sunbeam-net-test".into(), hostname: "sunbeam-net-test".into(),
server_public_key: None, server_public_key: None,
@@ -102,6 +103,7 @@ async fn test_proxy_listener_accepts() {
proxy_bind, proxy_bind,
cluster_api_addr: "100.64.0.1".parse().unwrap(), cluster_api_addr: "100.64.0.1".parse().unwrap(),
cluster_api_port: 6443, cluster_api_port: 6443,
cluster_api_host: None,
control_socket: state_dir.path().join("proxy.sock"), control_socket: state_dir.path().join("proxy.sock"),
hostname: "sunbeam-net-proxy-test".into(), hostname: "sunbeam-net-proxy-test".into(),
server_public_key: None, server_public_key: None,
@@ -165,6 +167,7 @@ async fn test_e2e_tcp_through_tunnel() {
proxy_bind, proxy_bind,
cluster_api_addr: peer_a_ip, cluster_api_addr: peer_a_ip,
cluster_api_port: 5678, cluster_api_port: 5678,
cluster_api_host: None,
control_socket: state_dir.path().join("e2e.sock"), control_socket: state_dir.path().join("e2e.sock"),
hostname: "sunbeam-net-e2e-test".into(), hostname: "sunbeam-net-e2e-test".into(),
server_public_key: None, server_public_key: None,
@@ -245,6 +248,7 @@ async fn test_daemon_lifecycle() {
proxy_bind: "127.0.0.1:0".parse().unwrap(), proxy_bind: "127.0.0.1:0".parse().unwrap(),
cluster_api_addr: "127.0.0.1".parse().unwrap(), cluster_api_addr: "127.0.0.1".parse().unwrap(),
cluster_api_port: 6443, cluster_api_port: 6443,
cluster_api_host: None,
control_socket: state_dir.path().join("daemon.sock"), control_socket: state_dir.path().join("daemon.sock"),
hostname: "sunbeam-net-daemon-test".into(), hostname: "sunbeam-net-daemon-test".into(),
server_public_key: None, server_public_key: None,