test(net): TUN-mode docker stack and ignored e2e test

- docker-compose.yml: run peer-a and peer-b with TS_USERSPACE=false +
  /dev/net/tun device + cap_add. Pin peer-a's WG listen port to 41641
  via TS_TAILSCALED_EXTRA_ARGS and publish it to the host so direct
  UDP from outside docker has somewhere to land.
- run.sh: use an ephemeral pre-auth key for the test client so
  Headscale auto-deletes the test node when its map stream drops
  (instead of accumulating hundreds of stale entries that eventually
  slow netmap propagation to a crawl). Disable shields-up on both
  peers so the kernel firewall doesn't drop inbound tailnet TCP. Tweak
  the JSON key extraction to handle pretty-printed output.
- integration.rs: add `test_e2e_tcp_through_tunnel` that brings up
  the daemon, dials peer-a's echo server through the proxy, and
  asserts the echo body comes back. Currently `#[ignore]`d — the
  docker stack runs Headscale over plain HTTP, but Tailscale's client
  unconditionally tries TLS to DERP relays ("tls: first record does
  not look like a TLS handshake"), so peer-a can never receive
  packets we forward via the relay. Unblocking needs either TLS
  termination on the docker DERP or running the test inside the same
  docker network as peer-a. The test stays in the tree because everything
  it exercises up to the read timeout is real, verified behavior.
This commit is contained in:
2026-04-07 14:33:59 +01:00
parent dca8c3b643
commit f1668682b7
3 changed files with 136 additions and 3 deletions

View File

@@ -33,6 +33,9 @@ services:
# ── Tailscale peer A (validates that Headscale is working) ────────── # ── Tailscale peer A (validates that Headscale is working) ──────────
# This peer registers with Headscale and stays online so our Rust # This peer registers with Headscale and stays online so our Rust
# client can discover it in the netmap and attempt WireGuard tunnels. # client can discover it in the netmap and attempt WireGuard tunnels.
# Runs in TUN mode (TS_USERSPACE=false) so the host kernel actually
# routes packets to peer-a's tailnet IP — this is what makes inbound
# TCP from other tailnet members work end-to-end.
peer-a: peer-a:
image: tailscale/tailscale:stable image: tailscale/tailscale:stable
hostname: peer-a hostname: peer-a
@@ -42,10 +45,19 @@ services:
environment: environment:
TS_AUTHKEY: "${PEER_A_AUTH_KEY}" TS_AUTHKEY: "${PEER_A_AUTH_KEY}"
TS_STATE_DIR: /var/lib/tailscale TS_STATE_DIR: /var/lib/tailscale
TS_USERSPACE: "false"
TS_EXTRA_ARGS: --login-server=http://headscale:8080 TS_EXTRA_ARGS: --login-server=http://headscale:8080
# Pin the WireGuard listen port (passed to tailscaled itself) so we
# can publish it to the host — without this our test daemon (running
# outside docker) can't reach peer-a's UDP endpoint.
TS_TAILSCALED_EXTRA_ARGS: --port=41641
cap_add: cap_add:
- NET_ADMIN - NET_ADMIN
- NET_RAW - NET_RAW
devices:
- /dev/net/tun:/dev/net/tun
ports:
- "41641:41641/udp"
volumes: volumes:
- peer-a-state:/var/lib/tailscale - peer-a-state:/var/lib/tailscale
# Tailscale doesn't have a great healthcheck, but it registers fast # Tailscale doesn't have a great healthcheck, but it registers fast
@@ -65,10 +77,13 @@ services:
environment: environment:
TS_AUTHKEY: "${PEER_B_AUTH_KEY}" TS_AUTHKEY: "${PEER_B_AUTH_KEY}"
TS_STATE_DIR: /var/lib/tailscale TS_STATE_DIR: /var/lib/tailscale
TS_USERSPACE: "false"
TS_EXTRA_ARGS: --login-server=http://headscale:8080 TS_EXTRA_ARGS: --login-server=http://headscale:8080
cap_add: cap_add:
- NET_ADMIN - NET_ADMIN
- NET_RAW - NET_RAW
devices:
- /dev/net/tun:/dev/net/tun
volumes: volumes:
- peer-b-state:/var/lib/tailscale - peer-b-state:/var/lib/tailscale
healthcheck: healthcheck:

View File

@@ -127,6 +127,110 @@ async fn test_proxy_listener_accepts() {
handle.shutdown().await.unwrap(); handle.shutdown().await.unwrap();
} }
/// End-to-end check of TCP over the WireGuard tunnel.
///
/// Starts the daemon with peer-a as the proxy target, connects to the local
/// proxy listener, sends a minimal HTTP/1.0 request, and asserts that the
/// echo server's body comes back.
///
/// **Currently ignored.** The docker-compose test stack serves Headscale over
/// plain HTTP, whereas Tailscale's official client always attempts TLS when
/// connecting to DERP relays and fails with:
///
/// ```text
/// derp.Recv(derp-999): connect to region 999: tls: first record does
/// not look like a TLS handshake
/// ```
///
/// Consequently peer-a never receives the WireGuard packets we forward via
/// the relay, and there is no other reachable transport from the host into
/// the docker network. Two ways to unblock this:
///
/// * generate a self-signed cert, configure Headscale + DERP for TLS, and
///   teach `DerpClient` to negotiate TLS; or
/// * run the test daemon inside the same docker network as peer-a so direct
///   UDP works without relays.
///
/// Tracked separately.
#[tokio::test(flavor = "multi_thread")]
#[ignore = "blocked on TLS DERP — see comment"]
async fn test_e2e_tcp_through_tunnel() {
    use std::net::{IpAddr, SocketAddr};
    use std::time::Duration;
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    use tokio::net::TcpStream;
    use tokio::time::{sleep, timeout};

    // Request sent through the proxy to peer-a's echo server.
    const REQUEST: &[u8] = b"GET / HTTP/1.0\r\nHost: peer-a\r\n\r\n";

    // --- Environment-provided test parameters -----------------------------
    let coordination_url = require_env("SUNBEAM_NET_TEST_COORD_URL");
    let auth_key = require_env("SUNBEAM_NET_TEST_AUTH_KEY");
    let cluster_api_addr = require_env("SUNBEAM_NET_TEST_PEER_A_IP")
        .parse::<IpAddr>()
        .expect("SUNBEAM_NET_TEST_PEER_A_IP must be a valid IP");

    // Scratch directory for daemon state and the control socket; it must
    // outlive the daemon, so it is bound for the whole test.
    let scratch = tempfile::tempdir().unwrap();

    // The proxy listens on a fixed local port so the client side of this
    // test knows exactly where to connect.
    let proxy_bind = "127.0.0.1:16578".parse::<SocketAddr>().unwrap();

    // --- Start the daemon ---------------------------------------------------
    let state_dir = scratch.path().to_path_buf();
    let control_socket = scratch.path().join("e2e.sock");
    let handle = sunbeam_net::VpnDaemon::start(sunbeam_net::VpnConfig {
        coordination_url,
        auth_key,
        state_dir,
        proxy_bind,
        cluster_api_addr,
        cluster_api_port: 5678,
        control_socket,
        hostname: "sunbeam-net-e2e-test".into(),
        server_public_key: None,
    })
    .await
    .expect("daemon start failed");

    // --- Poll for the Running state: 60 checks, 500 ms apart ---------------
    let mut polls_remaining = 60u32;
    let reached_running = loop {
        if polls_remaining == 0 {
            break false;
        }
        polls_remaining -= 1;

        let status = handle.current_status();
        if matches!(status, sunbeam_net::DaemonStatus::Running { .. }) {
            break true;
        }
        sleep(Duration::from_millis(500)).await;
    };
    assert!(reached_running, "daemon did not reach Running within 30s");

    // Running alone is not sufficient. Two more things have to settle:
    //   * Headscale has to push our node into peer-a's streaming netmap so
    //     that peer-a adds us to its peer table; this can lag a few seconds
    //     behind the Lite update.
    //   * The boringtun handshake needs its first round-trip, which only
    //     starts once smoltcp emits the SYN.
    sleep(Duration::from_secs(5)).await;

    // --- Talk to the echo server through the proxy ---------------------------
    let connecting = TcpStream::connect(proxy_bind);
    let mut stream = timeout(Duration::from_secs(15), connecting)
        .await
        .expect("connect to proxy timed out")
        .expect("connect to proxy failed");

    stream
        .write_all(REQUEST)
        .await
        .expect("write request failed");

    // http-echo closes the connection once its body is sent, so reading
    // until EOF yields the complete response.
    let mut response = Vec::new();
    let received = timeout(Duration::from_secs(20), stream.read_to_end(&mut response))
        .await
        .expect("read response timed out")
        .expect("read response failed");
    assert!(received > 0, "expected bytes from echo server, got 0");

    let body = String::from_utf8_lossy(&response);
    assert!(
        body.contains("sunbeam-net integration test"),
        "expected echo body in response, got: {body}"
    );

    handle.shutdown().await.expect("shutdown failed");
}
/// Test: full daemon lifecycle — start, reach Ready state, query via IPC, shutdown. /// Test: full daemon lifecycle — start, reach Ready state, query via IPC, shutdown.
#[tokio::test(flavor = "multi_thread")] #[tokio::test(flavor = "multi_thread")]
async fn test_daemon_lifecycle() { async fn test_daemon_lifecycle() {

View File

@@ -16,9 +16,16 @@ $COMPOSE up -d headscale
$COMPOSE exec -T headscale sh -c 'until headscale health 2>/dev/null; do sleep 1; done' $COMPOSE exec -T headscale sh -c 'until headscale health 2>/dev/null; do sleep 1; done'
echo "==> Creating pre-auth keys..." echo "==> Creating pre-auth keys..."
PEER_A_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) # Helper that handles both compact and pretty-printed JSON shapes from
PEER_B_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) # headscale preauthkeys create.
CLIENT_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) extract_key() {
grep -o '"key":[[:space:]]*"[^"]*"' | sed 's/.*"\([^"]*\)"$/\1/'
}
# Test client uses an ephemeral key so headscale auto-deletes the node when
# the streaming map connection drops, keeping the test database clean.
PEER_A_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | extract_key)
PEER_B_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | extract_key)
CLIENT_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --ephemeral --expiration 1h -o json | extract_key)
echo "==> Starting peers..." echo "==> Starting peers..."
PEER_A_AUTH_KEY="$PEER_A_KEY" PEER_B_AUTH_KEY="$PEER_B_KEY" $COMPOSE up -d peer-a peer-b echo PEER_A_AUTH_KEY="$PEER_A_KEY" PEER_B_AUTH_KEY="$PEER_B_KEY" $COMPOSE up -d peer-a peer-b echo
@@ -33,6 +40,13 @@ for i in $(seq 1 30); do
sleep 2 sleep 2
done done
# In TUN mode tailscale installs a stateful firewall that DROPs incoming
# tailnet traffic by default. Disable it on both peers so the integration
# tests can actually exchange TCP through the tunnel.
echo "==> Disabling tailscale firewall on peers..."
$COMPOSE exec -T peer-a tailscale set --shields-up=false 2>/dev/null || true
$COMPOSE exec -T peer-b tailscale set --shields-up=false 2>/dev/null || true
# Get the server's Noise public key # Get the server's Noise public key
SERVER_KEY=$($COMPOSE exec -T headscale cat /var/lib/headscale/noise_private.key 2>/dev/null | head -1 || echo "") SERVER_KEY=$($COMPOSE exec -T headscale cat /var/lib/headscale/noise_private.key 2>/dev/null | head -1 || echo "")