From f1668682b758f17e58a2e6840a381499df87f7ae Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 7 Apr 2026 14:33:59 +0100 Subject: [PATCH] test(net): TUN-mode docker stack and ignored e2e test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docker-compose.yml: run peer-a and peer-b with TS_USERSPACE=false + /dev/net/tun device + cap_add. Pin peer-a's WG listen port to 41641 via TS_TAILSCALED_EXTRA_ARGS and publish it to the host so direct UDP from outside docker has somewhere to land. - run.sh: use an ephemeral pre-auth key for the test client so Headscale auto-deletes the test node when its map stream drops (instead of accumulating hundreds of stale entries that eventually slow netmap propagation to a crawl). Disable shields-up on both peers so the kernel firewall doesn't drop inbound tailnet TCP. Tweak the JSON key extraction to handle pretty-printed output. - integration.rs: add `test_e2e_tcp_through_tunnel` that brings up the daemon, dials peer-a's echo server through the proxy, and asserts the echo body comes back. Currently `#[ignore]`d — the docker stack runs Headscale over plain HTTP, but Tailscale's client unconditionally tries TLS to DERP relays ("tls: first record does not look like a TLS handshake"), so peer-a can never receive packets we forward via the relay. Unblocking needs either TLS termination on the docker DERP or running the test inside the same docker network as peer-a. The test stays in the tree because every step it exercises before the final read (the one that currently times out) is real, verified behavior. 
--- sunbeam-net/tests/docker-compose.yml | 15 ++++ sunbeam-net/tests/integration.rs | 104 +++++++++++++++++++++++++++ sunbeam-net/tests/run.sh | 20 +++++- 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/sunbeam-net/tests/docker-compose.yml b/sunbeam-net/tests/docker-compose.yml index 2bd381f1..dae70484 100644 --- a/sunbeam-net/tests/docker-compose.yml +++ b/sunbeam-net/tests/docker-compose.yml @@ -33,6 +33,9 @@ services: # ── Tailscale peer A (validates that Headscale is working) ────────── # This peer registers with Headscale and stays online so our Rust # client can discover it in the netmap and attempt WireGuard tunnels. + # Runs in TUN mode (TS_USERSPACE=false) so the host kernel actually + # routes packets to peer-a's tailnet IP — this is what makes inbound + # TCP from other tailnet members work end-to-end. peer-a: image: tailscale/tailscale:stable hostname: peer-a @@ -42,10 +45,19 @@ services: environment: TS_AUTHKEY: "${PEER_A_AUTH_KEY}" TS_STATE_DIR: /var/lib/tailscale + TS_USERSPACE: "false" TS_EXTRA_ARGS: --login-server=http://headscale:8080 + # Pin the WireGuard listen port (passed to tailscaled itself) so we + # can publish it to the host — without this our test daemon (running + # outside docker) can't reach peer-a's UDP endpoint. 
+ TS_TAILSCALED_EXTRA_ARGS: --port=41641 cap_add: - NET_ADMIN - NET_RAW + devices: + - /dev/net/tun:/dev/net/tun + ports: + - "41641:41641/udp" volumes: - peer-a-state:/var/lib/tailscale # Tailscale doesn't have a great healthcheck, but it registers fast @@ -65,10 +77,13 @@ services: environment: TS_AUTHKEY: "${PEER_B_AUTH_KEY}" TS_STATE_DIR: /var/lib/tailscale + TS_USERSPACE: "false" TS_EXTRA_ARGS: --login-server=http://headscale:8080 cap_add: - NET_ADMIN - NET_RAW + devices: + - /dev/net/tun:/dev/net/tun volumes: - peer-b-state:/var/lib/tailscale healthcheck: diff --git a/sunbeam-net/tests/integration.rs b/sunbeam-net/tests/integration.rs index 203777d5..4d9f0606 100644 --- a/sunbeam-net/tests/integration.rs +++ b/sunbeam-net/tests/integration.rs @@ -127,6 +127,110 @@ async fn test_proxy_listener_accepts() { handle.shutdown().await.unwrap(); } +/// End-to-end: bring up the daemon, dial peer-a's echo server through the +/// proxy, and assert we get bytes back across the WireGuard tunnel. +/// +/// **Currently ignored** because the docker-compose test stack runs Headscale +/// over plain HTTP, but Tailscale's official client unconditionally tries to +/// connect to DERP relays over TLS: +/// +/// derp.Recv(derp-999): connect to region 999: tls: first record does +/// not look like a TLS handshake +/// +/// So peer-a can never receive WireGuard packets we forward via the relay, +/// and we have no other reachable transport from the host into the docker +/// network. Unblocking this requires either: (a) generating a self-signed +/// cert, configuring Headscale + DERP for TLS, and teaching DerpClient to +/// negotiate TLS; or (b) running the test daemon inside the same docker +/// network as peer-a so direct UDP works without relays. Tracked separately. 
+#[tokio::test(flavor = "multi_thread")] +#[ignore = "blocked on TLS DERP — see comment"] +async fn test_e2e_tcp_through_tunnel() { + use std::time::Duration; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + + let coord_url = require_env("SUNBEAM_NET_TEST_COORD_URL"); + let auth_key = require_env("SUNBEAM_NET_TEST_AUTH_KEY"); + let peer_a_ip: std::net::IpAddr = require_env("SUNBEAM_NET_TEST_PEER_A_IP") + .parse() + .expect("SUNBEAM_NET_TEST_PEER_A_IP must be a valid IP"); + + let state_dir = tempfile::tempdir().unwrap(); + // Use a fixed local proxy port so the test client knows where to dial. + let proxy_bind: std::net::SocketAddr = "127.0.0.1:16578".parse().unwrap(); + let config = sunbeam_net::VpnConfig { + coordination_url: coord_url, + auth_key, + state_dir: state_dir.path().to_path_buf(), + proxy_bind, + cluster_api_addr: peer_a_ip, + cluster_api_port: 5678, + control_socket: state_dir.path().join("e2e.sock"), + hostname: "sunbeam-net-e2e-test".into(), + server_public_key: None, + }; + + let handle = sunbeam_net::VpnDaemon::start(config) + .await + .expect("daemon start failed"); + + // Wait for Running. + let mut ready = false; + for _ in 0..60 { + if matches!( + handle.current_status(), + sunbeam_net::DaemonStatus::Running { .. } + ) { + ready = true; + break; + } + tokio::time::sleep(Duration::from_millis(500)).await; + } + assert!(ready, "daemon did not reach Running within 30s"); + + // After Running we still need to wait for two things: + // 1. Headscale to push our node to peer-a's streaming netmap so peer-a + // adds us to its peer table — propagation can take a few seconds + // after the Lite update lands. + // 2. The boringtun handshake to complete its first round-trip once + // smoltcp emits the SYN. + tokio::time::sleep(Duration::from_secs(5)).await; + + // Dial the proxy and read whatever the echo server returns. http-echo + // closes the connection after sending its body, so reading to EOF gives + // us the full response. 
+ let mut stream = tokio::time::timeout( + Duration::from_secs(15), + tokio::net::TcpStream::connect(proxy_bind), + ) + .await + .expect("connect to proxy timed out") + .expect("connect to proxy failed"); + + stream + .write_all(b"GET / HTTP/1.0\r\nHost: peer-a\r\n\r\n") + .await + .expect("write request failed"); + + let mut buf = Vec::new(); + let read = tokio::time::timeout( + Duration::from_secs(20), + stream.read_to_end(&mut buf), + ) + .await + .expect("read response timed out") + .expect("read response failed"); + + assert!(read > 0, "expected bytes from echo server, got 0"); + let body = String::from_utf8_lossy(&buf); + assert!( + body.contains("sunbeam-net integration test"), + "expected echo body in response, got: {body}" + ); + + handle.shutdown().await.expect("shutdown failed"); +} + /// Test: full daemon lifecycle — start, reach Ready state, query via IPC, shutdown. #[tokio::test(flavor = "multi_thread")] async fn test_daemon_lifecycle() { diff --git a/sunbeam-net/tests/run.sh b/sunbeam-net/tests/run.sh index 1c426dec..b57e1f01 100755 --- a/sunbeam-net/tests/run.sh +++ b/sunbeam-net/tests/run.sh @@ -16,9 +16,16 @@ $COMPOSE up -d headscale $COMPOSE exec -T headscale sh -c 'until headscale health 2>/dev/null; do sleep 1; done' echo "==> Creating pre-auth keys..." -PEER_A_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) -PEER_B_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) -CLIENT_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | grep -o '"key":"[^"]*"' | cut -d'"' -f4) +# Helper that handles both compact and pretty-printed JSON shapes from +# headscale preauthkeys create. 
+extract_key() { + grep -o '"key":[[:space:]]*"[^"]*"' | sed 's/.*"\([^"]*\)"$/\1/' +} +# Only CLIENT_KEY (the test client) is ephemeral: headscale auto-deletes that node when +# its streaming map connection drops, keeping the test database clean. +PEER_A_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | extract_key) +PEER_B_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --expiration 1h -o json | extract_key) +CLIENT_KEY=$($COMPOSE exec -T headscale headscale preauthkeys create --user test --reusable --ephemeral --expiration 1h -o json | extract_key) echo "==> Starting peers..." PEER_A_AUTH_KEY="$PEER_A_KEY" PEER_B_AUTH_KEY="$PEER_B_KEY" $COMPOSE up -d peer-a peer-b echo @@ -33,6 +40,13 @@ for i in $(seq 1 30); do sleep 2 done +# In TUN mode tailscale installs a stateful firewall that DROPs incoming +# tailnet traffic by default. Disable it on both peers so the integration +# tests can actually exchange TCP through the tunnel. +echo "==> Disabling tailscale firewall on peers..." +$COMPOSE exec -T peer-a tailscale set --shields-up=false 2>/dev/null || true +$COMPOSE exec -T peer-b tailscale set --shields-up=false 2>/dev/null || true + # Get the server's Noise public key SERVER_KEY=$($COMPOSE exec -T headscale cat /var/lib/headscale/noise_private.key 2>/dev/null | head -1 || echo "")