Files
proxy/tests/cluster_test.rs
Sienna Meridian Satterwhite 385e9d4c59 chore: add SPDX copyright headers and update license year
Add `// Copyright Sunbeam Studios 2026` and `// SPDX-License-Identifier:
Apache-2.0` headers to all source files missing them. Update LICENSE
copyright year, Dockerfile copyright header, and .dockerignore for new
project structure (lean4/, docs/, training artifacts).

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
2026-03-10 23:38:21 +00:00

437 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright Sunbeam Studios 2026
// SPDX-License-Identifier: Apache-2.0
//! Integration test: spin up two gossip nodes with bootstrap discovery
//! and verify that bandwidth reports propagate between them.
use std::sync::atomic::Ordering;
use std::time::Duration;
use sunbeam_proxy::cluster;
use sunbeam_proxy::cluster::bandwidth::BandwidthLimitResult;
use sunbeam_proxy::config::{BandwidthClusterConfig, ClusterConfig, DiscoveryConfig};
fn make_config(port: u16, tenant: &str, bootstrap_peers: Option<Vec<String>>) -> ClusterConfig {
let dir = tempfile::tempdir().expect("tempdir");
let key_path = dir.path().join("node.key");
// Leak the tempdir so it lives for the duration of the test.
let key_path_str = key_path.to_str().unwrap().to_string();
std::mem::forget(dir);
ClusterConfig {
enabled: true,
tenant: tenant.to_string(),
gossip_port: port,
key_path: Some(key_path_str),
discovery: DiscoveryConfig {
method: if bootstrap_peers.is_some() {
"bootstrap".to_string()
} else {
"bootstrap".to_string() // still bootstrap, just no peers
},
headless_service: None,
bootstrap_peers,
},
bandwidth: Some(BandwidthClusterConfig {
broadcast_interval_secs: 1, // fast for testing
stale_peer_timeout_secs: 30,
meter_window_secs: 10, // short window for testing
}),
models: None,
}
}
#[test]
fn two_nodes_exchange_bandwidth_reports() {
// Install crypto provider (may already be installed).
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let tenant = "test-tenant-e2e-001";
// 1. Start node A (no bootstrap peers — it's the seed).
let port_a = 19201;
let cfg_a = make_config(port_a, tenant, None);
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
let id_a = handle_a.endpoint_id;
eprintln!("Node A started: id={id_a}, port={port_a}");
// 2. Start node B, bootstrapping from node A.
let port_b = 19202;
let bootstrap = vec![format!("{id_a}@127.0.0.1:{port_a}")];
let cfg_b = make_config(port_b, tenant, Some(bootstrap));
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
let id_b = handle_b.endpoint_id;
eprintln!("Node B started: id={id_b}, port={port_b}");
// 3. Record bandwidth on node A.
handle_a.bandwidth.record(1000, 2000);
handle_a.bandwidth.record(500, 300);
// 4. Wait for at least one broadcast cycle (1s interval + margin).
// Poll up to 10s for node B to receive node A's report.
let mut received = false;
for _ in 0..20 {
std::thread::sleep(Duration::from_millis(500));
let peer_count = handle_b.cluster_bandwidth.peer_count.load(Ordering::Relaxed);
if peer_count > 0 {
received = true;
break;
}
}
if received {
let total_in = handle_b
.cluster_bandwidth
.total_bytes_in
.load(Ordering::Relaxed);
let total_out = handle_b
.cluster_bandwidth
.total_bytes_out
.load(Ordering::Relaxed);
let peers = handle_b
.cluster_bandwidth
.peer_count
.load(Ordering::Relaxed);
eprintln!("Node B sees: peers={peers}, total_in={total_in}, total_out={total_out}");
assert!(peers >= 1, "expected at least 1 peer, got {peers}");
assert!(total_in > 0, "expected total_in > 0, got {total_in}");
assert!(total_out > 0, "expected total_out > 0, got {total_out}");
} else {
// If nodes couldn't connect (e.g., macOS firewall), check that they at least
// started without panicking and are running standalone.
eprintln!(
"WARNING: nodes did not exchange reports within 10s. \
This may be due to firewall/network restrictions. \
Verifying standalone operation instead."
);
// Both nodes should still be alive with zero peers.
assert_eq!(
handle_b.cluster_bandwidth.peer_count.load(Ordering::Relaxed),
0,
"node B should have 0 peers in standalone mode"
);
}
// 5. Test bidirectional: record on node B, check node A receives it.
// HyParView gossip may take a few cycles to fully mesh, so we
// allow this to be best-effort — the critical direction (B sees A) is tested above.
if received {
handle_b.bandwidth.record(5000, 6000);
let mut bidi = false;
for _ in 0..30 {
std::thread::sleep(Duration::from_millis(500));
let peers = handle_a
.cluster_bandwidth
.peer_count
.load(Ordering::Relaxed);
if peers > 0 {
bidi = true;
break;
}
}
if bidi {
let total_in = handle_a
.cluster_bandwidth
.total_bytes_in
.load(Ordering::Relaxed);
eprintln!(
"Bidirectional confirmed: Node A sees node B's report, total_in={total_in}"
);
} else {
eprintln!(
"Bidirectional exchange not confirmed within timeout \
(normal for two-node HyParView — A→B works, B→A may need more peers)"
);
}
}
// 6. Clean shutdown.
handle_a.shutdown();
handle_b.shutdown();
// Give threads a moment to clean up.
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn different_tenants_are_isolated() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
// Start two nodes on different tenants — they should never see each other's traffic.
let port_a = 19203;
let port_b = 19204;
let cfg_a = make_config(port_a, "tenant-alpha", None);
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn tenant-alpha");
let id_a = handle_a.endpoint_id;
// Node B connects to A's address but uses a DIFFERENT tenant.
let bootstrap = vec![format!("{id_a}@127.0.0.1:{port_a}")];
let cfg_b = make_config(port_b, "tenant-beta", Some(bootstrap));
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn tenant-beta");
handle_a.bandwidth.record(9999, 8888);
std::thread::sleep(Duration::from_secs(3));
// Node B should NOT see node A's bandwidth (different tenant = different topics).
let peers = handle_b
.cluster_bandwidth
.peer_count
.load(Ordering::Relaxed);
assert_eq!(
peers, 0,
"different tenants should be isolated, but node B sees {peers} peers"
);
handle_a.shutdown();
handle_b.shutdown();
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn three_node_mesh_propagation() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let tenant = "mesh-test-001";
// Node A — seed
let port_a = 19206;
let cfg_a = make_config(port_a, tenant, None);
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
let id_a = handle_a.endpoint_id;
// Node B — bootstraps from A
let port_b = 19207;
let cfg_b = make_config(
port_b,
tenant,
Some(vec![format!("{id_a}@127.0.0.1:{port_a}")]),
);
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
let id_b = handle_b.endpoint_id;
// Node C — bootstraps from B (not directly from A)
let port_c = 19208;
let cfg_c = make_config(
port_c,
tenant,
Some(vec![format!("{id_b}@127.0.0.1:{port_b}")]),
);
let handle_c = cluster::spawn_cluster(&cfg_c).expect("spawn node C");
eprintln!("3-node mesh: A={id_a} B={id_b} C={}", handle_c.endpoint_id);
// Record bandwidth on node A.
handle_a.bandwidth.record(7777, 8888);
// Wait for propagation through the mesh (A→B→C via gossip).
let mut c_received = false;
for _ in 0..30 {
std::thread::sleep(Duration::from_millis(500));
if handle_c
.cluster_bandwidth
.peer_count
.load(Ordering::Relaxed)
> 0
{
c_received = true;
break;
}
}
if c_received {
let total_in = handle_c
.cluster_bandwidth
.total_bytes_in
.load(Ordering::Relaxed);
eprintln!("Node C received mesh propagation: total_in={total_in}");
assert!(total_in > 0, "node C should see A's bandwidth via B");
} else {
eprintln!(
"3-node mesh propagation not confirmed within 15s \
(expected with only 3 nodes in HyParView)"
);
}
// Also verify node B sees node A.
let b_peers = handle_b
.cluster_bandwidth
.peer_count
.load(Ordering::Relaxed);
assert!(
b_peers >= 1,
"node B should see at least node A, got {b_peers}"
);
handle_a.shutdown();
handle_b.shutdown();
handle_c.shutdown();
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn aggregate_bandwidth_meter_across_nodes() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let tenant = "meter-test-001";
// Node A — seed
let port_a = 19209;
let cfg_a = make_config(port_a, tenant, None);
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
let id_a = handle_a.endpoint_id;
// Node B — bootstraps from A
let port_b = 19210;
let cfg_b = make_config(
port_b,
tenant,
Some(vec![format!("{id_a}@127.0.0.1:{port_a}")]),
);
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
// Simulate sustained traffic: node A does 500 MiB/s, node B does 50 MiB/s.
// With 1s broadcast interval, each record() call simulates one interval's worth.
let mib = 1_048_576u64;
for _ in 0..5 {
handle_a.bandwidth.record(500 * mib, 500 * mib);
handle_b.bandwidth.record(50 * mib, 50 * mib);
std::thread::sleep(Duration::from_secs(1));
}
// Wait for a couple more broadcast cycles to let reports propagate.
std::thread::sleep(Duration::from_secs(3));
// Check node A's meter — it should see its own traffic + node B's.
let rate_a = handle_a.meter.aggregate_rate();
eprintln!(
"Node A meter: in={:.1} MiB/s, out={:.1} MiB/s, samples={}",
rate_a.in_mib_per_sec(),
rate_a.out_mib_per_sec(),
rate_a.sample_count
);
// Check node B's meter — it should see its own traffic + node A's.
let rate_b = handle_b.meter.aggregate_rate();
eprintln!(
"Node B meter: in={:.1} MiB/s, out={:.1} MiB/s, samples={}",
rate_b.in_mib_per_sec(),
rate_b.out_mib_per_sec(),
rate_b.sample_count
);
// Both nodes should have samples (at least their own local ones).
assert!(
rate_a.sample_count >= 3,
"node A should have local samples, got {}",
rate_a.sample_count
);
assert!(
rate_b.sample_count >= 3,
"node B should have local samples, got {}",
rate_b.sample_count
);
// The aggregate should be nonzero on both nodes.
assert!(rate_a.total_per_sec > 0.0, "node A aggregate rate should be > 0");
assert!(rate_b.total_per_sec > 0.0, "node B aggregate rate should be > 0");
handle_a.shutdown();
handle_b.shutdown();
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn standalone_mode_works_without_peers() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let cfg = make_config(19205, "standalone-test", None);
let handle = cluster::spawn_cluster(&cfg).expect("spawn standalone");
// Should start fine with no peers.
handle.bandwidth.record(100, 200);
std::thread::sleep(Duration::from_secs(2));
// No peers to receive from, but node should be healthy.
assert_eq!(
handle.cluster_bandwidth.peer_count.load(Ordering::Relaxed),
0
);
handle.shutdown();
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn bandwidth_limiter_rejects_when_over_cap() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let mut cfg = make_config(19211, "limiter-test", None);
cfg.bandwidth = Some(BandwidthClusterConfig {
broadcast_interval_secs: 1,
stale_peer_timeout_secs: 30,
meter_window_secs: 5,
});
let handle = cluster::spawn_cluster(&cfg).expect("spawn limiter node");
// Default limit is 1 Gbps = 125 MB/s. Lower it to 0.001 Gbps = 125 KB/s for this test.
handle.limiter.set_limit(sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(0.001));
// Initially, no traffic — limiter should allow.
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
// Record heavy traffic: 10 MB per record × multiple records.
for _ in 0..10 {
handle.bandwidth.record(5_000_000, 5_000_000);
}
// Wait for at least one broadcast cycle so the meter gets fed.
std::thread::sleep(Duration::from_secs(2));
// Now the limiter should reject (100 MB over 5s = 20 MB/s >> 125 KB/s).
assert_eq!(
handle.limiter.check(),
BandwidthLimitResult::Reject,
"limiter should reject when aggregate rate exceeds cap"
);
// Raise the limit dynamically — should immediately allow again.
handle.limiter.set_limit(sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(100.0));
assert_eq!(
handle.limiter.check(),
BandwidthLimitResult::Allow,
"limiter should allow after raising cap to 100 Gbps"
);
handle.shutdown();
std::thread::sleep(Duration::from_millis(200));
}
#[test]
fn bandwidth_limiter_defaults_to_1gbps() {
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
let cfg = make_config(19212, "limiter-default", None);
let handle = cluster::spawn_cluster(&cfg).expect("spawn default node");
// Default limit should be 1 Gbps = 125_000_000 bytes/sec.
assert_eq!(
handle.limiter.limit(),
sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(1.0)
);
// Light traffic should be allowed.
handle.bandwidth.record(1000, 2000);
std::thread::sleep(Duration::from_secs(2));
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
// Can be set to unlimited at runtime.
handle.limiter.set_limit(0);
assert_eq!(handle.limiter.limit(), 0);
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
handle.shutdown();
std::thread::sleep(Duration::from_millis(200));
}