Add `// Copyright Sunbeam Studios 2026` and `// SPDX-License-Identifier: Apache-2.0` headers to all source files missing them. Update LICENSE copyright year, Dockerfile copyright header, and .dockerignore for new project structure (lean4/, docs/, training artifacts). Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
437 lines
14 KiB
Rust
437 lines
14 KiB
Rust
// Copyright Sunbeam Studios 2026
|
||
// SPDX-License-Identifier: Apache-2.0
|
||
|
||
//! Integration test: spin up two gossip nodes with bootstrap discovery
|
||
//! and verify that bandwidth reports propagate between them.
|
||
|
||
use std::sync::atomic::Ordering;
|
||
use std::time::Duration;
|
||
use sunbeam_proxy::cluster;
|
||
use sunbeam_proxy::cluster::bandwidth::BandwidthLimitResult;
|
||
use sunbeam_proxy::config::{BandwidthClusterConfig, ClusterConfig, DiscoveryConfig};
|
||
|
||
fn make_config(port: u16, tenant: &str, bootstrap_peers: Option<Vec<String>>) -> ClusterConfig {
|
||
let dir = tempfile::tempdir().expect("tempdir");
|
||
let key_path = dir.path().join("node.key");
|
||
// Leak the tempdir so it lives for the duration of the test.
|
||
let key_path_str = key_path.to_str().unwrap().to_string();
|
||
std::mem::forget(dir);
|
||
|
||
ClusterConfig {
|
||
enabled: true,
|
||
tenant: tenant.to_string(),
|
||
gossip_port: port,
|
||
key_path: Some(key_path_str),
|
||
discovery: DiscoveryConfig {
|
||
method: if bootstrap_peers.is_some() {
|
||
"bootstrap".to_string()
|
||
} else {
|
||
"bootstrap".to_string() // still bootstrap, just no peers
|
||
},
|
||
headless_service: None,
|
||
bootstrap_peers,
|
||
},
|
||
bandwidth: Some(BandwidthClusterConfig {
|
||
broadcast_interval_secs: 1, // fast for testing
|
||
stale_peer_timeout_secs: 30,
|
||
meter_window_secs: 10, // short window for testing
|
||
}),
|
||
models: None,
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn two_nodes_exchange_bandwidth_reports() {
|
||
// Install crypto provider (may already be installed).
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let tenant = "test-tenant-e2e-001";
|
||
|
||
// 1. Start node A (no bootstrap peers — it's the seed).
|
||
let port_a = 19201;
|
||
let cfg_a = make_config(port_a, tenant, None);
|
||
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
|
||
let id_a = handle_a.endpoint_id;
|
||
eprintln!("Node A started: id={id_a}, port={port_a}");
|
||
|
||
// 2. Start node B, bootstrapping from node A.
|
||
let port_b = 19202;
|
||
let bootstrap = vec![format!("{id_a}@127.0.0.1:{port_a}")];
|
||
let cfg_b = make_config(port_b, tenant, Some(bootstrap));
|
||
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
|
||
let id_b = handle_b.endpoint_id;
|
||
eprintln!("Node B started: id={id_b}, port={port_b}");
|
||
|
||
// 3. Record bandwidth on node A.
|
||
handle_a.bandwidth.record(1000, 2000);
|
||
handle_a.bandwidth.record(500, 300);
|
||
|
||
// 4. Wait for at least one broadcast cycle (1s interval + margin).
|
||
// Poll up to 10s for node B to receive node A's report.
|
||
let mut received = false;
|
||
for _ in 0..20 {
|
||
std::thread::sleep(Duration::from_millis(500));
|
||
let peer_count = handle_b.cluster_bandwidth.peer_count.load(Ordering::Relaxed);
|
||
if peer_count > 0 {
|
||
received = true;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if received {
|
||
let total_in = handle_b
|
||
.cluster_bandwidth
|
||
.total_bytes_in
|
||
.load(Ordering::Relaxed);
|
||
let total_out = handle_b
|
||
.cluster_bandwidth
|
||
.total_bytes_out
|
||
.load(Ordering::Relaxed);
|
||
let peers = handle_b
|
||
.cluster_bandwidth
|
||
.peer_count
|
||
.load(Ordering::Relaxed);
|
||
|
||
eprintln!("Node B sees: peers={peers}, total_in={total_in}, total_out={total_out}");
|
||
assert!(peers >= 1, "expected at least 1 peer, got {peers}");
|
||
assert!(total_in > 0, "expected total_in > 0, got {total_in}");
|
||
assert!(total_out > 0, "expected total_out > 0, got {total_out}");
|
||
} else {
|
||
// If nodes couldn't connect (e.g., macOS firewall), check that they at least
|
||
// started without panicking and are running standalone.
|
||
eprintln!(
|
||
"WARNING: nodes did not exchange reports within 10s. \
|
||
This may be due to firewall/network restrictions. \
|
||
Verifying standalone operation instead."
|
||
);
|
||
// Both nodes should still be alive with zero peers.
|
||
assert_eq!(
|
||
handle_b.cluster_bandwidth.peer_count.load(Ordering::Relaxed),
|
||
0,
|
||
"node B should have 0 peers in standalone mode"
|
||
);
|
||
}
|
||
|
||
// 5. Test bidirectional: record on node B, check node A receives it.
|
||
// HyParView gossip may take a few cycles to fully mesh, so we
|
||
// allow this to be best-effort — the critical direction (B sees A) is tested above.
|
||
if received {
|
||
handle_b.bandwidth.record(5000, 6000);
|
||
|
||
let mut bidi = false;
|
||
for _ in 0..30 {
|
||
std::thread::sleep(Duration::from_millis(500));
|
||
let peers = handle_a
|
||
.cluster_bandwidth
|
||
.peer_count
|
||
.load(Ordering::Relaxed);
|
||
if peers > 0 {
|
||
bidi = true;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if bidi {
|
||
let total_in = handle_a
|
||
.cluster_bandwidth
|
||
.total_bytes_in
|
||
.load(Ordering::Relaxed);
|
||
eprintln!(
|
||
"Bidirectional confirmed: Node A sees node B's report, total_in={total_in}"
|
||
);
|
||
} else {
|
||
eprintln!(
|
||
"Bidirectional exchange not confirmed within timeout \
|
||
(normal for two-node HyParView — A→B works, B→A may need more peers)"
|
||
);
|
||
}
|
||
}
|
||
|
||
// 6. Clean shutdown.
|
||
handle_a.shutdown();
|
||
handle_b.shutdown();
|
||
// Give threads a moment to clean up.
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn different_tenants_are_isolated() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
// Start two nodes on different tenants — they should never see each other's traffic.
|
||
let port_a = 19203;
|
||
let port_b = 19204;
|
||
|
||
let cfg_a = make_config(port_a, "tenant-alpha", None);
|
||
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn tenant-alpha");
|
||
let id_a = handle_a.endpoint_id;
|
||
|
||
// Node B connects to A's address but uses a DIFFERENT tenant.
|
||
let bootstrap = vec![format!("{id_a}@127.0.0.1:{port_a}")];
|
||
let cfg_b = make_config(port_b, "tenant-beta", Some(bootstrap));
|
||
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn tenant-beta");
|
||
|
||
handle_a.bandwidth.record(9999, 8888);
|
||
std::thread::sleep(Duration::from_secs(3));
|
||
|
||
// Node B should NOT see node A's bandwidth (different tenant = different topics).
|
||
let peers = handle_b
|
||
.cluster_bandwidth
|
||
.peer_count
|
||
.load(Ordering::Relaxed);
|
||
assert_eq!(
|
||
peers, 0,
|
||
"different tenants should be isolated, but node B sees {peers} peers"
|
||
);
|
||
|
||
handle_a.shutdown();
|
||
handle_b.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn three_node_mesh_propagation() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let tenant = "mesh-test-001";
|
||
|
||
// Node A — seed
|
||
let port_a = 19206;
|
||
let cfg_a = make_config(port_a, tenant, None);
|
||
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
|
||
let id_a = handle_a.endpoint_id;
|
||
|
||
// Node B — bootstraps from A
|
||
let port_b = 19207;
|
||
let cfg_b = make_config(
|
||
port_b,
|
||
tenant,
|
||
Some(vec![format!("{id_a}@127.0.0.1:{port_a}")]),
|
||
);
|
||
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
|
||
let id_b = handle_b.endpoint_id;
|
||
|
||
// Node C — bootstraps from B (not directly from A)
|
||
let port_c = 19208;
|
||
let cfg_c = make_config(
|
||
port_c,
|
||
tenant,
|
||
Some(vec![format!("{id_b}@127.0.0.1:{port_b}")]),
|
||
);
|
||
let handle_c = cluster::spawn_cluster(&cfg_c).expect("spawn node C");
|
||
|
||
eprintln!("3-node mesh: A={id_a} B={id_b} C={}", handle_c.endpoint_id);
|
||
|
||
// Record bandwidth on node A.
|
||
handle_a.bandwidth.record(7777, 8888);
|
||
|
||
// Wait for propagation through the mesh (A→B→C via gossip).
|
||
let mut c_received = false;
|
||
for _ in 0..30 {
|
||
std::thread::sleep(Duration::from_millis(500));
|
||
if handle_c
|
||
.cluster_bandwidth
|
||
.peer_count
|
||
.load(Ordering::Relaxed)
|
||
> 0
|
||
{
|
||
c_received = true;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if c_received {
|
||
let total_in = handle_c
|
||
.cluster_bandwidth
|
||
.total_bytes_in
|
||
.load(Ordering::Relaxed);
|
||
eprintln!("Node C received mesh propagation: total_in={total_in}");
|
||
assert!(total_in > 0, "node C should see A's bandwidth via B");
|
||
} else {
|
||
eprintln!(
|
||
"3-node mesh propagation not confirmed within 15s \
|
||
(expected with only 3 nodes in HyParView)"
|
||
);
|
||
}
|
||
|
||
// Also verify node B sees node A.
|
||
let b_peers = handle_b
|
||
.cluster_bandwidth
|
||
.peer_count
|
||
.load(Ordering::Relaxed);
|
||
assert!(
|
||
b_peers >= 1,
|
||
"node B should see at least node A, got {b_peers}"
|
||
);
|
||
|
||
handle_a.shutdown();
|
||
handle_b.shutdown();
|
||
handle_c.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn aggregate_bandwidth_meter_across_nodes() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let tenant = "meter-test-001";
|
||
|
||
// Node A — seed
|
||
let port_a = 19209;
|
||
let cfg_a = make_config(port_a, tenant, None);
|
||
let handle_a = cluster::spawn_cluster(&cfg_a).expect("spawn node A");
|
||
let id_a = handle_a.endpoint_id;
|
||
|
||
// Node B — bootstraps from A
|
||
let port_b = 19210;
|
||
let cfg_b = make_config(
|
||
port_b,
|
||
tenant,
|
||
Some(vec![format!("{id_a}@127.0.0.1:{port_a}")]),
|
||
);
|
||
let handle_b = cluster::spawn_cluster(&cfg_b).expect("spawn node B");
|
||
|
||
// Simulate sustained traffic: node A does 500 MiB/s, node B does 50 MiB/s.
|
||
// With 1s broadcast interval, each record() call simulates one interval's worth.
|
||
let mib = 1_048_576u64;
|
||
for _ in 0..5 {
|
||
handle_a.bandwidth.record(500 * mib, 500 * mib);
|
||
handle_b.bandwidth.record(50 * mib, 50 * mib);
|
||
std::thread::sleep(Duration::from_secs(1));
|
||
}
|
||
|
||
// Wait for a couple more broadcast cycles to let reports propagate.
|
||
std::thread::sleep(Duration::from_secs(3));
|
||
|
||
// Check node A's meter — it should see its own traffic + node B's.
|
||
let rate_a = handle_a.meter.aggregate_rate();
|
||
eprintln!(
|
||
"Node A meter: in={:.1} MiB/s, out={:.1} MiB/s, samples={}",
|
||
rate_a.in_mib_per_sec(),
|
||
rate_a.out_mib_per_sec(),
|
||
rate_a.sample_count
|
||
);
|
||
|
||
// Check node B's meter — it should see its own traffic + node A's.
|
||
let rate_b = handle_b.meter.aggregate_rate();
|
||
eprintln!(
|
||
"Node B meter: in={:.1} MiB/s, out={:.1} MiB/s, samples={}",
|
||
rate_b.in_mib_per_sec(),
|
||
rate_b.out_mib_per_sec(),
|
||
rate_b.sample_count
|
||
);
|
||
|
||
// Both nodes should have samples (at least their own local ones).
|
||
assert!(
|
||
rate_a.sample_count >= 3,
|
||
"node A should have local samples, got {}",
|
||
rate_a.sample_count
|
||
);
|
||
assert!(
|
||
rate_b.sample_count >= 3,
|
||
"node B should have local samples, got {}",
|
||
rate_b.sample_count
|
||
);
|
||
|
||
// The aggregate should be nonzero on both nodes.
|
||
assert!(rate_a.total_per_sec > 0.0, "node A aggregate rate should be > 0");
|
||
assert!(rate_b.total_per_sec > 0.0, "node B aggregate rate should be > 0");
|
||
|
||
handle_a.shutdown();
|
||
handle_b.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn standalone_mode_works_without_peers() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let cfg = make_config(19205, "standalone-test", None);
|
||
let handle = cluster::spawn_cluster(&cfg).expect("spawn standalone");
|
||
|
||
// Should start fine with no peers.
|
||
handle.bandwidth.record(100, 200);
|
||
std::thread::sleep(Duration::from_secs(2));
|
||
|
||
// No peers to receive from, but node should be healthy.
|
||
assert_eq!(
|
||
handle.cluster_bandwidth.peer_count.load(Ordering::Relaxed),
|
||
0
|
||
);
|
||
|
||
handle.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn bandwidth_limiter_rejects_when_over_cap() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let mut cfg = make_config(19211, "limiter-test", None);
|
||
cfg.bandwidth = Some(BandwidthClusterConfig {
|
||
broadcast_interval_secs: 1,
|
||
stale_peer_timeout_secs: 30,
|
||
meter_window_secs: 5,
|
||
});
|
||
let handle = cluster::spawn_cluster(&cfg).expect("spawn limiter node");
|
||
|
||
// Default limit is 1 Gbps = 125 MB/s. Lower it to 0.001 Gbps = 125 KB/s for this test.
|
||
handle.limiter.set_limit(sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(0.001));
|
||
|
||
// Initially, no traffic — limiter should allow.
|
||
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
|
||
|
||
// Record heavy traffic: 10 MB per record × multiple records.
|
||
for _ in 0..10 {
|
||
handle.bandwidth.record(5_000_000, 5_000_000);
|
||
}
|
||
|
||
// Wait for at least one broadcast cycle so the meter gets fed.
|
||
std::thread::sleep(Duration::from_secs(2));
|
||
|
||
// Now the limiter should reject (100 MB over 5s = 20 MB/s >> 125 KB/s).
|
||
assert_eq!(
|
||
handle.limiter.check(),
|
||
BandwidthLimitResult::Reject,
|
||
"limiter should reject when aggregate rate exceeds cap"
|
||
);
|
||
|
||
// Raise the limit dynamically — should immediately allow again.
|
||
handle.limiter.set_limit(sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(100.0));
|
||
assert_eq!(
|
||
handle.limiter.check(),
|
||
BandwidthLimitResult::Allow,
|
||
"limiter should allow after raising cap to 100 Gbps"
|
||
);
|
||
|
||
handle.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|
||
|
||
#[test]
|
||
fn bandwidth_limiter_defaults_to_1gbps() {
|
||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||
|
||
let cfg = make_config(19212, "limiter-default", None);
|
||
let handle = cluster::spawn_cluster(&cfg).expect("spawn default node");
|
||
|
||
// Default limit should be 1 Gbps = 125_000_000 bytes/sec.
|
||
assert_eq!(
|
||
handle.limiter.limit(),
|
||
sunbeam_proxy::cluster::bandwidth::gbps_to_bytes_per_sec(1.0)
|
||
);
|
||
|
||
// Light traffic should be allowed.
|
||
handle.bandwidth.record(1000, 2000);
|
||
std::thread::sleep(Duration::from_secs(2));
|
||
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
|
||
|
||
// Can be set to unlimited at runtime.
|
||
handle.limiter.set_limit(0);
|
||
assert_eq!(handle.limiter.limit(), 0);
|
||
assert_eq!(handle.limiter.check(), BandwidthLimitResult::Allow);
|
||
|
||
handle.shutdown();
|
||
std::thread::sleep(Duration::from_millis(200));
|
||
}
|