From 1ae185b5a5acb9b0642355b2a9b7487fefa538b0 Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 10 Mar 2026 23:38:20 +0000 Subject: [PATCH] feat(metrics): add Prometheus metrics and scrape endpoint Add a prometheus metrics module with counters for requests, DDoS/scanner/ rate-limit decisions, active connections gauge, and request duration histogram. Spawn a lightweight HTTP server on a configurable port (default 9090) serving /metrics and /health endpoints. Signed-off-by: Sienna Meridian Satterwhite --- Cargo.lock | 5 +- Cargo.toml | 3 ++ src/config.rs | 5 ++ src/lib.rs | 1 + src/main.rs | 20 ++++++- src/metrics.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 src/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 5d81d1c..10e14c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1796,9 +1796,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libz-ng-sys" @@ -3449,6 +3449,7 @@ dependencies = [ "pingora-core", "pingora-http", "pingora-proxy", + "prometheus", "rustc-hash", "rustls", "serde", diff --git a/Cargo.toml b/Cargo.toml index 29d8afa..f7e0c22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,9 @@ arc-swap = "1" # Reverse DNS for bot IP verification dns-lookup = "2" +# Prometheus metrics +prometheus = "0.13" + # Rustls crypto provider — must be installed before any TLS init rustls = { version = "0.23", features = ["aws-lc-rs"] } diff --git a/src/config.rs b/src/config.rs index 42ef401..9f4786d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -131,8 +131,13 @@ pub struct TlsFileConfig { #[derive(Debug, Deserialize, Clone)] pub struct TelemetryConfig { pub otlp_endpoint: String, + /// Port for the Prometheus metrics scrape endpoint. 0 = disabled. + #[serde(default = "default_metrics_port")] + pub metrics_port: u16, } +fn default_metrics_port() -> u16 { 9090 } + /// A path-prefix sub-route within a virtual host. /// Matched longest-prefix-first when multiple entries share a prefix. #[derive(Debug, Deserialize, Clone)] diff --git a/src/lib.rs b/src/lib.rs index 99b705d..d38b071 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ // without going through the binary entry point. pub mod acme; pub mod config; +pub mod metrics; pub mod ddos; pub mod dual_stack; pub mod proxy; diff --git a/src/main.rs b/src/main.rs index 3380312..0223fdc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -178,6 +178,9 @@ fn run_serve(upgrade: bool) -> Result<()> { // 1. Init telemetry (JSON logs + optional OTEL traces). telemetry::init(&cfg.telemetry.otlp_endpoint); + // 1b. Spawn metrics HTTP server (needs a tokio runtime for the TCP listener). + let metrics_port = cfg.telemetry.metrics_port; + // 2. Load DDoS detection model if configured. let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos { if ddos_cfg.enabled { @@ -366,7 +369,22 @@ fn run_serve(upgrade: bool) -> Result<()> { server.add_service(svc); - // 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured. + // 5b. Spawn metrics + health HTTP server on its own thread. + if metrics_port > 0 { + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("metrics runtime"); + rt.block_on(async { + sunbeam_proxy::metrics::spawn_metrics_server(metrics_port); + // Keep the runtime alive. + std::future::pending::<()>().await; + }); + }); + } + + // 5c. SSH TCP passthrough (port 22 → Gitea SSH), if configured. if let Some(ssh_cfg) = &cfg.ssh { let listen = ssh_cfg.listen.clone(); let backend = ssh_cfg.backend.clone(); diff --git a/src/metrics.rs b/src/metrics.rs new file mode 100644 index 0000000..061ed58 --- /dev/null +++ b/src/metrics.rs @@ -0,0 +1,141 @@ +use prometheus::{ + Encoder, Gauge, Histogram, HistogramOpts, IntCounterVec, Opts, Registry, TextEncoder, +}; +use std::sync::LazyLock; +use tokio::io::AsyncWriteExt; +use tokio::net::TcpListener; + +/// Global Prometheus registry shared across all proxy workers. +static REGISTRY: LazyLock = LazyLock::new(Registry::default); + +pub static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + let c = IntCounterVec::new( + Opts::new("sunbeam_requests_total", "Total HTTP requests processed"), + &["method", "host", "status", "backend"], + ) + .unwrap(); + REGISTRY.register(Box::new(c.clone())).unwrap(); + c +}); + +pub static REQUEST_DURATION: LazyLock = LazyLock::new(|| { + let h = Histogram::with_opts( + HistogramOpts::new( + "sunbeam_request_duration_seconds", + "Request duration in seconds", + ) + .buckets(vec![ + 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, + ]), + ) + .unwrap(); + REGISTRY.register(Box::new(h.clone())).unwrap(); + h +}); + +pub static DDOS_DECISIONS: LazyLock = LazyLock::new(|| { + let c = IntCounterVec::new( + Opts::new( + "sunbeam_ddos_decisions_total", + "DDoS detection decisions", + ), + &["decision"], + ) + .unwrap(); + REGISTRY.register(Box::new(c.clone())).unwrap(); + c +}); + +pub static SCANNER_DECISIONS: LazyLock = LazyLock::new(|| { + let c = IntCounterVec::new( + Opts::new( + "sunbeam_scanner_decisions_total", + "Scanner detection decisions", + ), + &["decision", "reason"], + ) + .unwrap(); + REGISTRY.register(Box::new(c.clone())).unwrap(); + c +}); + +pub static RATE_LIMIT_DECISIONS: LazyLock = LazyLock::new(|| { + let c = IntCounterVec::new( + Opts::new( + "sunbeam_rate_limit_decisions_total", + "Rate limit decisions", + ), + &["decision"], + ) + .unwrap(); + REGISTRY.register(Box::new(c.clone())).unwrap(); + c +}); + +pub static ACTIVE_CONNECTIONS: LazyLock = LazyLock::new(|| { + let g = Gauge::new( + "sunbeam_active_connections", + "Number of active connections being processed", + ) + .unwrap(); + REGISTRY.register(Box::new(g.clone())).unwrap(); + g +}); + +/// Spawn a lightweight HTTP server on `port` serving `/metrics` and `/health`. +/// Returns immediately; the server runs in the background on the tokio runtime. +/// Port 0 = disabled. +pub fn spawn_metrics_server(port: u16) { + if port == 0 { + return; + } + tokio::spawn(async move { + let addr = format!("0.0.0.0:{port}"); + let listener = match TcpListener::bind(&addr).await { + Ok(l) => l, + Err(e) => { + tracing::error!(error = %e, port, "failed to bind metrics server"); + return; + } + }; + tracing::info!(port, "metrics server listening"); + + loop { + let (mut stream, _) = match listener.accept().await { + Ok(conn) => conn, + Err(e) => { + tracing::warn!(error = %e, "metrics accept error"); + continue; + } + }; + + tokio::spawn(async move { + let mut buf = vec![0u8; 1024]; + let n = match tokio::io::AsyncReadExt::read(&mut stream, &mut buf).await { + Ok(n) => n, + Err(_) => return, + }; + let req = String::from_utf8_lossy(&buf[..n]); + + let (status, content_type, body) = if req.starts_with("GET /metrics") { + let encoder = TextEncoder::new(); + let families = REGISTRY.gather(); + let mut output = Vec::new(); + encoder.encode(&families, &mut output).unwrap(); + ("200 OK", "text/plain; version=0.0.4", output) + } else if req.starts_with("GET /health") { + ("200 OK", "text/plain", b"ok\n".to_vec()) + } else { + ("404 Not Found", "text/plain", b"not found\n".to_vec()) + }; + + let response = format!( + "HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n", + body.len(), + ); + let _ = stream.write_all(response.as_bytes()).await; + let _ = stream.write_all(&body).await; + }); + } + }); +}