feat(metrics): add Prometheus metrics and scrape endpoint

Add a prometheus metrics module with counters for requests, DDoS/scanner/
rate-limit decisions, active connections gauge, and request duration
histogram. Spawn a lightweight HTTP server on a configurable port
(default 9090) serving /metrics and /health endpoints.

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:20 +00:00
parent 70781679b5
commit 1ae185b5a5
6 changed files with 172 additions and 3 deletions

5
Cargo.lock generated
View File

@@ -1796,9 +1796,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.182"
version = "0.2.183"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
[[package]]
name = "libz-ng-sys"
@@ -3449,6 +3449,7 @@ dependencies = [
"pingora-core",
"pingora-http",
"pingora-proxy",
"prometheus",
"rustc-hash",
"rustls",
"serde",

View File

@@ -49,6 +49,9 @@ arc-swap = "1"
# Reverse DNS for bot IP verification
dns-lookup = "2"
# Prometheus metrics
prometheus = "0.13"
# Rustls crypto provider — must be installed before any TLS init
rustls = { version = "0.23", features = ["aws-lc-rs"] }

View File

@@ -131,8 +131,13 @@ pub struct TlsFileConfig {
#[derive(Debug, Deserialize, Clone)]
pub struct TelemetryConfig {
pub otlp_endpoint: String,
/// Port for the Prometheus metrics scrape endpoint. 0 = disabled.
#[serde(default = "default_metrics_port")]
pub metrics_port: u16,
}
fn default_metrics_port() -> u16 { 9090 }
/// A path-prefix sub-route within a virtual host.
/// Matched longest-prefix-first when multiple entries share a prefix.
#[derive(Debug, Deserialize, Clone)]

View File

@@ -3,6 +3,7 @@
// without going through the binary entry point.
pub mod acme;
pub mod config;
pub mod metrics;
pub mod ddos;
pub mod dual_stack;
pub mod proxy;

View File

@@ -178,6 +178,9 @@ fn run_serve(upgrade: bool) -> Result<()> {
// 1. Init telemetry (JSON logs + optional OTEL traces).
telemetry::init(&cfg.telemetry.otlp_endpoint);
// 1b. Spawn metrics HTTP server (needs a tokio runtime for the TCP listener).
let metrics_port = cfg.telemetry.metrics_port;
// 2. Load DDoS detection model if configured.
let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
if ddos_cfg.enabled {
@@ -366,7 +369,22 @@ fn run_serve(upgrade: bool) -> Result<()> {
server.add_service(svc);
// 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
// 5b. Spawn metrics + health HTTP server on its own thread.
if metrics_port > 0 {
std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("metrics runtime");
rt.block_on(async {
sunbeam_proxy::metrics::spawn_metrics_server(metrics_port);
// Keep the runtime alive.
std::future::pending::<()>().await;
});
});
}
// 5c. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
if let Some(ssh_cfg) = &cfg.ssh {
let listen = ssh_cfg.listen.clone();
let backend = ssh_cfg.backend.clone();

141
src/metrics.rs Normal file
View File

@@ -0,0 +1,141 @@
use prometheus::{
Encoder, Gauge, Histogram, HistogramOpts, IntCounterVec, Opts, Registry, TextEncoder,
};
use std::sync::LazyLock;
use tokio::io::AsyncWriteExt;
use tokio::net::TcpListener;
/// Global Prometheus registry shared across all proxy workers.
static REGISTRY: LazyLock<Registry> = LazyLock::new(Registry::default);
pub static REQUESTS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
let c = IntCounterVec::new(
Opts::new("sunbeam_requests_total", "Total HTTP requests processed"),
&["method", "host", "status", "backend"],
)
.unwrap();
REGISTRY.register(Box::new(c.clone())).unwrap();
c
});
pub static REQUEST_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
let h = Histogram::with_opts(
HistogramOpts::new(
"sunbeam_request_duration_seconds",
"Request duration in seconds",
)
.buckets(vec![
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
]),
)
.unwrap();
REGISTRY.register(Box::new(h.clone())).unwrap();
h
});
pub static DDOS_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
let c = IntCounterVec::new(
Opts::new(
"sunbeam_ddos_decisions_total",
"DDoS detection decisions",
),
&["decision"],
)
.unwrap();
REGISTRY.register(Box::new(c.clone())).unwrap();
c
});
pub static SCANNER_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
let c = IntCounterVec::new(
Opts::new(
"sunbeam_scanner_decisions_total",
"Scanner detection decisions",
),
&["decision", "reason"],
)
.unwrap();
REGISTRY.register(Box::new(c.clone())).unwrap();
c
});
pub static RATE_LIMIT_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
let c = IntCounterVec::new(
Opts::new(
"sunbeam_rate_limit_decisions_total",
"Rate limit decisions",
),
&["decision"],
)
.unwrap();
REGISTRY.register(Box::new(c.clone())).unwrap();
c
});
pub static ACTIVE_CONNECTIONS: LazyLock<Gauge> = LazyLock::new(|| {
let g = Gauge::new(
"sunbeam_active_connections",
"Number of active connections being processed",
)
.unwrap();
REGISTRY.register(Box::new(g.clone())).unwrap();
g
});
/// Spawn a lightweight HTTP server on `port` serving `/metrics` and `/health`.
/// Returns immediately; the server runs in the background on the tokio runtime.
/// Port 0 = disabled.
pub fn spawn_metrics_server(port: u16) {
if port == 0 {
return;
}
tokio::spawn(async move {
let addr = format!("0.0.0.0:{port}");
let listener = match TcpListener::bind(&addr).await {
Ok(l) => l,
Err(e) => {
tracing::error!(error = %e, port, "failed to bind metrics server");
return;
}
};
tracing::info!(port, "metrics server listening");
loop {
let (mut stream, _) = match listener.accept().await {
Ok(conn) => conn,
Err(e) => {
tracing::warn!(error = %e, "metrics accept error");
continue;
}
};
tokio::spawn(async move {
let mut buf = vec![0u8; 1024];
let n = match tokio::io::AsyncReadExt::read(&mut stream, &mut buf).await {
Ok(n) => n,
Err(_) => return,
};
let req = String::from_utf8_lossy(&buf[..n]);
let (status, content_type, body) = if req.starts_with("GET /metrics") {
let encoder = TextEncoder::new();
let families = REGISTRY.gather();
let mut output = Vec::new();
encoder.encode(&families, &mut output).unwrap();
("200 OK", "text/plain; version=0.0.4", output)
} else if req.starts_with("GET /health") {
("200 OK", "text/plain", b"ok\n".to_vec())
} else {
("404 Not Found", "text/plain", b"not found\n".to_vec())
};
let response = format!(
"HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
body.len(),
);
let _ = stream.write_all(response.as_bytes()).await;
let _ = stream.write_all(&body).await;
});
}
});
}