feat(metrics): add Prometheus metrics and scrape endpoint
Add a prometheus metrics module with counters for requests, DDoS/scanner/ rate-limit decisions, active connections gauge, and request duration histogram. Spawn a lightweight HTTP server on a configurable port (default 9090) serving /metrics and /health endpoints. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -1796,9 +1796,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.182"
|
||||
version = "0.2.183"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
||||
|
||||
[[package]]
|
||||
name = "libz-ng-sys"
|
||||
@@ -3449,6 +3449,7 @@ dependencies = [
|
||||
"pingora-core",
|
||||
"pingora-http",
|
||||
"pingora-proxy",
|
||||
"prometheus",
|
||||
"rustc-hash",
|
||||
"rustls",
|
||||
"serde",
|
||||
|
||||
@@ -49,6 +49,9 @@ arc-swap = "1"
|
||||
# Reverse DNS for bot IP verification
|
||||
dns-lookup = "2"
|
||||
|
||||
# Prometheus metrics
|
||||
prometheus = "0.13"
|
||||
|
||||
# Rustls crypto provider — must be installed before any TLS init
|
||||
rustls = { version = "0.23", features = ["aws-lc-rs"] }
|
||||
|
||||
|
||||
@@ -131,8 +131,13 @@ pub struct TlsFileConfig {
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct TelemetryConfig {
|
||||
pub otlp_endpoint: String,
|
||||
/// Port for the Prometheus metrics scrape endpoint. 0 = disabled.
|
||||
#[serde(default = "default_metrics_port")]
|
||||
pub metrics_port: u16,
|
||||
}
|
||||
|
||||
fn default_metrics_port() -> u16 { 9090 }
|
||||
|
||||
/// A path-prefix sub-route within a virtual host.
|
||||
/// Matched longest-prefix-first when multiple entries share a prefix.
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// without going through the binary entry point.
|
||||
pub mod acme;
|
||||
pub mod config;
|
||||
pub mod metrics;
|
||||
pub mod ddos;
|
||||
pub mod dual_stack;
|
||||
pub mod proxy;
|
||||
|
||||
20
src/main.rs
20
src/main.rs
@@ -178,6 +178,9 @@ fn run_serve(upgrade: bool) -> Result<()> {
|
||||
// 1. Init telemetry (JSON logs + optional OTEL traces).
|
||||
telemetry::init(&cfg.telemetry.otlp_endpoint);
|
||||
|
||||
// 1b. Spawn metrics HTTP server (needs a tokio runtime for the TCP listener).
|
||||
let metrics_port = cfg.telemetry.metrics_port;
|
||||
|
||||
// 2. Load DDoS detection model if configured.
|
||||
let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
|
||||
if ddos_cfg.enabled {
|
||||
@@ -366,7 +369,22 @@ fn run_serve(upgrade: bool) -> Result<()> {
|
||||
|
||||
server.add_service(svc);
|
||||
|
||||
// 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
|
||||
// 5b. Spawn metrics + health HTTP server on its own thread.
|
||||
if metrics_port > 0 {
|
||||
std::thread::spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.expect("metrics runtime");
|
||||
rt.block_on(async {
|
||||
sunbeam_proxy::metrics::spawn_metrics_server(metrics_port);
|
||||
// Keep the runtime alive.
|
||||
std::future::pending::<()>().await;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// 5c. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
|
||||
if let Some(ssh_cfg) = &cfg.ssh {
|
||||
let listen = ssh_cfg.listen.clone();
|
||||
let backend = ssh_cfg.backend.clone();
|
||||
|
||||
141
src/metrics.rs
Normal file
141
src/metrics.rs
Normal file
@@ -0,0 +1,141 @@
|
||||
use prometheus::{
|
||||
Encoder, Gauge, Histogram, HistogramOpts, IntCounterVec, Opts, Registry, TextEncoder,
|
||||
};
|
||||
use std::sync::LazyLock;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
/// Global Prometheus registry shared across all proxy workers.
|
||||
static REGISTRY: LazyLock<Registry> = LazyLock::new(Registry::default);
|
||||
|
||||
pub static REQUESTS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||
let c = IntCounterVec::new(
|
||||
Opts::new("sunbeam_requests_total", "Total HTTP requests processed"),
|
||||
&["method", "host", "status", "backend"],
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||
c
|
||||
});
|
||||
|
||||
pub static REQUEST_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
|
||||
let h = Histogram::with_opts(
|
||||
HistogramOpts::new(
|
||||
"sunbeam_request_duration_seconds",
|
||||
"Request duration in seconds",
|
||||
)
|
||||
.buckets(vec![
|
||||
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
|
||||
]),
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(h.clone())).unwrap();
|
||||
h
|
||||
});
|
||||
|
||||
pub static DDOS_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||
let c = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"sunbeam_ddos_decisions_total",
|
||||
"DDoS detection decisions",
|
||||
),
|
||||
&["decision"],
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||
c
|
||||
});
|
||||
|
||||
pub static SCANNER_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||
let c = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"sunbeam_scanner_decisions_total",
|
||||
"Scanner detection decisions",
|
||||
),
|
||||
&["decision", "reason"],
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||
c
|
||||
});
|
||||
|
||||
pub static RATE_LIMIT_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||
let c = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"sunbeam_rate_limit_decisions_total",
|
||||
"Rate limit decisions",
|
||||
),
|
||||
&["decision"],
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||
c
|
||||
});
|
||||
|
||||
pub static ACTIVE_CONNECTIONS: LazyLock<Gauge> = LazyLock::new(|| {
|
||||
let g = Gauge::new(
|
||||
"sunbeam_active_connections",
|
||||
"Number of active connections being processed",
|
||||
)
|
||||
.unwrap();
|
||||
REGISTRY.register(Box::new(g.clone())).unwrap();
|
||||
g
|
||||
});
|
||||
|
||||
/// Spawn a lightweight HTTP server on `port` serving `/metrics` and `/health`.
|
||||
/// Returns immediately; the server runs in the background on the tokio runtime.
|
||||
/// Port 0 = disabled.
|
||||
pub fn spawn_metrics_server(port: u16) {
|
||||
if port == 0 {
|
||||
return;
|
||||
}
|
||||
tokio::spawn(async move {
|
||||
let addr = format!("0.0.0.0:{port}");
|
||||
let listener = match TcpListener::bind(&addr).await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, port, "failed to bind metrics server");
|
||||
return;
|
||||
}
|
||||
};
|
||||
tracing::info!(port, "metrics server listening");
|
||||
|
||||
loop {
|
||||
let (mut stream, _) = match listener.accept().await {
|
||||
Ok(conn) => conn,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "metrics accept error");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 1024];
|
||||
let n = match tokio::io::AsyncReadExt::read(&mut stream, &mut buf).await {
|
||||
Ok(n) => n,
|
||||
Err(_) => return,
|
||||
};
|
||||
let req = String::from_utf8_lossy(&buf[..n]);
|
||||
|
||||
let (status, content_type, body) = if req.starts_with("GET /metrics") {
|
||||
let encoder = TextEncoder::new();
|
||||
let families = REGISTRY.gather();
|
||||
let mut output = Vec::new();
|
||||
encoder.encode(&families, &mut output).unwrap();
|
||||
("200 OK", "text/plain; version=0.0.4", output)
|
||||
} else if req.starts_with("GET /health") {
|
||||
("200 OK", "text/plain", b"ok\n".to_vec())
|
||||
} else {
|
||||
("404 Not Found", "text/plain", b"not found\n".to_vec())
|
||||
};
|
||||
|
||||
let response = format!(
|
||||
"HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
|
||||
body.len(),
|
||||
);
|
||||
let _ = stream.write_all(response.as_bytes()).await;
|
||||
let _ = stream.write_all(&body).await;
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user