feat(metrics): add Prometheus metrics and scrape endpoint
Add a prometheus metrics module with counters for requests, DDoS/scanner/ rate-limit decisions, active connections gauge, and request duration histogram. Spawn a lightweight HTTP server on a configurable port (default 9090) serving /metrics and /health endpoints. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -1796,9 +1796,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.182"
|
version = "0.2.183"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libz-ng-sys"
|
name = "libz-ng-sys"
|
||||||
@@ -3449,6 +3449,7 @@ dependencies = [
|
|||||||
"pingora-core",
|
"pingora-core",
|
||||||
"pingora-http",
|
"pingora-http",
|
||||||
"pingora-proxy",
|
"pingora-proxy",
|
||||||
|
"prometheus",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"rustls",
|
"rustls",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -49,6 +49,9 @@ arc-swap = "1"
|
|||||||
# Reverse DNS for bot IP verification
|
# Reverse DNS for bot IP verification
|
||||||
dns-lookup = "2"
|
dns-lookup = "2"
|
||||||
|
|
||||||
|
# Prometheus metrics
|
||||||
|
prometheus = "0.13"
|
||||||
|
|
||||||
# Rustls crypto provider — must be installed before any TLS init
|
# Rustls crypto provider — must be installed before any TLS init
|
||||||
rustls = { version = "0.23", features = ["aws-lc-rs"] }
|
rustls = { version = "0.23", features = ["aws-lc-rs"] }
|
||||||
|
|
||||||
|
|||||||
@@ -131,8 +131,13 @@ pub struct TlsFileConfig {
|
|||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
pub struct TelemetryConfig {
|
pub struct TelemetryConfig {
|
||||||
pub otlp_endpoint: String,
|
pub otlp_endpoint: String,
|
||||||
|
/// Port for the Prometheus metrics scrape endpoint. 0 = disabled.
|
||||||
|
#[serde(default = "default_metrics_port")]
|
||||||
|
pub metrics_port: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn default_metrics_port() -> u16 { 9090 }
|
||||||
|
|
||||||
/// A path-prefix sub-route within a virtual host.
|
/// A path-prefix sub-route within a virtual host.
|
||||||
/// Matched longest-prefix-first when multiple entries share a prefix.
|
/// Matched longest-prefix-first when multiple entries share a prefix.
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
// without going through the binary entry point.
|
// without going through the binary entry point.
|
||||||
pub mod acme;
|
pub mod acme;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
|
pub mod metrics;
|
||||||
pub mod ddos;
|
pub mod ddos;
|
||||||
pub mod dual_stack;
|
pub mod dual_stack;
|
||||||
pub mod proxy;
|
pub mod proxy;
|
||||||
|
|||||||
20
src/main.rs
20
src/main.rs
@@ -178,6 +178,9 @@ fn run_serve(upgrade: bool) -> Result<()> {
|
|||||||
// 1. Init telemetry (JSON logs + optional OTEL traces).
|
// 1. Init telemetry (JSON logs + optional OTEL traces).
|
||||||
telemetry::init(&cfg.telemetry.otlp_endpoint);
|
telemetry::init(&cfg.telemetry.otlp_endpoint);
|
||||||
|
|
||||||
|
// 1b. Spawn metrics HTTP server (needs a tokio runtime for the TCP listener).
|
||||||
|
let metrics_port = cfg.telemetry.metrics_port;
|
||||||
|
|
||||||
// 2. Load DDoS detection model if configured.
|
// 2. Load DDoS detection model if configured.
|
||||||
let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
|
let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
|
||||||
if ddos_cfg.enabled {
|
if ddos_cfg.enabled {
|
||||||
@@ -366,7 +369,22 @@ fn run_serve(upgrade: bool) -> Result<()> {
|
|||||||
|
|
||||||
server.add_service(svc);
|
server.add_service(svc);
|
||||||
|
|
||||||
// 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
|
// 5b. Spawn metrics + health HTTP server on its own thread.
|
||||||
|
if metrics_port > 0 {
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
let rt = tokio::runtime::Builder::new_current_thread()
|
||||||
|
.enable_all()
|
||||||
|
.build()
|
||||||
|
.expect("metrics runtime");
|
||||||
|
rt.block_on(async {
|
||||||
|
sunbeam_proxy::metrics::spawn_metrics_server(metrics_port);
|
||||||
|
// Keep the runtime alive.
|
||||||
|
std::future::pending::<()>().await;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5c. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
|
||||||
if let Some(ssh_cfg) = &cfg.ssh {
|
if let Some(ssh_cfg) = &cfg.ssh {
|
||||||
let listen = ssh_cfg.listen.clone();
|
let listen = ssh_cfg.listen.clone();
|
||||||
let backend = ssh_cfg.backend.clone();
|
let backend = ssh_cfg.backend.clone();
|
||||||
|
|||||||
141
src/metrics.rs
Normal file
141
src/metrics.rs
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
use prometheus::{
|
||||||
|
Encoder, Gauge, Histogram, HistogramOpts, IntCounterVec, Opts, Registry, TextEncoder,
|
||||||
|
};
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tokio::net::TcpListener;
|
||||||
|
|
||||||
|
/// Global Prometheus registry shared across all proxy workers.
|
||||||
|
static REGISTRY: LazyLock<Registry> = LazyLock::new(Registry::default);
|
||||||
|
|
||||||
|
pub static REQUESTS_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||||
|
let c = IntCounterVec::new(
|
||||||
|
Opts::new("sunbeam_requests_total", "Total HTTP requests processed"),
|
||||||
|
&["method", "host", "status", "backend"],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||||
|
c
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static REQUEST_DURATION: LazyLock<Histogram> = LazyLock::new(|| {
|
||||||
|
let h = Histogram::with_opts(
|
||||||
|
HistogramOpts::new(
|
||||||
|
"sunbeam_request_duration_seconds",
|
||||||
|
"Request duration in seconds",
|
||||||
|
)
|
||||||
|
.buckets(vec![
|
||||||
|
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(h.clone())).unwrap();
|
||||||
|
h
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static DDOS_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||||
|
let c = IntCounterVec::new(
|
||||||
|
Opts::new(
|
||||||
|
"sunbeam_ddos_decisions_total",
|
||||||
|
"DDoS detection decisions",
|
||||||
|
),
|
||||||
|
&["decision"],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||||
|
c
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static SCANNER_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||||
|
let c = IntCounterVec::new(
|
||||||
|
Opts::new(
|
||||||
|
"sunbeam_scanner_decisions_total",
|
||||||
|
"Scanner detection decisions",
|
||||||
|
),
|
||||||
|
&["decision", "reason"],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||||
|
c
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static RATE_LIMIT_DECISIONS: LazyLock<IntCounterVec> = LazyLock::new(|| {
|
||||||
|
let c = IntCounterVec::new(
|
||||||
|
Opts::new(
|
||||||
|
"sunbeam_rate_limit_decisions_total",
|
||||||
|
"Rate limit decisions",
|
||||||
|
),
|
||||||
|
&["decision"],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(c.clone())).unwrap();
|
||||||
|
c
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static ACTIVE_CONNECTIONS: LazyLock<Gauge> = LazyLock::new(|| {
|
||||||
|
let g = Gauge::new(
|
||||||
|
"sunbeam_active_connections",
|
||||||
|
"Number of active connections being processed",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
REGISTRY.register(Box::new(g.clone())).unwrap();
|
||||||
|
g
|
||||||
|
});
|
||||||
|
|
||||||
|
/// Spawn a lightweight HTTP server on `port` serving `/metrics` and `/health`.
|
||||||
|
/// Returns immediately; the server runs in the background on the tokio runtime.
|
||||||
|
/// Port 0 = disabled.
|
||||||
|
pub fn spawn_metrics_server(port: u16) {
|
||||||
|
if port == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let addr = format!("0.0.0.0:{port}");
|
||||||
|
let listener = match TcpListener::bind(&addr).await {
|
||||||
|
Ok(l) => l,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!(error = %e, port, "failed to bind metrics server");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
tracing::info!(port, "metrics server listening");
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let (mut stream, _) = match listener.accept().await {
|
||||||
|
Ok(conn) => conn,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(error = %e, "metrics accept error");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut buf = vec![0u8; 1024];
|
||||||
|
let n = match tokio::io::AsyncReadExt::read(&mut stream, &mut buf).await {
|
||||||
|
Ok(n) => n,
|
||||||
|
Err(_) => return,
|
||||||
|
};
|
||||||
|
let req = String::from_utf8_lossy(&buf[..n]);
|
||||||
|
|
||||||
|
let (status, content_type, body) = if req.starts_with("GET /metrics") {
|
||||||
|
let encoder = TextEncoder::new();
|
||||||
|
let families = REGISTRY.gather();
|
||||||
|
let mut output = Vec::new();
|
||||||
|
encoder.encode(&families, &mut output).unwrap();
|
||||||
|
("200 OK", "text/plain; version=0.0.4", output)
|
||||||
|
} else if req.starts_with("GET /health") {
|
||||||
|
("200 OK", "text/plain", b"ok\n".to_vec())
|
||||||
|
} else {
|
||||||
|
("404 Not Found", "text/plain", b"not found\n".to_vec())
|
||||||
|
};
|
||||||
|
|
||||||
|
let response = format!(
|
||||||
|
"HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
|
||||||
|
body.len(),
|
||||||
|
);
|
||||||
|
let _ = stream.write_all(response.as_bytes()).await;
|
||||||
|
let _ = stream.write_all(&body).await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user