feat(proxy): integrate DDoS, scanner, and rate limiter into request pipeline
Wire up all three detection layers in request_filter with pipeline logging at each stage for unfiltered training data. Add DDoS, scanner, and rate_limit config sections. Bot allowlist check before scanner model on the hot path. CLI subcommands for train/replay. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
@@ -18,8 +18,102 @@ pub struct Config {
|
||||
pub routes: Vec<RouteConfig>,
|
||||
/// Optional SSH TCP passthrough (port 22 → Gitea SSH).
|
||||
pub ssh: Option<SshConfig>,
|
||||
/// Optional KNN-based DDoS detection.
|
||||
pub ddos: Option<DDoSConfig>,
|
||||
/// Optional per-identity rate limiting.
|
||||
pub rate_limit: Option<RateLimitConfig>,
|
||||
/// Optional per-request scanner detection.
|
||||
pub scanner: Option<ScannerConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct DDoSConfig {
|
||||
pub model_path: String,
|
||||
#[serde(default = "default_k")]
|
||||
pub k: usize,
|
||||
#[serde(default = "default_threshold")]
|
||||
pub threshold: f64,
|
||||
#[serde(default = "default_window_secs")]
|
||||
pub window_secs: u64,
|
||||
#[serde(default = "default_window_capacity")]
|
||||
pub window_capacity: usize,
|
||||
#[serde(default = "default_min_events")]
|
||||
pub min_events: usize,
|
||||
#[serde(default = "default_enabled")]
|
||||
pub enabled: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct RateLimitConfig {
|
||||
#[serde(default = "default_rl_enabled")]
|
||||
pub enabled: bool,
|
||||
#[serde(default)]
|
||||
pub bypass_cidrs: Vec<String>,
|
||||
#[serde(default = "default_eviction_interval")]
|
||||
pub eviction_interval_secs: u64,
|
||||
#[serde(default = "default_stale_after")]
|
||||
pub stale_after_secs: u64,
|
||||
pub authenticated: BucketConfig,
|
||||
pub unauthenticated: BucketConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct BucketConfig {
|
||||
pub burst: u32,
|
||||
pub rate: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct ScannerConfig {
|
||||
pub model_path: String,
|
||||
#[serde(default = "default_scanner_threshold")]
|
||||
pub threshold: f64,
|
||||
#[serde(default = "default_scanner_enabled")]
|
||||
pub enabled: bool,
|
||||
/// How often (seconds) to check the model file for changes. 0 = no hot-reload.
|
||||
#[serde(default = "default_scanner_poll_interval")]
|
||||
pub poll_interval_secs: u64,
|
||||
/// Bot allowlist rules. Verified bots bypass the scanner model.
|
||||
#[serde(default)]
|
||||
pub allowlist: Vec<BotAllowlistRule>,
|
||||
/// TTL (seconds) for verified bot IP cache entries.
|
||||
#[serde(default = "default_bot_cache_ttl")]
|
||||
pub bot_cache_ttl_secs: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct BotAllowlistRule {
|
||||
/// Case-insensitive UA prefix to match, e.g. "Googlebot".
|
||||
pub ua_prefix: String,
|
||||
/// Human-readable label for pipeline logs.
|
||||
pub reason: String,
|
||||
/// Reverse-DNS hostname suffixes for verification.
|
||||
/// e.g. ["googlebot.com", "google.com"]
|
||||
#[serde(default)]
|
||||
pub dns_suffixes: Vec<String>,
|
||||
/// CIDR ranges for instant IP verification.
|
||||
/// e.g. ["66.249.64.0/19"]
|
||||
#[serde(default)]
|
||||
pub cidrs: Vec<String>,
|
||||
}
|
||||
|
||||
fn default_bot_cache_ttl() -> u64 { 86400 } // 24h
|
||||
|
||||
fn default_scanner_threshold() -> f64 { 0.5 }
|
||||
fn default_scanner_enabled() -> bool { true }
|
||||
fn default_scanner_poll_interval() -> u64 { 30 }
|
||||
|
||||
fn default_rl_enabled() -> bool { true }
|
||||
fn default_eviction_interval() -> u64 { 300 }
|
||||
fn default_stale_after() -> u64 { 600 }
|
||||
|
||||
fn default_k() -> usize { 5 }
|
||||
fn default_threshold() -> f64 { 0.6 }
|
||||
fn default_window_secs() -> u64 { 60 }
|
||||
fn default_window_capacity() -> usize { 1000 }
|
||||
fn default_min_events() -> usize { 10 }
|
||||
fn default_enabled() -> bool { true }
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct ListenConfig {
|
||||
/// HTTP listener address, e.g., "0.0.0.0:80" or "[::]:80".
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
// without going through the binary entry point.
|
||||
pub mod acme;
|
||||
pub mod config;
|
||||
pub mod ddos;
|
||||
pub mod dual_stack;
|
||||
pub mod proxy;
|
||||
pub mod rate_limit;
|
||||
pub mod scanner;
|
||||
pub mod ssh;
|
||||
|
||||
286
src/main.rs
286
src/main.rs
@@ -4,16 +4,167 @@ mod watcher;
|
||||
|
||||
use sunbeam_proxy::{acme, config};
|
||||
use sunbeam_proxy::proxy::SunbeamProxy;
|
||||
use sunbeam_proxy::ddos;
|
||||
use sunbeam_proxy::rate_limit;
|
||||
use sunbeam_proxy::scanner;
|
||||
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::{Parser, Subcommand};
|
||||
use kube::Client;
|
||||
use pingora::server::{configuration::Opt, Server};
|
||||
use pingora_proxy::http_proxy_service;
|
||||
use std::sync::RwLock;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "sunbeam-proxy")]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Option<Commands>,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Start the proxy server (default if no subcommand given)
|
||||
Serve {
|
||||
/// Pingora --upgrade flag for zero-downtime reload
|
||||
#[arg(long)]
|
||||
upgrade: bool,
|
||||
},
|
||||
/// Replay audit logs through the DDoS detector and rate limiter
|
||||
Replay {
|
||||
/// Path to audit log JSONL file
|
||||
#[arg(short, long)]
|
||||
input: String,
|
||||
/// Path to trained model file
|
||||
#[arg(short, long, default_value = "ddos_model.bin")]
|
||||
model: String,
|
||||
/// Optional config file (for rate limit settings)
|
||||
#[arg(short, long)]
|
||||
config: Option<String>,
|
||||
/// KNN k parameter
|
||||
#[arg(long, default_value = "5")]
|
||||
k: usize,
|
||||
/// Attack threshold
|
||||
#[arg(long, default_value = "0.6")]
|
||||
threshold: f64,
|
||||
/// Sliding window size in seconds
|
||||
#[arg(long, default_value = "60")]
|
||||
window_secs: u64,
|
||||
/// Minimum events per IP before classification
|
||||
#[arg(long, default_value = "10")]
|
||||
min_events: usize,
|
||||
/// Also run rate limiter during replay
|
||||
#[arg(long)]
|
||||
rate_limit: bool,
|
||||
},
|
||||
/// Train a DDoS detection model from audit logs
|
||||
Train {
|
||||
/// Path to audit log JSONL file
|
||||
#[arg(short, long)]
|
||||
input: String,
|
||||
/// Output model file path
|
||||
#[arg(short, long)]
|
||||
output: String,
|
||||
/// File with known-attack IPs (one per line)
|
||||
#[arg(long)]
|
||||
attack_ips: Option<String>,
|
||||
/// File with known-normal IPs (one per line)
|
||||
#[arg(long)]
|
||||
normal_ips: Option<String>,
|
||||
/// TOML file with heuristic auto-labeling thresholds
|
||||
#[arg(long)]
|
||||
heuristics: Option<String>,
|
||||
/// KNN k parameter
|
||||
#[arg(long, default_value = "5")]
|
||||
k: usize,
|
||||
/// Attack threshold (fraction of k neighbors)
|
||||
#[arg(long, default_value = "0.6")]
|
||||
threshold: f64,
|
||||
/// Sliding window size in seconds
|
||||
#[arg(long, default_value = "60")]
|
||||
window_secs: u64,
|
||||
/// Minimum events per IP to include in training
|
||||
#[arg(long, default_value = "10")]
|
||||
min_events: usize,
|
||||
},
|
||||
/// Train a per-request scanner detection model from audit logs
|
||||
TrainScanner {
|
||||
/// Path to audit log JSONL file
|
||||
#[arg(short, long)]
|
||||
input: String,
|
||||
/// Output model file path
|
||||
#[arg(short, long, default_value = "scanner_model.bin")]
|
||||
output: String,
|
||||
/// Directory (or file) containing .txt wordlists of scanner paths
|
||||
#[arg(long)]
|
||||
wordlists: Option<String>,
|
||||
/// Classification threshold
|
||||
#[arg(long, default_value = "0.5")]
|
||||
threshold: f64,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
match cli.command.unwrap_or(Commands::Serve { upgrade: false }) {
|
||||
Commands::Serve { upgrade } => run_serve(upgrade),
|
||||
Commands::Replay {
|
||||
input,
|
||||
model,
|
||||
config,
|
||||
k,
|
||||
threshold,
|
||||
window_secs,
|
||||
min_events,
|
||||
rate_limit,
|
||||
} => ddos::replay::run(ddos::replay::ReplayArgs {
|
||||
input,
|
||||
model_path: model,
|
||||
config_path: config,
|
||||
k,
|
||||
threshold,
|
||||
window_secs,
|
||||
min_events,
|
||||
rate_limit,
|
||||
}),
|
||||
Commands::Train {
|
||||
input,
|
||||
output,
|
||||
attack_ips,
|
||||
normal_ips,
|
||||
heuristics,
|
||||
k,
|
||||
threshold,
|
||||
window_secs,
|
||||
min_events,
|
||||
} => ddos::train::run(ddos::train::TrainArgs {
|
||||
input,
|
||||
output,
|
||||
attack_ips,
|
||||
normal_ips,
|
||||
heuristics,
|
||||
k,
|
||||
threshold,
|
||||
window_secs,
|
||||
min_events,
|
||||
}),
|
||||
Commands::TrainScanner {
|
||||
input,
|
||||
output,
|
||||
wordlists,
|
||||
threshold,
|
||||
} => scanner::train::run(scanner::train::TrainScannerArgs {
|
||||
input,
|
||||
output,
|
||||
wordlists,
|
||||
threshold,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn run_serve(upgrade: bool) -> Result<()> {
|
||||
// Install the aws-lc-rs crypto provider for rustls before any TLS init.
|
||||
// Required because rustls 0.23 no longer auto-selects a provider at compile time.
|
||||
rustls::crypto::aws_lc_rs::default_provider()
|
||||
@@ -27,10 +178,120 @@ fn main() -> Result<()> {
|
||||
// 1. Init telemetry (JSON logs + optional OTEL traces).
|
||||
telemetry::init(&cfg.telemetry.otlp_endpoint);
|
||||
|
||||
// 2. Detect --upgrade flag. When present, Pingora inherits listening socket
|
||||
// FDs from the upgrade Unix socket instead of binding fresh ports, enabling
|
||||
// zero-downtime cert/config reloads triggered by the K8s watcher below.
|
||||
let upgrade = std::env::args().any(|a| a == "--upgrade");
|
||||
// 2. Load DDoS detection model if configured.
|
||||
let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
|
||||
if ddos_cfg.enabled {
|
||||
match ddos::model::TrainedModel::load(
|
||||
std::path::Path::new(&ddos_cfg.model_path),
|
||||
Some(ddos_cfg.k),
|
||||
Some(ddos_cfg.threshold),
|
||||
) {
|
||||
Ok(model) => {
|
||||
let point_count = model.point_count();
|
||||
let detector = Arc::new(ddos::detector::DDoSDetector::new(model, ddos_cfg));
|
||||
tracing::info!(
|
||||
points = point_count,
|
||||
k = ddos_cfg.k,
|
||||
threshold = ddos_cfg.threshold,
|
||||
"DDoS detector loaded"
|
||||
);
|
||||
Some(detector)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "failed to load DDoS model; detection disabled");
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// 2b. Init rate limiter if configured.
|
||||
let rate_limiter = if let Some(rl_cfg) = &cfg.rate_limit {
|
||||
if rl_cfg.enabled {
|
||||
let limiter = Arc::new(rate_limit::limiter::RateLimiter::new(rl_cfg));
|
||||
let evict_limiter = limiter.clone();
|
||||
let interval = rl_cfg.eviction_interval_secs;
|
||||
std::thread::spawn(move || loop {
|
||||
std::thread::sleep(std::time::Duration::from_secs(interval));
|
||||
evict_limiter.evict_stale();
|
||||
});
|
||||
tracing::info!(
|
||||
auth_burst = rl_cfg.authenticated.burst,
|
||||
auth_rate = rl_cfg.authenticated.rate,
|
||||
unauth_burst = rl_cfg.unauthenticated.burst,
|
||||
unauth_rate = rl_cfg.unauthenticated.rate,
|
||||
"rate limiter enabled"
|
||||
);
|
||||
Some(limiter)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// 2c. Load scanner model if configured.
|
||||
let (scanner_detector, bot_allowlist) = if let Some(scanner_cfg) = &cfg.scanner {
|
||||
if scanner_cfg.enabled {
|
||||
match scanner::model::ScannerModel::load(std::path::Path::new(&scanner_cfg.model_path)) {
|
||||
Ok(mut model) => {
|
||||
let fragment_count = model.fragments.len();
|
||||
model.threshold = scanner_cfg.threshold;
|
||||
let detector = scanner::detector::ScannerDetector::new(&model, &cfg.routes);
|
||||
let handle = Arc::new(arc_swap::ArcSwap::from_pointee(detector));
|
||||
|
||||
// Start bot allowlist if rules are configured.
|
||||
let bot_allowlist = if !scanner_cfg.allowlist.is_empty() {
|
||||
let al = scanner::allowlist::BotAllowlist::spawn(
|
||||
&scanner_cfg.allowlist,
|
||||
scanner_cfg.bot_cache_ttl_secs,
|
||||
);
|
||||
tracing::info!(
|
||||
rules = scanner_cfg.allowlist.len(),
|
||||
"bot allowlist enabled"
|
||||
);
|
||||
Some(al)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Start background file watcher for hot-reload.
|
||||
if scanner_cfg.poll_interval_secs > 0 {
|
||||
let watcher_handle = handle.clone();
|
||||
let model_path = std::path::PathBuf::from(&scanner_cfg.model_path);
|
||||
let threshold = scanner_cfg.threshold;
|
||||
let routes = cfg.routes.clone();
|
||||
let interval = std::time::Duration::from_secs(scanner_cfg.poll_interval_secs);
|
||||
std::thread::spawn(move || {
|
||||
scanner::watcher::watch_scanner_model(
|
||||
watcher_handle, model_path, threshold, routes, interval,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
fragments = fragment_count,
|
||||
threshold = scanner_cfg.threshold,
|
||||
poll_interval_secs = scanner_cfg.poll_interval_secs,
|
||||
"scanner detector loaded"
|
||||
);
|
||||
(Some(handle), bot_allowlist)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "failed to load scanner model; scanner detection disabled");
|
||||
(None, None)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(None, None)
|
||||
}
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// 3. Fetch the TLS cert from K8s before Pingora binds the TLS port.
|
||||
// The Client is created and dropped within this temp runtime — we do NOT
|
||||
@@ -47,8 +308,6 @@ fn main() -> Result<()> {
|
||||
if let Err(e) =
|
||||
cert::fetch_and_write(&c, &cfg.tls.cert_path, &cfg.tls.key_path).await
|
||||
{
|
||||
// Non-fatal: Secret may not exist yet on first deploy (cert-manager
|
||||
// is still issuing), or the Secret name may differ in dev.
|
||||
tracing::warn!(error = %e, "cert fetch from K8s failed; using existing files");
|
||||
}
|
||||
}
|
||||
@@ -83,6 +342,10 @@ fn main() -> Result<()> {
|
||||
let proxy = SunbeamProxy {
|
||||
routes: cfg.routes.clone(),
|
||||
acme_routes: acme_routes.clone(),
|
||||
ddos_detector,
|
||||
scanner_detector,
|
||||
bot_allowlist,
|
||||
rate_limiter,
|
||||
};
|
||||
let mut svc = http_proxy_service(&server.configuration, proxy);
|
||||
|
||||
@@ -90,11 +353,6 @@ fn main() -> Result<()> {
|
||||
svc.add_tcp(&cfg.listen.http);
|
||||
|
||||
// Port 443: only add the TLS listener if the cert files exist.
|
||||
// On first deploy cert-manager hasn't issued the cert yet, so we start
|
||||
// HTTP-only. Once the pingora-tls Secret is created (ACME challenge
|
||||
// completes), the watcher in step 6 writes the cert files and triggers
|
||||
// a graceful upgrade. The upgrade process finds the cert files and adds
|
||||
// the TLS listener, inheriting the port-80 socket from the old process.
|
||||
let cert_exists = std::path::Path::new(&cfg.tls.cert_path).exists();
|
||||
if cert_exists {
|
||||
svc.add_tls(&cfg.listen.https, &cfg.tls.cert_path, &cfg.tls.key_path)?;
|
||||
@@ -109,7 +367,6 @@ fn main() -> Result<()> {
|
||||
server.add_service(svc);
|
||||
|
||||
// 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
|
||||
// Runs on its own OS thread + Tokio runtime — same pattern as the cert/ingress watcher.
|
||||
if let Some(ssh_cfg) = &cfg.ssh {
|
||||
let listen = ssh_cfg.listen.clone();
|
||||
let backend = ssh_cfg.backend.clone();
|
||||
@@ -123,10 +380,7 @@ fn main() -> Result<()> {
|
||||
});
|
||||
}
|
||||
|
||||
// 6. Background K8s watchers on their own OS thread + tokio runtime so they
|
||||
// don't interfere with Pingora's internal runtime. A fresh Client is
|
||||
// created here so its tower workers live on this runtime (not the
|
||||
// now-dropped temp runtime from step 3).
|
||||
// 6. Background K8s watchers on their own OS thread + tokio runtime.
|
||||
if k8s_available {
|
||||
let cert_path = cfg.tls.cert_path.clone();
|
||||
let key_path = cfg.tls.key_path.clone();
|
||||
|
||||
308
src/proxy.rs
308
src/proxy.rs
@@ -1,10 +1,20 @@
|
||||
use crate::acme::AcmeRoutes;
|
||||
use crate::config::RouteConfig;
|
||||
use crate::ddos::detector::DDoSDetector;
|
||||
use crate::ddos::model::DDoSAction;
|
||||
use crate::rate_limit::key;
|
||||
use crate::rate_limit::limiter::{RateLimitResult, RateLimiter};
|
||||
use crate::scanner::allowlist::BotAllowlist;
|
||||
use crate::scanner::detector::ScannerDetector;
|
||||
use crate::scanner::model::ScannerAction;
|
||||
use arc_swap::ArcSwap;
|
||||
use async_trait::async_trait;
|
||||
use http::header::{CONNECTION, EXPECT, HOST, UPGRADE};
|
||||
use pingora_core::{upstreams::peer::HttpPeer, Result};
|
||||
use pingora_http::{RequestHeader, ResponseHeader};
|
||||
use pingora_proxy::{ProxyHttp, Session};
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct SunbeamProxy {
|
||||
@@ -12,6 +22,14 @@ pub struct SunbeamProxy {
|
||||
/// Per-challenge route table populated by the Ingress watcher.
|
||||
/// Maps `/.well-known/acme-challenge/<token>` → solver service address.
|
||||
pub acme_routes: AcmeRoutes,
|
||||
/// Optional KNN-based DDoS detector.
|
||||
pub ddos_detector: Option<Arc<DDoSDetector>>,
|
||||
/// Optional per-request scanner detector (hot-reloadable via ArcSwap).
|
||||
pub scanner_detector: Option<Arc<ArcSwap<ScannerDetector>>>,
|
||||
/// Optional verified-bot allowlist (bypasses scanner for known crawlers/agents).
|
||||
pub bot_allowlist: Option<Arc<BotAllowlist>>,
|
||||
/// Optional per-identity rate limiter.
|
||||
pub rate_limiter: Option<Arc<RateLimiter>>,
|
||||
}
|
||||
|
||||
pub struct RequestCtx {
|
||||
@@ -41,6 +59,37 @@ fn extract_host(session: &Session) -> String {
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Extract the real client IP, preferring trusted proxy headers.
|
||||
///
|
||||
/// Priority: CF-Connecting-IP → X-Real-IP → X-Forwarded-For (first) → socket addr.
|
||||
/// All traffic arrives via Cloudflare, so CF-Connecting-IP is the authoritative
|
||||
/// real client IP. The socket address is the Cloudflare edge node.
|
||||
fn extract_client_ip(session: &Session) -> Option<IpAddr> {
|
||||
let headers = &session.req_header().headers;
|
||||
|
||||
for header in &["cf-connecting-ip", "x-real-ip"] {
|
||||
if let Some(val) = headers.get(*header).and_then(|v| v.to_str().ok()) {
|
||||
if let Ok(ip) = val.trim().parse::<IpAddr>() {
|
||||
return Some(ip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// X-Forwarded-For: client, proxy1, proxy2 — take the first entry
|
||||
if let Some(val) = headers.get("x-forwarded-for").and_then(|v| v.to_str().ok()) {
|
||||
if let Some(first) = val.split(',').next() {
|
||||
if let Ok(ip) = first.trim().parse::<IpAddr>() {
|
||||
return Some(ip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: raw socket address
|
||||
session
|
||||
.client_addr()
|
||||
.and_then(|addr| addr.as_inet().map(|a| a.ip()))
|
||||
}
|
||||
|
||||
/// Strip the scheme prefix from a backend URL like `http://host:port`.
|
||||
fn backend_addr(backend: &str) -> &str {
|
||||
backend
|
||||
@@ -137,6 +186,193 @@ impl ProxyHttp for SunbeamProxy {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// ── Detection pipeline ───────────────────────────────────────────
|
||||
// Each layer emits an unfiltered pipeline log BEFORE acting on its
|
||||
// decision. This guarantees downstream training pipelines always
|
||||
// have the full traffic picture:
|
||||
// - "ddos" log = all HTTPS traffic (scanner training data)
|
||||
// - "scanner" log = traffic that passed DDoS (rate-limit training data)
|
||||
// - "rate_limit" log = traffic that passed scanner (validation data)
|
||||
|
||||
// DDoS detection: check the client IP against the KNN model.
|
||||
if let Some(detector) = &self.ddos_detector {
|
||||
if let Some(ip) = extract_client_ip(session) {
|
||||
let method = session.req_header().method.as_str();
|
||||
let path = session.req_header().uri.path();
|
||||
let host = extract_host(session);
|
||||
let user_agent = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("user-agent")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let content_length: u64 = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("content-length")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
let has_cookies = session.req_header().headers.get("cookie").is_some();
|
||||
let has_referer = session.req_header().headers.get("referer").is_some();
|
||||
let has_accept_language = session.req_header().headers.get("accept-language").is_some();
|
||||
let accept = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("accept")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let ddos_action = detector.check(ip, method, path, &host, user_agent, content_length, has_cookies, has_referer, has_accept_language);
|
||||
let decision = if matches!(ddos_action, DDoSAction::Block) { "block" } else { "allow" };
|
||||
|
||||
tracing::info!(
|
||||
target = "pipeline",
|
||||
layer = "ddos",
|
||||
decision,
|
||||
method,
|
||||
host = %host,
|
||||
path,
|
||||
client_ip = %ip,
|
||||
user_agent,
|
||||
content_length,
|
||||
has_cookies,
|
||||
has_referer,
|
||||
has_accept_language,
|
||||
accept,
|
||||
"pipeline"
|
||||
);
|
||||
|
||||
if matches!(ddos_action, DDoSAction::Block) {
|
||||
let mut resp = ResponseHeader::build(429, None)?;
|
||||
resp.insert_header("Retry-After", "60")?;
|
||||
resp.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(resp), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scanner detection: per-request classification of scanner/bot probes.
|
||||
// The detector is behind ArcSwap for lock-free hot-reload.
|
||||
if let Some(scanner_swap) = &self.scanner_detector {
|
||||
let method = session.req_header().method.as_str();
|
||||
let path = session.req_header().uri.path();
|
||||
let host = extract_host(session);
|
||||
let prefix = host.split('.').next().unwrap_or("");
|
||||
let has_cookies = session.req_header().headers.get("cookie").is_some();
|
||||
let has_referer = session.req_header().headers.get("referer").is_some();
|
||||
let has_accept_language = session.req_header().headers.get("accept-language").is_some();
|
||||
let accept = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("accept")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
let user_agent = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("user-agent")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let content_length: u64 = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("content-length")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
let client_ip = extract_client_ip(session);
|
||||
|
||||
// Bot allowlist: verified crawlers/agents bypass the scanner model.
|
||||
// CIDR rules are instant; DNS-verified IPs are cached after
|
||||
// background reverse+forward lookup.
|
||||
let bot_reason = self.bot_allowlist.as_ref().and_then(|al| {
|
||||
client_ip.and_then(|ip| al.check(user_agent, ip))
|
||||
});
|
||||
|
||||
let (decision, score, reason) = if let Some(bot_reason) = bot_reason {
|
||||
("allow", -1.0f64, bot_reason)
|
||||
} else {
|
||||
let scanner = scanner_swap.load();
|
||||
let verdict = scanner.check(
|
||||
method, path, prefix, has_cookies, has_referer,
|
||||
has_accept_language, accept, user_agent, content_length,
|
||||
);
|
||||
let d = if matches!(verdict.action, ScannerAction::Block) { "block" } else { "allow" };
|
||||
(d, verdict.score, verdict.reason)
|
||||
};
|
||||
|
||||
let client_ip_str = client_ip
|
||||
.map(|ip| ip.to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
tracing::info!(
|
||||
target = "pipeline",
|
||||
layer = "scanner",
|
||||
decision,
|
||||
score,
|
||||
reason,
|
||||
method,
|
||||
host = %host,
|
||||
path,
|
||||
client_ip = client_ip_str,
|
||||
user_agent,
|
||||
content_length,
|
||||
has_cookies,
|
||||
has_referer,
|
||||
has_accept_language,
|
||||
accept,
|
||||
"pipeline"
|
||||
);
|
||||
|
||||
if decision == "block" {
|
||||
let mut resp = ResponseHeader::build(403, None)?;
|
||||
resp.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(resp), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting: per-identity throttling.
|
||||
if let Some(limiter) = &self.rate_limiter {
|
||||
if let Some(ip) = extract_client_ip(session) {
|
||||
let cookie = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("cookie")
|
||||
.and_then(|v| v.to_str().ok());
|
||||
let auth = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("authorization")
|
||||
.and_then(|v| v.to_str().ok());
|
||||
let rl_key = key::extract_key(cookie, auth, ip);
|
||||
let rl_result = limiter.check(ip, rl_key);
|
||||
let decision = if matches!(rl_result, RateLimitResult::Reject { .. }) { "block" } else { "allow" };
|
||||
|
||||
tracing::info!(
|
||||
target = "pipeline",
|
||||
layer = "rate_limit",
|
||||
decision,
|
||||
method = %session.req_header().method,
|
||||
host = %extract_host(session),
|
||||
path = %session.req_header().uri.path(),
|
||||
client_ip = %ip,
|
||||
user_agent = session.req_header().headers.get("user-agent").and_then(|v| v.to_str().ok()).unwrap_or("-"),
|
||||
has_cookies = cookie.is_some(),
|
||||
"pipeline"
|
||||
);
|
||||
|
||||
if let RateLimitResult::Reject { retry_after } = rl_result {
|
||||
let mut resp = ResponseHeader::build(429, None)?;
|
||||
resp.insert_header("Retry-After", retry_after.to_string())?;
|
||||
resp.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(resp), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reject unknown host prefixes with 404.
|
||||
let host = extract_host(session);
|
||||
let prefix = host.split('.').next().unwrap_or("");
|
||||
@@ -311,30 +547,92 @@ impl ProxyHttp for SunbeamProxy {
|
||||
let status = session
|
||||
.response_written()
|
||||
.map_or(0, |r| r.status.as_u16());
|
||||
let duration_ms = ctx.start_time.elapsed().as_millis();
|
||||
let duration_ms = ctx.start_time.elapsed().as_millis() as u64;
|
||||
let backend = ctx
|
||||
.route
|
||||
.as_ref()
|
||||
.map(|r| r.backend.as_str())
|
||||
.unwrap_or("-");
|
||||
let client_ip = session
|
||||
.client_addr()
|
||||
.map(|a| a.to_string())
|
||||
.unwrap_or_else(|| "-".to_string());
|
||||
let client_ip = extract_client_ip(session)
|
||||
.map(|ip| ip.to_string())
|
||||
.unwrap_or_else(|| {
|
||||
session
|
||||
.client_addr()
|
||||
.map(|a| a.to_string())
|
||||
.unwrap_or_else(|| "-".to_string())
|
||||
});
|
||||
let error_str = error.map(|e| e.to_string());
|
||||
|
||||
let content_length: u64 = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("content-length")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
let user_agent = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("user-agent")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let referer = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("referer")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let accept_language = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("accept-language")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let accept = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("accept")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let has_cookies = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("cookie")
|
||||
.is_some();
|
||||
let cf_country = session
|
||||
.req_header()
|
||||
.headers
|
||||
.get("cf-ipcountry")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("-");
|
||||
let query = session.req_header().uri.query().unwrap_or("");
|
||||
|
||||
tracing::info!(
|
||||
target = "audit",
|
||||
method = %session.req_header().method,
|
||||
host = %extract_host(session),
|
||||
path = %session.req_header().uri.path(),
|
||||
query,
|
||||
client_ip,
|
||||
status,
|
||||
duration_ms,
|
||||
content_length,
|
||||
user_agent,
|
||||
referer,
|
||||
accept_language,
|
||||
accept,
|
||||
has_cookies,
|
||||
cf_country,
|
||||
backend,
|
||||
error = error_str,
|
||||
"request"
|
||||
);
|
||||
|
||||
if let Some(detector) = &self.ddos_detector {
|
||||
if let Some(ip) = extract_client_ip(session) {
|
||||
detector.record_response(ip, status, duration_ms as u32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user