Files
proxy/src/config.rs
Sienna Meridian Satterwhite a9f1fd83bd feat(ensemble): wire ensemble into scanner and DDoS detectors
Add use_ensemble config flag (default true) to both DDoSConfig and
ScannerConfig. When enabled, detectors call compiled-in ensemble weights
instead of loading model files. Also adds ensemble decision metrics and
makes model_path optional in config.

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
2026-03-10 23:38:21 +00:00

374 lines
12 KiB
Rust

use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs;
/// SSH TCP passthrough settings: a listener address and the upstream backend
/// address. Both fields are required when the section is present.
#[derive(Debug, Deserialize, Clone)]
pub struct SshConfig {
/// Address to bind the SSH listener on, e.g. "0.0.0.0:22" or "[::]:22".
pub listen: String,
/// Upstream backend address, e.g. "gitea-ssh.devtools.svc.cluster.local:2222".
pub backend: String,
}
/// Top-level proxy configuration, deserialized from a TOML file by `Config::load`.
///
/// `listen`, `tls`, `telemetry` and `routes` are required; every `Option`
/// section may be omitted, and `kubernetes` falls back to its `Default`.
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
/// HTTP and HTTPS listener addresses.
pub listen: ListenConfig,
/// TLS certificate and key file paths.
pub tls: TlsFileConfig,
/// Telemetry settings (OTLP endpoint and metrics port).
pub telemetry: TelemetryConfig,
/// Per-host route definitions.
pub routes: Vec<RouteConfig>,
/// Optional SSH TCP passthrough (port 22 → Gitea SSH).
pub ssh: Option<SshConfig>,
/// Optional KNN-based DDoS detection.
pub ddos: Option<DDoSConfig>,
/// Optional per-identity rate limiting.
pub rate_limit: Option<RateLimitConfig>,
/// Optional per-request scanner detection.
pub scanner: Option<ScannerConfig>,
/// Kubernetes resource names and namespaces for watchers.
#[serde(default)]
pub kubernetes: KubernetesConfig,
/// Optional gossip-based cluster for multi-node state sharing.
pub cluster: Option<ClusterConfig>,
}
/// Names of the Kubernetes resources the proxy watches.
///
/// Every field has a serde default, so the whole section (or any single key)
/// may be omitted from the config file.
#[derive(Debug, Deserialize, Clone)]
pub struct KubernetesConfig {
/// Namespace where the proxy's resources live (Secret, ConfigMap, Ingresses).
/// Defaults to "ingress".
#[serde(default = "default_k8s_namespace")]
pub namespace: String,
/// Name of the TLS Secret watched for cert hot-reload.
/// Defaults to "pingora-tls".
#[serde(default = "default_tls_secret")]
pub tls_secret: String,
/// Name of the ConfigMap watched for config hot-reload.
/// Defaults to "pingora-config".
#[serde(default = "default_config_configmap")]
pub config_configmap: String,
}
/// Builds a `KubernetesConfig` from the same helper functions serde uses for
/// missing keys, so an omitted section equals a section with every key omitted.
impl Default for KubernetesConfig {
    fn default() -> Self {
        let namespace = default_k8s_namespace();
        let tls_secret = default_tls_secret();
        let config_configmap = default_config_configmap();
        Self {
            namespace,
            tls_secret,
            config_configmap,
        }
    }
}
/// Serde default for `KubernetesConfig::namespace`.
fn default_k8s_namespace() -> String {
    String::from("ingress")
}

/// Serde default for `KubernetesConfig::tls_secret`.
fn default_tls_secret() -> String {
    String::from("pingora-tls")
}

/// Serde default for `KubernetesConfig::config_configmap`.
fn default_config_configmap() -> String {
    String::from("pingora-config")
}
/// DDoS detection settings. Every field has a serde default, so an empty
/// section is valid.
#[derive(Debug, Deserialize, Clone)]
pub struct DDoSConfig {
/// Optional path to a model file; `None` when omitted.
#[serde(default)]
pub model_path: Option<String>,
/// Defaults to 5.
/// NOTE(review): presumably the neighbour count of the KNN detector — confirm.
#[serde(default = "default_k")]
pub k: usize,
/// Decision threshold. Defaults to 0.6.
#[serde(default = "default_threshold")]
pub threshold: f64,
/// Window length in seconds. Defaults to 60.
#[serde(default = "default_window_secs")]
pub window_secs: u64,
/// Window capacity. Defaults to 1000.
/// NOTE(review): what is counted (events per identity?) is not visible here.
#[serde(default = "default_window_capacity")]
pub window_capacity: usize,
/// Defaults to 10.
/// NOTE(review): presumably the minimum number of events before a decision
/// is made — confirm against the detector.
#[serde(default = "default_min_events")]
pub min_events: usize,
/// Whether DDoS detection is active. Defaults to true.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// Defaults to true.
/// NOTE(review): per the change description, when enabled the detector uses
/// compiled-in ensemble weights instead of loading a model file — confirm
/// against the detector.
#[serde(default = "default_use_ensemble")]
pub use_ensemble: bool,
}
/// Per-identity rate limiting settings. The `authenticated` and
/// `unauthenticated` buckets are required; the other fields have defaults.
#[derive(Debug, Deserialize, Clone)]
pub struct RateLimitConfig {
/// Whether rate limiting is active. Defaults to true.
#[serde(default = "default_rl_enabled")]
pub enabled: bool,
/// CIDR strings; empty when omitted.
/// NOTE(review): presumably clients in these ranges skip rate limiting —
/// confirm against the limiter.
#[serde(default)]
pub bypass_cidrs: Vec<String>,
/// Eviction interval in seconds. Defaults to 300.
#[serde(default = "default_eviction_interval")]
pub eviction_interval_secs: u64,
/// Staleness cutoff in seconds. Defaults to 600.
#[serde(default = "default_stale_after")]
pub stale_after_secs: u64,
/// Bucket parameters for authenticated identities.
pub authenticated: BucketConfig,
/// Bucket parameters for unauthenticated identities.
pub unauthenticated: BucketConfig,
}
/// Burst and rate parameters for one rate-limit bucket. Both are required.
/// NOTE(review): units (e.g. requests per second) are not visible here —
/// confirm against the limiter.
#[derive(Debug, Deserialize, Clone)]
pub struct BucketConfig {
/// Burst size.
pub burst: u32,
/// Sustained rate.
pub rate: f64,
}
/// Per-request scanner detection settings. Every field has a serde default,
/// so an empty section is valid.
#[derive(Debug, Deserialize, Clone)]
pub struct ScannerConfig {
/// Optional path to a model file; `None` when omitted.
#[serde(default)]
pub model_path: Option<String>,
/// Decision threshold. Defaults to 0.5.
#[serde(default = "default_scanner_threshold")]
pub threshold: f64,
/// Whether scanner detection is active. Defaults to true.
#[serde(default = "default_scanner_enabled")]
pub enabled: bool,
/// How often (seconds) to check the model file for changes. 0 = no hot-reload.
/// Defaults to 30.
#[serde(default = "default_scanner_poll_interval")]
pub poll_interval_secs: u64,
/// Bot allowlist rules. Verified bots bypass the scanner model.
/// Empty when omitted.
#[serde(default)]
pub allowlist: Vec<BotAllowlistRule>,
/// TTL (seconds) for verified bot IP cache entries. Defaults to 86400 (24h).
#[serde(default = "default_bot_cache_ttl")]
pub bot_cache_ttl_secs: u64,
/// Defaults to true.
/// NOTE(review): per the change description, when enabled the detector uses
/// compiled-in ensemble weights instead of loading a model file — confirm
/// against the detector.
#[serde(default = "default_use_ensemble")]
pub use_ensemble: bool,
}
/// One bot allowlist rule: a user-agent prefix plus the DNS suffixes and/or
/// CIDR ranges used to verify a client claiming that user agent.
/// `ua_prefix` and `reason` are required; both verification lists default to empty.
#[derive(Debug, Deserialize, Clone)]
pub struct BotAllowlistRule {
/// Case-insensitive UA prefix to match, e.g. "Googlebot".
pub ua_prefix: String,
/// Human-readable label for pipeline logs.
pub reason: String,
/// Reverse-DNS hostname suffixes for verification.
/// e.g. ["googlebot.com", "google.com"]
#[serde(default)]
pub dns_suffixes: Vec<String>,
/// CIDR ranges for instant IP verification.
/// e.g. ["66.249.64.0/19"]
#[serde(default)]
pub cidrs: Vec<String>,
}
/// Serde default for `ScannerConfig::bot_cache_ttl_secs`: 24 hours in seconds.
fn default_bot_cache_ttl() -> u64 {
    24 * 60 * 60
}

/// Serde default for `use_ensemble` on both `DDoSConfig` and `ScannerConfig`.
fn default_use_ensemble() -> bool {
    true
}

/// Serde default for `ScannerConfig::threshold`.
fn default_scanner_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `ScannerConfig::enabled`.
fn default_scanner_enabled() -> bool {
    true
}

/// Serde default for `ScannerConfig::poll_interval_secs`.
fn default_scanner_poll_interval() -> u64 {
    30_u64
}

/// Serde default for `RateLimitConfig::enabled`.
fn default_rl_enabled() -> bool {
    true
}

/// Serde default for `RateLimitConfig::eviction_interval_secs`.
fn default_eviction_interval() -> u64 {
    300_u64
}

/// Serde default for `RateLimitConfig::stale_after_secs`.
fn default_stale_after() -> u64 {
    600_u64
}

/// Serde default for `DDoSConfig::k`.
fn default_k() -> usize {
    5_usize
}

/// Serde default for `DDoSConfig::threshold`.
fn default_threshold() -> f64 {
    0.6_f64
}

/// Serde default for `DDoSConfig::window_secs`.
fn default_window_secs() -> u64 {
    60_u64
}

/// Serde default for `DDoSConfig::window_capacity`.
fn default_window_capacity() -> usize {
    1_000_usize
}

/// Serde default for `DDoSConfig::min_events`.
fn default_min_events() -> usize {
    10_usize
}

/// Serde default for `DDoSConfig::enabled`.
fn default_enabled() -> bool {
    true
}
/// Listener addresses for plain HTTP and HTTPS. Both are required.
#[derive(Debug, Deserialize, Clone)]
pub struct ListenConfig {
/// HTTP listener address, e.g., "0.0.0.0:80" or "[::]:80".
pub http: String,
/// HTTPS listener address, e.g., "0.0.0.0:443" or "[::]:443".
pub https: String,
}
/// File-system locations of the TLS material. Both paths are required.
/// NOTE(review): the expected file encoding is not visible here.
#[derive(Debug, Deserialize, Clone)]
pub struct TlsFileConfig {
/// Path to the certificate file.
pub cert_path: String,
/// Path to the private key file.
pub key_path: String,
}
/// Telemetry settings: an OTLP endpoint and the Prometheus metrics port.
#[derive(Debug, Deserialize, Clone)]
pub struct TelemetryConfig {
/// OTLP endpoint (required).
/// NOTE(review): the expected format (URL vs host:port) and whether an empty
/// string disables export are not visible here.
pub otlp_endpoint: String,
/// Port for the Prometheus metrics scrape endpoint. 0 = disabled.
/// Defaults to 9090.
#[serde(default = "default_metrics_port")]
pub metrics_port: u16,
}
/// Serde default for `TelemetryConfig::metrics_port`.
fn default_metrics_port() -> u16 {
    9_090_u16
}
/// A path-prefix sub-route within a virtual host.
/// Matched longest-prefix-first when multiple entries share a prefix.
///
/// `prefix` and `backend` are required; all other fields default to
/// `false`, `None` or an empty list.
#[derive(Debug, Deserialize, Clone)]
pub struct PathRoute {
/// Path prefix this sub-route matches.
pub prefix: String,
/// Backend address used when this sub-route matches.
pub backend: String,
/// Strip the matched prefix before forwarding to the backend.
#[serde(default)]
pub strip_prefix: bool,
/// Defaults to false.
/// NOTE(review): presumably enables WebSocket handling for this sub-route —
/// confirm against the proxy logic.
#[serde(default)]
pub websocket: bool,
/// URL for auth subrequest (like nginx `auth_request`).
/// If set, the proxy makes an HTTP request to this URL before forwarding.
/// A non-2xx response blocks the request with 403.
#[serde(default)]
pub auth_request: Option<String>,
/// Headers to capture from the auth subrequest response and forward upstream.
#[serde(default)]
pub auth_capture_headers: Vec<String>,
/// Prefix to prepend to the upstream path after stripping.
#[serde(default)]
pub upstream_path_prefix: Option<String>,
}
/// A URL rewrite rule: requests matching `pattern` are served the file at `target`.
/// Both fields are required.
#[derive(Debug, Deserialize, Clone)]
pub struct RewriteRule {
/// Regex pattern matched against the request path.
/// Stored as a plain string; it is not compiled or validated in this struct.
pub pattern: String,
/// Static file path to serve (relative to `static_root`).
pub target: String,
}
/// A find/replace rule applied to response bodies.
/// `find` and `replace` are required; `types` defaults to an empty list.
#[derive(Debug, Deserialize, Clone)]
pub struct BodyRewrite {
/// String to find in the response body.
pub find: String,
/// String to replace it with.
pub replace: String,
/// Content-types to apply this rewrite to (e.g. `["text/html"]`).
/// NOTE(review): how an empty list is treated (all types vs none) is decided
/// by the consumer and not visible here.
#[serde(default)]
pub types: Vec<String>,
}
/// A response header to add to every response for this route.
/// Both fields are required.
#[derive(Debug, Deserialize, Clone)]
pub struct HeaderRule {
/// Header name.
pub name: String,
/// Header value.
pub value: String,
}
/// Per-route HTTP response cache configuration.
/// Every field has a serde default, so an empty section is valid.
#[derive(Debug, Deserialize, Clone)]
pub struct CacheConfig {
/// Whether caching is active for the route. Defaults to true.
#[serde(default = "default_cache_enabled")]
pub enabled: bool,
/// Default TTL in seconds when the upstream response has no Cache-Control header.
/// Defaults to 60.
#[serde(default = "default_cache_ttl")]
pub default_ttl_secs: u64,
/// Seconds to serve stale content while revalidating in the background.
/// Defaults to 0.
#[serde(default)]
pub stale_while_revalidate_secs: u32,
/// Max cacheable response body size in bytes (0 = no limit).
/// Defaults to 0.
#[serde(default)]
pub max_file_size: usize,
}
/// Serde default for `CacheConfig::enabled`.
fn default_cache_enabled() -> bool {
    true
}

/// Serde default for `CacheConfig::default_ttl_secs`.
fn default_cache_ttl() -> u64 {
    60_u64
}
/// One host-level route. `host_prefix` and `backend` are required; all other
/// fields default to `false`, `None` or an empty list.
#[derive(Debug, Deserialize, Clone)]
pub struct RouteConfig {
/// Host prefix this route applies to.
/// NOTE(review): the exact matching rule lives in the router — confirm there.
pub host_prefix: String,
/// Backend address for this route; a matching entry in `paths` overrides it.
pub backend: String,
/// Defaults to false.
/// NOTE(review): presumably enables WebSocket handling for this route —
/// confirm against the proxy logic.
#[serde(default)]
pub websocket: bool,
/// When true, plain-HTTP requests for this host are forwarded as-is rather
/// than being redirected to HTTPS. Defaults to false (redirect enforced).
#[serde(default)]
pub disable_secure_redirection: bool,
/// Optional path-based sub-routes (longest prefix wins).
/// If the request path matches a sub-route, its backend is used instead.
#[serde(default)]
pub paths: Vec<PathRoute>,
/// Root directory for static file serving. If set, the proxy will try
/// to serve files from this directory before forwarding to the upstream.
#[serde(default)]
pub static_root: Option<String>,
/// Fallback file for SPA routing (e.g. "index.html").
#[serde(default)]
pub fallback: Option<String>,
/// URL rewrite rules applied before static file lookup.
#[serde(default)]
pub rewrites: Vec<RewriteRule>,
/// Response body find/replace rules (like nginx `sub_filter`).
#[serde(default)]
pub body_rewrites: Vec<BodyRewrite>,
/// Extra response headers added to every response for this route.
#[serde(default)]
pub response_headers: Vec<HeaderRule>,
/// HTTP response cache configuration for this route.
/// `None` when the section is omitted.
#[serde(default)]
pub cache: Option<CacheConfig>,
}
/// Gossip-based cluster settings for multi-node state sharing.
/// Only `tenant` is required; every other field has a default.
#[derive(Debug, Deserialize, Clone)]
pub struct ClusterConfig {
/// Whether clustering is active. Defaults to true.
#[serde(default = "default_cluster_enabled")]
pub enabled: bool,
/// Tenant UUID — isolates unrelated deployments.
/// Stored as a plain string; it is not validated as a UUID in this struct.
pub tenant: String,
/// UDP port for gossip protocol. Defaults to 11204.
#[serde(default = "default_gossip_port")]
pub gossip_port: u16,
/// Path to persist the node identity key. `None` when omitted.
#[serde(default)]
pub key_path: Option<String>,
/// Peer discovery configuration. Falls back to `DiscoveryConfig::default()`.
#[serde(default)]
pub discovery: DiscoveryConfig,
/// Bandwidth broadcast settings. `None` when omitted.
#[serde(default)]
pub bandwidth: Option<BandwidthClusterConfig>,
/// Model distribution settings. `None` when omitted.
#[serde(default)]
pub models: Option<ModelsConfig>,
}
/// Serde default for `ClusterConfig::enabled`.
fn default_cluster_enabled() -> bool {
    true
}

/// Serde default for `ClusterConfig::gossip_port`.
fn default_gossip_port() -> u16 {
    11_204_u16
}
/// Peer discovery settings for the cluster. Every field has a serde default.
#[derive(Debug, Deserialize, Clone)]
pub struct DiscoveryConfig {
/// "k8s" or "bootstrap".
/// Defaults to "k8s". Stored as a plain string; other values are not
/// rejected at deserialization time.
#[serde(default = "default_discovery_method")]
pub method: String,
/// Headless service for k8s DNS discovery.
/// `None` when omitted.
#[serde(default)]
pub headless_service: Option<String>,
/// Static bootstrap peers ("endpointid@host:port").
/// `None` when omitted.
#[serde(default)]
pub bootstrap_peers: Option<Vec<String>>,
}
/// Default discovery settings: the default method, with no headless service
/// and no bootstrap peers configured.
impl Default for DiscoveryConfig {
    fn default() -> Self {
        let method = default_discovery_method();
        Self {
            method,
            headless_service: Option::None,
            bootstrap_peers: Option::None,
        }
    }
}
/// Serde default for `DiscoveryConfig::method`.
fn default_discovery_method() -> String {
    String::from("k8s")
}
/// Bandwidth broadcast settings for the cluster. Every field has a serde default.
#[derive(Debug, Deserialize, Clone)]
pub struct BandwidthClusterConfig {
/// Broadcast interval in seconds. Defaults to 1.
#[serde(default = "default_broadcast_interval")]
pub broadcast_interval_secs: u64,
/// Stale-peer timeout in seconds. Defaults to 30.
#[serde(default = "default_stale_peer_timeout")]
pub stale_peer_timeout_secs: u64,
/// Sliding window size for aggregate bandwidth rate calculation.
/// In seconds; defaults to 30.
#[serde(default = "default_meter_window")]
pub meter_window_secs: u64,
}
/// Serde default for `BandwidthClusterConfig::meter_window_secs`.
fn default_meter_window() -> u64 {
    30_u64
}

/// Serde default for `BandwidthClusterConfig::broadcast_interval_secs`.
fn default_broadcast_interval() -> u64 {
    1_u64
}

/// Serde default for `BandwidthClusterConfig::stale_peer_timeout_secs`.
fn default_stale_peer_timeout() -> u64 {
    30_u64
}
/// Model distribution settings for the cluster. Every field has a serde default.
#[derive(Debug, Deserialize, Clone)]
pub struct ModelsConfig {
/// Directory holding model files. Defaults to "/models".
#[serde(default = "default_model_dir")]
pub model_dir: String,
/// Maximum model size in bytes. Defaults to 52_428_800 (50MB).
#[serde(default = "default_max_model_size")]
pub max_model_size_bytes: u64,
/// Chunk size. Defaults to 65_536 (64KB).
/// NOTE(review): presumably the transfer chunk size in bytes — confirm.
#[serde(default = "default_chunk_size")]
pub chunk_size: u32,
}
/// Serde default for `ModelsConfig::model_dir`.
fn default_model_dir() -> String {
    String::from("/models")
}

/// Serde default for `ModelsConfig::max_model_size_bytes`: 50 MiB.
fn default_max_model_size() -> u64 {
    50 * 1024 * 1024
}

/// Serde default for `ModelsConfig::chunk_size`: 64 KiB.
fn default_chunk_size() -> u32 {
    64 * 1024
}
impl Config {
    /// Reads the file at `path` and parses its contents as TOML into a `Config`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read, or if its contents do not
    /// deserialize into `Config`. Both errors carry the offending path as
    /// context, so the message stays accurate whatever the file is called.
    pub fn load(path: &str) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("reading config from {path}"))?;
        // Name the real path rather than a hard-coded "config.toml": the
        // caller may load a file with any name.
        toml::from_str(&raw).with_context(|| format!("parsing config from {path}"))
    }
}