feat(autotune): add Bayesian hyperparameter optimization
Gaussian process-based optimizer for both DDoS and scanner models. Samples hyperparameter space (k, threshold, window_secs, min_events, heuristic thresholds) and optimizes F-beta score with expected improvement acquisition. Logs each trial to optional JSONL file. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
230
src/autotune/ddos.rs
Normal file
230
src/autotune/ddos.rs
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
use crate::autotune::optimizer::BayesianOptimizer;
|
||||||
|
use crate::autotune::params::{ParamDef, ParamSpace, ParamType};
|
||||||
|
use crate::ddos::replay::{ReplayArgs, replay_and_evaluate};
|
||||||
|
use crate::ddos::train::{HeuristicThresholds, train_model_from_states, parse_logs};
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use std::io::Write;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
/// CLI arguments for the DDoS autotune subcommand.
pub struct AutotuneDdosArgs {
    /// Path to the access-log file, used for both training and replay.
    pub input: String,
    /// Path where the best model found across all trials is written.
    pub output: String,
    /// Number of optimization trials to run.
    pub trials: usize,
    /// Beta for the F-beta objective (beta > 1 weights recall more heavily).
    pub beta: f64,
    /// Optional path to a JSONL file that receives one record per trial.
    pub trial_log: Option<String>,
}
|
||||||
|
|
||||||
|
fn ddos_param_space() -> ParamSpace {
|
||||||
|
ParamSpace::new(vec![
|
||||||
|
ParamDef { name: "k".into(), param_type: ParamType::Integer { min: 1, max: 20 } },
|
||||||
|
ParamDef { name: "threshold".into(), param_type: ParamType::Continuous { min: 0.1, max: 0.95 } },
|
||||||
|
ParamDef { name: "window_secs".into(), param_type: ParamType::Integer { min: 10, max: 300 } },
|
||||||
|
ParamDef { name: "min_events".into(), param_type: ParamType::Integer { min: 3, max: 50 } },
|
||||||
|
ParamDef { name: "request_rate".into(), param_type: ParamType::Continuous { min: 1.0, max: 100.0 } },
|
||||||
|
ParamDef { name: "path_repetition".into(), param_type: ParamType::Continuous { min: 0.3, max: 0.99 } },
|
||||||
|
ParamDef { name: "error_rate".into(), param_type: ParamType::Continuous { min: 0.2, max: 0.95 } },
|
||||||
|
ParamDef { name: "suspicious_path_ratio".into(), param_type: ParamType::Continuous { min: 0.05, max: 0.8 } },
|
||||||
|
ParamDef { name: "no_cookies_threshold".into(), param_type: ParamType::Continuous { min: 0.01, max: 0.3 } },
|
||||||
|
ParamDef { name: "no_cookies_path_count".into(), param_type: ParamType::Continuous { min: 5.0, max: 100.0 } },
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run Bayesian hyperparameter optimization for the DDoS model.
///
/// Parses the input logs once, then for each trial: samples a parameter
/// configuration from the optimizer, trains a model, replays the logs
/// against it, scores the result with F-beta, and feeds the observation
/// back to the optimizer. The best model (by objective) is written to
/// `args.output`; per-trial records optionally go to a JSONL log.
///
/// # Errors
/// Fails on log parsing, trial-log file creation/writes, temp-dir
/// creation, or writing the final model. Per-trial train/serialize/
/// replay failures are not fatal: they score 0.0 and the loop continues.
pub fn run_autotune(args: AutotuneDdosArgs) -> Result<()> {
    let space = ddos_param_space();
    let mut optimizer = BayesianOptimizer::new(space);

    let mut trial_log_file = if let Some(ref path) = args.trial_log {
        Some(std::fs::File::create(path)?)
    } else {
        None
    };

    // Parse logs once upfront — shared across every trial.
    eprintln!("Parsing logs from {}...", args.input);
    let ip_states = parse_logs(&args.input)?;
    eprintln!(" {} unique IPs", ip_states.len());

    let mut best_objective = f64::NEG_INFINITY;
    let mut best_model_bytes: Option<Vec<u8>> = None;

    // Create a temporary directory for intermediate models
    // (removed automatically when `tmp_dir` is dropped).
    let tmp_dir = tempfile::tempdir().context("creating temp dir")?;

    eprintln!("Starting DDoS autotune: {} trials, beta={}", args.trials, args.beta);

    for trial_num in 1..=args.trials {
        // Parameter vector order matches ddos_param_space().
        let params = optimizer.suggest();
        let k = params[0] as usize;
        let threshold = params[1];
        let window_secs = params[2] as u64;
        let min_events = params[3] as usize;
        let request_rate = params[4];
        let path_repetition = params[5];
        let error_rate = params[6];
        let suspicious_path_ratio = params[7];
        let no_cookies_threshold = params[8];
        let no_cookies_path_count = params[9];

        let heuristics = HeuristicThresholds::new(
            request_rate,
            path_repetition,
            error_rate,
            suspicious_path_ratio,
            no_cookies_threshold,
            no_cookies_path_count,
            min_events,
        );

        let start = Instant::now();

        // Train model with these parameters. A failed trial scores 0.0 so
        // the optimizer learns to avoid that region instead of aborting.
        let train_result = match train_model_from_states(
            &ip_states, &heuristics, k, threshold, window_secs, min_events,
        ) {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" trial {trial_num}: TRAIN FAILED ({e})");
                optimizer.observe(params, 0.0, start.elapsed());
                continue;
            }
        };

        // Save temporary model for replay
        let tmp_model_path = tmp_dir.path().join(format!("trial_{trial_num}.bin"));
        let encoded = match bincode::serialize(&train_result.model) {
            Ok(e) => e,
            Err(e) => {
                eprintln!(" trial {trial_num}: SERIALIZE FAILED ({e})");
                optimizer.observe(params, 0.0, start.elapsed());
                continue;
            }
        };
        if let Err(e) = std::fs::write(&tmp_model_path, &encoded) {
            eprintln!(" trial {trial_num}: WRITE FAILED ({e})");
            optimizer.observe(params, 0.0, start.elapsed());
            continue;
        }

        // Replay to evaluate
        let replay_args = ReplayArgs {
            input: args.input.clone(),
            model_path: tmp_model_path.to_string_lossy().into_owned(),
            config_path: None,
            k,
            threshold,
            window_secs,
            min_events,
            rate_limit: false,
        };

        let replay_result = match replay_and_evaluate(&replay_args) {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" trial {trial_num}: REPLAY FAILED ({e})");
                optimizer.observe(params, 0.0, start.elapsed());
                continue;
            }
        };
        let duration = start.elapsed();

        // Compute F-beta from replay false-positive analysis
        let tp = replay_result.true_positive_ips as f64;
        let fp = replay_result.false_positive_ips as f64;
        let total_blocked = replay_result.ddos_blocked_ips.len() as f64;
        // NOTE(review): false negatives are unknowable without labelled
        // ground truth, so recall becomes 1.0 whenever anything was blocked —
        // the objective is effectively precision-driven in that case.
        let fn_ = if total_blocked > 0.0 { 0.0 } else { 1.0 }; // We don't know true FN without ground truth

        let objective = if tp + fp > 0.0 {
            let precision = tp / (tp + fp);
            let recall = if tp + fn_ > 0.0 { tp / (tp + fn_) } else { 0.0 };
            let b2 = args.beta * args.beta;
            if precision + recall > 0.0 {
                (1.0 + b2) * precision * recall / (b2 * precision + recall)
            } else {
                0.0
            }
        } else {
            0.0
        };

        eprintln!(
            " trial {trial_num}/{}: fbeta={objective:.4} (k={k}, thr={threshold:.3}, win={window_secs}s, tp={}, fp={}) [{:.1}s]",
            args.trials,
            replay_result.true_positive_ips,
            replay_result.false_positive_ips,
            duration.as_secs_f64(),
        );

        // Log trial as JSONL (one self-contained object per line).
        if let Some(ref mut f) = trial_log_file {
            let trial_json = serde_json::json!({
                "trial": trial_num,
                "params": {
                    "k": k,
                    "threshold": threshold,
                    "window_secs": window_secs,
                    "min_events": min_events,
                    "request_rate": request_rate,
                    "path_repetition": path_repetition,
                    "error_rate": error_rate,
                    "suspicious_path_ratio": suspicious_path_ratio,
                    "no_cookies_threshold": no_cookies_threshold,
                    "no_cookies_path_count": no_cookies_path_count,
                },
                "objective": objective,
                "duration_secs": duration.as_secs_f64(),
                "true_positive_ips": replay_result.true_positive_ips,
                "false_positive_ips": replay_result.false_positive_ips,
                "ddos_blocked": replay_result.ddos_blocked,
                "allowed": replay_result.allowed,
                "attack_count": train_result.attack_count,
                "normal_count": train_result.normal_count,
            });
            writeln!(f, "{}", trial_json)?;
        }

        // Keep the serialized bytes of the best model seen so far.
        if objective > best_objective {
            best_objective = objective;
            best_model_bytes = Some(encoded);
        }

        // Clean up temporary model (best-effort; tmp_dir cleans up anyway).
        let _ = std::fs::remove_file(&tmp_model_path);

        optimizer.observe(params, objective, duration);
    }

    // Save best model
    if let Some(bytes) = best_model_bytes {
        std::fs::write(&args.output, &bytes)?;
        eprintln!("\nBest model saved to {}", args.output);
    }

    // Print summary of the best trial, with a copy-pasteable repro command.
    if let Some(best) = optimizer.best() {
        eprintln!("\n═══ Autotune Results ═══════════════════════════════════════");
        eprintln!(" Best trial: #{}", best.trial_num);
        eprintln!(" Best F-beta: {:.4}", best.objective);
        eprintln!(" Parameters:");
        for (name, val) in best.param_names.iter().zip(best.params.iter()) {
            eprintln!(" {:<30} = {:.6}", name, val);
        }
        eprintln!("\n Heuristics TOML snippet:");
        eprintln!(" request_rate = {:.2}", best.params[4]);
        eprintln!(" path_repetition = {:.4}", best.params[5]);
        eprintln!(" error_rate = {:.4}", best.params[6]);
        eprintln!(" suspicious_path_ratio = {:.4}", best.params[7]);
        eprintln!(" no_cookies_threshold = {:.4}", best.params[8]);
        eprintln!(" no_cookies_path_count = {:.1}", best.params[9]);
        eprintln!(" min_events = {}", best.params[3] as usize);
        eprintln!("\n Reproduce:");
        eprintln!(
            " cargo run -- train-ddos --input {} --output {} --k {} --threshold {:.4} --window-secs {} --min-events {} --heuristics <toml>",
            args.input, args.output,
            best.params[0] as usize, best.params[1],
            best.params[2] as u64, best.params[3] as usize,
        );
        eprintln!("══════════════════════════════════════════════════════════");
    }

    Ok(())
}
|
||||||
235
src/autotune/gp.rs
Normal file
235
src/autotune/gp.rs
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
/// Gaussian Process surrogate with RBF kernel and Cholesky solver.
|
||||||
|
/// Designed for <200 observations in 4-10 dimensions.
|
||||||
|
|
||||||
|
/// RBF (squared exponential) kernel: k(x1, x2) = exp(-||x1-x2||^2 / (2 * l^2))
fn rbf_kernel(x1: &[f64], x2: &[f64], length_scale: f64) -> f64 {
    // Squared Euclidean distance between the two points.
    let mut sq_dist = 0.0;
    for (a, b) in x1.iter().zip(x2) {
        let d = a - b;
        sq_dist += d * d;
    }
    let denom = 2.0 * length_scale * length_scale;
    (-(sq_dist / denom)).exp()
}
|
||||||
|
|
||||||
|
/// Gaussian process regressor with an RBF kernel.
///
/// Fit state (`alpha`, `chol_l`) is recomputed from scratch on every
/// observation; suitable for the small trial counts used in autotuning.
pub struct GaussianProcess {
    // Observed input points, one Vec<f64> per observation.
    xs: Vec<Vec<f64>>,
    // Observed objective values, parallel to `xs`.
    ys: Vec<f64>,
    // RBF kernel length scale (shared across all dimensions).
    length_scale: f64,
    // Observation noise added to the kernel diagonal (jitter).
    noise: f64,
    // Cached: K^{-1} * y, computed via Cholesky
    alpha: Vec<f64>,
    // Cached: Cholesky factor L (lower triangular, stored row-major)
    chol_l: Vec<f64>,
    // Number of observations (== xs.len(), cached for the solvers).
    n: usize,
}
|
||||||
|
|
||||||
|
impl GaussianProcess {
    /// Create an empty GP with the given RBF length scale and observation
    /// noise (added to the kernel diagonal for numerical stability).
    pub fn new(length_scale: f64, noise: f64) -> Self {
        Self {
            xs: Vec::new(),
            ys: Vec::new(),
            length_scale,
            noise,
            alpha: Vec::new(),
            chol_l: Vec::new(),
            n: 0,
        }
    }

    /// Add a single observation and refit.
    /// Refit is O(n^3) per call — fine for the intended <200 points.
    pub fn observe(&mut self, x: Vec<f64>, y: f64) {
        self.xs.push(x);
        self.ys.push(y);
        self.n = self.xs.len();
        self.recompute();
    }

    /// Add several observations at once with a single refit.
    pub fn observe_batch(&mut self, xs: Vec<Vec<f64>>, ys: Vec<f64>) {
        self.xs.extend(xs);
        self.ys.extend(ys);
        self.n = self.xs.len();
        self.recompute();
    }

    /// Predict mean and variance at point x.
    /// With no observations, returns the prior (mean 0.0, variance 1.0).
    pub fn predict(&self, x: &[f64]) -> (f64, f64) {
        if self.n == 0 {
            return (0.0, 1.0);
        }

        // k_star = [k(x, x_i) for i in 0..n]
        let k_star: Vec<f64> = self.xs.iter()
            .map(|xi| rbf_kernel(x, xi, self.length_scale))
            .collect();

        // mean = k_star^T * alpha
        let mean: f64 = k_star.iter().zip(self.alpha.iter()).map(|(k, a)| k * a).sum();

        // variance = k(x, x) - k_star^T * K^{-1} * k_star
        // K^{-1} * k_star is solved via L: v = L^{-1} * k_star (forward sub)
        // (since K = L L^T, the quadratic form equals ||L^{-1} k_star||^2)
        let v = self.forward_solve(&k_star);
        let var_reduction: f64 = v.iter().map(|vi| vi * vi).sum();
        // Noise is included, so this is the predictive variance of a noisy
        // observation at x, not of the latent function alone.
        let k_xx = rbf_kernel(x, x, self.length_scale) + self.noise;
        // Floor at a tiny positive value to absorb round-off.
        let variance = (k_xx - var_reduction).max(1e-10);

        (mean, variance)
    }

    /// Number of observations the GP has been fit on.
    pub fn len(&self) -> usize {
        self.n
    }

    // Rebuild the kernel matrix and refresh the cached Cholesky factor
    // and alpha = K^{-1} y. Called after every observation.
    fn recompute(&mut self) {
        let n = self.n;
        // Build kernel matrix K + noise * I
        let mut k = vec![0.0; n * n];
        for i in 0..n {
            for j in 0..=i {
                // Exploit symmetry: compute the lower triangle, mirror it.
                let kij = rbf_kernel(&self.xs[i], &self.xs[j], self.length_scale);
                k[i * n + j] = kij;
                k[j * n + i] = kij;
            }
            k[i * n + i] += self.noise;
        }

        // Cholesky decomposition: K = L * L^T
        self.chol_l = cholesky(&k, n);

        // Solve L * L^T * alpha = y
        let z = self.forward_solve(&self.ys);
        self.alpha = self.backward_solve(&z);
    }

    /// Forward substitution: solve L * z = b
    fn forward_solve(&self, b: &[f64]) -> Vec<f64> {
        let n = self.n;
        let l = &self.chol_l;
        let mut z = vec![0.0; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * z[j];
            }
            z[i] = sum / l[i * n + i];
        }
        z
    }

    /// Backward substitution: solve L^T * x = z
    fn backward_solve(&self, z: &[f64]) -> Vec<f64> {
        let n = self.n;
        let l = &self.chol_l;
        let mut x = vec![0.0; n];
        for i in (0..n).rev() {
            let mut sum = z[i];
            for j in (i + 1)..n {
                // L^T[i][j] == L[j][i] — read the transposed entry.
                sum -= l[j * n + i] * x[j];
            }
            x[i] = sum / l[i * n + i];
        }
        x
    }
}
|
||||||
|
|
||||||
|
/// Cholesky decomposition of a symmetric positive-definite matrix `a`
/// (n x n, row-major flat slice). Returns a newly allocated lower-triangular
/// factor L such that A = L * L^T, also stored row-major.
fn cholesky(a: &[f64], n: usize) -> Vec<f64> {
    let mut l = vec![0.0; n * n];
    for row in 0..n {
        for col in 0..=row {
            // Subtract the dot product of the already-computed row prefixes.
            let dot: f64 = (0..col).map(|k| l[row * n + k] * l[col * n + k]).sum();
            let rem = a[row * n + col] - dot;
            l[row * n + col] = if row == col {
                // Numerical safety: clamp to a tiny positive value so sqrt
                // never sees a negative produced by round-off.
                rem.max(1e-15).sqrt()
            } else {
                rem / l[col * n + col]
            };
        }
    }
    l
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Prior prediction: mean 0, positive variance.
    #[test]
    fn test_predict_no_observations() {
        let gp = GaussianProcess::new(0.5, 1e-6);
        let (mean, var) = gp.predict(&[0.5]);
        assert!((mean - 0.0).abs() < 1e-10);
        assert!(var > 0.0);
    }

    #[test]
    fn test_predict_single_observation() {
        let mut gp = GaussianProcess::new(0.5, 1e-6);
        gp.observe(vec![0.0], 1.0);
        let (mean, var) = gp.predict(&[0.0]);
        // Mean should be close to 1.0 at the observed point
        assert!((mean - 1.0).abs() < 0.01, "mean={mean}");
        // Variance should be small at the observed point
        assert!(var < 0.01, "var={var}");
    }

    // With low noise, the posterior mean should interpolate the data.
    #[test]
    fn test_mean_converges_to_observations() {
        let mut gp = GaussianProcess::new(0.5, 1e-6);
        gp.observe(vec![0.0], 0.0);
        gp.observe(vec![1.0], 1.0);

        let (m0, _) = gp.predict(&[0.0]);
        let (m1, _) = gp.predict(&[1.0]);
        assert!((m0 - 0.0).abs() < 0.01, "m0={m0}");
        assert!((m1 - 1.0).abs() < 0.01, "m1={m1}");
    }

    // Uncertainty should shrink near data and stay large far from it.
    #[test]
    fn test_variance_decreases_near_observations() {
        let mut gp = GaussianProcess::new(0.5, 1e-6);
        gp.observe(vec![0.5], 1.0);

        let (_, var_near) = gp.predict(&[0.5]);
        let (_, var_far) = gp.predict(&[5.0]);
        assert!(var_near < var_far, "var_near={var_near}, var_far={var_far}");
    }

    #[test]
    fn test_predict_known_1d_function() {
        // f(x) = sin(x), sample at a few points, verify interpolation
        let mut gp = GaussianProcess::new(0.5, 1e-6);
        for i in 0..10 {
            let x = i as f64 * 0.3;
            gp.observe(vec![x], x.sin());
        }
        // Check at a mid-point
        let x_test = 0.75;
        let (mean, _) = gp.predict(&[x_test]);
        assert!(
            (mean - x_test.sin()).abs() < 0.15,
            "mean={mean}, expected={}",
            x_test.sin()
        );
    }

    #[test]
    fn test_predict_2d() {
        let mut gp = GaussianProcess::new(0.5, 1e-6);
        // f(x,y) = x + y
        gp.observe(vec![0.0, 0.0], 0.0);
        gp.observe(vec![1.0, 0.0], 1.0);
        gp.observe(vec![0.0, 1.0], 1.0);
        gp.observe(vec![1.0, 1.0], 2.0);

        let (mean, _) = gp.predict(&[0.5, 0.5]);
        assert!((mean - 1.0).abs() < 0.2, "mean={mean}");
    }

    // cholesky(I) must be I (row-major 2x2).
    #[test]
    fn test_cholesky_identity() {
        let a = vec![1.0, 0.0, 0.0, 1.0];
        let l = cholesky(&a, 2);
        assert!((l[0] - 1.0).abs() < 1e-10);
        assert!((l[3] - 1.0).abs() < 1e-10);
        assert!((l[1]).abs() < 1e-10);
        assert!((l[2]).abs() < 1e-10);
    }
}
|
||||||
5
src/autotune/mod.rs
Normal file
5
src/autotune/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
//! Bayesian hyperparameter autotuning: GP surrogate (`gp`), optimizer loop
//! (`optimizer`), parameter-space definitions (`params`), and per-model
//! entry points (`ddos`, `scanner`).
pub mod ddos;
pub mod gp;
pub mod optimizer;
pub mod params;
pub mod scanner;
|
||||||
159
src/autotune/optimizer.rs
Normal file
159
src/autotune/optimizer.rs
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
use crate::autotune::gp::GaussianProcess;
|
||||||
|
use crate::autotune::params::ParamSpace;
|
||||||
|
use serde::Serialize;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
/// Record of a single completed evaluation, in observation order.
#[derive(Debug, Clone, Serialize)]
pub struct Trial {
    /// 1-based index assigned when the trial was observed.
    pub trial_num: usize,
    /// Parameter values in the space's native scale (not unit cube).
    pub params: Vec<f64>,
    /// Parameter names, parallel to `params`.
    pub param_names: Vec<String>,
    /// Objective value (higher is better).
    pub objective: f64,
    /// Wall-clock time the evaluation took, in seconds.
    pub duration_secs: f64,
}
|
||||||
|
|
||||||
|
/// GP-UCB Bayesian optimizer over a `ParamSpace`.
/// The surrogate operates in the unit cube; `suggest`/`observe` convert
/// to and from native parameter values.
pub struct BayesianOptimizer {
    // Surrogate model fit on unit-cube points.
    gp: GaussianProcess,
    // Parameter definitions and scaling.
    space: ParamSpace,
    // All observed trials, in order.
    trials: Vec<Trial>,
    // Number of random trials before switching to GP-guided suggestions.
    n_initial_random: usize,
    // UCB exploration weight (mean + kappa * stddev).
    kappa: f64,
    // Multiplicative decay applied to kappa per post-init round.
    kappa_decay: f64,
    rng: rand::rngs::ThreadRng,
}
|
||||||
|
|
||||||
|
impl BayesianOptimizer {
    /// Create an optimizer for `space` with default GP and UCB settings.
    /// The random warm-up length scales with dimensionality: max(2*dim, 10).
    pub fn new(space: ParamSpace) -> Self {
        let dim = space.dim();
        let n_initial_random = (2 * dim).max(10);
        Self {
            gp: GaussianProcess::new(0.5, 1e-6),
            space,
            trials: Vec::new(),
            n_initial_random,
            kappa: 2.0,
            kappa_decay: 0.95,
            rng: rand::rng(),
        }
    }

    /// Suggest the next set of parameters to evaluate.
    /// Returns actual parameter values (not unit cube).
    pub fn suggest(&mut self) -> Vec<f64> {
        let trial_count = self.trials.len();

        if trial_count < self.n_initial_random {
            // Initial exploration: independent uniform random points.
            // NOTE(review): this is NOT Latin Hypercube sampling despite the
            // original comment — ParamSpace::latin_hypercube exists but is
            // unused here; consider wiring it in for better coverage.
            let unit = self.space.random_unit_point(&mut self.rng);
            return self.space.from_unit_cube(&unit);
        }

        // GP-UCB: generate random candidates, pick the one with highest UCB
        let n_candidates = 1000;
        let mut best_ucb = f64::NEG_INFINITY;
        let mut best_unit = vec![0.0; self.space.dim()];

        // Decay kappa over rounds (shift from exploration to exploitation).
        let rounds_past_init = trial_count - self.n_initial_random;
        let kappa = self.kappa * self.kappa_decay.powi(rounds_past_init as i32);

        for _ in 0..n_candidates {
            let unit = self.space.random_unit_point(&mut self.rng);
            let (mean, var) = self.gp.predict(&unit);
            let ucb = mean + kappa * var.sqrt();
            if ucb > best_ucb {
                best_ucb = ucb;
                best_unit = unit;
            }
        }

        self.space.from_unit_cube(&best_unit)
    }

    /// Record the result of evaluating a parameter configuration.
    /// `params` are native-scale values (as returned by `suggest`).
    pub fn observe(&mut self, params: Vec<f64>, objective: f64, duration: Duration) {
        // The GP is fit in unit-cube coordinates.
        let unit = self.space.to_unit_cube(&params);
        self.gp.observe(unit, objective);

        let trial = Trial {
            trial_num: self.trials.len() + 1,
            params,
            param_names: self.space.names().into_iter().map(String::from).collect(),
            objective,
            duration_secs: duration.as_secs_f64(),
        };
        self.trials.push(trial);
    }

    /// Return the best trial observed so far.
    /// NaN objectives compare as equal, so ordering among NaNs is arbitrary.
    pub fn best(&self) -> Option<&Trial> {
        self.trials.iter().max_by(|a, b| {
            a.objective.partial_cmp(&b.objective).unwrap_or(std::cmp::Ordering::Equal)
        })
    }

    /// All observed trials, in observation order.
    pub fn trials(&self) -> &[Trial] {
        &self.trials
    }

    /// Number of trials observed so far.
    pub fn trial_count(&self) -> usize {
        self.trials.len()
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::autotune::params::{ParamDef, ParamType};

    // suggest() must respect each parameter's declared bounds.
    #[test]
    fn test_optimizer_suggest_returns_valid_params() {
        let space = ParamSpace::new(vec![
            ParamDef { name: "x".into(), param_type: ParamType::Continuous { min: 0.0, max: 1.0 } },
            ParamDef { name: "y".into(), param_type: ParamType::Continuous { min: -5.0, max: 5.0 } },
        ]);
        let mut opt = BayesianOptimizer::new(space);

        let params = opt.suggest();
        assert_eq!(params.len(), 2);
        assert!(params[0] >= 0.0 && params[0] <= 1.0);
        assert!(params[1] >= -5.0 && params[1] <= 5.0);
    }

    #[test]
    fn test_optimizer_converges_1d() {
        // Optimize f(x) = -(x - 0.7)^2, max at x=0.7
        let space = ParamSpace::new(vec![
            ParamDef { name: "x".into(), param_type: ParamType::Continuous { min: 0.0, max: 1.0 } },
        ]);
        let mut opt = BayesianOptimizer::new(space);

        for _ in 0..30 {
            let params = opt.suggest();
            let x = params[0];
            let obj = -(x - 0.7) * (x - 0.7);
            opt.observe(params, obj, Duration::from_millis(1));
        }

        // Loose tolerance: suggestions are stochastic.
        let best = opt.best().unwrap();
        assert!(
            (best.params[0] - 0.7).abs() < 0.2,
            "best x={}, expected ~0.7",
            best.params[0]
        );
    }

    // best() must return the maximum objective regardless of order.
    #[test]
    fn test_optimizer_best_tracks_maximum() {
        let space = ParamSpace::new(vec![
            ParamDef { name: "x".into(), param_type: ParamType::Continuous { min: 0.0, max: 1.0 } },
        ]);
        let mut opt = BayesianOptimizer::new(space);

        opt.observe(vec![0.2], 0.5, Duration::from_millis(1));
        opt.observe(vec![0.8], 0.9, Duration::from_millis(1));
        opt.observe(vec![0.5], 0.7, Duration::from_millis(1));

        assert!((opt.best().unwrap().objective - 0.9).abs() < 1e-10);
    }
}
|
||||||
159
src/autotune/params.rs
Normal file
159
src/autotune/params.rs
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
use rand::Rng;
|
||||||
|
|
||||||
|
/// Scaling behaviour of a single tunable parameter.
#[derive(Debug, Clone)]
pub enum ParamType {
    /// Uniform over [min, max].
    Continuous { min: f64, max: f64 },
    /// Uniform over the integers [min, max]; carried as f64 and rounded.
    Integer { min: i64, max: i64 },
    /// Uniform in log space over [min, max]; both bounds must be > 0.
    LogScale { min: f64, max: f64 },
}

/// A named tunable parameter.
#[derive(Debug, Clone)]
pub struct ParamDef {
    pub name: String,
    pub param_type: ParamType,
}

/// An ordered collection of parameters; order defines the vector layout
/// used by the optimizer and by positional indexing in trial code.
#[derive(Debug, Clone)]
pub struct ParamSpace {
    pub params: Vec<ParamDef>,
}
|
||||||
|
|
||||||
|
impl ParamSpace {
    pub fn new(params: Vec<ParamDef>) -> Self {
        Self { params }
    }

    /// Number of dimensions (one per parameter).
    pub fn dim(&self) -> usize {
        self.params.len()
    }

    /// Map from unit cube [0,1]^d to actual parameter values.
    /// Coordinates are clamped to [0,1] first; Integer parameters are
    /// rounded to the nearest whole number but returned as f64.
    pub fn from_unit_cube(&self, unit: &[f64]) -> Vec<f64> {
        self.params
            .iter()
            .zip(unit.iter())
            .map(|(p, &u)| {
                let u = u.clamp(0.0, 1.0);
                match &p.param_type {
                    ParamType::Continuous { min, max } => min + u * (max - min),
                    ParamType::Integer { min, max } => {
                        let v = *min as f64 + u * (*max - *min) as f64;
                        v.round()
                    }
                    ParamType::LogScale { min, max } => {
                        // Interpolate linearly in log space.
                        let log_min = min.ln();
                        let log_max = max.ln();
                        (log_min + u * (log_max - log_min)).exp()
                    }
                }
            })
            .collect()
    }

    /// Map from actual parameter values to unit cube [0,1]^d.
    /// Degenerate ranges (min == max) map to 0.5 to avoid division by zero.
    pub fn to_unit_cube(&self, values: &[f64]) -> Vec<f64> {
        self.params
            .iter()
            .zip(values.iter())
            .map(|(p, &v)| match &p.param_type {
                ParamType::Continuous { min, max } => {
                    if (max - min).abs() < 1e-15 { 0.5 } else { (v - min) / (max - min) }
                }
                ParamType::Integer { min, max } => {
                    let range = (*max - *min) as f64;
                    if range.abs() < 1e-15 { 0.5 } else { (v - *min as f64) / range }
                }
                ParamType::LogScale { min, max } => {
                    let log_min = min.ln();
                    let log_max = max.ln();
                    let log_range = log_max - log_min;
                    if log_range.abs() < 1e-15 { 0.5 } else { (v.ln() - log_min) / log_range }
                }
            })
            .collect()
    }

    /// Generate a random point in [0,1]^d.
    pub fn random_unit_point(&self, rng: &mut impl Rng) -> Vec<f64> {
        (0..self.dim()).map(|_| rng.random::<f64>()).collect()
    }

    /// Generate Latin Hypercube samples in [0,1]^d: each dimension is
    /// divided into n strata, and each sample lands in a distinct stratum
    /// per dimension (via an independent permutation per dimension).
    pub fn latin_hypercube(&self, n: usize, rng: &mut impl Rng) -> Vec<Vec<f64>> {
        let d = self.dim();
        let mut samples = vec![vec![0.0; d]; n];
        for j in 0..d {
            let mut perm: Vec<usize> = (0..n).collect();
            // Fisher-Yates shuffle
            for i in (1..n).rev() {
                let k = rng.random_range(0..=i);
                perm.swap(i, k);
            }
            for i in 0..n {
                // Jitter uniformly within the assigned stratum.
                let u: f64 = rng.random();
                samples[i][j] = (perm[i] as f64 + u) / n as f64;
            }
        }
        samples
    }

    /// Get parameter names.
    pub fn names(&self) -> Vec<&str> {
        self.params.iter().map(|p| p.name.as_str()).collect()
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // One parameter of each ParamType variant.
    fn test_space() -> ParamSpace {
        ParamSpace::new(vec![
            ParamDef { name: "x".into(), param_type: ParamType::Continuous { min: 0.0, max: 10.0 } },
            ParamDef { name: "n".into(), param_type: ParamType::Integer { min: 1, max: 20 } },
            ParamDef { name: "lr".into(), param_type: ParamType::LogScale { min: 0.001, max: 0.1 } },
        ])
    }

    #[test]
    fn test_unit_cube_roundtrip() {
        let space = test_space();
        let unit = vec![0.0, 0.5, 1.0];
        let actual = space.from_unit_cube(&unit);
        assert!((actual[0] - 0.0).abs() < 1e-10);
        // Integer: min=1, max=20, u=0.5 → 1 + 0.5*19 = 10.5, round = 11
        assert!((actual[1] - 11.0).abs() < 1e-10);
        assert!((actual[2] - 0.1).abs() < 1e-10);

        // Integer rounding is lossy, so only the exact-roundtrip dims are
        // checked on the way back.
        let back = space.to_unit_cube(&actual);
        assert!((back[0] - 0.0).abs() < 1e-10);
        assert!((back[2] - 1.0).abs() < 1e-10);
    }

    // Unit-cube corners must map exactly onto the declared bounds.
    #[test]
    fn test_boundaries() {
        let space = test_space();
        let low = space.from_unit_cube(&[0.0, 0.0, 0.0]);
        let high = space.from_unit_cube(&[1.0, 1.0, 1.0]);
        assert!((low[0] - 0.0).abs() < 1e-10);
        assert!((low[1] - 1.0).abs() < 1e-10);
        assert!((low[2] - 0.001).abs() < 1e-6);
        assert!((high[0] - 10.0).abs() < 1e-10);
        assert!((high[1] - 20.0).abs() < 1e-10);
        assert!((high[2] - 0.1).abs() < 1e-6);
    }

    #[test]
    fn test_latin_hypercube_coverage() {
        let space = test_space();
        let mut rng = rand::rng();
        let samples = space.latin_hypercube(10, &mut rng);
        assert_eq!(samples.len(), 10);
        for s in &samples {
            assert_eq!(s.len(), 3);
            for &v in s {
                assert!(v >= 0.0 && v <= 1.0);
            }
        }
    }
}
|
||||||
128
src/autotune/scanner.rs
Normal file
128
src/autotune/scanner.rs
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
use crate::autotune::optimizer::BayesianOptimizer;
|
||||||
|
use crate::autotune::params::{ParamDef, ParamSpace, ParamType};
|
||||||
|
use crate::scanner::train::{TrainScannerArgs, train_and_evaluate};
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
/// CLI arguments for the scanner autotune subcommand.
pub struct AutotuneScannerArgs {
    /// Path to the training input data.
    pub input: String,
    /// Path where the best model found is written.
    pub output: String,
    /// Optional wordlists path forwarded to the trainer.
    pub wordlists: Option<String>,
    /// Whether to include the CSIC dataset (forwarded to the trainer).
    pub csic: bool,
    /// Number of optimization trials to run.
    pub trials: usize,
    /// Beta for the F-beta objective.
    pub beta: f64,
    /// Optional path to a JSONL file receiving one record per trial.
    pub trial_log: Option<String>,
}
|
||||||
|
|
||||||
|
fn scanner_param_space() -> ParamSpace {
|
||||||
|
ParamSpace::new(vec![
|
||||||
|
ParamDef { name: "threshold".into(), param_type: ParamType::Continuous { min: 0.1, max: 0.95 } },
|
||||||
|
ParamDef { name: "learning_rate".into(), param_type: ParamType::LogScale { min: 0.001, max: 0.1 } },
|
||||||
|
ParamDef { name: "epochs".into(), param_type: ParamType::Integer { min: 100, max: 5000 } },
|
||||||
|
ParamDef { name: "class_weight_multiplier".into(), param_type: ParamType::Continuous { min: 0.5, max: 5.0 } },
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run Bayesian hyperparameter optimization for the scanner model.
///
/// For each trial: samples a configuration, trains and evaluates via
/// `train_and_evaluate`, scores the held-out F-beta, and feeds the result
/// back to the optimizer. The best model is serialized to `args.output`.
///
/// # Errors
/// Fails on trial-log creation/writes, serializing a new best model, or
/// writing the final model. Per-trial training failures score 0.0 and the
/// loop continues.
pub fn run_autotune(args: AutotuneScannerArgs) -> Result<()> {
    let space = scanner_param_space();
    let mut optimizer = BayesianOptimizer::new(space);

    let mut trial_log_file = if let Some(ref path) = args.trial_log {
        Some(std::fs::File::create(path)?)
    } else {
        None
    };

    let mut best_objective = f64::NEG_INFINITY;
    let mut best_model_bytes: Option<Vec<u8>> = None;

    eprintln!("Starting scanner autotune: {} trials, beta={}", args.trials, args.beta);

    for trial_num in 1..=args.trials {
        // Parameter vector order matches scanner_param_space().
        let params = optimizer.suggest();
        let threshold = params[0];
        let learning_rate = params[1];
        let epochs = params[2] as usize;
        let class_weight_multiplier = params[3];

        let train_args = TrainScannerArgs {
            input: args.input.clone(),
            output: String::new(), // don't save intermediate models
            wordlists: args.wordlists.clone(),
            threshold,
            csic: args.csic,
        };

        let start = Instant::now();
        // A failed trial scores 0.0 so the optimizer avoids that region.
        let result = match train_and_evaluate(&train_args, learning_rate, epochs, class_weight_multiplier) {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" trial {trial_num}: FAILED ({e})");
                optimizer.observe(params, 0.0, start.elapsed());
                continue;
            }
        };
        let duration = start.elapsed();

        // Objective: F-beta on the held-out test split.
        let objective = result.test_metrics.fbeta(args.beta);

        eprintln!(
            " trial {trial_num}/{}: fbeta={objective:.4} (threshold={threshold:.3}, lr={learning_rate:.5}, epochs={epochs}, cwm={class_weight_multiplier:.2}) [{:.1}s]",
            args.trials,
            duration.as_secs_f64(),
        );

        // Log trial as JSONL (one self-contained object per line).
        if let Some(ref mut f) = trial_log_file {
            let trial_json = serde_json::json!({
                "trial": trial_num,
                "params": {
                    "threshold": threshold,
                    "learning_rate": learning_rate,
                    "epochs": epochs,
                    "class_weight_multiplier": class_weight_multiplier,
                },
                "objective": objective,
                "duration_secs": duration.as_secs_f64(),
                "train_f1": result.train_metrics.f1(),
                "test_precision": result.test_metrics.precision(),
                "test_recall": result.test_metrics.recall(),
            });
            writeln!(f, "{}", trial_json)?;
        }

        // Serialize only when a new best is found (models are kept in
        // memory, not written to disk, until the end).
        if objective > best_objective {
            best_objective = objective;
            let encoded = bincode::serialize(&result.model)?;
            best_model_bytes = Some(encoded);
        }

        optimizer.observe(params, objective, duration);
    }

    // Save best model
    if let Some(bytes) = best_model_bytes {
        std::fs::write(&args.output, &bytes)?;
        eprintln!("\nBest model saved to {}", args.output);
    }

    // Print summary of the best trial, with a copy-pasteable repro command.
    if let Some(best) = optimizer.best() {
        eprintln!("\n═══ Autotune Results ═══════════════════════════════════════");
        eprintln!(" Best trial: #{}", best.trial_num);
        eprintln!(" Best F-beta: {:.4}", best.objective);
        eprintln!(" Parameters:");
        for (name, val) in best.param_names.iter().zip(best.params.iter()) {
            eprintln!(" {:<30} = {:.6}", name, val);
        }
        eprintln!("\n Reproduce:");
        eprintln!(
            " cargo run -- train-scanner --input {} --output {} --threshold {:.4}",
            args.input, args.output, best.params[0],
        );
        eprintln!("══════════════════════════════════════════════════════════");
    }

    Ok(())
}
|
||||||
Reference in New Issue
Block a user