feat: configurable k8s resources, CSIC training pipeline, unified Dockerfile

- Make K8s namespace, TLS secret, and config ConfigMap names configurable
  via [kubernetes] config section (previously hardcoded to "ingress")
- Add CSIC 2010 dataset converter and auto-download for scanner training
- Unify Dockerfile for local and production builds (remove cross-compile path)
- Bake ML models directory into container image
- Update CSIC dataset URL to self-hosted mirror (src.sunbeam.pt)
- Fix missing fields in the rate_limit pipeline log
- Consolidate docs/README.md into root README.md

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:20 +00:00
parent 0baab92141
commit a5810dd8a7
23 changed files with 946 additions and 514 deletions

View File

@@ -50,6 +50,7 @@ impl ScannerDetector {
/// Returns a verdict with the action, raw score, and reason.
/// The score and reason are captured in pipeline logs so the training
/// pipeline always has unfiltered data to retrain from.
+#[allow(clippy::too_many_arguments)]
pub fn check(
&self,
method: &str,
@@ -107,8 +108,8 @@ impl ScannerDetector {
// 3. Compute score = bias + dot(weights, features) + interaction terms
let mut score = self.weights[NUM_SCANNER_FEATURES + 2]; // bias (index 14)
-for i in 0..NUM_SCANNER_FEATURES {
-score += self.weights[i] * f[i];
+for (i, &fi) in f.iter().enumerate().take(NUM_SCANNER_FEATURES) {
+score += self.weights[i] * fi;
}
// Interaction: suspicious_path AND no_cookies
score += self.weights[12] * f[0] * (1.0 - f[3]);