From 867b6b2489c5d5cfebc0564965b7b3f3a956542c Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 10 Mar 2026 23:38:20 +0000 Subject: [PATCH] feat(proxy): integrate DDoS, scanner, and rate limiter into request pipeline Wire up all three detection layers in request_filter with pipeline logging at each stage for unfiltered training data. Add DDoS, scanner, and rate_limit config sections. Bot allowlist check before scanner model on the hot path. CLI subcommands for train/replay. Signed-off-by: Sienna Meridian Satterwhite --- Cargo.lock | 484 ++++++++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 19 ++ src/config.rs | 94 ++++++++++ src/lib.rs | 3 + src/main.rs | 286 +++++++++++++++++++++++++++-- src/proxy.rs | 308 +++++++++++++++++++++++++++++++- tests/e2e.rs | 2 +- 7 files changed, 1160 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 97eaa89..5d81d1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" + [[package]] name = "addr2line" version = "0.25.1" @@ -66,6 +72,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -334,6 +346,15 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -414,6 +435,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.56" @@ -469,6 +496,33 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.60" @@ -577,6 +631,70 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.12" @@ -592,6 +710,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -769,6 +893,18 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "dns-lookup" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf5597a4b7fe5275fc9dcf88ce26326bc8e4cb87d0130f33752d4c5f717793cf" +dependencies = [ + "cfg-if", + "libc", + "socket2 0.6.3", + "windows-sys 0.60.2", +] + [[package]] name = "dunce" version = "1.0.5" @@ -879,6 +1015,23 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnntw" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8466be0d69b615cc756992651fe2eb11bfbb2cbf945b21a7746844b8293cbfe8" +dependencies = [ + "crossbeam-channel", + "likely_stable", + "num-format", + "ordered-float 3.9.2", + "ouroboros 0.15.6", + "permutation", + "rayon", + "sync-unsafe-cell", + "thiserror 1.0.69", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1082,6 +1235,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -1111,6 +1275,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1394,12 +1564,32 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1620,6 +1810,15 @@ dependencies = [ "libc", ] +[[package]] +name = "likely_stable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61f7017d8abea1fc23ff7f01a8147b2656dea3aeb24d519aab6e2177eaf671c" +dependencies = [ + "rustc_version", +] + [[package]] name = "litemap" version = "0.8.1" @@ -1806,6 +2005,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -1854,6 +2063,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "openssl-probe" version = "0.1.6" @@ -1956,6 +2171,25 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "3.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ouroboros" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1358bd1558bd2a083fed428ffeda486fbfb323e698cdda7794259d592ca72db" +dependencies = [ + "aliasable", + "ouroboros_macro 0.15.6", +] + [[package]] name = "ouroboros" version = "0.18.5" @@ -1963,10 +2197,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" dependencies = [ "aliasable", - "ouroboros_macro", + "ouroboros_macro 0.18.5", "static_assertions", ] +[[package]] +name = "ouroboros_macro" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7d21ccd03305a674437ee1248f3ab5d4b1db095cf1caf49f1713ddf61956b7" +dependencies = [ + "Inflector", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "ouroboros_macro" version = "0.18.5" @@ -2025,6 +2272,12 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "permutation" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" + [[package]] name = "pest" version = "2.8.6" @@ -2188,7 +2441,7 @@ dependencies = [ "nix", "once_cell", "openssl-probe 0.1.6", - "ouroboros", + "ouroboros 0.18.5", "parking_lot", "percent-encoding", "pingora-error", @@ -2377,6 +2630,34 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -2416,6 +2697,30 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -2492,7 +2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -2578,6 +2883,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -2699,6 +3024,12 @@ version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -2800,6 +3131,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2882,7 +3222,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ - "ordered-float", + "ordered-float 2.10.1", "serde", ] @@ -3089,7 +3429,13 @@ name = "sunbeam-proxy" version = "0.1.0" dependencies = [ "anyhow", + "arc-swap", "async-trait", + "bincode", + "clap", + "criterion", + "dns-lookup", + "fnntw", "futures", "http", "k8s-openapi", @@ -3103,6 +3449,7 @@ dependencies = [ "pingora-core", "pingora-http", "pingora-proxy", + "rustc-hash", "rustls", "serde", "serde_json", @@ -3135,6 +3482,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync-unsafe-cell" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8deaecba5382c095cb432cd1e21068dadb144208f057b13720e76bf89749beb4" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -3255,6 +3608,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.50.0" @@ -3660,6 +4023,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3763,6 +4136,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -3775,7 +4157,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -3784,7 +4166,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", ] [[package]] @@ -3802,14 +4193,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -3818,48 +4226,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.15" diff --git a/Cargo.toml b/Cargo.toml index 7708e8d..29d8afa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,18 @@ opentelemetry-otlp = { version = "0.27", features = ["http-proto", "reqwest-c serde_json = "1" anyhow = "1" +# DDoS detection (KNN classifier) +clap = { version = "4", features = ["derive"] } +fnntw = "0.4" +bincode = "1" +rustc-hash = "2" + +# Lock-free Arc swapping for hot-reload (scanner model) +arc-swap = "1" + +# Reverse DNS for bot IP verification +dns-lookup = "2" + # Rustls crypto provider — must be installed before any TLS init rustls = { version = "0.23", features = ["aws-lc-rs"] } @@ -45,6 +57,13 @@ kube = { version = "3", features = ["runtime", "client"] } k8s-openapi = { version = "0.27", features = ["v1_35"] } libc = "0.2" +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "scanner_bench" +harness = false + [profile.release] opt-level = 3 lto = true diff --git a/src/config.rs b/src/config.rs index 1040b60..42ef401 100644 --- a/src/config.rs +++ b/src/config.rs @@ -18,8 +18,102 @@ pub struct Config { pub routes: Vec, /// Optional SSH TCP passthrough (port 22 → Gitea SSH). pub ssh: Option, + /// Optional KNN-based DDoS detection. + pub ddos: Option, + /// Optional per-identity rate limiting. + pub rate_limit: Option, + /// Optional per-request scanner detection. + pub scanner: Option, } +#[derive(Debug, Deserialize, Clone)] +pub struct DDoSConfig { + pub model_path: String, + #[serde(default = "default_k")] + pub k: usize, + #[serde(default = "default_threshold")] + pub threshold: f64, + #[serde(default = "default_window_secs")] + pub window_secs: u64, + #[serde(default = "default_window_capacity")] + pub window_capacity: usize, + #[serde(default = "default_min_events")] + pub min_events: usize, + #[serde(default = "default_enabled")] + pub enabled: bool, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct RateLimitConfig { + #[serde(default = "default_rl_enabled")] + pub enabled: bool, + #[serde(default)] + pub bypass_cidrs: Vec, + #[serde(default = "default_eviction_interval")] + pub eviction_interval_secs: u64, + #[serde(default = "default_stale_after")] + pub stale_after_secs: u64, + pub authenticated: BucketConfig, + pub unauthenticated: BucketConfig, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct BucketConfig { + pub burst: u32, + pub rate: f64, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct ScannerConfig { + pub model_path: String, + #[serde(default = "default_scanner_threshold")] + pub threshold: f64, + #[serde(default = "default_scanner_enabled")] + pub enabled: bool, + /// How often (seconds) to check the model file for changes. 0 = no hot-reload. + #[serde(default = "default_scanner_poll_interval")] + pub poll_interval_secs: u64, + /// Bot allowlist rules. Verified bots bypass the scanner model. + #[serde(default)] + pub allowlist: Vec, + /// TTL (seconds) for verified bot IP cache entries. + #[serde(default = "default_bot_cache_ttl")] + pub bot_cache_ttl_secs: u64, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct BotAllowlistRule { + /// Case-insensitive UA prefix to match, e.g. "Googlebot". + pub ua_prefix: String, + /// Human-readable label for pipeline logs. + pub reason: String, + /// Reverse-DNS hostname suffixes for verification. + /// e.g. ["googlebot.com", "google.com"] + #[serde(default)] + pub dns_suffixes: Vec, + /// CIDR ranges for instant IP verification. + /// e.g. ["66.249.64.0/19"] + #[serde(default)] + pub cidrs: Vec, +} + +fn default_bot_cache_ttl() -> u64 { 86400 } // 24h + +fn default_scanner_threshold() -> f64 { 0.5 } +fn default_scanner_enabled() -> bool { true } +fn default_scanner_poll_interval() -> u64 { 30 } + +fn default_rl_enabled() -> bool { true } +fn default_eviction_interval() -> u64 { 300 } +fn default_stale_after() -> u64 { 600 } + +fn default_k() -> usize { 5 } +fn default_threshold() -> f64 { 0.6 } +fn default_window_secs() -> u64 { 60 } +fn default_window_capacity() -> usize { 1000 } +fn default_min_events() -> usize { 10 } +fn default_enabled() -> bool { true } + #[derive(Debug, Deserialize, Clone)] pub struct ListenConfig { /// HTTP listener address, e.g., "0.0.0.0:80" or "[::]:80". diff --git a/src/lib.rs b/src/lib.rs index 4ccad56..99b705d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,9 @@ // without going through the binary entry point. pub mod acme; pub mod config; +pub mod ddos; pub mod dual_stack; pub mod proxy; +pub mod rate_limit; +pub mod scanner; pub mod ssh; diff --git a/src/main.rs b/src/main.rs index ae779f0..3380312 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,16 +4,167 @@ mod watcher; use sunbeam_proxy::{acme, config}; use sunbeam_proxy::proxy::SunbeamProxy; +use sunbeam_proxy::ddos; +use sunbeam_proxy::rate_limit; +use sunbeam_proxy::scanner; use std::{collections::HashMap, sync::Arc}; use anyhow::Result; +use clap::{Parser, Subcommand}; use kube::Client; use pingora::server::{configuration::Opt, Server}; use pingora_proxy::http_proxy_service; use std::sync::RwLock; +#[derive(Parser)] +#[command(name = "sunbeam-proxy")] +struct Cli { + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Commands { + /// Start the proxy server (default if no subcommand given) + Serve { + /// Pingora --upgrade flag for zero-downtime reload + #[arg(long)] + upgrade: bool, + }, + /// Replay audit logs through the DDoS detector and rate limiter + Replay { + /// Path to audit log JSONL file + #[arg(short, long)] + input: String, + /// Path to trained model file + #[arg(short, long, default_value = "ddos_model.bin")] + model: String, + /// Optional config file (for rate limit settings) + #[arg(short, long)] + config: Option, + /// KNN k parameter + #[arg(long, default_value = "5")] + k: usize, + /// Attack threshold + #[arg(long, default_value = "0.6")] + threshold: f64, + /// Sliding window size in seconds + #[arg(long, default_value = "60")] + window_secs: u64, + /// Minimum events per IP before classification + #[arg(long, default_value = "10")] + min_events: usize, + /// Also run rate limiter during replay + #[arg(long)] + rate_limit: bool, + }, + /// Train a DDoS detection model from audit logs + Train { + /// Path to audit log JSONL file + #[arg(short, long)] + input: String, + /// Output model file path + #[arg(short, long)] + output: String, + /// File with known-attack IPs (one per line) + #[arg(long)] + attack_ips: Option, + /// File with known-normal IPs (one per line) + #[arg(long)] + normal_ips: Option, + /// TOML file with heuristic auto-labeling thresholds + #[arg(long)] + heuristics: Option, + /// KNN k parameter + #[arg(long, default_value = "5")] + k: usize, + /// Attack threshold (fraction of k neighbors) + #[arg(long, default_value = "0.6")] + threshold: f64, + /// Sliding window size in seconds + #[arg(long, default_value = "60")] + window_secs: u64, + /// Minimum events per IP to include in training + #[arg(long, default_value = "10")] + min_events: usize, + }, + /// Train a per-request scanner detection model from audit logs + TrainScanner { + /// Path to audit log JSONL file + #[arg(short, long)] + input: String, + /// Output model file path + #[arg(short, long, default_value = "scanner_model.bin")] + output: String, + /// Directory (or file) containing .txt wordlists of scanner paths + #[arg(long)] + wordlists: Option, + /// Classification threshold + #[arg(long, default_value = "0.5")] + threshold: f64, + }, +} + fn main() -> Result<()> { + let cli = Cli::parse(); + match cli.command.unwrap_or(Commands::Serve { upgrade: false }) { + Commands::Serve { upgrade } => run_serve(upgrade), + Commands::Replay { + input, + model, + config, + k, + threshold, + window_secs, + min_events, + rate_limit, + } => ddos::replay::run(ddos::replay::ReplayArgs { + input, + model_path: model, + config_path: config, + k, + threshold, + window_secs, + min_events, + rate_limit, + }), + Commands::Train { + input, + output, + attack_ips, + normal_ips, + heuristics, + k, + threshold, + window_secs, + min_events, + } => ddos::train::run(ddos::train::TrainArgs { + input, + output, + attack_ips, + normal_ips, + heuristics, + k, + threshold, + window_secs, + min_events, + }), + Commands::TrainScanner { + input, + output, + wordlists, + threshold, + } => scanner::train::run(scanner::train::TrainScannerArgs { + input, + output, + wordlists, + threshold, + }), + } +} + +fn run_serve(upgrade: bool) -> Result<()> { // Install the aws-lc-rs crypto provider for rustls before any TLS init. // Required because rustls 0.23 no longer auto-selects a provider at compile time. rustls::crypto::aws_lc_rs::default_provider() @@ -27,10 +178,120 @@ fn main() -> Result<()> { // 1. Init telemetry (JSON logs + optional OTEL traces). telemetry::init(&cfg.telemetry.otlp_endpoint); - // 2. Detect --upgrade flag. When present, Pingora inherits listening socket - // FDs from the upgrade Unix socket instead of binding fresh ports, enabling - // zero-downtime cert/config reloads triggered by the K8s watcher below. - let upgrade = std::env::args().any(|a| a == "--upgrade"); + // 2. Load DDoS detection model if configured. + let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos { + if ddos_cfg.enabled { + match ddos::model::TrainedModel::load( + std::path::Path::new(&ddos_cfg.model_path), + Some(ddos_cfg.k), + Some(ddos_cfg.threshold), + ) { + Ok(model) => { + let point_count = model.point_count(); + let detector = Arc::new(ddos::detector::DDoSDetector::new(model, ddos_cfg)); + tracing::info!( + points = point_count, + k = ddos_cfg.k, + threshold = ddos_cfg.threshold, + "DDoS detector loaded" + ); + Some(detector) + } + Err(e) => { + tracing::warn!(error = %e, "failed to load DDoS model; detection disabled"); + None + } + } + } else { + None + } + } else { + None + }; + + // 2b. Init rate limiter if configured. + let rate_limiter = if let Some(rl_cfg) = &cfg.rate_limit { + if rl_cfg.enabled { + let limiter = Arc::new(rate_limit::limiter::RateLimiter::new(rl_cfg)); + let evict_limiter = limiter.clone(); + let interval = rl_cfg.eviction_interval_secs; + std::thread::spawn(move || loop { + std::thread::sleep(std::time::Duration::from_secs(interval)); + evict_limiter.evict_stale(); + }); + tracing::info!( + auth_burst = rl_cfg.authenticated.burst, + auth_rate = rl_cfg.authenticated.rate, + unauth_burst = rl_cfg.unauthenticated.burst, + unauth_rate = rl_cfg.unauthenticated.rate, + "rate limiter enabled" + ); + Some(limiter) + } else { + None + } + } else { + None + }; + + // 2c. Load scanner model if configured. + let (scanner_detector, bot_allowlist) = if let Some(scanner_cfg) = &cfg.scanner { + if scanner_cfg.enabled { + match scanner::model::ScannerModel::load(std::path::Path::new(&scanner_cfg.model_path)) { + Ok(mut model) => { + let fragment_count = model.fragments.len(); + model.threshold = scanner_cfg.threshold; + let detector = scanner::detector::ScannerDetector::new(&model, &cfg.routes); + let handle = Arc::new(arc_swap::ArcSwap::from_pointee(detector)); + + // Start bot allowlist if rules are configured. + let bot_allowlist = if !scanner_cfg.allowlist.is_empty() { + let al = scanner::allowlist::BotAllowlist::spawn( + &scanner_cfg.allowlist, + scanner_cfg.bot_cache_ttl_secs, + ); + tracing::info!( + rules = scanner_cfg.allowlist.len(), + "bot allowlist enabled" + ); + Some(al) + } else { + None + }; + + // Start background file watcher for hot-reload. + if scanner_cfg.poll_interval_secs > 0 { + let watcher_handle = handle.clone(); + let model_path = std::path::PathBuf::from(&scanner_cfg.model_path); + let threshold = scanner_cfg.threshold; + let routes = cfg.routes.clone(); + let interval = std::time::Duration::from_secs(scanner_cfg.poll_interval_secs); + std::thread::spawn(move || { + scanner::watcher::watch_scanner_model( + watcher_handle, model_path, threshold, routes, interval, + ); + }); + } + + tracing::info!( + fragments = fragment_count, + threshold = scanner_cfg.threshold, + poll_interval_secs = scanner_cfg.poll_interval_secs, + "scanner detector loaded" + ); + (Some(handle), bot_allowlist) + } + Err(e) => { + tracing::warn!(error = %e, "failed to load scanner model; scanner detection disabled"); + (None, None) + } + } + } else { + (None, None) + } + } else { + (None, None) + }; // 3. Fetch the TLS cert from K8s before Pingora binds the TLS port. // The Client is created and dropped within this temp runtime — we do NOT @@ -47,8 +308,6 @@ fn main() -> Result<()> { if let Err(e) = cert::fetch_and_write(&c, &cfg.tls.cert_path, &cfg.tls.key_path).await { - // Non-fatal: Secret may not exist yet on first deploy (cert-manager - // is still issuing), or the Secret name may differ in dev. tracing::warn!(error = %e, "cert fetch from K8s failed; using existing files"); } } @@ -83,6 +342,10 @@ fn main() -> Result<()> { let proxy = SunbeamProxy { routes: cfg.routes.clone(), acme_routes: acme_routes.clone(), + ddos_detector, + scanner_detector, + bot_allowlist, + rate_limiter, }; let mut svc = http_proxy_service(&server.configuration, proxy); @@ -90,11 +353,6 @@ fn main() -> Result<()> { svc.add_tcp(&cfg.listen.http); // Port 443: only add the TLS listener if the cert files exist. - // On first deploy cert-manager hasn't issued the cert yet, so we start - // HTTP-only. Once the pingora-tls Secret is created (ACME challenge - // completes), the watcher in step 6 writes the cert files and triggers - // a graceful upgrade. The upgrade process finds the cert files and adds - // the TLS listener, inheriting the port-80 socket from the old process. let cert_exists = std::path::Path::new(&cfg.tls.cert_path).exists(); if cert_exists { svc.add_tls(&cfg.listen.https, &cfg.tls.cert_path, &cfg.tls.key_path)?; @@ -109,7 +367,6 @@ fn main() -> Result<()> { server.add_service(svc); // 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured. - // Runs on its own OS thread + Tokio runtime — same pattern as the cert/ingress watcher. if let Some(ssh_cfg) = &cfg.ssh { let listen = ssh_cfg.listen.clone(); let backend = ssh_cfg.backend.clone(); @@ -123,10 +380,7 @@ fn main() -> Result<()> { }); } - // 6. Background K8s watchers on their own OS thread + tokio runtime so they - // don't interfere with Pingora's internal runtime. A fresh Client is - // created here so its tower workers live on this runtime (not the - // now-dropped temp runtime from step 3). + // 6. Background K8s watchers on their own OS thread + tokio runtime. if k8s_available { let cert_path = cfg.tls.cert_path.clone(); let key_path = cfg.tls.key_path.clone(); diff --git a/src/proxy.rs b/src/proxy.rs index 1737af8..d195f45 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -1,10 +1,20 @@ use crate::acme::AcmeRoutes; use crate::config::RouteConfig; +use crate::ddos::detector::DDoSDetector; +use crate::ddos::model::DDoSAction; +use crate::rate_limit::key; +use crate::rate_limit::limiter::{RateLimitResult, RateLimiter}; +use crate::scanner::allowlist::BotAllowlist; +use crate::scanner::detector::ScannerDetector; +use crate::scanner::model::ScannerAction; +use arc_swap::ArcSwap; use async_trait::async_trait; use http::header::{CONNECTION, EXPECT, HOST, UPGRADE}; use pingora_core::{upstreams::peer::HttpPeer, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use pingora_proxy::{ProxyHttp, Session}; +use std::net::IpAddr; +use std::sync::Arc; use std::time::Instant; pub struct SunbeamProxy { @@ -12,6 +22,14 @@ pub struct SunbeamProxy { /// Per-challenge route table populated by the Ingress watcher. /// Maps `/.well-known/acme-challenge/` → solver service address. pub acme_routes: AcmeRoutes, + /// Optional KNN-based DDoS detector. + pub ddos_detector: Option>, + /// Optional per-request scanner detector (hot-reloadable via ArcSwap). + pub scanner_detector: Option>>, + /// Optional verified-bot allowlist (bypasses scanner for known crawlers/agents). + pub bot_allowlist: Option>, + /// Optional per-identity rate limiter. + pub rate_limiter: Option>, } pub struct RequestCtx { @@ -41,6 +59,37 @@ fn extract_host(session: &Session) -> String { .to_string() } +/// Extract the real client IP, preferring trusted proxy headers. +/// +/// Priority: CF-Connecting-IP → X-Real-IP → X-Forwarded-For (first) → socket addr. +/// All traffic arrives via Cloudflare, so CF-Connecting-IP is the authoritative +/// real client IP. The socket address is the Cloudflare edge node. +fn extract_client_ip(session: &Session) -> Option { + let headers = &session.req_header().headers; + + for header in &["cf-connecting-ip", "x-real-ip"] { + if let Some(val) = headers.get(*header).and_then(|v| v.to_str().ok()) { + if let Ok(ip) = val.trim().parse::() { + return Some(ip); + } + } + } + + // X-Forwarded-For: client, proxy1, proxy2 — take the first entry + if let Some(val) = headers.get("x-forwarded-for").and_then(|v| v.to_str().ok()) { + if let Some(first) = val.split(',').next() { + if let Ok(ip) = first.trim().parse::() { + return Some(ip); + } + } + } + + // Fallback: raw socket address + session + .client_addr() + .and_then(|addr| addr.as_inet().map(|a| a.ip())) +} + /// Strip the scheme prefix from a backend URL like `http://host:port`. fn backend_addr(backend: &str) -> &str { backend @@ -137,6 +186,193 @@ impl ProxyHttp for SunbeamProxy { return Ok(true); } + // ── Detection pipeline ─────────────────────────────────────────── + // Each layer emits an unfiltered pipeline log BEFORE acting on its + // decision. This guarantees downstream training pipelines always + // have the full traffic picture: + // - "ddos" log = all HTTPS traffic (scanner training data) + // - "scanner" log = traffic that passed DDoS (rate-limit training data) + // - "rate_limit" log = traffic that passed scanner (validation data) + + // DDoS detection: check the client IP against the KNN model. + if let Some(detector) = &self.ddos_detector { + if let Some(ip) = extract_client_ip(session) { + let method = session.req_header().method.as_str(); + let path = session.req_header().uri.path(); + let host = extract_host(session); + let user_agent = session + .req_header() + .headers + .get("user-agent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let content_length: u64 = session + .req_header() + .headers + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + let has_cookies = session.req_header().headers.get("cookie").is_some(); + let has_referer = session.req_header().headers.get("referer").is_some(); + let has_accept_language = session.req_header().headers.get("accept-language").is_some(); + let accept = session + .req_header() + .headers + .get("accept") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let ddos_action = detector.check(ip, method, path, &host, user_agent, content_length, has_cookies, has_referer, has_accept_language); + let decision = if matches!(ddos_action, DDoSAction::Block) { "block" } else { "allow" }; + + tracing::info!( + target = "pipeline", + layer = "ddos", + decision, + method, + host = %host, + path, + client_ip = %ip, + user_agent, + content_length, + has_cookies, + has_referer, + has_accept_language, + accept, + "pipeline" + ); + + if matches!(ddos_action, DDoSAction::Block) { + let mut resp = ResponseHeader::build(429, None)?; + resp.insert_header("Retry-After", "60")?; + resp.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(resp), true).await?; + return Ok(true); + } + } + } + + // Scanner detection: per-request classification of scanner/bot probes. + // The detector is behind ArcSwap for lock-free hot-reload. + if let Some(scanner_swap) = &self.scanner_detector { + let method = session.req_header().method.as_str(); + let path = session.req_header().uri.path(); + let host = extract_host(session); + let prefix = host.split('.').next().unwrap_or(""); + let has_cookies = session.req_header().headers.get("cookie").is_some(); + let has_referer = session.req_header().headers.get("referer").is_some(); + let has_accept_language = session.req_header().headers.get("accept-language").is_some(); + let accept = session + .req_header() + .headers + .get("accept") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let user_agent = session + .req_header() + .headers + .get("user-agent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let content_length: u64 = session + .req_header() + .headers + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + let client_ip = extract_client_ip(session); + + // Bot allowlist: verified crawlers/agents bypass the scanner model. + // CIDR rules are instant; DNS-verified IPs are cached after + // background reverse+forward lookup. + let bot_reason = self.bot_allowlist.as_ref().and_then(|al| { + client_ip.and_then(|ip| al.check(user_agent, ip)) + }); + + let (decision, score, reason) = if let Some(bot_reason) = bot_reason { + ("allow", -1.0f64, bot_reason) + } else { + let scanner = scanner_swap.load(); + let verdict = scanner.check( + method, path, prefix, has_cookies, has_referer, + has_accept_language, accept, user_agent, content_length, + ); + let d = if matches!(verdict.action, ScannerAction::Block) { "block" } else { "allow" }; + (d, verdict.score, verdict.reason) + }; + + let client_ip_str = client_ip + .map(|ip| ip.to_string()) + .unwrap_or_default(); + + tracing::info!( + target = "pipeline", + layer = "scanner", + decision, + score, + reason, + method, + host = %host, + path, + client_ip = client_ip_str, + user_agent, + content_length, + has_cookies, + has_referer, + has_accept_language, + accept, + "pipeline" + ); + + if decision == "block" { + let mut resp = ResponseHeader::build(403, None)?; + resp.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(resp), true).await?; + return Ok(true); + } + } + + // Rate limiting: per-identity throttling. + if let Some(limiter) = &self.rate_limiter { + if let Some(ip) = extract_client_ip(session) { + let cookie = session + .req_header() + .headers + .get("cookie") + .and_then(|v| v.to_str().ok()); + let auth = session + .req_header() + .headers + .get("authorization") + .and_then(|v| v.to_str().ok()); + let rl_key = key::extract_key(cookie, auth, ip); + let rl_result = limiter.check(ip, rl_key); + let decision = if matches!(rl_result, RateLimitResult::Reject { .. }) { "block" } else { "allow" }; + + tracing::info!( + target = "pipeline", + layer = "rate_limit", + decision, + method = %session.req_header().method, + host = %extract_host(session), + path = %session.req_header().uri.path(), + client_ip = %ip, + user_agent = session.req_header().headers.get("user-agent").and_then(|v| v.to_str().ok()).unwrap_or("-"), + has_cookies = cookie.is_some(), + "pipeline" + ); + + if let RateLimitResult::Reject { retry_after } = rl_result { + let mut resp = ResponseHeader::build(429, None)?; + resp.insert_header("Retry-After", retry_after.to_string())?; + resp.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(resp), true).await?; + return Ok(true); + } + } + } + // Reject unknown host prefixes with 404. let host = extract_host(session); let prefix = host.split('.').next().unwrap_or(""); @@ -311,30 +547,92 @@ impl ProxyHttp for SunbeamProxy { let status = session .response_written() .map_or(0, |r| r.status.as_u16()); - let duration_ms = ctx.start_time.elapsed().as_millis(); + let duration_ms = ctx.start_time.elapsed().as_millis() as u64; let backend = ctx .route .as_ref() .map(|r| r.backend.as_str()) .unwrap_or("-"); - let client_ip = session - .client_addr() - .map(|a| a.to_string()) - .unwrap_or_else(|| "-".to_string()); + let client_ip = extract_client_ip(session) + .map(|ip| ip.to_string()) + .unwrap_or_else(|| { + session + .client_addr() + .map(|a| a.to_string()) + .unwrap_or_else(|| "-".to_string()) + }); let error_str = error.map(|e| e.to_string()); + let content_length: u64 = session + .req_header() + .headers + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + let user_agent = session + .req_header() + .headers + .get("user-agent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let referer = session + .req_header() + .headers + .get("referer") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let accept_language = session + .req_header() + .headers + .get("accept-language") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let accept = session + .req_header() + .headers + .get("accept") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let has_cookies = session + .req_header() + .headers + .get("cookie") + .is_some(); + let cf_country = session + .req_header() + .headers + .get("cf-ipcountry") + .and_then(|v| v.to_str().ok()) + .unwrap_or("-"); + let query = session.req_header().uri.query().unwrap_or(""); + tracing::info!( target = "audit", method = %session.req_header().method, host = %extract_host(session), path = %session.req_header().uri.path(), + query, client_ip, status, duration_ms, + content_length, + user_agent, + referer, + accept_language, + accept, + has_cookies, + cf_country, backend, error = error_str, "request" ); + + if let Some(detector) = &self.ddos_detector { + if let Some(ip) = extract_client_ip(session) { + detector.record_response(ip, status, duration_ms as u32); + } + } } } diff --git a/tests/e2e.rs b/tests/e2e.rs index ebe38f6..57e0539 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -101,7 +101,7 @@ fn start_proxy_once(backend_port: u16) { paths: vec![], }]; let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); - let proxy = SunbeamProxy { routes, acme_routes }; + let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None }; let opt = Opt { upgrade: false,