feat(proxy): integrate DDoS, scanner, and rate limiter into request pipeline

Wire up all three detection layers in request_filter with pipeline
logging at each stage for unfiltered training data. Add DDoS, scanner,
and rate_limit config sections. Bot allowlist check before scanner
model on the hot path. CLI subcommands for train/replay.

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:20 +00:00
parent ae18b00fa4
commit 867b6b2489
7 changed files with 1160 additions and 36 deletions

484
Cargo.lock generated
View File

@@ -2,6 +2,12 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "Inflector"
version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
[[package]] [[package]]
name = "addr2line" name = "addr2line"
version = "0.25.1" version = "0.25.1"
@@ -66,6 +72,12 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.21" version = "0.6.21"
@@ -334,6 +346,15 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.3.2" version = "1.3.2"
@@ -414,6 +435,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.56" version = "1.2.56"
@@ -469,6 +496,33 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.60" version = "4.5.60"
@@ -577,6 +631,70 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-queue" name = "crossbeam-queue"
version = "0.3.12" version = "0.3.12"
@@ -592,6 +710,12 @@ version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.7" version = "0.1.7"
@@ -769,6 +893,18 @@ dependencies = [
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "dns-lookup"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf5597a4b7fe5275fc9dcf88ce26326bc8e4cb87d0130f33752d4c5f717793cf"
dependencies = [
"cfg-if",
"libc",
"socket2 0.6.3",
"windows-sys 0.60.2",
]
[[package]] [[package]]
name = "dunce" name = "dunce"
version = "1.0.5" version = "1.0.5"
@@ -879,6 +1015,23 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "fnntw"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8466be0d69b615cc756992651fe2eb11bfbb2cbf945b21a7746844b8293cbfe8"
dependencies = [
"crossbeam-channel",
"likely_stable",
"num-format",
"ordered-float 3.9.2",
"ouroboros 0.15.6",
"permutation",
"rayon",
"sync-unsafe-cell",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@@ -1082,6 +1235,17 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "half"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"cfg-if",
"crunchy",
"zerocopy",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.12.3" version = "0.12.3"
@@ -1111,6 +1275,12 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]] [[package]]
name = "hex" name = "hex"
version = "0.4.3" version = "0.4.3"
@@ -1394,12 +1564,32 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "is-terminal"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.61.2",
]
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.2" version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itertools" name = "itertools"
version = "0.14.0" version = "0.14.0"
@@ -1620,6 +1810,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "likely_stable"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61f7017d8abea1fc23ff7f01a8147b2656dea3aeb24d519aab6e2177eaf671c"
dependencies = [
"rustc_version",
]
[[package]] [[package]]
name = "litemap" name = "litemap"
version = "0.8.1" version = "0.8.1"
@@ -1806,6 +2005,16 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
[[package]]
name = "num-format"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
dependencies = [
"arrayvec",
"itoa",
]
[[package]] [[package]]
name = "num-integer" name = "num-integer"
version = "0.1.46" version = "0.1.46"
@@ -1854,6 +2063,12 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "oorandom"
version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]] [[package]]
name = "openssl-probe" name = "openssl-probe"
version = "0.1.6" version = "0.1.6"
@@ -1956,6 +2171,25 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "ordered-float"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
]
[[package]]
name = "ouroboros"
version = "0.15.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1358bd1558bd2a083fed428ffeda486fbfb323e698cdda7794259d592ca72db"
dependencies = [
"aliasable",
"ouroboros_macro 0.15.6",
]
[[package]] [[package]]
name = "ouroboros" name = "ouroboros"
version = "0.18.5" version = "0.18.5"
@@ -1963,10 +2197,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59"
dependencies = [ dependencies = [
"aliasable", "aliasable",
"ouroboros_macro", "ouroboros_macro 0.18.5",
"static_assertions", "static_assertions",
] ]
[[package]]
name = "ouroboros_macro"
version = "0.15.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f7d21ccd03305a674437ee1248f3ab5d4b1db095cf1caf49f1713ddf61956b7"
dependencies = [
"Inflector",
"proc-macro-error",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "ouroboros_macro" name = "ouroboros_macro"
version = "0.18.5" version = "0.18.5"
@@ -2025,6 +2272,12 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permutation"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.8.6" version = "2.8.6"
@@ -2188,7 +2441,7 @@ dependencies = [
"nix", "nix",
"once_cell", "once_cell",
"openssl-probe 0.1.6", "openssl-probe 0.1.6",
"ouroboros", "ouroboros 0.18.5",
"parking_lot", "parking_lot",
"percent-encoding", "percent-encoding",
"pingora-error", "pingora-error",
@@ -2377,6 +2630,34 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]
[[package]] [[package]]
name = "portable-atomic" name = "portable-atomic"
version = "1.13.1" version = "1.13.1"
@@ -2416,6 +2697,30 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn 1.0.109",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]] [[package]]
name = "proc-macro-error-attr2" name = "proc-macro-error-attr2"
version = "2.0.0" version = "2.0.0"
@@ -2492,7 +2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"itertools", "itertools 0.14.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.117", "syn 2.0.117",
@@ -2578,6 +2883,26 @@ dependencies = [
"getrandom 0.3.4", "getrandom 0.3.4",
] ]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.18" version = "0.5.18"
@@ -2699,6 +3024,12 @@ version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
@@ -2800,6 +3131,15 @@ version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "schannel" name = "schannel"
version = "0.1.28" version = "0.1.28"
@@ -2882,7 +3222,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
dependencies = [ dependencies = [
"ordered-float", "ordered-float 2.10.1",
"serde", "serde",
] ]
@@ -3089,7 +3429,13 @@ name = "sunbeam-proxy"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arc-swap",
"async-trait", "async-trait",
"bincode",
"clap",
"criterion",
"dns-lookup",
"fnntw",
"futures", "futures",
"http", "http",
"k8s-openapi", "k8s-openapi",
@@ -3103,6 +3449,7 @@ dependencies = [
"pingora-core", "pingora-core",
"pingora-http", "pingora-http",
"pingora-proxy", "pingora-proxy",
"rustc-hash",
"rustls", "rustls",
"serde", "serde",
"serde_json", "serde_json",
@@ -3135,6 +3482,12 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "sync-unsafe-cell"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8deaecba5382c095cb432cd1e21068dadb144208f057b13720e76bf89749beb4"
[[package]] [[package]]
name = "sync_wrapper" name = "sync_wrapper"
version = "1.0.2" version = "1.0.2"
@@ -3255,6 +3608,16 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.50.0" version = "1.50.0"
@@ -3660,6 +4023,16 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]] [[package]]
name = "want" name = "want"
version = "0.3.1" version = "0.3.1"
@@ -3763,6 +4136,15 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.61.2",
]
[[package]] [[package]]
name = "windows-link" name = "windows-link"
version = "0.2.1" version = "0.2.1"
@@ -3775,7 +4157,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [ dependencies = [
"windows-targets", "windows-targets 0.52.6",
] ]
[[package]] [[package]]
@@ -3784,7 +4166,16 @@ version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [ dependencies = [
"windows-targets", "windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
dependencies = [
"windows-targets 0.53.5",
] ]
[[package]] [[package]]
@@ -3802,14 +4193,31 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [ dependencies = [
"windows_aarch64_gnullvm", "windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc", "windows_aarch64_msvc 0.52.6",
"windows_i686_gnu", "windows_i686_gnu 0.52.6",
"windows_i686_gnullvm", "windows_i686_gnullvm 0.52.6",
"windows_i686_msvc", "windows_i686_msvc 0.52.6",
"windows_x86_64_gnu", "windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm", "windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc", "windows_x86_64_msvc 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.53.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
dependencies = [
"windows-link",
"windows_aarch64_gnullvm 0.53.1",
"windows_aarch64_msvc 0.53.1",
"windows_i686_gnu 0.53.1",
"windows_i686_gnullvm 0.53.1",
"windows_i686_msvc 0.53.1",
"windows_x86_64_gnu 0.53.1",
"windows_x86_64_gnullvm 0.53.1",
"windows_x86_64_msvc 0.53.1",
] ]
[[package]] [[package]]
@@ -3818,48 +4226,96 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_aarch64_msvc"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnu"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
[[package]] [[package]]
name = "windows_i686_gnullvm" name = "windows_i686_gnullvm"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_gnullvm"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_i686_msvc"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnu"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
[[package]] [[package]]
name = "windows_x86_64_gnullvm" name = "windows_x86_64_gnullvm"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "windows_x86_64_msvc"
version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.7.15" version = "0.7.15"

View File

@@ -37,6 +37,18 @@ opentelemetry-otlp = { version = "0.27", features = ["http-proto", "reqwest-c
serde_json = "1" serde_json = "1"
anyhow = "1" anyhow = "1"
# DDoS detection (KNN classifier)
clap = { version = "4", features = ["derive"] }
fnntw = "0.4"
bincode = "1"
rustc-hash = "2"
# Lock-free Arc swapping for hot-reload (scanner model)
arc-swap = "1"
# Reverse DNS for bot IP verification
dns-lookup = "2"
# Rustls crypto provider — must be installed before any TLS init # Rustls crypto provider — must be installed before any TLS init
rustls = { version = "0.23", features = ["aws-lc-rs"] } rustls = { version = "0.23", features = ["aws-lc-rs"] }
@@ -45,6 +57,13 @@ kube = { version = "3", features = ["runtime", "client"] }
k8s-openapi = { version = "0.27", features = ["v1_35"] } k8s-openapi = { version = "0.27", features = ["v1_35"] }
libc = "0.2" libc = "0.2"
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "scanner_bench"
harness = false
[profile.release] [profile.release]
opt-level = 3 opt-level = 3
lto = true lto = true

View File

@@ -18,8 +18,102 @@ pub struct Config {
pub routes: Vec<RouteConfig>, pub routes: Vec<RouteConfig>,
/// Optional SSH TCP passthrough (port 22 → Gitea SSH). /// Optional SSH TCP passthrough (port 22 → Gitea SSH).
pub ssh: Option<SshConfig>, pub ssh: Option<SshConfig>,
/// Optional KNN-based DDoS detection.
pub ddos: Option<DDoSConfig>,
/// Optional per-identity rate limiting.
pub rate_limit: Option<RateLimitConfig>,
/// Optional per-request scanner detection.
pub scanner: Option<ScannerConfig>,
} }
#[derive(Debug, Deserialize, Clone)]
pub struct DDoSConfig {
pub model_path: String,
#[serde(default = "default_k")]
pub k: usize,
#[serde(default = "default_threshold")]
pub threshold: f64,
#[serde(default = "default_window_secs")]
pub window_secs: u64,
#[serde(default = "default_window_capacity")]
pub window_capacity: usize,
#[serde(default = "default_min_events")]
pub min_events: usize,
#[serde(default = "default_enabled")]
pub enabled: bool,
}
#[derive(Debug, Deserialize, Clone)]
pub struct RateLimitConfig {
#[serde(default = "default_rl_enabled")]
pub enabled: bool,
#[serde(default)]
pub bypass_cidrs: Vec<String>,
#[serde(default = "default_eviction_interval")]
pub eviction_interval_secs: u64,
#[serde(default = "default_stale_after")]
pub stale_after_secs: u64,
pub authenticated: BucketConfig,
pub unauthenticated: BucketConfig,
}
#[derive(Debug, Deserialize, Clone)]
pub struct BucketConfig {
pub burst: u32,
pub rate: f64,
}
#[derive(Debug, Deserialize, Clone)]
pub struct ScannerConfig {
pub model_path: String,
#[serde(default = "default_scanner_threshold")]
pub threshold: f64,
#[serde(default = "default_scanner_enabled")]
pub enabled: bool,
/// How often (seconds) to check the model file for changes. 0 = no hot-reload.
#[serde(default = "default_scanner_poll_interval")]
pub poll_interval_secs: u64,
/// Bot allowlist rules. Verified bots bypass the scanner model.
#[serde(default)]
pub allowlist: Vec<BotAllowlistRule>,
/// TTL (seconds) for verified bot IP cache entries.
#[serde(default = "default_bot_cache_ttl")]
pub bot_cache_ttl_secs: u64,
}
#[derive(Debug, Deserialize, Clone)]
pub struct BotAllowlistRule {
/// Case-insensitive UA prefix to match, e.g. "Googlebot".
pub ua_prefix: String,
/// Human-readable label for pipeline logs.
pub reason: String,
/// Reverse-DNS hostname suffixes for verification.
/// e.g. ["googlebot.com", "google.com"]
#[serde(default)]
pub dns_suffixes: Vec<String>,
/// CIDR ranges for instant IP verification.
/// e.g. ["66.249.64.0/19"]
#[serde(default)]
pub cidrs: Vec<String>,
}
fn default_bot_cache_ttl() -> u64 { 86400 } // 24h
fn default_scanner_threshold() -> f64 { 0.5 }
fn default_scanner_enabled() -> bool { true }
fn default_scanner_poll_interval() -> u64 { 30 }
fn default_rl_enabled() -> bool { true }
fn default_eviction_interval() -> u64 { 300 }
fn default_stale_after() -> u64 { 600 }
fn default_k() -> usize { 5 }
fn default_threshold() -> f64 { 0.6 }
fn default_window_secs() -> u64 { 60 }
fn default_window_capacity() -> usize { 1000 }
fn default_min_events() -> usize { 10 }
fn default_enabled() -> bool { true }
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]
pub struct ListenConfig { pub struct ListenConfig {
/// HTTP listener address, e.g., "0.0.0.0:80" or "[::]:80". /// HTTP listener address, e.g., "0.0.0.0:80" or "[::]:80".

View File

@@ -3,6 +3,9 @@
// without going through the binary entry point. // without going through the binary entry point.
pub mod acme; pub mod acme;
pub mod config; pub mod config;
pub mod ddos;
pub mod dual_stack; pub mod dual_stack;
pub mod proxy; pub mod proxy;
pub mod rate_limit;
pub mod scanner;
pub mod ssh; pub mod ssh;

View File

@@ -4,16 +4,167 @@ mod watcher;
use sunbeam_proxy::{acme, config}; use sunbeam_proxy::{acme, config};
use sunbeam_proxy::proxy::SunbeamProxy; use sunbeam_proxy::proxy::SunbeamProxy;
use sunbeam_proxy::ddos;
use sunbeam_proxy::rate_limit;
use sunbeam_proxy::scanner;
use std::{collections::HashMap, sync::Arc}; use std::{collections::HashMap, sync::Arc};
use anyhow::Result; use anyhow::Result;
use clap::{Parser, Subcommand};
use kube::Client; use kube::Client;
use pingora::server::{configuration::Opt, Server}; use pingora::server::{configuration::Opt, Server};
use pingora_proxy::http_proxy_service; use pingora_proxy::http_proxy_service;
use std::sync::RwLock; use std::sync::RwLock;
#[derive(Parser)]
#[command(name = "sunbeam-proxy")]
struct Cli {
#[command(subcommand)]
command: Option<Commands>,
}
#[derive(Subcommand)]
enum Commands {
/// Start the proxy server (default if no subcommand given)
Serve {
/// Pingora --upgrade flag for zero-downtime reload
#[arg(long)]
upgrade: bool,
},
/// Replay audit logs through the DDoS detector and rate limiter
Replay {
/// Path to audit log JSONL file
#[arg(short, long)]
input: String,
/// Path to trained model file
#[arg(short, long, default_value = "ddos_model.bin")]
model: String,
/// Optional config file (for rate limit settings)
#[arg(short, long)]
config: Option<String>,
/// KNN k parameter
#[arg(long, default_value = "5")]
k: usize,
/// Attack threshold
#[arg(long, default_value = "0.6")]
threshold: f64,
/// Sliding window size in seconds
#[arg(long, default_value = "60")]
window_secs: u64,
/// Minimum events per IP before classification
#[arg(long, default_value = "10")]
min_events: usize,
/// Also run rate limiter during replay
#[arg(long)]
rate_limit: bool,
},
/// Train a DDoS detection model from audit logs
Train {
/// Path to audit log JSONL file
#[arg(short, long)]
input: String,
/// Output model file path
#[arg(short, long)]
output: String,
/// File with known-attack IPs (one per line)
#[arg(long)]
attack_ips: Option<String>,
/// File with known-normal IPs (one per line)
#[arg(long)]
normal_ips: Option<String>,
/// TOML file with heuristic auto-labeling thresholds
#[arg(long)]
heuristics: Option<String>,
/// KNN k parameter
#[arg(long, default_value = "5")]
k: usize,
/// Attack threshold (fraction of k neighbors)
#[arg(long, default_value = "0.6")]
threshold: f64,
/// Sliding window size in seconds
#[arg(long, default_value = "60")]
window_secs: u64,
/// Minimum events per IP to include in training
#[arg(long, default_value = "10")]
min_events: usize,
},
/// Train a per-request scanner detection model from audit logs
TrainScanner {
/// Path to audit log JSONL file
#[arg(short, long)]
input: String,
/// Output model file path
#[arg(short, long, default_value = "scanner_model.bin")]
output: String,
/// Directory (or file) containing .txt wordlists of scanner paths
#[arg(long)]
wordlists: Option<String>,
/// Classification threshold
#[arg(long, default_value = "0.5")]
threshold: f64,
},
}
fn main() -> Result<()> { fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command.unwrap_or(Commands::Serve { upgrade: false }) {
Commands::Serve { upgrade } => run_serve(upgrade),
Commands::Replay {
input,
model,
config,
k,
threshold,
window_secs,
min_events,
rate_limit,
} => ddos::replay::run(ddos::replay::ReplayArgs {
input,
model_path: model,
config_path: config,
k,
threshold,
window_secs,
min_events,
rate_limit,
}),
Commands::Train {
input,
output,
attack_ips,
normal_ips,
heuristics,
k,
threshold,
window_secs,
min_events,
} => ddos::train::run(ddos::train::TrainArgs {
input,
output,
attack_ips,
normal_ips,
heuristics,
k,
threshold,
window_secs,
min_events,
}),
Commands::TrainScanner {
input,
output,
wordlists,
threshold,
} => scanner::train::run(scanner::train::TrainScannerArgs {
input,
output,
wordlists,
threshold,
}),
}
}
fn run_serve(upgrade: bool) -> Result<()> {
// Install the aws-lc-rs crypto provider for rustls before any TLS init. // Install the aws-lc-rs crypto provider for rustls before any TLS init.
// Required because rustls 0.23 no longer auto-selects a provider at compile time. // Required because rustls 0.23 no longer auto-selects a provider at compile time.
rustls::crypto::aws_lc_rs::default_provider() rustls::crypto::aws_lc_rs::default_provider()
@@ -27,10 +178,120 @@ fn main() -> Result<()> {
// 1. Init telemetry (JSON logs + optional OTEL traces). // 1. Init telemetry (JSON logs + optional OTEL traces).
telemetry::init(&cfg.telemetry.otlp_endpoint); telemetry::init(&cfg.telemetry.otlp_endpoint);
// 2. Detect --upgrade flag. When present, Pingora inherits listening socket // 2. Load DDoS detection model if configured.
// FDs from the upgrade Unix socket instead of binding fresh ports, enabling let ddos_detector = if let Some(ddos_cfg) = &cfg.ddos {
// zero-downtime cert/config reloads triggered by the K8s watcher below. if ddos_cfg.enabled {
let upgrade = std::env::args().any(|a| a == "--upgrade"); match ddos::model::TrainedModel::load(
std::path::Path::new(&ddos_cfg.model_path),
Some(ddos_cfg.k),
Some(ddos_cfg.threshold),
) {
Ok(model) => {
let point_count = model.point_count();
let detector = Arc::new(ddos::detector::DDoSDetector::new(model, ddos_cfg));
tracing::info!(
points = point_count,
k = ddos_cfg.k,
threshold = ddos_cfg.threshold,
"DDoS detector loaded"
);
Some(detector)
}
Err(e) => {
tracing::warn!(error = %e, "failed to load DDoS model; detection disabled");
None
}
}
} else {
None
}
} else {
None
};
// 2b. Init rate limiter if configured.
let rate_limiter = if let Some(rl_cfg) = &cfg.rate_limit {
if rl_cfg.enabled {
let limiter = Arc::new(rate_limit::limiter::RateLimiter::new(rl_cfg));
let evict_limiter = limiter.clone();
let interval = rl_cfg.eviction_interval_secs;
std::thread::spawn(move || loop {
std::thread::sleep(std::time::Duration::from_secs(interval));
evict_limiter.evict_stale();
});
tracing::info!(
auth_burst = rl_cfg.authenticated.burst,
auth_rate = rl_cfg.authenticated.rate,
unauth_burst = rl_cfg.unauthenticated.burst,
unauth_rate = rl_cfg.unauthenticated.rate,
"rate limiter enabled"
);
Some(limiter)
} else {
None
}
} else {
None
};
// 2c. Load scanner model if configured.
let (scanner_detector, bot_allowlist) = if let Some(scanner_cfg) = &cfg.scanner {
if scanner_cfg.enabled {
match scanner::model::ScannerModel::load(std::path::Path::new(&scanner_cfg.model_path)) {
Ok(mut model) => {
let fragment_count = model.fragments.len();
model.threshold = scanner_cfg.threshold;
let detector = scanner::detector::ScannerDetector::new(&model, &cfg.routes);
let handle = Arc::new(arc_swap::ArcSwap::from_pointee(detector));
// Start bot allowlist if rules are configured.
let bot_allowlist = if !scanner_cfg.allowlist.is_empty() {
let al = scanner::allowlist::BotAllowlist::spawn(
&scanner_cfg.allowlist,
scanner_cfg.bot_cache_ttl_secs,
);
tracing::info!(
rules = scanner_cfg.allowlist.len(),
"bot allowlist enabled"
);
Some(al)
} else {
None
};
// Start background file watcher for hot-reload.
if scanner_cfg.poll_interval_secs > 0 {
let watcher_handle = handle.clone();
let model_path = std::path::PathBuf::from(&scanner_cfg.model_path);
let threshold = scanner_cfg.threshold;
let routes = cfg.routes.clone();
let interval = std::time::Duration::from_secs(scanner_cfg.poll_interval_secs);
std::thread::spawn(move || {
scanner::watcher::watch_scanner_model(
watcher_handle, model_path, threshold, routes, interval,
);
});
}
tracing::info!(
fragments = fragment_count,
threshold = scanner_cfg.threshold,
poll_interval_secs = scanner_cfg.poll_interval_secs,
"scanner detector loaded"
);
(Some(handle), bot_allowlist)
}
Err(e) => {
tracing::warn!(error = %e, "failed to load scanner model; scanner detection disabled");
(None, None)
}
}
} else {
(None, None)
}
} else {
(None, None)
};
// 3. Fetch the TLS cert from K8s before Pingora binds the TLS port. // 3. Fetch the TLS cert from K8s before Pingora binds the TLS port.
// The Client is created and dropped within this temp runtime — we do NOT // The Client is created and dropped within this temp runtime — we do NOT
@@ -47,8 +308,6 @@ fn main() -> Result<()> {
if let Err(e) = if let Err(e) =
cert::fetch_and_write(&c, &cfg.tls.cert_path, &cfg.tls.key_path).await cert::fetch_and_write(&c, &cfg.tls.cert_path, &cfg.tls.key_path).await
{ {
// Non-fatal: Secret may not exist yet on first deploy (cert-manager
// is still issuing), or the Secret name may differ in dev.
tracing::warn!(error = %e, "cert fetch from K8s failed; using existing files"); tracing::warn!(error = %e, "cert fetch from K8s failed; using existing files");
} }
} }
@@ -83,6 +342,10 @@ fn main() -> Result<()> {
let proxy = SunbeamProxy { let proxy = SunbeamProxy {
routes: cfg.routes.clone(), routes: cfg.routes.clone(),
acme_routes: acme_routes.clone(), acme_routes: acme_routes.clone(),
ddos_detector,
scanner_detector,
bot_allowlist,
rate_limiter,
}; };
let mut svc = http_proxy_service(&server.configuration, proxy); let mut svc = http_proxy_service(&server.configuration, proxy);
@@ -90,11 +353,6 @@ fn main() -> Result<()> {
svc.add_tcp(&cfg.listen.http); svc.add_tcp(&cfg.listen.http);
// Port 443: only add the TLS listener if the cert files exist. // Port 443: only add the TLS listener if the cert files exist.
// On first deploy cert-manager hasn't issued the cert yet, so we start
// HTTP-only. Once the pingora-tls Secret is created (ACME challenge
// completes), the watcher in step 6 writes the cert files and triggers
// a graceful upgrade. The upgrade process finds the cert files and adds
// the TLS listener, inheriting the port-80 socket from the old process.
let cert_exists = std::path::Path::new(&cfg.tls.cert_path).exists(); let cert_exists = std::path::Path::new(&cfg.tls.cert_path).exists();
if cert_exists { if cert_exists {
svc.add_tls(&cfg.listen.https, &cfg.tls.cert_path, &cfg.tls.key_path)?; svc.add_tls(&cfg.listen.https, &cfg.tls.cert_path, &cfg.tls.key_path)?;
@@ -109,7 +367,6 @@ fn main() -> Result<()> {
server.add_service(svc); server.add_service(svc);
// 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured. // 5b. SSH TCP passthrough (port 22 → Gitea SSH), if configured.
// Runs on its own OS thread + Tokio runtime — same pattern as the cert/ingress watcher.
if let Some(ssh_cfg) = &cfg.ssh { if let Some(ssh_cfg) = &cfg.ssh {
let listen = ssh_cfg.listen.clone(); let listen = ssh_cfg.listen.clone();
let backend = ssh_cfg.backend.clone(); let backend = ssh_cfg.backend.clone();
@@ -123,10 +380,7 @@ fn main() -> Result<()> {
}); });
} }
// 6. Background K8s watchers on their own OS thread + tokio runtime so they // 6. Background K8s watchers on their own OS thread + tokio runtime.
// don't interfere with Pingora's internal runtime. A fresh Client is
// created here so its tower workers live on this runtime (not the
// now-dropped temp runtime from step 3).
if k8s_available { if k8s_available {
let cert_path = cfg.tls.cert_path.clone(); let cert_path = cfg.tls.cert_path.clone();
let key_path = cfg.tls.key_path.clone(); let key_path = cfg.tls.key_path.clone();

View File

@@ -1,10 +1,20 @@
use crate::acme::AcmeRoutes; use crate::acme::AcmeRoutes;
use crate::config::RouteConfig; use crate::config::RouteConfig;
use crate::ddos::detector::DDoSDetector;
use crate::ddos::model::DDoSAction;
use crate::rate_limit::key;
use crate::rate_limit::limiter::{RateLimitResult, RateLimiter};
use crate::scanner::allowlist::BotAllowlist;
use crate::scanner::detector::ScannerDetector;
use crate::scanner::model::ScannerAction;
use arc_swap::ArcSwap;
use async_trait::async_trait; use async_trait::async_trait;
use http::header::{CONNECTION, EXPECT, HOST, UPGRADE}; use http::header::{CONNECTION, EXPECT, HOST, UPGRADE};
use pingora_core::{upstreams::peer::HttpPeer, Result}; use pingora_core::{upstreams::peer::HttpPeer, Result};
use pingora_http::{RequestHeader, ResponseHeader}; use pingora_http::{RequestHeader, ResponseHeader};
use pingora_proxy::{ProxyHttp, Session}; use pingora_proxy::{ProxyHttp, Session};
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Instant; use std::time::Instant;
pub struct SunbeamProxy { pub struct SunbeamProxy {
@@ -12,6 +22,14 @@ pub struct SunbeamProxy {
/// Per-challenge route table populated by the Ingress watcher. /// Per-challenge route table populated by the Ingress watcher.
/// Maps `/.well-known/acme-challenge/<token>` → solver service address. /// Maps `/.well-known/acme-challenge/<token>` → solver service address.
pub acme_routes: AcmeRoutes, pub acme_routes: AcmeRoutes,
/// Optional KNN-based DDoS detector.
pub ddos_detector: Option<Arc<DDoSDetector>>,
/// Optional per-request scanner detector (hot-reloadable via ArcSwap).
pub scanner_detector: Option<Arc<ArcSwap<ScannerDetector>>>,
/// Optional verified-bot allowlist (bypasses scanner for known crawlers/agents).
pub bot_allowlist: Option<Arc<BotAllowlist>>,
/// Optional per-identity rate limiter.
pub rate_limiter: Option<Arc<RateLimiter>>,
} }
pub struct RequestCtx { pub struct RequestCtx {
@@ -41,6 +59,37 @@ fn extract_host(session: &Session) -> String {
.to_string() .to_string()
} }
/// Extract the real client IP, preferring trusted proxy headers.
///
/// Priority: CF-Connecting-IP → X-Real-IP → X-Forwarded-For (first) → socket addr.
/// All traffic arrives via Cloudflare, so CF-Connecting-IP is the authoritative
/// real client IP. The socket address is the Cloudflare edge node.
fn extract_client_ip(session: &Session) -> Option<IpAddr> {
let headers = &session.req_header().headers;
for header in &["cf-connecting-ip", "x-real-ip"] {
if let Some(val) = headers.get(*header).and_then(|v| v.to_str().ok()) {
if let Ok(ip) = val.trim().parse::<IpAddr>() {
return Some(ip);
}
}
}
// X-Forwarded-For: client, proxy1, proxy2 — take the first entry
if let Some(val) = headers.get("x-forwarded-for").and_then(|v| v.to_str().ok()) {
if let Some(first) = val.split(',').next() {
if let Ok(ip) = first.trim().parse::<IpAddr>() {
return Some(ip);
}
}
}
// Fallback: raw socket address
session
.client_addr()
.and_then(|addr| addr.as_inet().map(|a| a.ip()))
}
/// Strip the scheme prefix from a backend URL like `http://host:port`. /// Strip the scheme prefix from a backend URL like `http://host:port`.
fn backend_addr(backend: &str) -> &str { fn backend_addr(backend: &str) -> &str {
backend backend
@@ -137,6 +186,193 @@ impl ProxyHttp for SunbeamProxy {
return Ok(true); return Ok(true);
} }
// ── Detection pipeline ───────────────────────────────────────────
// Each layer emits an unfiltered pipeline log BEFORE acting on its
// decision. This guarantees downstream training pipelines always
// have the full traffic picture:
// - "ddos" log = all HTTPS traffic (scanner training data)
// - "scanner" log = traffic that passed DDoS (rate-limit training data)
// - "rate_limit" log = traffic that passed scanner (validation data)
// DDoS detection: check the client IP against the KNN model.
if let Some(detector) = &self.ddos_detector {
if let Some(ip) = extract_client_ip(session) {
let method = session.req_header().method.as_str();
let path = session.req_header().uri.path();
let host = extract_host(session);
let user_agent = session
.req_header()
.headers
.get("user-agent")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let content_length: u64 = session
.req_header()
.headers
.get("content-length")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let has_cookies = session.req_header().headers.get("cookie").is_some();
let has_referer = session.req_header().headers.get("referer").is_some();
let has_accept_language = session.req_header().headers.get("accept-language").is_some();
let accept = session
.req_header()
.headers
.get("accept")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let ddos_action = detector.check(ip, method, path, &host, user_agent, content_length, has_cookies, has_referer, has_accept_language);
let decision = if matches!(ddos_action, DDoSAction::Block) { "block" } else { "allow" };
tracing::info!(
target = "pipeline",
layer = "ddos",
decision,
method,
host = %host,
path,
client_ip = %ip,
user_agent,
content_length,
has_cookies,
has_referer,
has_accept_language,
accept,
"pipeline"
);
if matches!(ddos_action, DDoSAction::Block) {
let mut resp = ResponseHeader::build(429, None)?;
resp.insert_header("Retry-After", "60")?;
resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
}
}
}
// Scanner detection: per-request classification of scanner/bot probes.
// The detector is behind ArcSwap for lock-free hot-reload.
if let Some(scanner_swap) = &self.scanner_detector {
let method = session.req_header().method.as_str();
let path = session.req_header().uri.path();
let host = extract_host(session);
let prefix = host.split('.').next().unwrap_or("");
let has_cookies = session.req_header().headers.get("cookie").is_some();
let has_referer = session.req_header().headers.get("referer").is_some();
let has_accept_language = session.req_header().headers.get("accept-language").is_some();
let accept = session
.req_header()
.headers
.get("accept")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
let user_agent = session
.req_header()
.headers
.get("user-agent")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let content_length: u64 = session
.req_header()
.headers
.get("content-length")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let client_ip = extract_client_ip(session);
// Bot allowlist: verified crawlers/agents bypass the scanner model.
// CIDR rules are instant; DNS-verified IPs are cached after
// background reverse+forward lookup.
let bot_reason = self.bot_allowlist.as_ref().and_then(|al| {
client_ip.and_then(|ip| al.check(user_agent, ip))
});
let (decision, score, reason) = if let Some(bot_reason) = bot_reason {
("allow", -1.0f64, bot_reason)
} else {
let scanner = scanner_swap.load();
let verdict = scanner.check(
method, path, prefix, has_cookies, has_referer,
has_accept_language, accept, user_agent, content_length,
);
let d = if matches!(verdict.action, ScannerAction::Block) { "block" } else { "allow" };
(d, verdict.score, verdict.reason)
};
let client_ip_str = client_ip
.map(|ip| ip.to_string())
.unwrap_or_default();
tracing::info!(
target = "pipeline",
layer = "scanner",
decision,
score,
reason,
method,
host = %host,
path,
client_ip = client_ip_str,
user_agent,
content_length,
has_cookies,
has_referer,
has_accept_language,
accept,
"pipeline"
);
if decision == "block" {
let mut resp = ResponseHeader::build(403, None)?;
resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
}
}
// Rate limiting: per-identity throttling.
if let Some(limiter) = &self.rate_limiter {
if let Some(ip) = extract_client_ip(session) {
let cookie = session
.req_header()
.headers
.get("cookie")
.and_then(|v| v.to_str().ok());
let auth = session
.req_header()
.headers
.get("authorization")
.and_then(|v| v.to_str().ok());
let rl_key = key::extract_key(cookie, auth, ip);
let rl_result = limiter.check(ip, rl_key);
let decision = if matches!(rl_result, RateLimitResult::Reject { .. }) { "block" } else { "allow" };
tracing::info!(
target = "pipeline",
layer = "rate_limit",
decision,
method = %session.req_header().method,
host = %extract_host(session),
path = %session.req_header().uri.path(),
client_ip = %ip,
user_agent = session.req_header().headers.get("user-agent").and_then(|v| v.to_str().ok()).unwrap_or("-"),
has_cookies = cookie.is_some(),
"pipeline"
);
if let RateLimitResult::Reject { retry_after } = rl_result {
let mut resp = ResponseHeader::build(429, None)?;
resp.insert_header("Retry-After", retry_after.to_string())?;
resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
}
}
}
// Reject unknown host prefixes with 404. // Reject unknown host prefixes with 404.
let host = extract_host(session); let host = extract_host(session);
let prefix = host.split('.').next().unwrap_or(""); let prefix = host.split('.').next().unwrap_or("");
@@ -311,30 +547,92 @@ impl ProxyHttp for SunbeamProxy {
let status = session let status = session
.response_written() .response_written()
.map_or(0, |r| r.status.as_u16()); .map_or(0, |r| r.status.as_u16());
let duration_ms = ctx.start_time.elapsed().as_millis(); let duration_ms = ctx.start_time.elapsed().as_millis() as u64;
let backend = ctx let backend = ctx
.route .route
.as_ref() .as_ref()
.map(|r| r.backend.as_str()) .map(|r| r.backend.as_str())
.unwrap_or("-"); .unwrap_or("-");
let client_ip = session let client_ip = extract_client_ip(session)
.client_addr() .map(|ip| ip.to_string())
.map(|a| a.to_string()) .unwrap_or_else(|| {
.unwrap_or_else(|| "-".to_string()); session
.client_addr()
.map(|a| a.to_string())
.unwrap_or_else(|| "-".to_string())
});
let error_str = error.map(|e| e.to_string()); let error_str = error.map(|e| e.to_string());
let content_length: u64 = session
.req_header()
.headers
.get("content-length")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let user_agent = session
.req_header()
.headers
.get("user-agent")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let referer = session
.req_header()
.headers
.get("referer")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let accept_language = session
.req_header()
.headers
.get("accept-language")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let accept = session
.req_header()
.headers
.get("accept")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let has_cookies = session
.req_header()
.headers
.get("cookie")
.is_some();
let cf_country = session
.req_header()
.headers
.get("cf-ipcountry")
.and_then(|v| v.to_str().ok())
.unwrap_or("-");
let query = session.req_header().uri.query().unwrap_or("");
tracing::info!( tracing::info!(
target = "audit", target = "audit",
method = %session.req_header().method, method = %session.req_header().method,
host = %extract_host(session), host = %extract_host(session),
path = %session.req_header().uri.path(), path = %session.req_header().uri.path(),
query,
client_ip, client_ip,
status, status,
duration_ms, duration_ms,
content_length,
user_agent,
referer,
accept_language,
accept,
has_cookies,
cf_country,
backend, backend,
error = error_str, error = error_str,
"request" "request"
); );
if let Some(detector) = &self.ddos_detector {
if let Some(ip) = extract_client_ip(session) {
detector.record_response(ip, status, duration_ms as u32);
}
}
} }
} }

View File

@@ -101,7 +101,7 @@ fn start_proxy_once(backend_port: u16) {
paths: vec![], paths: vec![],
}]; }];
let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new()));
let proxy = SunbeamProxy { routes, acme_routes }; let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None };
let opt = Opt { let opt = Opt {
upgrade: false, upgrade: false,