From 76ad9e93e5218a33ac712cdf646436691533d163 Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 10 Mar 2026 23:38:20 +0000 Subject: [PATCH] feat(static_files): add static file serving, SPA fallback, rewrites, body rewriting, and auth subrequests Add static file serving with try_files chain ($uri, $uri.html, $uri/index.html, fallback), regex-based URL rewrites compiled at startup, response body find/replace for text/html and JS content, auth subrequests with header capture for path routes, and custom response headers per route. Extends RouteConfig with static_root, fallback, rewrites, body_rewrites, and response_headers fields. Signed-off-by: Sienna Meridian Satterwhite --- Cargo.lock | 106 +++++++++++ Cargo.toml | 7 + benches/scanner_bench.rs | 15 ++ src/config.rs | 55 ++++++ src/lib.rs | 1 + src/main.rs | 5 + src/proxy.rs | 383 +++++++++++++++++++++++++++++++++++---- src/scanner/detector.rs | 5 + src/static_files.rs | 153 ++++++++++++++++ tests/e2e.rs | 8 +- tests/scanner_test.rs | 10 + 11 files changed, 710 insertions(+), 38 deletions(-) create mode 100644 src/static_files.rs diff --git a/Cargo.lock b/Cargo.lock index 4efb8eb..8593e5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -487,6 +487,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.44" @@ -1170,8 +1176,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1181,9 +1189,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi 5.3.0", "wasip2", + "wasm-bindgen", ] [[package]] @@ -1410,6 +1420,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -1902,6 +1913,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "matchers" version = "0.2.0" @@ -2861,6 +2878,61 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2 0.5.10", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.10", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.45" @@ -3014,16 +3086,21 @@ dependencies = [ "http-body", "http-body-util", "hyper", + "hyper-rustls", "hyper-util", "js-sys", "log", "percent-encoding", "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", + "tokio-rustls", "tower 0.5.3", "tower-http", "tower-service", @@ -3031,6 +3108,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", + "webpki-roots", ] [[package]] @@ -3162,6 +3240,7 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ + "web-time", "zeroize", ] @@ -3490,6 +3569,7 @@ dependencies = [ "arc-swap", "async-trait", "bincode", + "bytes", "clap", "criterion", "dns-lookup", @@ -3508,6 +3588,8 @@ dependencies = [ "pingora-http", "pingora-proxy", "prometheus", + "regex", + "reqwest", "rustc-hash", "rustls", "serde", @@ -3678,6 +3760,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.50.0" @@ -4256,6 +4353,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi-util" version = "0.1.11" diff --git a/Cargo.toml b/Cargo.toml index fb50451..ca64aef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,13 @@ prometheus = "0.13" # Request IDs uuid = { version = "1", features = ["v4"] } +# Static file serving and body rewriting +bytes = "1" +regex = "1" + +# Auth subrequests +reqwest = { version = "0.12", features = ["rustls-tls"], default-features = false } + # Rustls crypto provider — must be installed before any TLS init rustls = { version = "0.23", features = ["aws-lc-rs"] } diff --git a/benches/scanner_bench.rs b/benches/scanner_bench.rs index 2f56626..4918bfe 100644 --- a/benches/scanner_bench.rs +++ b/benches/scanner_bench.rs @@ -45,6 +45,11 @@ fn make_detector() -> ScannerDetector { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }, RouteConfig { host_prefix: "src".into(), @@ -52,6 +57,11 @@ fn make_detector() -> ScannerDetector { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }, RouteConfig { host_prefix: "docs".into(), @@ -59,6 +69,11 @@ fn make_detector() -> ScannerDetector { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }, ]; diff --git a/src/config.rs b/src/config.rs index 9f4786d..a2f4155 100644 --- a/src/config.rs +++ b/src/config.rs @@ -149,6 +149,45 @@ pub struct PathRoute { pub strip_prefix: bool, #[serde(default)] pub websocket: bool, + /// URL for auth subrequest (like nginx `auth_request`). + /// If set, the proxy makes an HTTP request to this URL before forwarding. + /// A non-2xx response blocks the request with 403. + #[serde(default)] + pub auth_request: Option, + /// Headers to capture from the auth subrequest response and forward upstream. + #[serde(default)] + pub auth_capture_headers: Vec, + /// Prefix to prepend to the upstream path after stripping. + #[serde(default)] + pub upstream_path_prefix: Option, +} + +/// A URL rewrite rule: requests matching `pattern` are served the file at `target`. +#[derive(Debug, Deserialize, Clone)] +pub struct RewriteRule { + /// Regex pattern matched against the request path. + pub pattern: String, + /// Static file path to serve (relative to `static_root`). + pub target: String, +} + +/// A find/replace rule applied to response bodies. +#[derive(Debug, Deserialize, Clone)] +pub struct BodyRewrite { + /// String to find in the response body. + pub find: String, + /// String to replace it with. + pub replace: String, + /// Content-types to apply this rewrite to (e.g. `["text/html"]`). + #[serde(default)] + pub types: Vec, +} + +/// A response header to add to every response for this route. +#[derive(Debug, Deserialize, Clone)] +pub struct HeaderRule { + pub name: String, + pub value: String, } #[derive(Debug, Deserialize, Clone)] @@ -165,6 +204,22 @@ pub struct RouteConfig { /// If the request path matches a sub-route, its backend is used instead. #[serde(default)] pub paths: Vec, + /// Root directory for static file serving. If set, the proxy will try + /// to serve files from this directory before forwarding to the upstream. + #[serde(default)] + pub static_root: Option, + /// Fallback file for SPA routing (e.g. "index.html"). + #[serde(default)] + pub fallback: Option, + /// URL rewrite rules applied before static file lookup. + #[serde(default)] + pub rewrites: Vec, + /// Response body find/replace rules (like nginx `sub_filter`). + #[serde(default)] + pub body_rewrites: Vec, + /// Extra response headers added to every response for this route. + #[serde(default)] + pub response_headers: Vec, } impl Config { diff --git a/src/lib.rs b/src/lib.rs index d38b071..508dfd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,3 +10,4 @@ pub mod proxy; pub mod rate_limit; pub mod scanner; pub mod ssh; +pub mod static_files; diff --git a/src/main.rs b/src/main.rs index 0223fdc..9deeb63 100644 --- a/src/main.rs +++ b/src/main.rs @@ -342,6 +342,9 @@ fn run_serve(upgrade: bool) -> Result<()> { // Pingora's async proxy calls without cross-runtime waker concerns. let acme_routes: acme::AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); + let compiled_rewrites = SunbeamProxy::compile_rewrites(&cfg.routes); + let http_client = reqwest::Client::new(); + let proxy = SunbeamProxy { routes: cfg.routes.clone(), acme_routes: acme_routes.clone(), @@ -349,6 +352,8 @@ fn run_serve(upgrade: bool) -> Result<()> { scanner_detector, bot_allowlist, rate_limiter, + compiled_rewrites, + http_client, }; let mut svc = http_proxy_service(&server.configuration, proxy); diff --git a/src/proxy.rs b/src/proxy.rs index eff2f49..f8c180b 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -10,14 +10,22 @@ use crate::scanner::detector::ScannerDetector; use crate::scanner::model::ScannerAction; use arc_swap::ArcSwap; use async_trait::async_trait; +use bytes::Bytes; use http::header::{CONNECTION, EXPECT, HOST, UPGRADE}; use pingora_core::{upstreams::peer::HttpPeer, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use pingora_proxy::{ProxyHttp, Session}; +use regex::Regex; use std::net::IpAddr; use std::sync::Arc; use std::time::Instant; +/// A compiled rewrite rule (regex compiled once at startup). +pub struct CompiledRewrite { + pub pattern: Regex, + pub target: String, +} + pub struct SunbeamProxy { pub routes: Vec, /// Per-challenge route table populated by the Ingress watcher. @@ -30,6 +38,10 @@ pub struct SunbeamProxy { pub bot_allowlist: Option>, /// Optional per-identity rate limiter. pub rate_limiter: Option>, + /// Compiled rewrite rules per route (indexed by host_prefix). + pub compiled_rewrites: Vec<(String, Vec)>, + /// Shared reqwest client for auth subrequests. + pub http_client: reqwest::Client, } pub struct RequestCtx { @@ -45,12 +57,61 @@ pub struct RequestCtx { pub strip_prefix: Option, /// Original downstream scheme ("http" or "https"), captured in request_filter. pub downstream_scheme: &'static str, + /// Whether this request was served from static files (skip upstream). + pub served_static: bool, + /// Captured auth subrequest headers to forward upstream. + pub auth_headers: Vec<(String, String)>, + /// Upstream path prefix to prepend (from PathRoute config). + pub upstream_path_prefix: Option, + /// Whether response body rewriting is needed for this request. + pub body_rewrite_rules: Vec<(String, String)>, + /// Buffered response body for body rewriting. + pub body_buffer: Option>, } impl SunbeamProxy { fn find_route(&self, prefix: &str) -> Option<&RouteConfig> { self.routes.iter().find(|r| r.host_prefix == prefix) } + + fn find_rewrites(&self, prefix: &str) -> Option<&[CompiledRewrite]> { + self.compiled_rewrites + .iter() + .find(|(p, _)| p == prefix) + .map(|(_, rules)| rules.as_slice()) + } + + /// Compile all rewrite rules from routes at startup. + pub fn compile_rewrites(routes: &[RouteConfig]) -> Vec<(String, Vec)> { + routes + .iter() + .filter(|r| !r.rewrites.is_empty()) + .map(|r| { + let compiled = r + .rewrites + .iter() + .filter_map(|rw| { + match Regex::new(&rw.pattern) { + Ok(re) => Some(CompiledRewrite { + pattern: re, + target: rw.target.clone(), + }), + Err(e) => { + tracing::error!( + host_prefix = %r.host_prefix, + pattern = %rw.pattern, + error = %e, + "failed to compile rewrite regex" + ); + None + } + } + }) + .collect(); + (r.host_prefix.clone(), compiled) + }) + .collect() + } } fn extract_host(session: &Session) -> String { @@ -123,6 +184,11 @@ impl ProxyHttp for SunbeamProxy { acme_backend: None, downstream_scheme: "https", strip_prefix: None, + served_static: false, + auth_headers: Vec::new(), + upstream_path_prefix: None, + body_rewrite_rules: Vec::new(), + body_buffer: None, } } @@ -181,8 +247,6 @@ impl ProxyHttp for SunbeamProxy { // Routes that explicitly opt out of HTTPS enforcement pass through. // All other requests — including unknown hosts — are redirected. - // This is as close to an L4 redirect as HTTP allows: the upstream is - // never contacted; the 301 is written directly to the downstream socket. if self .find_route(prefix) .map(|r| r.disable_secure_redirection) @@ -305,8 +369,6 @@ impl ProxyHttp for SunbeamProxy { let client_ip = extract_client_ip(session); // Bot allowlist: verified crawlers/agents bypass the scanner model. - // CIDR rules are instant; DNS-verified IPs are cached after - // background reverse+forward lookup. let bot_reason = self.bot_allowlist.as_ref().and_then(|al| { client_ip.and_then(|ip| al.check(user_agent, ip)) }); @@ -405,27 +467,139 @@ impl ProxyHttp for SunbeamProxy { // Reject unknown host prefixes with 404. let host = extract_host(session); let prefix = host.split('.').next().unwrap_or(""); - if self.find_route(prefix).is_none() { - let mut resp = ResponseHeader::build(404, None)?; - resp.insert_header("Content-Length", "0")?; - session.write_response_header(Box::new(resp), true).await?; - return Ok(true); + let route = match self.find_route(prefix) { + Some(r) => r, + None => { + let mut resp = ResponseHeader::build(404, None)?; + resp.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(resp), true).await?; + return Ok(true); + } + }; + + // Store route early so downstream hooks can access it. + ctx.route = Some(route.clone()); + + // ── Static file serving ────────────────────────────────────────── + if let Some(static_root) = &route.static_root { + let req_path = session.req_header().uri.path().to_string(); + + // Check path sub-routes first: if a path route matches, skip static + // serving and let it go to the upstream backend. + let path_route_match = route + .paths + .iter() + .any(|p| req_path.starts_with(p.prefix.as_str())); + + if !path_route_match { + // Apply rewrite rules before static file lookup. + let mut serve_path = req_path.clone(); + if let Some(rewrites) = self.find_rewrites(prefix) { + for rw in rewrites { + if rw.pattern.is_match(&req_path) { + serve_path = rw.target.clone(); + break; + } + } + } + + let extra_headers: Vec<(String, String)> = route + .response_headers + .iter() + .map(|h| (h.name.clone(), h.value.clone())) + .collect(); + + let served = crate::static_files::try_serve( + session, + static_root, + route.fallback.as_deref(), + &serve_path, + extra_headers, + ) + .await?; + + if served { + ctx.served_static = true; + ctx.route = Some(route.clone()); + return Ok(true); + } + } + } + + // ── Auth subrequest for path routes ────────────────────────────── + { + let req_path = session.req_header().uri.path().to_string(); + let path_route = route + .paths + .iter() + .filter(|p| req_path.starts_with(p.prefix.as_str())) + .max_by_key(|p| p.prefix.len()); + + if let Some(pr) = path_route { + if let Some(auth_url) = &pr.auth_request { + // Forward the original request's cookies and auth headers. + let mut auth_req = self.http_client.get(auth_url); + if let Some(cookie) = session.req_header().headers.get("cookie") { + auth_req = auth_req.header("cookie", cookie.to_str().unwrap_or("")); + } + if let Some(auth_hdr) = session.req_header().headers.get("authorization") { + auth_req = auth_req.header("authorization", auth_hdr.to_str().unwrap_or("")); + } + // Forward the original path for context. + auth_req = auth_req.header("x-original-uri", &req_path); + + match auth_req.send().await { + Ok(resp) if resp.status().is_success() => { + // Capture specified headers from the auth response. + for hdr_name in &pr.auth_capture_headers { + if let Some(val) = resp.headers().get(hdr_name.as_str()) { + if let Ok(v) = val.to_str() { + ctx.auth_headers.push((hdr_name.clone(), v.to_string())); + } + } + } + } + Ok(resp) => { + let status = resp.status().as_u16(); + tracing::info!( + auth_url, + status, + "auth subrequest denied" + ); + let mut r = ResponseHeader::build(403, None)?; + r.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(r), true).await?; + return Ok(true); + } + Err(e) => { + tracing::error!( + auth_url, + error = %e, + "auth subrequest failed" + ); + let mut r = ResponseHeader::build(502, None)?; + r.insert_header("Content-Length", "0")?; + session.write_response_header(Box::new(r), true).await?; + return Ok(true); + } + } + + // Store upstream_path_prefix for upstream_request_filter. + ctx.upstream_path_prefix = pr.upstream_path_prefix.clone(); + } + } + } + + // Prepare body rewrite rules if the route has them. + if !route.body_rewrites.is_empty() { + ctx.body_rewrite_rules = route + .body_rewrites + .iter() + .map(|br| (br.find.clone(), br.replace.clone())) + .collect(); } // Handle Expect: 100-continue before connecting to upstream. - // - // Docker's OCI distribution protocol sends Expect: 100-continue for - // large layer blob uploads (typically > 5 MB). Without this, Pingora - // forwards the header to the upstream (e.g. Gitea), the upstream - // responds with 100 Continue, and Pingora must then proxy that - // informational response back to the client. Pingora's handling of - // upstream informational responses is unreliable and can cause the - // upload to fail with a spurious 400 for the client. - // - // By responding with 100 Continue here — before upstream_peer is - // even called — we unblock the client immediately. The Expect header - // is stripped in upstream_request_filter so the upstream never sends - // its own 100 Continue. if session .req_header() .headers @@ -473,12 +647,20 @@ impl ProxyHttp for SunbeamProxy { if pr.strip_prefix { ctx.strip_prefix = Some(pr.prefix.clone()); } + if ctx.upstream_path_prefix.is_none() { + ctx.upstream_path_prefix = pr.upstream_path_prefix.clone(); + } ctx.route = Some(crate::config::RouteConfig { host_prefix: route.host_prefix.clone(), backend: pr.backend.clone(), websocket: pr.websocket || route.websocket, disable_secure_redirection: route.disable_secure_redirection, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }); return Ok(Box::new(HttpPeer::new( backend_addr(&pr.backend), @@ -495,7 +677,8 @@ impl ProxyHttp for SunbeamProxy { ))) } - /// Copy WebSocket upgrade headers and apply path prefix stripping. + /// Copy WebSocket upgrade headers, apply path prefix stripping, and forward + /// auth subrequest headers. async fn upstream_request_filter( &self, session: &mut Session, @@ -505,11 +688,7 @@ impl ProxyHttp for SunbeamProxy { where Self::CTX: Send + Sync, { - // Inform backends of the original downstream scheme so they can construct - // correct absolute URLs (e.g. OIDC redirect_uri, CSRF checks). - // Must use insert_header (not headers.insert) so that both base.headers - // and the CaseMap are updated together — header_to_h1_wire zips them - // and silently drops headers only present in base.headers. + // Inform backends of the original downstream scheme. upstream_req .insert_header("x-forwarded-proto", ctx.downstream_scheme) .map_err(|e| { @@ -537,11 +716,20 @@ impl ProxyHttp for SunbeamProxy { } } - // Strip Expect: 100-continue — the proxy already sent 100 Continue to - // the downstream client in request_filter, so we must not forward the - // header to the upstream. If the upstream also sees Expect it will - // send its own 100 Continue, which Pingora cannot reliably proxy back - // (it has already been consumed) and which can corrupt the response. + // Forward captured auth subrequest headers (pass owned Strings — + // Pingora's IntoCaseHeaderName is impl'd for String, not &str). + let auth_headers: Vec<_> = ctx.auth_headers.drain(..).collect(); + for (name, value) in auth_headers { + upstream_req.insert_header(name, value).map_err(|e| { + pingora_core::Error::because( + pingora_core::ErrorType::InternalError, + "failed to insert auth header", + e, + ) + })?; + } + + // Strip Expect: 100-continue. upstream_req.remove_header("expect"); // Strip path prefix before forwarding (e.g. /kratos → /). @@ -550,6 +738,15 @@ impl ProxyHttp for SunbeamProxy { let old_path = old_uri.path(); if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) { let new_path = if stripped.is_empty() { "/" } else { stripped }; + + // Prepend upstream_path_prefix if configured. + let new_path = if let Some(up_prefix) = &ctx.upstream_path_prefix { + let trimmed = new_path.strip_prefix('/').unwrap_or(new_path); + format!("{up_prefix}{trimmed}") + } else { + new_path.to_string() + }; + let query_part = old_uri .query() .map(|q| format!("?{q}")) @@ -568,12 +765,35 @@ impl ProxyHttp for SunbeamProxy { http::Uri::from_parts(parts).expect("valid uri parts"), ); } + } else if let Some(up_prefix) = &ctx.upstream_path_prefix { + // No strip_prefix but upstream_path_prefix is set — prepend it. + let old_uri = upstream_req.uri.clone(); + let old_path = old_uri.path(); + let trimmed = old_path.strip_prefix('/').unwrap_or(old_path); + let new_path = format!("{up_prefix}{trimmed}"); + let query_part = old_uri + .query() + .map(|q| format!("?{q}")) + .unwrap_or_default(); + let new_pq: http::uri::PathAndQuery = + format!("{new_path}{query_part}").parse().map_err(|e| { + pingora_core::Error::because( + pingora_core::ErrorType::InternalError, + "invalid uri after prefix prepend", + e, + ) + })?; + let mut parts = old_uri.into_parts(); + parts.path_and_query = Some(new_pq); + upstream_req.set_uri( + http::Uri::from_parts(parts).expect("valid uri parts"), + ); } Ok(()) } - /// Add X-Request-Id response header so clients can correlate. + /// Add X-Request-Id and custom response headers. async fn upstream_response_filter( &self, _session: &mut Session, @@ -583,10 +803,74 @@ impl ProxyHttp for SunbeamProxy { where Self::CTX: Send + Sync, { + // Add X-Request-Id to the response so clients can correlate. let _ = upstream_response.insert_header("x-request-id", &ctx.request_id); + + // Add route-level response headers (owned Strings for Pingora's IntoCaseHeaderName). + if let Some(route) = &ctx.route { + for hdr in &route.response_headers { + let _ = upstream_response.insert_header(hdr.name.clone(), hdr.value.clone()); + } + } + + // Check if body rewriting applies to this response's content-type. + if !ctx.body_rewrite_rules.is_empty() { + let content_type = upstream_response + .headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + // Only buffer text/html and application/javascript responses. + let should_rewrite = content_type.starts_with("text/html") + || content_type.starts_with("application/javascript") + || content_type.starts_with("text/javascript"); + + if should_rewrite { + ctx.body_buffer = Some(Vec::new()); + // Remove content-length since we'll modify the body. + upstream_response.remove_header("content-length"); + } else { + // Don't rewrite non-matching content types. + ctx.body_rewrite_rules.clear(); + } + } + Ok(()) } + /// Buffer and rewrite response bodies when body_rewrite rules are active. + fn response_body_filter( + &self, + _session: &mut Session, + body: &mut Option, + end_of_stream: bool, + ctx: &mut RequestCtx, + ) -> Result> + where + Self::CTX: Send + Sync, + { + if ctx.body_buffer.is_none() { + return Ok(None); + } + + // Accumulate chunks into the buffer. + if let Some(data) = body.take() { + ctx.body_buffer.as_mut().unwrap().extend_from_slice(&data); + } + + if end_of_stream { + let buffer = ctx.body_buffer.take().unwrap(); + let mut result = String::from_utf8_lossy(&buffer).into_owned(); + for (find, replace) in &ctx.body_rewrite_rules { + result = result.replace(find.as_str(), replace.as_str()); + } + *body = Some(Bytes::from(result)); + } + + Ok(None) + } + /// Emit a structured JSON audit log line for every request. async fn logging( &self, @@ -747,6 +1031,11 @@ mod tests { acme_backend: None, strip_prefix: None, downstream_scheme: "https", + served_static: false, + auth_headers: Vec::new(), + upstream_path_prefix: None, + body_rewrite_rules: Vec::new(), + body_buffer: None, }; assert_eq!(ctx.downstream_scheme, "https"); } @@ -758,8 +1047,6 @@ mod tests { } /// remove_header("expect") strips the header from the upstream request. - /// This is tested independently of the async proxy logic because - /// upstream_request_filter requires a live session. #[test] fn test_expect_header_stripped_before_upstream() { let mut req = RequestHeader::build("PUT", b"/v2/studio/image/blobs/uploads/uuid", None).unwrap(); @@ -768,7 +1055,6 @@ mod tests { assert!(req.headers.get("expect").is_some(), "expect header should be present before stripping"); req.remove_header("expect"); assert!(req.headers.get("expect").is_none(), "expect header should be gone after remove_header"); - // Content-Length must survive the strip. assert!(req.headers.get("content-length").is_some()); } @@ -778,4 +1064,27 @@ mod tests { assert_eq!(id.len(), 36); assert!(uuid::Uuid::parse_str(&id).is_ok()); } + + #[test] + fn test_compile_rewrites_valid() { + let routes = vec![RouteConfig { + host_prefix: "docs".into(), + backend: "http://localhost:8080".into(), + websocket: false, + disable_secure_redirection: false, + paths: vec![], + static_root: Some("/srv/docs".into()), + fallback: Some("index.html".into()), + rewrites: vec![crate::config::RewriteRule { + pattern: r"^/docs/[0-9a-f-]+/?$".into(), + target: "/docs/[id]/index.html".into(), + }], + body_rewrites: vec![], + response_headers: vec![], + }]; + let compiled = SunbeamProxy::compile_rewrites(&routes); + assert_eq!(compiled.len(), 1); + assert_eq!(compiled[0].1.len(), 1); + assert!(compiled[0].1[0].pattern.is_match("/docs/abc-def/")); + } } diff --git a/src/scanner/detector.rs b/src/scanner/detector.rs index 2704ce3..66a7def 100644 --- a/src/scanner/detector.rs +++ b/src/scanner/detector.rs @@ -161,6 +161,11 @@ mod tests { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }]; ScannerDetector::new(&model, &routes) } diff --git a/src/static_files.rs b/src/static_files.rs new file mode 100644 index 0000000..6d2e4a4 --- /dev/null +++ b/src/static_files.rs @@ -0,0 +1,153 @@ +use pingora_http::ResponseHeader; +use pingora_proxy::Session; +use std::path::{Path, PathBuf}; + +/// Hardcoded content-type map for common static file extensions. +pub fn content_type_for(ext: &str) -> &'static str { + match ext { + "html" | "htm" => "text/html; charset=utf-8", + "css" => "text/css; charset=utf-8", + "js" | "mjs" => "application/javascript; charset=utf-8", + "json" => "application/json; charset=utf-8", + "svg" => "image/svg+xml", + "png" => "image/png", + "jpg" | "jpeg" => "image/jpeg", + "gif" => "image/gif", + "ico" => "image/x-icon", + "webp" => "image/webp", + "avif" => "image/avif", + "woff" => "font/woff", + "woff2" => "font/woff2", + "ttf" => "font/ttf", + "otf" => "font/otf", + "eot" => "application/vnd.ms-fontobject", + "xml" => "application/xml; charset=utf-8", + "txt" => "text/plain; charset=utf-8", + "map" => "application/json", + "webmanifest" => "application/manifest+json", + "mp4" => "video/mp4", + "webm" => "video/webm", + "mp3" => "audio/mpeg", + "pdf" => "application/pdf", + "wasm" => "application/wasm", + _ => "application/octet-stream", + } +} + +/// Cache-control header value based on extension. +pub fn cache_control_for(ext: &str) -> &'static str { + match ext { + "js" | "mjs" | "css" | "woff" | "woff2" | "ttf" | "otf" | "eot" | "wasm" => { + "public, max-age=31536000, immutable" + } + "png" | "jpg" | "jpeg" | "gif" | "webp" | "avif" | "svg" | "ico" => { + "public, max-age=86400" + } + _ => "no-cache", + } +} + +/// File read result — gathered before writing to session. +struct StaticFile { + body: Vec, + content_type: &'static str, + cache_control: &'static str, + len: u64, +} + +/// Try to read a file from disk. Returns None if the file doesn't exist. +async fn read_static_file(path: &Path) -> Option { + let metadata = match tokio::fs::metadata(path).await { + Ok(m) if m.is_file() => m, + _ => return None, + }; + + let body = match tokio::fs::read(path).await { + Ok(b) => b, + Err(_) => return None, + }; + + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + Some(StaticFile { + len: metadata.len(), + body, + content_type: content_type_for(ext), + cache_control: cache_control_for(ext), + }) +} + +/// Try to resolve and serve a static file for the given request. +/// +/// Implements a `try_files` chain: +/// 1. `$uri` (exact path) +/// 2. `$uri.html` +/// 3. `$uri/index.html` +/// 4. fallback file (e.g. `index.html` for SPA) +/// +/// Returns `Ok(true)` if a response was written (caller should stop processing), +/// `Ok(false)` if no static file matched (caller should proceed to upstream). +pub async fn try_serve( + session: &mut Session, + static_root: &str, + fallback: Option<&str>, + path: &str, + extra_headers: Vec<(String, String)>, +) -> pingora_core::Result { + let root = Path::new(static_root); + + // Sanitize: reject path traversal attempts. + if path.contains("..") { + return Ok(false); + } + + // Strip leading slash for path joining. + let relative = path.strip_prefix('/').unwrap_or(path); + + // try_files chain: exact → .html → /index.html + let candidates: Vec = if relative.is_empty() { + vec![root.join("index.html")] + } else { + vec![ + root.join(relative), + root.join(format!("{relative}.html")), + root.join(format!("{relative}/index.html")), + ] + }; + + // Find the first matching file. + let mut file = None; + for candidate in &candidates { + if let Some(f) = read_static_file(candidate).await { + file = Some(f); + break; + } + } + + // Try fallback if no candidate matched. + if file.is_none() { + if let Some(fb) = fallback { + file = read_static_file(&root.join(fb)).await; + } + } + + let file = match file { + Some(f) => f, + None => return Ok(false), + }; + + // Write the response. + let mut resp = ResponseHeader::build(200, None)?; + resp.insert_header("Content-Type", file.content_type)?; + resp.insert_header("Content-Length", file.len.to_string())?; + resp.insert_header("Cache-Control", file.cache_control)?; + + for (name, value) in extra_headers { + resp.insert_header(name, value)?; + } + + session.write_response_header(Box::new(resp), false).await?; + session.write_response_body(Some(file.body.into()), true).await?; + + Ok(true) +} diff --git a/tests/e2e.rs b/tests/e2e.rs index 57e0539..039c8ea 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -99,9 +99,15 @@ fn start_proxy_once(backend_port: u16) { // without needing TLS certificates in the test environment. disable_secure_redirection: true, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }]; let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); - let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None }; + let compiled_rewrites = SunbeamProxy::compile_rewrites(&routes); + let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None, compiled_rewrites, http_client: reqwest::Client::new() }; let opt = Opt { upgrade: false, diff --git a/tests/scanner_test.rs b/tests/scanner_test.rs index b23e4dc..311f2fd 100644 --- a/tests/scanner_test.rs +++ b/tests/scanner_test.rs @@ -13,6 +13,11 @@ fn test_routes() -> Vec { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }, RouteConfig { host_prefix: "api".into(), @@ -20,6 +25,11 @@ fn test_routes() -> Vec { websocket: false, disable_secure_redirection: false, paths: vec![], + static_root: None, + fallback: None, + rewrites: vec![], + body_rewrites: vec![], + response_headers: vec![], }, ] }