feat(static_files): add static file serving, SPA fallback, rewrites, body rewriting, and auth subrequests

Add static file serving with try_files chain ($uri, $uri.html,
$uri/index.html, fallback), regex-based URL rewrites compiled at
startup, response body find/replace for text/html and JS content,
auth subrequests with header capture for path routes, and custom
response headers per route. Extends RouteConfig with static_root,
fallback, rewrites, body_rewrites, and response_headers fields.

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:20 +00:00
parent 0fd10110ff
commit 76ad9e93e5
11 changed files with 710 additions and 38 deletions

106
Cargo.lock generated
View File

@@ -487,6 +487,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]] [[package]]
name = "chrono" name = "chrono"
version = "0.4.44" version = "0.4.44"
@@ -1170,8 +1176,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"js-sys",
"libc", "libc",
"wasi", "wasi",
"wasm-bindgen",
] ]
[[package]] [[package]]
@@ -1181,9 +1189,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"js-sys",
"libc", "libc",
"r-efi 5.3.0", "r-efi 5.3.0",
"wasip2", "wasip2",
"wasm-bindgen",
] ]
[[package]] [[package]]
@@ -1410,6 +1420,7 @@ dependencies = [
"tokio", "tokio",
"tokio-rustls", "tokio-rustls",
"tower-service", "tower-service",
"webpki-roots",
] ]
[[package]] [[package]]
@@ -1902,6 +1913,12 @@ dependencies = [
"hashbrown 0.16.1", "hashbrown 0.16.1",
] ]
[[package]]
name = "lru-slab"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.2.0" version = "0.2.0"
@@ -2861,6 +2878,61 @@ version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "quinn"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
dependencies = [
"bytes",
"cfg_aliases",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2 0.5.10",
"thiserror 2.0.18",
"tokio",
"tracing",
"web-time",
]
[[package]]
name = "quinn-proto"
version = "0.11.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.2",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.18",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2 0.5.10",
"tracing",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.45" version = "1.0.45"
@@ -3014,16 +3086,21 @@ dependencies = [
"http-body", "http-body",
"http-body-util", "http-body-util",
"hyper", "hyper",
"hyper-rustls",
"hyper-util", "hyper-util",
"js-sys", "js-sys",
"log", "log",
"percent-encoding", "percent-encoding",
"pin-project-lite", "pin-project-lite",
"quinn",
"rustls",
"rustls-pki-types",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"sync_wrapper", "sync_wrapper",
"tokio", "tokio",
"tokio-rustls",
"tower 0.5.3", "tower 0.5.3",
"tower-http", "tower-http",
"tower-service", "tower-service",
@@ -3031,6 +3108,7 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
"wasm-bindgen-futures", "wasm-bindgen-futures",
"web-sys", "web-sys",
"webpki-roots",
] ]
[[package]] [[package]]
@@ -3162,6 +3240,7 @@ version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
dependencies = [ dependencies = [
"web-time",
"zeroize", "zeroize",
] ]
@@ -3490,6 +3569,7 @@ dependencies = [
"arc-swap", "arc-swap",
"async-trait", "async-trait",
"bincode", "bincode",
"bytes",
"clap", "clap",
"criterion", "criterion",
"dns-lookup", "dns-lookup",
@@ -3508,6 +3588,8 @@ dependencies = [
"pingora-http", "pingora-http",
"pingora-proxy", "pingora-proxy",
"prometheus", "prometheus",
"regex",
"reqwest",
"rustc-hash", "rustc-hash",
"rustls", "rustls",
"serde", "serde",
@@ -3678,6 +3760,21 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "tinyvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.50.0" version = "1.50.0"
@@ -4256,6 +4353,15 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "webpki-roots"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
dependencies = [
"rustls-pki-types",
]
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.11" version = "0.1.11"

View File

@@ -55,6 +55,13 @@ prometheus = "0.13"
# Request IDs # Request IDs
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
# Static file serving and body rewriting
bytes = "1"
regex = "1"
# Auth subrequests
reqwest = { version = "0.12", features = ["rustls-tls"], default-features = false }
# Rustls crypto provider — must be installed before any TLS init # Rustls crypto provider — must be installed before any TLS init
rustls = { version = "0.23", features = ["aws-lc-rs"] } rustls = { version = "0.23", features = ["aws-lc-rs"] }

View File

@@ -45,6 +45,11 @@ fn make_detector() -> ScannerDetector {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}, },
RouteConfig { RouteConfig {
host_prefix: "src".into(), host_prefix: "src".into(),
@@ -52,6 +57,11 @@ fn make_detector() -> ScannerDetector {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}, },
RouteConfig { RouteConfig {
host_prefix: "docs".into(), host_prefix: "docs".into(),
@@ -59,6 +69,11 @@ fn make_detector() -> ScannerDetector {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}, },
]; ];

View File

@@ -149,6 +149,45 @@ pub struct PathRoute {
pub strip_prefix: bool, pub strip_prefix: bool,
#[serde(default)] #[serde(default)]
pub websocket: bool, pub websocket: bool,
/// URL for auth subrequest (like nginx `auth_request`).
/// If set, the proxy makes an HTTP request to this URL before forwarding.
/// A non-2xx response blocks the request with 403.
#[serde(default)]
pub auth_request: Option<String>,
/// Headers to capture from the auth subrequest response and forward upstream.
#[serde(default)]
pub auth_capture_headers: Vec<String>,
/// Prefix to prepend to the upstream path after stripping.
#[serde(default)]
pub upstream_path_prefix: Option<String>,
}
/// A URL rewrite rule: requests matching `pattern` are served the file at `target`.
///
/// Patterns are compiled into regexes once at startup by
/// `SunbeamProxy::compile_rewrites`; a pattern that fails to compile is logged
/// and the rule is dropped. At request time the rules are tried in order and
/// the first one whose pattern matches the request path wins.
#[derive(Debug, Deserialize, Clone)]
pub struct RewriteRule {
/// Regex pattern matched against the request path.
pub pattern: String,
/// Static file path to serve (relative to `static_root`).
/// Used verbatim as the path to look up — capture groups from `pattern`
/// are not substituted into it.
pub target: String,
}
/// A find/replace rule applied to response bodies.
///
/// The match is a literal substring match (not a regex): the proxy applies
/// each rule with a plain string replace over the buffered response body.
#[derive(Debug, Deserialize, Clone)]
pub struct BodyRewrite {
/// String to find in the response body.
pub find: String,
/// String to replace it with.
pub replace: String,
/// Content-types to apply this rewrite to (e.g. `["text/html"]`).
/// NOTE(review): the proxy code that consumes these rules appears to copy
/// only `find`/`replace` and to decide by a fixed content-type check
/// (text/html, application/javascript, text/javascript) — confirm whether
/// this field is actually honoured anywhere.
#[serde(default)]
pub types: Vec<String>,
}
/// A response header to add to every response for this route.
#[derive(Debug, Deserialize, Clone)]
pub struct HeaderRule {
pub name: String,
pub value: String,
} }
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]
@@ -165,6 +204,22 @@ pub struct RouteConfig {
/// If the request path matches a sub-route, its backend is used instead. /// If the request path matches a sub-route, its backend is used instead.
#[serde(default)] #[serde(default)]
pub paths: Vec<PathRoute>, pub paths: Vec<PathRoute>,
/// Root directory for static file serving. If set, the proxy will try
/// to serve files from this directory before forwarding to the upstream.
#[serde(default)]
pub static_root: Option<String>,
/// Fallback file for SPA routing (e.g. "index.html").
#[serde(default)]
pub fallback: Option<String>,
/// URL rewrite rules applied before static file lookup.
#[serde(default)]
pub rewrites: Vec<RewriteRule>,
/// Response body find/replace rules (like nginx `sub_filter`).
#[serde(default)]
pub body_rewrites: Vec<BodyRewrite>,
/// Extra response headers added to every response for this route.
#[serde(default)]
pub response_headers: Vec<HeaderRule>,
} }
impl Config { impl Config {

View File

@@ -10,3 +10,4 @@ pub mod proxy;
pub mod rate_limit; pub mod rate_limit;
pub mod scanner; pub mod scanner;
pub mod ssh; pub mod ssh;
pub mod static_files;

View File

@@ -342,6 +342,9 @@ fn run_serve(upgrade: bool) -> Result<()> {
// Pingora's async proxy calls without cross-runtime waker concerns. // Pingora's async proxy calls without cross-runtime waker concerns.
let acme_routes: acme::AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); let acme_routes: acme::AcmeRoutes = Arc::new(RwLock::new(HashMap::new()));
let compiled_rewrites = SunbeamProxy::compile_rewrites(&cfg.routes);
let http_client = reqwest::Client::new();
let proxy = SunbeamProxy { let proxy = SunbeamProxy {
routes: cfg.routes.clone(), routes: cfg.routes.clone(),
acme_routes: acme_routes.clone(), acme_routes: acme_routes.clone(),
@@ -349,6 +352,8 @@ fn run_serve(upgrade: bool) -> Result<()> {
scanner_detector, scanner_detector,
bot_allowlist, bot_allowlist,
rate_limiter, rate_limiter,
compiled_rewrites,
http_client,
}; };
let mut svc = http_proxy_service(&server.configuration, proxy); let mut svc = http_proxy_service(&server.configuration, proxy);

View File

@@ -10,14 +10,22 @@ use crate::scanner::detector::ScannerDetector;
use crate::scanner::model::ScannerAction; use crate::scanner::model::ScannerAction;
use arc_swap::ArcSwap; use arc_swap::ArcSwap;
use async_trait::async_trait; use async_trait::async_trait;
use bytes::Bytes;
use http::header::{CONNECTION, EXPECT, HOST, UPGRADE}; use http::header::{CONNECTION, EXPECT, HOST, UPGRADE};
use pingora_core::{upstreams::peer::HttpPeer, Result}; use pingora_core::{upstreams::peer::HttpPeer, Result};
use pingora_http::{RequestHeader, ResponseHeader}; use pingora_http::{RequestHeader, ResponseHeader};
use pingora_proxy::{ProxyHttp, Session}; use pingora_proxy::{ProxyHttp, Session};
use regex::Regex;
use std::net::IpAddr; use std::net::IpAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::Instant; use std::time::Instant;
/// A compiled rewrite rule (regex compiled once at startup).
///
/// Produced by `SunbeamProxy::compile_rewrites` from a configured rewrite rule.
pub struct CompiledRewrite {
/// Regex compiled from the rule's configured `pattern` string.
pub pattern: Regex,
/// The rule's configured `target`, cloned unchanged from the configuration.
pub target: String,
}
pub struct SunbeamProxy { pub struct SunbeamProxy {
pub routes: Vec<RouteConfig>, pub routes: Vec<RouteConfig>,
/// Per-challenge route table populated by the Ingress watcher. /// Per-challenge route table populated by the Ingress watcher.
@@ -30,6 +38,10 @@ pub struct SunbeamProxy {
pub bot_allowlist: Option<Arc<BotAllowlist>>, pub bot_allowlist: Option<Arc<BotAllowlist>>,
/// Optional per-identity rate limiter. /// Optional per-identity rate limiter.
pub rate_limiter: Option<Arc<RateLimiter>>, pub rate_limiter: Option<Arc<RateLimiter>>,
/// Compiled rewrite rules per route (indexed by host_prefix).
pub compiled_rewrites: Vec<(String, Vec<CompiledRewrite>)>,
/// Shared reqwest client for auth subrequests.
pub http_client: reqwest::Client,
} }
pub struct RequestCtx { pub struct RequestCtx {
@@ -45,12 +57,61 @@ pub struct RequestCtx {
pub strip_prefix: Option<String>, pub strip_prefix: Option<String>,
/// Original downstream scheme ("http" or "https"), captured in request_filter. /// Original downstream scheme ("http" or "https"), captured in request_filter.
pub downstream_scheme: &'static str, pub downstream_scheme: &'static str,
/// Whether this request was served from static files (skip upstream).
pub served_static: bool,
/// Captured auth subrequest headers to forward upstream.
pub auth_headers: Vec<(String, String)>,
/// Upstream path prefix to prepend (from PathRoute config).
pub upstream_path_prefix: Option<String>,
/// Whether response body rewriting is needed for this request.
pub body_rewrite_rules: Vec<(String, String)>,
/// Buffered response body for body rewriting.
pub body_buffer: Option<Vec<u8>>,
} }
impl SunbeamProxy { impl SunbeamProxy {
fn find_route(&self, prefix: &str) -> Option<&RouteConfig> { fn find_route(&self, prefix: &str) -> Option<&RouteConfig> {
self.routes.iter().find(|r| r.host_prefix == prefix) self.routes.iter().find(|r| r.host_prefix == prefix)
} }
/// Look up the compiled rewrite rules registered for `prefix`.
///
/// Returns the rules of the first entry whose host prefix equals `prefix`,
/// or `None` when no entry exists for it.
fn find_rewrites(&self, prefix: &str) -> Option<&[CompiledRewrite]> {
    for (host_prefix, rules) in &self.compiled_rewrites {
        if host_prefix == prefix {
            return Some(rules.as_slice());
        }
    }
    None
}
/// Compile all rewrite rules from routes at startup.
///
/// Routes without any rewrite rules are omitted from the result. For every
/// other route the result holds one `(host_prefix, rules)` entry, in route
/// order. A pattern that fails to compile is logged and left out, so a
/// route's rule list may be shorter than its configuration (or even empty).
pub fn compile_rewrites(routes: &[RouteConfig]) -> Vec<(String, Vec<CompiledRewrite>)> {
    let mut table = Vec::new();
    for route in routes {
        if route.rewrites.is_empty() {
            continue;
        }

        let mut rules = Vec::new();
        for rule in &route.rewrites {
            match Regex::new(&rule.pattern) {
                Ok(re) => rules.push(CompiledRewrite {
                    pattern: re,
                    target: rule.target.clone(),
                }),
                Err(e) => {
                    // Best effort: report the bad pattern and keep the remaining rules.
                    tracing::error!(
                        host_prefix = %route.host_prefix,
                        pattern = %rule.pattern,
                        error = %e,
                        "failed to compile rewrite regex"
                    );
                }
            }
        }

        table.push((route.host_prefix.clone(), rules));
    }
    table
}
} }
fn extract_host(session: &Session) -> String { fn extract_host(session: &Session) -> String {
@@ -123,6 +184,11 @@ impl ProxyHttp for SunbeamProxy {
acme_backend: None, acme_backend: None,
downstream_scheme: "https", downstream_scheme: "https",
strip_prefix: None, strip_prefix: None,
served_static: false,
auth_headers: Vec::new(),
upstream_path_prefix: None,
body_rewrite_rules: Vec::new(),
body_buffer: None,
} }
} }
@@ -181,8 +247,6 @@ impl ProxyHttp for SunbeamProxy {
// Routes that explicitly opt out of HTTPS enforcement pass through. // Routes that explicitly opt out of HTTPS enforcement pass through.
// All other requests — including unknown hosts — are redirected. // All other requests — including unknown hosts — are redirected.
// This is as close to an L4 redirect as HTTP allows: the upstream is
// never contacted; the 301 is written directly to the downstream socket.
if self if self
.find_route(prefix) .find_route(prefix)
.map(|r| r.disable_secure_redirection) .map(|r| r.disable_secure_redirection)
@@ -305,8 +369,6 @@ impl ProxyHttp for SunbeamProxy {
let client_ip = extract_client_ip(session); let client_ip = extract_client_ip(session);
// Bot allowlist: verified crawlers/agents bypass the scanner model. // Bot allowlist: verified crawlers/agents bypass the scanner model.
// CIDR rules are instant; DNS-verified IPs are cached after
// background reverse+forward lookup.
let bot_reason = self.bot_allowlist.as_ref().and_then(|al| { let bot_reason = self.bot_allowlist.as_ref().and_then(|al| {
client_ip.and_then(|ip| al.check(user_agent, ip)) client_ip.and_then(|ip| al.check(user_agent, ip))
}); });
@@ -405,27 +467,139 @@ impl ProxyHttp for SunbeamProxy {
// Reject unknown host prefixes with 404. // Reject unknown host prefixes with 404.
let host = extract_host(session); let host = extract_host(session);
let prefix = host.split('.').next().unwrap_or(""); let prefix = host.split('.').next().unwrap_or("");
if self.find_route(prefix).is_none() { let route = match self.find_route(prefix) {
let mut resp = ResponseHeader::build(404, None)?; Some(r) => r,
resp.insert_header("Content-Length", "0")?; None => {
session.write_response_header(Box::new(resp), true).await?; let mut resp = ResponseHeader::build(404, None)?;
return Ok(true); resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
}
};
// Store route early so downstream hooks can access it.
ctx.route = Some(route.clone());
// ── Static file serving ──────────────────────────────────────────
if let Some(static_root) = &route.static_root {
let req_path = session.req_header().uri.path().to_string();
// Check path sub-routes first: if a path route matches, skip static
// serving and let it go to the upstream backend.
let path_route_match = route
.paths
.iter()
.any(|p| req_path.starts_with(p.prefix.as_str()));
if !path_route_match {
// Apply rewrite rules before static file lookup.
let mut serve_path = req_path.clone();
if let Some(rewrites) = self.find_rewrites(prefix) {
for rw in rewrites {
if rw.pattern.is_match(&req_path) {
serve_path = rw.target.clone();
break;
}
}
}
let extra_headers: Vec<(String, String)> = route
.response_headers
.iter()
.map(|h| (h.name.clone(), h.value.clone()))
.collect();
let served = crate::static_files::try_serve(
session,
static_root,
route.fallback.as_deref(),
&serve_path,
extra_headers,
)
.await?;
if served {
ctx.served_static = true;
ctx.route = Some(route.clone());
return Ok(true);
}
}
}
// ── Auth subrequest for path routes ──────────────────────────────
{
let req_path = session.req_header().uri.path().to_string();
let path_route = route
.paths
.iter()
.filter(|p| req_path.starts_with(p.prefix.as_str()))
.max_by_key(|p| p.prefix.len());
if let Some(pr) = path_route {
if let Some(auth_url) = &pr.auth_request {
// Forward the original request's cookies and auth headers.
let mut auth_req = self.http_client.get(auth_url);
if let Some(cookie) = session.req_header().headers.get("cookie") {
auth_req = auth_req.header("cookie", cookie.to_str().unwrap_or(""));
}
if let Some(auth_hdr) = session.req_header().headers.get("authorization") {
auth_req = auth_req.header("authorization", auth_hdr.to_str().unwrap_or(""));
}
// Forward the original path for context.
auth_req = auth_req.header("x-original-uri", &req_path);
match auth_req.send().await {
Ok(resp) if resp.status().is_success() => {
// Capture specified headers from the auth response.
for hdr_name in &pr.auth_capture_headers {
if let Some(val) = resp.headers().get(hdr_name.as_str()) {
if let Ok(v) = val.to_str() {
ctx.auth_headers.push((hdr_name.clone(), v.to_string()));
}
}
}
}
Ok(resp) => {
let status = resp.status().as_u16();
tracing::info!(
auth_url,
status,
"auth subrequest denied"
);
let mut r = ResponseHeader::build(403, None)?;
r.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(r), true).await?;
return Ok(true);
}
Err(e) => {
tracing::error!(
auth_url,
error = %e,
"auth subrequest failed"
);
let mut r = ResponseHeader::build(502, None)?;
r.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(r), true).await?;
return Ok(true);
}
}
// Store upstream_path_prefix for upstream_request_filter.
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
}
}
}
// Prepare body rewrite rules if the route has them.
if !route.body_rewrites.is_empty() {
ctx.body_rewrite_rules = route
.body_rewrites
.iter()
.map(|br| (br.find.clone(), br.replace.clone()))
.collect();
} }
// Handle Expect: 100-continue before connecting to upstream. // Handle Expect: 100-continue before connecting to upstream.
//
// Docker's OCI distribution protocol sends Expect: 100-continue for
// large layer blob uploads (typically > 5 MB). Without this, Pingora
// forwards the header to the upstream (e.g. Gitea), the upstream
// responds with 100 Continue, and Pingora must then proxy that
// informational response back to the client. Pingora's handling of
// upstream informational responses is unreliable and can cause the
// upload to fail with a spurious 400 for the client.
//
// By responding with 100 Continue here — before upstream_peer is
// even called — we unblock the client immediately. The Expect header
// is stripped in upstream_request_filter so the upstream never sends
// its own 100 Continue.
if session if session
.req_header() .req_header()
.headers .headers
@@ -473,12 +647,20 @@ impl ProxyHttp for SunbeamProxy {
if pr.strip_prefix { if pr.strip_prefix {
ctx.strip_prefix = Some(pr.prefix.clone()); ctx.strip_prefix = Some(pr.prefix.clone());
} }
if ctx.upstream_path_prefix.is_none() {
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
}
ctx.route = Some(crate::config::RouteConfig { ctx.route = Some(crate::config::RouteConfig {
host_prefix: route.host_prefix.clone(), host_prefix: route.host_prefix.clone(),
backend: pr.backend.clone(), backend: pr.backend.clone(),
websocket: pr.websocket || route.websocket, websocket: pr.websocket || route.websocket,
disable_secure_redirection: route.disable_secure_redirection, disable_secure_redirection: route.disable_secure_redirection,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}); });
return Ok(Box::new(HttpPeer::new( return Ok(Box::new(HttpPeer::new(
backend_addr(&pr.backend), backend_addr(&pr.backend),
@@ -495,7 +677,8 @@ impl ProxyHttp for SunbeamProxy {
))) )))
} }
/// Copy WebSocket upgrade headers and apply path prefix stripping. /// Copy WebSocket upgrade headers, apply path prefix stripping, and forward
/// auth subrequest headers.
async fn upstream_request_filter( async fn upstream_request_filter(
&self, &self,
session: &mut Session, session: &mut Session,
@@ -505,11 +688,7 @@ impl ProxyHttp for SunbeamProxy {
where where
Self::CTX: Send + Sync, Self::CTX: Send + Sync,
{ {
// Inform backends of the original downstream scheme so they can construct // Inform backends of the original downstream scheme.
// correct absolute URLs (e.g. OIDC redirect_uri, CSRF checks).
// Must use insert_header (not headers.insert) so that both base.headers
// and the CaseMap are updated together — header_to_h1_wire zips them
// and silently drops headers only present in base.headers.
upstream_req upstream_req
.insert_header("x-forwarded-proto", ctx.downstream_scheme) .insert_header("x-forwarded-proto", ctx.downstream_scheme)
.map_err(|e| { .map_err(|e| {
@@ -537,11 +716,20 @@ impl ProxyHttp for SunbeamProxy {
} }
} }
// Strip Expect: 100-continue — the proxy already sent 100 Continue to // Forward captured auth subrequest headers (pass owned Strings —
// the downstream client in request_filter, so we must not forward the // Pingora's IntoCaseHeaderName is impl'd for String, not &str).
// header to the upstream. If the upstream also sees Expect it will let auth_headers: Vec<_> = ctx.auth_headers.drain(..).collect();
// send its own 100 Continue, which Pingora cannot reliably proxy back for (name, value) in auth_headers {
// (it has already been consumed) and which can corrupt the response. upstream_req.insert_header(name, value).map_err(|e| {
pingora_core::Error::because(
pingora_core::ErrorType::InternalError,
"failed to insert auth header",
e,
)
})?;
}
// Strip Expect: 100-continue.
upstream_req.remove_header("expect"); upstream_req.remove_header("expect");
// Strip path prefix before forwarding (e.g. /kratos → /). // Strip path prefix before forwarding (e.g. /kratos → /).
@@ -550,6 +738,15 @@ impl ProxyHttp for SunbeamProxy {
let old_path = old_uri.path(); let old_path = old_uri.path();
if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) { if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) {
let new_path = if stripped.is_empty() { "/" } else { stripped }; let new_path = if stripped.is_empty() { "/" } else { stripped };
// Prepend upstream_path_prefix if configured.
let new_path = if let Some(up_prefix) = &ctx.upstream_path_prefix {
let trimmed = new_path.strip_prefix('/').unwrap_or(new_path);
format!("{up_prefix}{trimmed}")
} else {
new_path.to_string()
};
let query_part = old_uri let query_part = old_uri
.query() .query()
.map(|q| format!("?{q}")) .map(|q| format!("?{q}"))
@@ -568,12 +765,35 @@ impl ProxyHttp for SunbeamProxy {
http::Uri::from_parts(parts).expect("valid uri parts"), http::Uri::from_parts(parts).expect("valid uri parts"),
); );
} }
} else if let Some(up_prefix) = &ctx.upstream_path_prefix {
// No strip_prefix but upstream_path_prefix is set — prepend it.
let old_uri = upstream_req.uri.clone();
let old_path = old_uri.path();
let trimmed = old_path.strip_prefix('/').unwrap_or(old_path);
let new_path = format!("{up_prefix}{trimmed}");
let query_part = old_uri
.query()
.map(|q| format!("?{q}"))
.unwrap_or_default();
let new_pq: http::uri::PathAndQuery =
format!("{new_path}{query_part}").parse().map_err(|e| {
pingora_core::Error::because(
pingora_core::ErrorType::InternalError,
"invalid uri after prefix prepend",
e,
)
})?;
let mut parts = old_uri.into_parts();
parts.path_and_query = Some(new_pq);
upstream_req.set_uri(
http::Uri::from_parts(parts).expect("valid uri parts"),
);
} }
Ok(()) Ok(())
} }
/// Add X-Request-Id response header so clients can correlate. /// Add X-Request-Id and custom response headers.
async fn upstream_response_filter( async fn upstream_response_filter(
&self, &self,
_session: &mut Session, _session: &mut Session,
@@ -583,10 +803,74 @@ impl ProxyHttp for SunbeamProxy {
where where
Self::CTX: Send + Sync, Self::CTX: Send + Sync,
{ {
// Add X-Request-Id to the response so clients can correlate.
let _ = upstream_response.insert_header("x-request-id", &ctx.request_id); let _ = upstream_response.insert_header("x-request-id", &ctx.request_id);
// Add route-level response headers (owned Strings for Pingora's IntoCaseHeaderName).
if let Some(route) = &ctx.route {
for hdr in &route.response_headers {
let _ = upstream_response.insert_header(hdr.name.clone(), hdr.value.clone());
}
}
// Check if body rewriting applies to this response's content-type.
if !ctx.body_rewrite_rules.is_empty() {
let content_type = upstream_response
.headers
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
// Only buffer text/html and application/javascript responses.
let should_rewrite = content_type.starts_with("text/html")
|| content_type.starts_with("application/javascript")
|| content_type.starts_with("text/javascript");
if should_rewrite {
ctx.body_buffer = Some(Vec::new());
// Remove content-length since we'll modify the body.
upstream_response.remove_header("content-length");
} else {
// Don't rewrite non-matching content types.
ctx.body_rewrite_rules.clear();
}
}
Ok(()) Ok(())
} }
/// Buffer and rewrite response bodies when body_rewrite rules are active.
///
/// Buffering is active only while `ctx.body_buffer` is `Some`; otherwise the
/// chunk is passed through untouched. While buffering, every chunk is moved
/// into the buffer (so nothing is emitted downstream yet) and, at end of
/// stream, the whole body is rewritten and emitted as a single chunk.
///
/// The find/replace rules are applied in order to the body as text. A body
/// that is not valid UTF-8 (for example a compressed response or one in a
/// legacy charset) is emitted unchanged rather than being lossily converted,
/// which would corrupt it. Rules with an empty `find` string are skipped,
/// because replacing the empty string would insert the replacement between
/// every character.
///
/// Always returns `Ok(None)` (no delay requested).
///
/// NOTE(review): the buffer grows with the full response size; there is no
/// upper bound here.
fn response_body_filter(
    &self,
    _session: &mut Session,
    body: &mut Option<Bytes>,
    end_of_stream: bool,
    ctx: &mut RequestCtx,
) -> Result<Option<std::time::Duration>>
where
    Self::CTX: Send + Sync,
{
    let buffer = match ctx.body_buffer.as_mut() {
        Some(buffer) => buffer,
        // Buffering not enabled for this response: pass the chunk through.
        None => return Ok(None),
    };

    // Accumulate chunks into the buffer; taking the chunk withholds it from
    // the downstream until the full body has been seen.
    if let Some(data) = body.take() {
        buffer.extend_from_slice(&data);
    }

    if !end_of_stream {
        return Ok(None);
    }

    let raw = ctx.body_buffer.take().unwrap_or_default();
    let rewritten = match String::from_utf8(raw) {
        Ok(mut text) => {
            for (find, replace) in &ctx.body_rewrite_rules {
                if find.is_empty() {
                    continue;
                }
                text = text.replace(find.as_str(), replace.as_str());
            }
            Bytes::from(text)
        }
        // Not UTF-8 text: hand the original bytes back byte-for-byte.
        Err(err) => Bytes::from(err.into_bytes()),
    };
    *body = Some(rewritten);

    Ok(None)
}
/// Emit a structured JSON audit log line for every request. /// Emit a structured JSON audit log line for every request.
async fn logging( async fn logging(
&self, &self,
@@ -747,6 +1031,11 @@ mod tests {
acme_backend: None, acme_backend: None,
strip_prefix: None, strip_prefix: None,
downstream_scheme: "https", downstream_scheme: "https",
served_static: false,
auth_headers: Vec::new(),
upstream_path_prefix: None,
body_rewrite_rules: Vec::new(),
body_buffer: None,
}; };
assert_eq!(ctx.downstream_scheme, "https"); assert_eq!(ctx.downstream_scheme, "https");
} }
@@ -758,8 +1047,6 @@ mod tests {
} }
/// remove_header("expect") strips the header from the upstream request. /// remove_header("expect") strips the header from the upstream request.
/// This is tested independently of the async proxy logic because
/// upstream_request_filter requires a live session.
#[test] #[test]
fn test_expect_header_stripped_before_upstream() { fn test_expect_header_stripped_before_upstream() {
let mut req = RequestHeader::build("PUT", b"/v2/studio/image/blobs/uploads/uuid", None).unwrap(); let mut req = RequestHeader::build("PUT", b"/v2/studio/image/blobs/uploads/uuid", None).unwrap();
@@ -768,7 +1055,6 @@ mod tests {
assert!(req.headers.get("expect").is_some(), "expect header should be present before stripping"); assert!(req.headers.get("expect").is_some(), "expect header should be present before stripping");
req.remove_header("expect"); req.remove_header("expect");
assert!(req.headers.get("expect").is_none(), "expect header should be gone after remove_header"); assert!(req.headers.get("expect").is_none(), "expect header should be gone after remove_header");
// Content-Length must survive the strip.
assert!(req.headers.get("content-length").is_some()); assert!(req.headers.get("content-length").is_some());
} }
@@ -778,4 +1064,27 @@ mod tests {
assert_eq!(id.len(), 36); assert_eq!(id.len(), 36);
assert!(uuid::Uuid::parse_str(&id).is_ok()); assert!(uuid::Uuid::parse_str(&id).is_ok());
} }
/// A well-formed rewrite pattern is compiled and kept under its route.
#[test]
fn test_compile_rewrites_valid() {
    let rule = crate::config::RewriteRule {
        pattern: r"^/docs/[0-9a-f-]+/?$".into(),
        target: "/docs/[id]/index.html".into(),
    };
    let route = RouteConfig {
        host_prefix: "docs".into(),
        backend: "http://localhost:8080".into(),
        websocket: false,
        disable_secure_redirection: false,
        paths: vec![],
        static_root: Some("/srv/docs".into()),
        fallback: Some("index.html".into()),
        rewrites: vec![rule],
        body_rewrites: vec![],
        response_headers: vec![],
    };

    let compiled = SunbeamProxy::compile_rewrites(&[route]);

    // One route with rewrites in, one table entry out.
    assert_eq!(compiled.len(), 1);
    let (_, rules) = &compiled[0];
    assert_eq!(rules.len(), 1);
    assert!(rules[0].pattern.is_match("/docs/abc-def/"));
}
} }

View File

@@ -161,6 +161,11 @@ mod tests {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}]; }];
ScannerDetector::new(&model, &routes) ScannerDetector::new(&model, &routes)
} }

153
src/static_files.rs Normal file
View File

@@ -0,0 +1,153 @@
use pingora_http::ResponseHeader;
use pingora_proxy::Session;
use std::path::{Path, PathBuf};
/// Hardcoded content-type map for common static file extensions.
///
/// `ext` is the file extension without the leading dot (e.g. `"html"`).
/// Matching is ASCII case-insensitive, so `"PNG"` and `"png"` resolve to the
/// same type. Unknown or empty extensions map to `application/octet-stream`.
pub fn content_type_for(ext: &str) -> &'static str {
    // Normalise once so files such as `LOGO.PNG` are not served as opaque
    // binary downloads.
    match ext.to_ascii_lowercase().as_str() {
        "html" | "htm" => "text/html; charset=utf-8",
        "css" => "text/css; charset=utf-8",
        "js" | "mjs" => "application/javascript; charset=utf-8",
        "json" => "application/json; charset=utf-8",
        "svg" => "image/svg+xml",
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "ico" => "image/x-icon",
        "webp" => "image/webp",
        "avif" => "image/avif",
        "woff" => "font/woff",
        "woff2" => "font/woff2",
        "ttf" => "font/ttf",
        "otf" => "font/otf",
        "eot" => "application/vnd.ms-fontobject",
        "xml" => "application/xml; charset=utf-8",
        "txt" => "text/plain; charset=utf-8",
        "map" => "application/json",
        "webmanifest" => "application/manifest+json",
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "mp3" => "audio/mpeg",
        "pdf" => "application/pdf",
        "wasm" => "application/wasm",
        _ => "application/octet-stream",
    }
}
/// Cache-control header value based on extension.
///
/// `ext` is the file extension without the leading dot. Matching is ASCII
/// case-insensitive. Scripts, stylesheets, fonts and wasm get a one-year
/// immutable policy; images get one day; everything else (including an empty
/// or unknown extension) gets `no-cache`.
pub fn cache_control_for(ext: &str) -> &'static str {
    match ext.to_ascii_lowercase().as_str() {
        "js" | "mjs" | "css" | "woff" | "woff2" | "ttf" | "otf" | "eot" | "wasm" => {
            "public, max-age=31536000, immutable"
        }
        "png" | "jpg" | "jpeg" | "gif" | "webp" | "avif" | "svg" | "ico" => {
            "public, max-age=86400"
        }
        _ => "no-cache",
    }
}
/// A static file loaded fully into memory, together with the response
/// metadata derived for it, gathered before anything is written to the
/// session.
struct StaticFile {
    /// `Content-Type` header value chosen for the file.
    content_type: &'static str,
    /// `Cache-Control` header value chosen for the file.
    cache_control: &'static str,
    /// Size in bytes, sent as the `Content-Length` header value.
    len: u64,
    /// Raw file contents, sent as the response body.
    body: Vec<u8>,
}
/// Try to read a file from disk.
///
/// Returns `None` if the metadata lookup fails (e.g. the path does not
/// exist), if the path is not a regular file, or if reading it fails.
/// On success the content type and cache policy are derived from the path's
/// extension (empty string when there is none or it is not valid UTF-8).
async fn read_static_file(path: &Path) -> Option<StaticFile> {
    // Only regular files are served; directories fall through to the next
    // candidate in the caller's chain.
    let metadata = tokio::fs::metadata(path).await.ok()?;
    if !metadata.is_file() {
        return None;
    }

    let body = tokio::fs::read(path).await.ok()?;

    // Take the length from the bytes actually read rather than from the
    // earlier metadata call: the file may change between the two operations,
    // and a Content-Length that disagrees with the body corrupts the response.
    // usize -> u64 is a widening conversion on supported targets.
    let len = body.len() as u64;

    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    Some(StaticFile {
        len,
        body,
        content_type: content_type_for(ext),
        cache_control: cache_control_for(ext),
    })
}
/// Try to resolve and serve a static file for the given request.
///
/// Implements a `try_files` chain, relative to `static_root`:
/// 1. `$uri` (exact path)
/// 2. `$uri.html`
/// 3. `$uri/index.html`
/// 4. fallback file (e.g. `index.html` for SPA), when `fallback` is set
///
/// An empty path (the site root) resolves to `index.html` before the fallback.
///
/// `extra_headers` are inserted into the response after the built-in
/// `Content-Type`, `Content-Length` and `Cache-Control` headers.
///
/// Returns `Ok(true)` if a response was written (caller should stop processing),
/// `Ok(false)` if no static file matched or the path was rejected (caller
/// should proceed to upstream). Errors from building or writing the response
/// are propagated.
pub async fn try_serve(
    session: &mut Session,
    static_root: &str,
    fallback: Option<&str>,
    path: &str,
    extra_headers: Vec<(String, String)>,
) -> pingora_core::Result<bool> {
    let root = Path::new(static_root);

    // Sanitize: reject path traversal attempts.
    if path.contains("..") {
        return Ok(false);
    }

    // Strip *every* leading slash. Removing only one would turn a request for
    // `//etc/passwd` into the absolute path `/etc/passwd`, and `Path::join`
    // with an absolute path discards `root` entirely.
    let relative = path.trim_start_matches('/');

    // Defense in depth: every component must be a plain name (or `.`), so no
    // root, drive prefix or parent reference can escape `root`.
    let stays_under_root = Path::new(relative).components().all(|component| {
        matches!(
            component,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    });
    if !stays_under_root {
        return Ok(false);
    }

    // try_files chain: exact → .html → /index.html, then the optional fallback.
    let mut candidates: Vec<PathBuf> = if relative.is_empty() {
        vec![root.join("index.html")]
    } else {
        vec![
            root.join(relative),
            root.join(format!("{relative}.html")),
            root.join(format!("{relative}/index.html")),
        ]
    };
    candidates.extend(fallback.map(|fb| root.join(fb)));

    // Serve the first candidate that can be read as a regular file.
    let mut file = None;
    for candidate in &candidates {
        file = read_static_file(candidate).await;
        if file.is_some() {
            break;
        }
    }
    let file = match file {
        Some(found) => found,
        None => return Ok(false),
    };

    // Write the response.
    let mut resp = ResponseHeader::build(200, None)?;
    resp.insert_header("Content-Type", file.content_type)?;
    resp.insert_header("Content-Length", file.len.to_string())?;
    resp.insert_header("Cache-Control", file.cache_control)?;
    for (name, value) in extra_headers {
        resp.insert_header(name, value)?;
    }
    session.write_response_header(Box::new(resp), false).await?;
    session.write_response_body(Some(file.body.into()), true).await?;
    Ok(true)
}

View File

@@ -99,9 +99,15 @@ fn start_proxy_once(backend_port: u16) {
// without needing TLS certificates in the test environment. // without needing TLS certificates in the test environment.
disable_secure_redirection: true, disable_secure_redirection: true,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}]; }];
let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new())); let acme_routes: AcmeRoutes = Arc::new(RwLock::new(HashMap::new()));
let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None }; let compiled_rewrites = SunbeamProxy::compile_rewrites(&routes);
let proxy = SunbeamProxy { routes, acme_routes, ddos_detector: None, scanner_detector: None, bot_allowlist: None, rate_limiter: None, compiled_rewrites, http_client: reqwest::Client::new() };
let opt = Opt { let opt = Opt {
upgrade: false, upgrade: false,

View File

@@ -13,6 +13,11 @@ fn test_routes() -> Vec<RouteConfig> {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}, },
RouteConfig { RouteConfig {
host_prefix: "api".into(), host_prefix: "api".into(),
@@ -20,6 +25,11 @@ fn test_routes() -> Vec<RouteConfig> {
websocket: false, websocket: false,
disable_secure_redirection: false, disable_secure_redirection: false,
paths: vec![], paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
}, },
] ]
} }