feat(static_files): add static file serving, SPA fallback, rewrites, body rewriting, and auth subrequests

Add static file serving with try_files chain ($uri, $uri.html,
$uri/index.html, fallback), regex-based URL rewrites compiled at
startup, response body find/replace for text/html and JS content,
auth subrequests with header capture for path routes, and custom
response headers per route. Extends RouteConfig with static_root,
fallback, rewrites, body_rewrites, and response_headers fields.

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:20 +00:00
parent 0fd10110ff
commit 76ad9e93e5
11 changed files with 710 additions and 38 deletions

View File

@@ -10,14 +10,22 @@ use crate::scanner::detector::ScannerDetector;
use crate::scanner::model::ScannerAction;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use bytes::Bytes;
use http::header::{CONNECTION, EXPECT, HOST, UPGRADE};
use pingora_core::{upstreams::peer::HttpPeer, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_proxy::{ProxyHttp, Session};
use regex::Regex;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Instant;
/// A compiled rewrite rule (regex compiled once at startup).
pub struct CompiledRewrite {
pub pattern: Regex,
pub target: String,
}
pub struct SunbeamProxy {
pub routes: Vec<RouteConfig>,
/// Per-challenge route table populated by the Ingress watcher.
@@ -30,6 +38,10 @@ pub struct SunbeamProxy {
pub bot_allowlist: Option<Arc<BotAllowlist>>,
/// Optional per-identity rate limiter.
pub rate_limiter: Option<Arc<RateLimiter>>,
/// Compiled rewrite rules per route (indexed by host_prefix).
pub compiled_rewrites: Vec<(String, Vec<CompiledRewrite>)>,
/// Shared reqwest client for auth subrequests.
pub http_client: reqwest::Client,
}
pub struct RequestCtx {
@@ -45,12 +57,61 @@ pub struct RequestCtx {
pub strip_prefix: Option<String>,
/// Original downstream scheme ("http" or "https"), captured in request_filter.
pub downstream_scheme: &'static str,
/// Whether this request was served from static files (skip upstream).
pub served_static: bool,
/// Captured auth subrequest headers to forward upstream.
pub auth_headers: Vec<(String, String)>,
/// Upstream path prefix to prepend (from PathRoute config).
pub upstream_path_prefix: Option<String>,
/// Whether response body rewriting is needed for this request.
pub body_rewrite_rules: Vec<(String, String)>,
/// Buffered response body for body rewriting.
pub body_buffer: Option<Vec<u8>>,
}
impl SunbeamProxy {
fn find_route(&self, prefix: &str) -> Option<&RouteConfig> {
self.routes.iter().find(|r| r.host_prefix == prefix)
}
fn find_rewrites(&self, prefix: &str) -> Option<&[CompiledRewrite]> {
self.compiled_rewrites
.iter()
.find(|(p, _)| p == prefix)
.map(|(_, rules)| rules.as_slice())
}
/// Compile all rewrite rules from routes at startup.
pub fn compile_rewrites(routes: &[RouteConfig]) -> Vec<(String, Vec<CompiledRewrite>)> {
routes
.iter()
.filter(|r| !r.rewrites.is_empty())
.map(|r| {
let compiled = r
.rewrites
.iter()
.filter_map(|rw| {
match Regex::new(&rw.pattern) {
Ok(re) => Some(CompiledRewrite {
pattern: re,
target: rw.target.clone(),
}),
Err(e) => {
tracing::error!(
host_prefix = %r.host_prefix,
pattern = %rw.pattern,
error = %e,
"failed to compile rewrite regex"
);
None
}
}
})
.collect();
(r.host_prefix.clone(), compiled)
})
.collect()
}
}
fn extract_host(session: &Session) -> String {
@@ -123,6 +184,11 @@ impl ProxyHttp for SunbeamProxy {
acme_backend: None,
downstream_scheme: "https",
strip_prefix: None,
served_static: false,
auth_headers: Vec::new(),
upstream_path_prefix: None,
body_rewrite_rules: Vec::new(),
body_buffer: None,
}
}
@@ -181,8 +247,6 @@ impl ProxyHttp for SunbeamProxy {
// Routes that explicitly opt out of HTTPS enforcement pass through.
// All other requests — including unknown hosts — are redirected.
// This is as close to an L4 redirect as HTTP allows: the upstream is
// never contacted; the 301 is written directly to the downstream socket.
if self
.find_route(prefix)
.map(|r| r.disable_secure_redirection)
@@ -305,8 +369,6 @@ impl ProxyHttp for SunbeamProxy {
let client_ip = extract_client_ip(session);
// Bot allowlist: verified crawlers/agents bypass the scanner model.
// CIDR rules are instant; DNS-verified IPs are cached after
// background reverse+forward lookup.
let bot_reason = self.bot_allowlist.as_ref().and_then(|al| {
client_ip.and_then(|ip| al.check(user_agent, ip))
});
@@ -405,27 +467,139 @@ impl ProxyHttp for SunbeamProxy {
// Reject unknown host prefixes with 404.
let host = extract_host(session);
let prefix = host.split('.').next().unwrap_or("");
if self.find_route(prefix).is_none() {
let mut resp = ResponseHeader::build(404, None)?;
resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
let route = match self.find_route(prefix) {
Some(r) => r,
None => {
let mut resp = ResponseHeader::build(404, None)?;
resp.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(resp), true).await?;
return Ok(true);
}
};
// Store route early so downstream hooks can access it.
ctx.route = Some(route.clone());
// ── Static file serving ──────────────────────────────────────────
if let Some(static_root) = &route.static_root {
let req_path = session.req_header().uri.path().to_string();
// Check path sub-routes first: if a path route matches, skip static
// serving and let it go to the upstream backend.
let path_route_match = route
.paths
.iter()
.any(|p| req_path.starts_with(p.prefix.as_str()));
if !path_route_match {
// Apply rewrite rules before static file lookup.
let mut serve_path = req_path.clone();
if let Some(rewrites) = self.find_rewrites(prefix) {
for rw in rewrites {
if rw.pattern.is_match(&req_path) {
serve_path = rw.target.clone();
break;
}
}
}
let extra_headers: Vec<(String, String)> = route
.response_headers
.iter()
.map(|h| (h.name.clone(), h.value.clone()))
.collect();
let served = crate::static_files::try_serve(
session,
static_root,
route.fallback.as_deref(),
&serve_path,
extra_headers,
)
.await?;
if served {
ctx.served_static = true;
ctx.route = Some(route.clone());
return Ok(true);
}
}
}
// ── Auth subrequest for path routes ──────────────────────────────
{
let req_path = session.req_header().uri.path().to_string();
let path_route = route
.paths
.iter()
.filter(|p| req_path.starts_with(p.prefix.as_str()))
.max_by_key(|p| p.prefix.len());
if let Some(pr) = path_route {
if let Some(auth_url) = &pr.auth_request {
// Forward the original request's cookies and auth headers.
let mut auth_req = self.http_client.get(auth_url);
if let Some(cookie) = session.req_header().headers.get("cookie") {
auth_req = auth_req.header("cookie", cookie.to_str().unwrap_or(""));
}
if let Some(auth_hdr) = session.req_header().headers.get("authorization") {
auth_req = auth_req.header("authorization", auth_hdr.to_str().unwrap_or(""));
}
// Forward the original path for context.
auth_req = auth_req.header("x-original-uri", &req_path);
match auth_req.send().await {
Ok(resp) if resp.status().is_success() => {
// Capture specified headers from the auth response.
for hdr_name in &pr.auth_capture_headers {
if let Some(val) = resp.headers().get(hdr_name.as_str()) {
if let Ok(v) = val.to_str() {
ctx.auth_headers.push((hdr_name.clone(), v.to_string()));
}
}
}
}
Ok(resp) => {
let status = resp.status().as_u16();
tracing::info!(
auth_url,
status,
"auth subrequest denied"
);
let mut r = ResponseHeader::build(403, None)?;
r.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(r), true).await?;
return Ok(true);
}
Err(e) => {
tracing::error!(
auth_url,
error = %e,
"auth subrequest failed"
);
let mut r = ResponseHeader::build(502, None)?;
r.insert_header("Content-Length", "0")?;
session.write_response_header(Box::new(r), true).await?;
return Ok(true);
}
}
// Store upstream_path_prefix for upstream_request_filter.
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
}
}
}
// Prepare body rewrite rules if the route has them.
if !route.body_rewrites.is_empty() {
ctx.body_rewrite_rules = route
.body_rewrites
.iter()
.map(|br| (br.find.clone(), br.replace.clone()))
.collect();
}
// Handle Expect: 100-continue before connecting to upstream.
//
// Docker's OCI distribution protocol sends Expect: 100-continue for
// large layer blob uploads (typically > 5 MB). Without this, Pingora
// forwards the header to the upstream (e.g. Gitea), the upstream
// responds with 100 Continue, and Pingora must then proxy that
// informational response back to the client. Pingora's handling of
// upstream informational responses is unreliable and can cause the
// upload to fail with a spurious 400 for the client.
//
// By responding with 100 Continue here — before upstream_peer is
// even called — we unblock the client immediately. The Expect header
// is stripped in upstream_request_filter so the upstream never sends
// its own 100 Continue.
if session
.req_header()
.headers
@@ -473,12 +647,20 @@ impl ProxyHttp for SunbeamProxy {
if pr.strip_prefix {
ctx.strip_prefix = Some(pr.prefix.clone());
}
if ctx.upstream_path_prefix.is_none() {
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
}
ctx.route = Some(crate::config::RouteConfig {
host_prefix: route.host_prefix.clone(),
backend: pr.backend.clone(),
websocket: pr.websocket || route.websocket,
disable_secure_redirection: route.disable_secure_redirection,
paths: vec![],
static_root: None,
fallback: None,
rewrites: vec![],
body_rewrites: vec![],
response_headers: vec![],
});
return Ok(Box::new(HttpPeer::new(
backend_addr(&pr.backend),
@@ -495,7 +677,8 @@ impl ProxyHttp for SunbeamProxy {
)))
}
/// Copy WebSocket upgrade headers and apply path prefix stripping.
/// Copy WebSocket upgrade headers, apply path prefix stripping, and forward
/// auth subrequest headers.
async fn upstream_request_filter(
&self,
session: &mut Session,
@@ -505,11 +688,7 @@ impl ProxyHttp for SunbeamProxy {
where
Self::CTX: Send + Sync,
{
// Inform backends of the original downstream scheme so they can construct
// correct absolute URLs (e.g. OIDC redirect_uri, CSRF checks).
// Must use insert_header (not headers.insert) so that both base.headers
// and the CaseMap are updated together — header_to_h1_wire zips them
// and silently drops headers only present in base.headers.
// Inform backends of the original downstream scheme.
upstream_req
.insert_header("x-forwarded-proto", ctx.downstream_scheme)
.map_err(|e| {
@@ -537,11 +716,20 @@ impl ProxyHttp for SunbeamProxy {
}
}
// Strip Expect: 100-continue — the proxy already sent 100 Continue to
// the downstream client in request_filter, so we must not forward the
// header to the upstream. If the upstream also sees Expect it will
// send its own 100 Continue, which Pingora cannot reliably proxy back
// (it has already been consumed) and which can corrupt the response.
// Forward captured auth subrequest headers (pass owned Strings —
// Pingora's IntoCaseHeaderName is impl'd for String, not &str).
let auth_headers: Vec<_> = ctx.auth_headers.drain(..).collect();
for (name, value) in auth_headers {
upstream_req.insert_header(name, value).map_err(|e| {
pingora_core::Error::because(
pingora_core::ErrorType::InternalError,
"failed to insert auth header",
e,
)
})?;
}
// Strip Expect: 100-continue.
upstream_req.remove_header("expect");
// Strip path prefix before forwarding (e.g. /kratos → /).
@@ -550,6 +738,15 @@ impl ProxyHttp for SunbeamProxy {
let old_path = old_uri.path();
if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) {
let new_path = if stripped.is_empty() { "/" } else { stripped };
// Prepend upstream_path_prefix if configured.
let new_path = if let Some(up_prefix) = &ctx.upstream_path_prefix {
let trimmed = new_path.strip_prefix('/').unwrap_or(new_path);
format!("{up_prefix}{trimmed}")
} else {
new_path.to_string()
};
let query_part = old_uri
.query()
.map(|q| format!("?{q}"))
@@ -568,12 +765,35 @@ impl ProxyHttp for SunbeamProxy {
http::Uri::from_parts(parts).expect("valid uri parts"),
);
}
} else if let Some(up_prefix) = &ctx.upstream_path_prefix {
// No strip_prefix but upstream_path_prefix is set — prepend it.
let old_uri = upstream_req.uri.clone();
let old_path = old_uri.path();
let trimmed = old_path.strip_prefix('/').unwrap_or(old_path);
let new_path = format!("{up_prefix}{trimmed}");
let query_part = old_uri
.query()
.map(|q| format!("?{q}"))
.unwrap_or_default();
let new_pq: http::uri::PathAndQuery =
format!("{new_path}{query_part}").parse().map_err(|e| {
pingora_core::Error::because(
pingora_core::ErrorType::InternalError,
"invalid uri after prefix prepend",
e,
)
})?;
let mut parts = old_uri.into_parts();
parts.path_and_query = Some(new_pq);
upstream_req.set_uri(
http::Uri::from_parts(parts).expect("valid uri parts"),
);
}
Ok(())
}
/// Add X-Request-Id response header so clients can correlate.
/// Add X-Request-Id and custom response headers.
async fn upstream_response_filter(
&self,
_session: &mut Session,
@@ -583,10 +803,74 @@ impl ProxyHttp for SunbeamProxy {
where
Self::CTX: Send + Sync,
{
// Add X-Request-Id to the response so clients can correlate.
let _ = upstream_response.insert_header("x-request-id", &ctx.request_id);
// Add route-level response headers (owned Strings for Pingora's IntoCaseHeaderName).
if let Some(route) = &ctx.route {
for hdr in &route.response_headers {
let _ = upstream_response.insert_header(hdr.name.clone(), hdr.value.clone());
}
}
// Check if body rewriting applies to this response's content-type.
if !ctx.body_rewrite_rules.is_empty() {
let content_type = upstream_response
.headers
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
// Only buffer text/html and application/javascript responses.
let should_rewrite = content_type.starts_with("text/html")
|| content_type.starts_with("application/javascript")
|| content_type.starts_with("text/javascript");
if should_rewrite {
ctx.body_buffer = Some(Vec::new());
// Remove content-length since we'll modify the body.
upstream_response.remove_header("content-length");
} else {
// Don't rewrite non-matching content types.
ctx.body_rewrite_rules.clear();
}
}
Ok(())
}
/// Buffer and rewrite response bodies when body_rewrite rules are active.
fn response_body_filter(
&self,
_session: &mut Session,
body: &mut Option<Bytes>,
end_of_stream: bool,
ctx: &mut RequestCtx,
) -> Result<Option<std::time::Duration>>
where
Self::CTX: Send + Sync,
{
if ctx.body_buffer.is_none() {
return Ok(None);
}
// Accumulate chunks into the buffer.
if let Some(data) = body.take() {
ctx.body_buffer.as_mut().unwrap().extend_from_slice(&data);
}
if end_of_stream {
let buffer = ctx.body_buffer.take().unwrap();
let mut result = String::from_utf8_lossy(&buffer).into_owned();
for (find, replace) in &ctx.body_rewrite_rules {
result = result.replace(find.as_str(), replace.as_str());
}
*body = Some(Bytes::from(result));
}
Ok(None)
}
/// Emit a structured JSON audit log line for every request.
async fn logging(
&self,
@@ -747,6 +1031,11 @@ mod tests {
acme_backend: None,
strip_prefix: None,
downstream_scheme: "https",
served_static: false,
auth_headers: Vec::new(),
upstream_path_prefix: None,
body_rewrite_rules: Vec::new(),
body_buffer: None,
};
assert_eq!(ctx.downstream_scheme, "https");
}
@@ -758,8 +1047,6 @@ mod tests {
}
/// remove_header("expect") strips the header from the upstream request.
/// This is tested independently of the async proxy logic because
/// upstream_request_filter requires a live session.
#[test]
fn test_expect_header_stripped_before_upstream() {
let mut req = RequestHeader::build("PUT", b"/v2/studio/image/blobs/uploads/uuid", None).unwrap();
@@ -768,7 +1055,6 @@ mod tests {
assert!(req.headers.get("expect").is_some(), "expect header should be present before stripping");
req.remove_header("expect");
assert!(req.headers.get("expect").is_none(), "expect header should be gone after remove_header");
// Content-Length must survive the strip.
assert!(req.headers.get("content-length").is_some());
}
@@ -778,4 +1064,27 @@ mod tests {
assert_eq!(id.len(), 36);
assert!(uuid::Uuid::parse_str(&id).is_ok());
}
#[test]
fn test_compile_rewrites_valid() {
let routes = vec![RouteConfig {
host_prefix: "docs".into(),
backend: "http://localhost:8080".into(),
websocket: false,
disable_secure_redirection: false,
paths: vec![],
static_root: Some("/srv/docs".into()),
fallback: Some("index.html".into()),
rewrites: vec![crate::config::RewriteRule {
pattern: r"^/docs/[0-9a-f-]+/?$".into(),
target: "/docs/[id]/index.html".into(),
}],
body_rewrites: vec![],
response_headers: vec![],
}];
let compiled = SunbeamProxy::compile_rewrites(&routes);
assert_eq!(compiled.len(), 1);
assert_eq!(compiled[0].1.len(), 1);
assert!(compiled[0].1[0].pattern.is_match("/docs/abc-def/"));
}
}