feat(static_files): add static file serving, SPA fallback, rewrites, body rewriting, and auth subrequests
Add static file serving with try_files chain ($uri, $uri.html, $uri/index.html, fallback), regex-based URL rewrites compiled at startup, response body find/replace for text/html and JS content, auth subrequests with header capture for path routes, and custom response headers per route. Extends RouteConfig with static_root, fallback, rewrites, body_rewrites, and response_headers fields. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
383
src/proxy.rs
383
src/proxy.rs
@@ -10,14 +10,22 @@ use crate::scanner::detector::ScannerDetector;
|
||||
use crate::scanner::model::ScannerAction;
|
||||
use arc_swap::ArcSwap;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use http::header::{CONNECTION, EXPECT, HOST, UPGRADE};
|
||||
use pingora_core::{upstreams::peer::HttpPeer, Result};
|
||||
use pingora_http::{RequestHeader, ResponseHeader};
|
||||
use pingora_proxy::{ProxyHttp, Session};
|
||||
use regex::Regex;
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
/// A compiled rewrite rule (regex compiled once at startup).
|
||||
pub struct CompiledRewrite {
|
||||
pub pattern: Regex,
|
||||
pub target: String,
|
||||
}
|
||||
|
||||
pub struct SunbeamProxy {
|
||||
pub routes: Vec<RouteConfig>,
|
||||
/// Per-challenge route table populated by the Ingress watcher.
|
||||
@@ -30,6 +38,10 @@ pub struct SunbeamProxy {
|
||||
pub bot_allowlist: Option<Arc<BotAllowlist>>,
|
||||
/// Optional per-identity rate limiter.
|
||||
pub rate_limiter: Option<Arc<RateLimiter>>,
|
||||
/// Compiled rewrite rules per route (indexed by host_prefix).
|
||||
pub compiled_rewrites: Vec<(String, Vec<CompiledRewrite>)>,
|
||||
/// Shared reqwest client for auth subrequests.
|
||||
pub http_client: reqwest::Client,
|
||||
}
|
||||
|
||||
pub struct RequestCtx {
|
||||
@@ -45,12 +57,61 @@ pub struct RequestCtx {
|
||||
pub strip_prefix: Option<String>,
|
||||
/// Original downstream scheme ("http" or "https"), captured in request_filter.
|
||||
pub downstream_scheme: &'static str,
|
||||
/// Whether this request was served from static files (skip upstream).
|
||||
pub served_static: bool,
|
||||
/// Captured auth subrequest headers to forward upstream.
|
||||
pub auth_headers: Vec<(String, String)>,
|
||||
/// Upstream path prefix to prepend (from PathRoute config).
|
||||
pub upstream_path_prefix: Option<String>,
|
||||
/// Whether response body rewriting is needed for this request.
|
||||
pub body_rewrite_rules: Vec<(String, String)>,
|
||||
/// Buffered response body for body rewriting.
|
||||
pub body_buffer: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl SunbeamProxy {
|
||||
fn find_route(&self, prefix: &str) -> Option<&RouteConfig> {
|
||||
self.routes.iter().find(|r| r.host_prefix == prefix)
|
||||
}
|
||||
|
||||
fn find_rewrites(&self, prefix: &str) -> Option<&[CompiledRewrite]> {
|
||||
self.compiled_rewrites
|
||||
.iter()
|
||||
.find(|(p, _)| p == prefix)
|
||||
.map(|(_, rules)| rules.as_slice())
|
||||
}
|
||||
|
||||
/// Compile all rewrite rules from routes at startup.
|
||||
pub fn compile_rewrites(routes: &[RouteConfig]) -> Vec<(String, Vec<CompiledRewrite>)> {
|
||||
routes
|
||||
.iter()
|
||||
.filter(|r| !r.rewrites.is_empty())
|
||||
.map(|r| {
|
||||
let compiled = r
|
||||
.rewrites
|
||||
.iter()
|
||||
.filter_map(|rw| {
|
||||
match Regex::new(&rw.pattern) {
|
||||
Ok(re) => Some(CompiledRewrite {
|
||||
pattern: re,
|
||||
target: rw.target.clone(),
|
||||
}),
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
host_prefix = %r.host_prefix,
|
||||
pattern = %rw.pattern,
|
||||
error = %e,
|
||||
"failed to compile rewrite regex"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
(r.host_prefix.clone(), compiled)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_host(session: &Session) -> String {
|
||||
@@ -123,6 +184,11 @@ impl ProxyHttp for SunbeamProxy {
|
||||
acme_backend: None,
|
||||
downstream_scheme: "https",
|
||||
strip_prefix: None,
|
||||
served_static: false,
|
||||
auth_headers: Vec::new(),
|
||||
upstream_path_prefix: None,
|
||||
body_rewrite_rules: Vec::new(),
|
||||
body_buffer: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,8 +247,6 @@ impl ProxyHttp for SunbeamProxy {
|
||||
|
||||
// Routes that explicitly opt out of HTTPS enforcement pass through.
|
||||
// All other requests — including unknown hosts — are redirected.
|
||||
// This is as close to an L4 redirect as HTTP allows: the upstream is
|
||||
// never contacted; the 301 is written directly to the downstream socket.
|
||||
if self
|
||||
.find_route(prefix)
|
||||
.map(|r| r.disable_secure_redirection)
|
||||
@@ -305,8 +369,6 @@ impl ProxyHttp for SunbeamProxy {
|
||||
let client_ip = extract_client_ip(session);
|
||||
|
||||
// Bot allowlist: verified crawlers/agents bypass the scanner model.
|
||||
// CIDR rules are instant; DNS-verified IPs are cached after
|
||||
// background reverse+forward lookup.
|
||||
let bot_reason = self.bot_allowlist.as_ref().and_then(|al| {
|
||||
client_ip.and_then(|ip| al.check(user_agent, ip))
|
||||
});
|
||||
@@ -405,27 +467,139 @@ impl ProxyHttp for SunbeamProxy {
|
||||
// Reject unknown host prefixes with 404.
|
||||
let host = extract_host(session);
|
||||
let prefix = host.split('.').next().unwrap_or("");
|
||||
if self.find_route(prefix).is_none() {
|
||||
let mut resp = ResponseHeader::build(404, None)?;
|
||||
resp.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(resp), true).await?;
|
||||
return Ok(true);
|
||||
let route = match self.find_route(prefix) {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
let mut resp = ResponseHeader::build(404, None)?;
|
||||
resp.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(resp), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
};
|
||||
|
||||
// Store route early so downstream hooks can access it.
|
||||
ctx.route = Some(route.clone());
|
||||
|
||||
// ── Static file serving ──────────────────────────────────────────
|
||||
if let Some(static_root) = &route.static_root {
|
||||
let req_path = session.req_header().uri.path().to_string();
|
||||
|
||||
// Check path sub-routes first: if a path route matches, skip static
|
||||
// serving and let it go to the upstream backend.
|
||||
let path_route_match = route
|
||||
.paths
|
||||
.iter()
|
||||
.any(|p| req_path.starts_with(p.prefix.as_str()));
|
||||
|
||||
if !path_route_match {
|
||||
// Apply rewrite rules before static file lookup.
|
||||
let mut serve_path = req_path.clone();
|
||||
if let Some(rewrites) = self.find_rewrites(prefix) {
|
||||
for rw in rewrites {
|
||||
if rw.pattern.is_match(&req_path) {
|
||||
serve_path = rw.target.clone();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let extra_headers: Vec<(String, String)> = route
|
||||
.response_headers
|
||||
.iter()
|
||||
.map(|h| (h.name.clone(), h.value.clone()))
|
||||
.collect();
|
||||
|
||||
let served = crate::static_files::try_serve(
|
||||
session,
|
||||
static_root,
|
||||
route.fallback.as_deref(),
|
||||
&serve_path,
|
||||
extra_headers,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if served {
|
||||
ctx.served_static = true;
|
||||
ctx.route = Some(route.clone());
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Auth subrequest for path routes ──────────────────────────────
|
||||
{
|
||||
let req_path = session.req_header().uri.path().to_string();
|
||||
let path_route = route
|
||||
.paths
|
||||
.iter()
|
||||
.filter(|p| req_path.starts_with(p.prefix.as_str()))
|
||||
.max_by_key(|p| p.prefix.len());
|
||||
|
||||
if let Some(pr) = path_route {
|
||||
if let Some(auth_url) = &pr.auth_request {
|
||||
// Forward the original request's cookies and auth headers.
|
||||
let mut auth_req = self.http_client.get(auth_url);
|
||||
if let Some(cookie) = session.req_header().headers.get("cookie") {
|
||||
auth_req = auth_req.header("cookie", cookie.to_str().unwrap_or(""));
|
||||
}
|
||||
if let Some(auth_hdr) = session.req_header().headers.get("authorization") {
|
||||
auth_req = auth_req.header("authorization", auth_hdr.to_str().unwrap_or(""));
|
||||
}
|
||||
// Forward the original path for context.
|
||||
auth_req = auth_req.header("x-original-uri", &req_path);
|
||||
|
||||
match auth_req.send().await {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
// Capture specified headers from the auth response.
|
||||
for hdr_name in &pr.auth_capture_headers {
|
||||
if let Some(val) = resp.headers().get(hdr_name.as_str()) {
|
||||
if let Ok(v) = val.to_str() {
|
||||
ctx.auth_headers.push((hdr_name.clone(), v.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
let status = resp.status().as_u16();
|
||||
tracing::info!(
|
||||
auth_url,
|
||||
status,
|
||||
"auth subrequest denied"
|
||||
);
|
||||
let mut r = ResponseHeader::build(403, None)?;
|
||||
r.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(r), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
auth_url,
|
||||
error = %e,
|
||||
"auth subrequest failed"
|
||||
);
|
||||
let mut r = ResponseHeader::build(502, None)?;
|
||||
r.insert_header("Content-Length", "0")?;
|
||||
session.write_response_header(Box::new(r), true).await?;
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
// Store upstream_path_prefix for upstream_request_filter.
|
||||
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare body rewrite rules if the route has them.
|
||||
if !route.body_rewrites.is_empty() {
|
||||
ctx.body_rewrite_rules = route
|
||||
.body_rewrites
|
||||
.iter()
|
||||
.map(|br| (br.find.clone(), br.replace.clone()))
|
||||
.collect();
|
||||
}
|
||||
|
||||
// Handle Expect: 100-continue before connecting to upstream.
|
||||
//
|
||||
// Docker's OCI distribution protocol sends Expect: 100-continue for
|
||||
// large layer blob uploads (typically > 5 MB). Without this, Pingora
|
||||
// forwards the header to the upstream (e.g. Gitea), the upstream
|
||||
// responds with 100 Continue, and Pingora must then proxy that
|
||||
// informational response back to the client. Pingora's handling of
|
||||
// upstream informational responses is unreliable and can cause the
|
||||
// upload to fail with a spurious 400 for the client.
|
||||
//
|
||||
// By responding with 100 Continue here — before upstream_peer is
|
||||
// even called — we unblock the client immediately. The Expect header
|
||||
// is stripped in upstream_request_filter so the upstream never sends
|
||||
// its own 100 Continue.
|
||||
if session
|
||||
.req_header()
|
||||
.headers
|
||||
@@ -473,12 +647,20 @@ impl ProxyHttp for SunbeamProxy {
|
||||
if pr.strip_prefix {
|
||||
ctx.strip_prefix = Some(pr.prefix.clone());
|
||||
}
|
||||
if ctx.upstream_path_prefix.is_none() {
|
||||
ctx.upstream_path_prefix = pr.upstream_path_prefix.clone();
|
||||
}
|
||||
ctx.route = Some(crate::config::RouteConfig {
|
||||
host_prefix: route.host_prefix.clone(),
|
||||
backend: pr.backend.clone(),
|
||||
websocket: pr.websocket || route.websocket,
|
||||
disable_secure_redirection: route.disable_secure_redirection,
|
||||
paths: vec![],
|
||||
static_root: None,
|
||||
fallback: None,
|
||||
rewrites: vec![],
|
||||
body_rewrites: vec![],
|
||||
response_headers: vec![],
|
||||
});
|
||||
return Ok(Box::new(HttpPeer::new(
|
||||
backend_addr(&pr.backend),
|
||||
@@ -495,7 +677,8 @@ impl ProxyHttp for SunbeamProxy {
|
||||
)))
|
||||
}
|
||||
|
||||
/// Copy WebSocket upgrade headers and apply path prefix stripping.
|
||||
/// Copy WebSocket upgrade headers, apply path prefix stripping, and forward
|
||||
/// auth subrequest headers.
|
||||
async fn upstream_request_filter(
|
||||
&self,
|
||||
session: &mut Session,
|
||||
@@ -505,11 +688,7 @@ impl ProxyHttp for SunbeamProxy {
|
||||
where
|
||||
Self::CTX: Send + Sync,
|
||||
{
|
||||
// Inform backends of the original downstream scheme so they can construct
|
||||
// correct absolute URLs (e.g. OIDC redirect_uri, CSRF checks).
|
||||
// Must use insert_header (not headers.insert) so that both base.headers
|
||||
// and the CaseMap are updated together — header_to_h1_wire zips them
|
||||
// and silently drops headers only present in base.headers.
|
||||
// Inform backends of the original downstream scheme.
|
||||
upstream_req
|
||||
.insert_header("x-forwarded-proto", ctx.downstream_scheme)
|
||||
.map_err(|e| {
|
||||
@@ -537,11 +716,20 @@ impl ProxyHttp for SunbeamProxy {
|
||||
}
|
||||
}
|
||||
|
||||
// Strip Expect: 100-continue — the proxy already sent 100 Continue to
|
||||
// the downstream client in request_filter, so we must not forward the
|
||||
// header to the upstream. If the upstream also sees Expect it will
|
||||
// send its own 100 Continue, which Pingora cannot reliably proxy back
|
||||
// (it has already been consumed) and which can corrupt the response.
|
||||
// Forward captured auth subrequest headers (pass owned Strings —
|
||||
// Pingora's IntoCaseHeaderName is impl'd for String, not &str).
|
||||
let auth_headers: Vec<_> = ctx.auth_headers.drain(..).collect();
|
||||
for (name, value) in auth_headers {
|
||||
upstream_req.insert_header(name, value).map_err(|e| {
|
||||
pingora_core::Error::because(
|
||||
pingora_core::ErrorType::InternalError,
|
||||
"failed to insert auth header",
|
||||
e,
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Strip Expect: 100-continue.
|
||||
upstream_req.remove_header("expect");
|
||||
|
||||
// Strip path prefix before forwarding (e.g. /kratos → /).
|
||||
@@ -550,6 +738,15 @@ impl ProxyHttp for SunbeamProxy {
|
||||
let old_path = old_uri.path();
|
||||
if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) {
|
||||
let new_path = if stripped.is_empty() { "/" } else { stripped };
|
||||
|
||||
// Prepend upstream_path_prefix if configured.
|
||||
let new_path = if let Some(up_prefix) = &ctx.upstream_path_prefix {
|
||||
let trimmed = new_path.strip_prefix('/').unwrap_or(new_path);
|
||||
format!("{up_prefix}{trimmed}")
|
||||
} else {
|
||||
new_path.to_string()
|
||||
};
|
||||
|
||||
let query_part = old_uri
|
||||
.query()
|
||||
.map(|q| format!("?{q}"))
|
||||
@@ -568,12 +765,35 @@ impl ProxyHttp for SunbeamProxy {
|
||||
http::Uri::from_parts(parts).expect("valid uri parts"),
|
||||
);
|
||||
}
|
||||
} else if let Some(up_prefix) = &ctx.upstream_path_prefix {
|
||||
// No strip_prefix but upstream_path_prefix is set — prepend it.
|
||||
let old_uri = upstream_req.uri.clone();
|
||||
let old_path = old_uri.path();
|
||||
let trimmed = old_path.strip_prefix('/').unwrap_or(old_path);
|
||||
let new_path = format!("{up_prefix}{trimmed}");
|
||||
let query_part = old_uri
|
||||
.query()
|
||||
.map(|q| format!("?{q}"))
|
||||
.unwrap_or_default();
|
||||
let new_pq: http::uri::PathAndQuery =
|
||||
format!("{new_path}{query_part}").parse().map_err(|e| {
|
||||
pingora_core::Error::because(
|
||||
pingora_core::ErrorType::InternalError,
|
||||
"invalid uri after prefix prepend",
|
||||
e,
|
||||
)
|
||||
})?;
|
||||
let mut parts = old_uri.into_parts();
|
||||
parts.path_and_query = Some(new_pq);
|
||||
upstream_req.set_uri(
|
||||
http::Uri::from_parts(parts).expect("valid uri parts"),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add X-Request-Id response header so clients can correlate.
|
||||
/// Add X-Request-Id and custom response headers.
|
||||
async fn upstream_response_filter(
|
||||
&self,
|
||||
_session: &mut Session,
|
||||
@@ -583,10 +803,74 @@ impl ProxyHttp for SunbeamProxy {
|
||||
where
|
||||
Self::CTX: Send + Sync,
|
||||
{
|
||||
// Add X-Request-Id to the response so clients can correlate.
|
||||
let _ = upstream_response.insert_header("x-request-id", &ctx.request_id);
|
||||
|
||||
// Add route-level response headers (owned Strings for Pingora's IntoCaseHeaderName).
|
||||
if let Some(route) = &ctx.route {
|
||||
for hdr in &route.response_headers {
|
||||
let _ = upstream_response.insert_header(hdr.name.clone(), hdr.value.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Check if body rewriting applies to this response's content-type.
|
||||
if !ctx.body_rewrite_rules.is_empty() {
|
||||
let content_type = upstream_response
|
||||
.headers
|
||||
.get("content-type")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Only buffer text/html and application/javascript responses.
|
||||
let should_rewrite = content_type.starts_with("text/html")
|
||||
|| content_type.starts_with("application/javascript")
|
||||
|| content_type.starts_with("text/javascript");
|
||||
|
||||
if should_rewrite {
|
||||
ctx.body_buffer = Some(Vec::new());
|
||||
// Remove content-length since we'll modify the body.
|
||||
upstream_response.remove_header("content-length");
|
||||
} else {
|
||||
// Don't rewrite non-matching content types.
|
||||
ctx.body_rewrite_rules.clear();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Buffer and rewrite response bodies when body_rewrite rules are active.
|
||||
fn response_body_filter(
|
||||
&self,
|
||||
_session: &mut Session,
|
||||
body: &mut Option<Bytes>,
|
||||
end_of_stream: bool,
|
||||
ctx: &mut RequestCtx,
|
||||
) -> Result<Option<std::time::Duration>>
|
||||
where
|
||||
Self::CTX: Send + Sync,
|
||||
{
|
||||
if ctx.body_buffer.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Accumulate chunks into the buffer.
|
||||
if let Some(data) = body.take() {
|
||||
ctx.body_buffer.as_mut().unwrap().extend_from_slice(&data);
|
||||
}
|
||||
|
||||
if end_of_stream {
|
||||
let buffer = ctx.body_buffer.take().unwrap();
|
||||
let mut result = String::from_utf8_lossy(&buffer).into_owned();
|
||||
for (find, replace) in &ctx.body_rewrite_rules {
|
||||
result = result.replace(find.as_str(), replace.as_str());
|
||||
}
|
||||
*body = Some(Bytes::from(result));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Emit a structured JSON audit log line for every request.
|
||||
async fn logging(
|
||||
&self,
|
||||
@@ -747,6 +1031,11 @@ mod tests {
|
||||
acme_backend: None,
|
||||
strip_prefix: None,
|
||||
downstream_scheme: "https",
|
||||
served_static: false,
|
||||
auth_headers: Vec::new(),
|
||||
upstream_path_prefix: None,
|
||||
body_rewrite_rules: Vec::new(),
|
||||
body_buffer: None,
|
||||
};
|
||||
assert_eq!(ctx.downstream_scheme, "https");
|
||||
}
|
||||
@@ -758,8 +1047,6 @@ mod tests {
|
||||
}
|
||||
|
||||
/// remove_header("expect") strips the header from the upstream request.
|
||||
/// This is tested independently of the async proxy logic because
|
||||
/// upstream_request_filter requires a live session.
|
||||
#[test]
|
||||
fn test_expect_header_stripped_before_upstream() {
|
||||
let mut req = RequestHeader::build("PUT", b"/v2/studio/image/blobs/uploads/uuid", None).unwrap();
|
||||
@@ -768,7 +1055,6 @@ mod tests {
|
||||
assert!(req.headers.get("expect").is_some(), "expect header should be present before stripping");
|
||||
req.remove_header("expect");
|
||||
assert!(req.headers.get("expect").is_none(), "expect header should be gone after remove_header");
|
||||
// Content-Length must survive the strip.
|
||||
assert!(req.headers.get("content-length").is_some());
|
||||
}
|
||||
|
||||
@@ -778,4 +1064,27 @@ mod tests {
|
||||
assert_eq!(id.len(), 36);
|
||||
assert!(uuid::Uuid::parse_str(&id).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compile_rewrites_valid() {
|
||||
let routes = vec![RouteConfig {
|
||||
host_prefix: "docs".into(),
|
||||
backend: "http://localhost:8080".into(),
|
||||
websocket: false,
|
||||
disable_secure_redirection: false,
|
||||
paths: vec![],
|
||||
static_root: Some("/srv/docs".into()),
|
||||
fallback: Some("index.html".into()),
|
||||
rewrites: vec![crate::config::RewriteRule {
|
||||
pattern: r"^/docs/[0-9a-f-]+/?$".into(),
|
||||
target: "/docs/[id]/index.html".into(),
|
||||
}],
|
||||
body_rewrites: vec![],
|
||||
response_headers: vec![],
|
||||
}];
|
||||
let compiled = SunbeamProxy::compile_rewrites(&routes);
|
||||
assert_eq!(compiled.len(), 1);
|
||||
assert_eq!(compiled[0].1.len(), 1);
|
||||
assert!(compiled[0].1[0].pattern.is_match("/docs/abc-def/"));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user