feat: initial sunbeam-proxy implementation

Custom Pingora-based edge proxy for the Sunbeam infrastructure stack.

- HTTPS termination: mkcert file-based certs (local dev) or cert-manager-issued certs read from a K8s Secret (production)
- Host-prefix routing with path-based sub-routing (auth virtual host)
- HTTP→HTTPS redirect, WebSocket passthrough
- cert-manager HTTP-01 challenge routing via Kubernetes Ingress watcher
- TLS cert auto-reload via K8s Secret watcher
- JSON structured audit logging (tracing-subscriber)
- OpenTelemetry OTLP stub (disabled by default)
- Multi-stage Dockerfile: musl static binary on chainguard/static distroless image

Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
This commit is contained in:
2026-03-10 23:38:19 +00:00
commit 6ec0f78a5b
13 changed files with 5042 additions and 0 deletions

5
.dockerignore Normal file
View File

@@ -0,0 +1,5 @@
target/
.git/
.gitignore
*.md
certs/

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
/target/
certs/
*.pem
*.key
*.crt

4089
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

47
Cargo.toml Normal file
View File

@@ -0,0 +1,47 @@
[package]
name = "sunbeam-proxy"
version = "0.1.0"
edition = "2021"
[dependencies]
# Pingora with rustls backend (pure Rust TLS, no BoringSSL C build)
pingora = { version = "0.7", features = ["rustls"] }
pingora-proxy = { version = "0.7", features = ["rustls"] }
pingora-core = { version = "0.7", features = ["rustls"] }
pingora-http = "0.7"
# HTTP header constants
http = "1"
# Config
serde = { version = "1", features = ["derive"] }
toml = "0.8"
# Async
tokio = { version = "1", features = ["full"] }
futures = "0.3"
async-trait = "0.1"
# Structured logging + OTEL
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
tracing-opentelemetry = "0.28"
opentelemetry = { version = "0.27", features = ["trace"] }
opentelemetry_sdk = { version = "0.27", features = ["rt-tokio"] }
opentelemetry-otlp = { version = "0.27", features = ["http-proto", "reqwest-client"] }
serde_json = "1"
anyhow = "1"
# Rustls crypto provider — must be installed before any TLS init
rustls = { version = "0.23", features = ["aws-lc-rs"] }
# K8s watcher for cert/config hot-reload
kube = { version = "3", features = ["runtime", "client"] }
k8s-openapi = { version = "0.27", features = ["v1_35"] }
libc = "0.2"
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
strip = true

67
Dockerfile Normal file
View File

@@ -0,0 +1,67 @@
# ── Stage 1: build ──────────────────────────────────────────────
# rust:slim tracks the latest stable Rust release.
# Multi-arch image; Docker buildx selects the native platform image.
FROM rust:slim AS builder
ARG TARGETARCH
# musl-tools: musl-gcc for static linking.
# curl: download tini init binary.
# cmake: needed by the C build of the aws-lc-rs crypto backend selected in
# Cargo.toml (rustls itself is Rust, but its crypto provider is not).
RUN apt-get update && apt-get install -y musl-tools curl cmake && rm -rf /var/lib/apt/lists/*
# Map Docker TARGETARCH to the appropriate Rust musl target,
# then configure Cargo to use musl-gcc as the linker for that target.
# The resolved target triple is stashed in /rust-target for later RUN steps,
# since shell variables don't survive across Dockerfile instructions.
RUN case "${TARGETARCH}" in \
    "amd64") RUST_TARGET="x86_64-unknown-linux-musl" ;; \
    "arm64") RUST_TARGET="aarch64-unknown-linux-musl" ;; \
    *) echo "Unsupported arch: ${TARGETARCH}" && exit 1 ;; \
    esac && \
    echo "${RUST_TARGET}" > /rust-target && \
    rustup target add "${RUST_TARGET}" && \
    mkdir -p /root/.cargo && \
    printf '[target.%s]\nlinker = "musl-gcc"\n' "${RUST_TARGET}" \
    >> /root/.cargo/config.toml
ENV RUSTFLAGS="-C target-feature=+crt-static"
WORKDIR /build
# Cache dependency compilation separately from source changes.
# RUSTFLAGS must match the real build or Cargo will recompile everything.
COPY Cargo.toml Cargo.lock ./
# `;` (not `&&`) after the dummy build: this step is best-effort caching —
# a failure here only loses the cache and the real build below still decides.
RUN mkdir src && \
    echo 'fn main() {}' > src/main.rs && \
    cargo build --release --target "$(cat /rust-target)" ; \
    rm -rf src
# Build the real binary. `touch` bumps the mtime so Cargo doesn't reuse the
# dummy main.rs artifact from the caching step above.
COPY src/ ./src/
RUN touch src/main.rs && \
    cargo build --release --target "$(cat /rust-target)" && \
    cp "target/$(cat /rust-target)/release/sunbeam-proxy" /sunbeam-proxy
# Download tini static init binary (musl, no glibc dependency).
# tini as PID 1 ensures the container stays alive when Pingora re-execs itself
# during a graceful upgrade: the new process is re-parented to tini, and tini
# correctly reaps the old process when it exits after draining connections.
RUN case "${TARGETARCH}" in \
    "amd64") TINI_ARCH="amd64" ;; \
    "arm64") TINI_ARCH="arm64" ;; \
    *) echo "Unsupported arch: ${TARGETARCH}" && exit 1 ;; \
    esac && \
    curl -fsSL -o /tini \
    "https://github.com/krallin/tini/releases/download/v0.19.0/tini-static-${TINI_ARCH}" && \
    chmod +x /tini
# ── Stage 2: distroless final ────────────────────────────────────
# cgr.dev/chainguard/static is multi-arch (amd64 + arm64).
# No shell, no package manager — minimal attack surface.
FROM cgr.dev/chainguard/static:latest
COPY --from=builder /tini /tini
COPY --from=builder /sunbeam-proxy /usr/local/bin/sunbeam-proxy
EXPOSE 80 443
# tini as PID 1 so Pingora's graceful-upgrade re-exec doesn't kill the container.
ENTRYPOINT ["/tini", "--", "/usr/local/bin/sunbeam-proxy"]

67
dev.toml Normal file
View File

@@ -0,0 +1,67 @@
# Local dev config for running sunbeam-proxy directly on macOS.
#
# Uses non-privileged ports (8080/8443) and a mkcert cert for localhost.
# Certs are generated once with:
# mkcert -cert-file certs/tls.crt -key-file certs/tls.key localhost 127.0.0.1
#
# Run with:
# SUNBEAM_CONFIG=dev.toml RUST_LOG=info cargo run
#
# Then test:
# curl -v http://localhost:8080/ # → 301 to https
# curl -vk https://localhost:8443/ -H "Host: docs.localhost" # → 502 (backend unreachable, routing works)
# curl -vk https://localhost:8443/.well-known/acme-challenge/test # → 404 (no active challenge)
[listen]
http = "0.0.0.0:8080"
https = "0.0.0.0:8443"
[tls]
cert_path = "certs/tls.crt"
key_path = "certs/tls.key"
[telemetry]
otlp_endpoint = ""
# Dummy routes that mirror production — backends won't be reachable locally
# but routing, TLS termination, and redirect logic are fully exercised.
[[routes]]
host_prefix = "docs"
backend = "http://127.0.0.1:9001"
websocket = true
[[routes]]
host_prefix = "meet"
backend = "http://127.0.0.1:9002"
websocket = true
[[routes]]
host_prefix = "drive"
backend = "http://127.0.0.1:9003"
[[routes]]
host_prefix = "mail"
backend = "http://127.0.0.1:9004"
[[routes]]
host_prefix = "chat"
backend = "http://127.0.0.1:9005"
websocket = true
[[routes]]
host_prefix = "people"
backend = "http://127.0.0.1:9006"
[[routes]]
host_prefix = "src"
backend = "http://127.0.0.1:9007"
websocket = true
[[routes]]
host_prefix = "auth"
backend = "http://127.0.0.1:9008"
[[routes]]
host_prefix = "s3"
backend = "http://127.0.0.1:9009"

96
src/acme.rs Normal file
View File

@@ -0,0 +1,96 @@
use futures::StreamExt;
use k8s_openapi::api::networking::v1::Ingress;
use kube::{runtime::watcher, Api, Client};
use std::{collections::HashMap, sync::{Arc, RwLock}};
/// Maps a challenge path to the backend address that can answer it.
///
/// Key: `/.well-known/acme-challenge/<token>`
/// Value: `cm-acme-http-solver-<hash>.ingress.svc.cluster.local:8089`
///
/// cert-manager creates one Ingress per challenge domain with exactly this
/// path and backend. Our proxy consults this table to route each challenge
/// request to the specific solver pod that holds the matching token, which
/// is required for multi-SAN certificates (one solver pod per domain, all
/// running concurrently).
///
/// Written by `watch_ingresses` (Apply/Delete events); read on the proxy's
/// plain-HTTP request path for every ACME challenge request.
///
/// Uses std::sync::RwLock (not tokio) so reads are wait-free and the table
/// can be written from the watcher runtime without cross-runtime waker issues.
pub type AcmeRoutes = Arc<RwLock<HashMap<String, String>>>;
/// Watch cert-manager challenge Ingresses in the `ingress` namespace and keep
/// `routes` synchronized with their `/.well-known/acme-challenge/<token>` rules.
///
/// cert-manager creates one Ingress per HTTP-01 challenge; each carries a path
/// rule pointing at a per-challenge solver Service. Mirroring those rules into
/// the shared table lets the proxy forward every token to the exact solver pod
/// that owns it, instead of relying on a nondeterministic shared Service.
pub async fn watch_ingresses(client: Client, routes: AcmeRoutes) {
    let api: Api<Ingress> = Api::namespaced(client, "ingress");

    // Probe the Ingress API once up front so RBAC / missing-cert-manager
    // problems surface immediately instead of as opaque watcher retries.
    if let Err(e) = api.list(&Default::default()).await {
        tracing::error!(
            error = %e,
            "initial Ingress list failed — is cert-manager installed? \
             is the pingora-watcher Role bound correctly?"
        );
        // Fall through to the watch loop; it will keep reporting errors.
    }

    let mut events = Box::pin(watcher(api, watcher::Config::default()));
    while let Some(item) = events.next().await {
        // Unwrap the stream item first; errors back off and retry.
        let event = match item {
            Ok(ev) => ev,
            Err(e) => {
                tracing::warn!(error = %e, "Ingress watcher error; retrying in 10s");
                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
                continue;
            }
        };
        match event {
            watcher::Event::Apply(ing) => {
                let mut table = routes.write().unwrap_or_else(|e| e.into_inner());
                upsert_routes(&ing, &mut table);
            }
            watcher::Event::Delete(ing) => {
                let mut table = routes.write().unwrap_or_else(|e| e.into_inner());
                remove_routes(&ing, &mut table);
            }
            _ => {}
        }
    }
}
/// Insert every ACME-challenge path rule found in `ingress` into the table,
/// mapping the challenge path to `<service>.ingress.svc.cluster.local:<port>`.
fn upsert_routes(ingress: &Ingress, map: &mut HashMap<String, String>) {
    let rules = match ingress.spec.as_ref() {
        Some(spec) => spec.rules.as_deref().unwrap_or(&[]),
        None => return,
    };
    let all_paths = rules
        .iter()
        .filter_map(|rule| rule.http.as_ref())
        .flat_map(|http| &http.paths);
    for p in all_paths {
        let Some(path) = p.path.as_deref() else { continue };
        if !path.starts_with("/.well-known/acme-challenge/") {
            continue;
        }
        let Some(svc) = p.backend.service.as_ref() else { continue };
        let Some(port) = svc.port.as_ref().and_then(|port| port.number) else { continue };
        let backend = format!("{}.ingress.svc.cluster.local:{port}", svc.name);
        tracing::debug!(path, %backend, "added ACME challenge route");
        map.insert(path.to_string(), backend);
    }
}
/// Drop every ACME-challenge path declared by `ingress` from the route table.
/// Called when cert-manager deletes a solved challenge's Ingress.
fn remove_routes(ingress: &Ingress, map: &mut HashMap<String, String>) {
    let Some(spec) = &ingress.spec else { return };
    let challenge_paths = spec
        .rules
        .as_deref()
        .unwrap_or(&[])
        .iter()
        .filter_map(|rule| rule.http.as_ref())
        .flat_map(|http| &http.paths)
        .filter_map(|p| p.path.as_deref())
        .filter(|path| path.starts_with("/.well-known/acme-challenge/"));
    for path in challenge_paths {
        tracing::debug!(path, "removed ACME challenge route");
        map.remove(path);
    }
}

50
src/cert.rs Normal file
View File

@@ -0,0 +1,50 @@
use anyhow::{Context, Result};
use k8s_openapi::api::core::v1::Secret;
use kube::{Api, Client};
/// Fetch the `pingora-tls` Secret from the ingress namespace and write its
/// `tls.crt` / `tls.key` entries to the configured file paths.
///
/// Runs at startup (non-upgrade path) so the proxy reads certs straight from
/// the K8s API rather than waiting on kubelet volume-sync — the files exist
/// before `svc.add_tls()` is called.
///
/// # Errors
/// Fails if the Secret cannot be fetched or its contents cannot be written.
pub async fn fetch_and_write(client: &Client, cert_path: &str, key_path: &str) -> Result<()> {
    let secrets: Api<Secret> = Api::namespaced(client.clone(), "ingress");
    let tls_secret = secrets
        .get("pingora-tls")
        .await
        .context("fetching pingora-tls Secret from K8s API")?;
    write_from_secret(&tls_secret, cert_path, key_path)
}
/// Write `tls.crt` and `tls.key` from a Secret data map to the configured paths.
///
/// k8s-openapi base64-decodes Secret values automatically, so `data["tls.crt"].0`
/// is the raw PEM bytes ready to write. Called both from `fetch_and_write` at
/// startup and directly from the cert watcher when an `Apply` event delivers
/// the updated Secret object without requiring an additional API round-trip.
///
/// # Errors
/// Fails when the Secret lacks `data`, `tls.crt`, or `tls.key`, or when any
/// filesystem operation (mkdir, write, chmod) fails.
pub fn write_from_secret(secret: &Secret, cert_path: &str, key_path: &str) -> Result<()> {
    use std::os::unix::fs::PermissionsExt;

    let data = secret
        .data
        .as_ref()
        .ok_or_else(|| anyhow::anyhow!("pingora-tls Secret has no data"))?;
    let crt = data
        .get("tls.crt")
        .ok_or_else(|| anyhow::anyhow!("pingora-tls missing tls.crt"))?;
    let key = data
        .get("tls.key")
        .ok_or_else(|| anyhow::anyhow!("pingora-tls missing tls.key"))?;
    // /etc/tls is an emptyDir; create it if the pod just started.
    if let Some(parent) = std::path::Path::new(cert_path).parent() {
        std::fs::create_dir_all(parent)
            .with_context(|| format!("creating cert dir {}", parent.display()))?;
    }
    std::fs::write(cert_path, &crt.0).with_context(|| format!("writing {cert_path}"))?;
    std::fs::write(key_path, &key.0).with_context(|| format!("writing {key_path}"))?;
    // The private key must not be readable by other users in the container:
    // fs::write creates files with the default umask (typically 0644), so
    // tighten the key to owner-only after writing it.
    std::fs::set_permissions(key_path, std::fs::Permissions::from_mode(0o600))
        .with_context(|| format!("restricting permissions on {key_path}"))?;
    tracing::info!(cert_path, key_path, "cert files written from K8s Secret");
    Ok(())
}

61
src/config.rs Normal file
View File

@@ -0,0 +1,61 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs;
/// Top-level proxy configuration, deserialized from a TOML file by `Config::load`.
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
    /// Listen addresses for the plain-HTTP and TLS listeners.
    pub listen: ListenConfig,
    /// Paths of the PEM certificate/key files used for TLS termination.
    pub tls: TlsFileConfig,
    /// Telemetry settings (an empty OTLP endpoint disables export).
    pub telemetry: TelemetryConfig,
    /// Host-prefix virtual-host routes, matched against the Host header.
    pub routes: Vec<RouteConfig>,
}

/// Socket addresses the proxy binds (e.g. `0.0.0.0:8080` / `0.0.0.0:8443`).
#[derive(Debug, Deserialize, Clone)]
pub struct ListenConfig {
    pub http: String,
    pub https: String,
}

/// Filesystem locations of the TLS certificate and private key (PEM).
#[derive(Debug, Deserialize, Clone)]
pub struct TlsFileConfig {
    pub cert_path: String,
    pub key_path: String,
}

/// Telemetry configuration; `otlp_endpoint = ""` means OTLP export is off.
#[derive(Debug, Deserialize, Clone)]
pub struct TelemetryConfig {
    pub otlp_endpoint: String,
}

/// A path-prefix sub-route within a virtual host.
/// Matched longest-prefix-first when multiple entries share a prefix.
#[derive(Debug, Deserialize, Clone)]
pub struct PathRoute {
    /// Path prefix to match (e.g. `/kratos`).
    pub prefix: String,
    /// Backend URL for matching requests (e.g. `http://127.0.0.1:9008`).
    pub backend: String,
    /// Strip the matched prefix before forwarding to the backend.
    #[serde(default)]
    pub strip_prefix: bool,
    /// Forward WebSocket upgrade headers for this sub-route.
    #[serde(default)]
    pub websocket: bool,
}

/// A virtual host keyed by the first DNS label of the Host header
/// (e.g. `docs` matches `docs.<any-domain>`).
#[derive(Debug, Deserialize, Clone)]
pub struct RouteConfig {
    /// First label of the Host header this route serves.
    pub host_prefix: String,
    /// Default backend URL for the host.
    pub backend: String,
    /// Forward WebSocket upgrade headers for this host.
    #[serde(default)]
    pub websocket: bool,
    /// Optional path-based sub-routes (longest prefix wins).
    /// If the request path matches a sub-route, its backend is used instead.
    #[serde(default)]
    pub paths: Vec<PathRoute>,
}
impl Config {
    /// Load and parse the TOML configuration at `path`.
    ///
    /// # Errors
    /// Returns an error if the file cannot be read or is not valid TOML for
    /// this schema. Both contexts include the actual path, since the config
    /// location is overridable via SUNBEAM_CONFIG and is not always
    /// `config.toml` (the previous parse context hardcoded that name).
    pub fn load(path: &str) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("reading config from {path}"))?;
        toml::from_str(&raw).with_context(|| format!("parsing config from {path}"))
    }
}

127
src/main.rs Normal file
View File

@@ -0,0 +1,127 @@
mod acme;
mod cert;
mod config;
mod proxy;
mod telemetry;
mod watcher;
use std::{collections::HashMap, sync::Arc};
use anyhow::Result;
use kube::Client;
use pingora::server::{configuration::Opt, Server};
use pingora_proxy::http_proxy_service;
use proxy::SunbeamProxy;
use std::sync::RwLock;
/// Process entry point: load config, set up telemetry, fetch certs from K8s,
/// start the Pingora proxy service, and spawn background K8s watchers.
fn main() -> Result<()> {
    // Install the aws-lc-rs crypto provider for rustls before any TLS init.
    // Required because rustls 0.23 no longer auto-selects a provider at compile time.
    rustls::crypto::aws_lc_rs::default_provider()
        .install_default()
        .expect("crypto provider already installed");
    // Config location is overridable for local dev (see dev.toml header).
    let config_path = std::env::var("SUNBEAM_CONFIG")
        .unwrap_or_else(|_| "/etc/pingora/config.toml".to_string());
    let cfg = config::Config::load(&config_path)?;
    // 1. Init telemetry (JSON logs + optional OTEL traces).
    // NOTE(review): init is called before any Tokio runtime exists; confirm
    // the OTLP path (non-empty endpoint) tolerates that — see telemetry.rs.
    telemetry::init(&cfg.telemetry.otlp_endpoint);
    // 2. Detect --upgrade flag. When present, Pingora inherits listening socket
    // FDs from the upgrade Unix socket instead of binding fresh ports, enabling
    // zero-downtime cert/config reloads triggered by the K8s watcher below.
    let upgrade = std::env::args().any(|a| a == "--upgrade");
    // 3. Fetch the TLS cert from K8s before Pingora binds the TLS port.
    // The Client is created and dropped within this temp runtime — we do NOT
    // carry it across runtime boundaries, which would kill its tower workers.
    // The watcher thread creates its own fresh Client on its own runtime.
    let k8s_available = {
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()?;
        rt.block_on(async {
            match Client::try_default().await {
                Ok(c) => {
                    // On the upgrade path the cert files were already written
                    // by the previous process; skip the fetch.
                    if !upgrade {
                        if let Err(e) =
                            cert::fetch_and_write(&c, &cfg.tls.cert_path, &cfg.tls.key_path).await
                        {
                            // Non-fatal: Secret may not exist yet on first deploy (cert-manager
                            // is still issuing), or the Secret name may differ in dev.
                            tracing::warn!(error = %e, "cert fetch from K8s failed; using existing files");
                        }
                    }
                    true
                }
                Err(e) => {
                    // No in-cluster or kubeconfig credentials — e.g. a local run.
                    tracing::warn!(error = %e, "no K8s client; cert auto-reload and ACME routing disabled");
                    false
                }
            }
        })
    };
    let opt = Opt {
        upgrade,
        daemon: false,
        nocapture: false,
        test: false,
        conf: None,
    };
    // 4. Create Pingora server and bootstrap (binds ports or inherits FDs).
    let mut server = Server::new(Some(opt))?;
    server.bootstrap();
    // 5. Shared ACME challenge route table. Populated by the Ingress watcher;
    // consulted by the proxy for every /.well-known/acme-challenge/ request.
    // Uses std::sync::RwLock so reads are sync and lock-guard-safe across
    // Pingora's async proxy calls without cross-runtime waker concerns.
    let acme_routes: acme::AcmeRoutes = Arc::new(RwLock::new(HashMap::new()));
    let proxy = SunbeamProxy {
        routes: cfg.routes.clone(),
        acme_routes: acme_routes.clone(),
    };
    let mut svc = http_proxy_service(&server.configuration, proxy);
    // Port 80: plain HTTP — 301 → HTTPS, except for ACME HTTP-01 challenges.
    // Port 443: TLS-terminated HTTPS. Cert written to /etc/tls/ by cert::* above.
    svc.add_tcp(&cfg.listen.http);
    svc.add_tls(&cfg.listen.https, &cfg.tls.cert_path, &cfg.tls.key_path)?;
    server.add_service(svc);
    // 6. Background K8s watchers on their own OS thread + tokio runtime so they
    // don't interfere with Pingora's internal runtime. A fresh Client is
    // created here so its tower workers live on this runtime (not the
    // now-dropped temp runtime from step 3).
    if k8s_available {
        let cert_path = cfg.tls.cert_path.clone();
        let key_path = cfg.tls.key_path.clone();
        std::thread::spawn(move || {
            let rt = tokio::runtime::Builder::new_multi_thread()
                .enable_all()
                .build()
                .expect("watcher runtime");
            rt.block_on(async move {
                let client = match Client::try_default().await {
                    Ok(c) => c,
                    Err(e) => {
                        tracing::error!(error = %e, "watcher: failed to create K8s client; watchers disabled");
                        return;
                    }
                };
                // Both watchers run until one triggers a graceful upgrade,
                // at which point this whole process is replaced.
                tokio::join!(
                    acme::watch_ingresses(client.clone(), acme_routes),
                    watcher::run_watcher(client, cert_path, key_path),
                );
            });
        });
    }
    tracing::info!(upgrade, "sunbeam-proxy starting");
    server.run_forever();
}

266
src/proxy.rs Normal file
View File

@@ -0,0 +1,266 @@
use crate::acme::AcmeRoutes;
use crate::config::RouteConfig;
use async_trait::async_trait;
use http::header::{CONNECTION, HOST, UPGRADE};
use pingora_core::{upstreams::peer::HttpPeer, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_proxy::{ProxyHttp, Session};
use std::time::Instant;
/// The proxy's shared state: static host routes from config plus the live
/// ACME challenge route table maintained by the Ingress watcher.
pub struct SunbeamProxy {
    /// Host-prefix routes loaded from config at startup (immutable thereafter).
    pub routes: Vec<RouteConfig>,
    /// Per-challenge route table populated by the Ingress watcher.
    /// Maps `/.well-known/acme-challenge/<token>` → solver service address.
    pub acme_routes: AcmeRoutes,
}
/// Per-request state threaded through the Pingora proxy phases.
pub struct RequestCtx {
    /// Route resolved in `upstream_peer` (a synthesized copy when a path
    /// sub-route matched); `None` for ACME/redirect/404 responses.
    pub route: Option<RouteConfig>,
    /// Request arrival time, used to compute `duration_ms` in `logging`.
    pub start_time: Instant,
    /// Resolved solver backend address for this ACME challenge, if applicable.
    pub acme_backend: Option<String>,
    /// Path prefix to strip before forwarding to the upstream (e.g. "/kratos").
    pub strip_prefix: Option<String>,
}
impl SunbeamProxy {
    /// Return the configured route whose `host_prefix` equals `prefix` exactly,
    /// or `None` if no virtual host matches.
    fn find_route(&self, prefix: &str) -> Option<&RouteConfig> {
        for route in &self.routes {
            if route.host_prefix == prefix {
                return Some(route);
            }
        }
        None
    }
}
/// Return the request's Host header as an owned string, or "" when the header
/// is absent or not valid UTF-8.
fn extract_host(session: &Session) -> String {
    let headers = &session.req_header().headers;
    match headers.get(HOST).and_then(|value| value.to_str().ok()) {
        Some(host) => host.to_string(),
        None => String::new(),
    }
}
/// Strip a leading `https://` or `http://` scheme from a backend URL,
/// yielding the bare `host:port` slice (no allocation).
fn backend_addr(backend: &str) -> &str {
    let without_https = backend.trim_start_matches("https://");
    without_https.trim_start_matches("http://")
}
/// Returns true if the downstream connection is plain HTTP (no TLS).
/// A missing connection digest is treated as plain HTTP.
fn is_plain_http(session: &Session) -> bool {
    match session.digest() {
        Some(digest) => digest.ssl_digest.is_none(),
        None => true,
    }
}
#[async_trait]
impl ProxyHttp for SunbeamProxy {
    type CTX = RequestCtx;

    /// Fresh per-request context; `start_time` anchors the latency measurement.
    fn new_ctx(&self) -> RequestCtx {
        RequestCtx {
            route: None,
            start_time: Instant::now(),
            acme_backend: None,
            strip_prefix: None,
        }
    }

    /// HTTP → HTTPS redirect; ACME HTTP-01 challenges pass through on plain HTTP.
    ///
    /// Returning `Ok(true)` means the response was fully written here and no
    /// upstream is contacted; `Ok(false)` continues to `upstream_peer`.
    async fn request_filter(
        &self,
        session: &mut Session,
        ctx: &mut RequestCtx,
    ) -> Result<bool>
    where
        Self::CTX: Send + Sync,
    {
        if is_plain_http(session) {
            let path = session.req_header().uri.path().to_string();
            // cert-manager HTTP-01 challenge: look up the token path in the
            // Ingress-backed route table. Each challenge Ingress maps exactly
            // one token to exactly one solver Service, so this routes the request
            // to the right solver pod even when multiple challenges run in parallel.
            if path.starts_with("/.well-known/acme-challenge/") {
                // Drop the guard before any await point (RwLockReadGuard is !Send);
                // cloning the String keeps nothing borrowed from the table.
                let backend = self
                    .acme_routes
                    .read()
                    .unwrap_or_else(|e| e.into_inner())
                    .get(&path)
                    .cloned();
                if let Some(backend) = backend {
                    ctx.acme_backend = Some(backend);
                    return Ok(false); // pass to upstream_peer
                }
                // No route yet: challenge Ingress hasn't arrived from cert-manager.
                let mut resp = ResponseHeader::build(404, None)?;
                resp.insert_header("Content-Length", "0")?;
                session.write_response_header(Box::new(resp), true).await?;
                return Ok(true);
            }
            // All other plain-HTTP traffic: redirect to HTTPS.
            // NOTE(review): the Host header is echoed into Location verbatim;
            // confirm an empty/garbage Host producing "https://" is acceptable.
            let host = extract_host(session);
            let location = format!("https://{host}{path}");
            let mut resp = ResponseHeader::build(301, None)?;
            resp.insert_header("Location", location)?;
            resp.insert_header("Content-Length", "0")?;
            session.write_response_header(Box::new(resp), true).await?;
            return Ok(true);
        }
        // Reject unknown host prefixes with 404.
        // The virtual host is the first DNS label of the Host header.
        let host = extract_host(session);
        let prefix = host.split('.').next().unwrap_or("");
        if self.find_route(prefix).is_none() {
            let mut resp = ResponseHeader::build(404, None)?;
            resp.insert_header("Content-Length", "0")?;
            session.write_response_header(Box::new(resp), true).await?;
            return Ok(true);
        }
        Ok(false)
    }

    /// Resolve the upstream peer for this request: ACME solver, path sub-route
    /// backend, or the virtual host's default backend (in that order).
    async fn upstream_peer(
        &self,
        session: &mut Session,
        ctx: &mut RequestCtx,
    ) -> Result<Box<HttpPeer>> {
        // ACME challenge: backend was resolved in request_filter.
        if let Some(backend) = &ctx.acme_backend {
            return Ok(Box::new(HttpPeer::new(
                backend_addr(backend),
                false,
                String::new(),
            )));
        }
        let host = extract_host(session);
        let prefix = host.split('.').next().unwrap_or("");
        // Invariant: request_filter already returned 404 for unknown prefixes,
        // so a missing route here is a bug, not a user error.
        let route = self
            .find_route(prefix)
            .expect("route already validated in request_filter");
        let path = session.req_header().uri.path().to_string();
        // Check path sub-routes (longest matching prefix wins).
        let path_route = route
            .paths
            .iter()
            .filter(|p| path.starts_with(p.prefix.as_str()))
            .max_by_key(|p| p.prefix.len());
        if let Some(pr) = path_route {
            if pr.strip_prefix {
                ctx.strip_prefix = Some(pr.prefix.clone());
            }
            // Store a synthesized route so logging/upstream_request_filter see
            // the sub-route's backend and combined websocket flag.
            ctx.route = Some(crate::config::RouteConfig {
                host_prefix: route.host_prefix.clone(),
                backend: pr.backend.clone(),
                websocket: pr.websocket || route.websocket,
                paths: vec![],
            });
            return Ok(Box::new(HttpPeer::new(
                backend_addr(&pr.backend),
                false,
                String::new(),
            )));
        }
        ctx.route = Some(route.clone());
        Ok(Box::new(HttpPeer::new(
            backend_addr(&route.backend),
            false,
            String::new(),
        )))
    }

    /// Copy WebSocket upgrade headers and apply path prefix stripping.
    async fn upstream_request_filter(
        &self,
        session: &mut Session,
        upstream_req: &mut RequestHeader,
        ctx: &mut RequestCtx,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        // Only routes flagged `websocket` forward Connection/Upgrade headers.
        if ctx.route.as_ref().map(|r| r.websocket).unwrap_or(false) {
            for name in &[CONNECTION, UPGRADE] {
                if let Some(val) = session.req_header().headers.get(name.clone()) {
                    upstream_req.insert_header(name.clone(), val)?;
                }
            }
        }
        // Strip path prefix before forwarding (e.g. /kratos → /).
        if let Some(prefix) = &ctx.strip_prefix {
            let old_uri = upstream_req.uri.clone();
            let old_path = old_uri.path();
            if let Some(stripped) = old_path.strip_prefix(prefix.as_str()) {
                // An exact prefix match strips to "", which must become "/".
                let new_path = if stripped.is_empty() { "/" } else { stripped };
                // Preserve the query string across the rewrite.
                let query_part = old_uri
                    .query()
                    .map(|q| format!("?{q}"))
                    .unwrap_or_default();
                let new_pq: http::uri::PathAndQuery =
                    format!("{new_path}{query_part}").parse().map_err(|e| {
                        pingora_core::Error::because(
                            pingora_core::ErrorType::InternalError,
                            "invalid uri after prefix strip",
                            e,
                        )
                    })?;
                let mut parts = old_uri.into_parts();
                parts.path_and_query = Some(new_pq);
                upstream_req.set_uri(
                    http::Uri::from_parts(parts).expect("valid uri parts"),
                );
            }
        }
        Ok(())
    }

    /// Emit a structured JSON audit log line for every request.
    /// `status` is 0 when no response header was ever written (early failure).
    async fn logging(
        &self,
        session: &mut Session,
        error: Option<&pingora_core::Error>,
        ctx: &mut RequestCtx,
    ) where
        Self::CTX: Send + Sync,
    {
        let status = session
            .response_written()
            .map_or(0, |r| r.status.as_u16());
        let duration_ms = ctx.start_time.elapsed().as_millis();
        // "-" placeholders keep the audit schema stable for non-routed requests.
        let backend = ctx
            .route
            .as_ref()
            .map(|r| r.backend.as_str())
            .unwrap_or("-");
        let client_ip = session
            .client_addr()
            .map(|a| a.to_string())
            .unwrap_or_else(|| "-".to_string());
        let error_str = error.map(|e| e.to_string());
        tracing::info!(
            target = "audit",
            method = %session.req_header().method,
            host = %extract_host(session),
            path = %session.req_header().uri.path(),
            client_ip,
            status,
            duration_ms,
            backend,
            error = error_str,
            "request"
        );
    }
}

40
src/telemetry.rs Normal file
View File

@@ -0,0 +1,40 @@
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_otlp::WithExportConfig;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
/// Initialize structured logging, and OTLP trace export when `otlp_endpoint`
/// is non-empty. Installs the global tracing subscriber — call exactly once.
pub fn init(otlp_endpoint: &str) {
    // JSON log lines including the current span and the event target
    // (the proxy's audit lines use target = "audit").
    let fmt_layer = tracing_subscriber::fmt::layer()
        .json()
        .with_current_span(true)
        .with_target(true);
    // RUST_LOG controls filtering; default to `info` when unset or invalid.
    let env_filter = EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| EnvFilter::new("info"));
    if otlp_endpoint.is_empty() {
        // Logs only — no OTLP layer.
        tracing_subscriber::registry()
            .with(env_filter)
            .with(fmt_layer)
            .init();
    } else {
        // NOTE(review): the batch exporter runs on the Tokio runtime
        // (`opentelemetry_sdk::runtime::Tokio`), but main() calls init()
        // before any Tokio runtime exists — confirm this path works when an
        // endpoint is configured, or defer provider construction.
        let exporter = opentelemetry_otlp::SpanExporter::builder()
            .with_http()
            .with_endpoint(otlp_endpoint)
            .build()
            .expect("failed to build OTLP span exporter");
        let provider = opentelemetry_sdk::trace::TracerProvider::builder()
            .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
            .build();
        // Register globally so other crates' instrumentation also exports.
        opentelemetry::global::set_tracer_provider(provider.clone());
        let tracer = provider.tracer("sunbeam-proxy");
        let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);
        tracing_subscriber::registry()
            .with(env_filter)
            .with(fmt_layer)
            .with(otel_layer)
            .init();
    }
}

122
src/watcher.rs Normal file
View File

@@ -0,0 +1,122 @@
use futures::StreamExt;
use k8s_openapi::api::core::v1::{ConfigMap, Secret};
use kube::{runtime::watcher, Api, Client};
use tokio::sync::mpsc;
/// Watch the `pingora-tls` Secret and `pingora-config` ConfigMap in the
/// ingress namespace, and trigger one graceful upgrade on the first change.
///
/// Cert changes are written to disk directly from the Apply event (avoiding
/// kubelet volume-sync delay) before the upgrade; config changes rely on the
/// kubelet syncing the ConfigMap volume shortly after the new process starts.
///
/// A single signal suffices: the triggered upgrade replaces this process, and
/// the replacement starts its own watchers.
pub async fn run_watcher(client: Client, cert_path: String, key_path: String) {
    let (tx, mut rx) = mpsc::channel::<()>(2);
    let secrets: Api<Secret> = Api::namespaced(client.clone(), "ingress");
    let configmaps: Api<ConfigMap> = Api::namespaced(client.clone(), "ingress");
    tokio::spawn(watch_secret(secrets, cert_path, key_path, tx.clone()));
    tokio::spawn(watch_configmap(configmaps, tx));
    if rx.recv().await.is_some() {
        tracing::info!("initiating graceful upgrade");
        trigger_upgrade();
    }
}
/// Watch the `pingora-tls` Secret; on change, write the new cert files and
/// signal `tx`, then exit (the resulting upgrade replaces this process).
///
/// Apply events received before `InitDone` are the initial listing, not
/// changes, and are ignored.
///
/// NOTE(review): the function also exits when the cert write fails (no retry,
/// no upgrade) — confirm that losing the watcher in that case is intended.
async fn watch_secret(
    api: Api<Secret>,
    cert_path: String,
    key_path: String,
    tx: mpsc::Sender<()>,
) {
    let selector = watcher::Config::default().fields("metadata.name=pingora-tls");
    let mut events = Box::pin(watcher(api, selector));
    let mut ready = false;
    while let Some(item) = events.next().await {
        let event = match item {
            Ok(ev) => ev,
            Err(e) => {
                tracing::warn!(error = %e, resource = "pingora-tls", "watcher error; retrying in 10s");
                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
                continue;
            }
        };
        match event {
            watcher::Event::InitDone => {
                ready = true;
                tracing::debug!("pingora-tls watcher ready");
            }
            // The Apply event carries the full updated Secret, so the cert is
            // written without an extra API round-trip and is on disk before
            // the new process's svc.add_tls() runs.
            watcher::Event::Apply(secret) if ready => {
                tracing::info!("pingora-tls changed — writing new cert");
                match crate::cert::write_from_secret(&secret, &cert_path, &key_path) {
                    Ok(()) => {
                        let _ = tx.send(()).await;
                    }
                    Err(e) => tracing::error!(error = %e, "cert write failed; skipping upgrade"),
                }
                return;
            }
            _ => {}
        }
    }
}
/// Watch the `pingora-config` ConfigMap; on the first post-init change,
/// signal `tx` and exit (the resulting upgrade replaces this process).
async fn watch_configmap(api: Api<ConfigMap>, tx: mpsc::Sender<()>) {
    let selector = watcher::Config::default().fields("metadata.name=pingora-config");
    let mut events = Box::pin(watcher(api, selector));
    let mut ready = false;
    while let Some(item) = events.next().await {
        let event = match item {
            Ok(ev) => ev,
            Err(e) => {
                tracing::warn!(error = %e, resource = "pingora-config", "watcher error; retrying in 10s");
                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
                continue;
            }
        };
        match event {
            watcher::Event::InitDone => {
                ready = true;
                tracing::debug!("pingora-config watcher ready");
            }
            watcher::Event::Apply(_) if ready => {
                tracing::info!("pingora-config changed — triggering upgrade");
                let _ = tx.send(()).await;
                return;
            }
            _ => {}
        }
    }
}
/// Spawn `current_exe --upgrade`, then SIGQUIT ourselves.
///
/// Pingora's SIGQUIT handler hands all listening socket FDs to the new
/// process over a Unix socket and drains existing connections; the new
/// process (started with `Opt { upgrade: true }`) inherits the FDs in
/// `bootstrap()` and takes over without dropping connections.
fn trigger_upgrade() {
    let exe = match std::env::current_exe() {
        Ok(path) => path,
        Err(e) => {
            tracing::error!(error = %e, "cannot resolve current exe; upgrade aborted");
            return;
        }
    };
    let spawned = std::process::Command::new(&exe).arg("--upgrade").spawn();
    match spawned {
        Ok(child) => tracing::info!(pid = child.id(), "upgrade process spawned"),
        Err(e) => {
            tracing::error!(error = %e, "failed to spawn upgrade process; upgrade aborted");
            return;
        }
    }
    // SAFETY: kill(getpid(), SIGQUIT) is always safe; we're only signalling ourselves.
    unsafe { libc::kill(libc::getpid(), libc::SIGQUIT) };
}