feat(scripts): sunbeam.py — OpenBao DB engine, VSO seeding, E2E verify, error handling

Database secrets engine (_configure_db_engine):
- Creates a dedicated `vault` PostgreSQL user via CNPG peer auth (psql exec).
  CNPG enableSuperuserAccess=false blocks remote auth for the postgres
  superuser, so we create vault with CREATEROLE and grant ADMIN OPTION on
  each service role (required by PG 16+ to rotate passwords).
- Configures the OpenBao postgresql plugin (cnpg-postgres connection) and
  creates static roles for all PG_USERS with 24h rotation_period.
- bao/psql calls in _configure_db_engine now raise RuntimeError on non-zero
  exit unless explicitly marked best-effort (check=False) — no more silent
  failures.

Credential seeding (_seed_openbao):
- Added secret/login-ui path (cookie-secret, csrf-cookie-secret) so the
  login UI no longer needs hardcoded values in its Deployment manifest.
- Removed all DB password fields from KV; passwords are now managed
  exclusively by the database secrets engine.

Lifecycle:
- pre_apply_cleanup() prunes stale VaultStaticSecrets that have been
  superseded by VaultDynamicSecrets of the same name, preventing the
  "not the owner" ownerRef conflict that blocked secret updates.
- status_check() no longer marks Completed/Succeeded pods as unhealthy.
- _vso_sync_status() added to status output: shows sync state (secretMAC
  for VSS, lastRenewalTime for VDS) across all managed namespaces.

Verification (--verify):
- New verify_vso() function writes a random sentinel to OpenBao, creates
  a VaultAuth + VaultStaticSecret in the ory namespace, waits up to 60s
  for VSO to sync, decodes the K8s Secret, and asserts the value matches.
  Cleans up all test resources unconditionally. Replaces the unreliable
  Helm test pod for integration testing.
This commit is contained in:
2026-03-02 18:34:28 +00:00
parent 7de6e94a8d
commit 527b3b0246

View File

@@ -12,6 +12,7 @@ Usage:
./scripts/sunbeam.py --gitea # bootstrap Gitea orgs + mirror amd64 images
./scripts/sunbeam.py --restart # restart services only
./scripts/sunbeam.py --status # show pod health across all namespaces
./scripts/sunbeam.py --verify # E2E test VSO → OpenBao integration
Requires: limactl mkcert kubectl kustomize linkerd jq yq
"""
@@ -20,6 +21,7 @@ import argparse
import base64
import json
import os
import secrets as _secrets
import shutil
import subprocess
import sys
@@ -35,20 +37,8 @@ SECRETS_DIR = REPO_ROOT / "secrets" / "local"
LIMA_VM = "sunbeam"
K8S_CTX = ["--context=sunbeam"]
# Deterministic local-dev credentials (not for production)
DB_PASSWORD = "localdev"
S3_ACCESS_KEY = "minioadmin"
S3_SECRET_KEY = "minioadmin"
HYDRA_SYSTEM_SECRET = "local-hydra-system-secret-at-least-16"
HYDRA_COOKIE_SECRET = "local-hydra-cookie-secret-at-least-16"
HYDRA_PAIRWISE_SALT = "local-hydra-pairwise-salt-value-1"
LIVEKIT_API_KEY = "devkey"
LIVEKIT_API_SECRET = "secret-placeholder"
PEOPLE_DJANGO_SECRET = "local-dev-people-django-secret-key-not-for-production"
# Gitea admin (deterministic for local dev; also set in gitea-values.yaml)
# Gitea admin username (not secret; password is generated and stored in OpenBao)
GITEA_ADMIN_USER = "gitea_admin"
GITEA_ADMIN_PASS = "localdev"
GITEA_ADMIN_EMAIL = "gitea@local.domain"
# Images that only ship linux/amd64 builds — patched + mirrored to our Gitea registry.
@@ -145,7 +135,7 @@ def create_secret(ns, name, **literals):
args += ["--dry-run=client", "-o=yaml"]
manifest = kube_out(*args)
if manifest:
kube("apply", "--server-side", "--field-manager=sunbeam", "-f", "-", input=manifest)
kube("apply", "--server-side", "--force-conflicts", "--field-manager=sunbeam", "-f", "-", input=manifest)
# ── 1. Prerequisites ──────────────────────────────────────────────────────────
def check_prerequisites():
@@ -238,6 +228,14 @@ def disable_traefik():
"/var/lib/rancher/k3s/server/manifests/traefik.yaml"],
capture_output=True,
)
# Write k3s config so Traefik can never return after a k3s restart.
subprocess.run(
["limactl", "shell", LIMA_VM, "sudo", "tee",
"/etc/rancher/k3s/config.yaml"],
input="disable:\n - traefik\n",
text=True,
capture_output=True,
)
ok("Done.")
# ── 5. cert-manager ───────────────────────────────────────────────────────────
@@ -318,7 +316,7 @@ def ensure_tls_secret(domain):
ok("Done.")
# ── 9. Lima VM registry trust + k3s config ────────────────────────────────────
def setup_lima_vm_registry(domain):
def setup_lima_vm_registry(domain, gitea_admin_pass=""):
"""Install mkcert root CA in the Lima VM and configure k3s to auth with Gitea.
Restarts k3s if either configuration changes so pods don't fight TLS errors
@@ -355,7 +353,7 @@ def setup_lima_vm_registry(domain):
f' "{registry_host}":\n'
f' auth:\n'
f' username: "{GITEA_ADMIN_USER}"\n'
f' password: "{GITEA_ADMIN_PASS}"\n'
f' password: "{gitea_admin_pass}"\n'
)
existing = capture_out(["limactl", "shell", LIMA_VM,
"sudo", "cat", "/etc/rancher/k3s/registries.yaml"])
@@ -386,10 +384,16 @@ def setup_lima_vm_registry(domain):
ok("k3s restarted.")
# ── 10. Apply manifests ────────────────────────────────────────────────────────
MANAGED_NS = ["data", "devtools", "ingress", "lasuite", "media", "ory", "storage"]
MANAGED_NS = ["data", "devtools", "ingress", "lasuite", "media", "ory", "storage",
"vault-secrets-operator"]
def pre_apply_cleanup():
"""Delete immutable resources that must be re-created on each apply."""
"""Delete immutable resources that must be re-created on each apply.
Also prunes VaultStaticSecrets that share a name with a VaultDynamicSecret —
kubectl apply doesn't delete the old resource when a manifest switches kinds,
and VSO refuses to overwrite a secret owned by a different resource type.
"""
ok("Cleaning up immutable Jobs and test Pods...")
for ns in MANAGED_NS:
kube("delete", "jobs", "--all", "-n", ns, "--ignore-not-found", check=False)
@@ -400,6 +404,25 @@ def pre_apply_cleanup():
if pod.endswith(("-test-connection", "-server-test", "-test")):
kube("delete", "pod", pod, "-n", ns, "--ignore-not-found", check=False)
# Prune VaultStaticSecrets that were replaced by VaultDynamicSecrets.
# When a manifest transitions a resource from VSS → VDS, apply won't delete
# the old VSS; it just creates the new VDS alongside it. VSO then errors
# "not the owner" because the K8s secret's ownerRef still points to the VSS.
ok("Pruning stale VaultStaticSecrets superseded by VaultDynamicSecrets...")
for ns in MANAGED_NS:
vss_names = set(kube_out(
"get", "vaultstaticsecret", "-n", ns,
"-o=jsonpath={.items[*].metadata.name}", "--ignore-not-found",
).split())
vds_names = set(kube_out(
"get", "vaultdynamicsecret", "-n", ns,
"-o=jsonpath={.items[*].metadata.name}", "--ignore-not-found",
).split())
for stale in vss_names & vds_names:
ok(f" deleting stale VaultStaticSecret {ns}/{stale}")
kube("delete", "vaultstaticsecret", stale, "-n", ns,
"--ignore-not-found", check=False)
def apply_manifests(domain):
step(f"Applying manifests (domain: {domain})...")
pre_apply_cleanup()
@@ -413,7 +436,7 @@ def apply_manifests(domain):
ok("Applied.")
# ── 11. Gitea bootstrap ────────────────────────────────────────────────────────
def bootstrap_gitea(domain):
def bootstrap_gitea(domain, gitea_admin_pass=""):
"""Ensure Gitea admin has a known password and create the studio/internal orgs."""
step("Bootstrapping Gitea...")
@@ -445,9 +468,9 @@ def bootstrap_gitea(domain):
capture_output=True, text=True,
)
# Ensure admin has our known password
# Ensure admin has the generated password
r = gitea_exec("gitea", "admin", "user", "change-password",
"--username", GITEA_ADMIN_USER, "--password", GITEA_ADMIN_PASS)
"--username", GITEA_ADMIN_USER, "--password", gitea_admin_pass)
if r.returncode == 0 or "password" in (r.stdout + r.stderr).lower():
ok(f"Admin '{GITEA_ADMIN_USER}' password set.")
else:
@@ -473,7 +496,7 @@ def bootstrap_gitea(domain):
"curl", "-s", "-X", method,
f"http://localhost:3000/api/v1{path}",
"-H", "Content-Type: application/json",
"-u", f"{GITEA_ADMIN_USER}:{GITEA_ADMIN_PASS}",
"-u", f"{GITEA_ADMIN_USER}:{gitea_admin_pass}",
]
if data:
args += ["-d", json.dumps(data)]
@@ -499,7 +522,7 @@ def bootstrap_gitea(domain):
else:
warn(f"Org '{org_name}': {result.get('message', result)}")
ok(f"Gitea ready — https://src.{domain} (studio / internal orgs)")
ok(f"Gitea ready — https://src.{domain} ({GITEA_ADMIN_USER} / <from openbao>)")
# ── 12. Mirror amd64-only images to Gitea registry ────────────────────────────
#
@@ -676,7 +699,7 @@ for _src, _tgt in TARGETS:
'''
def mirror_amd64_images(domain):
def mirror_amd64_images(domain, gitea_admin_pass=""):
"""Patch amd64-only images with an arm64 alias and push them to our Gitea registry."""
step("Mirroring amd64-only images to Gitea registry...")
@@ -689,7 +712,7 @@ def mirror_amd64_images(domain):
header = (
f"TARGETS = {repr(targets)}\n"
f"USER = {repr(GITEA_ADMIN_USER)}\n"
f"PASS = {repr(GITEA_ADMIN_PASS)}\n"
f"PASS = {repr(gitea_admin_pass)}\n"
)
script = header + _MIRROR_SCRIPT_BODY
@@ -715,8 +738,30 @@ def mirror_amd64_images(domain):
# ── 13. Seed secrets ──────────────────────────────────────────────────────────
def seed_secrets():
"""Seed OpenBao KV with crypto-random credentials, then mirror to K8s Secrets.
Returns a dict of credentials for use by callers (gitea admin pass, etc.).
Idempotent: reads existing OpenBao values before generating; never rotates.
"""
step("Seeding secrets...")
creds = _seed_openbao()
ob_pod = creds.pop("_ob_pod", "")
root_token = creds.pop("_root_token", "")
s3_access_key = creds.get("s3-access-key", "")
s3_secret_key = creds.get("s3-secret-key", "")
hydra_system = creds.get("hydra-system-secret", "")
hydra_cookie = creds.get("hydra-cookie-secret", "")
hydra_pairwise = creds.get("hydra-pairwise-salt", "")
kratos_secrets_default = creds.get("kratos-secrets-default", "")
kratos_secrets_cookie = creds.get("kratos-secrets-cookie", "")
hive_oidc_id = creds.get("hive-oidc-client-id", "hive-local")
hive_oidc_sec = creds.get("hive-oidc-client-secret", "")
django_secret = creds.get("people-django-secret", "")
gitea_admin_pass = creds.get("gitea-admin-password", "")
ok("Waiting for postgres cluster...")
pg_pod = ""
for _ in range(60):
@@ -733,7 +778,7 @@ def seed_secrets():
warn("Postgres not ready after 5 min — continuing anyway.")
if pg_pod:
ok("Ensuring postgres roles and databases...")
ok("Ensuring postgres roles and databases exist...")
db_map = {
"kratos": "kratos_db", "hydra": "hydra_db", "gitea": "gitea_db",
"hive": "hive_db", "docs": "docs_db", "meet": "meet_db",
@@ -742,11 +787,11 @@ def seed_secrets():
"people": "people_db", "find": "find_db",
}
for user in PG_USERS:
# Only CREATE if missing — passwords are managed by OpenBao static roles.
ensure_sql = (
f"DO $$ BEGIN "
f"IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname='{user}') "
f"THEN EXECUTE 'CREATE USER {user}'; END IF; END $$; "
f"ALTER USER {user} WITH PASSWORD '{DB_PASSWORD}';"
f"THEN EXECUTE 'CREATE USER {user}'; END IF; END $$;"
)
kube("exec", "-n", "data", pg_pod, "-c", "postgres", "--",
"psql", "-U", "postgres", "-c", ensure_sql, check=False)
@@ -755,45 +800,75 @@ def seed_secrets():
"psql", "-U", "postgres", "-c",
f"CREATE DATABASE {db} OWNER {user};", check=False)
ok("Creating K8s secrets...")
# Read CNPG superuser credentials and configure database secrets engine.
pg_user_b64 = kube_out("-n", "data", "get", "secret", "postgres-superuser",
"-o=jsonpath={.data.username}")
pg_pass_b64 = kube_out("-n", "data", "get", "secret", "postgres-superuser",
"-o=jsonpath={.data.password}")
pg_user = base64.b64decode(pg_user_b64).decode() if pg_user_b64 else "postgres"
pg_pass = base64.b64decode(pg_pass_b64).decode() if pg_pass_b64 else ""
if ob_pod and root_token and pg_pass:
try:
_configure_db_engine(ob_pod, root_token, pg_user, pg_pass)
except Exception as exc:
warn(f"DB engine config failed: {exc}")
else:
warn("Skipping DB engine config — missing ob_pod, root_token, or pg_pass.")
ok("Creating K8s secrets (VSO will overwrite on next sync)...")
ensure_ns("ory")
# Hydra app secrets — DSN comes from VaultDynamicSecret hydra-db-creds.
create_secret("ory", "hydra",
dsn=(f"postgresql://hydra:{DB_PASSWORD}@"
"postgres-rw.data.svc.cluster.local:5432/hydra_db?sslmode=disable"),
secretsSystem=HYDRA_SYSTEM_SECRET,
secretsCookie=HYDRA_COOKIE_SECRET,
**{"pairwise-salt": HYDRA_PAIRWISE_SALT},
secretsSystem=hydra_system,
secretsCookie=hydra_cookie,
**{"pairwise-salt": hydra_pairwise},
)
# Kratos non-rotating encryption keys — DSN comes from VaultDynamicSecret kratos-db-creds.
create_secret("ory", "kratos-app-secrets",
secretsDefault=kratos_secrets_default,
secretsCookie=kratos_secrets_cookie,
)
ensure_ns("devtools")
create_secret("devtools", "gitea-db-credentials", password=DB_PASSWORD)
# gitea-db-credentials comes from VaultDynamicSecret (static-creds/gitea).
create_secret("devtools", "gitea-s3-credentials",
**{"access-key": S3_ACCESS_KEY, "secret-key": S3_SECRET_KEY})
**{"access-key": s3_access_key, "secret-key": s3_secret_key})
create_secret("devtools", "gitea-admin-credentials",
username=GITEA_ADMIN_USER, password=gitea_admin_pass)
ensure_ns("storage")
s3_json = (
'{"identities":[{"name":"seaweed","credentials":[{"accessKey":"'
+ s3_access_key + '","secretKey":"' + s3_secret_key
+ '"}],"actions":["Admin","Read","Write","List","Tagging"]}]}'
)
create_secret("storage", "seaweedfs-s3-credentials",
S3_ACCESS_KEY=S3_ACCESS_KEY, S3_SECRET_KEY=S3_SECRET_KEY)
S3_ACCESS_KEY=s3_access_key, S3_SECRET_KEY=s3_secret_key)
create_secret("storage", "seaweedfs-s3-json", **{"s3.json": s3_json})
ensure_ns("lasuite")
create_secret("lasuite", "seaweedfs-s3-credentials",
S3_ACCESS_KEY=S3_ACCESS_KEY, S3_SECRET_KEY=S3_SECRET_KEY)
create_secret("lasuite", "hive-db-url",
url=(f"postgresql://hive:{DB_PASSWORD}@"
"postgres-rw.data.svc.cluster.local:5432/hive_db"))
S3_ACCESS_KEY=s3_access_key, S3_SECRET_KEY=s3_secret_key)
# hive-db-url and people-db-credentials come from VaultDynamicSecrets.
create_secret("lasuite", "hive-oidc",
**{"client-id": "hive-local", "client-secret": "hive-local-secret"})
create_secret("lasuite", "people-db-credentials", password=DB_PASSWORD)
**{"client-id": hive_oidc_id, "client-secret": hive_oidc_sec})
create_secret("lasuite", "people-django-secret",
DJANGO_SECRET_KEY=PEOPLE_DJANGO_SECRET)
DJANGO_SECRET_KEY=django_secret)
ensure_ns("media")
_seed_openbao()
ok("All secrets seeded.")
return creds
def _seed_openbao():
"""Initialize/unseal OpenBao, generate/read credentials idempotently, configure VSO auth.
Returns a dict of all generated credentials. Values are read from existing
OpenBao KV entries when present — re-running never rotates credentials.
"""
ob_pod = kube_out(
"-n", "data", "get", "pods",
"-l=app.kubernetes.io/name=openbao,component=server",
@@ -801,7 +876,7 @@ def _seed_openbao():
)
if not ob_pod:
ok("OpenBao pod not found — skipping.")
return
return {}
ok(f"OpenBao ({ob_pod})...")
kube("wait", "-n", "data", f"pod/{ob_pod}",
@@ -847,7 +922,7 @@ def _seed_openbao():
kube("delete", "pvc", "data-openbao-0", "-n", "data", "--ignore-not-found", check=False)
kube("delete", "pod", ob_pod, "-n", "data", "--ignore-not-found", check=False)
warn("OpenBao storage reset. Run --seed again after the pod restarts.")
return
return {}
else:
ok("Already initialized.")
existing_key = kube_out("-n", "data", "get", "secret", "openbao-keys",
@@ -863,22 +938,229 @@ def _seed_openbao():
ok("Unsealing...")
bao(f"bao operator unseal '{unseal_key}' 2>/dev/null")
if root_token:
ok("Seeding KV...")
pg_rw = "postgres-rw.data.svc.cluster.local:5432"
bao(f"""
BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' sh -c '
bao secrets enable -path=secret -version=2 kv 2>/dev/null || true
bao kv put secret/postgres password="{DB_PASSWORD}"
bao kv put secret/hydra db-password="{DB_PASSWORD}" system-secret="{HYDRA_SYSTEM_SECRET}" cookie-secret="{HYDRA_COOKIE_SECRET}" pairwise-salt="{HYDRA_PAIRWISE_SALT}"
bao kv put secret/kratos db-password="{DB_PASSWORD}"
bao kv put secret/gitea db-password="{DB_PASSWORD}" s3-access-key="{S3_ACCESS_KEY}" s3-secret-key="{S3_SECRET_KEY}"
bao kv put secret/seaweedfs access-key="{S3_ACCESS_KEY}" secret-key="{S3_SECRET_KEY}"
bao kv put secret/hive db-url="postgresql://hive:{DB_PASSWORD}@{pg_rw}/hive_db" oidc-client-id="hive-local" oidc-client-secret="hive-local-secret"
bao kv put secret/livekit api-key="{LIVEKIT_API_KEY}" api-secret="{LIVEKIT_API_SECRET}"
bao kv put secret/people db-password="{DB_PASSWORD}" django-secret-key="{PEOPLE_DJANGO_SECRET}"
'
""")
if not root_token:
warn("No root token available — skipping KV seeding.")
return {}
# Read-or-generate helper: preserves existing KV values; only generates missing ones.
def get_or_create(path, **fields):
raw = bao(
f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"bao kv get -format=json secret/{path} 2>/dev/null || echo '{{}}'"
)
existing = {}
try:
existing = json.loads(raw).get("data", {}).get("data", {})
except (json.JSONDecodeError, AttributeError):
pass
result = {}
for key, default_fn in fields.items():
result[key] = existing.get(key) or default_fn()
return result
def rand():
return _secrets.token_urlsafe(32)
ok("Seeding KV (idempotent — existing values preserved)...")
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"bao secrets enable -path=secret -version=2 kv 2>/dev/null || true")
# DB passwords removed — OpenBao database secrets engine manages them via static roles.
hydra = get_or_create("hydra",
**{"system-secret": rand,
"cookie-secret": rand,
"pairwise-salt": rand})
SMTP_URI = "smtp://postfix.lasuite.svc.cluster.local:25/?skip_ssl_verify=true"
kratos = get_or_create("kratos",
**{"secrets-default": rand,
"secrets-cookie": rand,
"smtp-connection-uri": lambda: SMTP_URI})
seaweedfs = get_or_create("seaweedfs",
**{"access-key": rand, "secret-key": rand})
gitea = get_or_create("gitea",
**{"admin-username": lambda: GITEA_ADMIN_USER,
"admin-password": rand})
hive = get_or_create("hive",
**{"oidc-client-id": lambda: "hive-local",
"oidc-client-secret": rand})
livekit = get_or_create("livekit",
**{"api-key": lambda: "devkey",
"api-secret": rand})
people = get_or_create("people",
**{"django-secret-key": rand})
login_ui = get_or_create("login-ui",
**{"cookie-secret": rand,
"csrf-cookie-secret": rand})
# Write all secrets to KV (idempotent — puts same values back)
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' sh -c '"
f"bao kv put secret/hydra system-secret=\"{hydra['system-secret']}\" cookie-secret=\"{hydra['cookie-secret']}\" pairwise-salt=\"{hydra['pairwise-salt']}\" && "
f"bao kv put secret/kratos secrets-default=\"{kratos['secrets-default']}\" secrets-cookie=\"{kratos['secrets-cookie']}\" smtp-connection-uri=\"{kratos['smtp-connection-uri']}\" && "
f"bao kv put secret/gitea admin-username=\"{gitea['admin-username']}\" admin-password=\"{gitea['admin-password']}\" && "
f"bao kv put secret/seaweedfs access-key=\"{seaweedfs['access-key']}\" secret-key=\"{seaweedfs['secret-key']}\" && "
f"bao kv put secret/hive oidc-client-id=\"{hive['oidc-client-id']}\" oidc-client-secret=\"{hive['oidc-client-secret']}\" && "
f"bao kv put secret/livekit api-key=\"{livekit['api-key']}\" api-secret=\"{livekit['api-secret']}\" && "
f"bao kv put secret/people django-secret-key=\"{people['django-secret-key']}\" && "
f"bao kv put secret/login-ui cookie-secret=\"{login_ui['cookie-secret']}\" csrf-cookie-secret=\"{login_ui['csrf-cookie-secret']}\""
f"'")
# Configure Kubernetes auth method so VSO can authenticate with OpenBao
ok("Configuring Kubernetes auth for VSO...")
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"bao auth enable kubernetes 2>/dev/null; true")
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"bao write auth/kubernetes/config "
f"kubernetes_host=https://kubernetes.default.svc.cluster.local")
policy_hcl = (
'path "secret/data/*" { capabilities = ["read"] }\n'
'path "secret/metadata/*" { capabilities = ["read", "list"] }\n'
'path "database/static-creds/*" { capabilities = ["read"] }\n'
)
policy_b64 = base64.b64encode(policy_hcl.encode()).decode()
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"sh -c 'echo {policy_b64} | base64 -d | bao policy write vso-reader -'")
bao(f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}' "
f"bao write auth/kubernetes/role/vso "
f"bound_service_account_names=default "
f"bound_service_account_namespaces=ory,devtools,storage,lasuite,media "
f"policies=vso-reader "
f"ttl=1h")
return {
"hydra-system-secret": hydra["system-secret"],
"hydra-cookie-secret": hydra["cookie-secret"],
"hydra-pairwise-salt": hydra["pairwise-salt"],
"kratos-secrets-default": kratos["secrets-default"],
"kratos-secrets-cookie": kratos["secrets-cookie"],
"s3-access-key": seaweedfs["access-key"],
"s3-secret-key": seaweedfs["secret-key"],
"gitea-admin-password": gitea["admin-password"],
"hive-oidc-client-id": hive["oidc-client-id"],
"hive-oidc-client-secret": hive["oidc-client-secret"],
"people-django-secret": people["django-secret-key"],
"livekit-api-key": livekit["api-key"],
"livekit-api-secret": livekit["api-secret"],
"_ob_pod": ob_pod,
"_root_token": root_token,
}
# ── 13b. Configure OpenBao database secrets engine ────────────────────────────
def _configure_db_engine(ob_pod, root_token, pg_user, pg_pass):
    """Set up the OpenBao database secrets engine with PostgreSQL static roles.

    Steps, in order:
      1. Enable the `database` secrets engine (failure of the enable command
         is ignored, so an already-enabled engine is fine).
      2. Read `pg-password` from KV `secret/vault`, or generate a new random
         one, and write it back to KV.
      3. Through psql inside the CNPG primary pod: create the `vault` role if
         it is absent, set its password, and grant every role in PG_USERS to
         it WITH ADMIN OPTION.
      4. Write the `cnpg-postgres` connection config using the `vault` user.
      5. Write one static role per PG_USERS entry (rotation_period=86400).

    Args:
        ob_pod: name of the OpenBao pod in the `data` namespace.
        root_token: OpenBao token used for every bao command.
        pg_user: accepted for interface compatibility; not used here.
        pg_pass: accepted for interface compatibility; not used here.

    Raises:
        RuntimeError: if the CNPG primary pod cannot be found, or if a checked
            bao/psql command exits non-zero.
    """
    ok("Configuring OpenBao database secrets engine...")

    env_prefix = f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}'"
    pg_endpoint = "postgres-rw.data.svc.cluster.local:5432"

    def pod_exec(pod, container, *argv):
        # Shared kubectl-exec plumbing for both the OpenBao and postgres pods.
        return subprocess.run(
            ["kubectl", *K8S_CTX, "-n", "data", "exec", pod, "-c", container,
             "--", *argv],
            capture_output=True, text=True,
        )

    def run_bao(shell_cmd, check=True):
        proc = pod_exec(ob_pod, "openbao", "sh", "-c", shell_cmd)
        if proc.returncode != 0 and check:
            raise RuntimeError(
                f"bao command failed (exit {proc.returncode}):\n{proc.stderr.strip()}"
            )
        return proc.stdout.strip()

    # Best-effort: the command itself swallows the "already enabled" error.
    run_bao(f"{env_prefix} bao secrets enable database 2>/dev/null || true", check=False)

    # ── vault PG user setup ────────────────────────────────────────────────
    # psql runs inside the CNPG primary pod, so the primary must be located first.
    primary_pod = kube_out(
        "-n", "data", "get", "pods",
        "-l=cnpg.io/cluster=postgres,role=primary",
        "-o=jsonpath={.items[0].metadata.name}",
    )
    if not primary_pod:
        raise RuntimeError("Could not find CNPG primary pod for vault user setup.")

    def run_psql(statement):
        proc = pod_exec(primary_pod, "postgres",
                        "psql", "-U", "postgres", "-c", statement)
        if proc.returncode != 0:
            raise RuntimeError(f"psql failed: {proc.stderr.strip()}")
        return proc.stdout.strip()

    # Reuse the stored password when there is one so re-runs do not rotate it.
    stored_pw = run_bao(
        f"{env_prefix} bao kv get -field=pg-password secret/vault 2>/dev/null || true",
        check=False,
    )
    vault_pw = stored_pw or _secrets.token_urlsafe(32)

    run_bao(f'{env_prefix} bao kv put secret/vault pg-password="{vault_pw}"')
    ok("vault KV entry written.")

    run_psql(
        "DO $$ BEGIN "
        "IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'vault') THEN "
        "CREATE USER vault WITH LOGIN CREATEROLE; "
        "END IF; "
        "END $$;"
    )
    run_psql(f"ALTER USER vault WITH PASSWORD '{vault_pw}';")
    for role in PG_USERS:
        run_psql(f"GRANT {role} TO vault WITH ADMIN OPTION;")
    ok("vault PG user configured with ADMIN OPTION on all service roles.")

    # ── DB engine connection config (uses vault user) ──────────────────────
    # The doubled braces are literal: OpenBao substitutes them, not Python.
    connection_url = (
        "postgresql://{{username}}:{{password}}@"
        + pg_endpoint
        + "/postgres?sslmode=disable"
    )
    config_cmd = " ".join([
        f"{env_prefix} bao write database/config/cnpg-postgres",
        "plugin_name=postgresql-database-plugin",
        "allowed_roles='*'",
        f"connection_url='{connection_url}'",
        "username='vault'",
        f"password='{vault_pw}'",
    ])
    run_bao(config_cmd)
    ok("DB engine connection configured (vault user).")

    # The rotation statement carries both quote styles, so it is shipped
    # base64-encoded and decoded inside the pod's shell.
    rotation_sql = b"ALTER USER \"{{name}}\" WITH PASSWORD '{{password}}';"
    rotation_b64 = base64.b64encode(rotation_sql).decode()
    for role in PG_USERS:
        role_cmd = " ".join([
            f"bao write database/static-roles/{role}",
            "db_name=cnpg-postgres",
            f"username={role}",
            "rotation_period=86400",
            f'"rotation_statements=$(echo {rotation_b64} | base64 -d)"',
        ])
        run_bao(f"{env_prefix} sh -c '{role_cmd}'")
        ok(f" static-role/{role}")

    ok("Database secrets engine configured.")
# ── 14. Restart services ──────────────────────────────────────────────────────
def restart_services():
@@ -896,7 +1178,7 @@ def wait_for_core():
ok("Core services ready.")
# ── 16. Print URLs ────────────────────────────────────────────────────────────
def print_urls(domain):
def print_urls(domain, gitea_admin_pass=""):
print(f"\n{''*60}")
print(f" Stack is up. Domain: {domain}")
print(f"{''*60}")
@@ -908,7 +1190,7 @@ def print_urls(domain):
("Chat", f"https://chat.{domain}/"),
("Mail", f"https://mail.{domain}/"),
("People", f"https://people.{domain}/"),
("Gitea", f"https://src.{domain}/ ({GITEA_ADMIN_USER} / {GITEA_ADMIN_PASS})"),
("Gitea", f"https://src.{domain}/ ({GITEA_ADMIN_USER} / {gitea_admin_pass})"),
]:
print(f" {name:<10} {url}")
print()
@@ -957,7 +1239,9 @@ def status_check():
cur_ns = ns
icon = icon_map.get(status, "?")
unhealthy = status not in ("Running", "Completed", "Succeeded")
if not unhealthy and "/" in ready:
# Only check ready ratio for Running pods — Completed/Succeeded pods
# legitimately report 0/N containers ready.
if not unhealthy and status == "Running" and "/" in ready:
r, t = ready.split("/")
unhealthy = r != t
if unhealthy:
@@ -970,6 +1254,211 @@ def status_check():
else:
warn("Some pods are not ready.")
_vso_sync_status()
def _vso_sync_status():
    """Print VSO VaultStaticSecret and VaultDynamicSecret sync health.

    VSS synced = status.secretMAC is non-empty.
    VDS synced = status.lastRenewalTime is non-zero.

    Resources are listed across all namespaces, grouped by namespace, and each
    row is prefixed with "✓" (synced) or "✗" (not synced). A summary line is
    printed at the end via ok()/warn().
    """
    step("VSO secret sync status...")

    def report(resource, label, status_column, unsynced_values):
        """Print one resource kind; return True when every row is synced."""
        raw = capture_out([
            "kubectl", *K8S_CTX, "get", resource, "-A", "--no-headers",
            "-o=custom-columns="
            f"NS:.metadata.namespace,NAME:.metadata.name,{status_column}",
        ])
        every_row_synced = True
        cur_ns = None
        # Namespace is the first column, so sorting lines groups rows by namespace.
        for line in sorted(raw.splitlines()):
            cols = line.split()
            if len(cols) < 2:
                continue
            ns, name = cols[0], cols[1]
            # A missing third column means the status field is unset.
            value = cols[2] if len(cols) > 2 else ""
            synced = value not in unsynced_values
            if not synced:
                every_row_synced = False
            # Distinct markers so synced and unsynced rows can be told apart.
            icon = "✓" if synced else "✗"
            if ns != cur_ns:
                print(f" {ns} ({label}):")
                cur_ns = ns
            print(f" {icon} {name}")
        return every_row_synced

    # Both reports are always run (no short-circuit) so every resource is listed.
    # VaultStaticSecrets: synced when secretMAC is populated
    vss_ok = report("vaultstaticsecret", "VSS",
                    "MAC:.status.secretMAC", ("", "<none>"))
    # VaultDynamicSecrets: synced when lastRenewalTime is non-zero
    vds_ok = report("vaultdynamicsecret", "VDS",
                    "RENEWED:.status.lastRenewalTime", ("", "0", "<none>"))

    print()
    if vss_ok and vds_ok:
        ok("All VSO secrets synced.")
    else:
        warn("Some VSO secrets are not synced.")
# ── 17. VSO E2E verification ──────────────────────────────────────────────────
def verify_vso():
    """End-to-end test of VSO → OpenBao integration.

    1. Writes a random value to OpenBao KV at secret/vso-test.
    2. Creates a VaultAuth + VaultStaticSecret in the 'ory' namespace
       (already bound to the 'vso' Kubernetes auth role).
    3. Polls until VSO syncs the K8s Secret (up to 60s).
    4. Reads and base64-decodes the K8s Secret; compares to the expected value.
    5. Cleans up all test resources in a finally block, so cleanup also runs
       when the check is interrupted (e.g. Ctrl-C during the poll).

    Calls die() when the OpenBao pod or the openbao-keys secret cannot be read,
    and when any verification step fails.
    NOTE(review): assumes die() terminates the process — confirm.
    """
    step("Verifying VSO → OpenBao integration (E2E)...")

    ob_pod = kube_out(
        "-n", "data", "get", "pods",
        "-l=app.kubernetes.io/name=openbao,component=server",
        "-o=jsonpath={.items[0].metadata.name}",
    )
    if not ob_pod:
        die("OpenBao pod not found — run full bring-up first.")

    root_token_enc = kube_out(
        "-n", "data", "get", "secret", "openbao-keys",
        "-o=jsonpath={.data.root-token}",
    )
    if not root_token_enc:
        die("Could not read openbao-keys secret.")
    root_token = base64.b64decode(root_token_enc).decode()

    bao_env = f"BAO_ADDR=http://127.0.0.1:8200 BAO_TOKEN='{root_token}'"

    def bao(cmd, *, check=True):
        r = subprocess.run(
            ["kubectl", *K8S_CTX, "-n", "data", "exec", ob_pod, "-c", "openbao",
             "--", "sh", "-c", cmd],
            capture_output=True, text=True,
        )
        if check and r.returncode != 0:
            raise RuntimeError(f"bao failed (exit {r.returncode}): {r.stderr.strip()}")
        return r.stdout.strip()

    test_value = _secrets.token_urlsafe(16)
    test_ns = "ory"
    test_name = "vso-verify"

    def cleanup():
        # Every step is best-effort so one failed delete does not block the rest.
        ok("Cleaning up test resources...")
        kube("delete", "vaultstaticsecret", test_name, f"-n={test_ns}",
             "--ignore-not-found", check=False)
        kube("delete", "vaultauth", test_name, f"-n={test_ns}",
             "--ignore-not-found", check=False)
        kube("delete", "secret", test_name, f"-n={test_ns}",
             "--ignore-not-found", check=False)
        bao(f"{bao_env} bao kv delete secret/vso-test 2>/dev/null || true", check=False)

    failure = None
    try:
        # 1. Write test value to OpenBao KV
        ok("Writing test sentinel to OpenBao secret/vso-test ...")
        bao(f"{bao_env} bao kv put secret/vso-test test-key='{test_value}'")

        # 2. Create VaultAuth in ory (already in vso role's bound namespaces)
        ok(f"Creating VaultAuth {test_ns}/{test_name} ...")
        kube_apply(f"""
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultAuth
metadata:
  name: {test_name}
  namespace: {test_ns}
spec:
  method: kubernetes
  mount: kubernetes
  kubernetes:
    role: vso
    serviceAccount: default
""")

        # 3. Create VaultStaticSecret pointing at our test KV path
        ok(f"Creating VaultStaticSecret {test_ns}/{test_name} ...")
        kube_apply(f"""
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
  name: {test_name}
  namespace: {test_ns}
spec:
  vaultAuthRef: {test_name}
  mount: secret
  type: kv-v2
  path: vso-test
  refreshAfter: 10s
  destination:
    name: {test_name}
    create: true
    overwrite: true
""")

        # 4. Poll until VSO sets secretMAC (= synced)
        ok("Waiting for VSO to sync (up to 60s) ...")
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + 60
        synced = False
        while time.monotonic() < deadline:
            mac = kube_out(
                "get", "vaultstaticsecret", test_name, f"-n={test_ns}",
                "-o=jsonpath={.status.secretMAC}", "--ignore-not-found",
            )
            if mac and mac != "<none>":
                synced = True
                break
            time.sleep(3)

        if not synced:
            msg = kube_out(
                "get", "vaultstaticsecret", test_name, f"-n={test_ns}",
                "-o=jsonpath={.status.conditions[0].message}", "--ignore-not-found",
            )
            raise RuntimeError(f"VSO did not sync within 60s. Last status: {msg or 'unknown'}")

        # 5. Read and verify the K8s Secret value
        ok("Verifying K8s Secret contents ...")
        raw = kube_out(
            "get", "secret", test_name, f"-n={test_ns}",
            "-o=jsonpath={.data.test-key}", "--ignore-not-found",
        )
        if not raw:
            raise RuntimeError(
                f"K8s Secret {test_ns}/{test_name} not found or missing key 'test-key'."
            )
        actual = base64.b64decode(raw).decode()
        if actual != test_value:
            raise RuntimeError(
                f"Value mismatch!\n expected: {test_value!r}\n got: {actual!r}"
            )

        ok("✓ Sentinel value matches — VSO → OpenBao integration is working.")

    except Exception as exc:
        # Defer die() until after cleanup so test resources are always removed first.
        failure = exc
    finally:
        cleanup()

    if failure is not None:
        die(f"VSO verification FAILED: {failure}")
    ok("VSO E2E verification passed.")
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
@@ -979,29 +1468,33 @@ def main():
parser.add_argument("--gitea", action="store_true", help="Bootstrap Gitea orgs + mirror images")
parser.add_argument("--restart", action="store_true", help="Restart services only")
parser.add_argument("--status", action="store_true", help="Show pod health across all namespaces")
parser.add_argument("--verify", action="store_true", help="E2E test VSO → OpenBao integration")
args = parser.parse_args()
check_prerequisites()
# Partial-run modes — run in logical order: apply → gitea → seed → restart
# Partial-run modes — run in logical order: apply → seed → gitea → restart
if args.status:
status_check()
return
if args.verify:
verify_vso()
return
if args.apply or args.gitea or args.seed or args.restart:
ip = get_lima_ip()
domain = f"{ip}.sslip.io"
creds = {}
if args.apply:
setup_lima_vm_registry(domain)
apply_manifests(domain)
bootstrap_gitea(domain)
mirror_amd64_images(domain)
if args.gitea:
setup_lima_vm_registry(domain)
bootstrap_gitea(domain)
mirror_amd64_images(domain)
if args.seed:
seed_secrets()
if args.apply or args.gitea or args.seed:
creds = seed_secrets()
if args.apply or args.gitea:
admin_pass = creds.get("gitea-admin-password", "")
setup_lima_vm_registry(domain, admin_pass)
bootstrap_gitea(domain, admin_pass)
mirror_amd64_images(domain, admin_pass)
restart_services()
return
@@ -1013,14 +1506,15 @@ def main():
ensure_linkerd()
domain = ensure_tls_cert()
ensure_tls_secret(domain)
setup_lima_vm_registry(domain) # mkcert CA + registries.yaml + k3s restart if needed
apply_manifests(domain)
bootstrap_gitea(domain) # create studio/internal orgs
mirror_amd64_images(domain) # patch + push amd64-only images
seed_secrets()
creds = seed_secrets() # waits for OpenBao; generates/reads all credentials
admin_pass = creds.get("gitea-admin-password", "")
setup_lima_vm_registry(domain, admin_pass) # mkcert CA + registries.yaml + k3s restart
bootstrap_gitea(domain, admin_pass) # create studio/internal orgs
mirror_amd64_images(domain, admin_pass) # patch + push amd64-only images
restart_services()
wait_for_core()
print_urls(domain)
print_urls(domain, admin_pass)
if __name__ == "__main__":