chore: initial infrastructure scaffold

Kustomize base + overlays for the full Sunbeam k3s stack:
- base/mesh      — Linkerd edge (crds + control-plane + viz)
- base/ingress   — custom Pingora edge proxy
- base/ory       — Kratos 0.60.1 + Hydra 0.60.1 + login-ui
- base/data      — CloudNativePG 0.27.1, Valkey 8, OpenSearch 2
- base/storage   — SeaweedFS master + volume + filer (S3 on :8333)
- base/lasuite   — Hive sync daemon + La Suite app placeholders
- base/media     — LiveKit livekit-server 1.9.0
- base/devtools  — Gitea 12.5.0 (external PG + Valkey)
overlays/local   — sslip.io domain, mkcert TLS, Lima hostPort
overlays/production — stub (TODOs for sunbeam.pt values)
scripts/         — local-up/down/certs/urls helpers
justfile         — up / down / certs / urls targets
This commit is contained in:
2026-02-28 13:42:27 +00:00
commit 5d9bd7b067
51 changed files with 2647 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
secrets/local/
*.pem
*-key.pem
.DS_Store

View File

@@ -0,0 +1,21 @@
# Kustomization for the shared data layer: CloudNativePG operator (via Helm),
# one Postgres Cluster, Valkey, and OpenSearch — all in the `data` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: data
resources:
- namespace.yaml
- postgres-cluster.yaml
- valkey-deployment.yaml
- valkey-service.yaml
- opensearch-deployment.yaml
- opensearch-service.yaml
helmCharts:
# CloudNativePG operator — chart name: cloudnative-pg
# helm repo add cnpg https://cloudnative-pg.github.io/charts
- name: cloudnative-pg
  repo: https://cloudnative-pg.github.io/charts
  version: "0.27.1"
  releaseName: cnpg
  namespace: data

6
base/data/namespace.yaml Normal file
View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: data
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,44 @@
# Single-node OpenSearch for search/indexing in the data namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: opensearch
  namespace: data
spec:
  replicas: 1
  selector:
    matchLabels:
      app: opensearch
  template:
    metadata:
      labels:
        app: opensearch
    spec:
      initContainers:
      # OpenSearch requires vm.max_map_count >= 262144 on the node;
      # privileged is needed to write the host-level sysctl.
      - name: sysctl
        image: busybox
        command: ["sysctl", "-w", "vm.max_map_count=262144"]
        securityContext:
          privileged: true
      containers:
      - name: opensearch
        # NOTE(review): floating major tag — consider pinning a full version
        # for reproducible deploys.
        image: opensearchproject/opensearch:2
        ports:
        - name: http
          containerPort: 9200
          protocol: TCP
        - name: transport
          containerPort: 9300
          protocol: TCP
        env:
        - name: discovery.type
          value: single-node
        - name: OPENSEARCH_JAVA_OPTS
          # Heap must leave headroom below the container memory limit: the JVM
          # needs metaspace/threads/direct buffers on top of -Xmx. The previous
          # -Xmx512m equalled the 512Mi limit, which risks OOMKill; cap the
          # heap at ~50% of the limit per OpenSearch sizing guidance.
          value: "-Xms256m -Xmx256m"
        - name: DISABLE_SECURITY_PLUGIN
          value: "true"
        resources:
          limits:
            memory: 512Mi
          requests:
            memory: 256Mi
            cpu: 100m

View File

@@ -0,0 +1,17 @@
# ClusterIP Service fronting the single-node OpenSearch Deployment.
apiVersion: v1
kind: Service
metadata:
  name: opensearch
  namespace: data
spec:
  ports:
  # REST API
  - name: http
    protocol: TCP
    port: 9200
    targetPort: 9200
  # Node-to-node transport (unused with one node, but exposed for parity)
  - name: transport
    protocol: TCP
    port: 9300
    targetPort: 9300
  selector:
    app: opensearch

View File

@@ -0,0 +1,52 @@
# CloudNativePG Cluster: one shared Postgres instance hosting every app's
# logical database (single instance — no HA; suitable for local/dev).
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: postgres
  namespace: data
spec:
  instances: 1
  postgresql:
    parameters:
      max_connections: "100"
      shared_buffers: "128MB"
      work_mem: "4MB"
  bootstrap:
    initdb:
      database: postgres
      owner: postgres
      secret:
        name: postgres-superuser
      # postInitSQL statements run once at cluster bootstrap, as superuser.
      postInitSQL:
      # Create all 10 logical databases and their owners
      # NOTE(review): these roles are created WITHOUT passwords, but app DSNs
      # elsewhere reference per-user passwords (e.g. $(KRATOS_DB_PASSWORD)).
      # Confirm passwords are set out-of-band (ALTER USER / secret sync).
      - CREATE USER kratos;
      - CREATE DATABASE kratos_db OWNER kratos;
      - CREATE USER hydra;
      - CREATE DATABASE hydra_db OWNER hydra;
      - CREATE USER docs;
      - CREATE DATABASE docs_db OWNER docs;
      - CREATE USER meet;
      - CREATE DATABASE meet_db OWNER meet;
      - CREATE USER drive;
      - CREATE DATABASE drive_db OWNER drive;
      - CREATE USER messages;
      - CREATE DATABASE messages_db OWNER messages;
      - CREATE USER conversations;
      - CREATE DATABASE conversations_db OWNER conversations;
      - CREATE USER people;
      - CREATE DATABASE people_db OWNER people;
      - CREATE USER gitea;
      - CREATE DATABASE gitea_db OWNER gitea;
      - CREATE USER hive;
      - CREATE DATABASE hive_db OWNER hive;
  storage:
    size: 10Gi
  resources:
    requests:
      memory: 256Mi
      cpu: 250m
    limits:
      memory: 512Mi

View File

@@ -0,0 +1,34 @@
# Single Valkey instance shared as cache/session/queue backend by the stack.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: valkey
  namespace: data
spec:
  replicas: 1
  selector:
    matchLabels:
      app: valkey
  template:
    metadata:
      labels:
        app: valkey
    spec:
      containers:
      - name: valkey
        image: valkey/valkey:8-alpine
        ports:
        - name: valkey
          containerPort: 6379
          protocol: TCP
        args:
        - valkey-server
        # maxmemory 56mb sits just under the 64Mi container limit, leaving
        # headroom for Valkey's own overhead; LRU eviction across all keys.
        - --maxmemory
        - 56mb
        - --maxmemory-policy
        - allkeys-lru
        resources:
          limits:
            memory: 64Mi
          requests:
            memory: 32Mi
            cpu: 25m

View File

@@ -0,0 +1,13 @@
# Stable in-cluster endpoint for the single Valkey instance.
apiVersion: v1
kind: Service
metadata:
  name: valkey
  namespace: data
spec:
  ports:
  - name: valkey
    protocol: TCP
    port: 6379
    targetPort: 6379
  selector:
    app: valkey

View File

@@ -0,0 +1,76 @@
# Base Gitea Helm values (chart: gitea/gitea, v12.5.0).
# DOMAIN_SUFFIX is replaced by overlay patches.
# Reference: https://gitea.com/gitea/helm-gitea/src/branch/main/values.yaml
# Disable bundled DB and cache — we use shared CloudNativePG + Valkey
postgresql:
  enabled: false
postgresql-ha:
  enabled: false
valkey-cluster:
  enabled: false
valkey:
  enabled: false
gitea:
  config:
    server:
      DOMAIN: src.DOMAIN_SUFFIX
      ROOT_URL: https://src.DOMAIN_SUFFIX/
      SSH_DOMAIN: src.DOMAIN_SUFFIX
      LFS_START_SERVER: "true"
    database:
      DB_TYPE: postgres
      HOST: postgres-rw.data.svc.cluster.local:5432
      NAME: gitea_db
      USER: gitea
      # PASSWD injected via additionalConfigFromEnvs below
    # Cache / session / queue each use a distinct Valkey logical DB (0/1/2)
    # so FLUSHDB on one does not wipe the others.
    cache:
      ADAPTER: redis
      # Valkey is Redis protocol-compatible; Gitea's redis adapter works against Valkey
      HOST: redis://valkey.data.svc.cluster.local:6379/0?pool_size=100&idle_timeout=180s
    session:
      PROVIDER: redis
      PROVIDER_CONFIG: redis://valkey.data.svc.cluster.local:6379/1?pool_size=100&idle_timeout=180s
    queue:
      TYPE: redis
      CONN_STR: redis://valkey.data.svc.cluster.local:6379/2?pool_size=100&idle_timeout=180s
    storage:
      # S3-compatible object storage via SeaweedFS filer's S3 gateway.
      STORAGE_TYPE: minio
      MINIO_ENDPOINT: seaweedfs-filer.storage.svc.cluster.local:8333
      MINIO_BUCKET: sunbeam-git-lfs
      MINIO_USE_SSL: "false"
      # MINIO_ACCESS_KEY_ID / MINIO_SECRET_ACCESS_KEY from gitea-s3-credentials Secret
  # GITEA__SECTION__KEY env vars override app.ini at startup; used here to
  # inject secrets without writing them into this values file.
  additionalConfigFromEnvs:
  - name: GITEA__DATABASE__PASSWD
    valueFrom:
      secretKeyRef:
        name: gitea-db-credentials
        key: password
  - name: GITEA__STORAGE__MINIO_ACCESS_KEY_ID
    valueFrom:
      secretKeyRef:
        name: gitea-s3-credentials
        key: access-key
  - name: GITEA__STORAGE__MINIO_SECRET_ACCESS_KEY
    valueFrom:
      secretKeyRef:
        name: gitea-s3-credentials
        key: secret-key
resources:
  limits:
    memory: 256Mi
  requests:
    memory: 128Mi
    cpu: 100m
persistence:
  enabled: true
  size: 5Gi

View File

@@ -0,0 +1,18 @@
# Kustomization for developer tooling (Gitea) in the `devtools` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: devtools
resources:
- namespace.yaml
helmCharts:
# helm repo add gitea-charts https://dl.gitea.com/charts/
# Note: Gitea chart v10+ replaced Redis with Valkey-cluster by default.
# We disable bundled DB/cache (external CloudNativePG + Valkey — see gitea-values.yaml).
- name: gitea
  repo: https://dl.gitea.com/charts/
  version: "12.5.0"
  releaseName: gitea
  namespace: devtools
  valuesFile: gitea-values.yaml

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: devtools
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,10 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ingress
resources:
- namespace.yaml
- pingora-deployment.yaml
- pingora-service.yaml
- pingora-config.yaml

View File

@@ -0,0 +1,5 @@
apiVersion: v1
kind: Namespace
metadata:
name: ingress
# Linkerd annotation intentionally omitted — Pingora is the mesh ingress gateway

View File

@@ -0,0 +1,70 @@
# Pingora edge-proxy configuration, mounted into the pingora Deployment at
# /etc/pingora. The TOML below is the ConfigMap's data payload — the `#` lines
# inside the block scalar are TOML comments and part of the data itself.
apiVersion: v1
kind: ConfigMap
metadata:
  name: pingora-config
  namespace: ingress
data:
  config.toml: |
    # Pingora hostname routing table
    # The domain suffix (sunbeam.pt / <LIMA_IP>.sslip.io) is patched per overlay.
    # TLS cert source (rustls-acme / mkcert) is patched per overlay.
    [tls]
    cert_path = "/etc/tls/tls.crt"
    key_path = "/etc/tls/tls.key"
    # acme = true # Uncommented in production overlay (rustls-acme + Let's Encrypt)
    acme = false
    [listen]
    http = "0.0.0.0:80"
    https = "0.0.0.0:443"
    [turn]
    backend = "livekit.media.svc.cluster.local:7880"
    udp_listen = "0.0.0.0:3478"
    relay_port_start = 49152
    relay_port_end = 49252
    # Host-prefix → backend mapping.
    # Pingora matches on the subdomain prefix regardless of domain suffix,
    # so these routes work identically for sunbeam.pt and *.sslip.io.
    [[routes]]
    host_prefix = "docs"
    backend = "http://docs.lasuite.svc.cluster.local:8000"
    websocket = true # Y.js CRDT sync
    [[routes]]
    host_prefix = "meet"
    backend = "http://meet.lasuite.svc.cluster.local:8000"
    websocket = true # LiveKit signaling
    [[routes]]
    host_prefix = "drive"
    backend = "http://drive.lasuite.svc.cluster.local:8000"
    [[routes]]
    host_prefix = "mail"
    backend = "http://messages.lasuite.svc.cluster.local:8000"
    [[routes]]
    host_prefix = "chat"
    backend = "http://conversations.lasuite.svc.cluster.local:8000"
    websocket = true # Vercel AI SDK streaming
    [[routes]]
    host_prefix = "people"
    backend = "http://people.lasuite.svc.cluster.local:8000"
    [[routes]]
    host_prefix = "src"
    backend = "http://gitea.devtools.svc.cluster.local:3000"
    websocket = true # Gitea Actions runner
    [[routes]]
    host_prefix = "auth"
    backend = "http://hydra.ory.svc.cluster.local:4444"
    [[routes]]
    host_prefix = "s3"
    backend = "http://seaweedfs-filer.storage.svc.cluster.local:8333"

View File

@@ -0,0 +1,52 @@
# Pingora edge proxy — terminates TLS and routes by host prefix (see
# pingora-config ConfigMap). Sits outside the Linkerd mesh by design.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pingora
  namespace: ingress
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pingora
  template:
    metadata:
      labels:
        app: pingora
      annotations:
        # Pingora terminates TLS at the mesh boundary; sidecar injection is disabled here
        linkerd.io/inject: disabled
    spec:
      containers:
      - name: pingora
        # NOTE(review): :latest tag — consider pinning a digest/version.
        image: ghcr.io/sunbeam-studio/pingora:latest
        ports:
        - name: http
          containerPort: 80
          protocol: TCP
        - name: https
          containerPort: 443
          protocol: TCP
        - name: turn-udp
          containerPort: 3478
          protocol: UDP
        # TURN relay range 49152-49252 exposed via hostPort in local overlay
        volumeMounts:
        - name: config
          mountPath: /etc/pingora
          readOnly: true
        - name: tls
          mountPath: /etc/tls
          readOnly: true
        resources:
          limits:
            memory: 64Mi
          requests:
            memory: 32Mi
            cpu: 50m
      volumes:
      - name: config
        configMap:
          name: pingora-config
      - name: tls
        secret:
          secretName: pingora-tls

View File

@@ -0,0 +1,24 @@
# In-cluster Service for the Pingora edge proxy. External exposure (hostPort
# on the Lima VM locally, public IP in production) is handled per overlay.
apiVersion: v1
kind: Service
metadata:
  name: pingora
  namespace: ingress
spec:
  selector:
    app: pingora
  type: ClusterIP
  ports:
  - name: http
    port: 80
    targetPort: 80
    protocol: TCP
  - name: https
    port: 443
    targetPort: 443
    protocol: TCP
  - name: turn-udp
    port: 3478
    targetPort: 3478
    protocol: UDP
  # TURN relay ports 49152-49252 are forwarded via hostPort on the pod (see deployment).
  # Kubernetes Services do not support port ranges; UDP relay is handled at the node level.

View File

@@ -0,0 +1,28 @@
# Configuration for the Hive sync daemon (Drive ↔ S3 ↔ Postgres reconciler).
# The TOML below is ConfigMap data; its `#` lines are TOML comments, not YAML.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hive-config
  namespace: lasuite
data:
  config.toml: |
    [drive]
    base_url = "http://drive.lasuite.svc.cluster.local:8000"
    workspace = "Game Assets"
    oidc_client_id = "hive"
    oidc_token_url = "http://hydra.ory.svc.cluster.local:4444/oauth2/token"
    # oidc_client_secret_file = "/run/secrets/hive-oidc" # mounted from Secret
    [s3]
    endpoint = "http://seaweedfs-filer.storage.svc.cluster.local:8333"
    bucket = "sunbeam-game-assets"
    region = "us-east-1"
    # access_key_file = "/run/secrets/seaweedfs-key" # mounted from Secret
    # secret_key_file = "/run/secrets/seaweedfs-secret" # mounted from Secret
    [postgres]
    # url_file = "/run/secrets/hive-db-url" # mounted from Secret
    [sync]
    interval_seconds = 30
    temp_dir = "/tmp/hive"
    large_file_threshold_mb = 50

View File

@@ -0,0 +1,44 @@
# Hive reconciliation daemon. Config comes from the hive-config ConfigMap;
# credentials are merged into /run/secrets via a projected volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hive
  namespace: lasuite
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hive
  template:
    metadata:
      labels:
        app: hive
    spec:
      containers:
      - name: hive
        image: ghcr.io/sunbeam-studio/hive:latest
        volumeMounts:
        - name: config
          mountPath: /etc/hive
          readOnly: true
        - name: secrets
          mountPath: /run/secrets
          readOnly: true
        resources:
          limits:
            memory: 64Mi
          requests:
            memory: 32Mi
            cpu: 25m
      volumes:
      - name: config
        configMap:
          name: hive-config
      # Projected volume flattens all three Secrets into one directory;
      # key names must not collide across the source Secrets.
      - name: secrets
        projected:
          sources:
          - secret:
              name: hive-oidc
          - secret:
              name: seaweedfs-s3-credentials
          - secret:
              name: hive-db-url

View File

@@ -0,0 +1,15 @@
# Hive has no inbound HTTP API — it is a reconciliation daemon only.
# This Service exists for Linkerd observability (metrics scraping).
# NOTE(review): assumes the hive container serves metrics on 9090; the
# Deployment does not declare a containerPort — confirm against the image.
apiVersion: v1
kind: Service
metadata:
  name: hive
  namespace: lasuite
spec:
  selector:
    app: hive
  ports:
  - name: metrics
    port: 9090
    targetPort: 9090
    protocol: TCP

View File

@@ -0,0 +1,69 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: lasuite
resources:
- namespace.yaml
- hive-config.yaml
- hive-deployment.yaml
- hive-service.yaml
- seaweedfs-buckets.yaml
# La Suite Numérique Helm charts:
# Each component's chart lives in-tree inside its own GitHub repo (under helm/ or charts/).
# There is NO published Helm repo index at a suitenumerique.github.io URL — charts must be
# pulled from each component's repo individually.
#
# Options:
# a) Use Flux HelmRepository with type=git pointing at each suitenumerique/<app> repo.
# b) Package each chart locally (`helm package`) and commit to this repo under charts/.
# c) Use OCI if/when they start publishing to GHCR (check each repo's CI for ghcr.io pushes).
#
# Recommended starting points:
# - https://github.com/suitenumerique/docs (helm/ directory)
# - https://github.com/suitenumerique/meet (helm/ directory)
# - https://github.com/suitenumerique/drive (helm/ directory)
# - https://github.com/suitenumerique/people (helm/ directory)
# - https://github.com/suitenumerique/messages (check for helm/ directory)
# - https://github.com/suitenumerique/conversations (check for helm/ directory)
#
# TODO: Once each app's chart path is confirmed, add helmCharts entries here.
# Placeholder entries (commented out) — verify chart name and repo format first:
# helmCharts:
# - name: docs
# repo: oci://ghcr.io/suitenumerique/docs # hypothetical; verify on ghcr.io first
# version: "1.0.0"
# releaseName: docs
# namespace: lasuite
#
# - name: meet
# repo: oci://ghcr.io/suitenumerique/meet
# version: "1.0.0"
# releaseName: meet
# namespace: lasuite
#
# - name: drive
# repo: oci://ghcr.io/suitenumerique/drive
# version: "1.0.0"
# releaseName: drive
# namespace: lasuite
#
# - name: messages
# repo: oci://ghcr.io/suitenumerique/messages
# version: "1.0.0"
# releaseName: messages
# namespace: lasuite
#
# - name: conversations
# repo: oci://ghcr.io/suitenumerique/conversations
# version: "1.0.0"
# releaseName: conversations
# namespace: lasuite
#
# - name: people
# repo: oci://ghcr.io/suitenumerique/people
# version: "1.0.0"
# releaseName: people
# namespace: lasuite

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: lasuite
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,37 @@
# One-shot Job that pre-creates all S3 buckets in SeaweedFS using the MinIO
# client (mc), which speaks plain S3 against the filer's gateway.
apiVersion: batch/v1
kind: Job
metadata:
  name: seaweedfs-bucket-init
  namespace: lasuite
  annotations:
    # Run once on first deploy; manually delete to re-run if needed.
    # NOTE(review): helm.sh/hook is only honored by Helm. This manifest is
    # applied via kustomize/kubectl, where the annotation is inert — and Job
    # specs are immutable, so re-applying a changed spec will error until the
    # old Job is deleted. Confirm the intended lifecycle.
    helm.sh/hook: post-install
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
      - name: mc
        image: minio/mc:latest
        command:
        - /bin/sh
        - -c
        - |
          set -e
          ENDPOINT=http://seaweedfs-filer.storage.svc.cluster.local:8333
          mc alias set weed "$ENDPOINT" "$S3_ACCESS_KEY" "$S3_SECRET_KEY"
          for bucket in \
            sunbeam-docs \
            sunbeam-meet \
            sunbeam-drive \
            sunbeam-messages \
            sunbeam-conversations \
            sunbeam-git-lfs \
            sunbeam-game-assets; do
            mc mb --ignore-existing "weed/$bucket"
            echo "Ensured bucket: $bucket"
          done
        # Supplies S3_ACCESS_KEY / S3_SECRET_KEY read by the script above.
        envFrom:
        - secretRef:
            name: seaweedfs-s3-credentials

View File

@@ -0,0 +1,17 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: media
resources:
- namespace.yaml
helmCharts:
# chart name is `livekit-server`, not `livekit-helm`
# helm repo add livekit https://helm.livekit.io
- name: livekit-server
repo: https://helm.livekit.io
version: "1.9.0"
releaseName: livekit
namespace: media
valuesFile: livekit-values.yaml

View File

@@ -0,0 +1,37 @@
# Base LiveKit Helm values (chart: livekit/livekit-server).
# DOMAIN_SUFFIX is replaced by overlay patches.
# API keys/secrets come from the livekit-keys Secret (loaded via extraEnv or config file).
# Reference: https://github.com/livekit/livekit-helm/blob/master/server-sample.yaml
livekit:
  # LiveKit server config injected as config.yaml
  port: 7880
  log_level: info
  rtc:
    # WebRTC media port range — matches the TURN relay range Pingora exposes.
    port_range_start: 49152
    port_range_end: 49252
    use_external_ip: true
  turn:
    enabled: true
    domain: meet.DOMAIN_SUFFIX
    tls_port: 5349
    udp_port: 3478
    external_tls: true
  redis:
    # Valkey is protocol-compatible with Redis; LiveKit sees this as a Redis endpoint
    address: valkey.data.svc.cluster.local:6379
  # API keys are loaded from a Kubernetes Secret and mounted as env vars.
  # keys:
  #   <key>: <secret> # set in overlay Secret, not here
deployment:
  resources:
    limits:
      memory: 128Mi
    requests:
      memory: 64Mi
      cpu: 100m

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: media
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,37 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
# NOTE: Linkerd stable releases moved behind a commercial paywall in Feb 2024.
# As of 2.15, stable artifacts are Buoyant Enterprise for Linkerd (BEL) only.
# The free channel is "edge" (weekly date-versioned builds).
#
# For local dev: local-up.sh installs Linkerd via the CLI directly:
# linkerd install --crds | kubectl apply -f -
# linkerd install | kubectl apply -f -
# which is simpler and uses whatever edge version the CLI was built against.
#
# The blocks below are the production Helm path (edge channel, pinned dates).
# To use stable BEL, change repo to https://helm.linkerd.io/stable with a
# valid BEL entitlement secret and use versions 1.8.0 / 1.16.11 / 30.12.11.
helmCharts:
- name: linkerd-crds
repo: https://helm.linkerd.io/edge
version: "2026.1.2"
releaseName: linkerd-crds
namespace: mesh
- name: linkerd-control-plane
repo: https://helm.linkerd.io/edge
version: "2025.12.3"
releaseName: linkerd-control-plane
namespace: mesh
- name: linkerd-viz
repo: https://helm.linkerd.io/edge
version: "2026.1.4"
releaseName: linkerd-viz
namespace: mesh

5
base/mesh/namespace.yaml Normal file
View File

@@ -0,0 +1,5 @@
apiVersion: v1
kind: Namespace
metadata:
name: mesh
# Linkerd annotation intentionally omitted — the control plane is not self-injected

View File

@@ -0,0 +1,45 @@
# Base Ory Hydra Helm values.
# DOMAIN_SUFFIX is replaced by overlay patches.
# DSN and system secrets come from the overlay-specific Secret.
# NOTE(review): the $(VAR) placeholders rely on env-var expansion at render or
# container start — confirm the chart/tooling actually substitutes them; a
# literal "$(...)" in the DSN would fail at connect time.
hydra:
  config:
    dsn: "postgresql://hydra:$(HYDRA_DB_PASSWORD)@postgres-rw.data.svc.cluster.local:5432/hydra_db"
    urls:
      self:
        issuer: https://auth.DOMAIN_SUFFIX/
      consent: https://auth.DOMAIN_SUFFIX/consent
      login: https://auth.DOMAIN_SUFFIX/login
      logout: https://auth.DOMAIN_SUFFIX/logout
      error: https://auth.DOMAIN_SUFFIX/error
    secrets:
      system:
      - $(HYDRA_SYSTEM_SECRET)
      cookie:
      - $(HYDRA_COOKIE_SECRET)
    oidc:
      subject_identifiers:
        supported_types:
        - public
        pairwise:
          salt: $(HYDRA_PAIRWISE_SALT)
    serve:
      cookies:
        same_site_mode: Lax
      public:
        cors:
          enabled: true
          allowed_origins:
          - https://*.DOMAIN_SUFFIX
deployment:
  resources:
    limits:
      memory: 64Mi
    requests:
      memory: 32Mi
      cpu: 25m

View File

@@ -0,0 +1,60 @@
# Base Ory Kratos Helm values.
# DOMAIN_SUFFIX is replaced by overlay patches (sunbeam.pt / <LIMA_IP>.sslip.io).
# DSN and SMTP credentials come from the overlay-specific Secret.
# NOTE(review): $(VAR) placeholders assume env-var substitution at render or
# start time — confirm the chart performs it.
kratos:
  config:
    # NOTE(review): confirm `version` is a valid key in the Kratos config
    # schema for the deployed chart version.
    version: v0.13.0
    dsn: "postgresql://kratos:$(KRATOS_DB_PASSWORD)@postgres-rw.data.svc.cluster.local:5432/kratos_db"
    selfservice:
      default_browser_return_url: https://auth.DOMAIN_SUFFIX/
      # One entry per app subdomain so post-login redirects are permitted.
      allowed_return_urls:
      - https://auth.DOMAIN_SUFFIX/
      - https://docs.DOMAIN_SUFFIX/
      - https://meet.DOMAIN_SUFFIX/
      - https://drive.DOMAIN_SUFFIX/
      - https://mail.DOMAIN_SUFFIX/
      - https://chat.DOMAIN_SUFFIX/
      - https://people.DOMAIN_SUFFIX/
      - https://src.DOMAIN_SUFFIX/
      flows:
        login:
          ui_url: https://auth.DOMAIN_SUFFIX/login
        registration:
          ui_url: https://auth.DOMAIN_SUFFIX/registration
        recovery:
          ui_url: https://auth.DOMAIN_SUFFIX/recovery
        settings:
          ui_url: https://auth.DOMAIN_SUFFIX/settings
    identity:
      default_schema_id: default
      schemas:
      - id: default
        url: file:///etc/config/kratos/identity.schema.json
    courier:
      smtp:
        # NOTE(review): points at localhost:25 — confirm a mail relay sidecar
        # or overlay override exists, otherwise courier delivery will fail.
        connection_uri: "smtp://$(SMTP_USER):$(SMTP_PASSWORD)@localhost:25/"
        from_address: no-reply@DOMAIN_SUFFIX
        from_name: Sunbeam
    serve:
      public:
        base_url: https://auth.DOMAIN_SUFFIX/kratos/
        cors:
          enabled: true
          allowed_origins:
          - https://*.DOMAIN_SUFFIX
      admin:
        base_url: http://kratos-admin.ory.svc.cluster.local:4434/
deployment:
  resources:
    limits:
      memory: 64Mi
    requests:
      memory: 32Mi
      cpu: 25m

View File

@@ -0,0 +1,24 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ory
resources:
- namespace.yaml
- login-ui-deployment.yaml
helmCharts:
# helm repo add ory https://k8s.ory.sh/helm/charts
- name: kratos
repo: https://k8s.ory.sh/helm/charts
version: "0.60.1"
releaseName: kratos
namespace: ory
valuesFile: kratos-values.yaml
- name: hydra
repo: https://k8s.ory.sh/helm/charts
version: "0.60.1"
releaseName: hydra
namespace: ory
valuesFile: hydra-values.yaml

View File

@@ -0,0 +1,49 @@
# Custom login/consent UI bridging Kratos (identity) and Hydra (OAuth2).
# Deployment and Service are kept together in one multi-doc file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: login-ui
  namespace: ory
spec:
  replicas: 1
  selector:
    matchLabels:
      app: login-ui
  template:
    metadata:
      labels:
        app: login-ui
    spec:
      containers:
      - name: login-ui
        image: ghcr.io/sunbeam-studio/login-ui:latest
        ports:
        - name: http
          containerPort: 3000
          protocol: TCP
        env:
        # In-cluster Ory endpoints; public URLs are handled by Pingora routing.
        - name: KRATOS_PUBLIC_URL
          value: "http://kratos-public.ory.svc.cluster.local:4433"
        - name: HYDRA_ADMIN_URL
          value: "http://hydra-admin.ory.svc.cluster.local:4445"
        - name: PORT
          value: "3000"
        resources:
          limits:
            memory: 64Mi
          requests:
            memory: 32Mi
            cpu: 25m
---
apiVersion: v1
kind: Service
metadata:
  name: login-ui
  namespace: ory
spec:
  selector:
    app: login-ui
  ports:
  - name: http
    port: 3000
    targetPort: 3000
    protocol: TCP

6
base/ory/namespace.yaml Normal file
View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: ory
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: storage
resources:
- namespace.yaml
- seaweedfs-config.yaml
- seaweedfs-master.yaml
- seaweedfs-volume.yaml
- seaweedfs-filer.yaml

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: storage
annotations:
linkerd.io/inject: enabled

View File

@@ -0,0 +1,24 @@
# SeaweedFS configuration files mounted into the filer and master pods.
# The TOML payloads below are ConfigMap data; `#` lines inside them are TOML
# comments and part of the data itself.
apiVersion: v1
kind: ConfigMap
metadata:
  name: seaweedfs-filer-config
  namespace: storage
data:
  filer.toml: |
    # SeaweedFS filer configuration
    # S3 API enabled on port 8333
    [leveldb2]
    enabled = true
    dir = "/data/filer"
    [s3]
    enabled = true
    port = 8333
    # Credentials are loaded from the seaweedfs-s3-credentials Secret
    # and passed as env vars (S3_ACCESS_KEY, S3_SECRET_KEY) to the filer.
  master.toml: |
    [master.maintenance]
    sleep_minutes = 17
    garbage_threshold = 0.3

View File

@@ -0,0 +1,74 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: seaweedfs-filer
namespace: storage
spec:
replicas: 1
selector:
matchLabels:
app: seaweedfs-filer
template:
metadata:
labels:
app: seaweedfs-filer
spec:
containers:
- name: filer
image: chrislusf/seaweedfs:latest
args:
- filer
- -port=8888
- -s3
- -s3.port=8333
- -master=seaweedfs-master.storage.svc.cluster.local:9333
ports:
- name: http
containerPort: 8888
protocol: TCP
- name: s3
containerPort: 8333
protocol: TCP
- name: grpc
containerPort: 18888
protocol: TCP
envFrom:
- secretRef:
name: seaweedfs-s3-credentials
volumeMounts:
- name: config
mountPath: /etc/seaweedfs
readOnly: true
- name: filer-data
mountPath: /data/filer
resources:
limits:
memory: 256Mi
requests:
memory: 128Mi
cpu: 50m
volumes:
- name: config
configMap:
name: seaweedfs-filer-config
- name: filer-data
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
name: seaweedfs-filer
namespace: storage
spec:
selector:
app: seaweedfs-filer
ports:
- name: http
port: 8888
targetPort: 8888
- name: s3
port: 8333
targetPort: 8333
- name: grpc
port: 18888
targetPort: 18888

View File

@@ -0,0 +1,66 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: seaweedfs-master
namespace: storage
spec:
serviceName: seaweedfs-master
replicas: 1
selector:
matchLabels:
app: seaweedfs-master
template:
metadata:
labels:
app: seaweedfs-master
spec:
containers:
- name: master
image: chrislusf/seaweedfs:latest
args:
- master
- -port=9333
- -mdir=/data
- -defaultReplication=000
- -volumeSizeLimitMB=1000
ports:
- name: http
containerPort: 9333
protocol: TCP
- name: grpc
containerPort: 19333
protocol: TCP
volumeMounts:
- name: data
mountPath: /data
resources:
limits:
memory: 64Mi
requests:
memory: 32Mi
cpu: 25m
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: Service
metadata:
name: seaweedfs-master
namespace: storage
spec:
selector:
app: seaweedfs-master
clusterIP: None
ports:
- name: http
port: 9333
targetPort: 9333
- name: grpc
port: 19333
targetPort: 19333

View File

@@ -0,0 +1,66 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: seaweedfs-volume
namespace: storage
spec:
serviceName: seaweedfs-volume
replicas: 1
selector:
matchLabels:
app: seaweedfs-volume
template:
metadata:
labels:
app: seaweedfs-volume
spec:
containers:
- name: volume
image: chrislusf/seaweedfs:latest
args:
- volume
- -port=8080
- -mserver=seaweedfs-master.storage.svc.cluster.local:9333
- -dir=/data
- -max=50
ports:
- name: http
containerPort: 8080
protocol: TCP
- name: grpc
containerPort: 18080
protocol: TCP
volumeMounts:
- name: data
mountPath: /data
resources:
limits:
memory: 256Mi
requests:
memory: 128Mi
cpu: 50m
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
apiVersion: v1
kind: Service
metadata:
name: seaweedfs-volume
namespace: storage
spec:
selector:
app: seaweedfs-volume
clusterIP: None
ports:
- name: http
port: 8080
targetPort: 8080
- name: grpc
port: 18080
targetPort: 18080

17
justfile Normal file
View File

@@ -0,0 +1,17 @@
# Sunbeam infrastructure — local dev convenience targets
# Start Lima VM and deploy full stack
up:
bash scripts/local-up.sh
# Stop Lima VM (preserves disk)
down:
bash scripts/local-down.sh
# Regenerate mkcert wildcard cert for current Lima IP
certs:
bash scripts/local-certs.sh
# Print all service URLs
urls:
bash scripts/local-urls.sh

View File

@@ -0,0 +1,37 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# Local dev overlay — targets Lima VM running k3s on macOS
# Deploy with: kubectl apply -k overlays/local/
resources:
- ../../base/mesh
- ../../base/ingress
- ../../base/ory
- ../../base/data
- ../../base/storage
- ../../base/lasuite
- ../../base/media
- ../../base/devtools
patches:
# sslip.io domain suffix derived from Lima VM IP
# NOTE(review): values-domain.yaml only ADDS a `domain-suffix` key to the
# ConfigMap; it does not substitute DOMAIN_SUFFIX inside config.toml — that
# substitution is done by local-up.sh (sed). Confirm Pingora reads the key.
- path: values-domain.yaml
  target:
    kind: ConfigMap
    name: pingora-config
# Disable rustls-acme; mount mkcert cert; enable hostPort for Lima
- path: values-pingora.yaml
  target:
    kind: Deployment
    name: pingora
# Swap redirect URIs to *.sslip.io for Kratos and Hydra
# NOTE(review): targets Helm-rendered ConfigMaps by label — confirm the Ory
# charts actually set app.kubernetes.io/part-of=ory on their ConfigMaps.
- path: values-ory.yaml
  target:
    kind: ConfigMap
    labelSelector: "app.kubernetes.io/part-of=ory"
# Apply §10.7 memory limits across all Deployments
# NOTE(review): kustomize requires each `patches` entry to reference a file
# containing a single document; values-resources.yaml holds multiple
# '---'-separated patches — verify the build does not reject it.
- path: values-resources.yaml

View File

@@ -0,0 +1,21 @@
# Patch: replace DOMAIN_SUFFIX placeholder with <LIMA_IP>.sslip.io
# in the Pingora ConfigMap's routing table.
#
# How to apply: the local-up.sh script calls:
# LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
# sed "s/DOMAIN_SUFFIX/${LIMA_IP}.sslip.io/g" overlays/local/values-domain.yaml | kubectl apply -f -
#
# Or use kustomize's replacements feature if the IP is known at kustomize time.
#
# This is a strategic merge patch on the pingora-config ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
name: pingora-config
namespace: ingress
data:
# DOMAIN_SUFFIX is substituted at deploy time by local-up.sh.
# The local overlay domain is: <LIMA_IP>.sslip.io
# Example: 192.168.5.2.sslip.io
domain-suffix: "LIMA_IP.sslip.io"

View File

@@ -0,0 +1,27 @@
# Patch: Ory redirect URIs → sslip.io hostnames for local dev.
# Applied as a strategic merge patch over the rendered Kratos/Hydra ConfigMaps.
#
# DOMAIN_SUFFIX is substituted by local-up.sh at deploy time.
# Production overlay uses sunbeam.pt.
# Kratos selfservice URLs
apiVersion: v1
kind: ConfigMap
metadata:
name: kratos-config
namespace: ory
data:
selfservice.default_browser_return_url: "https://auth.DOMAIN_SUFFIX/"
selfservice.flows.login.ui_url: "https://auth.DOMAIN_SUFFIX/login"
selfservice.flows.registration.ui_url: "https://auth.DOMAIN_SUFFIX/registration"
selfservice.flows.recovery.ui_url: "https://auth.DOMAIN_SUFFIX/recovery"
selfservice.flows.settings.ui_url: "https://auth.DOMAIN_SUFFIX/settings"
selfservice.allowed_return_urls: |
- https://auth.DOMAIN_SUFFIX/
- https://docs.DOMAIN_SUFFIX/
- https://meet.DOMAIN_SUFFIX/
- https://drive.DOMAIN_SUFFIX/
- https://mail.DOMAIN_SUFFIX/
- https://chat.DOMAIN_SUFFIX/
- https://people.DOMAIN_SUFFIX/
- https://src.DOMAIN_SUFFIX/

View File

@@ -0,0 +1,30 @@
# Patch: local Pingora overrides
# - Disables rustls-acme (ACME negotiation not needed locally)
# - Mounts mkcert wildcard cert from the pingora-tls Secret
# - Exposes TURN relay range as hostPort on the Lima VM
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pingora
  namespace: ingress
spec:
  template:
    spec:
      containers:
      - name: pingora
        env:
        - name: ACME_ENABLED
          value: "false"
        ports:
        # NOTE(review): only the two ENDPOINT ports (49152, 49252) get a
        # hostPort — Kubernetes has no hostPort range syntax, so the 99
        # intermediate relay ports are NOT mapped here. Confirm Lima/k3s
        # forwards the full UDP range some other way, or relay allocations
        # on intermediate ports will be unreachable from the host.
        - name: turn-relay-start
          containerPort: 49152
          hostPort: 49152
          protocol: UDP
        - name: turn-relay-end
          containerPort: 49252
          hostPort: 49252
          protocol: UDP
      # TLS cert comes from mkcert Secret created by scripts/local-certs.sh
      # Secret name: pingora-tls, keys: tls.crt / tls.key

View File

@@ -0,0 +1,94 @@
# Patch: apply §10.7 memory limits to all Deployments in the local overlay.
# These are intentionally tight to stay within the 12 GB Lima VM budget.
#
# NOTE(review): kustomize's `patches` field expects a single document per patch
# file, but this file contains multiple '---'-separated patches. Verify the
# deploy tooling splits it (or applies it directly with kubectl) before
# relying on the overlay build.
# Applied as a strategic merge patch. Each stanza targets one Deployment by name.
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: pingora
namespace: ingress
spec:
template:
spec:
containers:
- name: pingora
resources:
limits:
memory: 64Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: valkey
namespace: data
spec:
template:
spec:
containers:
- name: valkey
resources:
limits:
memory: 64Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: opensearch
namespace: data
spec:
template:
spec:
containers:
- name: opensearch
resources:
limits:
memory: 512Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: seaweedfs-filer
namespace: storage
spec:
template:
spec:
containers:
- name: filer
resources:
limits:
memory: 256Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: login-ui
namespace: ory
spec:
template:
spec:
containers:
- name: login-ui
resources:
limits:
memory: 64Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: hive
namespace: lasuite
spec:
template:
spec:
containers:
- name: hive
resources:
limits:
memory: 64Mi

View File

@@ -0,0 +1,29 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# Production overlay — targets Scaleway Elastic Metal (Paris)
# Deploy with: kubectl apply -k overlays/production/
# TODO: fill in all production values before first production deploy
resources:
- ../../base/mesh
- ../../base/ingress
- ../../base/ory
- ../../base/data
- ../../base/storage
- ../../base/lasuite
- ../../base/media
- ../../base/devtools
patches:
# TODO: set domain to sunbeam.pt
# - path: values-domain.yaml
# TODO: enable rustls-acme + Let's Encrypt, bind to public IP
# - path: values-pingora.yaml
# TODO: set OIDC redirect URIs to https://*.sunbeam.pt/...
# - path: values-ory.yaml
# TODO: set production resource limits (64 GB server)
# - path: values-resources.yaml

46
scripts/local-certs.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Generate a mkcert wildcard TLS cert for the current Lima VM IP.
# Output: secrets/local/tls.crt + secrets/local/tls.key
set -euo pipefail

# Print a message to stderr and abort — mirrors the original error paths.
die() {
  echo "$1" >&2
  exit 1
}

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "${script_dir}/.." && pwd)"
secrets_dir="${repo_root}/secrets/local"

echo "==> Getting Lima VM IP..."
# First address reported by the VM is the host-routable one.
LIMA_IP="$(limactl shell sunbeam hostname -I | awk '{print $1}')"
[[ -n "$LIMA_IP" ]] || die "ERROR: Could not determine Lima VM IP. Is the 'sunbeam' VM running?"

DOMAIN="*.${LIMA_IP}.sslip.io"
echo "==> Generating mkcert cert for: $DOMAIN"
mkdir -p "$secrets_dir"
cd "$secrets_dir"
mkcert -install
mkcert "$DOMAIN"

# mkcert names the output files based on the domain; normalise to tls.crt / tls.key
cert_file="_wildcard.${LIMA_IP}.sslip.io.pem"
key_file="_wildcard.${LIMA_IP}.sslip.io-key.pem"
[[ -f "$cert_file" ]] || die "ERROR: Expected cert file '$cert_file' not found."
mv "$cert_file" tls.crt
mv "$key_file" tls.key
echo "==> Cert written to secrets/local/tls.crt and secrets/local/tls.key"

echo "==> Domain: $DOMAIN"
echo "==> Lima IP: $LIMA_IP"
echo ""
echo "Next: run scripts/local-up.sh or manually apply the TLS secret:"
echo " kubectl create secret tls pingora-tls -n ingress \\"
echo " --cert=secrets/local/tls.crt \\"
echo " --key=secrets/local/tls.key \\"
echo " --dry-run=client -o yaml | kubectl apply -f -"

33
scripts/local-down.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Stop the Sunbeam Lima VM (preserves disk by default).
#   --hard    force-stop instead of graceful shutdown
#   --delete  also delete the VM (destroys the disk)
set -euo pipefail

delete_vm=false
hard_stop=false

# Flag parsing: anything unrecognised prints usage and aborts.
while (($#)); do
  case "$1" in
    --delete) delete_vm=true ;;
    --hard)   hard_stop=true ;;
    *)
      echo "Usage: $0 [--hard] [--delete]" >&2
      exit 1
      ;;
  esac
  shift
done

if "$hard_stop"; then
  echo "==> Force-stopping Lima VM 'sunbeam'..."
  limactl stop --force sunbeam
else
  echo "==> Stopping Lima VM 'sunbeam' (graceful)..."
  limactl stop sunbeam
fi

if "$delete_vm"; then
  echo "==> Deleting Lima VM 'sunbeam' (disk will be lost)..."
  limactl delete sunbeam
  echo "==> VM deleted."
else
  echo "==> VM stopped. Disk preserved. Run scripts/local-up.sh to restart."
fi

122
scripts/local-up.sh Executable file
View File

@@ -0,0 +1,122 @@
#!/usr/bin/env bash
# Start the Sunbeam local dev stack.
#
# Flow: check prerequisites → start Lima VM → export kubeconfig →
# install Linkerd → generate mkcert TLS cert → create the TLS Secret →
# apply the local kustomize overlay → wait for core workloads → print URLs.
#
# Idempotent: safe to run multiple times.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ---------------------------------------------------------------------------
# 1. Check prerequisites
# ---------------------------------------------------------------------------
# helm is still required: `kubectl kustomize --enable-helm` (step 7) shells
# out to it to render the helmCharts entries in the base kustomizations.
echo "==> Checking prerequisites..."
for tool in limactl mkcert kubectl helm linkerd; do
  if ! command -v "$tool" &>/dev/null; then
    echo "ERROR: '$tool' not found. Install with: brew install $tool" >&2
    exit 1
  fi
done
echo " OK"

# ---------------------------------------------------------------------------
# 2. Start Lima VM (skip if already running)
# ---------------------------------------------------------------------------
# Ask limactl directly for the instance status ("Running" / "Stopped"); a
# missing instance yields an empty string, mapped to "none" so we create it.
# (The previous probe piped `limactl list --json` into `json.load`, but
# limactl emits one JSON object per line, not an array, so the probe always
# fell back to "none" and re-ran creation against an existing VM.)
LIMA_STATUS="$(limactl list sunbeam --format '{{.Status}}' 2>/dev/null || true)"
[[ -n "$LIMA_STATUS" ]] || LIMA_STATUS="none"
if [[ "$LIMA_STATUS" == "Running" ]]; then
  echo "==> Lima VM 'sunbeam' already running."
elif [[ "$LIMA_STATUS" == "Stopped" ]]; then
  echo "==> Starting existing Lima VM 'sunbeam'..."
  limactl start sunbeam
else
  echo "==> Creating Lima VM 'sunbeam' (k3s, 6 CPU / 12 GB / 60 GB)..."
  limactl start \
    --name=sunbeam \
    template://k3s \
    --memory=12 \
    --cpus=6 \
    --disk=60 \
    --vm-type=vz \
    --mount-type=virtiofs
fi

# ---------------------------------------------------------------------------
# 3. Export kubeconfig
# ---------------------------------------------------------------------------
# NOTE: this export only affects the remainder of this script; interactive
# shells must set KUBECONFIG=~/.kube/sunbeam.yaml themselves.
echo "==> Exporting kubeconfig..."
mkdir -p ~/.kube
limactl shell sunbeam kubectl config view --raw > ~/.kube/sunbeam.yaml
export KUBECONFIG=~/.kube/sunbeam.yaml
echo " KUBECONFIG=$KUBECONFIG"

# ---------------------------------------------------------------------------
# 4. Install Linkerd CRDs + control plane
# ---------------------------------------------------------------------------
# The linkerd-control-plane Helm chart requires pre-generated trust-anchor
# and issuer certificates. The previous invocation passed PEM *content* to
# --set-file (which expects a *path*) via `linkerd identity trust-anchors`
# — not an actual linkerd subcommand — so the Helm install always failed,
# and re-running the `linkerd install` fallback broke on an existing
# control plane, defeating idempotency. Install via the CLI (which
# generates its own certs) and skip when the control plane already exists.
# NOTE(review): base/mesh also declares Linkerd; confirm this step is not
# redundant with the kustomize apply in step 7.
if kubectl get deployment linkerd-destination -n linkerd &>/dev/null; then
  echo "==> Linkerd control plane already installed."
else
  echo "==> Installing Linkerd CRDs..."
  linkerd install --crds | kubectl apply -f -
  echo "==> Installing Linkerd control plane..."
  linkerd install | kubectl apply -f -
fi
linkerd check

# ---------------------------------------------------------------------------
# 5. Generate mkcert wildcard cert
# ---------------------------------------------------------------------------
echo "==> Generating TLS cert..."
bash "$SCRIPT_DIR/local-certs.sh"
LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
DOMAIN="${LIMA_IP}.sslip.io"

# ---------------------------------------------------------------------------
# 6. Create TLS Secret in ingress namespace
# ---------------------------------------------------------------------------
echo "==> Applying TLS Secret to ingress namespace..."
kubectl create namespace ingress --dry-run=client -o yaml | kubectl apply -f -
kubectl create secret tls pingora-tls \
  --cert="$REPO_ROOT/secrets/local/tls.crt" \
  --key="$REPO_ROOT/secrets/local/tls.key" \
  -n ingress \
  --dry-run=client -o yaml | kubectl apply -f -

# ---------------------------------------------------------------------------
# 7. Substitute domain and apply manifests
# ---------------------------------------------------------------------------
echo "==> Applying manifests (domain: $DOMAIN)..."
# Substitute DOMAIN_SUFFIX placeholder before piping to kubectl
kubectl kustomize "$REPO_ROOT/overlays/local" --enable-helm | \
  sed "s/DOMAIN_SUFFIX/${DOMAIN}/g" | \
  kubectl apply -f -

# ---------------------------------------------------------------------------
# 8. Wait for core components
# ---------------------------------------------------------------------------
# `|| true`: first boot can exceed these timeouts while images pull; the
# stack converges on its own, so a slow rollout must not fail the script.
echo "==> Waiting for PostgreSQL cluster..."
kubectl wait --for=condition=Ready cluster/postgres -n data --timeout=180s || true
echo "==> Waiting for Valkey..."
# The cache Deployment is named `valkey` (see base/data), not `redis`.
kubectl rollout status deployment/valkey -n data --timeout=120s || true
echo "==> Waiting for Kratos..."
kubectl rollout status deployment/kratos -n ory --timeout=120s || true
echo "==> Waiting for Hydra..."
kubectl rollout status deployment/hydra -n ory --timeout=120s || true

# ---------------------------------------------------------------------------
# 9. Print URLs
# ---------------------------------------------------------------------------
bash "$SCRIPT_DIR/local-urls.sh"

29
scripts/local-urls.sh Executable file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# Print all local service URLs for the current Lima VM IP.
#
# The VM's first IP is embedded into an sslip.io wildcard domain
# (e.g. docs.192.168.5.2.sslip.io) — the same hostnames Pingora routes,
# so these URLs go through the full edge path.
set -euo pipefail
echo "==> Getting Lima VM IP..."
# `hostname -I` lists all VM addresses; the first is the host-routable one.
LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
if [[ -z "$LIMA_IP" ]]; then
  echo "ERROR: Could not determine Lima VM IP. Is the 'sunbeam' VM running?" >&2
  exit 1
fi
BASE="${LIMA_IP}.sslip.io"
echo ""
echo "Sunbeam local URLs (Lima IP: $LIMA_IP)"
echo "============================================"
echo " Docs: https://docs.${BASE}"
echo " Meet: https://meet.${BASE}"
echo " Drive: https://drive.${BASE}"
echo " Mail: https://mail.${BASE}"
echo " Chat: https://chat.${BASE}"
echo " People: https://people.${BASE}"
echo " Source: https://src.${BASE}"
echo " Auth: https://auth.${BASE}"
echo " S3: https://s3.${BASE}"
echo ""
# Linkerd viz has no public hostname; reach it via port-forward only.
echo " Linkerd viz: kubectl port-forward -n mesh svc/linkerd-viz 8084:8084"
echo " then open http://localhost:8084"
echo ""

View File

978
system-design.md Normal file
View File

@@ -0,0 +1,978 @@
# Sunbeam Studio — Infrastructure Design Document
**Version:** 0.1.0-draft
**Date:** 2026-02-28
**Author:** Sienna Satterthwaite, Chief Engineer
**Status:** Planning
---
## 1. Overview
Sunbeam is a three-person game studio founded by Sienna, Lonni, and Amber. This document describes the self-hosted collaboration and development infrastructure that supports studio operations — document editing, video calls, email, version control, AI tooling, and game asset management.
**Guiding principles:**
- **One box, one bill.** Single Scaleway Elastic Metal server in Paris. No multi-vendor sprawl.
- **European data sovereignty.** All data resides in France, GDPR-compliant by default.
- **Self-hosted, open source.** No per-seat SaaS fees. MIT-licensed where possible.
- **Consistent experience.** Unified authentication, shared design language, single login across all tools.
- **Operationally honest.** The stack is architecturally rich but the operational surface is small: three users, one node, one cluster.
---
## 2. Platform
### 2.1 Compute
| Property | Value |
|---|---|
| Provider | Scaleway Elastic Metal |
| Region | Paris (PAR1/PAR2) |
| RAM | 64 GB minimum |
| Storage | Local NVMe (k3s + OS + SeaweedFS volumes) |
| Network | Public IPv4, configurable reverse DNS |
### 2.2 Orchestration
k3s — single-node Kubernetes. Traefik disabled at install (replaced by custom Pingora proxy):
```bash
curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--disable=traefik" sh -
```
### 2.3 External Scaleway Services
| Service | Purpose | Estimated Cost |
|---|---|---|
| Object Storage | PostgreSQL backups (barman), cold asset overflow | ~€5–10/mo |
| Transactional Email (TEM) | Outbound SMTP relay for notifications | ~€1/mo |
| Generative APIs | AI inference for all La Suite components | ~€15/mo |
---
## 3. Namespace Layout
```
k3s cluster
├── ory/ Identity & auth (Kratos, Hydra, Login UI)
├── lasuite/ Docs, Meet, Drive, Messages, Conversations, People, Hive
├── media/ LiveKit server + TURN
├── storage/ SeaweedFS (master, volume, filer)
├── data/ CloudNativePG, Redis, OpenSearch
├── devtools/ Gitea
├── mesh/ Linkerd control plane
└── ingress/ Pingora edge proxy
```
---
## 4. Core Infrastructure
### 4.1 Authentication — Ory Kratos + Hydra
Replaces the Keycloak default from La Suite's French government deployments. No JVM, no XML — lightweight Go binaries that fit k3s cleanly.
| Component | Role |
|---|---|
| **Kratos** | Identity management (registration, login, profile, recovery) |
| **Hydra** | OAuth2 / OpenID Connect provider |
| **Login UI** | Sunbeam-branded login and consent pages |
Every La Suite app authenticates via `mozilla-django-oidc`. Each app registers as an OIDC client in Hydra with a client ID, secret, and redirect URI. Swapping Keycloak for Hydra is transparent at the app level.
**Auth flow:**
```
User → any *.sunbeam.pt app
→ 302 to auth.sunbeam.pt
→ Hydra → Kratos login UI
→ authenticate
→ Hydra issues OIDC token
→ 302 back to app
→ app validates via mozilla-django-oidc
→ session established
```
### 4.2 Database — CloudNativePG
Single PostgreSQL cluster via CloudNativePG operator. One cluster, multiple logical databases:
```
PostgreSQL (CloudNativePG)
├── kratos_db
├── hydra_db
├── docs_db
├── meet_db
├── drive_db
├── messages_db
├── conversations_db
├── people_db
├── gitea_db
└── hive_db
```
### 4.3 Object Storage — SeaweedFS
S3-compatible distributed storage. Apache 2.0 licensed (chosen over MinIO post-AGPL relicensing).
**Components:** master (metadata/topology), volume servers (data on local NVMe), filer (S3 API gateway).
**S3 endpoint:** `http://seaweedfs-filer.storage.svc:8333` (cluster-internal). For local dev access outside the cluster, expose via ingress at `s3.sunbeam.pt` or `kubectl port-forward`.
**Buckets:**
| Bucket | Consumer | Contents |
|---|---|---|
| `sunbeam-docs` | Docs | Document content, images, exports |
| `sunbeam-meet` | Meet | Recordings (if enabled) |
| `sunbeam-drive` | Drive | Uploaded/shared files |
| `sunbeam-messages` | Messages | Email attachments |
| `sunbeam-conversations` | Conversations | Chat attachments |
| `sunbeam-git-lfs` | Gitea | Git LFS objects (game assets) |
| `sunbeam-game-assets` | Hive | Game assets synced between Drive and S3 |
### 4.4 Cache — Valkey (Redis-compatible)
Single Valkey instance (Deployment `valkey` in the `data` namespace; drop-in Redis replacement, so apps configure it as "Redis"). Shared by Messages (Celery broker), Conversations (session/cache), Meet (LiveKit ephemeral state).
### 4.5 Search — OpenSearch
Required by Messages for full-text email search. Single-node deployment in `data` namespace.
### 4.6 Edge Proxy — Pingora (Custom Rust Binary)
Custom proxy built on Cloudflare's Pingora framework. A few hundred lines of Rust handling:
- **HTTPS termination** — Let's Encrypt certs via `rustls-acme` compiled into the proxy binary
- **Hostname routing** — static mapping of `*.sunbeam.pt` hostnames to backend ClusterIP:port
- **WebSocket passthrough** — LiveKit signaling (Meet), Y.js CRDT sync (Docs)
- **Raw UDP forwarding** — TURN relay ports (3478 + 49152–49252). Forwards bytes, not protocol. LiveKit handles TURN/STUN internally per RFC 5766. 100 relay ports is vastly more than three users need.
Seven hostnames, rarely changes. No dynamic service discovery required.
### 4.7 Service Mesh — Linkerd
mTLS between all pods with zero application changes. Sidecar injection provides:
- Mutual TLS on all internal east-west traffic
- Automatic certificate rotation
- Per-route observability (request rate, success rate, latency)
Rust-based data plane — lightweight on a single node.
---
## 5. La Suite Numérique Applications
All La Suite apps share a common pattern: Django backend, React frontend, PostgreSQL, S3 storage, OIDC auth. Independent services, not a monolith.
### 5.1 Docs — `docs.sunbeam.pt`
Collaborative document editing. GDD, lore bibles, specs, meeting notes.
| Property | Detail |
|---|---|
| Editor | BlockNote (Tiptap-based) |
| Realtime | Y.js CRDT over WebSocket |
| AI | BlockNote XL AI extension — rephrase, summarize, translate, fix typos, freeform prompts. Available via formatting toolbar and `/ai` slash command. |
| Export | .odt, .docx, .pdf |
BlockNote XL packages (AI, PDF export) are GPL-licensed. Fine for internal use — GPL triggers on distribution, not deployment.
### 5.2 Meet — `meet.sunbeam.pt`
Video conferencing. Standups, playtests, partner calls.
| Property | Detail |
|---|---|
| Backend | LiveKit (self-hosted, Apache 2.0) |
| Media | DTLS-SRTP encrypted WebRTC |
| TURN | LiveKit built-in, UDP ports exposed through Pingora |
### 5.3 Drive — `drive.sunbeam.pt`
File sharing and document management. Game assets, reference material, shared resources.
Granular access control, workspace organization, linked to Messages for email attachments and Docs for file references.
### 5.4 Messages — `mail.sunbeam.pt`
Full email platform with team and personal mailboxes.
**Architecture:**
```
Inbound: Internet → MX → Pingora → Postfix MTA-in → Rspamd → Django MDA → Postgres + OpenSearch
Outbound: User → Django → Postfix MTA-out (DKIM) → Scaleway TEM relay → recipient
```
**Mailboxes:**
- Personal: `sienna@`, `lonni@`, `amber@sunbeam.pt`
- Shared: `hello@sunbeam.pt` (all three see incoming business email)
**AI features:** Thread summaries, compose assistance, auto-labelling.
**Limitation:** No IMAP/POP3 — web UI only. Deliberate upstream design choice. Acceptable for a three-person studio living in the browser.
**DNS requirements:** MX, SPF, DKIM, DMARC, PTR (reverse DNS configurable in Scaleway console).
### 5.5 Conversations — `chat.sunbeam.pt`
AI chatbot / team assistant.
| Property | Detail |
|---|---|
| AI Framework | Pydantic AI (backend), Vercel AI SDK (frontend streaming) |
| Tools | Extensible agent tools — wire into Docs search, Drive queries, Messages summaries |
| Attachments | PDF and image upload for analysis |
| Helm | Official chart at `suitenumerique.github.io/conversations/` |
Primary force multiplier. Custom tools can search GDD content, query shared files, and summarize email threads.
### 5.6 People — `people.sunbeam.pt`
Centralized user and team management. Creates users/teams and propagates permissions across all La Suite apps. Interoperates with dimail (Messages email backend) for mailbox provisioning.
Admin-facing, not a daily-use interface.
### 5.7 La Suite Integration Layer
Apps share a unified experience through:
- **`@gouvfr-lasuite/integration`** — npm package providing the shared navigation bar, header, branding. Fork/configure for Sunbeam logo, colors, and nav links.
- **`lasuite-django`** — shared Python library for OIDC helpers and common Django patterns.
- Per-app env vars for branding: `DJANGO_EMAIL_BRAND_NAME=Sunbeam`, `DJANGO_EMAIL_LOGO_IMG`, etc.
---
## 6. Development Tools
### 6.1 Gitea — `src.sunbeam.pt`
Self-hosted Git with issue tracking, wiki, and CI.
| Property | Detail |
|---|---|
| Runtime | Single Go binary |
| Auth | OIDC via Hydra (same login as everything else) |
| LFS | Built-in Git LFS, S3 backend → SeaweedFS `sunbeam-git-lfs` bucket |
| CI | Gitea Actions (GitHub Actions compatible YAML). Lightweight jobs: compiles, tests, linting. Platform-specific builds offloaded to external providers. |
| Theming | `custom/` directory for Sunbeam logo, colors, CSS |
Replaces GitHub for private repos and eliminates GitHub LFS bandwidth costs. Game assets (textures, models, audio) flow through LFS into SeaweedFS.
### 6.2 Hive — Asset Sync Service (Custom Rust Binary)
Bidirectional sync between Drive and a dedicated S3 bucket (`sunbeam-game-assets`). Lonni and Amber manage game assets through Drive's UI; the build pipeline and Sienna's tooling address the same assets via S3. Hive keeps both views consistent.
**Architecture:**
```
Drive REST API SeaweedFS S3
(Game Assets workspace) (sunbeam-game-assets bucket)
│ │
└──────────► Hive ◄────────────────────┘
PostgreSQL
(hive_db)
```
**Reconciliation loop** (configurable, default 30s):
1. Poll Drive API — list files in watched workspace (IDs, paths, modified timestamps)
2. Poll S3 — `ListObjectsV2` on game assets bucket (keys, ETags, LastModified)
3. Diff both sides against Hive's state in `hive_db`
4. For each difference:
- New in Drive → download from Drive, upload to S3, record state
- New in S3 → download from S3, upload to Drive, record state
- Drive newer → overwrite S3, update state
- S3 newer → overwrite Drive, update state
- Deleted from Drive → delete from S3, remove state
- Deleted from S3 → delete from Drive, remove state
**Conflict resolution:** Last-write-wins by timestamp. For three users this is sufficient. Log a warning when both sides change the same file within the same poll interval.
**Path mapping:** Direct 1:1. Drive workspace folder structure maps to S3 key prefixes. `Game Assets/textures/hero_sprite.png` in Drive becomes `textures/hero_sprite.png` in S3 (workspace root stripped). Lonni creates a folder in Drive, it appears as an S3 prefix. Sienna runs `aws s3 cp` into a prefix, it appears in Drive's folder.
**State table (`hive_db`):**
| Column | Type | Purpose |
|---|---|---|
| `id` | UUID | Primary key |
| `drive_file_id` | TEXT | Drive's internal file ID |
| `drive_path` | TEXT | Human-readable path in Drive |
| `s3_key` | TEXT | S3 object key |
| `drive_modified_at` | TIMESTAMPTZ | Last modification on Drive side |
| `s3_etag` | TEXT | S3 object ETag |
| `s3_last_modified` | TIMESTAMPTZ | Last modification on S3 side |
| `last_synced_at` | TIMESTAMPTZ | When Hive last reconciled this file |
| `sync_source` | TEXT | Which side was source of truth (`drive` or `s3`) |
**Large file handling:** Files over 50 MB stream to a temp file before uploading to the other side. Multipart upload for S3 targets. No large files held in memory.
**Authentication:** OIDC client credentials via Hydra (same as every other service). Registered as client `hive` in the OIDC registry.
**Crate dependencies:**
| Crate | Purpose |
|---|---|
| `reqwest` | HTTP client for Drive REST API |
| `aws-sdk-s3` | S3 client for SeaweedFS |
| `sqlx` | Async PostgreSQL driver |
| `tokio` | Async runtime |
| `serde` / `serde_json` | Serialization |
| `tracing` | Structured logging |
**Configuration:**
```toml
[drive]
base_url = "https://drive.sunbeam.pt"
workspace = "Game Assets"
oidc_client_id = "hive"
oidc_client_secret_file = "/run/secrets/hive-oidc"
oidc_token_url = "https://auth.sunbeam.pt/oauth2/token"
[s3]
endpoint = "http://seaweedfs-filer.storage.svc:8333"
bucket = "sunbeam-game-assets"
region = "us-east-1"
access_key_file = "/run/secrets/seaweedfs-key"
secret_key_file = "/run/secrets/seaweedfs-secret"
[postgres]
url_file = "/run/secrets/hive-db-url"
[sync]
interval_seconds = 30
temp_dir = "/tmp/hive"
large_file_threshold_mb = 50
```
**Deployment:** Single pod in `lasuite` namespace. No PVC needed — state lives in PostgreSQL, temp files are ephemeral. OIDC credentials and S3 keys via Kubernetes secrets.
**Size estimate:** ~800–1200 lines of Rust. Reconciliation logic is the bulk; Drive API and S3 clients are mostly configuration of existing crates.
---
## 7. AI Integration
All AI features across the stack share a single backend.
### 7.1 Backend
**Scaleway Generative APIs** — hosted in Paris, GDPR-compliant. Fully OpenAI-compatible endpoint. Prompts and outputs are not read, reused, or analyzed by Scaleway.
### 7.2 Model
**`mistral-small-3.2-24b-instruct-2506`**
| Property | Value |
|---|---|
| Input | €0.15 / M tokens |
| Output | €0.35 / M tokens |
| Capabilities | Chat + Vision |
| Strengths | Summarization, rephrasing, translation, instruction following |
Estimated 25M tokens/month for three users ≈ €12/month after the 1M free tier.
**Upgrade path:** If Conversations needs heavier reasoning, route it to `qwen3-235b-a22b-instruct` (€0.75/€2.25 per M tokens) while keeping Docs and Messages on Mistral Small.
### 7.3 Configuration
Three env vars, identical across all components:
```bash
AI_BASE_URL=https://api.scaleway.ai/v1/
AI_API_KEY=<SCW_SECRET_KEY>
AI_MODEL=mistral-small-3.2-24b-instruct-2506
```
### 7.4 Capabilities by Component
| Component | What AI Does |
|---|---|
| Docs | Rephrase, summarize, fix typos, translate, freeform prompts on selected text |
| Messages | Thread summaries, compose assistance, auto-labelling |
| Conversations | Full chat interface, extensible agent tools, attachment analysis |
---
## 8. DNS Map
All A records point to the Elastic Metal public IP. TLS terminated by Pingora.
| Hostname | Backend |
|---|---|
| `docs.sunbeam.pt` | Docs |
| `meet.sunbeam.pt` | Meet |
| `drive.sunbeam.pt` | Drive |
| `mail.sunbeam.pt` | Messages |
| `chat.sunbeam.pt` | Conversations |
| `people.sunbeam.pt` | People |
| `src.sunbeam.pt` | Gitea |
| `auth.sunbeam.pt` | Ory Hydra + Login UI |
| `s3.sunbeam.pt` | SeaweedFS S3 endpoint (dev access) |
**Email DNS (sunbeam.pt zone):**
| Record | Value |
|---|---|
| MX | → Elastic Metal IP |
| TXT (SPF) | `v=spf1 ip4:<EM_IP> include:tem.scaleway.com ~all` |
| TXT (DKIM) | Generated by Postfix/Messages |
| TXT (DMARC) | `v=DMARC1; p=quarantine; rua=mailto:dmarc@sunbeam.pt` |
| PTR | Configured in Scaleway console |
---
## 9. OIDC Client Registry
Each application registered in Ory Hydra:
| Client | Redirect URI | Scopes |
|---|---|---|
| Docs | `https://docs.sunbeam.pt/oidc/callback/` | `openid profile email` |
| Meet | `https://meet.sunbeam.pt/oidc/callback/` | `openid profile email` |
| Drive | `https://drive.sunbeam.pt/oidc/callback/` | `openid profile email` |
| Messages | `https://mail.sunbeam.pt/oidc/callback/` | `openid profile email` |
| Conversations | `https://chat.sunbeam.pt/oidc/callback/` | `openid profile email` |
| People | `https://people.sunbeam.pt/oidc/callback/` | `openid profile email` |
| Gitea | `https://src.sunbeam.pt/user/oauth2/sunbeam/callback` | `openid profile email` |
| Hive | Client credentials grant (no redirect URI) | `openid` |
---
## 10. Local Development Environment
### 10.1 Goal
The local dev stack is **structurally identical** to production. Same k3s orchestrator, same namespaces, same manifests, same service DNS, same Linkerd mesh, same Pingora edge proxy, same TLS termination, same OIDC flows. The only differences are resource limits, the TLS cert source (mkcert vs Let's Encrypt), and the domain suffix (sslip.io vs sunbeam.pt). Traffic flows through the same path locally as it does in production: browser → Pingora → Linkerd sidecar → app → Linkerd sidecar → data stores. Bugs caught locally are bugs that would have happened in production.
### 10.2 Platform
| Property | Value |
|---|---|
| Machine | MacBook Pro M1 Pro, 10-core, 32 GB RAM |
| VM | Lima (lightweight Linux VM, virtiofs, Apple Virtualization.framework) |
| Orchestration | k3s inside Lima VM (`--disable=traefik`, identical to production) |
| Architecture | arm64 native (no Rosetta overhead) |
```bash
# Install Lima + k3s
brew install lima mkcert
# Create Lima VM with sufficient resources for the full stack
limactl start --name=sunbeam template://k3s \
--memory=12 \
--cpus=6 \
--disk=60 \
--vm-type=vz \
--mount-type=virtiofs
# Confirm
limactl shell sunbeam kubectl get nodes
```
12 GB VM allocation covers the full stack (~6 GB pods + kubelet/OS overhead) and leaves 20 GB for macOS, IDE, browser, and builds.
### 10.3 What Stays the Same
Everything:
- **Namespace layout** — all namespaces identical: `ory/`, `lasuite/`, `media/`, `storage/`, `data/`, `devtools/`, `mesh/`, `ingress/`
- **Kubernetes manifests** — same Deployments, Services, ConfigMaps, Secrets. Applied with `kubectl apply` or Helm.
- **Service DNS** — `seaweedfs-filer.storage.svc`, `kratos.ory.svc`, `hydra.ory.svc`, etc. Apps resolve the same internal names.
- **Service mesh** — Linkerd injected into all application namespaces. mTLS between all pods. Same topology as production.
- **Edge proxy** — Pingora runs in `ingress/` namespace, routes by hostname, terminates TLS. Same binary, same routing config (different cert source).
- **Database structure** — same CloudNativePG operator, same logical databases, same schemas.
- **S3 bucket structure** — same SeaweedFS filer, same bucket names.
- **OIDC flow** — same Kratos + Hydra, same client registrations. Redirect URIs point at sslip.io hostnames instead of `sunbeam.pt`.
- **AI configuration** — same `AI_BASE_URL` / `AI_API_KEY` / `AI_MODEL` env vars, same Scaleway endpoint.
- **Hive sync** — same reconciliation loop against local Drive and SeaweedFS.
- **TURN/UDP** — Pingora forwards UDP to LiveKit on the same port range (49152–49252).
### 10.4 Local DNS — sslip.io
[sslip.io](https://sslip.io) provides wildcard DNS that embeds the IP address in the hostname. The Lima VM gets a routable IP on the host (e.g., `192.168.5.2`), and all services resolve through it:
| Production | Local |
|---|---|
| `docs.sunbeam.pt` | `docs.192.168.5.2.sslip.io` |
| `meet.sunbeam.pt` | `meet.192.168.5.2.sslip.io` |
| `drive.sunbeam.pt` | `drive.192.168.5.2.sslip.io` |
| `mail.sunbeam.pt` | `mail.192.168.5.2.sslip.io` |
| `chat.sunbeam.pt` | `chat.192.168.5.2.sslip.io` |
| `people.sunbeam.pt` | `people.192.168.5.2.sslip.io` |
| `src.sunbeam.pt` | `src.192.168.5.2.sslip.io` |
| `auth.sunbeam.pt` | `auth.192.168.5.2.sslip.io` |
| `s3.sunbeam.pt` | `s3.192.168.5.2.sslip.io` |
Pingora hostname routing works identically — it just matches on `docs.*`, `meet.*`, etc. regardless of the domain suffix. The domain suffix is the only thing that changes between overlays.
```bash
# Get the Lima VM IP
LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
echo "Local base domain: ${LIMA_IP}.sslip.io"
```
### 10.5 Local TLS — mkcert
Production uses `rustls-acme` with Let's Encrypt. Locally, Pingora loads a self-signed wildcard cert generated by [mkcert](https://github.com/FiloSottile/mkcert), which installs a local CA trusted by the system and browsers:
```bash
brew install mkcert
mkcert -install # Trust the local CA
LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
mkcert "*.${LIMA_IP}.sslip.io"
# Creates: _wildcard.<IP>.sslip.io.pem + _wildcard.<IP>.sslip.io-key.pem
```
The certs are mounted into the Pingora pod via a Secret. The local Pingora config differs from production only in the cert source — file path to the mkcert cert instead of `rustls-acme` ACME negotiation. All other routing logic is identical.
### 10.6 What Changes (Local Overrides)
Managed via `values-local.yaml` overlays per component. The list is intentionally short:
| Concern | Production | Local |
|---|---|---|
| **Resource limits** | Sized for 64 GB server | Capped tight (see §10.7) |
| **TLS cert source** | `rustls-acme` + Let's Encrypt | mkcert wildcard cert mounted as Secret |
| **Domain suffix** | `sunbeam.pt` | `<LIMA_IP>.sslip.io` |
| **OIDC redirect URIs** | `https://*.sunbeam.pt/...` | `https://*.sslip.io/...` |
| **Pingora listen** | Bound to public IP, ports 80/443/49152–49252 | hostPort on Lima VM |
| **Backups** | barman → Scaleway Object Storage | Disabled |
| **Email DNS** | MX, SPF, DKIM, DMARC, PTR | Not applicable (no inbound email) |
Everything else — mesh injection, mTLS, proxy routing, service discovery, OIDC flows, S3 paths, AI integration — is the same.
### 10.7 Resource Limits (Local)
Target: **~6–8 GB total** for the full stack including mesh and edge, leaving 24+ GB for IDE, browser, builds.
| Component | Memory Limit | Notes |
|---|---|---|
| **Mesh + Edge** | | |
| Linkerd control plane | 128 MB | destination, identity, proxy-injector combined |
| Linkerd proxies (sidecars) | ~15 MB each | ~20 injected pods ≈ 300 MB total |
| Pingora | 64 MB | Rust binary, lightweight |
| **Data** | | |
| PostgreSQL (CloudNativePG) | 512 MB | Handles all 10 databases fine at this scale |
| Redis | 64 MB | |
| OpenSearch | 512 MB | `ES_JAVA_OPTS=-Xms256m -Xmx512m` |
| **Storage** | | |
| SeaweedFS (master) | 64 MB | Metadata only |
| SeaweedFS (volume) | 256 MB | Actual data storage |
| SeaweedFS (filer) | 256 MB | S3 API gateway |
| **Auth** | | |
| Ory Kratos | 64 MB | Go binary, tiny footprint |
| Ory Hydra | 64 MB | Go binary, tiny footprint |
| Login UI | 64 MB | |
| **Apps** | | |
| Docs (Django) | 256 MB | |
| Docs (Next.js) | 256 MB | |
| Meet | 128 MB | |
| LiveKit | 128 MB | |
| Drive (Django) | 256 MB | |
| Drive (Next.js) | 256 MB | |
| Messages (Django + MDA) | 256 MB | |
| Messages (Next.js) | 256 MB | |
| Postfix MTA-in/out | 64 MB each | |
| Rspamd | 128 MB | |
| Conversations (Django) | 256 MB | |
| Conversations (Next.js) | 256 MB | |
| People (Django) | 128 MB | |
| **Dev Tools** | | |
| Gitea | 256 MB | Go binary |
| Hive | 64 MB | Rust binary, tiny |
| **Total** | **~5.5 GB** | Including mesh overhead. Well within budget. |
The Linkerd sidecar proxies add ~300 MB across all pods. Still leaves plenty of headroom on 32 GB. You don't need to run everything simultaneously — working on Hive? Skip Meet, Messages, Conversations. Testing the email flow? Skip Meet, Gitea, Hive. But you *can* run it all if you want to.
### 10.8 Access Pattern
Traffic flows through Pingora, exactly like production. Browser hits `https://docs.<LIMA_IP>.sslip.io` → Pingora terminates TLS → routes to Docs service → Linkerd sidecar handles mTLS to backend.
```bash
# After deploying the local stack:
LIMA_IP=$(limactl shell sunbeam hostname -I | awk '{print $1}')
echo "Docs: https://docs.${LIMA_IP}.sslip.io"
echo "Meet: https://meet.${LIMA_IP}.sslip.io"
echo "Drive: https://drive.${LIMA_IP}.sslip.io"
echo "Mail: https://mail.${LIMA_IP}.sslip.io"
echo "Chat: https://chat.${LIMA_IP}.sslip.io"
echo "People: https://people.${LIMA_IP}.sslip.io"
echo "Source: https://src.${LIMA_IP}.sslip.io"
echo "Auth: https://auth.${LIMA_IP}.sslip.io"
echo "S3: https://s3.${LIMA_IP}.sslip.io"
echo "Linkerd: kubectl port-forward -n mesh svc/linkerd-viz 8084:8084"
```
Direct `kubectl port-forward` is still available as a fallback for debugging individual services, but the normal workflow goes through the edge — same as production.
### 10.9 Manifest Organization
```
sunbeam-infra/ ← Gitea repo (and GitHub mirror)
├── base/ ← Shared manifests (both environments)
│ ├── mesh/
│ ├── ingress/
│ ├── ory/
│ ├── lasuite/
│ ├── media/
│ ├── storage/
│ ├── data/
│ └── devtools/
├── overlays/
│ ├── production/ ← Production-specific values
│ │ ├── values-ory.yaml (sunbeam.pt redirect URIs)
│ │ ├── values-pingora.yaml (rustls-acme, LE certs)
│ │ ├── values-docs.yaml
│ │ ├── values-linkerd.yaml
│ │ └── ...
│ └── local/ ← Local dev overrides
│ ├── values-domain.yaml (sslip.io suffix, mkcert cert path)
│ ├── values-ory.yaml (sslip.io redirect URIs)
│ ├── values-pingora.yaml (mkcert TLS, hostPort binding)
│ ├── values-resources.yaml (global memory caps)
│ └── ...
├── secrets/
│ ├── production/ ← Sealed Secrets or SOPS-encrypted
│ └── local/ ← Plaintext (gitignored), includes mkcert certs
└── scripts/
├── local-up.sh ← Start Lima VM, deploy full stack
├── local-down.sh ← Tear down
├── local-certs.sh ← Generate mkcert wildcard for current Lima IP
└── local-urls.sh ← Print all https://*.sslip.io URLs
```
Deploy to either environment:
```bash
# Local
kubectl apply -k overlays/local/
# Production
kubectl apply -k overlays/production/
```
Same base manifests. Same mesh. Same edge. Different certs and domain suffix. One repo.
---
## 11. Deployment Sequence (Production)
### Phase 0: Local Validation (MacBook k3s)
Every phase below is first deployed and tested on the local Lima + k3s stack before touching production. The workflow:
1. Apply manifests to local k3s using `kubectl apply -k overlays/local/`
2. Verify the component starts, passes health checks, and integrates with dependencies
3. Run the phase's integration test through the full edge path (`https://*.sslip.io` — same Pingora routing, same Linkerd mesh, same OIDC flows)
4. Commit manifests to `sunbeam-infra` repo
5. Apply to production using `kubectl apply -k overlays/production/`
6. Verify on production
This catches misconfigurations, missing env vars, broken OIDC flows, and service connectivity issues before they hit production. The local stack is structurally identical — same namespaces, same service DNS, same manifests — so a successful local deploy is a high-confidence signal for production.
### Phase 1: Foundation
1. Provision Elastic Metal, install k3s (`--disable=traefik`)
2. Deploy Linkerd service mesh
3. Deploy CloudNativePG operator + PostgreSQL cluster
4. Deploy Redis
5. Deploy OpenSearch
6. Deploy SeaweedFS (master + volume + filer)
7. Deploy Pingora with TLS for `*.sunbeam.pt`
### Phase 2: Authentication
8. Deploy Ory Kratos + Hydra
9. Deploy Sunbeam-branded login UI at `auth.sunbeam.pt`
10. Create initial identities (Sienna, Lonni, Amber)
11. Verify OIDC flow end-to-end
### Phase 3: Core Apps
12. Deploy Docs → verify Y.js WebSocket, AI slash command
13. Deploy Meet → verify WebSocket signaling + TURN/UDP
14. Deploy Drive → verify S3 uploads
15. Deploy People → verify user/team management
16. For each: create database, create S3 bucket, register OIDC client, deploy, verify
### Phase 4: Communication
17. Configure email DNS (MX, SPF, DKIM, DMARC, PTR)
18. Deploy Messages (Postfix MTA-in/out, Rspamd, Django MDA)
19. Provision mailboxes via People: personal + `hello@` shared inbox
20. Test send/receive with external addresses
### Phase 5: AI + Dev Tools
21. Generate Scaleway Generative APIs key
22. Set `AI_BASE_URL` / `AI_API_KEY` / `AI_MODEL` across all components
23. Deploy Conversations → verify chat, tool calls, streaming
24. Deploy Gitea → configure OIDC, LFS → SeaweedFS S3 backend
25. Apply Sunbeam theming to Gitea
26. Create "Game Assets" workspace in Drive
27. Deploy Hive → configure Drive workspace, S3 bucket, OIDC client credentials
28. Verify bidirectional sync: upload file in Drive → appears in S3, `aws s3 cp` to bucket → appears in Drive
### Phase 6: Hardening
29. Configure CloudNativePG backups → Scaleway Object Storage (barman)
30. Configure SeaweedFS replication for critical buckets
31. Create `sunbeam-studio` GitHub org, create private mirror repos
32. Add `GITHUB_MIRROR_TOKEN` secret to Gitea, deploy mirror workflow to all repos
33. Verify nightly mirror: check GitHub repos reflect Gitea state
34. Full integration smoke test: create user → log in → create doc → send email → push code → upload asset in Drive → verify in S3 → ask AI
35. Enable Linkerd dashboard + Scaleway Cockpit for monitoring
---
## 12. Backup & Replication Strategy
### 12.1 Offsite Replication — Scaleway Object Storage
SeaweedFS runs on local NVMe (single node). Scaleway Object Storage in Paris serves as the offsite replication target for disaster recovery.
**Scaleway Object Storage pricing (Paris):**
| Tier | Cost | Use Case |
|---|---|---|
| Standard Multi-AZ | ~€0.015/GB/month | Critical data (barman backups, active game assets) |
| Standard One Zone | ~€0.008/GB/month | Less critical replicas |
| Glacier | ~€0.003/GB/month | Deep archive (old builds, historical assets) |
| Egress | 75 GB free/month, then €0.01/GB | |
| Requests + Ingress | Included | |
**Estimated replication cost:** 100 GB on Multi-AZ ≈ €1.50/month. Even 500 GB Multi-AZ ≈ €7.50/month. Glacier for deep archive of old builds is essentially free.
### 12.2 Code Backup — GitHub Mirror
All Gitea repositories are mirrored daily to private GitHub repos as an offsite code backup. This is **code only** — Git LFS objects are excluded (covered by SeaweedFS → Scaleway Object Storage replication above).
**Implementation:** Gitea Actions cron job, runs nightly at 03:00 UTC.
```yaml
# .gitea/workflows/github-mirror.yaml (placed in each repo)
name: Mirror to GitHub
on:
schedule:
- cron: '0 3 * * *'
jobs:
mirror:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: false
- name: Push mirror
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_MIRROR_TOKEN }}
run: |
git remote add github "https://${GITHUB_TOKEN}@github.com/sunbeam-studio/${{ github.event.repository.name }}.git" 2>/dev/null || true
git push github --all --force
git push github --tags --force
```
**GitHub org:** `sunbeam-studio` (all repos private, free tier covers unlimited private repos).
**Mirrored repos:** `sunbeam-infra`, `pingora-proxy`, `hive`, `game`, and any future Sunbeam repositories. **Not mirrored:** Git LFS objects (game assets, large binaries) and secrets (never in Git).
This gives triple redundancy on source code: Gitea on Elastic Metal, GitHub mirror, and every developer's local clone. If the server and all Scaleway backups vanish simultaneously, the code is still safe.
### 12.3 Backup Schedule
| What | Method | Destination | Frequency | Retention |
|---|---|---|---|---|
| PostgreSQL (all DBs) | CloudNativePG barmanObjectStore | Scaleway Object Storage (Multi-AZ) | Continuous WAL + daily base | 30 days PITR, 90 days base |
| SeaweedFS (all buckets) | Nightly sync to Scaleway Object Storage | Scaleway Object Storage (One Zone) | Nightly | 30 days |
| Git repositories (code) | Gitea Actions → GitHub mirror | GitHub (`sunbeam-studio` org, private) | Nightly 03:00 UTC | Indefinite |
| Git repositories (local) | Distributed by nature (every clone) | Developer machines | Every push | Indefinite |
| Git LFS objects | In SeaweedFS → covered by SeaweedFS sync | Scaleway Object Storage | Per SeaweedFS schedule | 30 days |
| Cluster config (manifests, Helm values) | Committed to Gitea (mirrored to GitHub) | Distributed + GitHub | Every commit | Indefinite |
| Ory config | Committed to Gitea (secrets via Sealed Secrets or Scaleway Secret Manager) | Distributed + GitHub | Every commit | Indefinite |
| Pingora config | Committed to Gitea (mirrored to GitHub) | Distributed + GitHub | Every commit | Indefinite |
**Monthly verification:** Restore a random database to a scratch namespace, verify integrity and app startup. Spot-check a GitHub mirror repo against Gitea (compare `git log --oneline -5` on both remotes). Automate via Gitea Actions cron job.
---
## 13. Operational Runbooks
### 13.1 Add a New User
1. Create identity in Kratos (via People UI or Kratos admin API)
2. People propagates permissions to La Suite apps
3. Messages provisions personal mailbox (`name@sunbeam.pt`)
4. Gitea account auto-provisions on first OIDC login
5. User visits any `*.sunbeam.pt` URL, authenticates once, has access everywhere
### 13.2 Deploy a New La Suite Component
1. Create logical database in CloudNativePG
2. Create S3 bucket in SeaweedFS
3. Register OIDC client in Hydra (ID, secret, redirect URIs)
4. Deploy to `lasuite` namespace with standard env vars:
- `DJANGO_DATABASE_URL`, `AWS_S3_ENDPOINT_URL`, `AWS_S3_BUCKET_NAME`
- `OIDC_RP_CLIENT_ID`, `OIDC_RP_CLIENT_SECRET`
- `AI_BASE_URL`, `AI_API_KEY`, `AI_MODEL`
5. Add hostname route in Pingora
6. Verify auth flow, S3 access, AI connectivity
### 13.3 Restore PostgreSQL from Backup
**Full cluster:** CloudNativePG bootstraps new cluster from barman backup in Scaleway Object Storage. Specify `recoveryTarget.targetTime` for PITR. Verify integrity, swap service endpoints.
**Single database:** `pg_dump` from recovered cluster → `pg_restore` into production.
### 13.4 Recover from Elastic Metal Failure
1. Provision new Elastic Metal instance
2. Install k3s, deploy Linkerd
3. Restore CloudNativePG from barman (Scaleway Object Storage)
4. Restore SeaweedFS data from Scaleway Object Storage replicas
5. Re-deploy all manifests from Gitea (every developer has a clone)
6. Update DNS A records to new IP
7. Update PTR record in Scaleway console
8. Verify OIDC, email, TURN, AI connectivity
### 13.5 Troubleshoot LiveKit TURN
Symptoms: Users connect to Meet but have no audio/video.
1. Verify UDP 3478 + 49152–49252 reachable from outside
2. Check Pingora UDP forwarding is active
3. Check LiveKit logs for TURN allocation failures
4. Verify Elastic Metal firewall rules
5. Test with external STUN/TURN tester
### 13.6 Certificate Renewal Failure
1. Check Pingora logs for ACME errors
2. Verify port 80 reachable for HTTP-01 challenge (or DNS-01 if configured)
3. Restart Pingora to force `rustls-acme` renewal retry
---
## 14. Maintenance Schedule
### Weekly
- Check CloudNativePG backup status (latest successful timestamp)
- Glance at Linkerd dashboard for error rate anomalies
- Review Scaleway billing for unexpected charges
### Monthly
- Apply k3s patch releases if available
- Check suitenumerique GitHub for new La Suite releases, review changelogs
- Update container images one at a time, verify after each
- Review SeaweedFS storage utilization
- Run backup restore test (random database → scratch namespace)
### Quarterly
- **La Suite upstream sync:** Test new releases in local Docker Compose before deploying. One component at a time.
- **Ory updates:** Kratos/Hydra migrations may involve schema changes. Always backup first.
- **Linkerd updates:** Follow upgrade guide. Data plane sidecars roll automatically.
- **Security audit:** Review exposed ports, DNS, TLS config. Run `testssl.sh` against all endpoints. Check CVEs in deployed images.
- **Storage rebalance:** Evaluate SeaweedFS vs Scaleway Object Storage split. Move cold game assets to Scaleway if NVMe is filling.
- **AI model review:** Check Scaleway for new models. Evaluate cost/performance. Test in Conversations before switching.
### Annually
- Review Elastic Metal spec — more RAM, more disk?
- Evaluate new La Suite components
- Domain renewal for `sunbeam.pt`
- Full disaster recovery drill: simulate Elastic Metal loss, restore everything to a fresh instance from backups
---
## 15. Cost Estimate
| Item | Monthly |
|---|---|
| Scaleway Elastic Metal (64GB, NVMe) | ~€80–120 |
| Scaleway Object Storage (backups + replication) | ~€2–10 |
| Scaleway Transactional Email | ~€1 |
| Scaleway Generative APIs | ~€1–5 |
| Domain (amortized) | ~€2 |
| **Total** | **~€86–138** |
For comparison: Google Workspace (€12/user × 3) + Zoom (€13) + Notion (€8/user × 3) + GitHub Team (€4/user × 3) + Linear (€8/user × 3) + email hosting ≈ €130+/month — with no data control, no customization, per-seat scaling.
---
## 16. Architecture Diagram (Text)
```
Internet
┌──────────┴──────────┐
│ Pingora Edge │
│ HTTPS + WS + UDP │
└──────────┬──────────┘
┌──────────┴──────────┐
│ Linkerd mTLS mesh │
└──────────┬──────────┘
┌────────┬───────┬───┴───┬────────┬────────┐
│ │ │ │ │ │
┌──┴──┐ ┌──┴──┐ ┌──┴──┐ ┌──┴──┐ ┌───┴──┐ ┌──┴──┐
│Docs │ │Meet │ │Drive│ │Msgs │ │Convos│ │Gitea│
└──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ └───┬──┘ └──┬──┘
│ │ │ │ │ │
│ ┌──┴──┐ │ ┌──┴──┐ │ │
│ │Live │ │ │Post │ │ │
│ │Kit │ │ │fix │ │ │
│ └─────┘ │ └─────┘ │ │
│ │ │ │
│ ┌──┴──┐ │ │
│ │Hive │ ◄── sync ──►│ │
│ └──┬──┘ │ │
│ │ │ │
┌─────┴───────────────┴────────────────┴───────┴─────┐
│ │
┌───┴────┐ ┌─────────┐ ┌───────┐ ┌──────────────────┐ │
│Postgres│ │SeaweedFS│ │ Redis │ │ OpenSearch │ │
│ (CNPG) │ │ (S3) │ │ │ │ │ │
└────────┘ └─────────┘ └───────┘ └──────────────────┘ │
│ │
│ ┌──────────────────────┐ │
│ │ Ory Kratos/Hydra │◄───── all apps ────┘
│ │ (auth.sunbeam.*) │ via OIDC
│ └──────────────────────┘
└──── barman ──── Scaleway Object Storage (backups)
Scaleway Generative APIs (AI)
│ HTTPS
└── Docs, Messages, Conversations
```
---
## 17. Open Questions
- **Game build pipeline details** — Gitea Actions handles lightweight CI (compiles, tests, linting). Platform-specific builds (console SDKs, platform cert signing) offloaded to external providers. All build artifacts land in SeaweedFS. Exact pipeline TBD as game toolchain solidifies.
- **Drive REST API surface** — Hive's Drive client depends on Drive's exact file list/upload/download endpoints. Need to read Drive source to confirm: pagination strategy, file version handling, multipart upload support, how folder hierarchy is represented in API responses.
---
## Appendix: Repository References
| Component | Repository | License |
|---|---|---|
| Docs | `github.com/suitenumerique/docs` | MIT |
| Meet | `github.com/suitenumerique/meet` | MIT |
| Drive | `github.com/suitenumerique/drive` | MIT |
| Messages | `github.com/suitenumerique/messages` | MIT |
| Conversations | `github.com/suitenumerique/conversations` | MIT |
| People | `github.com/suitenumerique/people` | MIT |
| Integration bar | `github.com/suitenumerique/integration` | MIT |
| Django shared lib | `github.com/suitenumerique/django-lasuite` | MIT |
| Ory Kratos | `github.com/ory/kratos` | Apache 2.0 |
| Ory Hydra | `github.com/ory/hydra` | Apache 2.0 |
| SeaweedFS | `github.com/seaweedfs/seaweedfs` | Apache 2.0 |
| CloudNativePG | `github.com/cloudnative-pg/cloudnative-pg` | Apache 2.0 |
| Linkerd | `github.com/linkerd/linkerd2` | Apache 2.0 |
| Pingora | `github.com/cloudflare/pingora` | Apache 2.0 |
| Gitea | `github.com/go-gitea/gitea` | MIT |
| LiveKit | `github.com/livekit/livekit` | Apache 2.0 |