From 1147b1a5aabbbf47cd1dc47840a1119889d1ea08 Mon Sep 17 00:00:00 2001 From: Sienna Meridian Satterwhite Date: Tue, 24 Mar 2026 12:22:10 +0000 Subject: [PATCH] fix: WOPI registration on restart + Collabora readiness probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add readiness/liveness probes to Collabora (GET /hosting/discovery) - Add init container to Drive backend that waits for Collabora and runs trigger_wopi_configuration on every pod start — fixes WOPI silently breaking after server restarts (chart Job only ran on sunbeam apply) - Add OIDC_RESPONSE_MODE=query to Projects config - Tune Collabora extra_params (default language, UI mode, autosave, prespawned children, per-document concurrency, log level) and expand the dictionaries list - Drive: read OIDC_RS_ALLOWED_AUDIENCES from the drive-rs-audiences secret, extend session lifetime from 1 hour to 7 days, and enable frontend silent login --- base/lasuite/collabora-deployment.yaml | 29 +++++++- base/lasuite/drive-values.yaml | 25 ++++--- base/lasuite/kustomization.yaml | 2 + base/lasuite/patch-drive-wopi-init.yaml | 89 +++++++++++++++++++++++++ base/lasuite/projects-config.yaml | 4 ++ 5 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 base/lasuite/patch-drive-wopi-init.yaml diff --git a/base/lasuite/collabora-deployment.yaml b/base/lasuite/collabora-deployment.yaml index f2e091a..dc56768 100644 --- a/base/lasuite/collabora-deployment.yaml +++ b/base/lasuite/collabora-deployment.yaml @@ -25,11 +25,20 @@ spec: # Public hostname — Collabora uses this in self-referencing URLs. - name: server_name value: "docs.DOMAIN_SUFFIX" - # TLS is terminated at Pingora; disable Collabora's built-in TLS. + # TLS terminated at Pingora. Production defaults for lang, UI, and perf. 
- name: extra_params - value: "--o:ssl.enable=false --o:ssl.termination=true" + value: >- + --o:ssl.enable=false + --o:ssl.termination=true + --o:default_language=en_GB + --o:user_interface.mode=tabbed + --o:per_document.autosave_duration_secs=120 + --o:per_document.idlesave_duration_secs=15 + --o:num_prespawn_children=2 + --o:per_document.max_concurrency=4 + --o:logging.level=warning - name: dictionaries - value: "en_US fr_FR" + value: "en_GB en_US fr_FR nl_NL pt_PT de_DE es_ES it_IT pl_PL sv_SE da_DK nb_NO fi_FI el_GR cs_CZ ro_RO hu_HU bg_BG hr_HR sk_SK sl_SI et_EE lv_LV lt_LT" - name: username valueFrom: secretKeyRef: @@ -40,6 +49,20 @@ spec: secretKeyRef: name: collabora-credentials key: password + readinessProbe: + httpGet: + path: /hosting/discovery + port: 9980 + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 5 + livenessProbe: + httpGet: + path: /hosting/discovery + port: 9980 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 securityContext: capabilities: add: diff --git a/base/lasuite/drive-values.yaml b/base/lasuite/drive-values.yaml index cd0a6a7..da7ae44 100644 --- a/base/lasuite/drive-values.yaml +++ b/base/lasuite/drive-values.yaml @@ -143,11 +143,14 @@ backend: secretKeyRef: name: oidc-drive key: CLIENT_SECRET - # Only accept tokens issued to the messages OAuth2 client (ListValue, comma-separated). + # Accept bearer tokens from Sunbeam CLI and Messages (server-to-server). + # OIDC_RS_ALLOWED_AUDIENCES is set by the sunbeam seed script which reads + # the messages client ID from the oidc-messages secret and combines it with + # the static sunbeam-cli audience. Stored in vault as secret/drive. 
OIDC_RS_ALLOWED_AUDIENCES: secretKeyRef: - name: oidc-messages - key: CLIENT_ID + name: drive-rs-audiences + key: OIDC_RS_ALLOWED_AUDIENCES # ── Django ──────────────────────────────────────────────────────────────── DJANGO_SECRET_KEY: @@ -160,14 +163,14 @@ backend: DJANGO_CSRF_TRUSTED_ORIGINS: https://drive.DOMAIN_SUFFIX LOGIN_REDIRECT_URL: / LOGOUT_REDIRECT_URL: / - SESSION_COOKIE_AGE: "3600" - # Session cache TTL must match SESSION_COOKIE_AGE; default is 30s which - # causes sessions to expire in Valkey while the cookie remains valid. - CACHES_SESSION_TIMEOUT: "3600" - # Silent login disabled: the callback redirects back to the returnTo URL - # (not LOGIN_REDIRECT_URL) on login_required, causing an infinite reload loop - # when the user has no Hydra session. UserProfile shows a Login button instead. - FRONTEND_SILENT_LOGIN_ENABLED: "false" + SESSION_COOKIE_AGE: "604800" + # Session cache TTL must match SESSION_COOKIE_AGE. + CACHES_SESSION_TIMEOUT: "604800" + # Silent login: when the Django session expires, the frontend redirects + # to the OIDC login endpoint which completes instantly (Hydra already + # has a session) and bounces the user back. This keeps users logged in + # seamlessly as long as their Hydra SSO session is valid. + FRONTEND_SILENT_LOGIN_ENABLED: "true" # Redirect unauthenticated visitors at / straight to OIDC login instead of # showing the La Suite marketing landing page. returnTo brings them to # their files after successful auth. diff --git a/base/lasuite/kustomization.yaml b/base/lasuite/kustomization.yaml index 973d4d6..9e595c4 100644 --- a/base/lasuite/kustomization.yaml +++ b/base/lasuite/kustomization.yaml @@ -59,6 +59,8 @@ patches: - path: patch-people-frontend-nginx.yaml # Mount media auth proxy nginx config in drive-frontend - path: patch-drive-frontend-nginx.yaml + # Wait for Collabora + run trigger_wopi_configuration on every Drive pod start + - path: patch-drive-wopi-init.yaml # La Suite Numérique Helm charts. 
# Charts with a published Helm repo use helmCharts below. diff --git a/base/lasuite/patch-drive-wopi-init.yaml b/base/lasuite/patch-drive-wopi-init.yaml new file mode 100644 index 0000000..b9f9d80 --- /dev/null +++ b/base/lasuite/patch-drive-wopi-init.yaml @@ -0,0 +1,89 @@ +# Init container that waits for Collabora and configures WOPI on every pod start. +# +# The Drive chart's configure_wopi Job is designed for ArgoCD PostSync hooks +# and only runs on `sunbeam apply`. On server restart, no one re-triggers it, +# so WOPI editing silently breaks. This init container fixes that — every time +# a Drive backend pod starts (restart, rollout, scale-up), it waits for +# Collabora and runs trigger_wopi_configuration before the main container. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: drive-backend + namespace: lasuite +spec: + template: + spec: + initContainers: + - name: configure-wopi + image: lasuite/drive-backend:latest + command: + - "/bin/sh" + - "-c" + - | + echo "Waiting for Collabora..." + for i in $(seq 1 36); do + if wget -qO /dev/null --timeout=5 "$WOPI_COLLABORA_DISCOVERY_URL" 2>/dev/null; then + echo "Collabora ready — configuring WOPI..." + python manage.py trigger_wopi_configuration + exit $? + fi + echo "Attempt $i/36: not ready, retrying in 5s..." 
+ sleep 5 + done + echo "WARN: Collabora not ready after 3 minutes — starting without WOPI" + exit 0 + env: + # Database — minimum needed for Django manage.py + - name: DB_ENGINE + value: django.db.backends.postgresql + - name: DB_NAME + value: drive_db + - name: DB_USER + value: drive + - name: DB_HOST + valueFrom: + configMapKeyRef: + name: lasuite-postgres + key: DB_HOST + - name: DB_PORT + valueFrom: + configMapKeyRef: + name: lasuite-postgres + key: DB_PORT + - name: DB_PASSWORD + valueFrom: + secretKeyRef: + name: drive-db-credentials + key: password + # Django + - name: DJANGO_CONFIGURATION + value: Production + - name: DJANGO_SECRET_KEY + valueFrom: + secretKeyRef: + name: drive-django-secret + key: DJANGO_SECRET_KEY + # Redis/Celery — trigger_wopi_configuration dispatches a Celery task + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: lasuite-valkey + key: REDIS_URL + - name: DJANGO_CELERY_BROKER_URL + valueFrom: + configMapKeyRef: + name: lasuite-valkey + key: CELERY_BROKER_URL + # WOPI + - name: WOPI_CLIENTS + value: collabora + - name: WOPI_COLLABORA_DISCOVERY_URL + value: http://collabora.lasuite.svc.cluster.local:9980/hosting/discovery + - name: WOPI_SRC_BASE_URL + value: https://drive.DOMAIN_SUFFIX + resources: + limits: + memory: 128Mi + requests: + memory: 64Mi + cpu: 25m diff --git a/base/lasuite/projects-config.yaml b/base/lasuite/projects-config.yaml index a693524..031ebd6 100644 --- a/base/lasuite/projects-config.yaml +++ b/base/lasuite/projects-config.yaml @@ -16,6 +16,10 @@ data: OIDC_IGNORE_ROLES: "true" OIDC_ADMIN_ROLES: "*" OIDC_FULLNAME_ATTRIBUTES: "given_name,family_name" + # Planka defaults to response_mode=fragment, which returns the auth code in the + # URL hash (#code=...). The SPA callback handler doesn't parse fragments correctly, + # causing a silent auth loop. Use query mode so the code comes as ?code=... 
+ OIDC_RESPONSE_MODE: "query" # S3 file storage via SeaweedFS S3_ENDPOINT: "http://seaweedfs-filer.storage.svc.cluster.local:8333"