feat(infra): production bootstrap — cert-manager, longhorn, monitoring
Add new bases for cert-manager (Let's Encrypt + wildcard cert), Longhorn distributed storage, and monitoring (kube-prometheus-stack + Loki + Tempo + Grafana OIDC). Add cloud-init for Scaleway Elastic Metal provisioning. Production overlay: add patches for postgres sizing, SeaweedFS volume, OpenSearch storage, LiveKit service, Pingora host ports, resource limits, and CNPG daily barman backups. Update cert-manager.yaml with full dnsNames for all *.sunbeam.pt subdomains.
This commit is contained in:
96
base/monitoring/prometheus-values.yaml
Normal file
96
base/monitoring/prometheus-values.yaml
Normal file
@@ -0,0 +1,96 @@
|
||||
# kube-prometheus-stack — Prometheus + AlertManager + Grafana + node-exporter + kube-state-metrics
#
# k3s quirks: kube-proxy is replaced by Cilium; etcd/scheduler/controller-manager
# don't expose metrics on standard ports. Disable their monitors to avoid noise.
|
||||
|
||||
# Grafana sub-chart values: admin credentials, persistence, OIDC login via Hydra,
# and the extra Loki / Tempo data sources.
# DOMAIN_SUFFIX is presumably a placeholder substituted at deploy time — confirm.
grafana:
  adminUser: admin
  admin:
    # The admin password is read from the grafana-admin Secret, not stored here.
    existingSecret: grafana-admin
    passwordKey: admin-password
  persistence:
    enabled: true
    size: 2Gi
  # Inject Hydra OIDC client credentials (created by Hydra Maester from the OAuth2Client CRD)
  envFromSecret: grafana-oidc
  grafana.ini:
    server:
      root_url: "https://grafana.DOMAIN_SUFFIX"
    auth:
      # Keep local login as fallback (admin password from grafana-admin secret)
      disable_login_form: false
      signout_redirect_url: "https://auth.DOMAIN_SUFFIX/oauth2/sessions/logout"
    auth.generic_oauth:
      enabled: true
      name: Sunbeam
      icon: signin
      # CLIENT_ID / CLIENT_SECRET injected from grafana-oidc K8s Secret via envFromSecret
      client_id: "${CLIENT_ID}"
      client_secret: "${CLIENT_SECRET}"
      scopes: "openid email profile"
      auth_url: "https://auth.DOMAIN_SUFFIX/oauth2/auth"
      token_url: "https://auth.DOMAIN_SUFFIX/oauth2/token"
      api_url: "https://auth.DOMAIN_SUFFIX/userinfo"
      allow_sign_up: true
      # Roles are not derived from OIDC claims; the org role comes from the
      # [users] default below.
      skip_org_role_sync: true
    users:
      # Small studio — anyone with a valid La Suite account is an admin.
      # auto_assign_org_role is a [users] setting in grafana.ini; placed under
      # [auth.generic_oauth] it is not applied and new users get the default role.
      # To restrict to specific users, set auth.generic_oauth.role_attribute_path
      # (with skip_org_role_sync: false) instead.
      auto_assign_org_role: Admin
  additionalDataSources:
    - name: Loki
      type: loki
      url: http://loki.monitoring.svc.cluster.local:3100
      access: proxy
      isDefault: false
    # NOTE(review): Tempo is addressed on 3100, the same port number as Loki —
    # confirm this matches the HTTP port of the deployed Tempo Service.
    - name: Tempo
      type: tempo
      url: http://tempo.monitoring.svc.cluster.local:3100
      access: proxy
      isDefault: false
|
||||
|
||||
# Prometheus server: 90 days of metrics kept on a 30Gi persistent volume claim.
# NOTE(review): only a time-based retention is set; no retentionSize cap is
# configured here — confirm 30Gi is enough for 90d at the expected ingest rate.
prometheus:
  prometheusSpec:
    retention: 90d
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 30Gi
|
||||
|
||||
# Alertmanager: a 2Gi persistent volume claim plus a single e-mail receiver
# that every alert is routed to.
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 2Gi
  config:
    global:
      smtp_from: 'alerts@DOMAIN_SUFFIX'
      # Mail is handed to the postfix Service in the lasuite namespace on port 25,
      # without requiring TLS (presumably an in-cluster relay — confirm).
      smtp_smarthost: 'postfix.lasuite.svc.cluster.local:25'
      smtp_require_tls: false
    route:
      # Alerts sharing the same alertname and namespace are batched together.
      group_by:
        - alertname
        - namespace
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
      receiver: email
    receivers:
      - name: email
        email_configs:
          - to: 'ops@DOMAIN_SUFFIX'
            # Also send a notification when an alert resolves.
            send_resolved: true
|
||||
|
||||
# k3s does not expose these control-plane components for scraping (and
# kube-proxy is replaced by Cilium), so their monitors are switched off.
kubeControllerManager:
  enabled: false

kubeEtcd:
  enabled: false

kubeProxy:
  enabled: false

kubeScheduler:
  enabled: false
|
||||
Reference in New Issue
Block a user