---
# kube-prometheus-stack — Prometheus + AlertManager + Grafana + node-exporter + kube-state-metrics
#
# k3s quirks: kube-proxy is replaced by Cilium; etcd/scheduler/controller-manager
# don't expose metrics on standard ports. Disable their monitors to avoid noise.

grafana:
  adminUser: admin
  admin:
    existingSecret: grafana-admin
    passwordKey: admin-password

  persistence:
    enabled: true
    size: 2Gi

  # Inject Hydra OIDC client credentials (created by Hydra Maester from the OAuth2Client CRD)
  envFromSecret: grafana-oidc

  grafana.ini:
    server:
      root_url: "https://metrics.DOMAIN_SUFFIX"
    users:
      # Small studio — anyone with a valid La Suite account is an admin.
      # NOTE: auto_assign_org_role belongs in [users]; Grafana ignores it inside
      # [auth.generic_oauth]. With skip_org_role_sync enabled below, this is the
      # role every OAuth login receives. To restrict specific users instead, set
      # role_attribute_path under [auth.generic_oauth].
      auto_assign_org_role: Admin
    auth:
      # Keep local login as fallback (admin password from grafana-admin secret)
      disable_login_form: false
      signout_redirect_url: "https://auth.DOMAIN_SUFFIX/oauth2/sessions/logout"
    auth.generic_oauth:
      enabled: true
      name: Sunbeam
      icon: signin
      # CLIENT_ID / CLIENT_SECRET injected from grafana-oidc K8s Secret via envFromSecret
      client_id: "${CLIENT_ID}"
      client_secret: "${CLIENT_SECRET}"
      scopes: "openid email profile"
      auth_url: "https://auth.DOMAIN_SUFFIX/oauth2/auth"
      token_url: "https://auth.DOMAIN_SUFFIX/oauth2/token"
      api_url: "https://auth.DOMAIN_SUFFIX/userinfo"
      allow_sign_up: true
      skip_org_role_sync: true

  sidecar:
    datasources:
      # Disable the auto-provisioned ClusterIP datasource; we define it
      # explicitly below using the external URL so Grafana's backend reaches
      # Prometheus via Pingora (https://systemmetrics.DOMAIN_SUFFIX) rather
      # than the cluster-internal ClusterIP which is blocked by network policy.
      defaultDatasourceEnabled: false

  additionalDataSources:
    - name: Prometheus
      type: prometheus
      url: "https://systemmetrics.DOMAIN_SUFFIX"
      access: proxy
      isDefault: true
      jsonData:
        timeInterval: 30s
    - name: Loki
      type: loki
      url: "https://systemlogs.DOMAIN_SUFFIX"
      access: proxy
      isDefault: false
    - name: Tempo
      type: tempo
      url: "https://systemtracing.DOMAIN_SUFFIX"
      access: proxy
      isDefault: false

prometheus:
  prometheusSpec:
    retention: 90d
    # hostNetwork allows Prometheus to reach kubelet (10250) and node-exporter
    # (9100) on the node's public InternalIP. On a single-node bare-metal
    # server, pod-to-node-public-IP traffic doesn't route without this.
    hostNetwork: true
    additionalArgs:
      # Allow browser-direct queries from the Grafana UI origin.
      - name: web.cors.origin
        value: "https://metrics.DOMAIN_SUFFIX"
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes: [ReadWriteOnce]
          resources:
            requests:
              storage: 30Gi

alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          accessModes: [ReadWriteOnce]
          resources:
            requests:
              storage: 2Gi
  config:
    global:
      smtp_from: "alerts@DOMAIN_SUFFIX"
      # Quoted: the host:port colon would otherwise trip YAML's mapping parse.
      smtp_smarthost: "postfix.lasuite.svc.cluster.local:25"
      smtp_require_tls: false
    route:
      group_by: [alertname, namespace]
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
      receiver: email
    receivers:
      - name: email
        email_configs:
          - to: "ops@DOMAIN_SUFFIX"
            send_resolved: true

# Disable monitors for components k3s doesn't expose
kubeEtcd:
  enabled: false
kubeControllerManager:
  enabled: false
kubeScheduler:
  enabled: false
kubeProxy:
  enabled: false