# Wire the Pingora ingress proxy into the monitoring stack.
#
# - base/ingress/kustomization.yaml: list pingora-servicemonitor.yaml as a
#   resource.
# - base/ingress/pingora-config.yaml: point [telemetry] otlp_endpoint at the
#   Tempo service on port 4318 (it was previously the empty string, which the
#   removed comment describes as "OTEL disabled").
# - base/ingress/pingora-servicemonitor.yaml (new): ServiceMonitor selecting
#   app=pingora, scraping the "metrics" port at /metrics every 15s.
# - base/monitoring/prometheus-values.yaml: move Grafana's root_url to
#   metrics.DOMAIN_SUFFIX, disable the sidecar's default datasource, declare
#   the Prometheus datasource explicitly, switch the Loki/Tempo datasources to
#   their external hostnames, and enable hostNetwork plus a web.cors.origin
#   argument on Prometheus.
#
# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed. Blank lines were placed so that every hunk
# matches its "@@" line counts; indentation was inferred from YAML nesting.
# Confirm context lines against the target files before applying.
# NOTE(review): Prometheus documents --web.cors.origin as a regex, so the
# unescaped dots in the value match any character -- confirm this is
# acceptable once DOMAIN_SUFFIX is substituted.
# NOTE(review): the ServiceMonitor carries only the app=pingora label;
# presumably the Prometheus serviceMonitorSelector accepts that -- verify.
diff --git a/base/ingress/kustomization.yaml b/base/ingress/kustomization.yaml
index 41e1aa1..a5957a2 100644
--- a/base/ingress/kustomization.yaml
+++ b/base/ingress/kustomization.yaml
@@ -9,6 +9,7 @@ resources:
   - pingora-deployment.yaml
   - pingora-service.yaml
   - pingora-config.yaml
+  - pingora-servicemonitor.yaml
 
 images:
   - name: sunbeam-proxy
diff --git a/base/ingress/pingora-config.yaml b/base/ingress/pingora-config.yaml
index 8473483..977aa13 100644
--- a/base/ingress/pingora-config.yaml
+++ b/base/ingress/pingora-config.yaml
@@ -21,8 +21,7 @@ data:
     key_path = "/etc/tls/tls.key"
 
     [telemetry]
-    # Empty = OTEL disabled. Set to http://otel-collector.data.svc:4318 when ready.
-    otlp_endpoint = ""
+    otlp_endpoint = "http://tempo.monitoring.svc.cluster.local:4318"
     metrics_port = 9090
 
     # Kubernetes resource names for cert/config watchers.
diff --git a/base/ingress/pingora-servicemonitor.yaml b/base/ingress/pingora-servicemonitor.yaml
new file mode 100644
index 0000000..cb61496
--- /dev/null
+++ b/base/ingress/pingora-servicemonitor.yaml
@@ -0,0 +1,15 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: pingora
+  namespace: ingress
+  labels:
+    app: pingora
+spec:
+  selector:
+    matchLabels:
+      app: pingora
+  endpoints:
+    - port: metrics
+      interval: 15s
+      path: /metrics
diff --git a/base/monitoring/prometheus-values.yaml b/base/monitoring/prometheus-values.yaml
index 5b1a0d8..8510f79 100644
--- a/base/monitoring/prometheus-values.yaml
+++ b/base/monitoring/prometheus-values.yaml
@@ -15,7 +15,7 @@ grafana:
   envFromSecret: grafana-oidc
   grafana.ini:
     server:
-      root_url: "https://grafana.DOMAIN_SUFFIX"
+      root_url: "https://metrics.DOMAIN_SUFFIX"
     auth:
       # Keep local login as fallback (admin password from grafana-admin secret)
       disable_login_form: false
@@ -36,21 +36,44 @@ grafana:
       # To restrict to specific users, set role_attribute_path instead.
       auto_assign_org_role: Admin
       skip_org_role_sync: true
+  sidecar:
+    datasources:
+      # Disable the auto-provisioned ClusterIP datasource; we define it
+      # explicitly below using the external URL so Grafana's backend reaches
+      # Prometheus via Pingora (https://systemmetrics.DOMAIN_SUFFIX) rather
+      # than the cluster-internal ClusterIP which is blocked by network policy.
+      defaultDatasourceEnabled: false
+
   additionalDataSources:
+    - name: Prometheus
+      type: prometheus
+      url: "https://systemmetrics.DOMAIN_SUFFIX"
+      access: proxy
+      isDefault: true
+      jsonData:
+        timeInterval: 30s
     - name: Loki
       type: loki
-      url: http://loki.monitoring.svc.cluster.local:3100
+      url: "https://systemlogs.DOMAIN_SUFFIX"
       access: proxy
       isDefault: false
     - name: Tempo
       type: tempo
-      url: http://tempo.monitoring.svc.cluster.local:3100
+      url: "https://systemtracing.DOMAIN_SUFFIX"
       access: proxy
       isDefault: false
 
 prometheus:
   prometheusSpec:
     retention: 90d
+    # hostNetwork allows Prometheus to reach kubelet (10250) and node-exporter
+    # (9100) on the node's public InternalIP. On a single-node bare-metal
+    # server, pod-to-node-public-IP traffic doesn't route without this.
+    hostNetwork: true
+    additionalArgs:
+      # Allow browser-direct queries from the Grafana UI origin.
+      - name: web.cors.origin
+        value: "https://metrics.DOMAIN_SUFFIX"
     storageSpec:
       volumeClaimTemplate:
         spec: