feat(observability): enable OTLP tracing, fix Prometheus scraping, add proxy ServiceMonitor
- Set otlp_endpoint to Tempo HTTP receiver (port 4318) for request tracing
- Add hostNetwork to prometheusSpec so it can reach kubelet/node-exporter on the node's public IP
- Add ServiceMonitor for proxy metrics scrape on port 9090
- Add CORS origin and Grafana datasource config for monitoring stack
This commit is contained in:
@@ -9,6 +9,7 @@ resources:
|
|||||||
- pingora-deployment.yaml
|
- pingora-deployment.yaml
|
||||||
- pingora-service.yaml
|
- pingora-service.yaml
|
||||||
- pingora-config.yaml
|
- pingora-config.yaml
|
||||||
|
- pingora-servicemonitor.yaml
|
||||||
|
|
||||||
images:
|
images:
|
||||||
- name: sunbeam-proxy
|
- name: sunbeam-proxy
|
||||||
|
|||||||
@@ -21,8 +21,7 @@ data:
|
|||||||
key_path = "/etc/tls/tls.key"
|
key_path = "/etc/tls/tls.key"
|
||||||
|
|
||||||
[telemetry]
|
[telemetry]
|
||||||
# Empty = OTEL disabled. Set to http://otel-collector.data.svc:4318 when ready.
|
otlp_endpoint = "http://tempo.monitoring.svc.cluster.local:4318"
|
||||||
otlp_endpoint = ""
|
|
||||||
metrics_port = 9090
|
metrics_port = 9090
|
||||||
|
|
||||||
# Kubernetes resource names for cert/config watchers.
|
# Kubernetes resource names for cert/config watchers.
|
||||||
|
|||||||
15
base/ingress/pingora-servicemonitor.yaml
Normal file
15
base/ingress/pingora-servicemonitor.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: pingora
|
||||||
|
namespace: ingress
|
||||||
|
labels:
|
||||||
|
app: pingora
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: pingora
|
||||||
|
endpoints:
|
||||||
|
- port: metrics
|
||||||
|
interval: 15s
|
||||||
|
path: /metrics
|
||||||
@@ -15,7 +15,7 @@ grafana:
|
|||||||
envFromSecret: grafana-oidc
|
envFromSecret: grafana-oidc
|
||||||
grafana.ini:
|
grafana.ini:
|
||||||
server:
|
server:
|
||||||
root_url: "https://grafana.DOMAIN_SUFFIX"
|
root_url: "https://metrics.DOMAIN_SUFFIX"
|
||||||
auth:
|
auth:
|
||||||
# Keep local login as fallback (admin password from grafana-admin secret)
|
# Keep local login as fallback (admin password from grafana-admin secret)
|
||||||
disable_login_form: false
|
disable_login_form: false
|
||||||
@@ -36,21 +36,44 @@ grafana:
|
|||||||
# To restrict to specific users, set role_attribute_path instead.
|
# To restrict to specific users, set role_attribute_path instead.
|
||||||
auto_assign_org_role: Admin
|
auto_assign_org_role: Admin
|
||||||
skip_org_role_sync: true
|
skip_org_role_sync: true
|
||||||
|
sidecar:
|
||||||
|
datasources:
|
||||||
|
# Disable the auto-provisioned ClusterIP datasource; we define it
|
||||||
|
# explicitly below using the external URL so Grafana's backend reaches
|
||||||
|
# Prometheus via Pingora (https://systemmetrics.DOMAIN_SUFFIX) rather
|
||||||
|
# than the cluster-internal ClusterIP which is blocked by network policy.
|
||||||
|
defaultDatasourceEnabled: false
|
||||||
|
|
||||||
additionalDataSources:
|
additionalDataSources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
url: "https://systemmetrics.DOMAIN_SUFFIX"
|
||||||
|
access: proxy
|
||||||
|
isDefault: true
|
||||||
|
jsonData:
|
||||||
|
timeInterval: 30s
|
||||||
- name: Loki
|
- name: Loki
|
||||||
type: loki
|
type: loki
|
||||||
url: http://loki.monitoring.svc.cluster.local:3100
|
url: "https://systemlogs.DOMAIN_SUFFIX"
|
||||||
access: proxy
|
access: proxy
|
||||||
isDefault: false
|
isDefault: false
|
||||||
- name: Tempo
|
- name: Tempo
|
||||||
type: tempo
|
type: tempo
|
||||||
url: http://tempo.monitoring.svc.cluster.local:3100
|
url: "https://systemtracing.DOMAIN_SUFFIX"
|
||||||
access: proxy
|
access: proxy
|
||||||
isDefault: false
|
isDefault: false
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
prometheusSpec:
|
prometheusSpec:
|
||||||
retention: 90d
|
retention: 90d
|
||||||
|
# hostNetwork allows Prometheus to reach kubelet (10250) and node-exporter
|
||||||
|
# (9100) on the node's public InternalIP. On a single-node bare-metal
|
||||||
|
# server, pod-to-node-public-IP traffic doesn't route without this.
|
||||||
|
hostNetwork: true
|
||||||
|
additionalArgs:
|
||||||
|
# Allow browser-direct queries from the Grafana UI origin.
|
||||||
|
- name: web.cors.origin
|
||||||
|
value: "https://metrics.DOMAIN_SUFFIX"
|
||||||
storageSpec:
|
storageSpec:
|
||||||
volumeClaimTemplate:
|
volumeClaimTemplate:
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
Reference in New Issue
Block a user