Adds 28 alert rules across 9 PrometheusRule files covering infrastructure (Longhorn, cert-manager), data (PostgreSQL, OpenBao, OpenSearch), storage (SeaweedFS), devtools (Gitea), identity (Hydra, Kratos), media (LiveKit), and mesh (Linkerd golden signals for all services). Severity routing: critical alerts go to Matrix and email; warnings go to Matrix only (the Alertmanager config is updated in a separate commit).
29 lines
817 B
YAML
29 lines
817 B
YAML
---
# PrometheusRule (monitoring.coreos.com/v1) holding the alerting rules for
# Gitea. Two rules are defined in a single "gitea" group:
#   - GiteaDown            (critical): a matching scrape target reports up == 0
#   - GiteaHighGoroutines  (warning):  go_goroutines stays above 500
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: gitea-alerts
  namespace: devtools
  labels:
    # NOTE(review): presumably the label the Prometheus ruleSelector matches
    # on to load this object — confirm against the Prometheus CR.
    role: alert-rules
spec:
  groups:
    - name: gitea
      rules:
        # Fires once any series whose `job` label contains "gitea" has
        # reported up == 0 continuously for 3 minutes.
        # NOTE(review): `up == 0` only matches targets that are still being
        # scraped; if the target disappears from service discovery entirely
        # the series goes absent and this rule stays silent. Consider a
        # companion absent(...) rule if that case matters.
        - alert: GiteaDown
          expr: up{job=~".*gitea.*"} == 0
          for: 3m
          labels:
            severity: critical
          annotations:
            summary: "Gitea is down"
            # NOTE(review): assumes the scrape config attaches `namespace`
            # and `pod` labels to the series; otherwise they render empty.
            description: "Gitea instance {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # Fires when a Gitea target's goroutine count has stayed above 500
        # for 5 minutes; {{ $value }} in the description is the observed
        # count at evaluation time.
        # NOTE(review): 500 is a fixed threshold — confirm it suits the
        # instance's normal load.
        - alert: GiteaHighGoroutines
          expr: go_goroutines{job=~".*gitea.*"} > 500
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Gitea goroutine count is high"
            description: "Gitea {{ $labels.namespace }}/{{ $labels.pod }} has {{ $value }} goroutines."