---
# Alerting rules for the Ory services (Hydra and Kratos).
# Declares one rule group, "ory", holding an availability alert and a
# 5xx error-ratio alert for each of the two services.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: ory-alerts
  namespace: ory
  labels:
    # NOTE(review): presumably matched by the Prometheus ruleSelector;
    # confirm against the Prometheus custom resource.
    role: alert-rules
spec:
  groups:
    - name: ory
      rules:
        # --- Availability -------------------------------------------------
        # Fires when a target whose job label matches ".*hydra.*" has
        # reported up == 0 continuously for 2 minutes.
        - alert: HydraDown
          expr: up{job=~".*hydra.*"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: 'Hydra is down'
            description: 'Hydra instance {{ $labels.namespace }}/{{ $labels.pod }} is down.'

        # Same check as HydraDown, for jobs matching ".*kratos.*".
        - alert: KratosDown
          expr: up{job=~".*kratos.*"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: 'Kratos is down'
            description: 'Kratos instance {{ $labels.namespace }}/{{ $labels.pod }} is down.'

        # --- Error ratio --------------------------------------------------
        # Ratio of 5xx request rate to total request rate over a 5-minute
        # window, summed across all matching series; fires when the ratio
        # stays above 0.05 (5%) for 5 minutes.
        # NOTE(review): assumes http_requests_total carries a "code" label
        # holding the HTTP status; verify against the exported metrics.
        - alert: HydraHighErrorRate
          # Folded scalar: the three lines below join with single spaces
          # into one PromQL expression.
          expr: >-
            sum(rate(http_requests_total{job=~".*hydra.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*hydra.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Hydra has a high HTTP error rate'
            description: 'Hydra 5xx error rate is {{ $value | humanizePercentage }}.'

        # Same ratio check as HydraHighErrorRate, for jobs matching
        # ".*kratos.*".
        - alert: KratosHighErrorRate
          # Folded scalar: the three lines below join with single spaces
          # into one PromQL expression.
          expr: >-
            sum(rate(http_requests_total{job=~".*kratos.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*kratos.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Kratos has a high HTTP error rate'
            description: 'Kratos 5xx error rate is {{ $value | humanizePercentage }}.'