# Source path: sbbb/base/ory/ory-alertrules.yaml

---
# PrometheusRule defining availability and HTTP error-rate alerts for the
# Ory services Hydra and Kratos. All rules live in a single group, "ory".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: ory-alerts
  namespace: ory
  labels:
    # NOTE(review): presumably the label the Prometheus ruleSelector matches
    # on to pick up this rule file - confirm against the Prometheus resource.
    role: alert-rules
spec:
  groups:
    - name: ory
      rules:
        # --- Availability ---------------------------------------------------
        # Fires when a scrape target whose `job` label contains "hydra" has
        # reported up == 0 continuously for 2 minutes.
        # NOTE(review): `up == 0` yields no result when the target has no
        # `up` series at all (e.g. it dropped out of service discovery), so
        # that case is not covered here - consider an absent() companion.
        # NOTE(review): the description assumes the target carries
        # `namespace` and `pod` labels - confirm with the scrape config.
        - alert: HydraDown
          expr: 'up{job=~".*hydra.*"} == 0'
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Hydra is down"
            description: "Hydra instance {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # Same check as HydraDown, for targets whose `job` label contains
        # "kratos". The same review notes apply.
        - alert: KratosDown
          expr: 'up{job=~".*kratos.*"} == 0'
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Kratos is down"
            description: "Kratos instance {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # --- HTTP error rate ------------------------------------------------
        # Ratio of the 5xx request rate to the total request rate, each taken
        # over a 5m window and summed across all matching series. Fires when
        # the ratio stays above 0.05 (5%) for 5 minutes.
        # The folded scalar (>-) joins the lines below with single spaces, so
        # the resulting expression is one line of PromQL.
        # NOTE(review): assumes the service exports `http_requests_total`
        # with a `code` label holding the HTTP status - TODO confirm.
        - alert: HydraHighErrorRate
          expr: >-
            sum(rate(http_requests_total{job=~".*hydra.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*hydra.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Hydra has a high HTTP error rate"
            description: "Hydra 5xx error rate is {{ $value | humanizePercentage }}."

        # Same ratio as HydraHighErrorRate, for jobs whose `job` label
        # contains "kratos". The same review note applies.
        - alert: KratosHighErrorRate
          expr: >-
            sum(rate(http_requests_total{job=~".*kratos.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*kratos.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Kratos has a high HTTP error rate"
            description: "Kratos 5xx error rate is {{ $value | humanizePercentage }}."