---
# PrometheusRule defining availability and HTTP 5xx error-rate alerts for the
# Ory Hydra and Ory Kratos services, all in a single rule group named "ory".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: ory-alerts
  namespace: ory
  labels:
    # NOTE(review): presumably the label a Prometheus ruleSelector matches on
    # to load this rule object -- confirm against the Prometheus resource.
    role: alert-rules
spec:
  groups:
    - name: ory
      rules:
        # Fires (critical) when a scrape target whose job label contains
        # "hydra" has reported up == 0 for 2 minutes. Prometheus regex
        # matchers are fully anchored, hence the surrounding ".*".
        - alert: HydraDown
          expr: up{job=~".*hydra.*"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Hydra is down"
            description: "Hydra instance {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # Same check as HydraDown, for jobs whose name contains "kratos".
        - alert: KratosDown
          expr: up{job=~".*kratos.*"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Kratos is down"
            description: "Kratos instance {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # Fires (warning) when the share of Hydra requests answered with a 5xx
        # code, measured over 5m rate windows and summed across all matching
        # series, stays above 0.05 (5%) for 5 minutes. The folded scalar below
        # collapses to a single-line PromQL expression.
        - alert: HydraHighErrorRate
          expr: >-
            sum(rate(http_requests_total{job=~".*hydra.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*hydra.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Hydra has a high HTTP error rate"
            description: "Hydra 5xx error rate is {{ $value | humanizePercentage }}."

        # Same 5xx-ratio check as HydraHighErrorRate, for jobs whose name
        # contains "kratos".
        - alert: KratosHighErrorRate
          expr: >-
            sum(rate(http_requests_total{job=~".*kratos.*",code=~"5.."}[5m]))
            / sum(rate(http_requests_total{job=~".*kratos.*"}[5m]))
            > 0.05
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Kratos has a high HTTP error rate"
            description: "Kratos 5xx error rate is {{ $value | humanizePercentage }}."