# File: sbbb/base/monitoring/recording-rules.yaml
# (file-viewer listing: 41 lines, 1.4 KiB, YAML)

---
# PrometheusRule defining recording rules: expressions that are evaluated on a
# fixed interval and stored under the `record` name as new series.
#
# Two groups are defined:
#   * linkerd-service-sli - per-(deployment, namespace) request rate, error
#     rate and latency quantiles for inbound traffic.
#   * node-aggregates     - memory, CPU and swap usage ratios built from
#     node_* metrics.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: recording-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably these labels are what the Prometheus instance's
    # rule selector matches on - confirm before changing them.
    role: alert-rules
    release: kube-prometheus-stack
spec:
  groups:
    # Service-level indicators, aggregated by (deployment, namespace) over a
    # 5m rate window and restricted to direction="inbound".
    # NOTE(review): response_total / response_latency_ms_bucket are presumably
    # the Linkerd proxy metrics, going by the group name - confirm.
    - name: linkerd-service-sli
      interval: 30s
      rules:
        # Inbound responses per second.
        - record: service:request_rate:5m
          expr: sum(rate(response_total{direction="inbound"}[5m])) by (deployment, namespace)

        # Fraction of inbound responses with classification="failure".
        # The numerator is zero-filled (`or <total> * 0`) so that a service
        # with traffic but no failure series records 0 instead of producing
        # no sample at all; without it the division has nothing to match.
        - record: service:error_rate:5m
          expr: |
            (
              sum(rate(response_total{classification="failure",direction="inbound"}[5m])) by (deployment, namespace)
              or
              sum(rate(response_total{direction="inbound"}[5m])) by (deployment, namespace) * 0
            )
            / sum(rate(response_total{direction="inbound"}[5m])) by (deployment, namespace)

        # 95th-percentile inbound latency. `le` must stay in the grouping:
        # histogram_quantile needs the bucket boundaries.
        - record: service:latency_p95:5m
          expr: |
            histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{direction="inbound"}[5m])) by (le, deployment, namespace))

        # 99th-percentile inbound latency; same aggregation as the p95 rule.
        - record: service:latency_p99:5m
          expr: |
            histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{direction="inbound"}[5m])) by (le, deployment, namespace))

    # Node-level usage ratios (1 = fully used, 0 = idle/free).
    - name: node-aggregates
      interval: 30s
      rules:
        # Share of memory that is not available.
        - record: node:memory_usage_ratio
          expr: 1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes

        # 1 minus the idle-mode CPU rate averaged per instance over 5m.
        - record: node:cpu_usage_ratio
          expr: 1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))

        # Share of swap in use. The `> 0` filter drops nodes whose SwapTotal
        # is 0 (swap disabled): dividing by zero there would record NaN
        # (1 - 0/0) rather than a meaningful ratio.
        - record: node:swap_usage_ratio
          expr: 1 - node_memory_SwapFree_bytes / (node_memory_SwapTotal_bytes > 0)