41 lines
1.4 KiB
YAML
41 lines
1.4 KiB
YAML
---
# PrometheusRule containing recording rules only (no alerts):
#   * linkerd-service-sli - per-(deployment, namespace) request rate, error
#     ratio and latency quantiles computed from inbound proxy metrics.
#   * node-aggregates     - per-node memory, CPU and swap utilisation ratios.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: recording-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # ruleSelector matches on - confirm before changing either value.
    role: alert-rules
    release: kube-prometheus-stack
spec:
  groups:
    - name: linkerd-service-sli
      interval: 30s
      rules:
        # Inbound responses per second over a 5m window.
        - record: service:request_rate:5m
          expr: |-
            sum(rate(response_total{direction="inbound"}[5m])) by (deployment, namespace)

        # Fraction of inbound responses classified as failures (5m window).
        # NOTE(review): a workload that has never emitted a
        # classification="failure" series produces no sample here rather
        # than 0 - confirm consumers treat "no data" as healthy.
        - record: service:error_rate:5m
          expr: |-
            sum(rate(response_total{classification="failure",direction="inbound"}[5m])) by (deployment, namespace)
            /
            sum(rate(response_total{direction="inbound"}[5m])) by (deployment, namespace)

        # 95th-percentile inbound latency estimated from histogram buckets.
        - record: service:latency_p95:5m
          expr: |-
            histogram_quantile(
              0.95,
              sum(rate(response_latency_ms_bucket{direction="inbound"}[5m])) by (le, deployment, namespace)
            )

        # 99th-percentile inbound latency estimated from histogram buckets.
        - record: service:latency_p99:5m
          expr: |-
            histogram_quantile(
              0.99,
              sum(rate(response_latency_ms_bucket{direction="inbound"}[5m])) by (le, deployment, namespace)
            )

    - name: node-aggregates
      interval: 30s
      rules:
        # Share of memory that is not available, per scraped series.
        - record: node:memory_usage_ratio
          expr: 1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes

        # Share of CPU time not spent idle, averaged across cores per instance.
        - record: node:cpu_usage_ratio
          expr: 1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))

        # Share of swap in use. The `> 0` filter drops hosts whose SwapTotal
        # is 0 (swap disabled); without it the rule evaluates 0/0 and records
        # a NaN sample for every such host.
        - record: node:swap_usage_ratio
          expr: |-
            1 - node_memory_SwapFree_bytes / (node_memory_SwapTotal_bytes > 0)