# File: sbbb/base/storage/seaweedfs-alertrules.yaml

---
# PrometheusRule declaring the "seaweedfs" alert group in the `storage`
# namespace: availability of the master / volume / filer components, disk
# headroom, and health of the S3 mirror job.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: seaweedfs-alerts
  namespace: storage
  labels:
    # NOTE(review): presumably these labels are what the Prometheus instance's
    # ruleSelector matches on - confirm against the Prometheus CR.
    role: alert-rules
    release: kube-prometheus-stack
spec:
  groups:
    - name: seaweedfs
      rules:
        # --- Component availability -------------------------------------
        # Each rule fires when a scrape target whose `job` label matches
        # .*seaweedfs.* and whose `app` label names the component has
        # reported up == 0 continuously for 2 minutes.
        # NOTE(review): `up == 0` returns no series when the target is not
        # scraped at all; consider an absent() companion rule if a fully
        # missing target must also alert.
        - alert: SeaweedFSMasterDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-master"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS master is down"
            description: "SeaweedFS master {{ $labels.namespace }}/{{ $labels.pod }} is down."

        - alert: SeaweedFSVolumeDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-volume"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS volume server is down"
            description: "SeaweedFS volume server {{ $labels.namespace }}/{{ $labels.pod }} is down."

        - alert: SeaweedFSFilerDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-filer"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS filer is down"
            description: "SeaweedFS filer {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # --- Disk headroom ----------------------------------------------
        # Free fraction is computed as free / (free + used); fires when it
        # stays below 0.15 (15%) for 5 minutes.
        # NOTE(review): confirm these metric names match what the deployed
        # SeaweedFS exporter actually exposes.
        - alert: SeaweedFSDiskLow
          expr: (seaweedfs_disk_free_bytes / (seaweedfs_disk_free_bytes + seaweedfs_disk_used_bytes)) < 0.15
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "SeaweedFS disk space low"
            description: "SeaweedFS disk on {{ $labels.instance }} has less than 15% free space."

        # --- S3 mirror job ------------------------------------------------
        # Fires when any Job in `storage` whose name starts with
        # seaweedfs-s3-mirror reports a failed count above zero for 5 minutes.
        # NOTE(review): a failed Job object that is retained after a later
        # successful run may keep this alert firing - verify against the
        # CronJob's history limits.
        - alert: SeaweedFSMirrorJobFailing
          expr: |
            kube_job_status_failed{namespace="storage", job_name=~"seaweedfs-s3-mirror.*"} > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS S3 mirror job failed"
            description: "Job {{ $labels.job_name }} failed. S3 backups to Scaleway are not running."

        # Fires when more than 7200 seconds (2 hours) have elapsed since the
        # CronJob's last recorded success, sustained for 10 minutes. $value
        # is that elapsed time in seconds, rendered via humanizeDuration.
        # NOTE(review): if the CronJob has never succeeded the metric may be
        # absent and this rule would stay silent - confirm.
        - alert: SeaweedFSMirrorStale
          expr: |
            time() - kube_cronjob_status_last_successful_time{namespace="storage", cronjob="seaweedfs-s3-mirror"} > 7200
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "SeaweedFS S3 mirror hasn't succeeded in 2+ hours"
            description: "CronJob seaweedfs-s3-mirror last succeeded {{ $value | humanizeDuration }} ago."