---
# PrometheusRule declaring availability, capacity and backup alerts for the
# SeaweedFS deployment in the "storage" namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: seaweedfs-alerts
  namespace: storage
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # ruleSelector of the kube-prometheus-stack release matches on - confirm
    # against the Prometheus custom resource.
    role: alert-rules
    release: kube-prometheus-stack
spec:
  groups:
    - name: seaweedfs
      rules:
        # --- Component availability -------------------------------------
        # Each *Down alert fires when a scrape target whose job label
        # contains "seaweedfs" and whose app label names the component has
        # reported up == 0 continuously for 2 minutes.
        # NOTE(review): `up == 0` returns no series when the target is gone
        # from service discovery entirely; consider an absent() companion
        # rule if that case must page as well.
        - alert: SeaweedFSMasterDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-master"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS master is down"
            description: "SeaweedFS master {{ $labels.namespace }}/{{ $labels.pod }} is down."

        - alert: SeaweedFSVolumeDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-volume"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS volume server is down"
            description: "SeaweedFS volume server {{ $labels.namespace }}/{{ $labels.pod }} is down."

        - alert: SeaweedFSFilerDown
          expr: up{job=~".*seaweedfs.*", app="seaweedfs-filer"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS filer is down"
            description: "SeaweedFS filer {{ $labels.namespace }}/{{ $labels.pod }} is down."

        # --- Capacity -----------------------------------------------------
        # Free fraction = free / (free + used); warns once it has stayed
        # below 0.15 (15%) for 5 minutes.
        # NOTE(review): assumes both seaweedfs_disk_*_bytes series are
        # exported with identical label sets so the division matches
        # one-to-one - verify against the scraped metrics.
        - alert: SeaweedFSDiskLow
          expr: (seaweedfs_disk_free_bytes / (seaweedfs_disk_free_bytes + seaweedfs_disk_used_bytes)) < 0.15
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "SeaweedFS disk space low"
            description: "SeaweedFS disk on {{ $labels.instance }} has less than 15% free space."

        # --- S3 mirror (backup) CronJob ----------------------------------
        # Fires when any Job in "storage" whose name starts with
        # "seaweedfs-s3-mirror" has reported a failed count above zero for
        # 5 minutes.
        # NOTE(review): kube_job_status_failed is presumably provided by
        # kube-state-metrics; the series persists until the failed Job
        # object is deleted, so the alert clears only after cleanup.
        - alert: SeaweedFSMirrorJobFailing
          expr: |
            kube_job_status_failed{namespace="storage", job_name=~"seaweedfs-s3-mirror.*"} > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "SeaweedFS S3 mirror job failed"
            description: "Job {{ $labels.job_name }} failed. S3 backups to Scaleway are not running."

        # Fires when the last successful run of the seaweedfs-s3-mirror
        # CronJob is more than 7200 seconds (2 hours) old and that has held
        # for a further 10 minutes. $value is the age in seconds, rendered
        # through humanizeDuration in the description.
        - alert: SeaweedFSMirrorStale
          expr: |
            time() - kube_cronjob_status_last_successful_time{namespace="storage", cronjob="seaweedfs-s3-mirror"} > 7200
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "SeaweedFS S3 mirror hasn't succeeded in 2+ hours"
            description: "CronJob seaweedfs-s3-mirror last succeeded {{ $value | humanizeDuration }} ago."