---
# PrometheusRule defining infrastructure alerts for Longhorn storage usage
# and cert-manager certificate expiry/readiness.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: infrastructure-alerts
  namespace: monitoring
  labels:
    # NOTE(review): presumably matched by the Prometheus ruleSelector;
    # confirm against the Prometheus custom resource in this cluster.
    role: alert-rules
spec:
  groups:
    - name: infrastructure
      rules:
        # Disk usage / disk capacity above 90%, sustained for 5 minutes.
        - alert: LonghornDiskSpaceLow
          expr: (longhorn_disk_usage_bytes / longhorn_disk_capacity_bytes) > 0.90
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Longhorn disk space critically low"
            description: "Longhorn disk on {{ $labels.node }} is over 90% full."

        # Volume actual size / volume capacity above 85%, sustained for
        # 5 minutes.
        - alert: LonghornVolumeSpaceLow
          expr: (longhorn_volume_actual_size_bytes / longhorn_volume_capacity_bytes) > 0.85
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Longhorn volume space low"
            description: "Longhorn volume {{ $labels.volume }} is over 85% full."

        # Remaining lifetime below 86400 seconds (24 hours), sustained for
        # 10 minutes. The comparison is also true for negative remaining
        # time, i.e. an expiry timestamp that is already in the past.
        - alert: CertExpiringCritical
          expr: (certmanager_certificate_expiration_timestamp_seconds - time()) < 86400
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "Certificate expiring in less than 24 hours"
            description: "Certificate {{ $labels.name }} in {{ $labels.namespace }} expires in less than 24 hours"

        # Remaining lifetime below 604800 seconds (7 days), sustained for
        # 30 minutes. This expression has no lower bound, so it stays true
        # for certificates that also match CertExpiringCritical; both alerts
        # can be active for the same certificate.
        - alert: CertExpiringSoon
          expr: (certmanager_certificate_expiration_timestamp_seconds - time()) < 604800
          for: 30m
          labels:
            severity: warning
          annotations:
            summary: "Certificate expiring in less than 7 days"
            description: "Certificate {{ $labels.name }} in {{ $labels.namespace }} expires in less than 7 days"

        # The condition="True" series differs from 1 for 10 minutes.
        # NOTE(review): presumably that series is 1 while the certificate is
        # Ready; confirm against the cert-manager metrics documentation.
        # Single-quoted because the expression contains braces and double
        # quotes; the parsed value is unchanged.
        - alert: CertNotReady
          expr: 'certmanager_certificate_ready_status{condition="True"} != 1'
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "Certificate not ready"
            description: "Certificate {{ $labels.name }} in {{ $labels.namespace }} is not in a ready state."