Files
sbbb/base/monitoring/dashboards-infrastructure.yaml
Sienna Meridian Satterwhite eab91eb85d feat(monitoring): expanded dashboards for all services
Enriched dashboards for DevTools (Gitea), Identity (Hydra/Kratos),
Infrastructure (Longhorn, PostgreSQL, cert-manager, OpenBao),
Ingress (Pingora), and Storage (SeaweedFS).
2026-03-25 17:58:51 +00:00

617 lines
23 KiB
YAML

# Grafana dashboard ConfigMaps — Infrastructure
---
# Grafana dashboard for OpenBao / Vault, stored as a ConfigMap in the
# "monitoring" namespace. The dashboard JSON is the value of
# data["openbao.json"].
# NOTE(review): the grafana_dashboard label and grafana_folder annotation are
# presumably what the Grafana dashboard sidecar selects on — confirm against
# the Grafana deployment values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-openbao
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Rows, top to bottom: Health, Request Performance, Barrier & Cache, Audit,
  # Runtime. Every panel reads from the datasource with uid "prometheus".
  #
  # "Request Latency (avg)" is rendered with unit "ms": Vault/OpenBao timing
  # summaries such as vault_core_handle_request are reported in milliseconds,
  # so sum/count is an average in milliseconds, not seconds.
  openbao.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "title": "Health",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
          "collapsed": false
        },
        {
          "title": "Seal Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_core_unsealed", "legendFormat": "", "instant": true }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"SEALED","color":"red"},"1":{"text":"UNSEALED","color":"green"}}}] }
          }
        },
        {
          "title": "Active Node",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_core_active", "legendFormat": "", "instant": true }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"Standby","color":"yellow"},"1":{"text":"Active","color":"green"}}}] }
          }
        },
        {
          "title": "In-Flight Requests",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_core_in_flight_requests", "legendFormat": "", "instant": true }
          ],
          "fieldConfig": { "defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":50},{"color":"red","value":200}] } } }
        },
        {
          "title": "Active Leases",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_expire_num_leases", "legendFormat": "", "instant": true }
          ]
        },
        {
          "title": "Irrevocable Leases",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_expire_num_irrevocable_leases", "legendFormat": "", "instant": true }
          ],
          "fieldConfig": { "defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":1},{"color":"red","value":10}] } } }
        },
        {
          "title": "Mount Table Entries",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_core_mount_table_num_entries", "legendFormat": "", "instant": true }
          ]
        },
        {
          "title": "Request Performance",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
          "collapsed": false
        },
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "sum(rate(vault_core_handle_request_count[5m]))", "legendFormat": "req/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Request Latency (avg)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_core_handle_request_sum[5m]) / rate(vault_core_handle_request_count[5m])", "legendFormat": "avg" }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        },
        {
          "title": "Token Lookups / sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_token_lookup_count[5m])", "legendFormat": "lookups/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Barrier & Cache",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
          "collapsed": false
        },
        {
          "title": "Barrier Ops / sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_barrier_get_count[5m])", "legendFormat": "get" },
            { "expr": "rate(vault_barrier_put_count[5m])", "legendFormat": "put" },
            { "expr": "rate(vault_barrier_list_count[5m])", "legendFormat": "list" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Cache Hit Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_cache_hit[5m])", "legendFormat": "cache hits/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Audit",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
          "collapsed": false
        },
        {
          "title": "Audit Log Throughput",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_audit_log_request_count[5m])", "legendFormat": "request logs/s" },
            { "expr": "rate(vault_audit_log_response_count[5m])", "legendFormat": "response logs/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Audit Log Failures",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_audit_log_request_failure[5m])", "legendFormat": "request failures/s" },
            { "expr": "rate(vault_audit_log_response_failure[5m])", "legendFormat": "response failures/s" }
          ],
          "fieldConfig": {
            "defaults": { "unit": "ops", "thresholds": { "steps": [{"color":"green","value":null},{"color":"red","value":0.01}] } }
          }
        },
        {
          "title": "Runtime",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 },
          "collapsed": false
        },
        {
          "title": "Memory",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 33 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_runtime_alloc_bytes", "legendFormat": "alloc" },
            { "expr": "vault_runtime_sys_bytes", "legendFormat": "sys" }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Goroutines & Heap Objects",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 33 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_runtime_num_goroutines", "legendFormat": "goroutines" },
            { "expr": "vault_runtime_heap_objects", "legendFormat": "heap objects" }
          ],
          "fieldConfig": { "defaults": { "unit": "short" } }
        },
        {
          "title": "GC Activity",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 33 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(vault_runtime_total_gc_runs[5m])", "legendFormat": "GC runs/s" },
            { "expr": "rate(vault_runtime_total_gc_pause_ns[5m])", "legendFormat": "GC pause ns/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "short" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["vault", "openbao"],
      "templating": { "list": [] },
      "time": { "from": "now-1h", "to": "now" },
      "title": "OpenBao / Vault",
      "uid": "openbao"
    }
---
# Grafana dashboard for Longhorn storage, stored as a ConfigMap in the
# "monitoring" namespace. The dashboard JSON is the value of
# data["longhorn.json"].
# NOTE(review): the grafana_dashboard label and grafana_folder annotation are
# presumably what the Grafana dashboard sidecar selects on — confirm against
# the Grafana deployment values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-longhorn
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Panels: Volume Usage, Volume Capacity, Disk Usage %, Node Status,
  # Volume State. Every panel reads from the datasource with uid "prometheus".
  #
  # "Volume State": longhorn_volume_state encodes the state in the sample
  # value, so the Value column gets value mappings to show a readable name.
  # NOTE(review): mapping taken from the Longhorn metrics reference
  # (1=creating … 6=deleting) — confirm against the deployed Longhorn version.
  #
  # "Disk Usage %": min is pinned to 0 so the gauge always spans 0–100.
  longhorn.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "title": "Volume Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_volume_actual_size_bytes",
              "legendFormat": "{{volume}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Volume Capacity",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(longhorn_volume_capacity_bytes)",
              "legendFormat": "total",
              "instant": true
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Disk Usage %",
          "type": "gauge",
          "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "(longhorn_disk_usage_bytes / longhorn_disk_capacity_bytes) * 100",
              "legendFormat": "{{node}}",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent",
              "min": 0,
              "max": 100,
              "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":70},{"color":"red","value":85}] }
            }
          }
        },
        {
          "title": "Node Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_node_count_total",
              "legendFormat": "nodes",
              "instant": true
            }
          ]
        },
        {
          "title": "Volume State",
          "type": "table",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_volume_state",
              "legendFormat": "{{volume}}",
              "format": "table",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": {},
            "overrides": [
              {
                "matcher": { "id": "byName", "options": "Value" },
                "properties": [
                  {
                    "id": "mappings",
                    "value": [
                      {
                        "type": "value",
                        "options": {
                          "1": { "text": "creating" },
                          "2": { "text": "attached" },
                          "3": { "text": "detached" },
                          "4": { "text": "attaching" },
                          "5": { "text": "detaching" },
                          "6": { "text": "deleting" }
                        }
                      }
                    ]
                  }
                ]
              }
            ]
          }
        }
      ],
      "schemaVersion": 39,
      "tags": ["longhorn", "storage"],
      "templating": { "list": [] },
      "time": { "from": "now-1h", "to": "now" },
      "title": "Longhorn Storage",
      "uid": "longhorn"
    }
---
# Grafana dashboard for PostgreSQL (CNPG metrics), stored as a ConfigMap in
# the "monitoring" namespace. The dashboard JSON is the value of
# data["postgres.json"].
# NOTE(review): the grafana_dashboard label and grafana_folder annotation are
# presumably what the Grafana dashboard sidecar selects on — confirm against
# the Grafana deployment values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-postgres
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Rows, top to bottom: Health, Databases, I/O & Caching, WAL & Archival.
  # Every panel reads from the datasource with uid "prometheus".
  #
  # "WAL Size" selects value='size' because cnpg_collector_pg_wal exposes
  # several series distinguished by a "value" label (segment count, limits,
  # size); only the size series is a byte quantity.
  # NOTE(review): label values per the CloudNativePG monitoring docs — confirm
  # against the operator version in use.
  #
  # "Temp Files / Bytes Written" plots bytes/s and files/s together; query B
  # (files/s) gets its own unit and a right-hand axis so it is not formatted
  # as bytes per second.
  postgres.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "title": "Health",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
          "collapsed": false
        },
        {
          "title": "Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "cnpg_collector_up", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"DOWN","color":"red"},"1":{"text":"UP","color":"green"}}}] } }
        },
        {
          "title": "Backends",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "sum(cnpg_backends_total)", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":60},{"color":"red","value":80}] } } }
        },
        {
          "title": "Waiting Backends",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "sum(cnpg_backends_waiting_total)", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":5},{"color":"red","value":15}] } } }
        },
        {
          "title": "Longest Transaction",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "max(cnpg_backends_max_tx_duration_seconds)", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":30},{"color":"red","value":300}] } } }
        },
        {
          "title": "Cache Hit Ratio",
          "type": "gauge",
          "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "sum(cnpg_pg_stat_database_blks_hit) / (sum(cnpg_pg_stat_database_blks_hit) + sum(cnpg_pg_stat_database_blks_read))", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "thresholds": { "steps": [{"color":"red","value":null},{"color":"yellow","value":0.9},{"color":"green","value":0.99}] } } }
        },
        {
          "title": "WAL Size",
          "type": "stat",
          "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "cnpg_collector_pg_wal{value='size'}", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Databases",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
          "collapsed": false
        },
        {
          "title": "Database Size",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "cnpg_pg_database_size_bytes{datname!~'template.*|postgres'}", "legendFormat": "{{datname}}" }],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Connections by Database",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "cnpg_backends_total{datname!~'template.*'}", "legendFormat": "{{datname}}" }],
          "fieldConfig": { "defaults": { "unit": "short" } }
        },
        {
          "title": "Transactions / sec (commits)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(cnpg_pg_stat_database_xact_commit{datname!~'template.*|postgres'}[5m])", "legendFormat": "{{datname}} commits" },
            { "expr": "rate(cnpg_pg_stat_database_xact_rollback{datname!~'template.*|postgres'}[5m])", "legendFormat": "{{datname}} rollbacks" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Rows Fetched / sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "rate(cnpg_pg_stat_database_tup_fetched{datname!~'template.*|postgres'}[5m])", "legendFormat": "{{datname}}" }],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "I/O & Caching",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 },
          "collapsed": false
        },
        {
          "title": "Block I/O (reads vs cache hits)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 23 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "sum(rate(cnpg_pg_stat_database_blks_read[5m]))", "legendFormat": "disk reads" },
            { "expr": "sum(rate(cnpg_pg_stat_database_blks_hit[5m]))", "legendFormat": "cache hits" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Temp Files / Bytes Written",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 23 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "refId": "A", "expr": "rate(cnpg_pg_stat_database_temp_bytes[5m])", "legendFormat": "temp bytes/s" },
            { "refId": "B", "expr": "rate(cnpg_pg_stat_database_temp_files[5m])", "legendFormat": "temp files/s" }
          ],
          "fieldConfig": {
            "defaults": { "unit": "Bps" },
            "overrides": [
              {
                "matcher": { "id": "byFrameRefID", "options": "B" },
                "properties": [
                  { "id": "unit", "value": "ops" },
                  { "id": "custom.axisPlacement", "value": "right" }
                ]
              }
            ]
          },
          "options": { "tooltip": { "mode": "multi" } }
        },
        {
          "title": "Row Mutations / sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 31 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "sum(rate(cnpg_pg_stat_database_tup_inserted[5m]))", "legendFormat": "inserts" },
            { "expr": "sum(rate(cnpg_pg_stat_database_tup_updated[5m]))", "legendFormat": "updates" },
            { "expr": "sum(rate(cnpg_pg_stat_database_tup_deleted[5m]))", "legendFormat": "deletes" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Deadlocks & Conflicts",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(cnpg_pg_stat_database_deadlocks[5m])", "legendFormat": "deadlocks/s" },
            { "expr": "rate(cnpg_pg_stat_database_conflicts[5m])", "legendFormat": "conflicts/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "WAL & Archival",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 },
          "collapsed": false
        },
        {
          "title": "WAL Generation Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "rate(cnpg_collector_wal_bytes[5m])", "legendFormat": "WAL bytes/s" }],
          "fieldConfig": { "defaults": { "unit": "Bps" } }
        },
        {
          "title": "WAL Archival",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 40 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(cnpg_pg_stat_archiver_archived_count[5m])", "legendFormat": "archived/s" },
            { "expr": "rate(cnpg_pg_stat_archiver_failed_count[5m])", "legendFormat": "failed/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Seconds Since Last Archive",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 0, "y": 48 },
          "datasource": { "uid": "prometheus" },
          "targets": [{ "expr": "cnpg_pg_stat_archiver_seconds_since_last_archival", "legendFormat": "", "instant": true }],
          "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":300},{"color":"red","value":900}] } } }
        },
        {
          "title": "Checkpoints",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 16, "x": 8, "y": 48 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "rate(cnpg_pg_stat_checkpointer_checkpoints_timed[5m])", "legendFormat": "timed" },
            { "expr": "rate(cnpg_pg_stat_checkpointer_checkpoints_req[5m])", "legendFormat": "requested" }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["postgres", "cnpg"],
      "templating": { "list": [] },
      "time": { "from": "now-1h", "to": "now" },
      "title": "PostgreSQL / CNPG",
      "uid": "postgres-cnpg"
    }
---
# Grafana dashboard for cert-manager, stored as a ConfigMap in the
# "monitoring" namespace. The dashboard JSON is the value of
# data["certmanager.json"].
# NOTE(review): the grafana_dashboard label and grafana_folder annotation are
# presumably what the Grafana dashboard sidecar selects on — confirm against
# the Grafana deployment values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-certmanager
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Panels: Certificates Expiring (table, sorted ascending by remaining
  # seconds), Certificate Readiness, ACME Request Rate. Every panel reads from
  # the datasource with uid "prometheus".
  #
  # "ACME Request Rate" aggregates with sum by (status) so there is exactly
  # one series per legend entry; the legend only shows {{status}}.
  # NOTE(review): the request counter presumably also carries host/path/method
  # labels, which would otherwise yield duplicate legend names — confirm
  # against the cert-manager metrics in this cluster.
  certmanager.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "title": "Certificates Expiring",
          "type": "table",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "certmanager_certificate_expiration_timestamp_seconds - time()",
              "legendFormat": "{{name}} ({{namespace}})",
              "format": "table",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "s" }
          },
          "transformations": [
            { "id": "sortBy", "options": { "fields": {}, "sort": [{ "field": "Value", "desc": false }] } }
          ]
        },
        {
          "title": "Certificate Readiness",
          "type": "stat",
          "gridPos": { "h": 4, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "certmanager_certificate_ready_status{condition=\"True\"}",
              "legendFormat": "{{name}}",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"NotReady","color":"red"},"1":{"text":"Ready","color":"green"}}}] }
          }
        },
        {
          "title": "ACME Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum by (status) (rate(certmanager_http_acme_client_request_count[5m]))",
              "legendFormat": "{{status}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["cert-manager", "tls"],
      "templating": { "list": [] },
      "time": { "from": "now-1h", "to": "now" },
      "title": "Cert-Manager",
      "uid": "cert-manager"
    }