feat: split Grafana dashboards into per-folder ConfigMaps
Replace monolithic dashboards-configmap.yaml with 10 dedicated files, one per Grafana folder: Ingress, Observability, Infrastructure, Storage, Identity, DevTools, Search, Media, La Suite, Communications. New dashboards for Longhorn, PostgreSQL/CNPG, Cert-Manager, SeaweedFS, Hydra, Kratos, Gitea, OpenSearch, LiveKit, La Suite golden signals (Linkerd metrics), Matrix, and Email Pipeline.
This commit is contained in:
321
base/monitoring/dashboards-infrastructure.yaml
Normal file
321
base/monitoring/dashboards-infrastructure.yaml
Normal file
@@ -0,0 +1,321 @@
|
||||
# Grafana dashboard ConfigMaps — Infrastructure
|
||||
---
# Grafana dashboard "OpenBao / Vault" (uid: openbao), filed under the
# "Infrastructure" folder via the grafana_folder annotation.
# NOTE(review): the grafana_dashboard label is presumably what the Grafana
# dashboard sidecar selects on — confirm against the sidecar configuration.
#
# Latency panel: the go-metrics Prometheus sink used by Vault/OpenBao publishes
# vault_core_handle_request as a *summary* (quantile label, values in
# milliseconds). There is no vault_core_handle_request_bucket series, so
# histogram_quantile() over it would return nothing. The panel therefore reads
# the pre-computed 0.99 quantile and takes the max across instances.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-openbao
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Everything below the block scalar indicator is strict JSON: no comments.
  openbao.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Vault/OpenBao Sealed Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_core_unsealed", "legendFormat": "unsealed" }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"SEALED","color":"red"},"1":{"text":"UNSEALED","color":"green"}}}] }
          }
        },
        {
          "title": "Token Count",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "vault_token_count", "legendFormat": "tokens" }
          ]
        },
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "sum(rate(vault_core_handle_request_count[5m]))", "legendFormat": "req/s" }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Request Latency (p99)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "max(vault_core_handle_request{quantile=\"0.99\"})", "legendFormat": "p99" }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["vault", "openbao"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "OpenBao / Vault",
      "uid": "openbao"
    }
|
||||
---
# Grafana dashboard "Longhorn Storage" (uid: longhorn), filed under the
# "Infrastructure" folder. Panels: per-volume actual size, total volume
# capacity, per-disk usage percentage gauge (yellow at 70, red at 85),
# node count, and an instant table of longhorn_volume_state.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: monitoring
  name: grafana-dashboard-longhorn
  annotations:
    grafana_folder: "Infrastructure"
  labels:
    grafana_dashboard: "1"
data:
  # The block scalar holds strict JSON; comments are not allowed inside it.
  longhorn.json: |
    {
      "uid": "longhorn",
      "title": "Longhorn Storage",
      "tags": ["longhorn", "storage"],
      "schemaVersion": 39,
      "editable": true,
      "time": { "from": "now-1h", "to": "now" },
      "annotations": { "list": [] },
      "panels": [
        {
          "title": "Volume Usage",
          "type": "timeseries",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
          "fieldConfig": { "defaults": { "unit": "bytes" } },
          "targets": [
            { "expr": "longhorn_volume_actual_size_bytes", "legendFormat": "{{volume}}" }
          ]
        },
        {
          "title": "Volume Capacity",
          "type": "stat",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
          "fieldConfig": { "defaults": { "unit": "bytes" } },
          "targets": [
            { "expr": "sum(longhorn_volume_capacity_bytes)", "legendFormat": "total" }
          ]
        },
        {
          "title": "Disk Usage %",
          "type": "gauge",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
          "fieldConfig": {
            "defaults": {
              "unit": "percent",
              "max": 100,
              "thresholds": {
                "steps": [
                  { "color": "green", "value": null },
                  { "color": "yellow", "value": 70 },
                  { "color": "red", "value": 85 }
                ]
              }
            }
          },
          "targets": [
            { "expr": "(longhorn_disk_usage_bytes / longhorn_disk_capacity_bytes) * 100", "legendFormat": "{{node}}" }
          ]
        },
        {
          "title": "Node Status",
          "type": "stat",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 12, "y": 4, "w": 6, "h": 4 },
          "targets": [
            { "expr": "longhorn_node_count_total", "legendFormat": "nodes" }
          ]
        },
        {
          "title": "Volume State",
          "type": "table",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
          "targets": [
            {
              "expr": "longhorn_volume_state",
              "legendFormat": "{{volume}} — {{state}}",
              "format": "table",
              "instant": true
            }
          ]
        }
      ]
    }
|
||||
---
# Grafana dashboard "PostgreSQL / CNPG" (uid: postgres-cnpg), filed under the
# "Infrastructure" folder. Panels: database size, active connections by state,
# collector up/down stat, transactions per second (commits + rollbacks), and
# a cache hit ratio gauge (blks_hit over blks_hit + blks_read).
# NOTE(review): the cnpg_* metric names depend on the monitoring queries
# configured for the cluster — confirm they are all exported.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: monitoring
  name: grafana-dashboard-postgres
  annotations:
    grafana_folder: "Infrastructure"
  labels:
    grafana_dashboard: "1"
data:
  # The block scalar holds strict JSON; comments are not allowed inside it.
  postgres.json: |
    {
      "uid": "postgres-cnpg",
      "title": "PostgreSQL / CNPG",
      "tags": ["postgres", "cnpg"],
      "schemaVersion": 39,
      "editable": true,
      "time": { "from": "now-1h", "to": "now" },
      "annotations": { "list": [] },
      "panels": [
        {
          "title": "Database Size",
          "type": "timeseries",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
          "fieldConfig": { "defaults": { "unit": "bytes" } },
          "targets": [
            { "expr": "cnpg_pg_database_size_bytes", "legendFormat": "{{datname}}" }
          ]
        },
        {
          "title": "Active Connections",
          "type": "timeseries",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
          "targets": [
            { "expr": "cnpg_pg_stat_activity_count", "legendFormat": "{{state}}" }
          ]
        },
        {
          "title": "Collector Status",
          "type": "stat",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 8, "w": 6, "h": 4 },
          "fieldConfig": {
            "defaults": {
              "mappings": [
                {
                  "type": "value",
                  "options": {
                    "0": { "text": "DOWN", "color": "red" },
                    "1": { "text": "UP", "color": "green" }
                  }
                }
              ]
            }
          },
          "targets": [
            { "expr": "cnpg_collector_up", "legendFormat": "{{instance}}" }
          ]
        },
        {
          "title": "Transactions/sec",
          "type": "timeseries",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 6, "y": 8, "w": 12, "h": 8 },
          "fieldConfig": { "defaults": { "unit": "ops" } },
          "targets": [
            { "expr": "rate(cnpg_pg_stat_database_xact_commit[5m]) + rate(cnpg_pg_stat_database_xact_rollback[5m])", "legendFormat": "{{datname}}" }
          ]
        },
        {
          "title": "Cache Hit Ratio",
          "type": "gauge",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 18, "y": 8, "w": 6, "h": 4 },
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } },
          "targets": [
            { "expr": "cnpg_pg_stat_database_blks_hit / (cnpg_pg_stat_database_blks_hit + cnpg_pg_stat_database_blks_read)", "legendFormat": "{{datname}}" }
          ]
        }
      ]
    }
|
||||
---
# Grafana dashboard "Cert-Manager" (uid: cert-manager), filed under the
# "Infrastructure" folder. Panels: an instant table of seconds until each
# certificate expires (sorted ascending by Value, so the soonest expiry is
# first), a readiness stat per certificate, and the ACME client request rate.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: monitoring
  name: grafana-dashboard-certmanager
  annotations:
    grafana_folder: "Infrastructure"
  labels:
    grafana_dashboard: "1"
data:
  # The block scalar holds strict JSON; comments are not allowed inside it.
  certmanager.json: |
    {
      "uid": "cert-manager",
      "title": "Cert-Manager",
      "tags": ["cert-manager", "tls"],
      "schemaVersion": 39,
      "editable": true,
      "time": { "from": "now-1h", "to": "now" },
      "annotations": { "list": [] },
      "panels": [
        {
          "title": "Certificates Expiring",
          "type": "table",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 0, "w": 24, "h": 8 },
          "fieldConfig": { "defaults": { "unit": "s" } },
          "targets": [
            {
              "expr": "certmanager_certificate_expiration_timestamp_seconds - time()",
              "legendFormat": "{{name}} ({{namespace}})",
              "format": "table",
              "instant": true
            }
          ],
          "transformations": [
            {
              "id": "sortBy",
              "options": {
                "fields": {},
                "sort": [
                  { "field": "Value", "desc": false }
                ]
              }
            }
          ]
        },
        {
          "title": "Certificate Readiness",
          "type": "stat",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 0, "y": 8, "w": 12, "h": 4 },
          "fieldConfig": {
            "defaults": {
              "mappings": [
                {
                  "type": "value",
                  "options": {
                    "0": { "text": "NotReady", "color": "red" },
                    "1": { "text": "Ready", "color": "green" }
                  }
                }
              ]
            }
          },
          "targets": [
            { "expr": "certmanager_certificate_ready_status{condition=\"True\"}", "legendFormat": "{{name}}" }
          ]
        },
        {
          "title": "ACME Request Rate",
          "type": "timeseries",
          "datasource": { "uid": "prometheus" },
          "gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 },
          "fieldConfig": { "defaults": { "unit": "reqps" } },
          "targets": [
            { "expr": "rate(certmanager_http_acme_client_request_count[5m])", "legendFormat": "{{status}}" }
          ]
        }
      ]
    }
|
||||
Reference in New Issue
Block a user