feat: split Grafana dashboards into per-folder ConfigMaps
Replace monolithic dashboards-configmap.yaml with 10 dedicated files, one per Grafana folder: Ingress, Observability, Infrastructure, Storage, Identity, DevTools, Search, Media, La Suite, Communications. New dashboards for Longhorn, PostgreSQL/CNPG, Cert-Manager, SeaweedFS, Hydra, Kratos, Gitea, OpenSearch, LiveKit, La Suite golden signals (Linkerd metrics), Matrix, and Email Pipeline.
This commit is contained in:
140
base/monitoring/dashboards-comms.yaml
Normal file
140
base/monitoring/dashboards-comms.yaml
Normal file
@@ -0,0 +1,140 @@
|
||||
# Grafana dashboard ConfigMaps — Communications
---
# Matrix / Tuwunel dashboard (Grafana uid "matrix", folder annotation "Communications").
#
# Four timeseries panels on the "prometheus" datasource, all restricted to
# namespace="matrix", direction="inbound" and grouped by `deployment`:
#   - Request Rate : rate of request_total over 5m
#   - Success Rate : success-classified response_total / all response_total
#   - Latency p95  : histogram_quantile(0.95) over response_latency_ms_bucket
#   - Error Rate   : failure-classified response_total / all response_total
#
# The Error Rate numerator is wrapped as `(failures or total * 0)`: when a
# deployment has no classification="failure" series, the plain ratio has
# nothing to match on the left-hand side and the panel renders "No data";
# the fallback makes it plot 0 for every deployment that has traffic.
#
# NOTE(review): the metric names look like Linkerd proxy metrics and the
# grafana_dashboard / grafana_folder keys look like Grafana sidecar
# conventions — confirm against the cluster's mesh and sidecar configuration.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-matrix
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Communications"
data:
  matrix.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(request_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Success Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\", classification=\"success\"}[5m])) by (deployment) / sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        },
        {
          "title": "Latency p95",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        },
        {
          "title": "Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "(sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\", classification=\"failure\"}[5m])) by (deployment) or sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment) * 0) / sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["matrix", "tuwunel", "communications"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Matrix / Tuwunel",
      "uid": "matrix"
    }
|
||||
---
# Email Pipeline dashboard (Grafana uid "email-pipeline", folder annotation
# "Communications").
#
# Three timeseries panels on the "prometheus" datasource, all restricted to
# namespace="lasuite", direction="inbound" and to the deployments
# postfix, messages-mta-in, messages-mta-out, messages-mpa and
# messages-worker, grouped by `deployment`:
#   - Request Rate by Service : rate of request_total over 5m
#   - Error Rate by Service   : failure-classified response_total / all response_total
#   - Latency p95 by Service  : histogram_quantile(0.95) over response_latency_ms_bucket
#
# The Error Rate numerator is wrapped as `(failures or total * 0)`: when a
# deployment has no classification="failure" series, the plain ratio has
# nothing to match on the left-hand side and the panel renders "No data";
# the fallback makes it plot 0 for every deployment that has traffic.
#
# NOTE(review): these look like Linkerd HTTP-level proxy metrics; whether
# SMTP-facing deployments such as postfix actually export them is not
# visible here — confirm the panels return data for every listed deployment.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-email
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Communications"
data:
  email.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(request_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Error Rate by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "(sum(rate(response_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\", classification=\"failure\"}[5m])) by (deployment) or sum(rate(response_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (deployment) * 0) / sum(rate(response_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        },
        {
          "title": "Latency p95 by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["email", "postfix", "communications"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Email Pipeline",
      "uid": "email-pipeline"
    }
|
||||
Reference in New Issue
Block a user