feat(monitoring): expanded dashboards for all services

Enriched dashboards for DevTools (Gitea), Identity (Hydra/Kratos),
Infrastructure (Longhorn, PostgreSQL, cert-manager, OpenBao),
Ingress (Pingora), and Storage (SeaweedFS).
This commit is contained in:
2026-03-25 17:58:51 +00:00
parent 9ee40aaa69
commit eab91eb85d
5 changed files with 1278 additions and 283 deletions

View File

@@ -19,111 +19,210 @@ data:
"links": [],
"panels": [
{
"title": "Requests / sec",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"title": "Traffic Overview",
"type": "row",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"collapsed": false
},
{
"title": "Active Connections",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_http_requests_total[5m]))",
"legendFormat": "total"
},
{
"expr": "sum(rate(pingora_http_requests_total[5m])) by (status_code)",
"legendFormat": "{{status_code}}"
}
{ "expr": "sum(sunbeam_active_connections)", "legendFormat": "", "instant": true }
],
"fieldConfig": {
"defaults": { "unit": "reqps" }
"defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":500},{"color":"red","value":1000}] } }
}
},
{
"title": "Error Rate (5xx)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"title": "Request Rate",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(pingora_http_requests_total[5m]))",
"legendFormat": "5xx ratio"
}
{ "expr": "sum(rate(sunbeam_requests_total[5m]))", "legendFormat": "", "instant": true }
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Error Rate (5xx)",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
"datasource": { "uid": "prometheus" },
"targets": [
{ "expr": "sum(rate(sunbeam_requests_total{status=~\"5..\"}[5m])) / sum(rate(sunbeam_requests_total[5m]))", "legendFormat": "", "instant": true }
],
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
}
},
{
"title": "Request Latency (p50 / p95 / p99)",
"title": "Latency (p95)",
"type": "stat",
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
"datasource": { "uid": "prometheus" },
"targets": [
{ "expr": "histogram_quantile(0.95, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))", "legendFormat": "", "instant": true }
],
"fieldConfig": { "defaults": { "unit": "s", "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.5},{"color":"red","value":2}] } } }
},
{
"title": "Requests & Latency",
"type": "row",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"collapsed": false
},
{
"title": "Requests / sec by Status",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"expr": "sum(rate(sunbeam_requests_total[5m])) by (status)",
"legendFormat": "{{status}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Requests / sec by Backend",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_requests_total[5m])) by (backend)",
"legendFormat": "{{backend}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Requests / sec by Method",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_requests_total[5m])) by (method)",
"legendFormat": "{{method}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Request Latency (p50 / p95 / p99)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50"
},
{
"expr": "histogram_quantile(0.95, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"expr": "histogram_quantile(0.95, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"expr": "histogram_quantile(0.99, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99"
}
],
"fieldConfig": {
"defaults": { "unit": "s" }
}
"fieldConfig": { "defaults": { "unit": "s" } }
},
{
"title": "Active Connections by Instance",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "pingora_active_connections",
"legendFormat": "active"
}
]
},
{
"title": "Upstream Latency by Backend",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(pingora_upstream_duration_seconds_bucket[5m])) by (le, backend))",
"legendFormat": "{{backend}} p95"
"expr": "sunbeam_active_connections",
"legendFormat": "{{instance}}"
}
],
"fieldConfig": {
"defaults": { "unit": "s" }
}
"fieldConfig": { "defaults": { "unit": "short" } }
},
{
"title": "DDoS / Scanner Detections",
"title": "Security & Rate Limiting",
"type": "row",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 },
"collapsed": false
},
{
"title": "DDoS Decisions",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 23 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_ddos_detections_total[5m]))",
"legendFormat": "DDoS"
},
{
"expr": "sum(rate(pingora_scanner_detections_total[5m]))",
"legendFormat": "Scanner"
},
{
"expr": "sum(rate(pingora_rate_limit_rejected_total[5m]))",
"legendFormat": "Rate-limited"
"expr": "sum(rate(sunbeam_ddos_decisions_total[5m])) by (decision)",
"legendFormat": "{{decision}}"
}
],
"fieldConfig": {
"defaults": { "unit": "reqps" }
}
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Scanner Decisions",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 23 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_scanner_decisions_total[5m])) by (decision)",
"legendFormat": "{{decision}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Rate Limit Decisions",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 23 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_rate_limit_decisions_total[5m])) by (decision)",
"legendFormat": "{{decision}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Ensemble Decision Paths",
"type": "row",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 31 },
"collapsed": false
},
{
"title": "DDoS Ensemble Paths",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 32 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_ddos_ensemble_path_total[5m])) by (path)",
"legendFormat": "{{path}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
},
{
"title": "Scanner Ensemble Paths",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 32 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(sunbeam_scanner_ensemble_path_total[5m])) by (path)",
"legendFormat": "{{path}}"
}
],
"fieldConfig": { "defaults": { "unit": "reqps" } }
}
],
"schemaVersion": 39,