feat(monitoring): expanded dashboards for all services
Enriched dashboards for DevTools (Gitea), Identity (Hydra/Kratos), Infrastructure (Longhorn, PostgreSQL, cert-manager, OpenBao), Ingress (Pingora), and Storage (SeaweedFS).
This commit is contained in:
@@ -19,111 +19,210 @@ data:
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "Requests / sec",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"title": "Traffic Overview",
|
||||
"type": "row",
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"collapsed": false
|
||||
},
|
||||
{
|
||||
"title": "Active Connections",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(pingora_http_requests_total[5m]))",
|
||||
"legendFormat": "total"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(pingora_http_requests_total[5m])) by (status_code)",
|
||||
"legendFormat": "{{status_code}}"
|
||||
}
|
||||
{ "expr": "sum(sunbeam_active_connections)", "legendFormat": "", "instant": true }
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" }
|
||||
"defaults": { "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":500},{"color":"red","value":1000}] } }
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Error Rate (5xx)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"title": "Request Rate",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(pingora_http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(pingora_http_requests_total[5m]))",
|
||||
"legendFormat": "5xx ratio"
|
||||
}
|
||||
{ "expr": "sum(rate(sunbeam_requests_total[5m]))", "legendFormat": "", "instant": true }
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Error Rate (5xx)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "expr": "sum(rate(sunbeam_requests_total{status=~\"5..\"}[5m])) / sum(rate(sunbeam_requests_total[5m]))", "legendFormat": "", "instant": true }
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Request Latency (p50 / p95 / p99)",
|
||||
"title": "Latency (p95)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "expr": "histogram_quantile(0.95, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))", "legendFormat": "", "instant": true }
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "s", "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.5},{"color":"red","value":2}] } } }
|
||||
},
|
||||
{
|
||||
"title": "Requests & Latency",
|
||||
"type": "row",
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
|
||||
"collapsed": false
|
||||
},
|
||||
{
|
||||
"title": "Requests / sec by Status",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"expr": "sum(rate(sunbeam_requests_total[5m])) by (status)",
|
||||
"legendFormat": "{{status}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Requests / sec by Backend",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_requests_total[5m])) by (backend)",
|
||||
"legendFormat": "{{backend}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Requests / sec by Method",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_requests_total[5m])) by (method)",
|
||||
"legendFormat": "{{method}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Request Latency (p50 / p95 / p99)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"expr": "histogram_quantile(0.95, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(sunbeam_request_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "s" }
|
||||
}
|
||||
"fieldConfig": { "defaults": { "unit": "s" } }
|
||||
},
|
||||
{
|
||||
"title": "Active Connections",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "pingora_active_connections",
|
||||
"legendFormat": "active"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Upstream Latency by Backend",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(pingora_upstream_duration_seconds_bucket[5m])) by (le, backend))",
|
||||
"legendFormat": "{{backend}} p95"
|
||||
"expr": "sunbeam_active_connections",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "s" }
|
||||
}
|
||||
"fieldConfig": { "defaults": { "unit": "short" } }
|
||||
},
|
||||
{
|
||||
"title": "DDoS / Scanner Detections",
|
||||
"title": "Security & Rate Limiting",
|
||||
"type": "row",
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 },
|
||||
"collapsed": false
|
||||
},
|
||||
{
|
||||
"title": "DDoS Decisions",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 23 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(pingora_ddos_detections_total[5m]))",
|
||||
"legendFormat": "DDoS"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(pingora_scanner_detections_total[5m]))",
|
||||
"legendFormat": "Scanner"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(pingora_rate_limit_rejected_total[5m]))",
|
||||
"legendFormat": "Rate-limited"
|
||||
"expr": "sum(rate(sunbeam_ddos_decisions_total[5m])) by (decision)",
|
||||
"legendFormat": "{{decision}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" }
|
||||
}
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Scanner Decisions",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 23 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_scanner_decisions_total[5m])) by (decision)",
|
||||
"legendFormat": "{{decision}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Rate Limit Decisions",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 23 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_rate_limit_decisions_total[5m])) by (decision)",
|
||||
"legendFormat": "{{decision}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Ensemble Decision Paths",
|
||||
"type": "row",
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 31 },
|
||||
"collapsed": false
|
||||
},
|
||||
{
|
||||
"title": "DDoS Ensemble Paths",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 32 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_ddos_ensemble_path_total[5m])) by (path)",
|
||||
"legendFormat": "{{path}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
},
|
||||
{
|
||||
"title": "Scanner Ensemble Paths",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 32 },
|
||||
"datasource": { "uid": "prometheus" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(sunbeam_scanner_ensemble_path_total[5m])) by (path)",
|
||||
"legendFormat": "{{path}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" } }
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
|
||||
Reference in New Issue
Block a user