feat: split Grafana dashboards into per-folder ConfigMaps

Replace monolithic dashboards-configmap.yaml with 10 dedicated files,
one per Grafana folder: Ingress, Observability, Infrastructure, Storage,
Identity, DevTools, Search, Media, La Suite, Communications.

New dashboards for Longhorn, PostgreSQL/CNPG, Cert-Manager, SeaweedFS,
Hydra, Kratos, Gitea, OpenSearch, LiveKit, La Suite golden signals
(Linkerd metrics), Matrix, and Email Pipeline.
This commit is contained in:
2026-03-24 12:20:42 +00:00
parent 234fe72707
commit 74bb59cfdc
11 changed files with 1418 additions and 247 deletions

View File

@@ -0,0 +1,140 @@
# Grafana dashboard ConfigMaps — Communications
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places each dashboard in a Grafana folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-matrix
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Communications"
data:
  # Golden signals for the matrix namespace; the request_total /
  # response_total / response_latency_ms_bucket series look like
  # Linkerd proxy metrics (direction="inbound") — confirm against
  # the mesh actually deployed.
  matrix.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(request_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Success Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\", classification=\"success\"}[5m])) by (deployment) / sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        },
        {
          "title": "Latency p95",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        },
        {
          "title": "Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\", classification=\"failure\"}[5m])) by (deployment) / sum(rate(response_total{namespace=\"matrix\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1 }
          }
        }
      ],
      "schemaVersion": 39,
      "tags": ["matrix", "tuwunel", "communications"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Matrix / Tuwunel",
      "uid": "matrix"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-email
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Communications"
data:
  # Email pipeline overview across the lasuite namespace, filtered to
  # the mail-handling deployments (postfix + messages-* services).
  email.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(request_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Error Rate by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\", classification=\"failure\"}[5m])) by (deployment) / sum(rate(response_total{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        },
        {
          "title": "Latency p95 by Service",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"lasuite\", deployment=~\"postfix|messages-mta-in|messages-mta-out|messages-mpa|messages-worker\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["email", "postfix", "communications"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Email Pipeline",
      "uid": "email-pipeline"
    }

View File

@@ -0,0 +1,89 @@
# Grafana dashboard ConfigMaps — DevTools
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places the dashboard in the DevTools folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-gitea
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "DevTools"
data:
  # Gitea application counters (gitea_*) plus Go process runtime
  # metrics, selected via job=~".*gitea.*".
  gitea.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Repositories",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "gitea_repositories", "legendFormat": "repos" }
          ]
        },
        {
          "title": "Users",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 8, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "gitea_users", "legendFormat": "users" }
          ]
        },
        {
          "title": "Issues",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "gitea_issues", "legendFormat": "issues" }
          ]
        },
        {
          "title": "Go Goroutines",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "go_goroutines{job=~\".*gitea.*\"}",
              "legendFormat": "{{instance}}"
            }
          ]
        },
        {
          "title": "Memory Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "process_resident_memory_bytes{job=~\".*gitea.*\"}",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "CPU Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "rate(process_cpu_seconds_total{job=~\".*gitea.*\"}[5m])",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "short" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["gitea", "devtools"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Gitea",
      "uid": "gitea"
    }

View File

@@ -0,0 +1,179 @@
# Grafana dashboard ConfigMaps — Identity
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places both dashboards in the Identity folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-hydra
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Identity"
data:
  # Ory Hydra: HTTP rate/error/latency by handler plus Go runtime
  # metrics, selected via job=~".*hydra.*".
  # NOTE(review): metric names (http_requests_total,
  # http_request_duration_seconds_bucket) should be confirmed against
  # the Hydra version deployed.
  hydra.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(http_requests_total{job=~\".*hydra.*\"}[5m])) by (handler)",
              "legendFormat": "{{handler}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(http_requests_total{job=~\".*hydra.*\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{job=~\".*hydra.*\"}[5m]))",
              "legendFormat": "5xx ratio"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
          }
        },
        {
          "title": "Request Latency p95",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=~\".*hydra.*\"}[5m])) by (le, handler))",
              "legendFormat": "{{handler}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        },
        {
          "title": "Go Goroutines",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "go_goroutines{job=~\".*hydra.*\"}",
              "legendFormat": "{{instance}}"
            }
          ]
        },
        {
          "title": "Memory Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "go_memstats_alloc_bytes{job=~\".*hydra.*\"}",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["hydra", "oauth2", "identity"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Hydra OAuth2",
      "uid": "hydra"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-kratos
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Identity"
data:
  # Ory Kratos: same panel layout as the Hydra dashboard above, with
  # job=~".*kratos.*" selectors.
  kratos.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(http_requests_total{job=~\".*kratos.*\"}[5m])) by (handler)",
              "legendFormat": "{{handler}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(http_requests_total{job=~\".*kratos.*\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{job=~\".*kratos.*\"}[5m]))",
              "legendFormat": "5xx ratio"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
          }
        },
        {
          "title": "Request Latency p95",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=~\".*kratos.*\"}[5m])) by (le, handler))",
              "legendFormat": "{{handler}} p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        },
        {
          "title": "Go Goroutines",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "go_goroutines{job=~\".*kratos.*\"}",
              "legendFormat": "{{instance}}"
            }
          ]
        },
        {
          "title": "Memory Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "go_memstats_alloc_bytes{job=~\".*kratos.*\"}",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["kratos", "identity"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Kratos Identity",
      "uid": "kratos"
    }

View File

@@ -1,249 +1,4 @@
# Grafana dashboard ConfigMaps — picked up by the Grafana sidecar.
# Grafana dashboard ConfigMaps — Infrastructure
#
# Each ConfigMap holds one or more dashboard JSON files. The sidecar
# watches for the label grafana_dashboard=1 across all namespaces and
# hot-loads them into Grafana (no restart required).
#
# The grafana_folder annotation groups dashboards into Grafana folders.
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-pingora
namespace: monitoring
labels:
grafana_dashboard: "1"
annotations:
grafana_folder: "Ingress"
data:
pingora.json: |
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "Requests / sec",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_http_requests_total[5m]))",
"legendFormat": "total"
},
{
"expr": "sum(rate(pingora_http_requests_total[5m])) by (status_code)",
"legendFormat": "{{status_code}}"
}
],
"fieldConfig": {
"defaults": { "unit": "reqps" }
}
},
{
"title": "Error Rate (5xx)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(pingora_http_requests_total[5m]))",
"legendFormat": "5xx ratio"
}
],
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
}
},
{
"title": "Request Latency (p50 / p95 / p99)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50"
},
{
"expr": "histogram_quantile(0.95, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99"
}
],
"fieldConfig": {
"defaults": { "unit": "s" }
}
},
{
"title": "Active Connections",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "pingora_active_connections",
"legendFormat": "active"
}
]
},
{
"title": "Upstream Latency by Backend",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(pingora_upstream_duration_seconds_bucket[5m])) by (le, backend))",
"legendFormat": "{{backend}} p95"
}
],
"fieldConfig": {
"defaults": { "unit": "s" }
}
},
{
"title": "DDoS / Scanner Detections",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(pingora_ddos_detections_total[5m]))",
"legendFormat": "DDoS"
},
{
"expr": "sum(rate(pingora_scanner_detections_total[5m]))",
"legendFormat": "Scanner"
},
{
"expr": "sum(rate(pingora_rate_limit_rejected_total[5m]))",
"legendFormat": "Rate-limited"
}
],
"fieldConfig": {
"defaults": { "unit": "reqps" }
}
}
],
"schemaVersion": 39,
"tags": ["ingress", "pingora"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"title": "Pingora Proxy",
"uid": "pingora-proxy"
}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-loki
namespace: monitoring
labels:
grafana_dashboard: "1"
annotations:
grafana_folder: "Observability"
data:
loki-overview.json: |
{
"annotations": { "list": [] },
"editable": true,
"panels": [
{
"title": "Log Volume by Namespace",
"type": "timeseries",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
"datasource": { "uid": "loki" },
"targets": [
{
"expr": "sum(count_over_time({namespace=~\".+\"}[5m])) by (namespace)",
"legendFormat": "{{namespace}}"
}
]
},
{
"title": "Error Logs",
"type": "logs",
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 8 },
"datasource": { "uid": "loki" },
"targets": [
{
"expr": "{namespace=~\".+\"} |~ \"(?i)(error|panic|fatal|exception)\"",
"legendFormat": ""
}
]
}
],
"schemaVersion": 39,
"tags": ["loki", "logs"],
"time": { "from": "now-1h", "to": "now" },
"title": "Loki — Log Overview",
"uid": "loki-overview"
}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-tempo
namespace: monitoring
labels:
grafana_dashboard: "1"
annotations:
grafana_folder: "Observability"
data:
tempo-overview.json: |
{
"annotations": { "list": [] },
"editable": true,
"panels": [
{
"title": "Trace Ingestion Rate",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "sum(rate(tempo_distributor_spans_received_total[5m]))",
"legendFormat": "spans/s"
}
],
"fieldConfig": { "defaults": { "unit": "ops" } }
},
{
"title": "Service Map (RED)",
"type": "nodeGraph",
"gridPos": { "h": 16, "w": 12, "x": 12, "y": 0 },
"datasource": { "uid": "tempo" },
"targets": [
{ "queryType": "serviceMap" }
]
},
{
"title": "Span Duration by Service (p95)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"datasource": { "uid": "prometheus" },
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket[5m])) by (le, service))",
"legendFormat": "{{service}}"
}
],
"fieldConfig": { "defaults": { "unit": "s" } }
}
],
"schemaVersion": 39,
"tags": ["tempo", "tracing"],
"time": { "from": "now-1h", "to": "now" },
"title": "Tempo — Trace Overview",
"uid": "tempo-overview"
}
---
apiVersion: v1
kind: ConfigMap
@@ -308,3 +63,259 @@ data:
    "title": "OpenBao / Vault",
    "uid": "openbao"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-longhorn
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Longhorn storage health: per-volume usage, aggregate capacity,
  # per-node disk utilisation, node count and volume state table.
  longhorn.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Volume Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_volume_actual_size_bytes",
              "legendFormat": "{{volume}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Volume Capacity",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(longhorn_volume_capacity_bytes)",
              "legendFormat": "total"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Disk Usage %",
          "type": "gauge",
          "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "(longhorn_disk_usage_bytes / longhorn_disk_capacity_bytes) * 100",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent",
              "max": 100,
              "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":70},{"color":"red","value":85}] }
            }
          }
        },
        {
          "title": "Node Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 12, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_node_count_total",
              "legendFormat": "nodes"
            }
          ]
        },
        {
          "title": "Volume State",
          "type": "table",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "longhorn_volume_state",
              "legendFormat": "{{volume}} — {{state}}",
              "format": "table",
              "instant": true
            }
          ]
        }
      ],
      "schemaVersion": 39,
      "tags": ["longhorn", "storage"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Longhorn Storage",
      "uid": "longhorn"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-postgres
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # CloudNativePG-exported PostgreSQL metrics (cnpg_* prefix):
  # database size, connections, exporter health, TPS, cache hit ratio.
  postgres.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Database Size",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "cnpg_pg_database_size_bytes",
              "legendFormat": "{{datname}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Active Connections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "cnpg_pg_stat_activity_count",
              "legendFormat": "{{state}}"
            }
          ]
        },
        {
          "title": "Collector Status",
          "type": "stat",
          "gridPos": { "h": 4, "w": 6, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "cnpg_collector_up",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"DOWN","color":"red"},"1":{"text":"UP","color":"green"}}}] }
          }
        },
        {
          "title": "Transactions/sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 6, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "rate(cnpg_pg_stat_database_xact_commit[5m]) + rate(cnpg_pg_stat_database_xact_rollback[5m])",
              "legendFormat": "{{datname}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Cache Hit Ratio",
          "type": "gauge",
          "gridPos": { "h": 4, "w": 6, "x": 18, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "cnpg_pg_stat_database_blks_hit / (cnpg_pg_stat_database_blks_hit + cnpg_pg_stat_database_blks_read)",
              "legendFormat": "{{datname}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1 }
          }
        }
      ],
      "schemaVersion": 39,
      "tags": ["postgres", "cnpg"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "PostgreSQL / CNPG",
      "uid": "postgres-cnpg"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-certmanager
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Infrastructure"
data:
  # Cert-Manager: seconds-to-expiry table (sorted soonest-first),
  # per-certificate readiness, and ACME client request rate.
  certmanager.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Certificates Expiring",
          "type": "table",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "certmanager_certificate_expiration_timestamp_seconds - time()",
              "legendFormat": "{{name}} ({{namespace}})",
              "format": "table",
              "instant": true
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "s" }
          },
          "transformations": [
            { "id": "sortBy", "options": { "fields": {}, "sort": [{ "field": "Value", "desc": false }] } }
          ]
        },
        {
          "title": "Certificate Readiness",
          "type": "stat",
          "gridPos": { "h": 4, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "certmanager_certificate_ready_status{condition=\"True\"}",
              "legendFormat": "{{name}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"NotReady","color":"red"},"1":{"text":"Ready","color":"green"}}}] }
          }
        },
        {
          "title": "ACME Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "rate(certmanager_http_acme_client_request_count[5m])",
              "legendFormat": "{{status}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["cert-manager", "tls"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Cert-Manager",
      "uid": "cert-manager"
    }

View File

@@ -0,0 +1,135 @@
# Grafana dashboard ConfigMaps — Ingress
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places the dashboard in the Ingress folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-pingora
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Ingress"
data:
  # Pingora edge proxy: traffic, 5xx ratio, latency quantiles,
  # connections, per-backend upstream latency, and abuse detections.
  pingora.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 1,
      "links": [],
      "panels": [
        {
          "title": "Requests / sec",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(pingora_http_requests_total[5m]))",
              "legendFormat": "total"
            },
            {
              "expr": "sum(rate(pingora_http_requests_total[5m])) by (status_code)",
              "legendFormat": "{{status_code}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "reqps" }
          }
        },
        {
          "title": "Error Rate (5xx)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(pingora_http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(pingora_http_requests_total[5m]))",
              "legendFormat": "5xx ratio"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
          }
        },
        {
          "title": "Request Latency (p50 / p95 / p99)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.50, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p50"
            },
            {
              "expr": "histogram_quantile(0.95, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p95"
            },
            {
              "expr": "histogram_quantile(0.99, sum(rate(pingora_http_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p99"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "s" }
          }
        },
        {
          "title": "Active Connections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "pingora_active_connections",
              "legendFormat": "active"
            }
          ]
        },
        {
          "title": "Upstream Latency by Backend",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(pingora_upstream_duration_seconds_bucket[5m])) by (le, backend))",
              "legendFormat": "{{backend}} p95"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "s" }
          }
        },
        {
          "title": "DDoS / Scanner Detections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(pingora_ddos_detections_total[5m]))",
              "legendFormat": "DDoS"
            },
            {
              "expr": "sum(rate(pingora_scanner_detections_total[5m]))",
              "legendFormat": "Scanner"
            },
            {
              "expr": "sum(rate(pingora_rate_limit_rejected_total[5m]))",
              "legendFormat": "Rate-limited"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "reqps" }
          }
        }
      ],
      "schemaVersion": 39,
      "tags": ["ingress", "pingora"],
      "templating": { "list": [] },
      "time": { "from": "now-1h", "to": "now" },
      "title": "Pingora Proxy",
      "uid": "pingora-proxy"
    }

View File

@@ -0,0 +1,135 @@
# Grafana dashboard ConfigMaps — La Suite (Golden Signals via Linkerd)
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places the dashboard in the "La Suite" folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-lasuite
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "La Suite"
data:
  # Golden signals from Linkerd inbound proxy metrics, with
  # $namespace / $deployment template variables (multi-select,
  # include-all via allValue ".*").
  lasuite.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(request_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Success Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\", classification=\"success\"}[5m])) by (deployment) / sum(rate(response_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "percentunit", "max": 1 } }
        },
        {
          "title": "Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\", classification=\"failure\"}[5m])) by (deployment) / sum(rate(response_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (deployment)",
              "legendFormat": "{{deployment}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.01},{"color":"red","value":0.05}] } }
          }
        },
        {
          "title": "Latency p50 / p95 / p99",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.50, sum(rate(response_latency_ms_bucket{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p50"
            },
            {
              "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p95"
            },
            {
              "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (le, deployment))",
              "legendFormat": "{{deployment}} p99"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ms" } }
        },
        {
          "title": "Request Rate by Status Code",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(response_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}[5m])) by (deployment, status_code)",
              "legendFormat": "{{deployment}} {{status_code}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "TCP Connections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "tcp_open_connections{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\"}",
              "legendFormat": "{{deployment}}"
            }
          ]
        }
      ],
      "schemaVersion": 39,
      "tags": ["lasuite", "linkerd", "golden-signals"],
      "templating": {
        "list": [
          {
            "name": "namespace",
            "type": "query",
            "datasource": { "uid": "prometheus" },
            "query": "label_values(request_total{direction=\"inbound\"}, namespace)",
            "refresh": 2,
            "multi": true,
            "includeAll": true,
            "allValue": ".*"
          },
          {
            "name": "deployment",
            "type": "query",
            "datasource": { "uid": "prometheus" },
            "query": "label_values(request_total{direction=\"inbound\", namespace=~\"$namespace\"}, deployment)",
            "refresh": 2,
            "multi": true,
            "includeAll": true,
            "allValue": ".*"
          }
        ]
      },
      "time": { "from": "now-1h", "to": "now" },
      "title": "La Suite — Golden Signals",
      "uid": "lasuite-golden"
    }

View File

@@ -0,0 +1,90 @@
# Grafana dashboard ConfigMaps — Media
# Loaded by the Grafana sidecar (label grafana_dashboard: "1"); the
# grafana_folder annotation places the dashboard in the Media folder.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-livekit
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
  annotations:
    grafana_folder: "Media"
data:
  # LiveKit SFU: room/participant/track counts plus packet,
  # bandwidth and NACK rates (livekit_* exporter metrics).
  livekit.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Active Rooms",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "livekit_room_count", "legendFormat": "rooms" }
          ]
        },
        {
          "title": "Total Participants",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 8, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "livekit_participant_count", "legendFormat": "participants" }
          ]
        },
        {
          "title": "Total Tracks",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "livekit_track_count", "legendFormat": "tracks" }
          ]
        },
        {
          "title": "Packet Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(livekit_packet_total[5m])) by (direction)",
              "legendFormat": "{{direction}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Bandwidth",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(livekit_bytes_total[5m])) by (direction)",
              "legendFormat": "{{direction}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "Bps" } }
        },
        {
          "title": "NACK Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(livekit_nack_total[5m]))",
              "legendFormat": "NACKs"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["livekit", "media", "webrtc"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "LiveKit",
      "uid": "livekit"
    }

View File

@@ -0,0 +1,106 @@
# Grafana dashboard ConfigMaps — Observability
#
# The `grafana_dashboard: "1"` label and `grafana_folder` annotation are
# presumably consumed by a Grafana provisioning sidecar — confirm against
# the Grafana deployment's sidecar configuration.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-loki
  namespace: monitoring
  labels:
    # Discovery label watched by the dashboard sidecar.
    grafana_dashboard: "1"
  annotations:
    # Grafana folder this dashboard is filed under.
    grafana_folder: "Observability"
data:
  # Both panels query the "loki" datasource with LogQL: one aggregated
  # log-volume timeseries per namespace, plus a raw logs panel filtered by a
  # case-insensitive error/panic/fatal/exception regex across all namespaces.
  loki-overview.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Log Volume by Namespace",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
          "datasource": { "uid": "loki" },
          "targets": [
            {
              "expr": "sum(count_over_time({namespace=~\".+\"}[5m])) by (namespace)",
              "legendFormat": "{{namespace}}"
            }
          ]
        },
        {
          "title": "Error Logs",
          "type": "logs",
          "gridPos": { "h": 12, "w": 24, "x": 0, "y": 8 },
          "datasource": { "uid": "loki" },
          "targets": [
            {
              "expr": "{namespace=~\".+\"} |~ \"(?i)(error|panic|fatal|exception)\"",
              "legendFormat": ""
            }
          ]
        }
      ],
      "schemaVersion": 39,
      "tags": ["loki", "logs"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Loki — Log Overview",
      "uid": "loki-overview"
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-tempo
  namespace: monitoring
  labels:
    # Discovery label watched by the dashboard sidecar.
    grafana_dashboard: "1"
  annotations:
    # Grafana folder this dashboard is filed under.
    grafana_folder: "Observability"
data:
  # Mixed datasources: ingestion rate and span latency come from Prometheus
  # (distributor counter and span-metrics histogram), while the service map
  # panel queries Tempo directly via its "serviceMap" query type.
  # NOTE(review): the span-metrics panel assumes Tempo's metrics-generator
  # (traces_spanmetrics_latency_bucket) is enabled — confirm in the Tempo
  # configuration.
  tempo-overview.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Trace Ingestion Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(tempo_distributor_spans_received_total[5m]))",
              "legendFormat": "spans/s"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Service Map (RED)",
          "type": "nodeGraph",
          "gridPos": { "h": 16, "w": 12, "x": 12, "y": 0 },
          "datasource": { "uid": "tempo" },
          "targets": [
            { "queryType": "serviceMap" }
          ]
        },
        {
          "title": "Span Duration by Service (p95)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket[5m])) by (le, service))",
              "legendFormat": "{{service}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["tempo", "tracing"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "Tempo — Trace Overview",
      "uid": "tempo-overview"
    }

View File

@@ -0,0 +1,108 @@
# Grafana dashboard ConfigMaps — Search
#
# The `grafana_dashboard: "1"` label and `grafana_folder` annotation are
# presumably consumed by a Grafana provisioning sidecar — confirm against
# the Grafana deployment's sidecar configuration.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-opensearch
  namespace: monitoring
  labels:
    # Discovery label watched by the dashboard sidecar.
    grafana_dashboard: "1"
  annotations:
    # Grafana folder this dashboard is filed under.
    grafana_folder: "Search"
data:
  # NOTE(review): the Cluster Health stat maps raw values 0/1/2 to
  # GREEN/YELLOW/RED; that encoding is exporter-specific (some OpenSearch
  # exporters emit per-color gauges or 1/2/3 instead) — verify against the
  # exporter actually deployed.
  opensearch.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Cluster Health",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 0, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "opensearch_cluster_health_status", "legendFormat": "health" }
          ],
          "fieldConfig": {
            "defaults": { "mappings": [{"type":"value","options":{"0":{"text":"GREEN","color":"green"},"1":{"text":"YELLOW","color":"yellow"},"2":{"text":"RED","color":"red"}}}] }
          }
        },
        {
          "title": "Active Shards",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 8, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "opensearch_cluster_health_active_shards", "legendFormat": "shards" }
          ]
        },
        {
          "title": "Node Count",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            { "expr": "opensearch_cluster_health_number_of_nodes", "legendFormat": "nodes" }
          ]
        },
        {
          "title": "Index Size",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "opensearch_index_store_size_bytes",
              "legendFormat": "{{index}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "JVM Heap Usage",
          "type": "gauge",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "opensearch_jvm_mem_heap_used_bytes / opensearch_jvm_mem_heap_max_bytes",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "percentunit", "max": 1, "thresholds": { "steps": [{"color":"green","value":null},{"color":"yellow","value":0.7},{"color":"red","value":0.85}] } }
          }
        },
        {
          "title": "Search Query Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "rate(opensearch_indices_search_query_total[5m])",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "GC Collection Time",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "rate(opensearch_jvm_gc_collection_time_seconds[5m])",
              "legendFormat": "{{gc}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["opensearch", "search"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "OpenSearch",
      "uid": "opensearch"
    }

View File

@@ -0,0 +1,163 @@
# Grafana dashboard ConfigMaps — Storage
#
# The `grafana_dashboard: "1"` label and `grafana_folder` annotation are
# presumably consumed by a Grafana provisioning sidecar — confirm against
# the Grafana deployment's sidecar configuration.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-seaweedfs
  namespace: monitoring
  labels:
    # Discovery label watched by the dashboard sidecar.
    grafana_dashboard: "1"
  annotations:
    # Grafana folder this dashboard is filed under.
    grafana_folder: "Storage"
data:
  # Dashboard is organised into four "row" panels: Cluster Overview,
  # Volume Server, Filer, and S3 API. The first two stats use PromQL `or`
  # so that if the master's own gauge is absent, the query falls back to
  # counting/summing per-volume-server series instead.
  # NOTE(review): seaweedfs_* metric names assume the SeaweedFS built-in
  # Prometheus exporter — confirm against the deployed version.
  seaweedfs.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "panels": [
        {
          "title": "Cluster Overview",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
          "collapsed": false
        },
        {
          "title": "Data Nodes",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 0, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "seaweedfs_master_data_nodes or count(up{job=~\".*seaweedfs-volume.*\"})",
              "legendFormat": "nodes"
            }
          ]
        },
        {
          "title": "Total Volume Count",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 8, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "seaweedfs_master_volumes_count or sum(seaweedfs_volume_count)",
              "legendFormat": "volumes"
            }
          ]
        },
        {
          "title": "Total Disk Free",
          "type": "stat",
          "gridPos": { "h": 4, "w": 8, "x": 16, "y": 1 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(seaweedfs_disk_free_bytes)",
              "legendFormat": "free"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Volume Server",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
          "collapsed": false
        },
        {
          "title": "Read Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(seaweedfs_volume_read_total[5m]))",
              "legendFormat": "reads"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Write Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(seaweedfs_volume_write_total[5m]))",
              "legendFormat": "writes"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "ops" } }
        },
        {
          "title": "Disk Usage",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "seaweedfs_disk_used_bytes",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "bytes" } }
        },
        {
          "title": "Filer",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
          "collapsed": false
        },
        {
          "title": "Filer Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(seaweedfs_filer_request_total[5m])) by (type)",
              "legendFormat": "{{type}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Filer Latency p95",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(seaweedfs_filer_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p95"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        },
        {
          "title": "S3 API",
          "type": "row",
          "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
          "collapsed": false
        },
        {
          "title": "S3 Requests",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 },
          "datasource": { "uid": "prometheus" },
          "targets": [
            {
              "expr": "sum(rate(seaweedfs_s3_request_total[5m])) by (bucket, method)",
              "legendFormat": "{{bucket}} {{method}}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["seaweedfs", "storage", "s3"],
      "time": { "from": "now-1h", "to": "now" },
      "title": "SeaweedFS",
      "uid": "seaweedfs"
    }

View File

@@ -7,7 +7,22 @@ resources:
 - namespace.yaml
 - vault-secrets.yaml
 - grafana-oauth2client.yaml
-- dashboards-configmap.yaml
+# Dashboards (one ConfigMap per Grafana folder)
+- dashboards-ingress.yaml
+- dashboards-observability.yaml
+- dashboards-infrastructure.yaml
+- dashboards-storage.yaml
+- dashboards-identity.yaml
+- dashboards-devtools.yaml
+- dashboards-search.yaml
+- dashboards-media.yaml
+- dashboards-lasuite.yaml
+- dashboards-comms.yaml
+# AlertManager → Matrix bridge
+- matrix-alertmanager-receiver-deployment.yaml
+- matrix-bot-secret.yaml
+# Alert rules
+- alertrules-infrastructure.yaml
 helmCharts:
 # helm repo add prometheus-community https://prometheus-community.github.io/helm-charts