Files
wifi-densepose/monitoring/grafana-dashboard.json
2025-06-07 11:44:19 +00:00

472 lines
13 KiB
JSON

{
"dashboard": {
"id": null,
"title": "WiFi-DensePose Monitoring Dashboard",
"tags": ["wifi-densepose", "monitoring", "kubernetes"],
"style": "dark",
"timezone": "browser",
"refresh": "30s",
"schemaVersion": 30,
"version": 1,
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"]
},
"templating": {
"list": [
{
"name": "namespace",
"type": "query",
"query": "label_values(kube_namespace_info, namespace)",
"refresh": 1,
"includeAll": true,
"allValue": ".*",
"multi": true,
"datasource": "Prometheus"
},
{
"name": "pod",
"type": "query",
"query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)",
"refresh": 1,
"includeAll": true,
"allValue": ".*",
"multi": true,
"datasource": "Prometheus"
},
{
"name": "instance",
"type": "query",
"query": "label_values(up, instance)",
"refresh": 1,
"includeAll": true,
"allValue": ".*",
"multi": true,
"datasource": "Prometheus"
}
]
},
"panels": [
{
"id": 1,
"title": "System Overview",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
"collapsed": false
},
{
"id": 2,
"title": "Application Status",
"type": "stat",
"gridPos": {"h": 8, "w": 6, "x": 0, "y": 1},
"targets": [
{
"expr": "up{job=\"wifi-densepose-app\"}",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"steps": [
{"color": "red", "value": 0},
{"color": "green", "value": 1}
]
},
"mappings": [
{"options": {"0": {"text": "Down"}}, "type": "value"},
{"options": {"1": {"text": "Up"}}, "type": "value"}
]
}
},
"options": {
"reduceOptions": {
"values": false,
"calcs": ["lastNotNull"],
"fields": ""
},
"orientation": "auto",
"textMode": "auto",
"colorMode": "background"
}
},
{
"id": 3,
"title": "Request Rate",
"type": "stat",
"gridPos": {"h": 8, "w": 6, "x": 6, "y": 1},
"targets": [
{
"expr": "sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m]))",
"legendFormat": "Requests/sec",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "reqps",
"color": {"mode": "palette-classic"},
"thresholds": {
"steps": [
{"color": "green", "value": 0},
{"color": "yellow", "value": 100},
{"color": "red", "value": 1000}
]
}
}
}
},
{
"id": 4,
"title": "Error Rate",
"type": "stat",
"gridPos": {"h": 8, "w": 6, "x": 12, "y": 1},
"targets": [
{
"expr": "(sum(rate(http_requests_total{job=\"wifi-densepose-app\",status=~\"5..\"}[5m])) or vector(0)) / sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m])) * 100",
"legendFormat": "Error Rate %",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"color": {"mode": "thresholds"},
"thresholds": {
"steps": [
{"color": "green", "value": 0},
{"color": "yellow", "value": 1},
{"color": "red", "value": 5}
]
}
}
}
},
{
"id": 5,
"title": "Response Time",
"type": "stat",
"gridPos": {"h": 8, "w": 6, "x": 18, "y": 1},
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
"legendFormat": "95th percentile",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "s",
"color": {"mode": "thresholds"},
"thresholds": {
"steps": [
{"color": "green", "value": 0},
{"color": "yellow", "value": 0.5},
{"color": "red", "value": 1}
]
}
}
}
},
{
"id": 6,
"title": "Application Metrics",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 9},
"collapsed": false
},
{
"id": 7,
"title": "HTTP Request Rate",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
"targets": [
{
"expr": "sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m])) by (method, status)",
"legendFormat": "{{method}} {{status}}",
"refId": "A"
}
],
"yaxes": [
{"label": "Requests/sec", "min": 0},
{"show": false}
],
"xaxis": {"show": true},
"legend": {"show": true, "values": true, "current": true}
},
{
"id": 8,
"title": "Response Time Distribution",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
"legendFormat": "50th percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
"legendFormat": "95th percentile",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
"legendFormat": "99th percentile",
"refId": "C"
}
],
"yaxes": [
{"label": "Response Time (s)", "min": 0},
{"show": false}
]
},
{
"id": 9,
"title": "Infrastructure Metrics",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 18},
"collapsed": false
},
{
"id": 10,
"title": "CPU Usage",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 0, "y": 19},
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[5m])) by (pod) * 100",
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"yaxes": [
{"label": "CPU %", "min": 0, "max": 100},
{"show": false}
]
},
{
"id": 11,
"title": "Memory Usage",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 8, "y": 19},
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}) by (pod) / 1024 / 1024",
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"yaxes": [
{"label": "Memory (MB)", "min": 0},
{"show": false}
]
},
{
"id": 12,
"title": "Network I/O",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 16, "y": 19},
"targets": [
{
"expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[5m])) by (pod)",
"legendFormat": "{{pod}} RX",
"refId": "A"
},
{
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[5m])) by (pod)",
"legendFormat": "{{pod}} TX",
"refId": "B"
}
],
"yaxes": [
{"label": "Bytes/sec", "min": 0},
{"show": false}
]
},
{
"id": 13,
"title": "Database Metrics",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 27},
"collapsed": false
},
{
"id": 14,
"title": "Database Connections",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 0, "y": 28},
"targets": [
{
"expr": "pg_stat_database_numbackends{datname=\"wifi_densepose\"}",
"legendFormat": "Active Connections",
"refId": "A"
},
{
"expr": "pg_settings_max_connections",
"legendFormat": "Max Connections",
"refId": "B"
}
],
"yaxes": [
{"label": "Connections", "min": 0},
{"show": false}
]
},
{
"id": 15,
"title": "Database Query Performance",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 8, "y": 28},
"targets": [
{
"expr": "rate(pg_stat_database_tup_fetched{datname=\"wifi_densepose\"}[5m])",
"legendFormat": "Tuples Fetched/sec",
"refId": "A"
},
{
"expr": "rate(pg_stat_database_tup_inserted{datname=\"wifi_densepose\"}[5m])",
"legendFormat": "Tuples Inserted/sec",
"refId": "B"
},
{
"expr": "rate(pg_stat_database_tup_updated{datname=\"wifi_densepose\"}[5m])",
"legendFormat": "Tuples Updated/sec",
"refId": "C"
}
],
"yaxes": [
{"label": "Operations/sec", "min": 0},
{"show": false}
]
},
{
"id": 16,
"title": "Redis Metrics",
"type": "graph",
"gridPos": {"h": 8, "w": 8, "x": 16, "y": 28},
"targets": [
{
"expr": "redis_connected_clients",
"legendFormat": "Connected Clients",
"refId": "A"
},
{
"expr": "rate(redis_commands_processed_total[5m])",
"legendFormat": "Commands/sec",
"refId": "B"
}
],
"yaxes": [
{"label": "Count", "min": 0},
{"show": false}
]
},
{
"id": 17,
"title": "Kubernetes Metrics",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 36},
"collapsed": false
},
{
"id": 18,
"title": "Pod Status",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
"targets": [
{
"expr": "sum(kube_pod_status_phase{namespace=~\"$namespace\"}) by (phase)",
"legendFormat": "{{phase}}",
"refId": "A"
}
],
"yaxes": [
{"label": "Pod Count", "min": 0},
{"show": false}
]
},
{
"id": 19,
"title": "Node Resource Usage",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
"targets": [
{
"expr": "(1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100",
"legendFormat": "CPU Usage %",
"refId": "A"
},
{
"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
"legendFormat": "Memory Usage %",
"refId": "B"
}
],
"yaxes": [
{"label": "Usage %", "min": 0, "max": 100},
{"show": false}
]
},
{
"id": 20,
"title": "Alerts and Logs",
"type": "row",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 45},
"collapsed": false
},
{
"id": 21,
"title": "Active Alerts",
"type": "table",
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 46},
"targets": [
{
"expr": "ALERTS{alertstate=\"firing\"}",
"format": "table",
"instant": true,
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"__name__": true,
"Time": true,
"job": true
},
"indexByName": {},
"renameByName": {
"alertname": "Alert",
"severity": "Severity",
"summary": "Summary",
"description": "Description"
}
}
}
]
}
],
"annotations": {
"list": [
{
"name": "Deployments",
"datasource": "Prometheus",
"expr": "increase(kube_deployment_status_observed_generation{namespace=~\"$namespace\"}[1m])",
"iconColor": "green",
"titleFormat": "Deployment: {{deployment}}"
}
]
}
},
"overwrite": true
}