{
|
|
"dashboard": {
|
"id": null,
"title": "WiFi-DensePose Monitoring Dashboard",
"tags": [
  "wifi-densepose",
  "monitoring",
  "kubernetes"
],
"style": "dark",
"timezone": "browser",
"refresh": "30s",
"schemaVersion": 30,
"version": 1,
"time": {
  "from": "now-1h",
  "to": "now"
},
"timepicker": {
  "refresh_intervals": [
    "5s", "10s", "30s",
    "1m", "5m", "15m", "30m",
    "1h", "2h", "1d"
  ]
},
|
"templating": {
  "list": [
    {
      "name": "namespace",
      "type": "query",
      "datasource": "Prometheus",
      "query": "label_values(kube_namespace_info, namespace)",
      "refresh": 1,
      "multi": true,
      "includeAll": true,
      "allValue": ".*"
    },
    {
      "name": "pod",
      "type": "query",
      "datasource": "Prometheus",
      "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)",
      "refresh": 1,
      "multi": true,
      "includeAll": true,
      "allValue": ".*"
    },
    {
      "name": "instance",
      "type": "query",
      "datasource": "Prometheus",
      "query": "label_values(up, instance)",
      "refresh": 1,
      "multi": true,
      "includeAll": true,
      "allValue": ".*"
    }
  ]
},
|
|
"panels": [
|
|
{
  "id": 1,
  "type": "row",
  "title": "System Overview",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 0,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 2,
  "type": "stat",
  "title": "Application Status",
  "gridPos": {
    "x": 0,
    "y": 1,
    "w": 6,
    "h": 8
  },
  "targets": [
    {
      "refId": "A",
      "expr": "up{job=\"wifi-densepose-app\"}",
      "legendFormat": "{{instance}}"
    }
  ],
  "options": {
    "colorMode": "background",
    "textMode": "auto",
    "orientation": "auto",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    }
  },
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "thresholds": {
        "steps": [
          {
            "color": "red",
            "value": 0
          },
          {
            "color": "green",
            "value": 1
          }
        ]
      },
      "mappings": [
        {
          "type": "value",
          "options": {
            "0": {
              "text": "Down"
            }
          }
        },
        {
          "type": "value",
          "options": {
            "1": {
              "text": "Up"
            }
          }
        }
      ]
    }
  }
},
|
|
{
  "id": 3,
  "title": "Request Rate",
  "type": "stat",
  "gridPos": {"h": 8, "w": 6, "x": 6, "y": 1},
  "targets": [
    {
      "expr": "sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m]))",
      "legendFormat": "Requests/sec",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "reqps",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "steps": [
          {"color": "green", "value": 0},
          {"color": "yellow", "value": 100},
          {"color": "red", "value": 1000}
        ]
      }
    }
  }
},
|
|
{
  "id": 4,
  "title": "Error Rate",
  "type": "stat",
  "gridPos": {"h": 8, "w": 6, "x": 12, "y": 1},
  "targets": [
    {
      "expr": "(sum(rate(http_requests_total{job=\"wifi-densepose-app\",status=~\"5..\"}[5m])) or vector(0)) / sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m])) * 100",
      "legendFormat": "Error Rate %",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "steps": [
          {"color": "green", "value": 0},
          {"color": "yellow", "value": 1},
          {"color": "red", "value": 5}
        ]
      }
    }
  }
},
|
|
{
  "id": 5,
  "type": "stat",
  "title": "Response Time",
  "gridPos": {
    "x": 18,
    "y": 1,
    "w": 6,
    "h": 8
  },
  "targets": [
    {
      "refId": "A",
      "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
      "legendFormat": "95th percentile"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "s",
      "color": {
        "mode": "thresholds"
      },
      "thresholds": {
        "steps": [
          {
            "color": "green",
            "value": 0
          },
          {
            "color": "yellow",
            "value": 0.5
          },
          {
            "color": "red",
            "value": 1
          }
        ]
      }
    }
  }
},
|
|
{
  "id": 6,
  "type": "row",
  "title": "Application Metrics",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 9,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 7,
  "title": "HTTP Request Rate",
  "type": "graph",
  "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
  "targets": [
    {
      "expr": "sum(rate(http_requests_total{job=\"wifi-densepose-app\"}[5m])) by (method, status)",
      "legendFormat": "{{method}} {{status}}",
      "refId": "A"
    }
  ],
  "yaxes": [
    {"format": "reqps", "label": "Requests/sec", "min": 0, "show": true},
    {"show": false}
  ],
  "xaxis": {"show": true},
  "legend": {"show": true, "values": true, "current": true}
},
|
|
{
  "id": 8,
  "title": "Response Time Distribution",
  "type": "graph",
  "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
  "targets": [
    {
      "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
      "legendFormat": "50th percentile",
      "refId": "A"
    },
    {
      "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
      "legendFormat": "95th percentile",
      "refId": "B"
    },
    {
      "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{job=\"wifi-densepose-app\"}[5m])) by (le))",
      "legendFormat": "99th percentile",
      "refId": "C"
    }
  ],
  "yaxes": [
    {"format": "s", "label": "Response Time (s)", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 9,
  "type": "row",
  "title": "Infrastructure Metrics",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 18,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 10,
  "title": "CPU Usage",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 0, "y": 19},
  "targets": [
    {
      "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[5m])) by (pod) * 100",
      "legendFormat": "{{pod}}",
      "refId": "A"
    }
  ],
  "yaxes": [
    {"format": "percent", "label": "CPU %", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 11,
  "title": "Memory Usage",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 8, "y": 19},
  "targets": [
    {
      "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}) by (pod) / 1024 / 1024",
      "legendFormat": "{{pod}}",
      "refId": "A"
    }
  ],
  "yaxes": [
    {"label": "Memory (MB)", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 12,
  "title": "Network I/O",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 16, "y": 19},
  "targets": [
    {
      "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[5m])) by (pod)",
      "legendFormat": "{{pod}} RX",
      "refId": "A"
    },
    {
      "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[5m])) by (pod)",
      "legendFormat": "{{pod}} TX",
      "refId": "B"
    }
  ],
  "yaxes": [
    {"format": "Bps", "label": "Bytes/sec", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 13,
  "type": "row",
  "title": "Database Metrics",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 27,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 14,
  "title": "Database Connections",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 0, "y": 28},
  "targets": [
    {
      "expr": "pg_stat_database_numbackends{datname=\"wifi_densepose\"}",
      "legendFormat": "Active Connections",
      "refId": "A"
    },
    {
      "expr": "pg_settings_max_connections",
      "legendFormat": "Max Connections",
      "refId": "B"
    }
  ],
  "yaxes": [
    {"label": "Connections", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 15,
  "title": "Database Query Performance",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 8, "y": 28},
  "targets": [
    {
      "expr": "rate(pg_stat_database_tup_fetched{datname=\"wifi_densepose\"}[5m])",
      "legendFormat": "Tuples Fetched/sec",
      "refId": "A"
    },
    {
      "expr": "rate(pg_stat_database_tup_inserted{datname=\"wifi_densepose\"}[5m])",
      "legendFormat": "Tuples Inserted/sec",
      "refId": "B"
    },
    {
      "expr": "rate(pg_stat_database_tup_updated{datname=\"wifi_densepose\"}[5m])",
      "legendFormat": "Tuples Updated/sec",
      "refId": "C"
    }
  ],
  "yaxes": [
    {"label": "Operations/sec", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 16,
  "title": "Redis Metrics",
  "type": "graph",
  "gridPos": {"h": 8, "w": 8, "x": 16, "y": 28},
  "targets": [
    {
      "expr": "redis_connected_clients",
      "legendFormat": "Connected Clients",
      "refId": "A"
    },
    {
      "expr": "rate(redis_total_commands_processed_total[5m])",
      "legendFormat": "Commands/sec",
      "refId": "B"
    }
  ],
  "yaxes": [
    {"label": "Count", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 17,
  "type": "row",
  "title": "Kubernetes Metrics",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 36,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 18,
  "title": "Pod Status",
  "type": "graph",
  "gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
  "targets": [
    {
      "expr": "sum(kube_pod_status_phase{namespace=~\"$namespace\"}) by (phase)",
      "legendFormat": "{{phase}}",
      "refId": "A"
    }
  ],
  "yaxes": [
    {"label": "Pod Count", "min": 0, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 19,
  "title": "Node Resource Usage",
  "type": "graph",
  "gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
  "targets": [
    {
      "expr": "(1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$instance\"}[5m]))) * 100",
      "legendFormat": "{{instance}} CPU %",
      "refId": "A"
    },
    {
      "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$instance\"} / node_memory_MemTotal_bytes{instance=~\"$instance\"})) * 100",
      "legendFormat": "{{instance}} Memory %",
      "refId": "B"
    }
  ],
  "yaxes": [
    {"format": "percent", "label": "Usage %", "min": 0, "max": 100, "show": true},
    {"show": false}
  ]
},
|
|
{
  "id": 20,
  "type": "row",
  "title": "Alerts and Logs",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 45,
    "w": 24,
    "h": 1
  }
},
|
|
{
  "id": 21,
  "type": "table",
  "title": "Active Alerts",
  "gridPos": {
    "x": 0,
    "y": 46,
    "w": 24,
    "h": 8
  },
  "targets": [
    {
      "refId": "A",
      "expr": "ALERTS{alertstate=\"firing\"}",
      "format": "table",
      "instant": true
    }
  ],
  "transformations": [
    {
      "id": "organize",
      "options": {
        "excludeByName": {
          "__name__": true,
          "Time": true,
          "job": true
        },
        "indexByName": {},
        "renameByName": {
          "alertname": "Alert",
          "severity": "Severity",
          "summary": "Summary",
          "description": "Description"
        }
      }
    }
  ]
}
|
|
],
|
"annotations": {
  "list": [
    {
      "name": "Deployments",
      "datasource": "Prometheus",
      "enable": true,
      "expr": "changes(kube_deployment_status_observed_generation{namespace=~\"$namespace\"}[1m]) > 0",
      "iconColor": "green",
      "titleFormat": "Deployment: {{deployment}}"
    }
  ]
}
|
|
},
|
|
"overwrite": true
|
|
}