Files
wifi-densepose/monitoring/prometheus-config.yml
2025-06-07 11:44:19 +00:00

325 lines
9.7 KiB
YAML

# Prometheus Configuration for WiFi-DensePose
# This configuration sets up comprehensive monitoring for the WiFi-DensePose application
# Defaults inherited by every scrape job and rule group unless overridden.
global:
  # How often rule groups are evaluated.
  evaluation_interval: 15s
  # How often targets are scraped when a job does not set its own interval.
  scrape_interval: 15s
  # Labels attached to series and alerts when talking to external systems
  # (Alertmanager, remote storage).
  external_labels:
    environment: production
    cluster: wifi-densepose
# Alert delivery: a single, statically addressed Alertmanager instance.
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['alertmanager:9093']
# Rule files are loaded once; the rules they contain are then evaluated
# periodically according to the global 'evaluation_interval'.
rule_files:
- alerting-rules.yml
- recording-rules.yml
# Scrape jobs. Each list entry below defines one job.
scrape_configs:
# Self-monitoring: Prometheus scrapes its own metrics endpoint.
- job_name: prometheus
  metrics_path: /metrics
  scrape_interval: 30s
  static_configs:
  - targets:
    - 'localhost:9090'
# Kubernetes API Server
# Discovers endpoints in the `default` namespace and keeps only the `https`
# port of the `kubernetes` service, i.e. the API server endpoints.
- job_name: 'kubernetes-apiservers'
  kubernetes_sd_configs:
  - role: endpoints
    namespaces:
      names:
      - default
  scheme: https
  tls_config:
    # The API server certificate is verified against the in-cluster CA.
    # Certificate verification is intentionally left enabled: the
    # service-account token below is sent on every scrape and must only be
    # presented to an authenticated peer.
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Joined value is "<namespace>;<service>;<port name>".
  - source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_service_name
    - __meta_kubernetes_endpoint_port_name
    action: keep
    regex: default;kubernetes;https
# Kubernetes Nodes
# Node metrics are scraped through the API server proxy rather than by
# contacting each node directly.
- job_name: 'kubernetes-nodes'
  kubernetes_sd_configs:
  - role: node
  scheme: https
  tls_config:
    # Every request goes to kubernetes.default.svc:443 (see relabeling below),
    # so the certificate is verified against the in-cluster CA. Verification is
    # intentionally left enabled to protect the bearer token sent per scrape.
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Copy every Kubernetes node label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Send the scrape to the API server instead of the node address.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  # Build the per-node proxy path from the node name.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics
# Node Exporter: endpoints objects named `node-exporter`, in any namespace.
- job_name: kubernetes-node-exporter
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  # Drop every endpoints object that is not called node-exporter.
  - action: keep
    regex: node-exporter
    source_labels:
    - __meta_kubernetes_endpoints_name
  # Record the endpoint address' target name under the `node` label.
  - target_label: node
    source_labels:
    - __meta_kubernetes_endpoint_address_target_name
  # Copy the service's Kubernetes labels onto the target.
  - regex: __meta_kubernetes_service_label_(.+)
    action: labelmap
# Generic pod scraping, opt-in through `prometheus.io/*` pod annotations.
- job_name: kubernetes-pods
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # prometheus.io/scrape: "true" is required for a pod to be scraped.
  - action: keep
    regex: true
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_scrape
  # prometheus.io/path, when non-empty, overrides the metrics path.
  - action: replace
    regex: (.+)
    target_label: __metrics_path__
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_path
  # prometheus.io/port, when numeric, replaces the port in the target address.
  # Joined input is "<address>;<port annotation>".
  - action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
    source_labels:
    - __address__
    - __meta_kubernetes_pod_annotation_prometheus_io_port
  # Copy the pod's Kubernetes labels onto the target.
  - regex: __meta_kubernetes_pod_label_(.+)
    action: labelmap
  # Expose namespace and pod name as regular labels.
  - action: replace
    target_label: kubernetes_namespace
    source_labels:
    - __meta_kubernetes_namespace
  - action: replace
    target_label: kubernetes_pod_name
    source_labels:
    - __meta_kubernetes_pod_name
# WiFi-DensePose application pods in the production and staging namespaces.
- job_name: wifi-densepose-app
  scrape_interval: 10s
  # Job default; a prometheus.io/path annotation overrides it per pod below.
  metrics_path: /metrics
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  # Only pods labelled app=wifi-densepose ...
  - action: keep
    regex: wifi-densepose
    source_labels:
    - __meta_kubernetes_pod_label_app
  # ... that also opt in with prometheus.io/scrape: "true".
  - action: keep
    regex: true
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_scrape
  # prometheus.io/path, when non-empty, overrides the metrics path.
  - action: replace
    regex: (.+)
    target_label: __metrics_path__
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_path
  # prometheus.io/port, when numeric, replaces the port in the target address.
  # Joined input is "<address>;<port annotation>".
  - action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
    source_labels:
    - __address__
    - __meta_kubernetes_pod_annotation_prometheus_io_port
  # Copy the pod's Kubernetes labels onto the target.
  - regex: __meta_kubernetes_pod_label_(.+)
    action: labelmap
  # Expose namespace and pod name as regular labels.
  - action: replace
    target_label: kubernetes_namespace
    source_labels:
    - __meta_kubernetes_namespace
  - action: replace
    target_label: kubernetes_pod_name
    source_labels:
    - __meta_kubernetes_pod_name
# PostgreSQL exporter, discovered as a Service in the application namespaces.
- job_name: postgres-exporter
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  # Keep services labelled app=postgres-exporter ...
  - action: keep
    regex: postgres-exporter
    source_labels:
    - __meta_kubernetes_service_label_app
  # ... and of those, only the port named "metrics".
  - action: keep
    regex: metrics
    source_labels:
    - __meta_kubernetes_service_port_name
# Redis exporter, discovered as a Service in the application namespaces.
- job_name: redis-exporter
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  # Keep services labelled app=redis-exporter ...
  - action: keep
    regex: redis-exporter
    source_labels:
    - __meta_kubernetes_service_label_app
  # ... and of those, only the port named "metrics".
  - action: keep
    regex: metrics
    source_labels:
    - __meta_kubernetes_service_port_name
# NGINX Ingress Controller
# Scrapes ingress-nginx controller pods that opt in through the
# prometheus.io/scrape annotation.
- job_name: 'nginx-ingress'
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names:
      - ingress-nginx
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
    action: keep
    regex: ingress-nginx
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  # Default the scrape port to 10254 while keeping the discovered host.
  # The host must come from __address__: using the port annotation as the
  # source would yield "<port>:10254", which is not a reachable address.
  - source_labels: [__address__]
    action: replace
    regex: '([^:]+)(?::\d+)?'
    replacement: '$1:10254'
    target_label: __address__
  # A numeric prometheus.io/port annotation overrides the default port.
  # Joined input is "<address>;<port annotation>"; with no annotation the
  # regex does not match and the address set above is kept.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  scrape_interval: 30s
# Blackbox probing of Services annotated with prometheus.io/probe: "true".
- job_name: kubernetes-services
  # The scrape goes to the blackbox exporter's probe endpoint using the
  # http_2xx module.
  metrics_path: /probe
  params:
    module:
    - http_2xx
  kubernetes_sd_configs:
  - role: service
  relabel_configs:
  # Only services that opt in to probing.
  - action: keep
    regex: true
    source_labels:
    - __meta_kubernetes_service_annotation_prometheus_io_probe
  # The discovered service address becomes the ?target= query parameter ...
  - target_label: __param_target
    source_labels:
    - __address__
  # ... and the request itself is sent to the blackbox exporter.
  - replacement: 'blackbox-exporter:9115'
    target_label: __address__
  # Report the probed address, not the exporter, as the instance.
  - target_label: instance
    source_labels:
    - __param_target
  # Copy the service's Kubernetes labels onto the target.
  - regex: __meta_kubernetes_service_label_(.+)
    action: labelmap
# Blackbox probing of the public production and staging URLs.
- job_name: blackbox-http
  scrape_interval: 60s
  metrics_path: /probe
  params:
    module:
    - http_2xx
  static_configs:
  - targets: ['https://wifi-densepose.com', 'https://staging.wifi-densepose.com']
  relabel_configs:
  # The listed URL becomes the ?target= query parameter ...
  - target_label: __param_target
    source_labels:
    - __address__
  # ... is reported as the instance label ...
  - target_label: instance
    source_labels:
    - __param_target
  # ... and the request itself is sent to the blackbox exporter.
  - replacement: 'blackbox-exporter:9115'
    target_label: __address__
# cAdvisor for container metrics
# Container metrics are scraped per node through the API server proxy.
- job_name: 'kubernetes-cadvisor'
  kubernetes_sd_configs:
  - role: node
  scheme: https
  tls_config:
    # Every request goes to kubernetes.default.svc:443 (see relabeling below),
    # so the certificate is verified against the in-cluster CA. Verification is
    # intentionally left enabled to protect the bearer token sent per scrape.
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Copy every Kubernetes node label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Send the scrape to the API server instead of the node address.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  # Build the per-node cAdvisor proxy path from the node name.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
  scrape_interval: 30s
# kube-state-metrics, discovered as a Service in kube-system.
- job_name: kube-state-metrics
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [kube-system]
  relabel_configs:
  # Keep only services labelled app.kubernetes.io/name=kube-state-metrics.
  - action: keep
    regex: kube-state-metrics
    source_labels:
    - __meta_kubernetes_service_label_app_kubernetes_io_name
# CoreDNS pods in kube-system (selected by the k8s-app=kube-dns label).
- job_name: coredns
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names: [kube-system]
  relabel_configs:
  # Keep pods labelled k8s-app=kube-dns ...
  - action: keep
    regex: kube-dns
    source_labels:
    - __meta_kubernetes_pod_label_k8s_app
  # ... and of those, only the container port named "metrics".
  - action: keep
    regex: metrics
    source_labels:
    - __meta_kubernetes_pod_container_port_name
# Kubernetes Ingress
# Blackbox probing of Ingresses annotated with prometheus.io/probe: "true".
- job_name: 'kubernetes-ingresses'
  # The relabeling below redirects every scrape to the blackbox exporter, so
  # the request must hit its probe endpoint with a module selected. Without
  # these two settings the job would read the exporter's own /metrics page
  # and never probe the ingress URL.
  metrics_path: /probe
  params:
    module: [http_2xx]
  kubernetes_sd_configs:
  - role: ingress
  relabel_configs:
  - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
    action: keep
    regex: true
  # Assemble "<scheme>://<host><path>" as the ?target= query parameter.
  - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
    regex: (.+);(.+);(.+)
    replacement: '${1}://${2}${3}'
    target_label: __param_target
  # Send the request itself to the blackbox exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
  # Report the probed URL, not the exporter, as the instance.
  - source_labels: [__param_target]
    target_label: instance
  # Copy the ingress' Kubernetes labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_ingress_label_(.+)
# Long-term storage: samples are forwarded to a remote-write endpoint.
remote_write:
- url: 'https://prometheus-remote-write.monitoring.svc.cluster.local/api/v1/write'
  # Series whose metric name starts with "go_" are dropped before sending.
  write_relabel_configs:
  - action: drop
    regex: 'go_.*'
    source_labels:
    - __name__
  # Send-queue tuning.
  queue_config:
    capacity: 2500
    max_shards: 200
    max_samples_per_send: 1000
# Storage configuration
# TSDB retention and WAL compression are command-line flags of the Prometheus
# server, not configuration-file settings. Keys such as `retention.time` under
# `storage.tsdb` are unknown to the config loader and make it reject the file.
# Pass the intended values as arguments to the Prometheus binary instead:
#   --storage.tsdb.retention.time=15d
#   --storage.tsdb.retention.size=50GB
#   --storage.tsdb.wal-compression
# NOTE(review): confirm the flag set against the deployed Prometheus version.
# Feature flags
# Prometheus has no `feature_flags` configuration-file key; an unknown
# top-level key makes the config loader reject the file. Features are enabled
# with command-line flags on the Prometheus binary instead:
#   --enable-feature=promql-at-modifier
#   --web.enable-remote-write-receiver   (remote-write receiver)
# NOTE(review): on recent releases the @ modifier is presumably enabled by
# default and the first flag unnecessary - confirm for the deployed version.