updates
This commit is contained in:
325
monitoring/prometheus-config.yml
Normal file
325
monitoring/prometheus-config.yml
Normal file
@@ -0,0 +1,325 @@
|
||||
# Prometheus configuration for WiFi-DensePose.
# Sets up comprehensive monitoring for the WiFi-DensePose application.

global:
  # Default scrape period; individual jobs below override it where needed.
  scrape_interval: 15s
  # How often alerting and recording rules are evaluated.
  evaluation_interval: 15s
  # Labels attached to series and alerts when they leave this server
  # (remote write, federation, Alertmanager).
  external_labels:
    cluster: wifi-densepose
    environment: production

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    # One statically configured Alertmanager endpoint.
    - targets: ['alertmanager:9093']

# Rule files are loaded once and then evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
- 'alerting-rules.yml'
- 'recording-rules.yml'

# Scrape configuration
scrape_configs:
# Prometheus itself
- job_name: prometheus
  scrape_interval: 30s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'localhost:9090'

# Kubernetes API Server
# Discovers endpoints in the 'default' namespace and keeps only the 'https'
# port of the 'kubernetes' service, i.e. the API servers themselves.
- job_name: kubernetes-apiservers
  kubernetes_sd_configs:
  - role: endpoints
    namespaces:
      names:
      - default
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # Verify the server certificate against the CA above. With verification
    # skipped, ca_file is ignored and the service-account bearer token is sent
    # over an unauthenticated TLS connection.
    insecure_skip_verify: false
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  - source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_service_name
    - __meta_kubernetes_endpoint_port_name
    action: keep
    regex: default;kubernetes;https

# Kubernetes Nodes
# Scrapes every node's metrics endpoint through the API server's node proxy.
- job_name: kubernetes-nodes
  kubernetes_sd_configs:
  - role: node
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # All scrapes are sent to kubernetes.default.svc:443 (see the __address__
    # rewrite below), so the certificate can be checked against the cluster CA.
    # Skipping verification would expose the bearer token to an unverified peer.
    insecure_skip_verify: false
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Copy Kubernetes node labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Route the scrape to the API server instead of the node address.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  # Use the per-node proxy path so the API server forwards to that node.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics

# Kubernetes Node Exporter
# Keeps only endpoints belonging to the Endpoints object named 'node-exporter'.
- job_name: kubernetes-node-exporter
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - source_labels: [__meta_kubernetes_endpoints_name]
    action: keep
    regex: node-exporter
  # Populate 'node' with the name of the node hosting the endpoint. The
  # endpoint's target name is the backing Pod's name, so it must not be used
  # for a label called 'node'.
  - source_labels: [__meta_kubernetes_endpoint_node_name]
    target_label: node
  # Copy the Service's labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)

# Kubernetes Pods
# Generic annotation-driven scraping of any pod carrying
# prometheus.io/scrape: "true" (optionally prometheus.io/path and
# prometheus.io/port).
- job_name: kubernetes-pods
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: 'true'
  # The application pods are already scraped by the dedicated
  # 'wifi-densepose-app' job in this file; drop them here so the same series
  # are not ingested twice under two different job labels.
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app]
    action: drop
    regex: 'wifi-densepose(-staging)?;wifi-densepose'
  # Override the metrics path when the pod declares one.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  # Replace the discovered port with the annotated one, keeping the pod host.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  # Copy pod labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# WiFi-DensePose Application
# Pods labelled app=wifi-densepose in the production and staging namespaces,
# scraped more frequently than the global default.
- job_name: wifi-densepose-app
  scrape_interval: 10s
  metrics_path: /metrics
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  # Keep only the application's own pods ...
  - action: keep
    source_labels:
    - __meta_kubernetes_pod_label_app
    regex: wifi-densepose
  # ... that opted in through the prometheus.io/scrape annotation.
  - action: keep
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_scrape
    regex: 'true'
  # A prometheus.io/path annotation, when set, overrides metrics_path.
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_annotation_prometheus_io_path
    regex: '(.+)'
    target_label: __metrics_path__
  # A prometheus.io/port annotation, when set, replaces the discovered port.
  - action: replace
    source_labels:
    - __address__
    - __meta_kubernetes_pod_annotation_prometheus_io_port
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  # Copy pod labels onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_pod_label_(.+)'
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: kubernetes_namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: kubernetes_pod_name

# PostgreSQL Exporter
# Services labelled app=postgres-exporter, restricted to their 'metrics' port.
- job_name: postgres-exporter
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_service_label_app
    regex: postgres-exporter
  - action: keep
    source_labels:
    - __meta_kubernetes_service_port_name
    regex: metrics

# Redis Exporter
# Services labelled app=redis-exporter, restricted to their 'metrics' port.
- job_name: redis-exporter
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [wifi-densepose, wifi-densepose-staging]
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_service_label_app
    regex: redis-exporter
  - action: keep
    source_labels:
    - __meta_kubernetes_service_port_name
    regex: metrics

# NGINX Ingress Controller
# Annotated ingress-nginx controller pods in the 'ingress-nginx' namespace.
- job_name: nginx-ingress
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names:
      - ingress-nginx
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
    action: keep
    regex: ingress-nginx
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: 'true'
  # Keep the pod host from the discovered address and swap in the annotated
  # port. Building __address__ from the port annotation alone would yield
  # '<port>:10254', which has no host to connect to.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__

# Kubernetes Services
# HTTP probing, via the blackbox exporter, of services annotated with
# prometheus.io/probe: "true".
- job_name: kubernetes-services
  metrics_path: /probe
  params:
    module:
    - http_2xx
  kubernetes_sd_configs:
  - role: service
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_service_annotation_prometheus_io_probe
    regex: 'true'
  # The discovered service address becomes the probe's ?target= parameter ...
  - source_labels:
    - __address__
    target_label: __param_target
  # ... while the scrape itself is sent to the blackbox exporter.
  - replacement: 'blackbox-exporter:9115'
    target_label: __address__
  # Report the probed address, not the exporter, as the instance.
  - source_labels:
    - __param_target
    target_label: instance
  # Copy service labels onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_service_label_(.+)'

# Blackbox Exporter for external endpoints
# The static targets are the URLs to probe, not hosts to scrape.
- job_name: blackbox-http
  scrape_interval: 60s
  metrics_path: /probe
  params:
    module:
    - http_2xx
  static_configs:
  - targets:
    - 'https://wifi-densepose.com'
    - 'https://staging.wifi-densepose.com'
  relabel_configs:
  # Pass the URL to the exporter as the ?target= parameter.
  - source_labels:
    - __address__
    target_label: __param_target
  # Keep the probed URL as the instance label.
  - source_labels:
    - __param_target
    target_label: instance
  # Send the actual scrape to the blackbox exporter.
  - replacement: 'blackbox-exporter:9115'
    target_label: __address__

# cAdvisor for container metrics
# Scrapes every node's cAdvisor endpoint through the API server's node proxy.
- job_name: kubernetes-cadvisor
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: node
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # All scrapes are sent to kubernetes.default.svc:443 (see the __address__
    # rewrite below), so the certificate can be checked against the cluster CA.
    # Skipping verification would expose the bearer token to an unverified peer.
    insecure_skip_verify: false
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Copy Kubernetes node labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Route the scrape to the API server instead of the node address.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  # Use the per-node proxy path for the cAdvisor metrics endpoint.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

# Kube State Metrics
# Services in kube-system labelled app.kubernetes.io/name=kube-state-metrics.
- job_name: kube-state-metrics
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: service
    namespaces:
      names: [kube-system]
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_service_label_app_kubernetes_io_name
    regex: kube-state-metrics

# CoreDNS
# Pods in kube-system labelled k8s-app=kube-dns, restricted to the container
# port named 'metrics'.
- job_name: coredns
  scrape_interval: 30s
  kubernetes_sd_configs:
  - role: pod
    namespaces:
      names: [kube-system]
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_pod_label_k8s_app
    regex: kube-dns
  - action: keep
    source_labels:
    - __meta_kubernetes_pod_container_port_name
    regex: metrics

# Kubernetes Ingress
# HTTP probing, via the blackbox exporter, of ingresses annotated with
# prometheus.io/probe: "true".
- job_name: kubernetes-ingresses
  # The scrape is redirected to the blackbox exporter below, so it must hit the
  # exporter's probe endpoint with a module. Without these two settings the job
  # would scrape the exporter's own /metrics and never probe the ingress.
  metrics_path: /probe
  params:
    module: [http_2xx]
  kubernetes_sd_configs:
  - role: ingress
  relabel_configs:
  - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
    action: keep
    regex: 'true'
  # Assemble '<scheme>://<host><path>' as the probe's ?target= parameter.
  - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
    regex: (.+);(.+);(.+)
    replacement: '${1}://${2}${3}'
    target_label: __param_target
  # Send the actual scrape to the blackbox exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
  # Report the probed URL, not the exporter, as the instance.
  - source_labels: [__param_target]
    target_label: instance
  # Copy ingress labels onto the target.
  - action: labelmap
    regex: __meta_kubernetes_ingress_label_(.+)

# Remote write configuration for long-term storage
remote_write:
- url: 'https://prometheus-remote-write.monitoring.svc.cluster.local/api/v1/write'
  # Series whose metric name starts with 'go_' are not shipped to remote storage.
  write_relabel_configs:
  - action: drop
    source_labels:
    - __name__
    regex: 'go_.*'
  # Sending queue tuning.
  queue_config:
    capacity: 2500
    max_shards: 200
    max_samples_per_send: 1000

# Storage configuration
# TSDB retention and WAL compression are server command-line flags, not
# configuration-file fields. The configuration loader rejects unknown fields,
# so keeping 'retention.time', 'retention.size' or 'wal-compression' in this
# file prevents it from loading. Pass the intended values to the Prometheus
# binary instead:
#   --storage.tsdb.retention.time=15d
#   --storage.tsdb.retention.size=50GB
#   --storage.tsdb.wal-compression

# Feature flags
# 'feature_flags' is not a configuration-file section, and the configuration
# loader rejects unknown top-level keys. Features are enabled on the Prometheus
# command line instead:
#   --enable-feature=promql-at-modifier,remote-write-receiver
# NOTE(review): newer releases enable the PromQL '@' modifier by default and
# expose the receiver through --web.enable-remote-write-receiver; confirm the
# exact flags against the deployed Prometheus version.

Reference in New Issue
Block a user