Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,825 @@
//! Problem Detection for Self-Healing Engine
//!
//! Implements continuous monitoring and problem classification:
//! - IndexDegradation: Index performance has degraded
//! - ReplicaLag: Replica is falling behind primary
//! - StorageExhaustion: Storage space is running low
//! - QueryTimeout: Queries are timing out excessively
//! - IntegrityViolation: Graph integrity has been compromised
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::SystemTime;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
// ============================================================================
// Problem Types
// ============================================================================
/// Types of problems that can be detected.
///
/// The snake_case form used in JSON payloads is produced by the `Display`
/// impl and accepted back by `FromStr`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProblemType {
    /// Index performance has degraded (fragmentation, poor connectivity)
    IndexDegradation,
    /// Replica is lagging behind primary
    ReplicaLag,
    /// Storage space is running low
    StorageExhaustion,
    /// Queries are timing out excessively
    QueryTimeout,
    /// Graph integrity has been violated (mincut below threshold)
    IntegrityViolation,
    /// Memory pressure is high
    MemoryPressure,
    /// Connection pool exhaustion
    ConnectionExhaustion,
    /// Hot partition detected (uneven load distribution)
    HotPartition,
}
impl ProblemType {
/// Get human-readable description
pub fn description(&self) -> &'static str {
match self {
ProblemType::IndexDegradation => "Index performance degradation detected",
ProblemType::ReplicaLag => "Replica lag exceeds threshold",
ProblemType::StorageExhaustion => "Storage space running low",
ProblemType::QueryTimeout => "Excessive query timeouts",
ProblemType::IntegrityViolation => "Graph integrity violation",
ProblemType::MemoryPressure => "Memory pressure detected",
ProblemType::ConnectionExhaustion => "Connection pool exhausted",
ProblemType::HotPartition => "Hot partition detected",
}
}
/// Get all problem types
pub fn all() -> Vec<ProblemType> {
vec![
ProblemType::IndexDegradation,
ProblemType::ReplicaLag,
ProblemType::StorageExhaustion,
ProblemType::QueryTimeout,
ProblemType::IntegrityViolation,
ProblemType::MemoryPressure,
ProblemType::ConnectionExhaustion,
ProblemType::HotPartition,
]
}
}
impl std::fmt::Display for ProblemType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ProblemType::IndexDegradation => write!(f, "index_degradation"),
ProblemType::ReplicaLag => write!(f, "replica_lag"),
ProblemType::StorageExhaustion => write!(f, "storage_exhaustion"),
ProblemType::QueryTimeout => write!(f, "query_timeout"),
ProblemType::IntegrityViolation => write!(f, "integrity_violation"),
ProblemType::MemoryPressure => write!(f, "memory_pressure"),
ProblemType::ConnectionExhaustion => write!(f, "connection_exhaustion"),
ProblemType::HotPartition => write!(f, "hot_partition"),
}
}
}
impl std::str::FromStr for ProblemType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"index_degradation" | "indexdegradation" => Ok(ProblemType::IndexDegradation),
"replica_lag" | "replicalag" => Ok(ProblemType::ReplicaLag),
"storage_exhaustion" | "storageexhaustion" => Ok(ProblemType::StorageExhaustion),
"query_timeout" | "querytimeout" => Ok(ProblemType::QueryTimeout),
"integrity_violation" | "integrityviolation" => Ok(ProblemType::IntegrityViolation),
"memory_pressure" | "memorypressure" => Ok(ProblemType::MemoryPressure),
"connection_exhaustion" | "connectionexhaustion" => {
Ok(ProblemType::ConnectionExhaustion)
}
"hot_partition" | "hotpartition" => Ok(ProblemType::HotPartition),
_ => Err(format!("Unknown problem type: {}", s)),
}
}
}
// ============================================================================
// Severity Levels
// ============================================================================
/// Problem severity levels.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so
/// `Info < Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Severity {
    /// Informational, no action required
    Info,
    /// Low severity, can be addressed during maintenance
    Low,
    /// Medium severity, should be addressed soon
    Medium,
    /// High severity, requires prompt attention
    High,
    /// Critical severity, immediate action required
    Critical,
}
impl Severity {
/// Get numeric value for comparison
pub fn value(&self) -> u8 {
match self {
Severity::Info => 0,
Severity::Low => 1,
Severity::Medium => 2,
Severity::High => 3,
Severity::Critical => 4,
}
}
}
// ============================================================================
// Problem Definition
// ============================================================================
/// A detected problem with full context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Problem {
    /// Type of problem
    pub problem_type: ProblemType,
    /// Severity level
    pub severity: Severity,
    /// When the problem was detected (serialized as unix seconds via
    /// `system_time_serde`)
    #[serde(with = "system_time_serde")]
    pub detected_at: SystemTime,
    /// Additional free-form details about the problem (shape varies per
    /// problem type; see `ProblemDetector::detect_problems`)
    pub details: serde_json::Value,
    /// Affected partition IDs (if applicable; empty when not partition-scoped)
    pub affected_partitions: Vec<i64>,
}
impl Problem {
    /// Create a new problem of the given type and severity, timestamped now,
    /// with empty details and no affected partitions.
    pub fn new(problem_type: ProblemType, severity: Severity) -> Self {
        Self {
            problem_type,
            severity,
            detected_at: SystemTime::now(),
            details: serde_json::json!({}),
            affected_partitions: vec![],
        }
    }

    /// Builder-style setter for the details payload.
    pub fn with_details(mut self, details: serde_json::Value) -> Self {
        self.details = details;
        self
    }

    /// Builder-style setter for the affected partition IDs.
    pub fn with_partitions(mut self, partitions: Vec<i64>) -> Self {
        self.affected_partitions = partitions;
        self
    }

    /// Convert to a JSON object with `detected_at` as unix seconds.
    pub fn to_json(&self) -> serde_json::Value {
        let detected_ts = self
            .detected_at
            .duration_since(std::time::UNIX_EPOCH)
            // A clock set before the epoch previously panicked here; fall
            // back to 0 seconds instead.
            .unwrap_or_default()
            .as_secs();
        serde_json::json!({
            "problem_type": self.problem_type.to_string(),
            "severity": format!("{:?}", self.severity).to_lowercase(),
            "detected_at": detected_ts,
            "details": self.details,
            "affected_partitions": self.affected_partitions,
        })
    }
}
// Custom serde for SystemTime
mod system_time_serde {
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let duration = time.duration_since(UNIX_EPOCH).unwrap();
duration.as_secs().serialize(serializer)
}
pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
where
D: Deserializer<'de>,
{
let secs = u64::deserialize(deserializer)?;
Ok(UNIX_EPOCH + Duration::from_secs(secs))
}
}
// ============================================================================
// Detection Thresholds
// ============================================================================
/// Configurable thresholds for problem detection.
///
/// A metric crossing its threshold causes `ProblemDetector::detect_problems`
/// to emit a `Problem`; severity escalation points are hard-coded in the
/// detector itself.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectionThresholds {
    /// Index fragmentation percentage threshold (0-100)
    pub index_fragmentation_pct: f32,
    /// Replica lag in seconds threshold
    pub replica_lag_seconds: f32,
    /// Storage usage percentage threshold (0-100)
    pub storage_usage_pct: f32,
    /// Query timeout rate threshold (0-1)
    pub query_timeout_rate: f32,
    /// Minimum lambda (mincut) value for integrity
    pub min_integrity_lambda: f32,
    /// Memory usage percentage threshold (0-100)
    pub memory_usage_pct: f32,
    /// Connection pool usage percentage threshold (0-100)
    pub connection_usage_pct: f32,
    /// Partition load ratio threshold (vs average load across partitions)
    pub partition_load_ratio: f32,
}
impl Default for DetectionThresholds {
fn default() -> Self {
Self {
index_fragmentation_pct: 30.0,
replica_lag_seconds: 5.0,
storage_usage_pct: 85.0,
query_timeout_rate: 0.05, // 5% timeout rate
min_integrity_lambda: 0.5,
memory_usage_pct: 85.0,
connection_usage_pct: 90.0,
partition_load_ratio: 3.0, // 3x average load
}
}
}
// ============================================================================
// System Metrics
// ============================================================================
/// System metrics collected for problem detection.
///
/// All percentage fields are 0-100; rates are 0-1. `Default` yields an
/// all-healthy snapshot (note: a defaulted `integrity_lambda` of 0.0 is
/// treated as "not collected" by the detector, not as a violation).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SystemMetrics {
    /// Index fragmentation percentage per index (keyed by index name)
    pub index_fragmentation: HashMap<String, f32>,
    /// Replica lag in seconds per replica (keyed by replica id)
    pub replica_lag: HashMap<String, f32>,
    /// Storage usage percentage
    pub storage_usage_pct: f32,
    /// Query timeout rate (0-1)
    pub query_timeout_rate: f32,
    /// Current integrity lambda value (0.0 means "not collected")
    pub integrity_lambda: f32,
    /// Memory usage percentage
    pub memory_usage_pct: f32,
    /// Connection pool usage percentage
    pub connection_usage_pct: f32,
    /// Load per partition (keyed by partition id)
    pub partition_loads: HashMap<i64, f64>,
    /// Witness edges from mincut computation
    pub witness_edges: Vec<WitnessEdge>,
    /// Maintenance queue depth
    pub maintenance_queue_depth: usize,
    /// Top memory consumers as (name, bytes) pairs
    /// (assumed units — TODO confirm against the producer)
    pub top_memory_consumers: Vec<(String, usize)>,
    /// Fragmented index IDs
    pub fragmented_indexes: Vec<i64>,
    /// Timestamp of metrics collection (unix seconds)
    pub collected_at: u64,
}
impl SystemMetrics {
    /// Create a new, empty (all-healthy) metrics snapshot timestamped now.
    pub fn new() -> Self {
        Self {
            collected_at: SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                // A clock set before the epoch previously panicked here;
                // record 0 instead.
                .unwrap_or_default()
                .as_secs(),
            ..Default::default()
        }
    }

    /// Convert to a JSON summary. Witness edges are reported as a count
    /// only; per-edge detail is omitted.
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "index_fragmentation": self.index_fragmentation,
            "replica_lag": self.replica_lag,
            "storage_usage_pct": self.storage_usage_pct,
            "query_timeout_rate": self.query_timeout_rate,
            "integrity_lambda": self.integrity_lambda,
            "memory_usage_pct": self.memory_usage_pct,
            "connection_usage_pct": self.connection_usage_pct,
            "partition_loads": self.partition_loads,
            "witness_edge_count": self.witness_edges.len(),
            "maintenance_queue_depth": self.maintenance_queue_depth,
            "collected_at": self.collected_at,
        })
    }
}
/// Witness edge from mincut computation.
///
/// A witness edge identifies a graph edge that participates in the minimum
/// cut; see `SystemMetrics::witness_edges`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WitnessEdge {
    /// Source node ID
    pub from: i64,
    /// Target node ID
    pub to: i64,
    /// Edge type (e.g., "partition_link", "replication", "dependency")
    pub edge_type: String,
    /// Edge weight/capacity
    pub weight: f32,
}
// ============================================================================
// Problem Detector
// ============================================================================
/// Problem detector with configurable thresholds.
///
/// Thread-safe: thresholds live behind a `parking_lot::RwLock` and the
/// counters are atomics, so `&self` methods may be called concurrently.
pub struct ProblemDetector {
    /// Detection thresholds (hot-swappable via `update_thresholds`)
    thresholds: RwLock<DetectionThresholds>,
    /// Cumulative number of problems detected
    problems_detected: AtomicU64,
    /// Last detection timestamp (unix seconds; 0 = never)
    last_detection: AtomicU64,
}
impl ProblemDetector {
    /// Create a new problem detector with default thresholds.
    pub fn new() -> Self {
        Self {
            thresholds: RwLock::new(DetectionThresholds::default()),
            problems_detected: AtomicU64::new(0),
            last_detection: AtomicU64::new(0),
        }
    }

    /// Create a detector with caller-supplied thresholds.
    pub fn with_thresholds(thresholds: DetectionThresholds) -> Self {
        Self {
            thresholds: RwLock::new(thresholds),
            problems_detected: AtomicU64::new(0),
            last_detection: AtomicU64::new(0),
        }
    }

    /// Replace the detection thresholds.
    pub fn update_thresholds(&self, thresholds: DetectionThresholds) {
        *self.thresholds.write() = thresholds;
    }

    /// Get a snapshot of the current thresholds.
    pub fn get_thresholds(&self) -> DetectionThresholds {
        self.thresholds.read().clone()
    }

    /// Collect current system metrics.
    ///
    /// In production the private helpers below would query PostgreSQL system
    /// catalogs (pg_stat_user_indexes, pg_stat_replication, pg_tablespace,
    /// pg_stat_statements, pg_stat_activity) and the integrity control plane.
    /// The current implementations return healthy defaults.
    pub fn collect_metrics(&self) -> SystemMetrics {
        let mut metrics = SystemMetrics::new();
        metrics.index_fragmentation = self.collect_index_fragmentation();
        metrics.replica_lag = self.collect_replica_lag();
        metrics.storage_usage_pct = self.collect_storage_usage();
        metrics.query_timeout_rate = self.collect_query_timeout_rate();
        metrics.integrity_lambda = self.collect_integrity_lambda();
        metrics.memory_usage_pct = self.collect_memory_usage();
        metrics.connection_usage_pct = self.collect_connection_usage();
        metrics.partition_loads = self.collect_partition_loads();
        metrics.witness_edges = self.collect_witness_edges();
        metrics
    }

    /// Detect problems from collected metrics.
    ///
    /// Each metric is checked against the configured thresholds; severities
    /// escalate at hard-coded break points per problem type. Updates the
    /// detector's `problems_detected` / `last_detection` statistics as a
    /// side effect.
    pub fn detect_problems(&self, metrics: &SystemMetrics) -> Vec<Problem> {
        let thresholds = self.thresholds.read();
        let mut problems = Vec::new();

        // Check index fragmentation (per index).
        for (index_name, frag_pct) in &metrics.index_fragmentation {
            if *frag_pct > thresholds.index_fragmentation_pct {
                let severity = if *frag_pct > 60.0 {
                    Severity::High
                } else if *frag_pct > 45.0 {
                    Severity::Medium
                } else {
                    Severity::Low
                };
                problems.push(
                    Problem::new(ProblemType::IndexDegradation, severity).with_details(
                        serde_json::json!({
                            "index_name": index_name,
                            "fragmentation_pct": frag_pct,
                            "threshold": thresholds.index_fragmentation_pct,
                        }),
                    ),
                );
            }
        }

        // Check replica lag (per replica).
        for (replica_id, lag_seconds) in &metrics.replica_lag {
            if *lag_seconds > thresholds.replica_lag_seconds {
                let severity = if *lag_seconds > 30.0 {
                    Severity::Critical
                } else if *lag_seconds > 15.0 {
                    Severity::High
                } else if *lag_seconds > 10.0 {
                    Severity::Medium
                } else {
                    Severity::Low
                };
                problems.push(
                    Problem::new(ProblemType::ReplicaLag, severity).with_details(
                        serde_json::json!({
                            "replica_id": replica_id,
                            "lag_seconds": lag_seconds,
                            "threshold": thresholds.replica_lag_seconds,
                        }),
                    ),
                );
            }
        }

        // Check storage usage.
        if metrics.storage_usage_pct > thresholds.storage_usage_pct {
            let severity = if metrics.storage_usage_pct > 95.0 {
                Severity::Critical
            } else if metrics.storage_usage_pct > 90.0 {
                Severity::High
            } else {
                Severity::Medium
            };
            problems.push(
                Problem::new(ProblemType::StorageExhaustion, severity).with_details(
                    serde_json::json!({
                        "usage_pct": metrics.storage_usage_pct,
                        "threshold": thresholds.storage_usage_pct,
                    }),
                ),
            );
        }

        // Check query timeout rate.
        if metrics.query_timeout_rate > thresholds.query_timeout_rate {
            let severity = if metrics.query_timeout_rate > 0.20 {
                Severity::Critical
            } else if metrics.query_timeout_rate > 0.10 {
                Severity::High
            } else {
                Severity::Medium
            };
            problems.push(
                Problem::new(ProblemType::QueryTimeout, severity).with_details(serde_json::json!({
                    "timeout_rate": metrics.query_timeout_rate,
                    "threshold": thresholds.query_timeout_rate,
                })),
            );
        }

        // Check integrity lambda. A lambda of exactly 0.0 means "not
        // collected" (SystemMetrics::default), so it is deliberately excluded.
        if metrics.integrity_lambda < thresholds.min_integrity_lambda
            && metrics.integrity_lambda > 0.0
        {
            let severity = if metrics.integrity_lambda < 0.2 {
                Severity::Critical
            } else if metrics.integrity_lambda < 0.35 {
                Severity::High
            } else {
                Severity::Medium
            };
            problems.push(
                Problem::new(ProblemType::IntegrityViolation, severity).with_details(
                    serde_json::json!({
                        "lambda": metrics.integrity_lambda,
                        "threshold": thresholds.min_integrity_lambda,
                        "witness_edges": metrics.witness_edges.len(),
                    }),
                ),
            );
        }

        // Check memory pressure.
        if metrics.memory_usage_pct > thresholds.memory_usage_pct {
            let severity = if metrics.memory_usage_pct > 95.0 {
                Severity::Critical
            } else if metrics.memory_usage_pct > 90.0 {
                Severity::High
            } else {
                Severity::Medium
            };
            problems.push(
                Problem::new(ProblemType::MemoryPressure, severity).with_details(
                    serde_json::json!({
                        "usage_pct": metrics.memory_usage_pct,
                        "threshold": thresholds.memory_usage_pct,
                    }),
                ),
            );
        }

        // Check connection exhaustion.
        if metrics.connection_usage_pct > thresholds.connection_usage_pct {
            let severity = if metrics.connection_usage_pct > 98.0 {
                Severity::Critical
            } else if metrics.connection_usage_pct > 95.0 {
                Severity::High
            } else {
                Severity::Medium
            };
            problems.push(
                Problem::new(ProblemType::ConnectionExhaustion, severity).with_details(
                    serde_json::json!({
                        "usage_pct": metrics.connection_usage_pct,
                        "threshold": thresholds.connection_usage_pct,
                    }),
                ),
            );
        }

        // Check for hot partitions: any partition whose load exceeds
        // `partition_load_ratio` times the average load.
        if !metrics.partition_loads.is_empty() {
            let avg_load: f64 = metrics.partition_loads.values().sum::<f64>()
                / metrics.partition_loads.len() as f64;
            let hot_partitions: Vec<i64> = metrics
                .partition_loads
                .iter()
                .filter(|(_, load)| **load > avg_load * thresholds.partition_load_ratio as f64)
                .map(|(id, _)| *id)
                .collect();
            if !hot_partitions.is_empty() {
                // Severity scales with the worst offender's load-to-average ratio.
                let max_ratio = hot_partitions
                    .iter()
                    .filter_map(|id| metrics.partition_loads.get(id))
                    .map(|load| *load / avg_load)
                    .fold(0.0_f64, f64::max);
                let severity = if max_ratio > 10.0 {
                    Severity::High
                } else if max_ratio > 5.0 {
                    Severity::Medium
                } else {
                    Severity::Low
                };
                problems.push(
                    Problem::new(ProblemType::HotPartition, severity)
                        .with_details(serde_json::json!({
                            "avg_load": avg_load,
                            "max_ratio": max_ratio,
                            "threshold_ratio": thresholds.partition_load_ratio,
                        }))
                        .with_partitions(hot_partitions),
                );
            }
        }

        // Update statistics.
        self.problems_detected
            .fetch_add(problems.len() as u64, Ordering::SeqCst);
        self.last_detection.store(
            SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                // A clock set before the epoch previously panicked here;
                // store 0 instead.
                .unwrap_or_default()
                .as_secs(),
            Ordering::SeqCst,
        );
        problems
    }

    /// Get detection statistics (cumulative count + last run timestamp).
    pub fn get_stats(&self) -> DetectorStats {
        DetectorStats {
            problems_detected: self.problems_detected.load(Ordering::SeqCst),
            last_detection: self.last_detection.load(Ordering::SeqCst),
        }
    }

    // ========================================================================
    // Metric Collection Helpers (would use SPI in production)
    // ========================================================================

    /// Fragmentation percentage per index. Production: pg_stat_user_indexes.
    fn collect_index_fragmentation(&self) -> HashMap<String, f32> {
        // Empty map = healthy (no fragmented indexes).
        HashMap::new()
    }

    /// Lag in seconds per replica. Production: pg_stat_replication.
    fn collect_replica_lag(&self) -> HashMap<String, f32> {
        HashMap::new()
    }

    /// Storage usage percentage. Production: pg_tablespace sizes.
    fn collect_storage_usage(&self) -> f32 {
        0.0
    }

    /// Query timeout rate (0-1). Production: pg_stat_statements.
    fn collect_query_timeout_rate(&self) -> f32 {
        0.0
    }

    /// Integrity lambda. Production: integrity control plane.
    fn collect_integrity_lambda(&self) -> f32 {
        // 1.0 is the healthy default (well above any sane threshold).
        1.0
    }

    /// Memory usage percentage. Production: pg_shmem_allocations / OS metrics.
    fn collect_memory_usage(&self) -> f32 {
        0.0
    }

    /// Connection pool usage percentage. Production: pg_stat_activity vs
    /// max_connections.
    fn collect_connection_usage(&self) -> f32 {
        0.0
    }

    /// Load per partition. Production: partition statistics.
    fn collect_partition_loads(&self) -> HashMap<i64, f64> {
        HashMap::new()
    }

    /// Witness edges. Production: mincut computation.
    fn collect_witness_edges(&self) -> Vec<WitnessEdge> {
        Vec::new()
    }
}
impl Default for ProblemDetector {
fn default() -> Self {
Self::new()
}
}
/// Detector statistics, as returned by `ProblemDetector::get_stats`.
#[derive(Debug, Clone)]
pub struct DetectorStats {
    /// Cumulative number of problems detected since the detector was created
    pub problems_detected: u64,
    /// Unix-seconds timestamp of the last `detect_problems` run (0 = never)
    pub last_detection: u64,
}
#[cfg(test)]
mod tests {
    use super::*;

    // Display must emit the snake_case wire format.
    #[test]
    fn test_problem_type_display() {
        assert_eq!(
            ProblemType::IndexDegradation.to_string(),
            "index_degradation"
        );
        assert_eq!(ProblemType::ReplicaLag.to_string(), "replica_lag");
        assert_eq!(
            ProblemType::IntegrityViolation.to_string(),
            "integrity_violation"
        );
    }

    // FromStr must round-trip the snake_case spelling.
    #[test]
    fn test_problem_type_parse() {
        assert_eq!(
            "index_degradation".parse::<ProblemType>().unwrap(),
            ProblemType::IndexDegradation
        );
        assert_eq!(
            "replica_lag".parse::<ProblemType>().unwrap(),
            ProblemType::ReplicaLag
        );
    }

    // 50% fragmentation is above the default 30% threshold and falls in the
    // Medium band (45 < 50 <= 60).
    #[test]
    fn test_detect_index_degradation() {
        let detector = ProblemDetector::new();
        let mut metrics = SystemMetrics::new();
        metrics
            .index_fragmentation
            .insert("test_idx".to_string(), 50.0);
        let problems = detector.detect_problems(&metrics);
        assert_eq!(problems.len(), 1);
        assert_eq!(problems[0].problem_type, ProblemType::IndexDegradation);
        assert_eq!(problems[0].severity, Severity::Medium);
    }

    // 92% usage is above the default 85% threshold and in the High band
    // (90 < 92 <= 95).
    #[test]
    fn test_detect_storage_exhaustion() {
        let detector = ProblemDetector::new();
        let mut metrics = SystemMetrics::new();
        metrics.storage_usage_pct = 92.0;
        let problems = detector.detect_problems(&metrics);
        assert_eq!(problems.len(), 1);
        assert_eq!(problems[0].problem_type, ProblemType::StorageExhaustion);
        assert_eq!(problems[0].severity, Severity::High);
    }

    // Lambda 0.3 is below the default 0.5 minimum, in the High band
    // (0.2 <= 0.3 < 0.35).
    #[test]
    fn test_detect_integrity_violation() {
        let detector = ProblemDetector::new();
        let mut metrics = SystemMetrics::new();
        metrics.integrity_lambda = 0.3;
        let problems = detector.detect_problems(&metrics);
        assert_eq!(problems.len(), 1);
        assert_eq!(problems[0].problem_type, ProblemType::IntegrityViolation);
        assert_eq!(problems[0].severity, Severity::High);
    }

    // Partition 3 carries >3x the average load and must be flagged.
    #[test]
    fn test_detect_hot_partition() {
        let detector = ProblemDetector::new();
        let mut metrics = SystemMetrics::new();
        metrics.partition_loads.insert(1, 100.0);
        metrics.partition_loads.insert(2, 100.0);
        metrics.partition_loads.insert(3, 500.0); // Hot partition
        let problems = detector.detect_problems(&metrics);
        assert_eq!(problems.len(), 1);
        assert_eq!(problems[0].problem_type, ProblemType::HotPartition);
        assert!(problems[0].affected_partitions.contains(&3));
    }

    // Derived Ord follows variant declaration order.
    #[test]
    fn test_severity_ordering() {
        assert!(Severity::Critical > Severity::High);
        assert!(Severity::High > Severity::Medium);
        assert!(Severity::Medium > Severity::Low);
        assert!(Severity::Low > Severity::Info);
    }

    // A default (all-healthy) metrics snapshot yields no problems.
    #[test]
    fn test_healthy_metrics_no_problems() {
        let detector = ProblemDetector::new();
        let metrics = SystemMetrics::new();
        let problems = detector.detect_problems(&metrics);
        assert!(problems.is_empty());
    }

    // Lowering a threshold makes the detector more sensitive.
    #[test]
    fn test_custom_thresholds() {
        let thresholds = DetectionThresholds {
            index_fragmentation_pct: 10.0, // More sensitive
            ..Default::default()
        };
        let detector = ProblemDetector::with_thresholds(thresholds);
        let mut metrics = SystemMetrics::new();
        metrics
            .index_fragmentation
            .insert("test_idx".to_string(), 15.0);
        let problems = detector.detect_problems(&metrics);
        assert_eq!(problems.len(), 1);
        assert_eq!(problems[0].problem_type, ProblemType::IndexDegradation);
    }
}

View File

@@ -0,0 +1,788 @@
//! Remediation Engine for Self-Healing System
//!
//! Orchestrates remediation execution with:
//! - Strategy selection based on problem type and weights
//! - Execution with timeout and rollback capability
//! - Outcome verification
//! - Cooldown periods to prevent thrashing
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use super::detector::{Problem, ProblemType, SystemMetrics};
use super::learning::OutcomeTracker;
use super::strategies::{
RemediationResult, RemediationStrategy, StrategyContext, StrategyRegistry,
};
// ============================================================================
// Healing Configuration
// ============================================================================
/// Configuration for the healing engine.
///
/// Consumed by `RemediationEngine`; replaceable at runtime via
/// `RemediationEngine::update_config`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealingConfig {
    /// Minimum time between healing attempts for same problem type
    pub min_healing_interval: Duration,
    /// Maximum attempts per time window
    pub max_attempts_per_window: usize,
    /// Time window for attempt counting
    pub attempt_window: Duration,
    /// Maximum impact level for auto-healing (0-1); higher-impact strategies
    /// are not selected automatically
    pub max_auto_heal_impact: f32,
    /// Problem types that require human approval
    pub require_approval: Vec<ProblemType>,
    /// Strategy names that require human approval (matched against
    /// `RemediationStrategy::name`)
    pub require_approval_strategies: Vec<String>,
    /// Enable learning from outcomes (weight updates + outcome tracking)
    pub learning_enabled: bool,
    /// Cooldown after failed remediation
    pub failure_cooldown: Duration,
    /// Whether to verify improvement after remediation
    pub verify_improvement: bool,
    /// Minimum improvement percentage to consider success
    pub min_improvement_pct: f32,
    /// Maximum concurrent remediations
    pub max_concurrent_remediations: usize,
}
impl Default for HealingConfig {
fn default() -> Self {
Self {
min_healing_interval: Duration::from_secs(300), // 5 minutes
max_attempts_per_window: 3,
attempt_window: Duration::from_secs(3600), // 1 hour
max_auto_heal_impact: 0.5,
require_approval: vec![],
require_approval_strategies: vec!["promote_replica".to_string()],
learning_enabled: true,
failure_cooldown: Duration::from_secs(600), // 10 minutes
verify_improvement: true,
min_improvement_pct: 5.0,
max_concurrent_remediations: 2,
}
}
}
// ============================================================================
// Healing Outcome
// ============================================================================
/// Outcome of a healing attempt, as returned by `RemediationEngine::heal`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealingOutcome {
    /// Healing completed (may or may not have succeeded — inspect `result`
    /// and `verified`)
    Completed {
        problem_type: ProblemType,
        strategy: String,
        result: RemediationResult,
        verified: bool,
    },
    /// Healing was deferred (needs approval or cooldown)
    Deferred {
        reason: String,
        problem_type: ProblemType,
    },
    /// No suitable strategy found in the registry
    NoStrategy { problem_type: ProblemType },
    /// Healing is disabled engine-wide
    Disabled,
    /// Already at maximum concurrent remediations
    MaxConcurrent,
}
impl HealingOutcome {
    /// Serialize this outcome to a JSON object. Every variant carries a
    /// `"status"` discriminator string.
    pub fn to_json(&self) -> serde_json::Value {
        match self {
            Self::Completed {
                problem_type,
                strategy,
                result,
                verified,
            } => serde_json::json!({
                "status": "completed",
                "problem_type": problem_type.to_string(),
                "strategy": strategy,
                "result": result.to_json(),
                "verified": verified,
            }),
            Self::Deferred {
                reason,
                problem_type,
            } => serde_json::json!({
                "status": "deferred",
                "reason": reason,
                "problem_type": problem_type.to_string(),
            }),
            Self::NoStrategy { problem_type } => serde_json::json!({
                "status": "no_strategy",
                "problem_type": problem_type.to_string(),
            }),
            Self::Disabled => serde_json::json!({
                "status": "disabled",
            }),
            Self::MaxConcurrent => serde_json::json!({
                "status": "max_concurrent",
            }),
        }
    }
}
// ============================================================================
// Active Remediation
// ============================================================================
/// An active remediation in progress (tracked in `RemediationEngine::active`
/// for the duration of a `heal` call).
#[derive(Debug, Clone)]
pub struct ActiveRemediation {
    /// Unique ID (assigned from `RemediationEngine::next_id`)
    pub id: u64,
    /// Problem being remediated
    pub problem: Problem,
    /// Strategy being used
    pub strategy_name: String,
    /// When remediation started
    pub started_at: SystemTime,
    /// Expected completion time (start + strategy's estimated duration)
    pub expected_completion: SystemTime,
}
impl ActiveRemediation {
    /// Convert to a JSON object with timestamps as unix seconds.
    pub fn to_json(&self) -> serde_json::Value {
        // A clock set before the epoch previously panicked in both
        // conversions below; fall back to 0 seconds instead.
        let started_ts = self
            .started_at
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        let expected_ts = self
            .expected_completion
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        serde_json::json!({
            "id": self.id,
            "problem_type": self.problem.problem_type.to_string(),
            "strategy": self.strategy_name,
            "started_at": started_ts,
            "expected_completion": expected_ts,
        })
    }
}
// ============================================================================
// Remediation Context
// ============================================================================
/// Full context for remediation execution.
///
/// Built from a `Problem` plus a `SystemMetrics` snapshot; converted into a
/// leaner `StrategyContext` via `to_strategy_context` when a strategy runs.
#[derive(Debug, Clone)]
pub struct RemediationContext {
    /// The problem being remediated
    pub problem: Problem,
    /// Collection/table being remediated (0 = unset)
    pub collection_id: i64,
    /// Tenant ID (for multi-tenant)
    pub tenant_id: Option<String>,
    /// Initial integrity lambda (taken from the metrics snapshot)
    pub initial_lambda: f32,
    /// Target integrity lambda
    pub target_lambda: f32,
    /// System metrics at start
    pub initial_metrics: SystemMetrics,
    /// When context was created
    pub created_at: SystemTime,
    /// Maximum impact allowed (0-1)
    pub max_impact: f32,
    /// Timeout for remediation
    pub timeout: Duration,
    /// Healing attempts in current window
    pub attempts_in_window: usize,
    /// Last healing attempt time
    pub last_attempt: Option<SystemTime>,
}
impl RemediationContext {
/// Create a new remediation context
pub fn new(problem: Problem, metrics: SystemMetrics) -> Self {
Self {
problem,
collection_id: 0,
tenant_id: None,
initial_lambda: metrics.integrity_lambda,
target_lambda: 0.8,
initial_metrics: metrics,
created_at: SystemTime::now(),
max_impact: 0.5,
timeout: Duration::from_secs(300),
attempts_in_window: 0,
last_attempt: None,
}
}
/// Set collection ID
pub fn with_collection(mut self, collection_id: i64) -> Self {
self.collection_id = collection_id;
self
}
/// Set tenant ID
pub fn with_tenant(mut self, tenant_id: String) -> Self {
self.tenant_id = Some(tenant_id);
self
}
/// Create strategy context
pub fn to_strategy_context(&self) -> StrategyContext {
StrategyContext {
problem: self.problem.clone(),
collection_id: self.collection_id,
initial_lambda: self.initial_lambda,
target_lambda: self.target_lambda,
max_impact: self.max_impact,
timeout: self.timeout,
start_time: SystemTime::now(),
dry_run: false,
}
}
}
// ============================================================================
// Remediation Engine
// ============================================================================
/// The main remediation engine.
///
/// Thread-safe: mutable state is behind `parking_lot::RwLock`s and atomics,
/// so `heal` and the accessors may be called concurrently via `&self`.
pub struct RemediationEngine {
    /// Strategy registry (public so callers can register strategies)
    pub registry: StrategyRegistry,
    /// Configuration (hot-swappable via `update_config`)
    config: RwLock<HealingConfig>,
    /// Outcome tracker for learning
    tracker: OutcomeTracker,
    /// Active remediations (entries live for the duration of a `heal` call)
    active: RwLock<Vec<ActiveRemediation>>,
    /// Next remediation ID (monotonically increasing)
    next_id: AtomicU64,
    /// Healing attempt history (problem_type -> timestamps)
    attempt_history: RwLock<HashMap<ProblemType, VecDeque<SystemTime>>>,
    /// Whether engine is enabled
    enabled: AtomicBool,
    /// Total healings attempted
    total_healings: AtomicU64,
    /// Successful (verified) healings
    successful_healings: AtomicU64,
}
impl RemediationEngine {
/// Create a new remediation engine.
///
/// Starts enabled, with no active remediations, an empty attempt history,
/// IDs beginning at 1, and zeroed counters.
pub fn new(registry: StrategyRegistry, config: HealingConfig, tracker: OutcomeTracker) -> Self {
    Self {
        registry,
        config: RwLock::new(config),
        tracker,
        active: RwLock::new(Vec::new()),
        next_id: AtomicU64::new(1),
        attempt_history: RwLock::new(HashMap::new()),
        enabled: AtomicBool::new(true),
        total_healings: AtomicU64::new(0),
        successful_healings: AtomicU64::new(0),
    }
}
/// Enable or disable the engine. While disabled, `heal` returns
/// `HealingOutcome::Disabled` without doing any work.
pub fn set_enabled(&self, enabled: bool) {
    self.enabled.store(enabled, Ordering::SeqCst);
}

/// Check if engine is enabled.
pub fn is_enabled(&self) -> bool {
    self.enabled.load(Ordering::SeqCst)
}

/// Replace the configuration; takes effect on the next `heal` call.
pub fn update_config(&self, config: HealingConfig) {
    *self.config.write() = config;
}

/// Get a snapshot of the current configuration.
pub fn get_config(&self) -> HealingConfig {
    self.config.read().clone()
}

/// Get a snapshot of the currently active remediations.
pub fn active_remediations(&self) -> Vec<ActiveRemediation> {
    self.active.read().clone()
}
/// Main healing method: select, execute, verify, and learn from a
/// remediation for `problem`.
///
/// Pipeline (any gate failing short-circuits with the matching outcome):
/// enabled check -> concurrency limit -> auto-heal policy -> strategy
/// selection -> approval gate -> execute -> verify -> optional rollback ->
/// optional learning update. Always returns an outcome; never panics on
/// policy failures.
pub fn heal(&self, problem: &Problem) -> HealingOutcome {
    // Check if enabled
    if !self.is_enabled() {
        return HealingOutcome::Disabled;
    }
    // Clone the config so we don't hold the read lock across execution.
    let config = self.config.read().clone();
    // Check concurrent limit
    if self.active.read().len() >= config.max_concurrent_remediations {
        return HealingOutcome::MaxConcurrent;
    }
    // Check if we should auto-heal (cooldowns, attempt windows, approval policy)
    if !self.should_auto_heal(problem, &config) {
        return HealingOutcome::Deferred {
            reason: self.get_defer_reason(problem, &config),
            problem_type: problem.problem_type,
        };
    }
    // Select strategy from the registry, capped by the allowed impact level
    let strategy = match self.registry.select(problem, config.max_auto_heal_impact) {
        Some(s) => s,
        None => {
            return HealingOutcome::NoStrategy {
                problem_type: problem.problem_type,
            };
        }
    };
    // Check if strategy requires approval
    if config
        .require_approval_strategies
        .contains(&strategy.name().to_string())
    {
        return HealingOutcome::Deferred {
            reason: format!("Strategy '{}' requires human approval", strategy.name()),
            problem_type: problem.problem_type,
        };
    }
    // Record attempt (counts toward the attempt window even if execution fails)
    self.record_attempt(problem.problem_type);
    self.total_healings.fetch_add(1, Ordering::SeqCst);
    // Start active remediation (visible via `active_remediations` until done)
    let remediation_id = self.next_id.fetch_add(1, Ordering::SeqCst);
    let active_rem = ActiveRemediation {
        id: remediation_id,
        problem: problem.clone(),
        strategy_name: strategy.name().to_string(),
        started_at: SystemTime::now(),
        expected_completion: SystemTime::now() + strategy.estimated_duration(),
    };
    self.active.write().push(active_rem);
    // Execute strategy. NOTE(review): collection_id/initial_lambda are
    // hard-coded here (0 / 1.0) rather than taken from a RemediationContext
    // — confirm this is intentional.
    let context = StrategyContext {
        problem: problem.clone(),
        collection_id: 0,
        initial_lambda: 1.0,
        target_lambda: 0.8,
        max_impact: config.max_auto_heal_impact,
        timeout: strategy.estimated_duration() * 2,
        start_time: SystemTime::now(),
        dry_run: false,
    };
    let result = self.execute_with_safeguards(&*strategy, &context);
    // Remove from active
    self.active.write().retain(|r| r.id != remediation_id);
    // Verify improvement (only when configured and the strategy reported success)
    let verified = if config.verify_improvement && result.is_success() {
        self.verify_improvement(&result, config.min_improvement_pct)
    } else {
        result.is_success()
    };
    // Rollback if not verified and reversible; rollback failure is logged,
    // not propagated.
    if !verified && strategy.reversible() {
        pgrx::log!(
            "Remediation not verified, rolling back: {}",
            strategy.name()
        );
        if let Err(e) = strategy.rollback(&context, &result) {
            pgrx::warning!("Rollback failed: {}", e);
        }
    }
    // Update learning: strategy weight and outcome history
    if config.learning_enabled {
        self.registry
            .update_weight(strategy.name(), verified, result.improvement_pct);
        self.tracker
            .record(problem, strategy.name(), &result, verified);
    }
    if verified {
        self.successful_healings.fetch_add(1, Ordering::SeqCst);
    }
    HealingOutcome::Completed {
        problem_type: problem.problem_type,
        strategy: strategy.name().to_string(),
        result,
        verified,
    }
}
/// Execute strategy with safeguards (timeout, panic catching)
fn execute_with_safeguards(
&self,
strategy: &dyn RemediationStrategy,
context: &StrategyContext,
) -> RemediationResult {
// In production, wrap in timeout and panic handling
// For now, execute directly
let start = std::time::Instant::now();
let mut result = strategy.execute(context);
result.duration_ms = start.elapsed().as_millis() as u64;
result
}
/// Check if we should auto-heal this problem
fn should_auto_heal(&self, problem: &Problem, config: &HealingConfig) -> bool {
// Check if problem type requires approval
if config.require_approval.contains(&problem.problem_type) {
return false;
}
// Check cooldown
if !self.is_past_cooldown(problem.problem_type, config) {
return false;
}
// Check attempt limit
if self.attempts_in_window(problem.problem_type, &config.attempt_window)
>= config.max_attempts_per_window
{
return false;
}
true
}
/// Get reason for deferring
fn get_defer_reason(&self, problem: &Problem, config: &HealingConfig) -> String {
if config.require_approval.contains(&problem.problem_type) {
return format!(
"Problem type '{:?}' requires human approval",
problem.problem_type
);
}
if !self.is_past_cooldown(problem.problem_type, config) {
return "In cooldown period after recent healing attempt".to_string();
}
if self.attempts_in_window(problem.problem_type, &config.attempt_window)
>= config.max_attempts_per_window
{
return format!(
"Exceeded maximum {} attempts per {:?}",
config.max_attempts_per_window, config.attempt_window
);
}
"Unknown reason".to_string()
}
/// Check if past cooldown period
fn is_past_cooldown(&self, problem_type: ProblemType, config: &HealingConfig) -> bool {
let history = self.attempt_history.read();
if let Some(attempts) = history.get(&problem_type) {
if let Some(last) = attempts.back() {
if let Ok(elapsed) = last.elapsed() {
return elapsed >= config.min_healing_interval;
}
}
}
true
}
/// Count attempts in window
fn attempts_in_window(&self, problem_type: ProblemType, window: &Duration) -> usize {
let history = self.attempt_history.read();
if let Some(attempts) = history.get(&problem_type) {
let cutoff = SystemTime::now() - *window;
attempts.iter().filter(|t| **t > cutoff).count()
} else {
0
}
}
/// Record an attempt
fn record_attempt(&self, problem_type: ProblemType) {
let mut history = self.attempt_history.write();
let attempts = history.entry(problem_type).or_insert_with(VecDeque::new);
attempts.push_back(SystemTime::now());
// Keep only recent attempts
let cutoff = SystemTime::now() - Duration::from_secs(86400); // 24 hours
while let Some(front) = attempts.front() {
if *front < cutoff {
attempts.pop_front();
} else {
break;
}
}
}
    /// Verify improvement after remediation
    ///
    /// A remediation counts as verified when the strategy-reported
    /// improvement meets the configured minimum percentage.
    fn verify_improvement(&self, result: &RemediationResult, min_pct: f32) -> bool {
        result.improvement_pct >= min_pct
    }
/// Get engine statistics
pub fn get_stats(&self) -> EngineStats {
let total = self.total_healings.load(Ordering::SeqCst);
let successful = self.successful_healings.load(Ordering::SeqCst);
EngineStats {
enabled: self.is_enabled(),
total_healings: total,
successful_healings: successful,
success_rate: if total > 0 {
successful as f32 / total as f32
} else {
0.0
},
active_remediations: self.active.read().len(),
strategy_weights: self.registry.get_all_weights(),
}
}
/// Execute a specific strategy manually
pub fn execute_strategy(
&self,
strategy_name: &str,
problem: &Problem,
dry_run: bool,
) -> Option<HealingOutcome> {
let strategy = self.registry.get_by_name(strategy_name)?;
let _config = self.config.read().clone();
let context = StrategyContext {
problem: problem.clone(),
collection_id: 0,
initial_lambda: 1.0,
target_lambda: 0.8,
max_impact: 1.0, // Manual execution allows higher impact
timeout: strategy.estimated_duration() * 2,
start_time: SystemTime::now(),
dry_run,
};
let result = strategy.execute(&context);
Some(HealingOutcome::Completed {
problem_type: problem.problem_type,
strategy: strategy_name.to_string(),
result,
verified: !dry_run,
})
}
}
/// Engine statistics
///
/// Point-in-time snapshot produced by `RemediationEngine::get_stats`.
#[derive(Debug, Clone)]
pub struct EngineStats {
    /// Whether auto-healing is currently enabled
    pub enabled: bool,
    /// Total healing attempts started
    pub total_healings: u64,
    /// Attempts whose improvement was verified
    pub successful_healings: u64,
    /// successful / total (0.0 when no healings have run)
    pub success_rate: f32,
    /// Number of remediations currently in flight
    pub active_remediations: usize,
    /// Learned per-strategy selection weights, keyed by strategy name
    pub strategy_weights: HashMap<String, f32>,
}
impl EngineStats {
    /// Convert to JSON
    ///
    /// Keys mirror the struct fields one-to-one; consumed by the SQL-facing
    /// statistics functions.
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "enabled": self.enabled,
            "total_healings": self.total_healings,
            "successful_healings": self.successful_healings,
            "success_rate": self.success_rate,
            "active_remediations": self.active_remediations,
            "strategy_weights": self.strategy_weights,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::healing::detector::Severity;
    /// Build an engine with default strategies, config, and tracker.
    fn create_engine() -> RemediationEngine {
        let registry = StrategyRegistry::new_with_defaults();
        let config = HealingConfig::default();
        let tracker = OutcomeTracker::new();
        RemediationEngine::new(registry, config, tracker)
    }
    #[test]
    fn test_engine_creation() {
        let engine = create_engine();
        assert!(engine.is_enabled());
        assert!(engine.active_remediations().is_empty());
    }
    #[test]
    fn test_engine_enable_disable() {
        let engine = create_engine();
        engine.set_enabled(false);
        assert!(!engine.is_enabled());
        // A disabled engine must refuse to heal.
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        let outcome = engine.heal(&problem);
        assert!(matches!(outcome, HealingOutcome::Disabled));
        engine.set_enabled(true);
        assert!(engine.is_enabled());
    }
    #[test]
    fn test_heal_index_degradation() {
        let engine = create_engine();
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        let outcome = engine.heal(&problem);
        match outcome {
            HealingOutcome::Completed { strategy, .. } => {
                assert!(strategy.contains("reindex") || strategy.contains("integrity"));
            }
            _ => panic!("Expected Completed outcome"),
        }
    }
    #[test]
    fn test_cooldown_enforcement() {
        let mut config = HealingConfig::default();
        config.min_healing_interval = Duration::from_secs(60);
        let registry = StrategyRegistry::new_with_defaults();
        let tracker = OutcomeTracker::new();
        let engine = RemediationEngine::new(registry, config, tracker);
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        // First healing should succeed
        let outcome1 = engine.heal(&problem);
        assert!(matches!(outcome1, HealingOutcome::Completed { .. }));
        // Second should be deferred (in cooldown)
        let outcome2 = engine.heal(&problem);
        assert!(matches!(outcome2, HealingOutcome::Deferred { .. }));
    }
    #[test]
    fn test_max_attempts_enforcement() {
        let mut config = HealingConfig::default();
        config.max_attempts_per_window = 2;
        // Tiny cooldown so only the attempt budget gates the third heal;
        // the sleeps below step past the cooldown between attempts.
        config.min_healing_interval = Duration::from_millis(1);
        let registry = StrategyRegistry::new_with_defaults();
        let tracker = OutcomeTracker::new();
        let engine = RemediationEngine::new(registry, config, tracker);
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        // First two should succeed
        engine.heal(&problem);
        std::thread::sleep(Duration::from_millis(2));
        engine.heal(&problem);
        std::thread::sleep(Duration::from_millis(2));
        // Third should be deferred
        let outcome = engine.heal(&problem);
        assert!(matches!(outcome, HealingOutcome::Deferred { .. }));
    }
    #[test]
    fn test_approval_requirement() {
        let mut config = HealingConfig::default();
        config.require_approval.push(ProblemType::ReplicaLag);
        let registry = StrategyRegistry::new_with_defaults();
        let tracker = OutcomeTracker::new();
        let engine = RemediationEngine::new(registry, config, tracker);
        let problem = Problem::new(ProblemType::ReplicaLag, Severity::High);
        let outcome = engine.heal(&problem);
        assert!(matches!(outcome, HealingOutcome::Deferred { .. }));
    }
    #[test]
    fn test_strategy_approval_requirement() {
        let mut config = HealingConfig::default();
        config
            .require_approval_strategies
            .push("promote_replica".to_string());
        config.max_auto_heal_impact = 1.0; // Allow high impact
        let registry = StrategyRegistry::new_with_defaults();
        let tracker = OutcomeTracker::new();
        let engine = RemediationEngine::new(registry, config, tracker);
        let problem = Problem::new(ProblemType::ReplicaLag, Severity::High);
        let outcome = engine.heal(&problem);
        // Should be deferred because promote_replica requires approval
        assert!(matches!(outcome, HealingOutcome::Deferred { .. }));
    }
    #[test]
    fn test_no_strategy() {
        let registry = StrategyRegistry::new(); // Empty registry
        let config = HealingConfig::default();
        let tracker = OutcomeTracker::new();
        let engine = RemediationEngine::new(registry, config, tracker);
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        let outcome = engine.heal(&problem);
        assert!(matches!(outcome, HealingOutcome::NoStrategy { .. }));
    }
    #[test]
    fn test_manual_execution() {
        let engine = create_engine();
        let problem = Problem::new(ProblemType::IndexDegradation, Severity::Medium);
        // Dry run should be flagged in the result metadata.
        let outcome = engine.execute_strategy("reindex_partition", &problem, true);
        assert!(outcome.is_some());
        if let Some(HealingOutcome::Completed { result, .. }) = outcome {
            assert!(result.metadata.get("dry_run") == Some(&serde_json::json!(true)));
        }
    }
    #[test]
    fn test_engine_stats() {
        let engine = create_engine();
        let stats = engine.get_stats();
        assert!(stats.enabled);
        assert_eq!(stats.total_healings, 0);
        assert_eq!(stats.active_remediations, 0);
    }
}

View File

@@ -0,0 +1,467 @@
//! SQL Functions for Self-Healing Engine
//!
//! Provides PostgreSQL-accessible functions for:
//! - Health status monitoring
//! - Healing history queries
//! - Manual healing triggers
//! - Configuration management
use pgrx::prelude::*;
use super::detector::ProblemType;
use super::{get_healing_engine, Problem};
// ============================================================================
// Health Status Functions
// ============================================================================
/// Get current health status of the RuVector system
///
/// Returns JSON with:
/// - healthy: whether system is healthy
/// - problem_count: number of detected problems
/// - active_remediation_count: ongoing remediations
/// - problems: list of current problems
/// - enabled: whether healing is enabled
#[pg_extern]
pub fn ruvector_health_status() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let snapshot = guard.health_status();
    pgrx::JsonB(snapshot.to_json())
}
/// Check if system is currently healthy (no detected problems)
#[pg_extern]
pub fn ruvector_is_healthy() -> bool {
    let engine = get_healing_engine();
    engine.read().health_status().healthy
}
/// Get system metrics used for problem detection
#[pg_extern]
pub fn ruvector_system_metrics() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let metrics = engine.read().detector.collect_metrics();
    pgrx::JsonB(metrics.to_json())
}
// ============================================================================
// Healing History Functions
// ============================================================================
/// Get recent healing history, newest first.
///
/// # Arguments
/// * `limit` - Maximum number of records to return (default 20)
#[pg_extern]
pub fn ruvector_healing_history(limit: default!(i32, 20)) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let history: Vec<serde_json::Value> = guard
        .tracker
        .get_recent(limit as usize)
        .iter()
        .map(|record| record.to_json())
        .collect();
    pgrx::JsonB(serde_json::json!({
        "history": history,
        "count": history.len(),
    }))
}
/// Get healing history since a specific timestamp
///
/// # Arguments
/// * `since_timestamp` - Unix timestamp to filter from
#[pg_extern]
pub fn ruvector_healing_history_since(since_timestamp: i64) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    // NOTE(review): a negative timestamp wraps via the `as u64` cast and
    // yields an empty result set — confirm callers never pass one.
    let history: Vec<serde_json::Value> = guard
        .tracker
        .get_since(since_timestamp as u64)
        .iter()
        .map(|record| record.to_json())
        .collect();
    pgrx::JsonB(serde_json::json!({
        "history": history,
        "count": history.len(),
        "since": since_timestamp,
    }))
}
/// Get healing history for a specific strategy, newest first.
#[pg_extern]
pub fn ruvector_healing_history_for_strategy(
    strategy_name: &str,
    limit: default!(i32, 20),
) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let history: Vec<serde_json::Value> = guard
        .tracker
        .get_for_strategy(strategy_name, limit as usize)
        .iter()
        .map(|record| record.to_json())
        .collect();
    pgrx::JsonB(serde_json::json!({
        "strategy": strategy_name,
        "history": history,
        "count": history.len(),
    }))
}
// ============================================================================
// Healing Trigger Functions
// ============================================================================
/// Manually trigger healing for a specific problem type
///
/// # Arguments
/// * `problem_type` - One of: index_degradation, replica_lag, storage_exhaustion,
///                    query_timeout, integrity_violation, memory_pressure,
///                    connection_exhaustion, hot_partition
#[pg_extern]
pub fn ruvector_healing_trigger(problem_type: &str) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    // Parse problem type; unknown names yield a structured error response.
    let ptype = match problem_type.parse::<ProblemType>() {
        Ok(pt) => pt,
        Err(e) => {
            return pgrx::JsonB(serde_json::json!({
                "success": false,
                "error": e,
            }));
        }
    };
    // Trigger healing; `None` means the engine is disabled.
    let body = match guard.trigger_healing(ptype) {
        Some(outcome) => serde_json::json!({
            "success": true,
            "outcome": outcome.to_json(),
        }),
        None => serde_json::json!({
            "success": false,
            "error": "Healing is disabled",
        }),
    };
    pgrx::JsonB(body)
}
/// Execute a specific healing strategy manually
///
/// # Arguments
/// * `strategy_name` - Strategy to execute
/// * `problem_type` - Problem type for context
/// * `dry_run` - If true, don't actually execute (default false)
#[pg_extern]
pub fn ruvector_healing_execute(
    strategy_name: &str,
    problem_type: &str,
    dry_run: default!(bool, false),
) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    // Parse problem type
    let ptype = match problem_type.parse::<ProblemType>() {
        Ok(pt) => pt,
        Err(e) => {
            return pgrx::JsonB(serde_json::json!({
                "success": false,
                "error": e,
            }));
        }
    };
    // Manual runs always use Medium severity for the synthetic problem.
    let problem = Problem::new(ptype, super::detector::Severity::Medium);
    let body = match guard
        .remediation
        .execute_strategy(strategy_name, &problem, dry_run)
    {
        Some(outcome) => serde_json::json!({
            "success": true,
            "dry_run": dry_run,
            "outcome": outcome.to_json(),
        }),
        None => serde_json::json!({
            "success": false,
            "error": format!("Strategy '{}' not found", strategy_name),
        }),
    };
    pgrx::JsonB(body)
}
// ============================================================================
// Configuration Functions
// ============================================================================
/// Configure healing engine settings
///
/// # Arguments
/// * `config_json` - JSON configuration object with optional keys:
///   - min_healing_interval_secs
///   - max_attempts_per_window
///   - max_auto_heal_impact
///   - learning_enabled
///   - verify_improvement
///   - min_improvement_pct
///   - enabled (toggles the engine directly, not stored in the config)
#[pg_extern]
pub fn ruvector_healing_configure(config_json: pgrx::JsonB) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let mut engine_lock = engine.write();
    let mut config = engine_lock.config.clone();
    let json = config_json.0;
    // Update configuration from JSON.
    // Missing keys keep their current values; out-of-range values are
    // silently ignored rather than rejected.
    if let Some(interval) = json
        .get("min_healing_interval_secs")
        .and_then(|v| v.as_i64())
    {
        if interval > 0 {
            config.min_healing_interval = std::time::Duration::from_secs(interval as u64);
        }
    }
    if let Some(attempts) = json.get("max_attempts_per_window").and_then(|v| v.as_i64()) {
        if attempts > 0 {
            config.max_attempts_per_window = attempts as usize;
        }
    }
    if let Some(impact) = json.get("max_auto_heal_impact").and_then(|v| v.as_f64()) {
        if impact >= 0.0 && impact <= 1.0 {
            config.max_auto_heal_impact = impact as f32;
        }
    }
    if let Some(learning) = json.get("learning_enabled").and_then(|v| v.as_bool()) {
        config.learning_enabled = learning;
    }
    if let Some(verify) = json.get("verify_improvement").and_then(|v| v.as_bool()) {
        config.verify_improvement = verify;
    }
    if let Some(min_pct) = json.get("min_improvement_pct").and_then(|v| v.as_f64()) {
        if min_pct >= 0.0 {
            config.min_improvement_pct = min_pct as f32;
        }
    }
    // "enabled" toggles the engine flag directly rather than via the config.
    if let Some(enabled) = json.get("enabled").and_then(|v| v.as_bool()) {
        engine_lock.set_enabled(enabled);
    }
    engine_lock.update_config(config.clone());
    // Echo back the effective configuration so callers can confirm it.
    pgrx::JsonB(serde_json::json!({
        "status": "updated",
        "config": {
            "min_healing_interval_secs": config.min_healing_interval.as_secs(),
            "max_attempts_per_window": config.max_attempts_per_window,
            "max_auto_heal_impact": config.max_auto_heal_impact,
            "learning_enabled": config.learning_enabled,
            "verify_improvement": config.verify_improvement,
            "min_improvement_pct": config.min_improvement_pct,
            "enabled": engine_lock.enabled,
        }
    }))
}
/// Get current healing configuration
///
/// Exposes a superset of the keys accepted by `ruvector_healing_configure`
/// (e.g. attempt_window_secs and failure_cooldown_secs are read-only here).
#[pg_extern]
pub fn ruvector_healing_get_config() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let engine_lock = engine.read();
    let config = &engine_lock.config;
    pgrx::JsonB(serde_json::json!({
        "min_healing_interval_secs": config.min_healing_interval.as_secs(),
        "max_attempts_per_window": config.max_attempts_per_window,
        "attempt_window_secs": config.attempt_window.as_secs(),
        "max_auto_heal_impact": config.max_auto_heal_impact,
        "learning_enabled": config.learning_enabled,
        "failure_cooldown_secs": config.failure_cooldown.as_secs(),
        "verify_improvement": config.verify_improvement,
        "min_improvement_pct": config.min_improvement_pct,
        "max_concurrent_remediations": config.max_concurrent_remediations,
        "require_approval_strategies": config.require_approval_strategies,
        "enabled": engine_lock.enabled,
    }))
}
/// Enable or disable healing
///
/// Returns the engine's enabled flag after the update.
#[pg_extern]
pub fn ruvector_healing_enable(enabled: bool) -> bool {
    let engine = get_healing_engine();
    let mut guard = engine.write();
    guard.set_enabled(enabled);
    guard.enabled
}
// ============================================================================
// Strategy Functions
// ============================================================================
/// List all available healing strategies
///
/// Each entry carries the strategy's static metadata plus its current
/// learned selection weight.
#[pg_extern]
pub fn ruvector_healing_strategies() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let registry = &guard.remediation.registry;
    let strategies: Vec<serde_json::Value> = registry
        .all_strategies()
        .iter()
        .map(|strategy| {
            serde_json::json!({
                "name": strategy.name(),
                "description": strategy.description(),
                "handles": strategy
                    .handles()
                    .iter()
                    .map(|h| h.to_string())
                    .collect::<Vec<_>>(),
                "impact": strategy.impact(),
                "estimated_duration_secs": strategy.estimated_duration().as_secs(),
                "reversible": strategy.reversible(),
                "weight": registry.get_weight(strategy.name()),
            })
        })
        .collect();
    pgrx::JsonB(serde_json::json!({
        "strategies": strategies,
        "count": strategies.len(),
    }))
}
/// Get effectiveness report for all strategies
#[pg_extern]
pub fn ruvector_healing_effectiveness() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let report = engine.read().tracker.effectiveness_report();
    pgrx::JsonB(report.to_json())
}
/// Get statistics for the healing engine
///
/// Combines the remediation-engine counters and the outcome-tracker stats
/// into a single JSON document.
#[pg_extern]
pub fn ruvector_healing_stats() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    pgrx::JsonB(serde_json::json!({
        "engine": guard.remediation.get_stats().to_json(),
        "tracker": guard.tracker.get_stats().to_json(),
    }))
}
// ============================================================================
// Detection Threshold Functions
// ============================================================================
/// Get current detection thresholds
///
/// Keys match the ones accepted by `ruvector_healing_set_thresholds`.
#[pg_extern]
pub fn ruvector_healing_thresholds() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let engine_lock = engine.read();
    let thresholds = engine_lock.detector.get_thresholds();
    pgrx::JsonB(serde_json::json!({
        "index_fragmentation_pct": thresholds.index_fragmentation_pct,
        "replica_lag_seconds": thresholds.replica_lag_seconds,
        "storage_usage_pct": thresholds.storage_usage_pct,
        "query_timeout_rate": thresholds.query_timeout_rate,
        "min_integrity_lambda": thresholds.min_integrity_lambda,
        "memory_usage_pct": thresholds.memory_usage_pct,
        "connection_usage_pct": thresholds.connection_usage_pct,
        "partition_load_ratio": thresholds.partition_load_ratio,
    }))
}
/// Update detection thresholds
///
/// Accepts a JSON object with any subset of the threshold keys; omitted
/// keys keep their current values. No range validation is performed here.
#[pg_extern]
pub fn ruvector_healing_set_thresholds(thresholds_json: pgrx::JsonB) -> pgrx::JsonB {
    let engine = get_healing_engine();
    // NOTE(review): the thresholds are written back through a read guard, so
    // `detector.update_thresholds` presumably relies on interior mutability —
    // confirm, otherwise this should take the write lock.
    let engine_lock = engine.read();
    let mut thresholds = engine_lock.detector.get_thresholds();
    let json = thresholds_json.0;
    if let Some(v) = json.get("index_fragmentation_pct").and_then(|v| v.as_f64()) {
        thresholds.index_fragmentation_pct = v as f32;
    }
    if let Some(v) = json.get("replica_lag_seconds").and_then(|v| v.as_f64()) {
        thresholds.replica_lag_seconds = v as f32;
    }
    if let Some(v) = json.get("storage_usage_pct").and_then(|v| v.as_f64()) {
        thresholds.storage_usage_pct = v as f32;
    }
    if let Some(v) = json.get("query_timeout_rate").and_then(|v| v.as_f64()) {
        thresholds.query_timeout_rate = v as f32;
    }
    if let Some(v) = json.get("min_integrity_lambda").and_then(|v| v.as_f64()) {
        thresholds.min_integrity_lambda = v as f32;
    }
    if let Some(v) = json.get("memory_usage_pct").and_then(|v| v.as_f64()) {
        thresholds.memory_usage_pct = v as f32;
    }
    if let Some(v) = json.get("connection_usage_pct").and_then(|v| v.as_f64()) {
        thresholds.connection_usage_pct = v as f32;
    }
    if let Some(v) = json.get("partition_load_ratio").and_then(|v| v.as_f64()) {
        thresholds.partition_load_ratio = v as f32;
    }
    engine_lock.detector.update_thresholds(thresholds.clone());
    // Echo the effective thresholds back to the caller.
    pgrx::JsonB(serde_json::json!({
        "status": "updated",
        "thresholds": {
            "index_fragmentation_pct": thresholds.index_fragmentation_pct,
            "replica_lag_seconds": thresholds.replica_lag_seconds,
            "storage_usage_pct": thresholds.storage_usage_pct,
            "query_timeout_rate": thresholds.query_timeout_rate,
            "min_integrity_lambda": thresholds.min_integrity_lambda,
            "memory_usage_pct": thresholds.memory_usage_pct,
            "connection_usage_pct": thresholds.connection_usage_pct,
            "partition_load_ratio": thresholds.partition_load_ratio,
        }
    }))
}
// ============================================================================
// Problem Type Reference
// ============================================================================
/// List all supported problem types
///
/// Purely informational; does not touch the healing engine.
#[pg_extern]
pub fn ruvector_healing_problem_types() -> pgrx::JsonB {
    let types: Vec<serde_json::Value> = ProblemType::all()
        .iter()
        .map(|problem_type| {
            serde_json::json!({
                "name": problem_type.to_string(),
                "description": problem_type.description(),
            })
        })
        .collect();
    pgrx::JsonB(serde_json::json!({
        "problem_types": types,
        "count": types.len(),
    }))
}
#[cfg(test)]
mod tests {
    // These tests would run in a PostgreSQL context with pg_test
    // For now, they verify the function signatures compile correctly
    // TODO(review): add #[pg_test] coverage once a test-database harness exists.
}

View File

@@ -0,0 +1,669 @@
//! Learning System for Self-Healing Engine
//!
//! Tracks remediation outcomes and adjusts strategy selection:
//! - Outcome recording with full context
//! - Strategy weight updates based on success/failure
//! - Confidence scoring for strategies
//! - Effectiveness reporting
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use super::detector::{Problem, ProblemType, Severity};
use super::strategies::RemediationResult;
// ============================================================================
// Outcome Record
// ============================================================================
/// A recorded remediation outcome
///
/// One row in the tracker's bounded history; serialized for SQL queries via
/// `to_json` (which omits `metadata`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutcomeRecord {
    /// Unique ID (monotonically assigned by the tracker)
    pub id: u64,
    /// Problem type
    pub problem_type: ProblemType,
    /// Problem severity
    pub severity: Severity,
    /// Strategy used
    pub strategy_name: String,
    /// Whether remediation succeeded
    pub success: bool,
    /// Whether improvement was verified
    pub verified: bool,
    /// Actions taken
    pub actions_taken: usize,
    /// Improvement percentage
    pub improvement_pct: f32,
    /// Duration in milliseconds
    pub duration_ms: u64,
    /// Error message if failed
    pub error_message: Option<String>,
    /// Timestamp (unix seconds)
    pub timestamp: u64,
    /// Human feedback score (if provided, clamped to 0-1 by `add_feedback`)
    pub feedback_score: Option<f32>,
    /// Additional metadata (copied from the remediation result)
    pub metadata: serde_json::Value,
}
impl OutcomeRecord {
    /// Build a record from the healed problem and the strategy's result.
    ///
    /// `feedback_score` starts out empty; human feedback is attached later
    /// through the tracker.
    pub fn from_result(
        id: u64,
        problem: &Problem,
        strategy_name: &str,
        result: &RemediationResult,
        verified: bool,
    ) -> Self {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        Self {
            id,
            problem_type: problem.problem_type,
            severity: problem.severity,
            strategy_name: strategy_name.to_string(),
            success: result.is_success(),
            verified,
            actions_taken: result.actions_taken,
            improvement_pct: result.improvement_pct,
            duration_ms: result.duration_ms,
            error_message: result.error_message.clone(),
            timestamp,
            feedback_score: None,
            metadata: result.metadata.clone(),
        }
    }
    /// Serialize for SQL-facing history queries (`metadata` intentionally omitted).
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "id": self.id,
            "problem_type": self.problem_type.to_string(),
            "severity": format!("{:?}", self.severity).to_lowercase(),
            "strategy_name": self.strategy_name,
            "success": self.success,
            "verified": self.verified,
            "actions_taken": self.actions_taken,
            "improvement_pct": self.improvement_pct,
            "duration_ms": self.duration_ms,
            "error_message": self.error_message,
            "timestamp": self.timestamp,
            "feedback_score": self.feedback_score,
        })
    }
}
// ============================================================================
// Strategy Weight
// ============================================================================
/// Strategy weight with confidence metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrategyWeight {
    /// Strategy name
    pub strategy_name: String,
    /// Current weight (1.0 = baseline; clamped to [0.1, 2.0] by `update`)
    pub weight: f32,
    /// Confidence in weight (0-1, asymptotic in the observation count)
    pub confidence: f32,
    /// Number of observations
    pub observations: usize,
    /// Success count
    pub successes: usize,
    /// Running average of reported improvement across ALL observations
    /// (failures included — see `update`)
    pub avg_improvement: f32,
    /// Average duration in milliseconds (integer running average)
    pub avg_duration_ms: u64,
    /// Last update timestamp (unix seconds)
    pub last_updated: u64,
}
impl StrategyWeight {
    /// Create a baseline weight (1.0) with zero confidence for a strategy.
    pub fn new(strategy_name: &str) -> Self {
        Self {
            strategy_name: strategy_name.to_string(),
            weight: 1.0,
            confidence: 0.0,
            observations: 0,
            successes: 0,
            avg_improvement: 0.0,
            avg_duration_ms: 0,
            last_updated: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        }
    }
    /// Fold a new observation into the running averages, weight, and confidence.
    pub fn update(&mut self, success: bool, improvement_pct: f32, duration_ms: u64) {
        self.observations += 1;
        self.successes += usize::from(success);
        // Update running averages (improvement over ALL observations).
        let count = self.observations as f32;
        self.avg_improvement = ((count - 1.0) * self.avg_improvement + improvement_pct) / count;
        let count_u64 = self.observations as u64;
        self.avg_duration_ms =
            ((count_u64 - 1) * self.avg_duration_ms + duration_ms) / count_u64;
        // Calculate success rate
        let success_rate = self.successes as f32 / self.observations as f32;
        // Weight = success_rate * (1 + avg_improvement/100), bounded to [0.1, 2.0]
        self.weight = (success_rate * (1.0 + self.avg_improvement / 100.0))
            .max(0.1)
            .min(2.0);
        // Confidence increases with observations (asymptotic to 1.0)
        self.confidence = 1.0 - 1.0 / (1.0 + (self.observations as f32 / 10.0));
        self.last_updated = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
    }
    /// Success rate over all observations (0.0 before any observation).
    pub fn success_rate(&self) -> f32 {
        match self.observations {
            0 => 0.0,
            n => self.successes as f32 / n as f32,
        }
    }
    /// Serialize for SQL-facing effectiveness/weight reports.
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "strategy_name": self.strategy_name,
            "weight": self.weight,
            "confidence": self.confidence,
            "observations": self.observations,
            "successes": self.successes,
            "success_rate": self.success_rate(),
            "avg_improvement": self.avg_improvement,
            "avg_duration_ms": self.avg_duration_ms,
            "last_updated": self.last_updated,
        })
    }
}
// ============================================================================
// Outcome Tracker
// ============================================================================
/// Tracks remediation outcomes for learning
///
/// All state lives behind `Arc`s, so cloning the tracker yields a handle to
/// the same shared history and weights.
#[derive(Clone)]
pub struct OutcomeTracker {
    /// Outcome history (bounded; oldest records dropped past `max_history`)
    history: std::sync::Arc<RwLock<VecDeque<OutcomeRecord>>>,
    /// Strategy weights, keyed by strategy name
    weights: std::sync::Arc<RwLock<HashMap<String, StrategyWeight>>>,
    /// Maximum history size
    max_history: usize,
    /// Next record ID
    next_id: std::sync::Arc<AtomicU64>,
}
impl OutcomeTracker {
/// Create new tracker
pub fn new() -> Self {
Self {
history: std::sync::Arc::new(RwLock::new(VecDeque::new())),
weights: std::sync::Arc::new(RwLock::new(HashMap::new())),
max_history: 10000,
next_id: std::sync::Arc::new(AtomicU64::new(1)),
}
}
    /// Create with custom history size
    ///
    /// `max_history` bounds the outcome ring; weights are unbounded (one
    /// entry per strategy name). Record IDs start at 1.
    pub fn with_max_history(max_history: usize) -> Self {
        Self {
            history: std::sync::Arc::new(RwLock::new(VecDeque::new())),
            weights: std::sync::Arc::new(RwLock::new(HashMap::new())),
            max_history,
            next_id: std::sync::Arc::new(AtomicU64::new(1)),
        }
    }
/// Record a remediation outcome
pub fn record(
&self,
problem: &Problem,
strategy_name: &str,
result: &RemediationResult,
verified: bool,
) {
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
let record = OutcomeRecord::from_result(id, problem, strategy_name, result, verified);
// Add to history
let mut history = self.history.write();
history.push_back(record.clone());
while history.len() > self.max_history {
history.pop_front();
}
// Update strategy weight
let mut weights = self.weights.write();
let weight = weights
.entry(strategy_name.to_string())
.or_insert_with(|| StrategyWeight::new(strategy_name));
weight.update(verified, result.improvement_pct, result.duration_ms);
}
    /// Get recent outcomes
    ///
    /// Returns up to `limit` records, newest first.
    pub fn get_recent(&self, limit: usize) -> Vec<OutcomeRecord> {
        let history = self.history.read();
        history.iter().rev().take(limit).cloned().collect()
    }
    /// Get outcomes since timestamp
    ///
    /// `since` is compared (inclusive) against the records' unix-seconds
    /// timestamps; results are in insertion (oldest-first) order.
    pub fn get_since(&self, since: u64) -> Vec<OutcomeRecord> {
        let history = self.history.read();
        history
            .iter()
            .filter(|r| r.timestamp >= since)
            .cloned()
            .collect()
    }
    /// Get outcomes for a specific strategy, newest first, capped at `limit`.
    pub fn get_for_strategy(&self, strategy_name: &str, limit: usize) -> Vec<OutcomeRecord> {
        let history = self.history.read();
        history
            .iter()
            .rev()
            .filter(|r| r.strategy_name == strategy_name)
            .take(limit)
            .cloned()
            .collect()
    }
    /// Get outcomes for a specific problem type, newest first, capped at `limit`.
    pub fn get_for_problem_type(
        &self,
        problem_type: ProblemType,
        limit: usize,
    ) -> Vec<OutcomeRecord> {
        let history = self.history.read();
        history
            .iter()
            .rev()
            .filter(|r| r.problem_type == problem_type)
            .take(limit)
            .cloned()
            .collect()
    }
    /// Get strategy weight (cloned snapshot), if the strategy has been observed.
    pub fn get_weight(&self, strategy_name: &str) -> Option<StrategyWeight> {
        self.weights.read().get(strategy_name).cloned()
    }
    /// Get all strategy weights (cloned snapshots, unordered).
    pub fn get_all_weights(&self) -> Vec<StrategyWeight> {
        self.weights.read().values().cloned().collect()
    }
/// Add human feedback to an outcome
pub fn add_feedback(&self, outcome_id: u64, score: f32) -> bool {
let mut history = self.history.write();
for record in history.iter_mut() {
if record.id == outcome_id {
record.feedback_score = Some(score.max(0.0).min(1.0));
return true;
}
}
false
}
/// Get overall statistics
pub fn get_stats(&self) -> TrackerStats {
let history = self.history.read();
let weights = self.weights.read();
let total = history.len();
let successes = history.iter().filter(|r| r.success && r.verified).count();
let total_improvement: f32 = history.iter().map(|r| r.improvement_pct).sum();
let total_duration: u64 = history.iter().map(|r| r.duration_ms).sum();
TrackerStats {
total_outcomes: total,
successful_outcomes: successes,
success_rate: if total > 0 {
successes as f32 / total as f32
} else {
0.0
},
avg_improvement: if total > 0 {
total_improvement / total as f32
} else {
0.0
},
avg_duration_ms: if total > 0 {
total_duration / total as u64
} else {
0
},
tracked_strategies: weights.len(),
}
}
/// Generate effectiveness report
pub fn effectiveness_report(&self) -> EffectivenessReport {
let weights = self.get_all_weights();
let stats = self.get_stats();
let strategy_reports: Vec<StrategyEffectiveness> = weights
.iter()
.map(|w| {
let recent = self.get_for_strategy(&w.strategy_name, 10);
StrategyEffectiveness {
strategy_name: w.strategy_name.clone(),
weight: w.weight,
confidence: w.confidence,
success_rate: w.success_rate(),
avg_improvement: w.avg_improvement,
recent_outcomes: recent.len(),
}
})
.collect();
EffectivenessReport {
strategies: strategy_reports,
overall_success_rate: stats.success_rate,
avg_time_to_recovery_ms: stats.avg_duration_ms,
total_outcomes: stats.total_outcomes,
}
}
/// Update weights from historical data (for batch learning)
pub fn recalculate_weights(&self, lookback: Duration) {
let cutoff = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs()
- lookback.as_secs();
let history = self.history.read();
let mut weights = self.weights.write();
// Group outcomes by strategy
let mut strategy_outcomes: HashMap<String, Vec<&OutcomeRecord>> = HashMap::new();
for record in history.iter().filter(|r| r.timestamp >= cutoff) {
strategy_outcomes
.entry(record.strategy_name.clone())
.or_default()
.push(record);
}
// Recalculate each strategy's weight
for (strategy_name, outcomes) in strategy_outcomes {
let weight = weights
.entry(strategy_name.clone())
.or_insert_with(|| StrategyWeight::new(&strategy_name));
// Reset counters
weight.observations = outcomes.len();
weight.successes = outcomes.iter().filter(|o| o.success && o.verified).count();
weight.avg_improvement =
outcomes.iter().map(|o| o.improvement_pct).sum::<f32>() / outcomes.len() as f32;
weight.avg_duration_ms =
outcomes.iter().map(|o| o.duration_ms).sum::<u64>() / outcomes.len() as u64;
// Recalculate weight
let success_rate = weight.success_rate();
weight.weight = success_rate * (1.0 + weight.avg_improvement / 100.0);
weight.weight = weight.weight.max(0.1).min(2.0);
weight.confidence = 1.0 - 1.0 / (1.0 + (weight.observations as f32 / 10.0));
weight.last_updated = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
}
}
}
impl Default for OutcomeTracker {
fn default() -> Self {
Self::new()
}
}
/// Tracker statistics
///
/// Aggregate view over the full outcome history held by an `OutcomeTracker`.
#[derive(Debug, Clone)]
pub struct TrackerStats {
    /// Total number of recorded outcomes.
    pub total_outcomes: usize,
    /// Outcomes that both succeeded and were verified.
    pub successful_outcomes: usize,
    /// successful_outcomes / total_outcomes (0.0 when no outcomes exist).
    pub success_rate: f32,
    /// Mean improvement percentage across all outcomes.
    pub avg_improvement: f32,
    /// Mean remediation duration in milliseconds.
    pub avg_duration_ms: u64,
    /// Number of distinct strategies with learned weights.
    pub tracked_strategies: usize,
}
impl TrackerStats {
    /// Serialize the statistics as a JSON object.
    pub fn to_json(&self) -> serde_json::Value {
        let mut obj = serde_json::Map::new();
        obj.insert("total_outcomes".into(), self.total_outcomes.into());
        obj.insert("successful_outcomes".into(), self.successful_outcomes.into());
        obj.insert("success_rate".into(), self.success_rate.into());
        obj.insert("avg_improvement".into(), self.avg_improvement.into());
        obj.insert("avg_duration_ms".into(), self.avg_duration_ms.into());
        obj.insert("tracked_strategies".into(), self.tracked_strategies.into());
        serde_json::Value::Object(obj)
    }
}
/// Strategy effectiveness
///
/// Per-strategy entry in an `EffectivenessReport`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrategyEffectiveness {
    /// Strategy identifier.
    pub strategy_name: String,
    /// Learned selection weight.
    pub weight: f32,
    /// Confidence in the weight (grows with observation count).
    pub confidence: f32,
    /// Fraction of verified successful outcomes.
    pub success_rate: f32,
    /// Mean improvement percentage.
    pub avg_improvement: f32,
    /// Number of recent outcomes sampled (at most 10).
    pub recent_outcomes: usize,
}
/// Effectiveness report
///
/// Produced by `OutcomeTracker::effectiveness_report`.
#[derive(Debug, Clone)]
pub struct EffectivenessReport {
    /// One entry per tracked strategy.
    pub strategies: Vec<StrategyEffectiveness>,
    /// Overall verified success rate across all outcomes.
    pub overall_success_rate: f32,
    /// Mean remediation duration in milliseconds.
    pub avg_time_to_recovery_ms: u64,
    /// Total number of outcomes in the history.
    pub total_outcomes: usize,
}
impl EffectivenessReport {
    /// Serialize the report as a JSON object.
    pub fn to_json(&self) -> serde_json::Value {
        let mut obj = serde_json::Map::new();
        obj.insert(
            "strategies".into(),
            serde_json::to_value(&self.strategies).expect("report entries serialize to JSON"),
        );
        obj.insert(
            "overall_success_rate".into(),
            self.overall_success_rate.into(),
        );
        obj.insert(
            "avg_time_to_recovery_ms".into(),
            self.avg_time_to_recovery_ms.into(),
        );
        obj.insert("total_outcomes".into(), self.total_outcomes.into());
        serde_json::Value::Object(obj)
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `OutcomeTracker` and `StrategyWeight`.
    use super::*;
    /// Build a Medium-severity IndexDegradation problem for tests.
    fn create_problem() -> Problem {
        Problem::new(ProblemType::IndexDegradation, Severity::Medium)
    }
    /// Build a success/failure result with the given improvement percentage.
    fn create_result(success: bool, improvement: f32) -> RemediationResult {
        if success {
            RemediationResult::success(1, improvement).with_duration(1000)
        } else {
            RemediationResult::failure("test error").with_duration(500)
        }
    }
    #[test]
    fn test_record_outcome() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        let result = create_result(true, 15.0);
        tracker.record(&problem, "test_strategy", &result, true);
        let recent = tracker.get_recent(10);
        assert_eq!(recent.len(), 1);
        assert!(recent[0].success);
        assert!(recent[0].verified);
    }
    #[test]
    fn test_weight_updates() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        // Record successes
        for _ in 0..5 {
            let result = create_result(true, 20.0);
            tracker.record(&problem, "test_strategy", &result, true);
        }
        let weight = tracker.get_weight("test_strategy").unwrap();
        assert_eq!(weight.observations, 5);
        assert_eq!(weight.successes, 5);
        assert!(weight.weight > 1.0); // Should be elevated
        assert!(weight.confidence > 0.3); // Should have some confidence
    }
    #[test]
    fn test_mixed_outcomes() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        // 3 successes
        for _ in 0..3 {
            let result = create_result(true, 10.0);
            tracker.record(&problem, "test_strategy", &result, true);
        }
        // 2 failures
        for _ in 0..2 {
            let result = create_result(false, 0.0);
            tracker.record(&problem, "test_strategy", &result, false);
        }
        let weight = tracker.get_weight("test_strategy").unwrap();
        assert_eq!(weight.observations, 5);
        assert_eq!(weight.successes, 3);
        // 3 of 5 verified successes => 60% success rate.
        assert!((weight.success_rate() - 0.6).abs() < 0.01);
    }
    #[test]
    fn test_get_for_strategy() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        // Record for different strategies
        tracker.record(&problem, "strategy_a", &create_result(true, 10.0), true);
        tracker.record(&problem, "strategy_b", &create_result(true, 15.0), true);
        tracker.record(&problem, "strategy_a", &create_result(true, 20.0), true);
        let a_outcomes = tracker.get_for_strategy("strategy_a", 10);
        assert_eq!(a_outcomes.len(), 2);
        let b_outcomes = tracker.get_for_strategy("strategy_b", 10);
        assert_eq!(b_outcomes.len(), 1);
    }
    #[test]
    fn test_feedback() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        tracker.record(&problem, "test_strategy", &create_result(true, 10.0), true);
        let recent = tracker.get_recent(1);
        let id = recent[0].id;
        assert!(tracker.add_feedback(id, 0.9));
        let updated = tracker.get_recent(1);
        assert_eq!(updated[0].feedback_score, Some(0.9));
    }
    #[test]
    fn test_max_history() {
        let tracker = OutcomeTracker::with_max_history(5);
        let problem = create_problem();
        // Record 10 outcomes
        for i in 0..10 {
            tracker.record(
                &problem,
                "test_strategy",
                &create_result(true, i as f32),
                true,
            );
        }
        let history = tracker.get_recent(100);
        assert_eq!(history.len(), 5); // Should be capped at 5
    }
    #[test]
    fn test_effectiveness_report() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        for _ in 0..5 {
            tracker.record(&problem, "strategy_a", &create_result(true, 15.0), true);
        }
        for _ in 0..5 {
            tracker.record(&problem, "strategy_b", &create_result(true, 25.0), true);
        }
        let report = tracker.effectiveness_report();
        assert_eq!(report.strategies.len(), 2);
        assert_eq!(report.total_outcomes, 10);
        assert_eq!(report.overall_success_rate, 1.0);
    }
    #[test]
    fn test_strategy_weight_confidence() {
        let mut weight = StrategyWeight::new("test");
        // Few observations = low confidence
        weight.update(true, 10.0, 1000);
        assert!(weight.confidence < 0.5);
        // More observations = higher confidence
        for _ in 0..20 {
            weight.update(true, 10.0, 1000);
        }
        assert!(weight.confidence > 0.5);
    }
    #[test]
    fn test_tracker_stats() {
        let tracker = OutcomeTracker::new();
        let problem = create_problem();
        tracker.record(&problem, "strategy_a", &create_result(true, 10.0), true);
        tracker.record(&problem, "strategy_b", &create_result(false, 0.0), false);
        let stats = tracker.get_stats();
        assert_eq!(stats.total_outcomes, 2);
        assert_eq!(stats.successful_outcomes, 1);
        assert_eq!(stats.success_rate, 0.5);
    }
}

View File

@@ -0,0 +1,233 @@
//! Self-Healing Engine for RuVector Postgres v2
//!
//! This module provides automated problem detection and remediation capabilities:
//! - **Problem Detection**: Monitors system health and detects issues
//! - **Remediation Strategies**: Defines actions to fix detected problems
//! - **Remediation Engine**: Orchestrates strategy execution with rollback
//! - **Learning System**: Tracks outcomes and improves strategy selection
//! - **Background Worker**: Continuous health monitoring
//!
//! # Architecture
//!
//! ```text
//! +------------------------------------------------------------------+
//! | Integrity Monitor |
//! | - Detects state transitions (normal -> stress -> critical) |
//! +------------------------------------------------------------------+
//! |
//! v
//! +------------------------------------------------------------------+
//! | Problem Detector |
//! | - Classifies problem types from witness edges |
//! +------------------------------------------------------------------+
//! |
//! v
//! +------------------------------------------------------------------+
//! | Remediation Engine |
//! | - Selects strategy, executes with timeout/rollback |
//! +------------------------------------------------------------------+
//! |
//! v
//! +------------------------------------------------------------------+
//! | Learning System |
//! | - Records outcomes, updates strategy weights |
//! +------------------------------------------------------------------+
//! ```
pub mod detector;
pub mod engine;
pub mod functions;
pub mod learning;
pub mod strategies;
pub mod worker;
pub use detector::{Problem, ProblemDetector, ProblemType, SystemMetrics};
pub use engine::{HealingConfig, HealingOutcome, RemediationContext, RemediationEngine};
pub use learning::{OutcomeRecord, OutcomeTracker, StrategyWeight};
pub use strategies::{
IntegrityRecovery, PromoteReplica, QueryCircuitBreaker, ReindexPartition, RemediationOutcome,
RemediationResult, RemediationStrategy, StrategyRegistry, TierEviction,
};
pub use worker::{HealingWorker, HealingWorkerConfig, HealingWorkerState};
use parking_lot::RwLock;
use std::sync::Arc;
/// Global healing engine instance
static HEALING_ENGINE: std::sync::OnceLock<Arc<RwLock<HealingEngine>>> = std::sync::OnceLock::new();
/// Get or lazily initialize the global healing engine.
pub fn get_healing_engine() -> Arc<RwLock<HealingEngine>> {
    let engine = HEALING_ENGINE.get_or_init(|| Arc::new(RwLock::new(HealingEngine::new())));
    Arc::clone(engine)
}
/// Main healing engine combining all components
///
/// Held globally behind `Arc<RwLock<...>>`; obtain it via `get_healing_engine()`.
pub struct HealingEngine {
    /// Problem detector
    pub detector: ProblemDetector,
    /// Remediation engine (shares `tracker`, so outcomes it records are
    /// visible through `tracker` as well)
    pub remediation: RemediationEngine,
    /// Outcome tracker for learning
    pub tracker: OutcomeTracker,
    /// Background worker state
    pub worker_state: Arc<HealingWorkerState>,
    /// Configuration
    pub config: HealingConfig,
    /// Whether healing is enabled (checked by `trigger_healing`)
    pub enabled: bool,
}
impl HealingEngine {
    /// Create a new healing engine with default configuration.
    pub fn new() -> Self {
        // Delegate so construction logic lives in one place.
        Self::with_config(HealingConfig::default())
    }
    /// Create with custom configuration.
    pub fn with_config(config: HealingConfig) -> Self {
        let tracker = OutcomeTracker::new();
        let registry = StrategyRegistry::new_with_defaults();
        Self {
            detector: ProblemDetector::new(),
            // The remediation engine shares this tracker, so outcomes it
            // records are visible through `self.tracker` as well.
            remediation: RemediationEngine::new(registry, config.clone(), tracker.clone()),
            tracker,
            worker_state: Arc::new(HealingWorkerState::new(HealingWorkerConfig::default())),
            config,
            enabled: true,
        }
    }
    /// Check system health and return current status.
    ///
    /// Healthy means: no detected problems and no remediations in flight.
    pub fn health_status(&self) -> HealthStatus {
        let metrics = self.detector.collect_metrics();
        let problems = self.detector.detect_problems(&metrics);
        let active_remediations = self.remediation.active_remediations();
        HealthStatus {
            healthy: problems.is_empty() && active_remediations.is_empty(),
            problem_count: problems.len(),
            active_remediation_count: active_remediations.len(),
            problems,
            metrics,
            enabled: self.enabled,
            last_check: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        }
    }
    /// Enable or disable healing. When disabled, `trigger_healing` is a no-op.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
    }
    /// Update configuration, propagating it to the remediation engine.
    pub fn update_config(&mut self, config: HealingConfig) {
        self.config = config.clone();
        self.remediation.update_config(config);
    }
    /// Trigger manual healing for a specific problem type.
    ///
    /// Returns `None` when healing is disabled; otherwise the outcome of
    /// remediating a synthetic Medium-severity problem of `problem_type`.
    pub fn trigger_healing(&self, problem_type: ProblemType) -> Option<HealingOutcome> {
        if !self.enabled {
            return None;
        }
        let problem = Problem {
            problem_type,
            severity: detector::Severity::Medium,
            detected_at: std::time::SystemTime::now(),
            details: serde_json::json!({"source": "manual_trigger"}),
            affected_partitions: vec![],
        };
        Some(self.remediation.heal(&problem))
    }
}
impl Default for HealingEngine {
fn default() -> Self {
Self::new()
}
}
/// Health status summary
///
/// Snapshot returned by `HealingEngine::health_status`.
#[derive(Debug, Clone)]
pub struct HealthStatus {
    /// Whether the system is healthy (no problems and no active remediations)
    pub healthy: bool,
    /// Number of detected problems
    pub problem_count: usize,
    /// Number of active remediations
    pub active_remediation_count: usize,
    /// List of detected problems
    pub problems: Vec<Problem>,
    /// Current system metrics
    pub metrics: SystemMetrics,
    /// Whether healing is enabled
    pub enabled: bool,
    /// Timestamp of last health check (seconds since Unix epoch)
    pub last_check: u64,
}
impl HealthStatus {
    /// Serialize the status as a JSON object for SQL function output.
    pub fn to_json(&self) -> serde_json::Value {
        let mut obj = serde_json::Map::new();
        obj.insert("healthy".into(), self.healthy.into());
        obj.insert("problem_count".into(), self.problem_count.into());
        obj.insert(
            "active_remediation_count".into(),
            self.active_remediation_count.into(),
        );
        obj.insert(
            "problems".into(),
            serde_json::Value::Array(self.problems.iter().map(|p| p.to_json()).collect()),
        );
        obj.insert("metrics".into(), self.metrics.to_json());
        obj.insert("enabled".into(), self.enabled.into());
        obj.insert("last_check".into(), self.last_check.into());
        serde_json::Value::Object(obj)
    }
}
#[cfg(test)]
mod tests {
    //! Smoke tests for engine construction and the global singleton.
    use super::*;
    #[test]
    fn test_healing_engine_creation() {
        let engine = HealingEngine::new();
        assert!(engine.enabled);
        let status = engine.health_status();
        assert!(status.healthy);
    }
    #[test]
    fn test_healing_enable_disable() {
        let mut engine = HealingEngine::new();
        engine.set_enabled(false);
        assert!(!engine.enabled);
        engine.set_enabled(true);
        assert!(engine.enabled);
    }
    #[test]
    fn test_global_instance() {
        // Repeated calls must return the same Arc (pointer equality).
        let engine1 = get_healing_engine();
        let engine2 = get_healing_engine();
        assert!(Arc::ptr_eq(&engine1, &engine2));
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,618 @@
//! Background Worker for Self-Healing Engine
//!
//! Provides continuous health monitoring and async remediation:
//! - Periodic health checks
//! - Automatic problem detection
//! - Async remediation execution
//! - Integration with integrity control plane
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use super::detector::ProblemDetector;
use super::engine::HealingOutcome;
use super::get_healing_engine;
// ============================================================================
// Worker Configuration
// ============================================================================
/// Configuration for the healing background worker
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealingWorkerConfig {
    /// Health check interval (default: 60 seconds)
    pub check_interval: Duration,
    /// Whether to auto-remediate detected problems
    pub auto_remediate: bool,
    /// Minimum severity to auto-remediate
    pub min_auto_severity: u8, // 0=Info, 1=Low, 2=Medium, 3=High, 4=Critical
    /// Maximum concurrent remediations
    /// NOTE(review): not enforced anywhere visible in this file — confirm.
    pub max_concurrent: usize,
    /// Whether to log health status on each check
    pub log_status: bool,
    /// Enable metrics collection (metrics are attached to each check result)
    pub collect_metrics: bool,
}
impl Default for HealingWorkerConfig {
fn default() -> Self {
Self {
check_interval: Duration::from_secs(60),
auto_remediate: true,
min_auto_severity: 2, // Medium and above
max_concurrent: 2,
log_status: true,
collect_metrics: true,
}
}
}
// ============================================================================
// Worker State
// ============================================================================
/// State of the healing background worker
///
/// Shared between the worker loop and SQL control functions via `Arc`.
pub struct HealingWorkerState {
    /// Configuration
    config: RwLock<HealingWorkerConfig>,
    /// Whether worker is running
    running: AtomicBool,
    /// Last health check timestamp (seconds since Unix epoch; 0 if never)
    last_check: AtomicU64,
    /// Total health checks performed
    checks_completed: AtomicU64,
    /// Total problems detected
    problems_detected: AtomicU64,
    /// Total remediations triggered
    remediations_triggered: AtomicU64,
    /// Recent health statuses (bounded to the last 100 entries)
    recent_statuses: RwLock<Vec<HealthCheckResult>>,
}
impl HealingWorkerState {
    /// Maximum number of recent health-check results retained.
    const MAX_RECENT: usize = 100;
    /// Create new worker state from a configuration.
    pub fn new(config: HealingWorkerConfig) -> Self {
        Self {
            config: RwLock::new(config),
            running: AtomicBool::new(false),
            last_check: AtomicU64::new(0),
            checks_completed: AtomicU64::new(0),
            problems_detected: AtomicU64::new(0),
            remediations_triggered: AtomicU64::new(0),
            recent_statuses: RwLock::new(Vec::new()),
        }
    }
    /// Check if worker is running.
    pub fn is_running(&self) -> bool {
        self.running.load(Ordering::SeqCst)
    }
    /// Mark the worker as running.
    pub fn start(&self) {
        self.running.store(true, Ordering::SeqCst);
    }
    /// Request the worker to stop.
    pub fn stop(&self) {
        self.running.store(false, Ordering::SeqCst);
    }
    /// Get a copy of the current configuration.
    pub fn get_config(&self) -> HealingWorkerConfig {
        self.config.read().clone()
    }
    /// Replace the configuration.
    pub fn set_config(&self, config: HealingWorkerConfig) {
        *self.config.write() = config;
    }
    /// Record a completed health check: bump the counters and append the
    /// result to the bounded recent-status buffer.
    pub fn record_check(&self, result: HealthCheckResult) {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        self.last_check.store(now, Ordering::SeqCst);
        self.checks_completed.fetch_add(1, Ordering::SeqCst);
        self.problems_detected
            .fetch_add(result.problems_found as u64, Ordering::SeqCst);
        self.remediations_triggered
            .fetch_add(result.remediations_triggered as u64, Ordering::SeqCst);
        // Keep only the newest MAX_RECENT results. Trim the front in a
        // single drain instead of repeated O(n) `remove(0)` calls.
        let mut statuses = self.recent_statuses.write();
        statuses.push(result);
        if statuses.len() > Self::MAX_RECENT {
            let excess = statuses.len() - Self::MAX_RECENT;
            statuses.drain(..excess);
        }
    }
    /// Get a snapshot of the worker's counters.
    pub fn get_stats(&self) -> WorkerStats {
        WorkerStats {
            running: self.running.load(Ordering::SeqCst),
            last_check: self.last_check.load(Ordering::SeqCst),
            checks_completed: self.checks_completed.load(Ordering::SeqCst),
            problems_detected: self.problems_detected.load(Ordering::SeqCst),
            remediations_triggered: self.remediations_triggered.load(Ordering::SeqCst),
        }
    }
    /// Get up to `limit` recent health check results, newest first.
    pub fn get_recent_checks(&self, limit: usize) -> Vec<HealthCheckResult> {
        let statuses = self.recent_statuses.read();
        statuses.iter().rev().take(limit).cloned().collect()
    }
}
/// Worker statistics
///
/// Snapshot of the counters held in `HealingWorkerState`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkerStats {
    /// Whether the worker loop is currently running.
    pub running: bool,
    /// Timestamp of the last health check (seconds since Unix epoch; 0 if never).
    pub last_check: u64,
    /// Total health checks performed.
    pub checks_completed: u64,
    /// Total problems detected across all checks.
    pub problems_detected: u64,
    /// Total remediations triggered across all checks.
    pub remediations_triggered: u64,
}
impl WorkerStats {
    /// Serialize the statistics as a JSON object.
    pub fn to_json(&self) -> serde_json::Value {
        let mut obj = serde_json::Map::new();
        obj.insert("running".into(), self.running.into());
        obj.insert("last_check".into(), self.last_check.into());
        obj.insert("checks_completed".into(), self.checks_completed.into());
        obj.insert("problems_detected".into(), self.problems_detected.into());
        obj.insert(
            "remediations_triggered".into(),
            self.remediations_triggered.into(),
        );
        serde_json::Value::Object(obj)
    }
}
// ============================================================================
// Health Check Result
// ============================================================================
/// Result of a health check
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckResult {
    /// Timestamp of check (seconds since Unix epoch)
    pub timestamp: u64,
    /// Whether system is healthy (no problems found)
    pub healthy: bool,
    /// Number of problems found
    pub problems_found: usize,
    /// Number of remediations triggered
    pub remediations_triggered: usize,
    /// Remediation outcomes (JSON form of each outcome)
    pub outcomes: Vec<serde_json::Value>,
    /// Metrics collected (None when metrics collection is disabled)
    pub metrics: Option<serde_json::Value>,
    /// Duration of check in milliseconds
    pub duration_ms: u64,
}
impl HealthCheckResult {
/// Create a healthy result
pub fn healthy() -> Self {
Self {
timestamp: SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs(),
healthy: true,
problems_found: 0,
remediations_triggered: 0,
outcomes: vec![],
metrics: None,
duration_ms: 0,
}
}
/// Convert to JSON
pub fn to_json(&self) -> serde_json::Value {
serde_json::json!({
"timestamp": self.timestamp,
"healthy": self.healthy,
"problems_found": self.problems_found,
"remediations_triggered": self.remediations_triggered,
"outcomes": self.outcomes,
"duration_ms": self.duration_ms,
})
}
}
// ============================================================================
// Healing Worker
// ============================================================================
/// Background worker for continuous health monitoring
///
/// Owns its own `ProblemDetector`; remediation itself goes through the
/// global healing engine.
pub struct HealingWorker {
    /// Worker state (shareable with SQL control functions)
    state: Arc<HealingWorkerState>,
    /// Problem detector
    detector: ProblemDetector,
}
impl HealingWorker {
    /// Granularity of the interruptible sleep in `wait_for_interval`.
    const SLEEP_SLICE: Duration = Duration::from_millis(500);
    /// Create new healing worker.
    pub fn new(config: HealingWorkerConfig) -> Self {
        Self {
            state: Arc::new(HealingWorkerState::new(config)),
            detector: ProblemDetector::new(),
        }
    }
    /// Create with shared state (e.g. the global engine's worker state).
    pub fn with_state(state: Arc<HealingWorkerState>) -> Self {
        Self {
            state,
            detector: ProblemDetector::new(),
        }
    }
    /// Get worker state.
    pub fn state(&self) -> &Arc<HealingWorkerState> {
        &self.state
    }
    /// Perform one health check cycle: collect metrics, detect problems,
    /// optionally auto-remediate, and record the result.
    pub fn check_health(&self) -> HealthCheckResult {
        let start = std::time::Instant::now();
        let config = self.state.get_config();
        // Collect metrics and classify problems.
        let metrics = self.detector.collect_metrics();
        let problems = self.detector.detect_problems(&metrics);
        let problems_found = problems.len();
        if config.log_status {
            if problems_found > 0 {
                pgrx::log!("Healing worker: {} problems detected", problems_found);
            } else {
                pgrx::debug1!("Healing worker: no problems detected");
            }
        }
        let mut remediations_triggered = 0;
        let mut outcomes = Vec::new();
        // Auto-remediate problems at or above the configured severity.
        if config.auto_remediate && problems_found > 0 {
            let engine = get_healing_engine();
            let engine_lock = engine.read();
            for problem in &problems {
                if problem.severity.value() < config.min_auto_severity {
                    continue;
                }
                let outcome = engine_lock.remediation.heal(problem);
                outcomes.push(outcome.to_json());
                // Only completed remediations count as triggered.
                if matches!(outcome, HealingOutcome::Completed { .. }) {
                    remediations_triggered += 1;
                }
            }
        }
        let duration_ms = start.elapsed().as_millis() as u64;
        let result = HealthCheckResult {
            timestamp: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            healthy: problems_found == 0,
            problems_found,
            remediations_triggered,
            outcomes,
            metrics: if config.collect_metrics {
                Some(metrics.to_json())
            } else {
                None
            },
            duration_ms,
        };
        self.state.record_check(result.clone());
        result
    }
    /// Run the worker loop (blocking) until `stop()` is called.
    pub fn run(&self) {
        self.state.start();
        pgrx::log!("Healing background worker started");
        while self.state.is_running() {
            // Perform health check
            let _result = self.check_health();
            // Sleep until next check
            let interval = self.state.get_config().check_interval;
            self.wait_for_interval(interval);
        }
        pgrx::log!("Healing background worker stopped");
    }
    /// Sleep for `interval`, waking every `SLEEP_SLICE` to observe `stop()`
    /// promptly instead of only after a full interval.
    ///
    /// In production as a full background worker, one would use
    /// PostgreSQL's WaitLatch for interruptible sleep.
    fn wait_for_interval(&self, interval: Duration) {
        let deadline = std::time::Instant::now() + interval;
        while self.state.is_running() {
            let now = std::time::Instant::now();
            if now >= deadline {
                break;
            }
            std::thread::sleep((deadline - now).min(Self::SLEEP_SLICE));
        }
    }
    /// Request the worker loop to stop after the current cycle.
    pub fn stop(&self) {
        self.state.stop();
    }
}
// ============================================================================
// Background Worker Entry Point
// ============================================================================
/// PostgreSQL background worker entry point
#[pgrx::pg_guard]
pub extern "C" fn healing_bgworker_main(_arg: pgrx::pg_sys::Datum) {
    pgrx::log!("RuVector healing background worker starting");
    // Run the worker loop with default configuration until stopped.
    HealingWorker::new(HealingWorkerConfig::default()).run();
}
/// Register the background worker with PostgreSQL
///
/// Currently a placeholder that only logs; the commented sketch below shows
/// where `pg_sys::RegisterBackgroundWorker` would be invoked.
pub fn register_healing_worker() {
    pgrx::log!("Registering RuVector healing background worker");
    // In production, use pg_sys::RegisterBackgroundWorker
    // This is a placeholder for now
    //
    // unsafe {
    //     let mut worker = pg_sys::BackgroundWorker::default();
    //     // Configure worker...
    //     pg_sys::RegisterBackgroundWorker(&mut worker);
    // }
}
// ============================================================================
// SQL Functions for Worker Control
// ============================================================================
use pgrx::prelude::*;
/// Start the healing background worker
#[pg_extern]
pub fn ruvector_healing_worker_start() -> bool {
    let engine = get_healing_engine();
    let guard = engine.read();
    let state = &guard.worker_state;
    if state.is_running() {
        pgrx::warning!("Healing worker is already running");
        false
    } else {
        // In production, would launch actual background worker
        state.start();
        pgrx::log!("Healing worker started");
        true
    }
}
/// Stop the healing background worker
#[pg_extern]
pub fn ruvector_healing_worker_stop() -> bool {
    let engine = get_healing_engine();
    let guard = engine.read();
    let state = &guard.worker_state;
    if state.is_running() {
        state.stop();
        pgrx::log!("Healing worker stopped");
        true
    } else {
        pgrx::warning!("Healing worker is not running");
        false
    }
}
/// Get healing worker status
#[pg_extern]
pub fn ruvector_healing_worker_status() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let stats = guard.worker_state.get_stats();
    let config = guard.worker_state.get_config();
    // Report the counters plus a summary of the active configuration.
    let config_json = serde_json::json!({
        "check_interval_secs": config.check_interval.as_secs(),
        "auto_remediate": config.auto_remediate,
        "min_auto_severity": config.min_auto_severity,
        "max_concurrent": config.max_concurrent,
    });
    pgrx::JsonB(serde_json::json!({
        "stats": stats.to_json(),
        "config": config_json,
    }))
}
/// Configure the healing worker
///
/// Any argument left NULL keeps the current value. Invalid values
/// (non-positive interval, severity outside 0..=4) are silently ignored.
/// Returns the resulting configuration.
#[pg_extern]
pub fn ruvector_healing_worker_config(
    check_interval_secs: Option<i32>,
    auto_remediate: Option<bool>,
    min_auto_severity: Option<i32>,
) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let engine_lock = engine.read();
    let mut config = engine_lock.worker_state.get_config();
    if let Some(interval) = check_interval_secs {
        if interval > 0 {
            config.check_interval = Duration::from_secs(interval as u64);
        }
    }
    if let Some(auto_rem) = auto_remediate {
        config.auto_remediate = auto_rem;
    }
    if let Some(severity) = min_auto_severity {
        // Severity scale: 0=Info .. 4=Critical.
        if (0..=4).contains(&severity) {
            config.min_auto_severity = severity as u8;
        }
    }
    engine_lock.worker_state.set_config(config.clone());
    pgrx::JsonB(serde_json::json!({
        "status": "updated",
        "config": {
            "check_interval_secs": config.check_interval.as_secs(),
            "auto_remediate": config.auto_remediate,
            "min_auto_severity": config.min_auto_severity,
        }
    }))
}
/// Manually trigger a health check
///
/// Runs detection and remediation synchronously and returns the full
/// result (problems, outcomes, metrics, duration).
#[pg_extern]
pub fn ruvector_healing_check_now() -> pgrx::JsonB {
    let engine = get_healing_engine();
    let engine_lock = engine.read();
    let start = std::time::Instant::now();
    // Use the engine's own detector (as `HealingEngine::health_status` does)
    // rather than a fresh instance, so any detector state is honored.
    let metrics = engine_lock.detector.collect_metrics();
    let problems = engine_lock.detector.detect_problems(&metrics);
    let mut outcomes = Vec::new();
    for problem in &problems {
        let outcome = engine_lock.remediation.heal(problem);
        outcomes.push(outcome.to_json());
    }
    let result = serde_json::json!({
        "healthy": problems.is_empty(),
        "problems_found": problems.len(),
        "problems": problems.iter().map(|p| p.to_json()).collect::<Vec<_>>(),
        "outcomes": outcomes,
        "metrics": metrics.to_json(),
        "duration_ms": start.elapsed().as_millis() as u64,
    });
    pgrx::JsonB(result)
}
/// Get recent health check results
#[pg_extern]
pub fn ruvector_healing_recent_checks(limit: default!(i32, 10)) -> pgrx::JsonB {
    let engine = get_healing_engine();
    let guard = engine.read();
    let checks = guard.worker_state.get_recent_checks(limit as usize);
    let serialized: Vec<serde_json::Value> = checks.iter().map(|c| c.to_json()).collect();
    pgrx::JsonB(serde_json::json!({
        "checks": serialized,
        "count": checks.len(),
    }))
}
#[cfg(test)]
mod tests {
    //! Unit tests for worker state, configuration, and result recording.
    use super::*;
    #[test]
    fn test_worker_state() {
        let state = HealingWorkerState::new(HealingWorkerConfig::default());
        assert!(!state.is_running());
        state.start();
        assert!(state.is_running());
        state.stop();
        assert!(!state.is_running());
    }
    #[test]
    fn test_health_check_result() {
        let result = HealthCheckResult::healthy();
        assert!(result.healthy);
        assert_eq!(result.problems_found, 0);
    }
    #[test]
    fn test_worker_config() {
        let config = HealingWorkerConfig::default();
        assert!(config.auto_remediate);
        assert_eq!(config.min_auto_severity, 2);
    }
    #[test]
    fn test_state_recording() {
        let state = HealingWorkerState::new(HealingWorkerConfig::default());
        let result = HealthCheckResult {
            timestamp: 12345,
            healthy: false,
            problems_found: 2,
            remediations_triggered: 1,
            outcomes: vec![],
            metrics: None,
            duration_ms: 100,
        };
        state.record_check(result);
        let stats = state.get_stats();
        assert_eq!(stats.checks_completed, 1);
        assert_eq!(stats.problems_detected, 2);
        assert_eq!(stats.remediations_triggered, 1);
    }
    #[test]
    fn test_recent_checks() {
        let state = HealingWorkerState::new(HealingWorkerConfig::default());
        for i in 0..5 {
            state.record_check(HealthCheckResult {
                timestamp: i,
                healthy: true,
                problems_found: 0,
                remediations_triggered: 0,
                outcomes: vec![],
                metrics: None,
                duration_ms: 10,
            });
        }
        let recent = state.get_recent_checks(3);
        assert_eq!(recent.len(), 3);
        // Most recent first
        assert_eq!(recent[0].timestamp, 4);
    }
    #[test]
    fn test_worker_creation() {
        let worker = HealingWorker::new(HealingWorkerConfig::default());
        assert!(!worker.state().is_running());
    }
}