Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/crates/ruvector-graph/src/executor/mod.rs
+++ b/crates/ruvector-graph/src/executor/mod.rs
@@ -0,0 +1,183 @@
+//! High-performance query execution engine for RuVector graph database
+//!
+//! This module provides a complete query execution system with:
+//! - Logical and physical query plans
+//! - Vectorized operators (scan, filter, join, aggregate)
+//! - Pipeline execution with iterator model
+//! - Parallel execution using rayon
+//! - Query result caching
+//! - Cost-based optimization statistics
+//!
+//! Performance targets:
+//! - 100K+ traversals/second per core
+//! - Sub-millisecond simple lookups
+//! - SIMD-optimized predicate evaluation
+
+pub mod cache;
+pub mod operators;
+pub mod parallel;
+pub mod pipeline;
+pub mod plan;
+pub mod stats;
+
+pub use cache::{CacheConfig, CacheEntry, QueryCache};
+pub use operators::{
+    Aggregate, AggregateFunction, EdgeScan, Filter, HyperedgeScan, Join, JoinType, Limit, NodeScan,
+    Operator, Project, ScanMode, Sort,
+};
+pub use parallel::{ParallelConfig, ParallelExecutor};
+pub use pipeline::{ExecutionContext, Pipeline, RowBatch};
+pub use plan::{LogicalPlan, PhysicalPlan, PlanNode};
+pub use stats::{ColumnStats, Histogram, Statistics, TableStats};
+
+use std::error::Error;
+use std::fmt;
+use std::sync::Arc;
+
+/// Query execution error types
+#[derive(Debug, Clone)]
+pub enum ExecutionError {
+    /// Invalid query plan
+    InvalidPlan(String),
+    /// Operator execution failed
+    OperatorError(String),
+    /// Type mismatch in expression evaluation
+    TypeMismatch(String),
+    /// Resource exhausted (memory, disk, etc.)
+    ResourceExhausted(String),
+    /// Internal error
+    Internal(String),
+}
+
+impl fmt::Display for ExecutionError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ExecutionError::InvalidPlan(msg) => write!(f, "Invalid plan: {}", msg),
+            ExecutionError::OperatorError(msg) => write!(f, "Operator error: {}", msg),
+            ExecutionError::TypeMismatch(msg) => write!(f, "Type mismatch: {}", msg),
+            ExecutionError::ResourceExhausted(msg) => write!(f, "Resource exhausted: {}", msg),
+            ExecutionError::Internal(msg) => write!(f, "Internal error: {}", msg),
+        }
+    }
+}
+
+impl Error for ExecutionError {}
+
+pub type Result<T> = std::result::Result<T, ExecutionError>;
+
+/// Query execution engine
+pub struct QueryExecutor {
+    /// Query result cache
+    cache: Arc<QueryCache>,
+    /// Execution statistics
+    stats: Arc<Statistics>,
+    /// Parallel execution configuration
+    parallel_config: ParallelConfig,
+}
+
+impl QueryExecutor {
+    /// Create a new query executor
+    pub fn new() -> Self {
+        Self {
+            cache: Arc::new(QueryCache::new(CacheConfig::default())),
+            stats: Arc::new(Statistics::new()),
+            parallel_config: ParallelConfig::default(),
+        }
+    }
+
+    /// Create executor with custom configuration
+    pub fn with_config(cache_config: CacheConfig, parallel_config: ParallelConfig) -> Self {
+        Self {
+            cache: Arc::new(QueryCache::new(cache_config)),
+            stats: Arc::new(Statistics::new()),
+            parallel_config,
+        }
+    }
+
+    /// Execute a logical plan
+    pub fn execute(&self, plan: &LogicalPlan) -> Result<Vec<RowBatch>> {
+        // Check cache first
+        let cache_key = plan.cache_key();
+        if let Some(cached) = self.cache.get(&cache_key) {
+            return Ok(cached.results.clone());
+        }
+
+        // Optimize logical plan to physical plan
+        let physical_plan = self.optimize(plan)?;
+
+        // Execute physical plan
+        let results = if self.parallel_config.enabled && plan.is_parallelizable() {
+            self.execute_parallel(&physical_plan)?
+        } else {
+            self.execute_sequential(&physical_plan)?
+        };
+
+        // Cache results
+        self.cache.insert(cache_key, results.clone());
+
+        Ok(results)
+    }
+
+    /// Optimize logical plan to physical plan
+    fn optimize(&self, plan: &LogicalPlan) -> Result<PhysicalPlan> {
+        // Cost-based optimization using statistics
+        let physical = PhysicalPlan::from_logical(plan, &self.stats)?;
+        Ok(physical)
+    }
+
+    /// Execute plan sequentially
+    fn execute_sequential(&self, _plan: &PhysicalPlan) -> Result<Vec<RowBatch>> {
+        // Note: In a real implementation, we would need to reconstruct operators
+        // For now, return empty results as placeholder
+        Ok(Vec::new())
+    }
+
+    /// Execute plan in parallel
+    fn execute_parallel(&self, plan: &PhysicalPlan) -> Result<Vec<RowBatch>> {
+        let executor = ParallelExecutor::new(self.parallel_config.clone());
+        executor.execute(plan)
+    }
+
+    /// Get execution statistics
+    pub fn stats(&self) -> Arc<Statistics> {
+        Arc::clone(&self.stats)
+    }
+
+    /// Clear query cache
+    pub fn clear_cache(&self) {
+        self.cache.clear();
+    }
+}
+
+impl Default for QueryExecutor {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_executor_creation() {
+        let executor = QueryExecutor::new();
+        assert!(executor.stats().is_empty());
+    }
+
+    #[test]
+    fn test_executor_with_config() {
+        let cache_config = CacheConfig {
+            max_entries: 100,
+            max_memory_bytes: 1024 * 1024,
+            ttl_seconds: 300,
+        };
+        let parallel_config = ParallelConfig {
+            enabled: true,
+            num_threads: 4,
+            batch_size: 1000,
+        };
+        let executor = QueryExecutor::with_config(cache_config, parallel_config);
+        assert!(executor.stats().is_empty());
+    }
+}