git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
184 lines
5.4 KiB
Rust
184 lines
5.4 KiB
Rust
//! High-performance query execution engine for RuVector graph database
|
|
//!
|
|
//! This module provides a complete query execution system with:
|
|
//! - Logical and physical query plans
|
|
//! - Vectorized operators (scan, filter, join, aggregate)
|
|
//! - Pipeline execution with iterator model
|
|
//! - Parallel execution using rayon
|
|
//! - Query result caching
|
|
//! - Cost-based optimization statistics
|
|
//!
|
|
//! Performance targets:
|
|
//! - 100K+ traversals/second per core
|
|
//! - Sub-millisecond simple lookups
|
|
//! - SIMD-optimized predicate evaluation
|
|
|
|
pub mod cache;
|
|
pub mod operators;
|
|
pub mod parallel;
|
|
pub mod pipeline;
|
|
pub mod plan;
|
|
pub mod stats;
|
|
|
|
pub use cache::{CacheConfig, CacheEntry, QueryCache};
|
|
pub use operators::{
|
|
Aggregate, AggregateFunction, EdgeScan, Filter, HyperedgeScan, Join, JoinType, Limit, NodeScan,
|
|
Operator, Project, ScanMode, Sort,
|
|
};
|
|
pub use parallel::{ParallelConfig, ParallelExecutor};
|
|
pub use pipeline::{ExecutionContext, Pipeline, RowBatch};
|
|
pub use plan::{LogicalPlan, PhysicalPlan, PlanNode};
|
|
pub use stats::{ColumnStats, Histogram, Statistics, TableStats};
|
|
|
|
use std::error::Error;
|
|
use std::fmt;
|
|
use std::sync::Arc;
|
|
|
|
/// Query execution error types
|
|
#[derive(Debug, Clone)]
|
|
pub enum ExecutionError {
|
|
/// Invalid query plan
|
|
InvalidPlan(String),
|
|
/// Operator execution failed
|
|
OperatorError(String),
|
|
/// Type mismatch in expression evaluation
|
|
TypeMismatch(String),
|
|
/// Resource exhausted (memory, disk, etc.)
|
|
ResourceExhausted(String),
|
|
/// Internal error
|
|
Internal(String),
|
|
}
|
|
|
|
impl fmt::Display for ExecutionError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
ExecutionError::InvalidPlan(msg) => write!(f, "Invalid plan: {}", msg),
|
|
ExecutionError::OperatorError(msg) => write!(f, "Operator error: {}", msg),
|
|
ExecutionError::TypeMismatch(msg) => write!(f, "Type mismatch: {}", msg),
|
|
ExecutionError::ResourceExhausted(msg) => write!(f, "Resource exhausted: {}", msg),
|
|
ExecutionError::Internal(msg) => write!(f, "Internal error: {}", msg),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Error for ExecutionError {}
|
|
|
|
pub type Result<T> = std::result::Result<T, ExecutionError>;
|
|
|
|
/// Query execution engine
|
|
pub struct QueryExecutor {
|
|
/// Query result cache
|
|
cache: Arc<QueryCache>,
|
|
/// Execution statistics
|
|
stats: Arc<Statistics>,
|
|
/// Parallel execution configuration
|
|
parallel_config: ParallelConfig,
|
|
}
|
|
|
|
impl QueryExecutor {
|
|
/// Create a new query executor
|
|
pub fn new() -> Self {
|
|
Self {
|
|
cache: Arc::new(QueryCache::new(CacheConfig::default())),
|
|
stats: Arc::new(Statistics::new()),
|
|
parallel_config: ParallelConfig::default(),
|
|
}
|
|
}
|
|
|
|
/// Create executor with custom configuration
|
|
pub fn with_config(cache_config: CacheConfig, parallel_config: ParallelConfig) -> Self {
|
|
Self {
|
|
cache: Arc::new(QueryCache::new(cache_config)),
|
|
stats: Arc::new(Statistics::new()),
|
|
parallel_config,
|
|
}
|
|
}
|
|
|
|
/// Execute a logical plan
|
|
pub fn execute(&self, plan: &LogicalPlan) -> Result<Vec<RowBatch>> {
|
|
// Check cache first
|
|
let cache_key = plan.cache_key();
|
|
if let Some(cached) = self.cache.get(&cache_key) {
|
|
return Ok(cached.results.clone());
|
|
}
|
|
|
|
// Optimize logical plan to physical plan
|
|
let physical_plan = self.optimize(plan)?;
|
|
|
|
// Execute physical plan
|
|
let results = if self.parallel_config.enabled && plan.is_parallelizable() {
|
|
self.execute_parallel(&physical_plan)?
|
|
} else {
|
|
self.execute_sequential(&physical_plan)?
|
|
};
|
|
|
|
// Cache results
|
|
self.cache.insert(cache_key, results.clone());
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
/// Optimize logical plan to physical plan
|
|
fn optimize(&self, plan: &LogicalPlan) -> Result<PhysicalPlan> {
|
|
// Cost-based optimization using statistics
|
|
let physical = PhysicalPlan::from_logical(plan, &self.stats)?;
|
|
Ok(physical)
|
|
}
|
|
|
|
/// Execute plan sequentially
|
|
fn execute_sequential(&self, _plan: &PhysicalPlan) -> Result<Vec<RowBatch>> {
|
|
// Note: In a real implementation, we would need to reconstruct operators
|
|
// For now, return empty results as placeholder
|
|
Ok(Vec::new())
|
|
}
|
|
|
|
/// Execute plan in parallel
|
|
fn execute_parallel(&self, plan: &PhysicalPlan) -> Result<Vec<RowBatch>> {
|
|
let executor = ParallelExecutor::new(self.parallel_config.clone());
|
|
executor.execute(plan)
|
|
}
|
|
|
|
/// Get execution statistics
|
|
pub fn stats(&self) -> Arc<Statistics> {
|
|
Arc::clone(&self.stats)
|
|
}
|
|
|
|
/// Clear query cache
|
|
pub fn clear_cache(&self) {
|
|
self.cache.clear();
|
|
}
|
|
}
|
|
|
|
impl Default for QueryExecutor {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_executor_creation() {
|
|
let executor = QueryExecutor::new();
|
|
assert!(executor.stats().is_empty());
|
|
}
|
|
|
|
#[test]
|
|
fn test_executor_with_config() {
|
|
let cache_config = CacheConfig {
|
|
max_entries: 100,
|
|
max_memory_bytes: 1024 * 1024,
|
|
ttl_seconds: 300,
|
|
};
|
|
let parallel_config = ParallelConfig {
|
|
enabled: true,
|
|
num_threads: 4,
|
|
batch_size: 1000,
|
|
};
|
|
let executor = QueryExecutor::with_config(cache_config, parallel_config);
|
|
assert!(executor.stats().is_empty());
|
|
}
|
|
}
|