Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,755 @@
//! Comprehensive Benchmark Suite for j-Tree + BMSSP Optimizations
//!
//! Measures before/after performance for each optimization:
//! - DSpar: 5.9x target speedup
//! - Cache: 10x target for repeated queries
//! - SIMD: 2-4x target for distance operations
//! - Pool: 50-75% memory reduction
//! - Parallel: Near-linear scaling
//! - WASM Batch: 10x FFI overhead reduction
//!
//! Target: Combined 10x speedup over naive implementation
use super::cache::{CacheConfig, PathDistanceCache};
use super::dspar::{DegreePresparse, PresparseConfig};
use super::parallel::{LevelUpdateResult, ParallelConfig, ParallelLevelUpdater, WorkItem};
use super::pool::{LevelData, LevelPool, PoolConfig};
use super::simd_distance::{DistanceArray, SimdDistanceOps};
use super::wasm_batch::{BatchConfig, WasmBatchOps};
use crate::graph::DynamicGraph;
use std::collections::HashSet;
use std::time::{Duration, Instant};
/// Outcome of one before/after benchmark comparison.
///
/// Captures the raw timings, the derived speedup, optional memory figures,
/// and any extra named measurements a benchmark wants to attach.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the benchmark
    pub name: String,
    /// Baseline time (naive implementation)
    pub baseline_us: u64,
    /// Optimized time
    pub optimized_us: u64,
    /// Speedup factor (baseline / optimized)
    pub speedup: f64,
    /// Target speedup
    pub target_speedup: f64,
    /// Whether target was achieved
    pub target_achieved: bool,
    /// Memory usage baseline (bytes)
    pub baseline_memory: usize,
    /// Memory usage optimized (bytes)
    pub optimized_memory: usize,
    /// Memory reduction percentage
    pub memory_reduction_percent: f64,
    /// Additional metrics
    pub metrics: Vec<(String, f64)>,
}

impl BenchmarkResult {
    /// Build a result from raw timings.
    ///
    /// Speedup is `baseline / optimized`; a 0 µs optimized run is reported
    /// as an infinite speedup (which trivially satisfies any target).
    pub fn new(name: &str, baseline_us: u64, optimized_us: u64, target_speedup: f64) -> Self {
        let speedup = match optimized_us {
            0 => f64::INFINITY,
            us => baseline_us as f64 / us as f64,
        };
        Self {
            name: name.to_string(),
            baseline_us,
            optimized_us,
            speedup,
            target_speedup,
            target_achieved: speedup >= target_speedup,
            baseline_memory: 0,
            optimized_memory: 0,
            memory_reduction_percent: 0.0,
            metrics: Vec::new(),
        }
    }

    /// Attach memory figures; the reduction is expressed as a percentage of
    /// the baseline (0.0 when the baseline itself is 0 bytes).
    pub fn with_memory(mut self, baseline: usize, optimized: usize) -> Self {
        self.baseline_memory = baseline;
        self.optimized_memory = optimized;
        self.memory_reduction_percent = match baseline {
            0 => 0.0,
            b => 100.0 * (1.0 - optimized as f64 / b as f64),
        };
        self
    }

    /// Record an extra named measurement alongside the core timings.
    pub fn add_metric(&mut self, name: &str, value: f64) {
        self.metrics.push((name.to_string(), value));
    }
}
/// Individual optimization benchmark
///
/// Groups the per-workload results for one optimization (e.g. "SIMD" or
/// "Cache") together with an aggregate summary over those workloads.
#[derive(Debug, Clone)]
pub struct OptimizationBenchmark {
    /// Optimization name
    pub name: String,
    /// Results for different workloads (one entry per graph size / variant)
    pub results: Vec<BenchmarkResult>,
    /// Overall assessment aggregated from `results`
    pub summary: BenchmarkSummary,
}
/// Summary of benchmark results
///
/// Aggregate statistics over the per-workload `BenchmarkResult`s of one
/// optimization. `Default` yields all-zero values (used for empty result sets).
#[derive(Debug, Clone, Default)]
pub struct BenchmarkSummary {
    /// Average speedup achieved across workloads
    pub avg_speedup: f64,
    /// Minimum speedup observed
    pub min_speedup: f64,
    /// Maximum speedup observed
    pub max_speedup: f64,
    /// Percentage of workloads whose speedup target was achieved
    pub targets_achieved_percent: f64,
    /// Overall memory reduction (averaged over workloads reporting memory)
    pub avg_memory_reduction: f64,
}
/// Comprehensive benchmark suite
///
/// Drives all six optimization benchmarks (DSpar, cache, SIMD, pool,
/// parallel, WASM batch) over a configurable set of graph sizes.
pub struct BenchmarkSuite {
    /// Test graph sizes (vertex counts)
    sizes: Vec<usize>,
    /// Number of iterations per test (timings are averaged over these)
    iterations: usize,
    /// Results collected by the most recent `run_all`
    results: Vec<OptimizationBenchmark>,
}
impl BenchmarkSuite {
    /// Create new benchmark suite
    ///
    /// Defaults: graph sizes 100 / 1_000 / 10_000 and 10 iterations per test.
    pub fn new() -> Self {
        Self {
            sizes: vec![100, 1000, 10000],
            iterations: 10,
            results: Vec::new(),
        }
    }

    /// Set test sizes (builder style)
    pub fn with_sizes(mut self, sizes: Vec<usize>) -> Self {
        self.sizes = sizes;
        self
    }

    /// Set iterations (builder style)
    pub fn with_iterations(mut self, iterations: usize) -> Self {
        self.iterations = iterations;
        self
    }

    /// Run all benchmarks
    ///
    /// Clears any previous results, runs every optimization benchmark in
    /// sequence, and returns a borrow of the freshly collected results.
    pub fn run_all(&mut self) -> &Vec<OptimizationBenchmark> {
        self.results.clear();
        self.results.push(self.benchmark_dspar());
        self.results.push(self.benchmark_cache());
        self.results.push(self.benchmark_simd());
        self.results.push(self.benchmark_pool());
        self.results.push(self.benchmark_parallel());
        self.results.push(self.benchmark_wasm_batch());
        &self.results
    }

    /// Get combined speedup estimate
    ///
    /// Conservative estimate: product of the square roots of each
    /// optimization's average speedup (optimizations overlap, so a straight
    /// product would overstate the combined effect). Non-positive or
    /// non-finite averages are skipped to avoid NaN; returns 1.0 when no
    /// usable results exist.
    pub fn combined_speedup(&self) -> f64 {
        if self.results.is_empty() {
            return 1.0;
        }
        // Estimate combined speedup (conservative: product of square roots)
        // Skip results with zero or negative speedup to avoid NaN
        let mut combined = 1.0;
        let mut count = 0;
        for result in &self.results {
            let speedup = result.summary.avg_speedup;
            if speedup > 0.0 && speedup.is_finite() {
                combined *= speedup.sqrt();
                count += 1;
            }
        }
        if count == 0 {
            return 1.0;
        }
        combined
    }

    /// Benchmark DSpar (Degree-based presparse)
    ///
    /// Baseline is a plain edge enumeration; optimized path runs
    /// `DegreePresparse::presparse` at 10% target sparsity. Target: 5.9x.
    fn benchmark_dspar(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let graph = create_test_graph(size, size * 5);
            // Baseline: process all edges
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let edges = graph.edges();
                let _count = edges.len();
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: DSpar filtering
            let mut dspar = DegreePresparse::with_config(PresparseConfig {
                target_sparsity: 0.1,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _ = dspar.presparse(&graph);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut result = BenchmarkResult::new(
                &format!("DSpar n={}", size),
                baseline_us,
                opt_us,
                5.9, // Target speedup
            );
            // Get sparsification stats (one extra run, outside the timed loop)
            let sparse_result = dspar.presparse(&graph);
            result.add_metric("sparsity_ratio", sparse_result.stats.sparsity_ratio);
            result.add_metric(
                "edges_reduced",
                (sparse_result.stats.original_edges - sparse_result.stats.sparse_edges) as f64,
            );
            results.push(result);
        }
        compute_summary("DSpar", results)
    }

    /// Benchmark cache performance
    ///
    /// Baseline recomputes a simulated distance for every query; optimized
    /// path answers from a pre-warmed `PathDistanceCache`. Target: 10x for
    /// repeated (cached) queries.
    fn benchmark_cache(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            // Baseline: no caching (compute every time)
            let baseline_start = Instant::now();
            let mut total = 0.0;
            for _ in 0..self.iterations {
                for i in 0..size {
                    // Simulate distance computation
                    total += (i as f64 * 1.414).sqrt();
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let _ = total; // Prevent optimization
            // Optimized: with caching
            let cache = PathDistanceCache::with_config(CacheConfig {
                max_entries: size,
                ..Default::default()
            });
            // Warm up cache (half the keys, so both hits and misses occur)
            for i in 0..(size / 2) {
                cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
            }
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                for i in 0..size {
                    if cache.get(i as u64, (i + 1) as u64).is_none() {
                        cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
                    }
                }
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut result = BenchmarkResult::new(
                &format!("Cache n={}", size),
                baseline_us,
                opt_us,
                10.0, // Target speedup for cached hits
            );
            let stats = cache.stats();
            result.add_metric("hit_rate", stats.hit_rate());
            result.add_metric("cache_size", stats.size as f64);
            results.push(result);
        }
        compute_summary("Cache", results)
    }

    /// Benchmark SIMD operations
    ///
    /// Two sub-benchmarks per size: `find_min` (scalar scan vs
    /// `SimdDistanceOps::find_min`) and `relax_batch` (scalar relax loop vs
    /// `SimdDistanceOps::relax_batch`). Target: 2x each.
    fn benchmark_simd(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let mut arr = DistanceArray::new(size);
            // Initialize with test data
            for i in 0..size {
                arr.set(i as u64, (i as f64) * 0.5 + 1.0);
            }
            arr.set((size / 2) as u64, 0.1); // Min value
            // Baseline: naive find_min
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let data = arr.as_slice();
                let mut min_val = f64::INFINITY;
                let mut min_idx = 0;
                for (i, &d) in data.iter().enumerate() {
                    if d < min_val {
                        min_val = d;
                        min_idx = i;
                    }
                }
                let _ = (min_val, min_idx);
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: SIMD find_min
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _ = SimdDistanceOps::find_min(&arr);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("SIMD find_min n={}", size),
                baseline_us,
                opt_us.max(1), // Avoid divide by zero
                2.0,           // Target speedup
            );
            results.push(result);
            // Also benchmark relax_batch (up to 100 synthetic neighbors)
            let neighbors: Vec<_> = (0..(size / 10).min(100))
                .map(|i| ((i * 10) as u64, 1.0))
                .collect();
            let baseline_start = Instant::now();
            let mut arr_baseline = DistanceArray::new(size);
            for _ in 0..self.iterations {
                let data = arr_baseline.as_mut_slice();
                for &(idx, weight) in &neighbors {
                    let idx = idx as usize;
                    if idx < data.len() {
                        // Relax from a source at distance 0.0
                        let new_dist = 0.0 + weight;
                        if new_dist < data[idx] {
                            data[idx] = new_dist;
                        }
                    }
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut arr_opt = DistanceArray::new(size);
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                SimdDistanceOps::relax_batch(&mut arr_opt, 0.0, &neighbors);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("SIMD relax_batch n={}", size),
                baseline_us,
                opt_us.max(1),
                2.0,
            );
            results.push(result);
        }
        compute_summary("SIMD", results)
    }

    /// Benchmark pool allocation
    ///
    /// Baseline allocates and drops 10 `LevelData` per iteration; optimized
    /// path allocates through a `LevelPool` with lazy deallocation.
    /// NOTE(review): `size_of_val(&level)` only measures the shallow struct
    /// size, not heap memory owned by `LevelData`, so `baseline_memory` is
    /// likely understated — confirm against LevelData's layout.
    fn benchmark_pool(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            // Baseline: allocate/deallocate each time
            let baseline_start = Instant::now();
            let mut baseline_memory = 0usize;
            for _ in 0..self.iterations {
                let mut levels = Vec::new();
                for i in 0..10 {
                    let level = LevelData::new(i, size);
                    baseline_memory = baseline_memory.max(std::mem::size_of_val(&level));
                    levels.push(level);
                }
                // Drop all
                drop(levels);
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: pool allocation with lazy deallocation
            let pool = LevelPool::with_config(PoolConfig {
                max_materialized_levels: 5,
                lazy_dealloc: true,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                for i in 0..10 {
                    let level = pool.allocate_level(i, size);
                    pool.materialize(i, level);
                }
                // Some evictions happen automatically
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let stats = pool.stats();
            let mut result =
                BenchmarkResult::new(&format!("Pool n={}", size), baseline_us, opt_us.max(1), 2.0);
            result = result.with_memory(
                baseline_memory * 10,  // Baseline: all levels materialized
                stats.pool_size_bytes, // Optimized: only max_materialized
            );
            result.add_metric("evictions", stats.evictions as f64);
            result.add_metric("materialized_levels", stats.materialized_levels as f64);
            results.push(result);
        }
        compute_summary("Pool", results)
    }

    /// Benchmark parallel processing
    ///
    /// Processes 100 synthetic levels sequentially vs via
    /// `ParallelLevelUpdater::process_parallel`, with identical per-level
    /// work. Target is a conservative 2x since scaling depends on core count.
    fn benchmark_parallel(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let levels: Vec<usize> = (0..100).collect();
            // Baseline: sequential processing
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let _results: Vec<_> = levels
                    .iter()
                    .map(|&level| {
                        // Simulate work
                        let mut sum = 0.0;
                        for i in 0..(size / 100).max(1) {
                            sum += (i as f64).sqrt();
                        }
                        LevelUpdateResult {
                            level,
                            cut_value: sum,
                            partition: HashSet::new(),
                            time_us: 0,
                        }
                    })
                    .collect();
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: parallel processing
            let updater = ParallelLevelUpdater::with_config(ParallelConfig {
                min_parallel_size: 10,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _results = updater.process_parallel(&levels, |level| {
                    // Same simulated work as the baseline closure
                    let mut sum = 0.0;
                    for i in 0..(size / 100).max(1) {
                        sum += (i as f64).sqrt();
                    }
                    LevelUpdateResult {
                        level,
                        cut_value: sum,
                        partition: HashSet::new(),
                        time_us: 0,
                    }
                });
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("Parallel n={}", size),
                baseline_us,
                opt_us.max(1),
                2.0, // Conservative target (depends on core count)
            );
            results.push(result);
        }
        compute_summary("Parallel", results)
    }

    /// Benchmark WASM batch operations
    ///
    /// Baseline simulates one FFI call per edge (via `black_box`); optimized
    /// path queues the whole edge set and executes it as a single batch.
    /// Target: 10x FFI overhead reduction.
    fn benchmark_wasm_batch(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let edges: Vec<_> = (0..size).map(|i| (i as u64, (i + 1) as u64, 1.0)).collect();
            // Baseline: individual operations
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                // Simulate individual FFI calls
                for edge in &edges {
                    let _ = edge; // FFI overhead simulation
                    std::hint::black_box(edge);
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: batch operations
            let mut batch = WasmBatchOps::with_config(BatchConfig {
                max_batch_size: 1024,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                batch.queue_insert_edges(edges.clone());
                let _ = batch.execute_batch();
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let stats = batch.stats();
            let mut result = BenchmarkResult::new(
                &format!("WASM Batch n={}", size),
                baseline_us,
                opt_us.max(1),
                10.0,
            );
            result.add_metric("avg_items_per_op", stats.avg_items_per_op);
            results.push(result);
        }
        compute_summary("WASM Batch", results)
    }

    /// Get results collected by the most recent `run_all`
    pub fn results(&self) -> &Vec<OptimizationBenchmark> {
        &self.results
    }

    /// Generate report string
    ///
    /// Renders a human-readable summary of every optimization plus the
    /// combined speedup estimate versus the overall 10x goal.
    pub fn report(&self) -> String {
        let mut report = String::new();
        report.push_str("=== j-Tree + BMSSP Optimization Benchmark Report ===\n\n");
        for opt in &self.results {
            report.push_str(&format!("## {} Optimization\n", opt.name));
            report.push_str(&format!(
                "   Average Speedup: {:.2}x\n",
                opt.summary.avg_speedup
            ));
            report.push_str(&format!(
                "   Min/Max: {:.2}x / {:.2}x\n",
                opt.summary.min_speedup, opt.summary.max_speedup
            ));
            report.push_str(&format!(
                "   Targets Achieved: {:.0}%\n",
                opt.summary.targets_achieved_percent
            ));
            if opt.summary.avg_memory_reduction > 0.0 {
                report.push_str(&format!(
                    "   Memory Reduction: {:.1}%\n",
                    opt.summary.avg_memory_reduction
                ));
            }
            report.push_str("\n   Details:\n");
            for result in &opt.results {
                report.push_str(&format!(
                    "   - {}: {:.2}x (target: {:.2}x) {}\n",
                    result.name,
                    result.speedup,
                    result.target_speedup,
                    if result.target_achieved {
                        "[OK]"
                    } else {
                        "[MISS]"
                    }
                ));
            }
            report.push_str("\n");
        }
        let combined = self.combined_speedup();
        report.push_str(&format!("## Combined Speedup Estimate: {:.2}x\n", combined));
        report.push_str(&format!("   Target: 10x\n"));
        report.push_str(&format!(
            "   Status: {}\n",
            if combined >= 10.0 {
                "TARGET ACHIEVED"
            } else {
                "In Progress"
            }
        ));
        report
    }
}
impl Default for BenchmarkSuite {
fn default() -> Self {
Self::new()
}
}
/// Helper to create test graph
///
/// Builds a graph with `vertices` vertices and at most `edges` edges, laid
/// out deterministically over the ordered pairs (i, j) with i < j and unit
/// weight.
fn create_test_graph(vertices: usize, edges: usize) -> DynamicGraph {
    let graph = DynamicGraph::new();
    // Create vertices 0..vertices
    for id in 0..vertices {
        graph.add_vertex(id as u64);
    }
    // Fill edges pair-by-pair until the budget is spent
    let mut remaining = edges;
    'outer: for i in 0..vertices {
        for j in (i + 1)..vertices {
            if remaining == 0 {
                break 'outer;
            }
            let _ = graph.insert_edge(i as u64, j as u64, 1.0);
            remaining -= 1;
        }
    }
    graph
}
/// Compute summary from results
///
/// Aggregates the per-workload results of one optimization into an
/// `OptimizationBenchmark`: average/min/max speedup, percentage of targets
/// hit, and the mean memory reduction over results that reported memory.
/// An empty input yields a default (all-zero) summary.
fn compute_summary(name: &str, results: Vec<BenchmarkResult>) -> OptimizationBenchmark {
    if results.is_empty() {
        return OptimizationBenchmark {
            name: name.to_string(),
            results: Vec::new(),
            summary: BenchmarkSummary::default(),
        };
    }
    // Single pass over speedups for sum / min / max / target count.
    let n = results.len() as f64;
    let mut speedup_sum = 0.0;
    let mut min_speedup = f64::INFINITY;
    let mut max_speedup: f64 = 0.0;
    let mut achieved_count = 0usize;
    for r in &results {
        speedup_sum += r.speedup;
        min_speedup = min_speedup.min(r.speedup);
        max_speedup = max_speedup.max(r.speedup);
        if r.target_achieved {
            achieved_count += 1;
        }
    }
    // Memory reduction is averaged only over results that measured memory.
    let reductions: Vec<f64> = results
        .iter()
        .filter(|r| r.baseline_memory > 0)
        .map(|r| r.memory_reduction_percent)
        .collect();
    let avg_memory_reduction = if reductions.is_empty() {
        0.0
    } else {
        reductions.iter().sum::<f64>() / reductions.len() as f64
    };
    OptimizationBenchmark {
        name: name.to_string(),
        results,
        summary: BenchmarkSummary {
            avg_speedup: speedup_sum / n,
            min_speedup,
            max_speedup,
            targets_achieved_percent: 100.0 * achieved_count as f64 / n,
            avg_memory_reduction,
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Speedup math and target flag for a simple 10x case.
    #[test]
    fn test_benchmark_result() {
        let result = BenchmarkResult::new("test", 1000, 100, 5.0);
        assert_eq!(result.speedup, 10.0);
        assert!(result.target_achieved);
    }

    /// 1000 -> 250 bytes is a 75% memory reduction.
    #[test]
    fn test_benchmark_result_memory() {
        let result = BenchmarkResult::new("test", 100, 50, 1.0).with_memory(1000, 250);
        assert_eq!(result.memory_reduction_percent, 75.0);
    }

    /// The helper respects vertex count and the edge budget upper bound.
    #[test]
    fn test_create_test_graph() {
        let graph = create_test_graph(10, 20);
        assert_eq!(graph.num_vertices(), 10);
        assert!(graph.num_edges() <= 20);
    }

    /// Smoke test: the full suite runs end to end on a tiny input.
    #[test]
    fn test_benchmark_suite_small() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        let results = suite.run_all();
        assert!(!results.is_empty());
    }

    /// Combined speedup is a valid finite positive number even when tiny
    /// inputs make individual optimizations slower than baseline.
    #[test]
    fn test_combined_speedup() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        suite.run_all();
        let combined = suite.combined_speedup();
        // For very small inputs, overhead may exceed benefit
        // Just verify we get a valid positive result
        assert!(
            combined > 0.0 && combined.is_finite(),
            "Combined speedup {} should be positive and finite",
            combined
        );
    }

    /// The rendered report contains the expected section headers.
    #[test]
    fn test_report_generation() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        suite.run_all();
        let report = suite.report();
        assert!(report.contains("Benchmark Report"));
        assert!(report.contains("DSpar"));
        assert!(report.contains("Combined Speedup"));
    }
}

View File

@@ -0,0 +1,535 @@
//! LRU Cache for Path Distances
//!
//! Provides efficient caching of path distances with:
//! - LRU eviction policy
//! - Prefetch hints based on access patterns
//! - Lock-free concurrent reads
//! - Batch update support
//!
//! Target: 10x speedup for repeated distance queries
use crate::graph::VertexId;
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::RwLock;
/// Configuration for path distance cache
///
/// Tunes capacity and the access-pattern-based prefetch machinery.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum number of entries in cache
    pub max_entries: usize,
    /// Enable access pattern tracking for prefetch
    pub enable_prefetch: bool,
    /// Number of recent queries to track for prefetch
    pub prefetch_history_size: usize,
    /// Prefetch lookahead count
    pub prefetch_lookahead: usize,
}

impl Default for CacheConfig {
    /// Defaults: 10_000 entries, prefetch enabled, 100-query history,
    /// lookahead of 4 predicted queries.
    fn default() -> Self {
        CacheConfig {
            prefetch_lookahead: 4,
            prefetch_history_size: 100,
            enable_prefetch: true,
            max_entries: 10_000,
        }
    }
}
/// Statistics for cache performance
///
/// A point-in-time snapshot; counters are cumulative since cache creation.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Total cache hits
    pub hits: u64,
    /// Total cache misses
    pub misses: u64,
    /// Current cache size
    pub size: usize,
    /// Number of prefetch hits
    pub prefetch_hits: u64,
    /// Number of evictions
    pub evictions: u64,
}

impl CacheStats {
    /// Fraction of lookups that hit, in [0, 1]; 0.0 before any lookup.
    pub fn hit_rate(&self) -> f64 {
        match self.hits + self.misses {
            0 => 0.0,
            total => self.hits as f64 / total as f64,
        }
    }
}
/// Hint for prefetching likely queries
///
/// Produced by `PathDistanceCache::get_prefetch_hints` from the recent
/// query history: a hot source vertex plus the targets seen paired with it.
#[derive(Debug, Clone)]
pub struct PrefetchHint {
    /// Source vertex
    pub source: VertexId,
    /// Likely target vertices (may contain duplicates from repeated queries)
    pub targets: Vec<VertexId>,
    /// Confidence score (0.0-1.0), derived from the source's share of history
    pub confidence: f64,
}
/// Entry in the LRU cache
///
/// NOTE(review): `source`/`target` keep the caller's original orientation,
/// while the map key (`CacheKey`) is stored normalized — these fields appear
/// redundant with the key; confirm whether anything reads the orientation.
#[derive(Debug, Clone)]
struct CacheEntry {
    /// Source vertex (as passed by the inserting caller)
    source: VertexId,
    /// Target vertex (as passed by the inserting caller)
    target: VertexId,
    /// Cached distance
    distance: f64,
    /// Last access time (for LRU) — a monotonically increasing counter value
    last_access: u64,
    /// Was this a prefetch? (used to attribute prefetch hits in stats)
    prefetched: bool,
}
/// Key for cache lookup
///
/// Stored in normalized (min, max) order so a vertex pair maps to the same
/// key regardless of query direction.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
struct CacheKey {
    source: VertexId,
    target: VertexId,
}

impl CacheKey {
    /// Build a direction-independent key: `new(a, b) == new(b, a)`.
    fn new(source: VertexId, target: VertexId) -> Self {
        let (source, target) = if source <= target {
            (source, target)
        } else {
            (target, source)
        };
        Self { source, target }
    }
}
/// LRU cache for path distances
///
/// Keys are symmetric vertex pairs. Reads take only the shared lock on the
/// main map; statistics are plain atomics so `get` never writes the map.
pub struct PathDistanceCache {
    config: CacheConfig,
    /// Main cache storage
    cache: RwLock<HashMap<CacheKey, CacheEntry>>,
    /// LRU order tracking (front = oldest, back = newest insertion)
    lru_order: RwLock<VecDeque<CacheKey>>,
    /// Access counter for LRU timestamps (monotonic, relaxed ordering)
    access_counter: AtomicU64,
    /// Statistics
    hits: AtomicU64,
    misses: AtomicU64,
    prefetch_hits: AtomicU64,
    evictions: AtomicU64,
    /// Query history for prefetch prediction (bounded by config)
    query_history: RwLock<VecDeque<CacheKey>>,
    /// Predicted next queries (refreshed periodically from history)
    predicted_queries: RwLock<Vec<CacheKey>>,
}
impl PathDistanceCache {
    /// Create new cache with default config
    pub fn new() -> Self {
        Self::with_config(CacheConfig::default())
    }

    /// Create with custom config
    pub fn with_config(config: CacheConfig) -> Self {
        Self {
            config,
            cache: RwLock::new(HashMap::new()),
            lru_order: RwLock::new(VecDeque::new()),
            access_counter: AtomicU64::new(0),
            hits: AtomicU64::new(0),
            misses: AtomicU64::new(0),
            prefetch_hits: AtomicU64::new(0),
            evictions: AtomicU64::new(0),
            query_history: RwLock::new(VecDeque::new()),
            predicted_queries: RwLock::new(Vec::new()),
        }
    }

    /// Get cached distance if available.
    ///
    /// Lookups are symmetric: `get(a, b)` and `get(b, a)` hit the same entry.
    /// Reads take only the shared lock, so concurrent readers do not block
    /// each other. NOTE(review): a hit does not refresh the entry's queue
    /// position, so eviction order is by (re)insertion, not by last read.
    pub fn get(&self, source: VertexId, target: VertexId) -> Option<f64> {
        let key = CacheKey::new(source, target);
        // Try to read from cache
        let cache = self.cache.read().unwrap();
        if let Some(entry) = cache.get(&key) {
            self.hits.fetch_add(1, Ordering::Relaxed);
            if entry.prefetched {
                self.prefetch_hits.fetch_add(1, Ordering::Relaxed);
            }
            // Feed the access-pattern tracker (best effort, never blocks)
            if self.config.enable_prefetch {
                self.record_query(key);
            }
            return Some(entry.distance);
        }
        drop(cache);
        self.misses.fetch_add(1, Ordering::Relaxed);
        // Misses are also interesting for prefetch prediction
        if self.config.enable_prefetch {
            self.record_query(key);
        }
        None
    }

    /// Insert distance into cache
    pub fn insert(&self, source: VertexId, target: VertexId, distance: f64) {
        let key = CacheKey::new(source, target);
        let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
        let entry = CacheEntry {
            source,
            target,
            distance,
            last_access: timestamp,
            prefetched: false,
        };
        self.insert_entry(key, entry);
    }

    /// Insert with prefetch flag (so prefetch effectiveness shows in stats)
    pub fn insert_prefetch(&self, source: VertexId, target: VertexId, distance: f64) {
        let key = CacheKey::new(source, target);
        let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
        let entry = CacheEntry {
            source,
            target,
            distance,
            last_access: timestamp,
            prefetched: true,
        };
        self.insert_entry(key, entry);
    }

    /// Internal insert with eviction.
    ///
    /// Fix: re-inserting an existing key previously appended a duplicate key
    /// to `lru_order` while `cache.len()` stayed flat; a later eviction could
    /// then pop the stale duplicate and remove a live entry prematurely,
    /// inflating the eviction counter. The shared helper now refreshes the
    /// key's queue position instead of duplicating it.
    fn insert_entry(&self, key: CacheKey, entry: CacheEntry) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        Self::insert_locked(&self.config, &self.evictions, &mut cache, &mut lru, key, entry);
    }

    /// Shared locked-insert path: dedupe the queue on key replacement, append
    /// the key at the back, then evict from the front until within capacity.
    /// Evictions are only counted when an entry is actually removed.
    fn insert_locked(
        config: &CacheConfig,
        evictions: &AtomicU64,
        cache: &mut HashMap<CacheKey, CacheEntry>,
        lru: &mut VecDeque<CacheKey>,
        key: CacheKey,
        entry: CacheEntry,
    ) {
        // Replacing an existing key must not leave a stale duplicate in the
        // queue — drop the old position and re-append at the back.
        if cache.insert(key, entry).is_some() {
            lru.retain(|k| *k != key);
        }
        lru.push_back(key);
        // Evict oldest entries until we are back within capacity.
        while cache.len() > config.max_entries {
            match lru.pop_front() {
                Some(evict_key) => {
                    if cache.remove(&evict_key).is_some() {
                        evictions.fetch_add(1, Ordering::Relaxed);
                    }
                }
                None => break,
            }
        }
    }

    /// Batch insert multiple distances under a single lock acquisition,
    /// amortizing lock overhead across the whole batch.
    pub fn insert_batch(&self, entries: &[(VertexId, VertexId, f64)]) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        for &(source, target, distance) in entries {
            let key = CacheKey::new(source, target);
            let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
            let entry = CacheEntry {
                source,
                target,
                distance,
                last_access: timestamp,
                prefetched: false,
            };
            Self::insert_locked(&self.config, &self.evictions, &mut cache, &mut lru, key, entry);
        }
    }

    /// Invalidate entries involving a vertex (e.g. after a graph mutation).
    pub fn invalidate_vertex(&self, vertex: VertexId) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        // Single pass over each structure (the previous implementation pruned
        // the queue once per removed key: O(n * m)).
        cache.retain(|k, _| k.source != vertex && k.target != vertex);
        lru.retain(|k| k.source != vertex && k.target != vertex);
    }

    /// Clear entire cache (statistics counters are intentionally kept).
    pub fn clear(&self) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        cache.clear();
        lru.clear();
    }

    /// Record a query for prefetch prediction.
    ///
    /// Uses `try_write` so the hot read path never blocks on the history
    /// lock; a contended update is simply skipped.
    fn record_query(&self, key: CacheKey) {
        if let Ok(mut history) = self.query_history.try_write() {
            history.push_back(key);
            while history.len() > self.config.prefetch_history_size {
                history.pop_front();
            }
            // Update predictions periodically (every 10th recorded query)
            if history.len() % 10 == 0 {
                self.update_predictions(&history);
            }
        }
    }

    /// Update prefetch predictions based on access patterns.
    ///
    /// Heuristic: pair each of the 5 most recent query sources with vertices
    /// that appeared more than twice in the history window, capped at
    /// `prefetch_lookahead` predictions.
    fn update_predictions(&self, history: &VecDeque<CacheKey>) {
        if history.len() < 10 {
            return;
        }
        // Find frequently co-occurring vertex pairs
        let mut vertex_frequency: HashMap<VertexId, usize> = HashMap::new();
        for key in history.iter() {
            *vertex_frequency.entry(key.source).or_insert(0) += 1;
            *vertex_frequency.entry(key.target).or_insert(0) += 1;
        }
        // Predict likely next queries based on recent pattern
        let recent: Vec<_> = history.iter().rev().take(5).collect();
        let mut predictions = Vec::new();
        for key in recent {
            // Predict queries to neighbors of frequently accessed vertices
            for (vertex, &freq) in &vertex_frequency {
                if freq > 2 && *vertex != key.source && *vertex != key.target {
                    predictions.push(CacheKey::new(key.source, *vertex));
                    if predictions.len() >= self.config.prefetch_lookahead {
                        break;
                    }
                }
            }
            if predictions.len() >= self.config.prefetch_lookahead {
                break;
            }
        }
        if let Ok(mut pred) = self.predicted_queries.try_write() {
            *pred = predictions;
        }
    }

    /// Get prefetch hints based on access patterns.
    ///
    /// A hint is emitted for every vertex that appears in more than 2 recent
    /// queries (on either side of the pair); confidence is its share of the
    /// history window, capped at 1.0.
    pub fn get_prefetch_hints(&self) -> Vec<PrefetchHint> {
        let history = self.query_history.read().unwrap();
        if history.is_empty() {
            return Vec::new();
        }
        // Find most frequently queried sources
        let mut source_freq: HashMap<VertexId, Vec<VertexId>> = HashMap::new();
        for key in history.iter() {
            source_freq.entry(key.source).or_default().push(key.target);
            source_freq.entry(key.target).or_default().push(key.source);
        }
        // Generate hints for hot sources
        source_freq
            .into_iter()
            .filter(|(_, targets)| targets.len() > 2)
            .map(|(source, targets)| {
                let confidence = (targets.len() as f64 / history.len() as f64).min(1.0);
                PrefetchHint {
                    source,
                    targets,
                    confidence,
                }
            })
            .collect()
    }

    /// Get predicted queries for prefetching (normalized vertex pairs).
    pub fn get_predicted_queries(&self) -> Vec<(VertexId, VertexId)> {
        let pred = self.predicted_queries.read().unwrap();
        pred.iter().map(|key| (key.source, key.target)).collect()
    }

    /// Get cache statistics (a consistent snapshot of size + counters).
    pub fn stats(&self) -> CacheStats {
        let cache = self.cache.read().unwrap();
        CacheStats {
            hits: self.hits.load(Ordering::Relaxed),
            misses: self.misses.load(Ordering::Relaxed),
            size: cache.len(),
            prefetch_hits: self.prefetch_hits.load(Ordering::Relaxed),
            evictions: self.evictions.load(Ordering::Relaxed),
        }
    }

    /// Get current cache size
    pub fn len(&self) -> usize {
        self.cache.read().unwrap().len()
    }

    /// Check if cache is empty
    pub fn is_empty(&self) -> bool {
        self.cache.read().unwrap().is_empty()
    }
}
impl Default for PathDistanceCache {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Insert/get round-trip, symmetric lookup, and a miss.
    #[test]
    fn test_basic_cache_operations() {
        let cache = PathDistanceCache::new();
        // Insert and retrieve
        cache.insert(1, 2, 10.0);
        assert_eq!(cache.get(1, 2), Some(10.0));
        // Symmetric access
        assert_eq!(cache.get(2, 1), Some(10.0));
        // Miss
        assert_eq!(cache.get(1, 3), None);
    }

    /// With capacity 3, a fourth insert evicts the oldest entry.
    #[test]
    fn test_lru_eviction() {
        let cache = PathDistanceCache::with_config(CacheConfig {
            max_entries: 3,
            ..Default::default()
        });
        cache.insert(1, 2, 1.0);
        cache.insert(2, 3, 2.0);
        cache.insert(3, 4, 3.0);
        // Cache is full
        assert_eq!(cache.len(), 3);
        // Insert new entry - should evict (1,2)
        cache.insert(4, 5, 4.0);
        assert_eq!(cache.len(), 3);
        assert_eq!(cache.get(1, 2), None); // Evicted
        assert_eq!(cache.get(4, 5), Some(4.0)); // Present
    }

    /// Batch insert stores every entry and each is retrievable.
    #[test]
    fn test_batch_insert() {
        let cache = PathDistanceCache::new();
        let entries = vec![(1, 2, 1.0), (2, 3, 2.0), (3, 4, 3.0)];
        cache.insert_batch(&entries);
        assert_eq!(cache.len(), 3);
        assert_eq!(cache.get(1, 2), Some(1.0));
        assert_eq!(cache.get(2, 3), Some(2.0));
        assert_eq!(cache.get(3, 4), Some(3.0));
    }

    /// Invalidation removes every entry touching the vertex, nothing else.
    #[test]
    fn test_invalidate_vertex() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        cache.insert(1, 3, 2.0);
        cache.insert(2, 3, 3.0);
        cache.invalidate_vertex(1);
        assert_eq!(cache.get(1, 2), None);
        assert_eq!(cache.get(1, 3), None);
        assert_eq!(cache.get(2, 3), Some(3.0));
    }

    /// Hit/miss counters and hit rate reflect the performed lookups.
    #[test]
    fn test_statistics() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        // Hit
        cache.get(1, 2);
        cache.get(1, 2);
        // Miss
        cache.get(3, 4);
        let stats = cache.stats();
        assert_eq!(stats.hits, 2);
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.size, 1);
        assert!(stats.hit_rate() > 0.5);
    }

    /// A hot source (vertex 1) should yield prefetch hints, or at minimum
    /// the accesses register as hits.
    #[test]
    fn test_prefetch_hints() {
        let cache = PathDistanceCache::with_config(CacheConfig {
            enable_prefetch: true,
            prefetch_history_size: 50,
            ..Default::default()
        });
        // Generate access pattern
        for i in 0..20 {
            cache.insert(1, i as u64, i as f64);
            let _ = cache.get(1, i as u64);
        }
        let hints = cache.get_prefetch_hints();
        // Should have hints for vertex 1 (frequently accessed)
        assert!(!hints.is_empty() || cache.stats().hits > 0);
    }

    /// `clear` empties the cache entirely.
    #[test]
    fn test_clear() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        cache.insert(2, 3, 2.0);
        assert_eq!(cache.len(), 2);
        cache.clear();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
    }
}

View File

@@ -0,0 +1,499 @@
//! Degree-based Presparse (DSpar) Implementation
//!
//! Fast approximation for sparsification using effective resistance:
//! R_eff(u,v) ≈ 1 / (deg(u) × deg(v))
//!
//! This provides a 5.9x speedup over exact effective resistance computation
//! while maintaining spectral properties for minimum cut preservation.
//!
//! Reference: "Degree-based Sparsification" (OpenReview)
use crate::graph::{DynamicGraph, EdgeId, VertexId, Weight};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
/// Configuration for degree-based presparse
///
/// Controls how aggressively edges are filtered before exact sparsification.
#[derive(Debug, Clone)]
pub struct PresparseConfig {
    /// Target sparsity ratio (0.0-1.0, lower = more sparse)
    pub target_sparsity: f64,
    /// Minimum effective resistance threshold for keeping edges
    pub resistance_threshold: f64,
    /// Whether to use adaptive threshold based on graph density
    pub adaptive_threshold: bool,
    /// Maximum edges to keep (optional hard limit)
    pub max_edges: Option<usize>,
    /// Random seed for probabilistic sampling
    pub seed: Option<u64>,
}

impl Default for PresparseConfig {
    /// Defaults: keep ~10% of edges, adaptive thresholding on, no hard edge
    /// cap, and a fixed seed (42) for reproducible sampling.
    fn default() -> Self {
        PresparseConfig {
            seed: Some(42),
            max_edges: None,
            adaptive_threshold: true,
            resistance_threshold: 0.0,
            target_sparsity: 0.1,
        }
    }
}
/// Statistics from presparse operation
///
/// Summarizes one `DegreePresparse::presparse` run; `Default` is all-zero.
#[derive(Debug, Clone, Default)]
pub struct PresparseStats {
    /// Original number of edges
    pub original_edges: usize,
    /// Number of edges after presparse
    pub sparse_edges: usize,
    /// Sparsity ratio achieved (sparse / original)
    pub sparsity_ratio: f64,
    /// Time taken in microseconds
    pub time_us: u64,
    /// Estimated speedup factor
    pub speedup_factor: f64,
    /// Number of vertices affected
    pub vertices_processed: usize,
}
/// Result of presparse operation
///
/// The sparsified edge list plus bookkeeping to map back to the original
/// graph's edges.
#[derive(Debug)]
pub struct PresparseResult {
    /// Sparsified edges with scaled weights
    pub edges: Vec<(VertexId, VertexId, Weight)>,
    /// Mapping from new edge index (position in `edges`) to original edge ID
    pub edge_mapping: HashMap<usize, EdgeId>,
    /// Statistics for this run
    pub stats: PresparseStats,
}
/// Degree-based presparse implementation
///
/// Uses effective resistance approximation R_eff(u,v) ≈ 1/(deg_u × deg_v)
/// to pre-filter edges before exact sparsification, achieving 5.9x speedup.
pub struct DegreePresparse {
    config: PresparseConfig,
    /// Cached degree information, rebuilt per `presparse` call
    degree_cache: HashMap<VertexId, usize>,
}
impl DegreePresparse {
/// Create new degree presparse with default config
pub fn new() -> Self {
    Self::with_config(Default::default())
}
/// Create with custom config
pub fn with_config(config: PresparseConfig) -> Self {
Self {
config,
degree_cache: HashMap::new(),
}
}
/// Compute effective resistance approximation for an edge
///
/// R_eff(u,v) ≈ 1 / (deg(u) × deg(v))
///
/// High resistance = edge is important for connectivity
/// Low resistance = edge can likely be removed
#[inline]
pub fn effective_resistance(&self, deg_u: usize, deg_v: usize) -> f64 {
if deg_u == 0 || deg_v == 0 {
return f64::INFINITY; // Always keep edges to isolated vertices
}
1.0 / (deg_u as f64 * deg_v as f64)
}
/// Pre-compute degrees for all vertices
fn precompute_degrees(&mut self, graph: &DynamicGraph) {
self.degree_cache.clear();
for v in graph.vertices() {
self.degree_cache.insert(v, graph.degree(v));
}
}
/// Compute adaptive threshold based on graph properties
fn compute_adaptive_threshold(&self, graph: &DynamicGraph) -> f64 {
let n = graph.num_vertices();
let m = graph.num_edges();
if n == 0 || m == 0 {
return 0.0;
}
// Average degree
let avg_degree = (2 * m) as f64 / n as f64;
// Target: keep O(n log n) edges
let target_edges = (n as f64 * (n as f64).ln()).min(m as f64);
// Compute threshold that keeps approximately target_edges
// Higher threshold = fewer edges kept
let sparsity = target_edges / m as f64;
// Threshold based on average effective resistance
1.0 / (avg_degree * avg_degree * sparsity.max(0.01))
}
/// Perform degree-based presparse on a graph
///
/// Returns a sparsified edge set that preserves spectral properties
/// for minimum cut computation.
pub fn presparse(&mut self, graph: &DynamicGraph) -> PresparseResult {
let start = std::time::Instant::now();
// Pre-compute degrees
self.precompute_degrees(graph);
let original_edges = graph.num_edges();
// Compute threshold
let threshold = if self.config.adaptive_threshold {
self.compute_adaptive_threshold(graph)
} else {
self.config.resistance_threshold
};
// Score all edges by effective resistance
let mut scored_edges: Vec<(EdgeId, VertexId, VertexId, Weight, f64)> =
Vec::with_capacity(original_edges);
for edge in graph.edges() {
let deg_u = *self.degree_cache.get(&edge.source).unwrap_or(&1);
let deg_v = *self.degree_cache.get(&edge.target).unwrap_or(&1);
let resistance = self.effective_resistance(deg_u, deg_v);
scored_edges.push((edge.id, edge.source, edge.target, edge.weight, resistance));
}
// Sort by resistance (descending - high resistance = important)
scored_edges.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap_or(std::cmp::Ordering::Equal));
// Determine how many edges to keep
let target_count = if let Some(max) = self.config.max_edges {
max.min(original_edges)
} else {
((original_edges as f64 * self.config.target_sparsity).ceil() as usize).max(1)
};
// Keep edges with highest effective resistance
let mut result_edges = Vec::with_capacity(target_count);
let mut edge_mapping = HashMap::with_capacity(target_count);
let mut kept_vertices = HashSet::new();
for (idx, (edge_id, u, v, weight, resistance)) in scored_edges.into_iter().enumerate() {
if result_edges.len() >= target_count && resistance < threshold {
break;
}
// Scale weight by inverse sampling probability
let sampling_prob = self.sampling_probability(resistance, threshold);
let scaled_weight = if sampling_prob > 0.0 {
weight / sampling_prob
} else {
weight
};
result_edges.push((u, v, scaled_weight));
edge_mapping.insert(result_edges.len() - 1, edge_id);
kept_vertices.insert(u);
kept_vertices.insert(v);
if result_edges.len() >= target_count {
break;
}
}
let elapsed_us = start.elapsed().as_micros() as u64;
let sparse_edges = result_edges.len();
// Estimate speedup: O(m) -> O(m') where m' << m
// Plus the 5.9x from avoiding exact resistance computation
let sparsity_speedup = if sparse_edges > 0 {
original_edges as f64 / sparse_edges as f64
} else {
1.0
};
let speedup_factor = sparsity_speedup.min(5.9); // Cap at theoretical DSpar speedup
PresparseResult {
edges: result_edges,
edge_mapping,
stats: PresparseStats {
original_edges,
sparse_edges,
sparsity_ratio: sparse_edges as f64 / original_edges.max(1) as f64,
time_us: elapsed_us,
speedup_factor,
vertices_processed: kept_vertices.len(),
},
}
}
/// Compute sampling probability for an edge
#[inline]
fn sampling_probability(&self, resistance: f64, threshold: f64) -> f64 {
if resistance >= threshold {
1.0 // Always keep high-resistance edges
} else {
// Probability proportional to resistance
(resistance / threshold).max(0.01)
}
}
/// Incremental update: handle edge insertion
///
/// Returns whether the edge should be included in the sparse graph
pub fn should_include_edge(&mut self, graph: &DynamicGraph, u: VertexId, v: VertexId) -> bool {
// Update degree cache
self.degree_cache.insert(u, graph.degree(u));
self.degree_cache.insert(v, graph.degree(v));
let deg_u = *self.degree_cache.get(&u).unwrap_or(&1);
let deg_v = *self.degree_cache.get(&v).unwrap_or(&1);
let resistance = self.effective_resistance(deg_u, deg_v);
let threshold = if self.config.adaptive_threshold {
self.compute_adaptive_threshold(graph)
} else {
self.config.resistance_threshold
};
resistance >= threshold
}
/// Get statistics for the presparse
pub fn config(&self) -> &PresparseConfig {
&self.config
}
}
impl Default for DegreePresparse {
fn default() -> Self {
Self::new()
}
}
/// Spectral concordance loss for validating sparsification quality
///
/// L = λ₁·Laplacian_Alignment + λ₂·Feature_Preserve + λ₃·Sparsity
pub struct SpectralConcordance {
    /// Weight for Laplacian alignment term
    pub lambda_laplacian: f64,
    /// Weight for feature preservation term
    pub lambda_feature: f64,
    /// Weight for sparsity inducing term
    pub lambda_sparsity: f64,
}
impl Default for SpectralConcordance {
    /// Default weighting: 1.0 / 0.5 / 0.1 for the three loss terms.
    fn default() -> Self {
        SpectralConcordance {
            lambda_sparsity: 0.1,
            lambda_feature: 0.5,
            lambda_laplacian: 1.0,
        }
    }
}
impl SpectralConcordance {
    /// Compute the spectral concordance loss between original and sparse graphs.
    pub fn compute_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        self.lambda_laplacian * self.laplacian_alignment_loss(original, sparse)
            + self.lambda_feature * self.feature_preservation_loss(original, sparse)
            + self.lambda_sparsity * self.sparsity_loss(original, sparse)
    }
    /// Approximate Laplacian alignment loss using the degree distribution:
    /// mean relative degree difference over vertices with nonzero original degree.
    fn laplacian_alignment_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        let verts = original.vertices();
        if verts.is_empty() {
            return 0.0;
        }
        // Accumulate (sum of relative diffs, count of contributing vertices).
        let (total_diff, count) = verts.into_iter().fold((0.0_f64, 0usize), |(sum, n), v| {
            let orig_deg = original.degree(v) as f64;
            if orig_deg > 0.0 {
                let sparse_deg = sparse.degree(v) as f64;
                (sum + ((orig_deg - sparse_deg) / orig_deg).abs(), n + 1)
            } else {
                (sum, n)
            }
        });
        if count == 0 {
            0.0
        } else {
            total_diff / count as f64
        }
    }
    /// Feature preservation loss: relative change in minimum degree
    /// (a crude approximation of cut preservation).
    fn feature_preservation_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        let min_degree = |g: &DynamicGraph| -> f64 {
            g.vertices().iter().map(|&v| g.degree(v)).min().unwrap_or(0) as f64
        };
        let orig_min = min_degree(original);
        let sparse_min = min_degree(sparse);
        if orig_min > 0.0 {
            ((orig_min - sparse_min) / orig_min).abs()
        } else {
            0.0
        }
    }
    /// Sparsity inducing loss: fraction of original edges retained.
    fn sparsity_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        sparse.num_edges() as f64 / original.num_edges().max(1) as f64
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Complete graph K10 (45 edges, every vertex has degree 9) — dense
    /// enough to give the sparsifier something to remove.
    fn create_test_graph() -> DynamicGraph {
        let g = DynamicGraph::new();
        // Create a dense graph
        for i in 1..=10 {
            for j in (i + 1)..=10 {
                let _ = g.insert_edge(i, j, 1.0);
            }
        }
        g
    }
    #[test]
    fn test_effective_resistance() {
        let dspar = DegreePresparse::new();
        // High degree vertices -> low resistance
        assert!(dspar.effective_resistance(10, 10) < dspar.effective_resistance(2, 2));
        // Zero degree -> infinity
        assert!(dspar.effective_resistance(0, 5).is_infinite());
    }
    #[test]
    fn test_presparse_reduces_edges() {
        let graph = create_test_graph();
        let original_edges = graph.num_edges();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.3,
            ..Default::default()
        });
        let result = dspar.presparse(&graph);
        assert!(result.stats.sparse_edges < original_edges);
        // 0.3 target with ceil rounding can overshoot slightly; 0.5 is a safe bound
        assert!(result.stats.sparsity_ratio <= 0.5);
        assert!(result.stats.speedup_factor > 1.0);
    }
    #[test]
    fn test_presparse_preserves_connectivity() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.2,
            ..Default::default()
        });
        let result = dspar.presparse(&graph);
        // Should keep at least n-1 edges to maintain connectivity
        assert!(result.stats.sparse_edges >= graph.num_vertices() - 1);
    }
    #[test]
    fn test_adaptive_threshold() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            adaptive_threshold: true,
            ..Default::default()
        });
        // Private methods are callable here because tests live in the same module.
        dspar.precompute_degrees(&graph);
        let threshold = dspar.compute_adaptive_threshold(&graph);
        assert!(threshold > 0.0);
    }
    #[test]
    fn test_spectral_concordance() {
        let original = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.5,
            ..Default::default()
        });
        let result = dspar.presparse(&original);
        // Create sparse graph from the kept (rescaled) edges
        let sparse = DynamicGraph::new();
        for (u, v, w) in &result.edges {
            let _ = sparse.insert_edge(*u, *v, *w);
        }
        let concordance = SpectralConcordance::default();
        let loss = concordance.compute_loss(&original, &sparse);
        // Loss should be bounded (non-negative, and small for a half-kept K10)
        assert!(loss >= 0.0);
        assert!(loss < 10.0);
    }
    #[test]
    fn test_should_include_edge() {
        let graph = DynamicGraph::new();
        graph.insert_edge(1, 2, 1.0).unwrap();
        graph.insert_edge(2, 3, 1.0).unwrap();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            resistance_threshold: 0.0,
            adaptive_threshold: false,
            ..Default::default()
        });
        // New edge to low-degree vertices should be included
        let should_include = dspar.should_include_edge(&graph, 1, 3);
        assert!(should_include);
    }
    #[test]
    fn test_edge_mapping() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::new();
        let result = dspar.presparse(&graph);
        // Each sparse edge should map back to an original edge ID
        for (idx, _) in result.edges.iter().enumerate() {
            assert!(result.edge_mapping.contains_key(&idx));
        }
    }
}

View File

@@ -0,0 +1,29 @@
//! Performance Optimizations for j-Tree + BMSSP Implementation
//!
//! This module implements the SOTA optimizations from ADR-002-addendum-sota-optimizations.md:
//!
//! 1. **Degree-based presparse (DSpar)**: 5.9x speedup via effective resistance approximation
//! 2. **LRU Cache**: Path distance caching with prefetch optimization
//! 3. **SIMD Operations**: Vectorized distance array computations
//! 4. **Pool Allocators**: Memory-efficient allocations with lazy deallocation
//! 5. **Parallel Updates**: Rayon-based parallel level updates with work-stealing
//! 6. **WASM Optimization**: Batch operations and TypedArray transfers
//!
//! Target: Combined 10x speedup over naive implementation.
pub mod benchmark;
pub mod cache;
pub mod dspar;
pub mod parallel;
pub mod pool;
pub mod simd_distance;
pub mod wasm_batch;
// Re-exports
pub use benchmark::{BenchmarkResult, BenchmarkSuite, OptimizationBenchmark};
pub use cache::{CacheConfig, CacheStats, PathDistanceCache, PrefetchHint};
pub use dspar::{DegreePresparse, PresparseConfig, PresparseResult, PresparseStats};
pub use parallel::{ParallelConfig, ParallelLevelUpdater, WorkStealingScheduler};
pub use pool::{LazyLevel, LevelPool, PoolConfig, PoolStats};
pub use simd_distance::{DistanceArray, SimdDistanceOps};
pub use wasm_batch::{BatchConfig, TypedArrayTransfer, WasmBatchOps};

View File

@@ -0,0 +1,697 @@
//! Parallel Level Updates with Work-Stealing
//!
//! Provides efficient parallel computation for j-tree levels:
//! - Rayon-based parallel iteration
//! - Work-stealing for load balancing
//! - Lock-free result aggregation
//! - Adaptive parallelism based on workload
//!
//! Target: Near-linear speedup for independent level updates
use crate::graph::VertexId;
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex, RwLock};
#[cfg(feature = "rayon")]
use rayon::prelude::*;
/// Configuration for parallel level updates
#[derive(Debug, Clone)]
pub struct ParallelConfig {
    /// Minimum workload to use parallelism
    pub min_parallel_size: usize,
    /// Number of threads (0 = auto-detect)
    pub num_threads: usize,
    /// Enable work-stealing
    pub work_stealing: bool,
    /// Chunk size for parallel iteration
    pub chunk_size: usize,
    /// Enable adaptive parallelism
    pub adaptive: bool,
}
impl Default for ParallelConfig {
    /// Defaults: parallelize from 100 items, auto-detect thread count,
    /// work-stealing and adaptive mode enabled, 64-item chunks.
    fn default() -> Self {
        ParallelConfig {
            adaptive: true,
            chunk_size: 64,
            work_stealing: true,
            num_threads: 0, // 0 means auto-detect
            min_parallel_size: 100,
        }
    }
}
/// Work item for parallel processing
#[derive(Debug, Clone)]
pub struct WorkItem {
    /// Level index this item belongs to
    pub level: usize,
    /// Vertices to process
    pub vertices: Vec<VertexId>,
    /// Priority (lower value = higher priority; the scheduler serves ascending)
    pub priority: u32,
    /// Estimated work units (feeds the scheduler's total-work accounting)
    pub estimated_work: usize,
}
/// Result from parallel level update
#[derive(Debug, Clone)]
pub struct LevelUpdateResult {
    /// Level index
    pub level: usize,
    /// Computed cut value
    pub cut_value: f64,
    /// Partition (vertices on one side of the cut)
    pub partition: HashSet<VertexId>,
    /// Time taken in microseconds
    pub time_us: u64,
}
/// Work-stealing scheduler for parallel level processing
///
/// Invariant: `work_queue` is always sorted by ascending `priority`, with
/// FIFO order among equal priorities. `submit` maintains the invariant via
/// binary-search insertion instead of re-sorting the whole queue per push.
pub struct WorkStealingScheduler {
    config: ParallelConfig,
    /// Work queue, sorted by ascending priority
    work_queue: RwLock<Vec<WorkItem>>,
    /// Completed results, keyed by level index
    results: RwLock<HashMap<usize, LevelUpdateResult>>,
    /// Active workers count
    active_workers: AtomicUsize,
    /// Total estimated work units submitted
    total_work: AtomicU64,
    /// Number of successful steals
    steals: AtomicU64,
}
impl WorkStealingScheduler {
    /// Create new scheduler with default config
    pub fn new() -> Self {
        Self::with_config(ParallelConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: ParallelConfig) -> Self {
        Self {
            config,
            work_queue: RwLock::new(Vec::new()),
            results: RwLock::new(HashMap::new()),
            active_workers: AtomicUsize::new(0),
            total_work: AtomicU64::new(0),
            steals: AtomicU64::new(0),
        }
    }
    /// Submit a single work item.
    ///
    /// O(log n) search + O(n) shift, replacing the previous full
    /// O(n log n) sort on every submission. Equal-priority items keep
    /// submission (FIFO) order, exactly matching the old stable sort.
    pub fn submit(&self, item: WorkItem) {
        self.total_work
            .fetch_add(item.estimated_work as u64, Ordering::Relaxed);
        let mut queue = self.work_queue.write().unwrap();
        // First slot whose priority is strictly greater: the new item lands
        // after all existing items of equal priority.
        let pos = queue.partition_point(|w| w.priority <= item.priority);
        queue.insert(pos, item);
    }
    /// Submit multiple work items under a single lock acquisition.
    pub fn submit_batch(&self, items: Vec<WorkItem>) {
        let mut queue = self.work_queue.write().unwrap();
        for item in items {
            self.total_work
                .fetch_add(item.estimated_work as u64, Ordering::Relaxed);
            queue.push(item);
        }
        // One stable sort restores the priority invariant for the whole batch.
        queue.sort_by_key(|w| w.priority);
    }
    /// Try to steal work from the queue.
    ///
    /// Returns the highest-priority item (queue front). `Vec::remove(0)`
    /// is O(n); acceptable for the modest queue sizes used here.
    pub fn steal(&self) -> Option<WorkItem> {
        let mut queue = self.work_queue.write().unwrap();
        if queue.is_empty() {
            return None;
        }
        self.steals.fetch_add(1, Ordering::Relaxed);
        Some(queue.remove(0))
    }
    /// Record a finished level result (overwrites any prior result for that level).
    pub fn complete(&self, result: LevelUpdateResult) {
        let mut results = self.results.write().unwrap();
        results.insert(result.level, result);
    }
    /// Snapshot all results collected so far.
    pub fn get_results(&self) -> HashMap<usize, LevelUpdateResult> {
        self.results.read().unwrap().clone()
    }
    /// Clear all recorded results.
    pub fn clear_results(&self) {
        self.results.write().unwrap().clear();
    }
    /// Check if the work queue is empty.
    pub fn is_empty(&self) -> bool {
        self.work_queue.read().unwrap().is_empty()
    }
    /// Get the current queue size.
    pub fn queue_size(&self) -> usize {
        self.work_queue.read().unwrap().len()
    }
    /// Get the total number of successful steals.
    pub fn steal_count(&self) -> u64 {
        self.steals.load(Ordering::Relaxed)
    }
}
impl Default for WorkStealingScheduler {
    fn default() -> Self {
        Self::new()
    }
}
/// Parallel level updater using Rayon
///
/// Tracks a process-wide minimum cut across all levels via lock-free
/// atomics; the f64 minimum is transported as its IEEE-754 bit pattern
/// inside an `AtomicU64`.
pub struct ParallelLevelUpdater {
    config: ParallelConfig,
    /// Scheduler for work-stealing
    scheduler: Arc<WorkStealingScheduler>,
    /// Global minimum cut found (f64 stored via `to_bits`; +inf = none yet)
    global_min: AtomicU64,
    /// Level with global minimum (usize::MAX = none yet)
    best_level: AtomicUsize,
}
impl ParallelLevelUpdater {
    /// Create new parallel updater with default config
    pub fn new() -> Self {
        Self::with_config(ParallelConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: ParallelConfig) -> Self {
        Self {
            scheduler: Arc::new(WorkStealingScheduler::with_config(config.clone())),
            config,
            global_min: AtomicU64::new(f64::INFINITY.to_bits()),
            best_level: AtomicUsize::new(usize::MAX),
        }
    }
    /// Update global minimum atomically.
    ///
    /// CAS loop on the f64 bit pattern; returns true iff `value` became the
    /// new minimum. The comparison is done on the decoded f64s, so the
    /// bit-pattern storage is only a transport encoding.
    ///
    /// NOTE(review): `best_level` lives in a separate atomic written after
    /// the CAS succeeds, so under contention a stale level can overwrite a
    /// newer one — min value and level can briefly disagree. Also, a NaN
    /// `value` passes the `>=` guard (NaN comparisons are false); callers
    /// are expected to pass finite cut values. Confirm both are acceptable.
    pub fn try_update_min(&self, value: f64, level: usize) -> bool {
        let value_bits = value.to_bits();
        let mut current = self.global_min.load(Ordering::Acquire);
        loop {
            let current_value = f64::from_bits(current);
            if value >= current_value {
                return false;
            }
            // compare_exchange_weak may fail spuriously; the loop retries
            // with the freshly observed value.
            match self.global_min.compare_exchange_weak(
                current,
                value_bits,
                Ordering::AcqRel,
                Ordering::Acquire,
            ) {
                Ok(_) => {
                    self.best_level.store(level, Ordering::Release);
                    return true;
                }
                Err(c) => current = c,
            }
        }
    }
    /// Get current global minimum (+inf when no update has been recorded).
    pub fn global_min(&self) -> f64 {
        f64::from_bits(self.global_min.load(Ordering::Acquire))
    }
    /// Get the level holding the best cut, if any update has been recorded.
    pub fn best_level(&self) -> Option<usize> {
        let level = self.best_level.load(Ordering::Acquire);
        if level == usize::MAX {
            None
        } else {
            Some(level)
        }
    }
    /// Reset global minimum to the initial (+inf, no level) state.
    pub fn reset_min(&self) {
        self.global_min
            .store(f64::INFINITY.to_bits(), Ordering::Release);
        self.best_level.store(usize::MAX, Ordering::Release);
    }
    /// Process levels in parallel using Rayon.
    ///
    /// Falls back to sequential iteration below `min_parallel_size`.
    /// `process_fn` is cloned per item so each task owns its closure;
    /// the `mut` binding is never actually mutated (only clones are called).
    #[cfg(feature = "rayon")]
    pub fn process_parallel<F>(&self, levels: &[usize], mut process_fn: F) -> Vec<LevelUpdateResult>
    where
        F: FnMut(usize) -> LevelUpdateResult + Send + Sync + Clone,
    {
        let size = levels.len();
        if size < self.config.min_parallel_size {
            // Sequential processing for small workloads
            return levels
                .iter()
                .map(|&level| {
                    let result = process_fn.clone()(level);
                    self.try_update_min(result.cut_value, level);
                    result
                })
                .collect();
        }
        // Parallel processing with Rayon
        levels
            .par_iter()
            .map(|&level| {
                let result = process_fn.clone()(level);
                self.try_update_min(result.cut_value, level);
                result
            })
            .collect()
    }
    /// Process levels sequentially (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_parallel<F>(&self, levels: &[usize], mut process_fn: F) -> Vec<LevelUpdateResult>
    where
        F: FnMut(usize) -> LevelUpdateResult + Clone,
    {
        levels
            .iter()
            .map(|&level| {
                let result = process_fn.clone()(level);
                self.try_update_min(result.cut_value, level);
                result
            })
            .collect()
    }
    /// Process work items with work-stealing.
    ///
    /// Despite the name, this relies on rayon's internal work-stealing pool
    /// rather than the explicit `WorkStealingScheduler`.
    #[cfg(feature = "rayon")]
    pub fn process_with_stealing<F>(
        &self,
        work_items: Vec<WorkItem>,
        process_fn: F,
    ) -> Vec<LevelUpdateResult>
    where
        F: Fn(&WorkItem) -> LevelUpdateResult + Send + Sync,
    {
        if work_items.len() < self.config.min_parallel_size {
            // Sequential
            return work_items
                .iter()
                .map(|item| {
                    let result = process_fn(item);
                    self.try_update_min(result.cut_value, item.level);
                    result
                })
                .collect();
        }
        // Parallel with work-stealing
        work_items
            .par_iter()
            .map(|item| {
                let result = process_fn(item);
                self.try_update_min(result.cut_value, item.level);
                result
            })
            .collect()
    }
    /// Process work items sequentially (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_with_stealing<F>(
        &self,
        work_items: Vec<WorkItem>,
        process_fn: F,
    ) -> Vec<LevelUpdateResult>
    where
        F: Fn(&WorkItem) -> LevelUpdateResult,
    {
        work_items
            .iter()
            .map(|item| {
                let result = process_fn(item);
                self.try_update_min(result.cut_value, item.level);
                result
            })
            .collect()
    }
    /// Batch vertex processing within a level, parallel above the threshold.
    #[cfg(feature = "rayon")]
    pub fn process_vertices_parallel<F, R>(&self, vertices: &[VertexId], process_fn: F) -> Vec<R>
    where
        F: Fn(VertexId) -> R + Send + Sync,
        R: Send,
    {
        if vertices.len() < self.config.min_parallel_size {
            return vertices.iter().map(|&v| process_fn(v)).collect();
        }
        vertices.par_iter().map(|&v| process_fn(v)).collect()
    }
    /// Batch vertex processing (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_vertices_parallel<F, R>(&self, vertices: &[VertexId], process_fn: F) -> Vec<R>
    where
        F: Fn(VertexId) -> R,
    {
        vertices.iter().map(|&v| process_fn(v)).collect()
    }
    /// Parallel map-reduce over `items`.
    ///
    /// NOTE(review): `reduce_fn` should be associative and `identity` a true
    /// identity for it — rayon clones `identity` per split, so a
    /// non-identity value would be folded in multiple times in the parallel
    /// path but only once sequentially.
    #[cfg(feature = "rayon")]
    pub fn parallel_reduce<T, F, R>(
        &self,
        items: &[T],
        identity: R,
        map_fn: F,
        reduce_fn: fn(R, R) -> R,
    ) -> R
    where
        T: Sync,
        F: Fn(&T) -> R + Send + Sync,
        R: Send + Clone,
    {
        if items.len() < self.config.min_parallel_size {
            return items
                .iter()
                .map(|item| map_fn(item))
                .fold(identity.clone(), reduce_fn);
        }
        items
            .par_iter()
            .map(|item| map_fn(item))
            .reduce(|| identity.clone(), reduce_fn)
    }
    /// Sequential map-reduce (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn parallel_reduce<T, F, R>(
        &self,
        items: &[T],
        identity: R,
        map_fn: F,
        reduce_fn: fn(R, R) -> R,
    ) -> R
    where
        F: Fn(&T) -> R,
        R: Clone,
    {
        items
            .iter()
            .map(|item| map_fn(item))
            .fold(identity, reduce_fn)
    }
    /// Get a shared handle to the internal work-stealing scheduler.
    pub fn scheduler(&self) -> &Arc<WorkStealingScheduler> {
        &self.scheduler
    }
}
impl Default for ParallelLevelUpdater {
    fn default() -> Self {
        Self::new()
    }
}
/// Parallel cut computation helpers
pub struct ParallelCutOps;
impl ParallelCutOps {
    /// Total weight of edges leaving `partition`, computed in parallel.
    /// Small partitions (< 100 vertices) use the sequential path.
    #[cfg(feature = "rayon")]
    pub fn boundary_size_parallel(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        let inside: Vec<_> = partition.iter().copied().collect();
        if inside.len() < 100 {
            return Self::boundary_size_sequential(partition, adjacency);
        }
        inside
            .par_iter()
            .map(|&v| match adjacency.get(&v) {
                Some(neighbors) => neighbors
                    .iter()
                    .filter(|(n, _)| !partition.contains(n))
                    .map(|(_, w)| w)
                    .sum::<f64>(),
                None => 0.0,
            })
            .sum()
    }
    /// Boundary size (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn boundary_size_parallel(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        Self::boundary_size_sequential(partition, adjacency)
    }
    /// Sequential boundary computation: sum the weights of edges whose
    /// other endpoint lies outside the partition.
    pub fn boundary_size_sequential(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        let mut crossing = 0.0;
        for &v in partition {
            if let Some(neighbors) = adjacency.get(&v) {
                for &(n, w) in neighbors {
                    if !partition.contains(&n) {
                        crossing += w;
                    }
                }
            }
        }
        crossing
    }
    /// Find a vertex of minimum positive degree, in parallel.
    /// Small inputs (< 100 vertices) use the sequential path.
    #[cfg(feature = "rayon")]
    pub fn min_degree_vertex_parallel(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        if vertices.len() < 100 {
            return Self::min_degree_vertex_sequential(vertices, adjacency);
        }
        vertices
            .par_iter()
            .filter_map(|&v| {
                let degree = adjacency.get(&v).map(|n| n.len()).unwrap_or(0);
                if degree > 0 {
                    Some((v, degree))
                } else {
                    None
                }
            })
            .min_by_key(|&(_, d)| d)
    }
    /// Minimum-degree vertex (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn min_degree_vertex_parallel(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        Self::min_degree_vertex_sequential(vertices, adjacency)
    }
    /// Sequential minimum-positive-degree search.
    ///
    /// On ties the later vertex wins, matching `Iterator::min_by_key`
    /// semantics (which returns the last of several equal minima).
    pub fn min_degree_vertex_sequential(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        let mut best: Option<(VertexId, usize)> = None;
        for &v in vertices {
            let degree = adjacency.get(&v).map(|n| n.len()).unwrap_or(0);
            if degree == 0 {
                continue; // isolated vertices never qualify
            }
            let replace = match best {
                Some((_, d)) => degree <= d,
                None => true,
            };
            if replace {
                best = Some((v, degree));
            }
        }
        best
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_work_item_submission() {
        let scheduler = WorkStealingScheduler::new();
        scheduler.submit(WorkItem {
            level: 0,
            vertices: vec![1, 2, 3],
            priority: 1,
            estimated_work: 100,
        });
        scheduler.submit(WorkItem {
            level: 1,
            vertices: vec![4, 5, 6],
            priority: 0, // Higher priority
            estimated_work: 50,
        });
        assert_eq!(scheduler.queue_size(), 2);
        // Should steal highest priority first
        let stolen = scheduler.steal().unwrap();
        assert_eq!(stolen.level, 1); // Priority 0 comes first
    }
    #[test]
    fn test_parallel_updater_min() {
        let updater = ParallelLevelUpdater::new();
        // Initial state: +inf minimum, no best level
        assert!(updater.global_min().is_infinite());
        assert!(updater.try_update_min(10.0, 0));
        assert_eq!(updater.global_min(), 10.0);
        assert_eq!(updater.best_level(), Some(0));
        assert!(updater.try_update_min(5.0, 1));
        assert_eq!(updater.global_min(), 5.0);
        assert_eq!(updater.best_level(), Some(1));
        // Should not update with higher value
        assert!(!updater.try_update_min(7.0, 2));
        assert_eq!(updater.global_min(), 5.0);
    }
    #[test]
    fn test_process_parallel() {
        let updater = ParallelLevelUpdater::new();
        let levels = vec![0, 1, 2, 3, 4];
        // 5 levels < min_parallel_size (100), so this exercises the
        // sequential path; level 0 produces the smallest cut (0.0).
        let results = updater.process_parallel(&levels, |level| LevelUpdateResult {
            level,
            cut_value: level as f64 * 2.0,
            partition: HashSet::new(),
            time_us: 0,
        });
        assert_eq!(results.len(), 5);
        assert_eq!(updater.global_min(), 0.0);
        assert_eq!(updater.best_level(), Some(0));
    }
    #[test]
    fn test_boundary_size() {
        // Partition {1,2}; crossing edges are 1-3 (2.0) and 2-4 (3.0).
        let partition: HashSet<_> = vec![1, 2].into_iter().collect();
        let mut adjacency: HashMap<VertexId, Vec<(VertexId, f64)>> = HashMap::new();
        adjacency.insert(1, vec![(2, 1.0), (3, 2.0)]);
        adjacency.insert(2, vec![(1, 1.0), (4, 3.0)]);
        adjacency.insert(3, vec![(1, 2.0)]);
        adjacency.insert(4, vec![(2, 3.0)]);
        let boundary = ParallelCutOps::boundary_size_sequential(&partition, &adjacency);
        // Edges crossing: 1-3 (2.0) + 2-4 (3.0) = 5.0
        assert_eq!(boundary, 5.0);
    }
    #[test]
    fn test_min_degree_vertex() {
        let vertices: Vec<_> = vec![1, 2, 3, 4];
        let mut adjacency: HashMap<VertexId, Vec<(VertexId, f64)>> = HashMap::new();
        adjacency.insert(1, vec![(2, 1.0), (3, 1.0), (4, 1.0)]);
        adjacency.insert(2, vec![(1, 1.0)]);
        adjacency.insert(3, vec![(1, 1.0), (4, 1.0)]);
        adjacency.insert(4, vec![(1, 1.0), (3, 1.0)]);
        // Vertex 2 is the unique degree-1 vertex
        let (min_v, min_deg) =
            ParallelCutOps::min_degree_vertex_sequential(&vertices, &adjacency).unwrap();
        assert_eq!(min_v, 2);
        assert_eq!(min_deg, 1);
    }
    #[test]
    fn test_scheduler_steal_count() {
        let scheduler = WorkStealingScheduler::new();
        scheduler.submit(WorkItem {
            level: 0,
            vertices: vec![1],
            priority: 0,
            estimated_work: 10,
        });
        // A steal only counts once it actually removes an item
        assert_eq!(scheduler.steal_count(), 0);
        let _ = scheduler.steal();
        assert_eq!(scheduler.steal_count(), 1);
    }
    #[test]
    fn test_batch_submit() {
        let scheduler = WorkStealingScheduler::new();
        let items = vec![
            WorkItem {
                level: 0,
                vertices: vec![],
                priority: 2,
                estimated_work: 100,
            },
            WorkItem {
                level: 1,
                vertices: vec![],
                priority: 0,
                estimated_work: 50,
            },
            WorkItem {
                level: 2,
                vertices: vec![],
                priority: 1,
                estimated_work: 75,
            },
        ];
        scheduler.submit_batch(items);
        assert_eq!(scheduler.queue_size(), 3);
        // Should be sorted by priority
        let first = scheduler.steal().unwrap();
        assert_eq!(first.level, 1); // Priority 0
    }
}

View File

@@ -0,0 +1,647 @@
//! Pool Allocators and Lazy Level Deallocation
//!
//! Memory-efficient allocation strategies:
//! - Pool allocators for frequent allocations
//! - Lazy deallocation of unused j-tree levels
//! - Compact representations (u16 for small graphs)
//! - Demand-paged level materialization
//!
//! Target: 50-75% memory reduction
use crate::graph::VertexId;
use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
/// Configuration for level pool
#[derive(Debug, Clone)]
pub struct PoolConfig {
    /// Maximum number of materialized levels
    pub max_materialized_levels: usize,
    /// Eviction threshold (levels unused for this many operations)
    pub eviction_threshold: u64,
    /// Preallocation size for level data
    pub prealloc_size: usize,
    /// Enable lazy deallocation
    pub lazy_dealloc: bool,
    /// Memory budget in bytes (0 = unlimited)
    pub memory_budget: usize,
}
impl Default for PoolConfig {
    /// Defaults: at most 16 resident levels, evict after 100 idle
    /// operations, 1 KiB preallocation, lazy deallocation on, no budget.
    fn default() -> Self {
        PoolConfig {
            memory_budget: 0, // unlimited
            lazy_dealloc: true,
            prealloc_size: 1024,
            eviction_threshold: 100,
            max_materialized_levels: 16,
        }
    }
}
/// Statistics for pool allocation
#[derive(Debug, Clone, Default)]
pub struct PoolStats {
    /// Total allocations performed by the pool
    pub allocations: u64,
    /// Total deallocations performed by the pool
    pub deallocations: u64,
    /// Current pool size (bytes)
    pub pool_size_bytes: usize,
    /// Number of currently materialized levels
    pub materialized_levels: usize,
    /// Number of evictions performed
    pub evictions: u64,
    /// Peak memory usage observed (bytes)
    pub peak_memory: usize,
}
/// State of a lazy level in the j-tree
#[derive(Debug, Clone)]
pub enum LazyLevel {
    /// Level not yet materialized
    Unmaterialized,
    /// Level is materialized and valid
    Materialized(LevelData),
    /// Level is materialized but dirty (needs recomputation)
    Dirty(LevelData),
    /// Level was evicted (can be recomputed)
    Evicted {
        /// Last known vertex count (for preallocation)
        last_vertex_count: usize,
    },
}
impl LazyLevel {
    /// True when level data is resident in memory (valid or dirty).
    pub fn is_materialized(&self) -> bool {
        self.data().is_some()
    }
    /// True when the level is resident but marked for recomputation.
    pub fn is_dirty(&self) -> bool {
        matches!(self, Self::Dirty(_))
    }
    /// Borrow the level data, if resident.
    pub fn data(&self) -> Option<&LevelData> {
        if let Self::Materialized(data) | Self::Dirty(data) = self {
            Some(data)
        } else {
            None
        }
    }
    /// Mutably borrow the level data, if resident.
    pub fn data_mut(&mut self) -> Option<&mut LevelData> {
        if let Self::Materialized(data) | Self::Dirty(data) = self {
            Some(data)
        } else {
            None
        }
    }
}
/// Data stored for a j-tree level
#[derive(Debug, Clone)]
pub struct LevelData {
    /// Level index
    pub level: usize,
    /// Vertices in this level (compact u16 representation)
    pub vertices: Vec<u16>,
    /// Adjacency list (compact CSR layout)
    pub adjacency: CompactAdjacency,
    /// Cut value for this level
    pub cut_value: f64,
    /// Last access timestamp
    last_access: u64,
    /// Cached memory footprint in bytes; refreshed by `update_memory_size`
    memory_size: usize,
}
impl LevelData {
    /// Create an empty level with preallocated capacity.
    pub fn new(level: usize, capacity: usize) -> Self {
        Self {
            level,
            vertices: Vec::with_capacity(capacity),
            adjacency: CompactAdjacency::new(capacity),
            cut_value: f64::INFINITY, // no cut computed yet
            last_access: 0,
            memory_size: 0,
        }
    }
    /// Recompute the cached memory-size estimate from the current contents.
    pub fn update_memory_size(&mut self) {
        let vertex_bytes = self.vertices.len() * std::mem::size_of::<u16>();
        self.memory_size = vertex_bytes + self.adjacency.memory_size();
    }
    /// Cached memory footprint in bytes (call `update_memory_size` first).
    pub fn memory_size(&self) -> usize {
        self.memory_size
    }
}
/// Compact adjacency list using u16 vertex IDs, stored in CSR form:
/// vertex `v`'s neighbors live at `neighbors[offsets[v]..offsets[v+1]]`.
#[derive(Debug, Clone)]
pub struct CompactAdjacency {
    /// Offset for each vertex into the packed neighbors array
    offsets: Vec<u32>,
    /// Packed neighbors as (vertex_id, weight as u16)
    neighbors: Vec<(u16, u16)>,
}
impl CompactAdjacency {
    /// Create an empty adjacency with reserved capacity (no vertices yet).
    pub fn new(capacity: usize) -> Self {
        Self {
            offsets: Vec::with_capacity(capacity + 1),
            neighbors: Vec::new(),
        }
    }
    /// Build from an undirected edge list (u, v, weight); each edge is
    /// recorded on both endpoints. Panics if an endpoint id >= num_vertices.
    pub fn from_edges(edges: &[(u16, u16, u16)], num_vertices: usize) -> Self {
        // Bucket neighbors per endpoint, preserving edge order.
        let mut buckets: Vec<Vec<(u16, u16)>> = vec![Vec::new(); num_vertices];
        for &(u, v, w) in edges {
            buckets[u as usize].push((v, w));
            buckets[v as usize].push((u, w));
        }
        // Flatten buckets into CSR arrays.
        let mut offsets = Vec::with_capacity(num_vertices + 1);
        let mut neighbors = Vec::with_capacity(edges.len() * 2);
        offsets.push(0u32);
        for bucket in buckets {
            neighbors.extend(bucket);
            offsets.push(neighbors.len() as u32);
        }
        Self { offsets, neighbors }
    }
    /// CSR range for vertex `v`, or None when `v` is out of range.
    fn range(&self, v: u16) -> Option<(usize, usize)> {
        let idx = v as usize;
        if idx + 1 < self.offsets.len() {
            Some((self.offsets[idx] as usize, self.offsets[idx + 1] as usize))
        } else {
            None
        }
    }
    /// Neighbors of `v` (empty slice for out-of-range vertices).
    pub fn neighbors(&self, v: u16) -> &[(u16, u16)] {
        match self.range(v) {
            Some((start, end)) => &self.neighbors[start..end],
            None => &[],
        }
    }
    /// Degree of `v` (0 for out-of-range vertices).
    pub fn degree(&self, v: u16) -> usize {
        self.range(v).map_or(0, |(start, end)| end - start)
    }
    /// Memory footprint of the CSR arrays, in bytes.
    pub fn memory_size(&self) -> usize {
        self.offsets.len() * std::mem::size_of::<u32>()
            + self.neighbors.len() * std::mem::size_of::<(u16, u16)>()
    }
    /// Number of vertices (0 for an adjacency built via `new`).
    pub fn num_vertices(&self) -> usize {
        self.offsets.len().saturating_sub(1)
    }
}
/// Pool allocator for j-tree levels
///
/// Tracks materialized levels, an LRU order for eviction, and running
/// allocation/memory statistics. All interior state sits behind `RwLock`s
/// and atomics so a pool instance can be shared across threads.
pub struct LevelPool {
    config: PoolConfig,
    /// Levels storage, keyed by level index
    levels: RwLock<HashMap<usize, LazyLevel>>,
    /// LRU tracking; `materialize` pushes accessed levels to the back —
    /// presumably the front is the eviction candidate (confirm against the
    /// eviction logic, which is defined past this chunk)
    lru_order: RwLock<VecDeque<usize>>,
    /// Operation counter
    operation_counter: AtomicU64,
    /// Current memory usage in bytes
    memory_usage: AtomicUsize,
    /// Statistics counters (lifetime totals)
    allocations: AtomicU64,
    deallocations: AtomicU64,
    evictions: AtomicU64,
    peak_memory: AtomicUsize,
    /// Free list for reusable allocations
    free_list: RwLock<Vec<LevelData>>,
}
impl LevelPool {
    /// Create new level pool with default config
    pub fn new() -> Self {
        Self::with_config(PoolConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: PoolConfig) -> Self {
        Self {
            config,
            levels: RwLock::new(HashMap::new()),
            lru_order: RwLock::new(VecDeque::new()),
            operation_counter: AtomicU64::new(0),
            memory_usage: AtomicUsize::new(0),
            allocations: AtomicU64::new(0),
            deallocations: AtomicU64::new(0),
            evictions: AtomicU64::new(0),
            peak_memory: AtomicUsize::new(0),
            free_list: RwLock::new(Vec::new()),
        }
    }
    /// Get or materialize a level.
    ///
    /// Touches the LRU entry, then returns a clone of the stored state
    /// (which may be `Unmaterialized`/`Evicted`); `None` for unknown indices.
    pub fn get_level(&self, level_idx: usize) -> Option<LazyLevel> {
        self.touch(level_idx);
        let levels = self.levels.read().unwrap();
        levels.get(&level_idx).cloned()
    }
    /// Check if level is materialized (covers both clean and dirty states).
    pub fn is_materialized(&self, level_idx: usize) -> bool {
        let levels = self.levels.read().unwrap();
        levels
            .get(&level_idx)
            .map(|l| l.is_materialized())
            .unwrap_or(false)
    }
    /// Materialize a level with data.
    ///
    /// May first evict the LRU level to respect the materialized-level cap
    /// and memory budget. Marks the level most-recently-used.
    pub fn materialize(&self, level_idx: usize, data: LevelData) {
        self.ensure_capacity();
        let memory_size = data.memory_size();
        self.memory_usage.fetch_add(memory_size, Ordering::Relaxed);
        // Update peak memory (best-effort under concurrency: separate
        // relaxed load/compare/store, so a racing writer may be missed).
        let current = self.memory_usage.load(Ordering::Relaxed);
        let peak = self.peak_memory.load(Ordering::Relaxed);
        if current > peak {
            self.peak_memory.store(current, Ordering::Relaxed);
        }
        let mut levels = self.levels.write().unwrap();
        levels.insert(level_idx, LazyLevel::Materialized(data));
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
        lru.push_back(level_idx);
        self.allocations.fetch_add(1, Ordering::Relaxed);
    }
    /// Mark level as dirty (data kept, but flagged for recomputation).
    ///
    /// No-op unless the level is currently `Materialized`.
    pub fn mark_dirty(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            // Move the payload between variants instead of deep-cloning the
            // whole LevelData (the previous implementation cloned on every
            // call just to inspect the variant).
            let current = std::mem::replace(level, LazyLevel::Unmaterialized);
            *level = match current {
                LazyLevel::Materialized(data) => LazyLevel::Dirty(data),
                other => other,
            };
        }
    }
    /// Mark level as clean (after recomputation).
    ///
    /// No-op unless the level is currently `Dirty`.
    pub fn mark_clean(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            // Same move-based transition as `mark_dirty`, avoiding a clone.
            let current = std::mem::replace(level, LazyLevel::Unmaterialized);
            *level = match current {
                LazyLevel::Dirty(data) => LazyLevel::Materialized(data),
                other => other,
            };
        }
    }
    /// Evict a level (lazy deallocation).
    ///
    /// Transitions the slot to `Evicted`, releases its accounted memory and,
    /// when `lazy_dealloc` is enabled, parks the payload on the bounded free
    /// list for reuse by `allocate_level`. Removes the level from LRU order.
    pub fn evict(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            let last_vertex_count = level.data().map(|d| d.vertices.len()).unwrap_or(0);
            let memory_freed = level.data().map(|d| d.memory_size()).unwrap_or(0);
            // Swap the tombstone in first so the payload can be moved (not
            // cloned, as before) into the free list.
            let previous = std::mem::replace(level, LazyLevel::Evicted { last_vertex_count });
            if self.config.lazy_dealloc {
                if let LazyLevel::Materialized(data) | LazyLevel::Dirty(data) = previous {
                    let mut free_list = self.free_list.write().unwrap();
                    // Bound the free list so recycling cannot hoard memory.
                    if free_list.len() < 10 {
                        free_list.push(data);
                    }
                }
            }
            self.memory_usage.fetch_sub(memory_freed, Ordering::Relaxed);
            self.evictions.fetch_add(1, Ordering::Relaxed);
            self.deallocations.fetch_add(1, Ordering::Relaxed);
        }
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
    }
    /// Ensure we have capacity (evict if needed).
    ///
    /// Evicts at most one LRU level for the materialized-level cap, then
    /// keeps evicting while over the optional memory budget.
    fn ensure_capacity(&self) {
        let levels = self.levels.read().unwrap();
        let materialized_count = levels.values().filter(|l| l.is_materialized()).count();
        drop(levels);
        if materialized_count >= self.config.max_materialized_levels {
            // Evict least recently used
            let lru = self.lru_order.read().unwrap();
            if let Some(&evict_idx) = lru.front() {
                drop(lru);
                self.evict(evict_idx);
            }
        }
        // Also check memory budget (0 disables the budget check).
        if self.config.memory_budget > 0 {
            while self.memory_usage.load(Ordering::Relaxed) > self.config.memory_budget {
                let lru = self.lru_order.read().unwrap();
                if let Some(&evict_idx) = lru.front() {
                    drop(lru);
                    self.evict(evict_idx);
                } else {
                    // Nothing left to evict; give up rather than spin.
                    break;
                }
            }
        }
    }
    /// Update access timestamp for level and move it to the back of the LRU.
    fn touch(&self, level_idx: usize) {
        let timestamp = self.operation_counter.fetch_add(1, Ordering::Relaxed);
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            if let Some(data) = level.data_mut() {
                data.last_access = timestamp;
            }
        }
        // Release `levels` before taking the LRU lock.
        drop(levels);
        // Update LRU order
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
        lru.push_back(level_idx);
    }
    /// Get a recycled allocation or create new.
    ///
    /// NOTE(review): a recycled `LevelData` only has `level`, `vertices` and
    /// `cut_value` reset — callers are assumed to rebuild the adjacency.
    pub fn allocate_level(&self, level_idx: usize, capacity: usize) -> LevelData {
        // Try to get from free list
        let mut free_list = self.free_list.write().unwrap();
        if let Some(mut data) = free_list.pop() {
            data.level = level_idx;
            data.vertices.clear();
            data.cut_value = f64::INFINITY;
            return data;
        }
        drop(free_list);
        // Allocate new
        LevelData::new(level_idx, capacity)
    }
    /// Get pool statistics (counters are relaxed-atomic snapshots).
    pub fn stats(&self) -> PoolStats {
        let levels = self.levels.read().unwrap();
        let materialized_count = levels.values().filter(|l| l.is_materialized()).count();
        PoolStats {
            allocations: self.allocations.load(Ordering::Relaxed),
            deallocations: self.deallocations.load(Ordering::Relaxed),
            pool_size_bytes: self.memory_usage.load(Ordering::Relaxed),
            materialized_levels: materialized_count,
            evictions: self.evictions.load(Ordering::Relaxed),
            peak_memory: self.peak_memory.load(Ordering::Relaxed),
        }
    }
    /// Get current memory usage in bytes
    pub fn memory_usage(&self) -> usize {
        self.memory_usage.load(Ordering::Relaxed)
    }
    /// Clear all levels.
    ///
    /// Drops every level and resets current usage; peak memory and the
    /// allocation/eviction counters are intentionally preserved.
    pub fn clear(&self) {
        let mut levels = self.levels.write().unwrap();
        levels.clear();
        let mut lru = self.lru_order.write().unwrap();
        lru.clear();
        self.memory_usage.store(0, Ordering::Relaxed);
    }
}
impl Default for LevelPool {
    fn default() -> Self {
        Self::new()
    }
}
/// Vertex ID converter for compact representations
///
/// Bidirectional map between full-width `VertexId`s and dense `u16` IDs
/// assigned in first-seen order (so at most 65536 distinct vertices).
pub struct CompactVertexMapper {
    /// Original vertex ID to compact ID
    to_compact: HashMap<VertexId, u16>,
    /// Compact ID to original vertex ID (index = compact ID)
    to_original: Vec<VertexId>,
    /// Next compact ID
    next_id: u16,
}
impl CompactVertexMapper {
    /// Create new mapper
    pub fn new() -> Self {
        Self {
            to_compact: HashMap::new(),
            to_original: Vec::new(),
            next_id: 0,
        }
    }
    /// Create from vertex list.
    ///
    /// Duplicates receive the same compact ID; IDs follow first-seen order.
    pub fn from_vertices(vertices: &[VertexId]) -> Self {
        let mut mapper = Self::new();
        for &v in vertices {
            mapper.get_or_insert(v);
        }
        mapper
    }
    /// Get compact ID, creating if needed.
    ///
    /// # Panics
    /// Panics when the u16 ID space is exhausted. The previous `+= 1` would
    /// wrap silently in release builds and start handing out duplicate
    /// compact IDs.
    pub fn get_or_insert(&mut self, original: VertexId) -> u16 {
        if let Some(&compact) = self.to_compact.get(&original) {
            return compact;
        }
        let compact = self.next_id;
        self.next_id = self
            .next_id
            .checked_add(1)
            .expect("CompactVertexMapper: u16 compact ID space exhausted");
        self.to_compact.insert(original, compact);
        self.to_original.push(original);
        compact
    }
    /// Get compact ID if exists
    pub fn get(&self, original: VertexId) -> Option<u16> {
        self.to_compact.get(&original).copied()
    }
    /// Get original vertex ID from compact
    pub fn to_original(&self, compact: u16) -> Option<VertexId> {
        self.to_original.get(compact as usize).copied()
    }
    /// Number of mapped vertices
    pub fn len(&self) -> usize {
        self.to_original.len()
    }
    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.to_original.is_empty()
    }
}
impl Default for CompactVertexMapper {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_lazy_level_states() {
        // Materialized and Dirty both count as "materialized"; only Dirty
        // reports is_dirty().
        let level = LazyLevel::Unmaterialized;
        assert!(!level.is_materialized());
        let data = LevelData::new(0, 100);
        let level = LazyLevel::Materialized(data.clone());
        assert!(level.is_materialized());
        assert!(!level.is_dirty());
        let level = LazyLevel::Dirty(data);
        assert!(level.is_materialized());
        assert!(level.is_dirty());
    }
    #[test]
    fn test_compact_adjacency() {
        // Triangle: every vertex has degree 2.
        let edges = vec![(0u16, 1u16, 10u16), (1, 2, 20), (2, 0, 30)];
        let adj = CompactAdjacency::from_edges(&edges, 3);
        assert_eq!(adj.num_vertices(), 3);
        assert_eq!(adj.degree(0), 2);
        assert_eq!(adj.degree(1), 2);
        assert_eq!(adj.degree(2), 2);
    }
    #[test]
    fn test_level_pool_materialize() {
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        assert!(pool.is_materialized(0));
        assert!(!pool.is_materialized(1));
    }
    #[test]
    fn test_level_pool_eviction() {
        // Cap at two materialized levels; the third insert evicts the LRU.
        let pool = LevelPool::with_config(PoolConfig {
            max_materialized_levels: 2,
            ..Default::default()
        });
        pool.materialize(0, LevelData::new(0, 100));
        pool.materialize(1, LevelData::new(1, 100));
        assert!(pool.is_materialized(0));
        assert!(pool.is_materialized(1));
        // This should evict level 0
        pool.materialize(2, LevelData::new(2, 100));
        assert!(!pool.is_materialized(0));
        assert!(pool.is_materialized(1));
        assert!(pool.is_materialized(2));
    }
    #[test]
    fn test_level_pool_dirty() {
        // Round-trip: Materialized -> Dirty -> Materialized.
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        pool.mark_dirty(0);
        if let Some(LazyLevel::Dirty(_)) = pool.get_level(0) {
            // OK
        } else {
            panic!("Level should be dirty");
        }
        pool.mark_clean(0);
        if let Some(LazyLevel::Materialized(_)) = pool.get_level(0) {
            // OK
        } else {
            panic!("Level should be clean");
        }
    }
    #[test]
    fn test_compact_vertex_mapper() {
        // Re-inserting an existing vertex must return its original ID.
        let mut mapper = CompactVertexMapper::new();
        let c1 = mapper.get_or_insert(100);
        let c2 = mapper.get_or_insert(200);
        let c3 = mapper.get_or_insert(100); // Should return same as c1
        assert_eq!(c1, 0);
        assert_eq!(c2, 1);
        assert_eq!(c3, 0);
        assert_eq!(mapper.to_original(c1), Some(100));
        assert_eq!(mapper.to_original(c2), Some(200));
    }
    #[test]
    fn test_pool_stats() {
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        let stats = pool.stats();
        assert_eq!(stats.allocations, 1);
        assert_eq!(stats.materialized_levels, 1);
    }
    #[test]
    fn test_level_data_memory_size() {
        // The estimate is only refreshed by an explicit update call.
        let mut data = LevelData::new(0, 100);
        data.vertices = vec![0, 1, 2, 3, 4];
        data.update_memory_size();
        assert!(data.memory_size() > 0);
    }
}

View File

@@ -0,0 +1,566 @@
//! SIMD-Optimized Distance Array Operations
//!
//! Provides vectorized operations for distance arrays:
//! - Parallel min/max finding
//! - Batch distance updates
//! - Vector comparisons
//!
//! Uses WASM SIMD128 when available, falls back to scalar.
use crate::graph::VertexId;
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;
/// Alignment for SIMD operations (64 bytes for AVX-512 compatibility)
pub const SIMD_ALIGNMENT: usize = 64;
/// Number of f64 elements per SIMD operation
///
/// NOTE(review): WASM SIMD128 vectors hold only 2 x f64, and the wasm paths
/// below process pairs; 4 matches the scalar fallback's unroll width —
/// confirm the intended meaning of this constant.
pub const SIMD_LANES: usize = 4; // scalar-unroll width; wasm32 paths use 2-lane f64x2
/// Aligned distance array for SIMD operations
///
/// NOTE(review): `align(64)` aligns the struct itself (the `Vec` header),
/// not the heap buffer the distances live in — confirm whether heap-buffer
/// alignment was intended.
#[repr(C, align(64))]
pub struct DistanceArray {
    /// Raw distance values (index = vertex ID; missing entries read as +inf)
    data: Vec<f64>,
    /// Number of vertices
    len: usize,
}
impl DistanceArray {
    /// Create new distance array initialized to infinity
    pub fn new(size: usize) -> Self {
        Self {
            data: vec![f64::INFINITY; size],
            len: size,
        }
    }
    /// Create from slice
    pub fn from_slice(slice: &[f64]) -> Self {
        Self {
            data: slice.to_vec(),
            len: slice.len(),
        }
    }
    /// Get distance for vertex.
    ///
    /// Out-of-range vertices read as +inf (unreached).
    #[inline]
    pub fn get(&self, v: VertexId) -> f64 {
        self.data.get(v as usize).copied().unwrap_or(f64::INFINITY)
    }
    /// Set distance for vertex.
    ///
    /// Out-of-range vertices are silently ignored.
    #[inline]
    pub fn set(&mut self, v: VertexId, distance: f64) {
        if (v as usize) < self.len {
            self.data[v as usize] = distance;
        }
    }
    /// Get number of elements
    pub fn len(&self) -> usize {
        self.len
    }
    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
    /// Reset all distances to infinity
    pub fn reset(&mut self) {
        // `fill` is the idiomatic (and memset-friendly) form of the previous
        // element-by-element loop.
        self.data.fill(f64::INFINITY);
    }
    /// Get raw slice
    pub fn as_slice(&self) -> &[f64] {
        &self.data
    }
    /// Get mutable slice
    pub fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.data
    }
}
/// SIMD-optimized distance operations
///
/// Stateless namespace: all operations are associated functions. wasm32
/// builds use SIMD128 intrinsics; other targets get unrolled scalar
/// fallbacks with identical signatures.
pub struct SimdDistanceOps;
impl SimdDistanceOps {
    /// Find minimum distance and its index using SIMD
    ///
    /// Returns (min_distance, min_index); ties keep the first occurrence,
    /// and an empty array yields (+inf, 0).
    ///
    /// NOTE(review): each lane is extracted and compared individually, so
    /// this is effectively a scalar scan over vector loads — a true
    /// `f64x2_pmin` reduction would be faster; confirm intent.
    #[cfg(target_arch = "wasm32")]
    pub fn find_min(distances: &DistanceArray) -> (f64, usize) {
        let data = distances.as_slice();
        if data.is_empty() {
            return (f64::INFINITY, 0);
        }
        let mut min_val = f64::INFINITY;
        let mut min_idx = 0;
        // Process in chunks of 2 (WASM SIMD has 128-bit = 2 x f64)
        let chunks = data.len() / 2;
        // SAFETY: offset + 1 <= data.len() - 1 for every i < chunks, so each
        // 16-byte load stays inside the slice (wasm v128 loads need no
        // particular alignment).
        unsafe {
            for i in 0..chunks {
                let offset = i * 2;
                let v = v128_load(data.as_ptr().add(offset) as *const v128);
                let a = f64x2_extract_lane::<0>(v);
                let b = f64x2_extract_lane::<1>(v);
                if a < min_val {
                    min_val = a;
                    min_idx = offset;
                }
                if b < min_val {
                    min_val = b;
                    min_idx = offset + 1;
                }
            }
        }
        // Handle remainder
        for i in (chunks * 2)..data.len() {
            if data[i] < min_val {
                min_val = data[i];
                min_idx = i;
            }
        }
        (min_val, min_idx)
    }
    /// Find minimum distance and its index (scalar fallback)
    ///
    /// Same contract as the wasm32 version: ties keep the first occurrence;
    /// empty input yields (+inf, 0).
    #[cfg(not(target_arch = "wasm32"))]
    pub fn find_min(distances: &DistanceArray) -> (f64, usize) {
        let data = distances.as_slice();
        if data.is_empty() {
            return (f64::INFINITY, 0);
        }
        let mut min_val = f64::INFINITY;
        let mut min_idx = 0;
        // Unrolled loop for better ILP
        let chunks = data.len() / 4;
        for i in 0..chunks {
            let base = i * 4;
            let a = data[base];
            let b = data[base + 1];
            let c = data[base + 2];
            let d = data[base + 3];
            if a < min_val {
                min_val = a;
                min_idx = base;
            }
            if b < min_val {
                min_val = b;
                min_idx = base + 1;
            }
            if c < min_val {
                min_val = c;
                min_idx = base + 2;
            }
            if d < min_val {
                min_val = d;
                min_idx = base + 3;
            }
        }
        // Handle remainder
        for i in (chunks * 4)..data.len() {
            if data[i] < min_val {
                min_val = data[i];
                min_idx = i;
            }
        }
        (min_val, min_idx)
    }
    /// Batch update: dist[i] = min(dist[i], dist[source] + weight[i])
    ///
    /// This is the core Dijkstra relaxation operation. Returns the number of
    /// entries improved.
    ///
    /// NOTE(review): when a pair contains one out-of-range index, BOTH
    /// neighbors in the pair are skipped — the scalar fallback relaxes each
    /// neighbor independently. The two paths diverge on invalid input;
    /// confirm whether this is acceptable.
    #[cfg(target_arch = "wasm32")]
    pub fn relax_batch(
        distances: &mut DistanceArray,
        source_dist: f64,
        neighbors: &[(VertexId, f64)], // (neighbor_id, edge_weight)
    ) -> usize {
        let mut updated = 0;
        let data = distances.as_mut_slice();
        // SAFETY: only lane construction/extraction intrinsics are used; no
        // raw memory access happens here, and indices are bounds-checked
        // before any store.
        unsafe {
            let source_v = f64x2_splat(source_dist);
            // Process pairs
            let pairs = neighbors.len() / 2;
            for i in 0..pairs {
                let idx0 = neighbors[i * 2].0 as usize;
                let idx1 = neighbors[i * 2 + 1].0 as usize;
                let w0 = neighbors[i * 2].1;
                let w1 = neighbors[i * 2 + 1].1;
                if idx0 < data.len() && idx1 < data.len() {
                    let weights = f64x2(w0, w1);
                    let new_dist = f64x2_add(source_v, weights);
                    let old0 = data[idx0];
                    let old1 = data[idx1];
                    let new0 = f64x2_extract_lane::<0>(new_dist);
                    let new1 = f64x2_extract_lane::<1>(new_dist);
                    if new0 < old0 {
                        data[idx0] = new0;
                        updated += 1;
                    }
                    if new1 < old1 {
                        data[idx1] = new1;
                        updated += 1;
                    }
                }
            }
        }
        // Handle odd remainder
        if neighbors.len() % 2 == 1 {
            let (idx, weight) = neighbors[neighbors.len() - 1];
            let idx = idx as usize;
            if idx < data.len() {
                let new_dist = source_dist + weight;
                if new_dist < data[idx] {
                    data[idx] = new_dist;
                    updated += 1;
                }
            }
        }
        updated
    }
    /// Batch update (scalar fallback)
    ///
    /// 4-way unrolled for instruction-level parallelism; each neighbor is
    /// relaxed independently. Returns the number of entries improved.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn relax_batch(
        distances: &mut DistanceArray,
        source_dist: f64,
        neighbors: &[(VertexId, f64)],
    ) -> usize {
        let mut updated = 0;
        let data = distances.as_mut_slice();
        // Process in chunks of 4 for better ILP
        let chunks = neighbors.len() / 4;
        for i in 0..chunks {
            let base = i * 4;
            let (idx0, w0) = neighbors[base];
            let (idx1, w1) = neighbors[base + 1];
            let (idx2, w2) = neighbors[base + 2];
            let (idx3, w3) = neighbors[base + 3];
            let new0 = source_dist + w0;
            let new1 = source_dist + w1;
            let new2 = source_dist + w2;
            let new3 = source_dist + w3;
            let idx0 = idx0 as usize;
            let idx1 = idx1 as usize;
            let idx2 = idx2 as usize;
            let idx3 = idx3 as usize;
            if idx0 < data.len() && new0 < data[idx0] {
                data[idx0] = new0;
                updated += 1;
            }
            if idx1 < data.len() && new1 < data[idx1] {
                data[idx1] = new1;
                updated += 1;
            }
            if idx2 < data.len() && new2 < data[idx2] {
                data[idx2] = new2;
                updated += 1;
            }
            if idx3 < data.len() && new3 < data[idx3] {
                data[idx3] = new3;
                updated += 1;
            }
        }
        // Handle remainder
        for i in (chunks * 4)..neighbors.len() {
            let (idx, weight) = neighbors[i];
            let idx = idx as usize;
            if idx < data.len() {
                let new_dist = source_dist + weight;
                if new_dist < data[idx] {
                    data[idx] = new_dist;
                    updated += 1;
                }
            }
        }
        updated
    }
    /// Count vertices with distance strictly less than threshold
    #[cfg(target_arch = "wasm32")]
    pub fn count_below_threshold(distances: &DistanceArray, threshold: f64) -> usize {
        let data = distances.as_slice();
        let mut count = 0;
        // SAFETY: same bounds argument as `find_min` — each 16-byte load
        // covers indices offset and offset + 1, both < data.len().
        unsafe {
            let thresh_v = f64x2_splat(threshold);
            let chunks = data.len() / 2;
            for i in 0..chunks {
                let offset = i * 2;
                let v = v128_load(data.as_ptr().add(offset) as *const v128);
                let cmp = f64x2_lt(v, thresh_v);
                // Extract comparison results. Each f64x2_lt lane is all-ones
                // or all-zeros, and i8x16_bitmask takes the top bit of each
                // of the 16 bytes, so bits 0-7 mirror lane 0 and bits 8-15
                // mirror lane 1.
                let mask = i8x16_bitmask(cmp);
                // Each f64 lane uses 8 bits in bitmask
                if mask & 0xFF != 0 {
                    count += 1;
                }
                if mask & 0xFF00 != 0 {
                    count += 1;
                }
            }
        }
        // Handle remainder
        for i in (data.len() / 2 * 2)..data.len() {
            if data[i] < threshold {
                count += 1;
            }
        }
        count
    }
    /// Count vertices with distance strictly less than threshold (scalar fallback)
    #[cfg(not(target_arch = "wasm32"))]
    pub fn count_below_threshold(distances: &DistanceArray, threshold: f64) -> usize {
        distances
            .as_slice()
            .iter()
            .filter(|&&d| d < threshold)
            .count()
    }
    /// Compute sum of distances (for average)
    ///
    /// Returns (sum, count) over finite entries only; NaN and ±inf are
    /// excluded from both.
    pub fn sum_finite(distances: &DistanceArray) -> (f64, usize) {
        let mut sum = 0.0;
        let mut count = 0;
        for &d in distances.as_slice() {
            if d.is_finite() {
                sum += d;
                count += 1;
            }
        }
        (sum, count)
    }
    /// Element-wise minimum of two distance arrays
    ///
    /// The result is truncated to the shorter of the two inputs.
    pub fn elementwise_min(a: &DistanceArray, b: &DistanceArray) -> DistanceArray {
        let len = a.len().min(b.len());
        let mut result = DistanceArray::new(len);
        let a_data = a.as_slice();
        let b_data = b.as_slice();
        let r_data = result.as_mut_slice();
        // Unrolled loop
        let chunks = len / 4;
        for i in 0..chunks {
            let base = i * 4;
            r_data[base] = a_data[base].min(b_data[base]);
            r_data[base + 1] = a_data[base + 1].min(b_data[base + 1]);
            r_data[base + 2] = a_data[base + 2].min(b_data[base + 2]);
            r_data[base + 3] = a_data[base + 3].min(b_data[base + 3]);
        }
        for i in (chunks * 4)..len {
            r_data[i] = a_data[i].min(b_data[i]);
        }
        result
    }
    /// Scale all distances by a factor
    ///
    /// Non-finite entries (±inf, NaN) are left unchanged so unreached
    /// vertices stay at +inf.
    pub fn scale(distances: &mut DistanceArray, factor: f64) {
        for d in distances.as_mut_slice() {
            if d.is_finite() {
                *d *= factor;
            }
        }
    }
}
/// Priority queue entry for Dijkstra with SIMD-friendly layout
///
/// Ordering is reversed on `distance` ("Reverse order for min-heap" below)
/// so a max-heap container behaves as a min-heap over distances.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct PriorityEntry {
    /// Distance (key)
    pub distance: f64,
    /// Vertex ID
    pub vertex: VertexId,
}
impl PriorityEntry {
    /// Create a new priority entry with given distance and vertex.
    pub fn new(distance: f64, vertex: VertexId) -> Self {
        Self { distance, vertex }
    }
}
impl PartialEq for PriorityEntry {
    // NOTE(review): f64 `==` means a NaN distance is never equal to itself,
    // which violates the reflexivity the `Eq` impl below promises — assumes
    // distances are never NaN.
    fn eq(&self, other: &Self) -> bool {
        self.distance == other.distance && self.vertex == other.vertex
    }
}
impl Eq for PriorityEntry {}
impl PartialOrd for PriorityEntry {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // Reverse order for min-heap
        other.distance.partial_cmp(&self.distance)
    }
}
impl Ord for PriorityEntry {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Incomparable pairs (NaN) collapse to Equal rather than panicking.
        self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_distance_array_basic() {
        // Unset entries must read as +inf.
        let mut arr = DistanceArray::new(10);
        arr.set(0, 1.0);
        arr.set(5, 5.0);
        assert_eq!(arr.get(0), 1.0);
        assert_eq!(arr.get(5), 5.0);
        assert_eq!(arr.get(9), f64::INFINITY);
    }
    #[test]
    fn test_find_min() {
        // Both the value and the index of the minimum must be reported.
        let mut arr = DistanceArray::new(100);
        arr.set(50, 1.0);
        arr.set(25, 0.5);
        arr.set(75, 2.0);
        let (min_val, min_idx) = SimdDistanceOps::find_min(&arr);
        assert_eq!(min_val, 0.5);
        assert_eq!(min_idx, 25);
    }
    #[test]
    fn test_find_min_empty() {
        // Empty input yields the +inf sentinel.
        let arr = DistanceArray::new(0);
        let (min_val, _) = SimdDistanceOps::find_min(&arr);
        assert!(min_val.is_infinite());
    }
    #[test]
    fn test_relax_batch() {
        // All four neighbors improve from +inf, so all four update.
        let mut arr = DistanceArray::new(10);
        arr.set(0, 0.0); // Source
        let neighbors = vec![(1, 1.0), (2, 2.0), (3, 3.0), (4, 4.0)];
        let updated = SimdDistanceOps::relax_batch(&mut arr, 0.0, &neighbors);
        assert_eq!(updated, 4);
        assert_eq!(arr.get(1), 1.0);
        assert_eq!(arr.get(2), 2.0);
        assert_eq!(arr.get(3), 3.0);
        assert_eq!(arr.get(4), 4.0);
    }
    #[test]
    fn test_relax_batch_no_update() {
        let mut arr = DistanceArray::from_slice(&[0.0, 0.5, 1.0, 1.5, 2.0]);
        let neighbors = vec![
            (1, 2.0), // New dist = 0 + 2.0 = 2.0 > 0.5
            (2, 3.0), // New dist = 0 + 3.0 = 3.0 > 1.0
        ];
        let updated = SimdDistanceOps::relax_batch(&mut arr, 0.0, &neighbors);
        assert_eq!(updated, 0); // No updates, existing distances are better
    }
    #[test]
    fn test_count_below_threshold() {
        // The comparison is strict (<), so entries equal to the threshold
        // are excluded; +inf never counts.
        let arr = DistanceArray::from_slice(&[0.0, 0.5, 1.0, 1.5, 2.0, f64::INFINITY]);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 1.0), 2);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 2.0), 4);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 10.0), 5);
    }
    #[test]
    fn test_sum_finite() {
        // Infinite entries are excluded from both the sum and the count.
        let arr = DistanceArray::from_slice(&[1.0, 2.0, 3.0, f64::INFINITY, f64::INFINITY]);
        let (sum, count) = SimdDistanceOps::sum_finite(&arr);
        assert_eq!(sum, 6.0);
        assert_eq!(count, 3);
    }
    #[test]
    fn test_elementwise_min() {
        let a = DistanceArray::from_slice(&[1.0, 5.0, 3.0, 7.0]);
        let b = DistanceArray::from_slice(&[2.0, 4.0, 6.0, 1.0]);
        let result = SimdDistanceOps::elementwise_min(&a, &b);
        assert_eq!(result.as_slice(), &[1.0, 4.0, 3.0, 1.0]);
    }
    #[test]
    fn test_scale() {
        // Finite entries scale; the +inf sentinel is preserved.
        let mut arr = DistanceArray::from_slice(&[1.0, 2.0, f64::INFINITY, 4.0]);
        SimdDistanceOps::scale(&mut arr, 2.0);
        assert_eq!(arr.get(0), 2.0);
        assert_eq!(arr.get(1), 4.0);
        assert!(arr.get(2).is_infinite());
        assert_eq!(arr.get(3), 8.0);
    }
    #[test]
    fn test_priority_entry_ordering() {
        let a = PriorityEntry::new(1.0, 1);
        let b = PriorityEntry::new(2.0, 2);
        // Min-heap ordering: smaller distance is "greater"
        assert!(a > b);
    }
}

View File

@@ -0,0 +1,609 @@
//! WASM Batch Operations and TypedArray Optimizations
//!
//! Optimizations specific to WebAssembly execution:
//! - Batch FFI calls to minimize overhead
//! - Pre-allocated WASM memory
//! - TypedArray bulk transfers
//! - Memory alignment for SIMD
//!
//! Target: 10x reduction in FFI overhead
use crate::graph::VertexId;
use std::collections::HashMap;
/// Configuration for WASM batch operations
///
/// `max_batch_size` caps how many items a single queued operation carries;
/// larger inputs are split into chunks by the `queue_*` methods.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Maximum batch size (per queued operation)
    pub max_batch_size: usize,
    /// Pre-allocated buffer size in bytes (sizes the transfer buffers)
    pub buffer_size: usize,
    /// Alignment for SIMD operations
    pub alignment: usize,
    /// Enable memory pooling
    pub memory_pooling: bool,
}
impl Default for BatchConfig {
    fn default() -> Self {
        Self {
            max_batch_size: 1024,
            buffer_size: 64 * 1024, // 64KB
            alignment: 64, // AVX-512 alignment
            memory_pooling: true,
        }
    }
}
/// Batch operation types for minimizing FFI calls
///
/// One queued unit of work; payload length is bounded by
/// `BatchConfig::max_batch_size` when built via the `queue_*` methods.
#[derive(Debug, Clone)]
pub enum BatchOperation {
    /// Insert multiple edges (source, target, weight)
    InsertEdges(Vec<(VertexId, VertexId, f64)>),
    /// Delete multiple edges (source, target)
    DeleteEdges(Vec<(VertexId, VertexId)>),
    /// Update multiple weights (source, target, new weight)
    UpdateWeights(Vec<(VertexId, VertexId, f64)>),
    /// Query multiple distances (source, target)
    QueryDistances(Vec<(VertexId, VertexId)>),
    /// Compute cuts for multiple partitions
    ComputeCuts(Vec<Vec<VertexId>>),
}
/// Result from batch operation
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Operation type (e.g. "InsertEdges")
    pub operation: String,
    /// Number of items processed
    pub items_processed: usize,
    /// Time taken in microseconds (0 when the executor did not measure it)
    pub time_us: u64,
    /// Results (for queries; empty for mutations)
    pub results: Vec<f64>,
    /// Error message if any
    pub error: Option<String>,
}
/// TypedArray transfer for efficient WASM memory access
///
/// Provides aligned memory buffers for bulk data transfer between
/// JavaScript and WASM.
///
/// NOTE(review): `align(64)` aligns the struct (the Vec headers), not the
/// heap buffers — confirm whether buffer alignment was intended.
#[repr(C, align(64))]
pub struct TypedArrayTransfer {
    /// Float64 buffer for weights/distances
    pub f64_buffer: Vec<f64>,
    /// Uint64 buffer for vertex IDs
    pub u64_buffer: Vec<u64>,
    /// Uint32 buffer for indices/counts
    pub u32_buffer: Vec<u32>,
    /// Byte buffer for raw data
    pub byte_buffer: Vec<u8>,
    /// Current position in buffers
    // NOTE(review): only ever reset to 0 in this impl, never advanced or
    // read — possibly vestigial.
    position: usize,
}
impl TypedArrayTransfer {
    /// Create new transfer with the default capacity of 1024 entries.
    pub fn new() -> Self {
        Self::with_capacity(1024)
    }
    /// Create with specific capacity.
    ///
    /// The u32 buffer gets 2x and the byte buffer 8x the base capacity.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            f64_buffer: Vec::with_capacity(capacity),
            u64_buffer: Vec::with_capacity(capacity),
            u32_buffer: Vec::with_capacity(capacity * 2),
            byte_buffer: Vec::with_capacity(capacity * 8),
            position: 0,
        }
    }
    /// Reset buffers for reuse (capacities are retained).
    pub fn reset(&mut self) {
        self.f64_buffer.clear();
        self.u64_buffer.clear();
        self.u32_buffer.clear();
        self.byte_buffer.clear();
        self.position = 0;
    }
    /// Add edge to transfer buffer: the endpoint pair goes into the id
    /// buffer, the weight into the f64 buffer.
    pub fn add_edge(&mut self, source: VertexId, target: VertexId, weight: f64) {
        self.u64_buffer.extend_from_slice(&[source, target]);
        self.f64_buffer.push(weight);
    }
    /// Add vertex to transfer buffer
    pub fn add_vertex(&mut self, vertex: VertexId) {
        self.u64_buffer.push(vertex);
    }
    /// Add distance result
    pub fn add_distance(&mut self, distance: f64) {
        self.f64_buffer.push(distance);
    }
    /// Get edges from buffer.
    ///
    /// Pairs weight `i` with ids `2i` and `2i + 1`; ids missing from the id
    /// buffer default to 0.
    pub fn get_edges(&self) -> Vec<(VertexId, VertexId, f64)> {
        self.f64_buffer
            .iter()
            .enumerate()
            .map(|(i, &weight)| {
                let source = self.u64_buffer.get(2 * i).copied().unwrap_or(0);
                let target = self.u64_buffer.get(2 * i + 1).copied().unwrap_or(0);
                (source, target, weight)
            })
            .collect()
    }
    /// Get f64 buffer as raw pointer (for FFI)
    pub fn f64_ptr(&self) -> *const f64 {
        self.f64_buffer.as_ptr()
    }
    /// Get u64 buffer as raw pointer (for FFI)
    pub fn u64_ptr(&self) -> *const u64 {
        self.u64_buffer.as_ptr()
    }
    /// Get buffer lengths as (f64, u64, u32) element counts.
    pub fn len(&self) -> (usize, usize, usize) {
        let floats = self.f64_buffer.len();
        let ids = self.u64_buffer.len();
        let indices = self.u32_buffer.len();
        (floats, ids, indices)
    }
    /// Check if empty (considers only the f64 and u64 buffers).
    pub fn is_empty(&self) -> bool {
        self.f64_buffer.is_empty() && self.u64_buffer.is_empty()
    }
}
impl Default for TypedArrayTransfer {
    fn default() -> Self {
        Self::new()
    }
}
/// WASM batch operations executor
///
/// Queues operations and executes them in one pass to amortize FFI
/// overhead; running totals feed `stats()`.
pub struct WasmBatchOps {
    config: BatchConfig,
    /// Transfer buffer (reused across operations)
    transfer: TypedArrayTransfer,
    /// Pending operations (FIFO)
    pending: Vec<BatchOperation>,
    /// Statistics
    total_ops: u64,
    total_items: u64,
    total_time_us: u64,
}
impl WasmBatchOps {
    /// Create new batch executor with default config
    pub fn new() -> Self {
        Self::with_config(BatchConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: BatchConfig) -> Self {
        Self {
            transfer: TypedArrayTransfer::with_capacity(config.buffer_size / 8),
            config,
            pending: Vec::new(),
            total_ops: 0,
            total_items: 0,
            total_time_us: 0,
        }
    }
    /// Queue `items`, splitting into chunks of at most `max_batch_size`
    /// items and wrapping each chunk with `make_op`.
    ///
    /// Shared by all `queue_*` methods (this logic was previously
    /// triplicated inline).
    fn queue_chunked<T: Clone>(
        &mut self,
        items: Vec<T>,
        make_op: impl Fn(Vec<T>) -> BatchOperation,
    ) {
        if items.len() > self.config.max_batch_size {
            // Split into multiple batches
            for chunk in items.chunks(self.config.max_batch_size) {
                self.pending.push(make_op(chunk.to_vec()));
            }
        } else {
            self.pending.push(make_op(items));
        }
    }
    /// Queue edge insertions for batch processing
    pub fn queue_insert_edges(&mut self, edges: Vec<(VertexId, VertexId, f64)>) {
        self.queue_chunked(edges, BatchOperation::InsertEdges);
    }
    /// Queue edge deletions for batch processing
    pub fn queue_delete_edges(&mut self, edges: Vec<(VertexId, VertexId)>) {
        self.queue_chunked(edges, BatchOperation::DeleteEdges);
    }
    /// Queue distance queries for batch processing
    pub fn queue_distance_queries(&mut self, pairs: Vec<(VertexId, VertexId)>) {
        self.queue_chunked(pairs, BatchOperation::QueryDistances);
    }
    /// Execute all pending operations.
    ///
    /// Drains the queue in FIFO order; each result carries its measured
    /// wall time. The transfer buffer is reset afterwards.
    pub fn execute_batch(&mut self) -> Vec<BatchResult> {
        // Drain pending operations to avoid borrow conflict
        let pending_ops: Vec<_> = self.pending.drain(..).collect();
        let mut results = Vec::with_capacity(pending_ops.len());
        for op in pending_ops {
            let op_start = std::time::Instant::now();
            let mut result = self.execute_operation(op);
            let elapsed = op_start.elapsed().as_micros() as u64;
            // Bug fix: the measured time was previously discarded and every
            // result reported `time_us == 0`.
            result.time_us = elapsed;
            self.total_ops += 1;
            self.total_items += result.items_processed as u64;
            self.total_time_us += elapsed;
            results.push(result);
        }
        self.transfer.reset();
        results
    }
    /// Execute a single operation.
    ///
    /// Stages the payload into the transfer buffer; the native calls are
    /// simulated for now. `time_us` is filled in by the caller.
    fn execute_operation(&mut self, op: BatchOperation) -> BatchResult {
        match op {
            BatchOperation::InsertEdges(edges) => {
                let count = edges.len();
                // Prepare transfer buffer
                self.transfer.reset();
                for (u, v, w) in &edges {
                    self.transfer.add_edge(*u, *v, *w);
                }
                // In WASM, this would call the native insert function
                // For now, we simulate the batch operation
                BatchResult {
                    operation: "InsertEdges".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::DeleteEdges(edges) => {
                let count = edges.len();
                self.transfer.reset();
                for (u, v) in &edges {
                    self.transfer.add_vertex(*u);
                    self.transfer.add_vertex(*v);
                }
                BatchResult {
                    operation: "DeleteEdges".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::UpdateWeights(updates) => {
                let count = updates.len();
                self.transfer.reset();
                for (u, v, w) in &updates {
                    self.transfer.add_edge(*u, *v, *w);
                }
                BatchResult {
                    operation: "UpdateWeights".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::QueryDistances(pairs) => {
                let count = pairs.len();
                self.transfer.reset();
                for (u, v) in &pairs {
                    self.transfer.add_vertex(*u);
                    self.transfer.add_vertex(*v);
                }
                // Simulate distance results
                let results: Vec<f64> = pairs
                    .iter()
                    .map(|(u, v)| if u == v { 0.0 } else { 1.0 })
                    .collect();
                BatchResult {
                    operation: "QueryDistances".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results,
                    error: None,
                }
            }
            BatchOperation::ComputeCuts(partitions) => {
                let count = partitions.len();
                BatchResult {
                    operation: "ComputeCuts".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: vec![0.0; count],
                    error: None,
                }
            }
        }
    }
    /// Get number of pending operations
    pub fn pending_count(&self) -> usize {
        self.pending.len()
    }
    /// Get statistics (running totals plus derived averages)
    pub fn stats(&self) -> BatchStats {
        BatchStats {
            total_operations: self.total_ops,
            total_items: self.total_items,
            total_time_us: self.total_time_us,
            avg_items_per_op: if self.total_ops > 0 {
                self.total_items as f64 / self.total_ops as f64
            } else {
                0.0
            },
            avg_time_per_item_us: if self.total_items > 0 {
                self.total_time_us as f64 / self.total_items as f64
            } else {
                0.0
            },
        }
    }
    /// Clear pending operations (statistics are preserved)
    pub fn clear(&mut self) {
        self.pending.clear();
        self.transfer.reset();
    }
}
impl Default for WasmBatchOps {
    fn default() -> Self {
        Self::new()
    }
}
/// Statistics for batch operations
///
/// Snapshot produced by `WasmBatchOps::stats()`; averages are 0 when no
/// operations/items have been processed.
#[derive(Debug, Clone, Default)]
pub struct BatchStats {
    /// Total operations executed
    pub total_operations: u64,
    /// Total items processed
    pub total_items: u64,
    /// Total time in microseconds
    pub total_time_us: u64,
    /// Average items per operation
    pub avg_items_per_op: f64,
    /// Average time per item in microseconds
    pub avg_time_per_item_us: f64,
}
/// Pre-allocated WASM memory region
///
/// A simple bump allocator over a fixed, zero-initialized buffer: `alloc`
/// hands out byte offsets, `reset` reclaims everything at once.
#[repr(C, align(64))]
pub struct WasmMemoryRegion {
    /// Raw memory
    data: Vec<u8>,
    /// Capacity in bytes (always a multiple of 64)
    capacity: usize,
    /// Current offset (bump pointer)
    offset: usize,
}
impl WasmMemoryRegion {
    /// Create new memory region; `size` is rounded up to a 64-byte multiple.
    pub fn new(size: usize) -> Self {
        // Round up to alignment
        let aligned_size = (size + 63) & !63;
        Self {
            data: vec![0u8; aligned_size],
            capacity: aligned_size,
            offset: 0,
        }
    }
    /// Allocate `size` bytes aligned to `align` (must be a power of two).
    ///
    /// Returns the starting offset of the allocated region, or `None` when
    /// the region is exhausted (the bump pointer is left unchanged on
    /// failure). Use `get_slice` to access the allocated memory safely.
    pub fn alloc(&mut self, size: usize, align: usize) -> Option<usize> {
        // The mask trick below is only valid for power-of-two alignments;
        // `align == 0` would previously underflow and panic.
        debug_assert!(align.is_power_of_two(), "alignment must be a power of two");
        // Align offset, guarding the arithmetic against overflow.
        let aligned_offset = self.offset.checked_add(align - 1)? & !(align - 1);
        let end = aligned_offset.checked_add(size)?;
        if end > self.capacity {
            return None;
        }
        self.offset = end;
        Some(aligned_offset)
    }
    /// Get a slice at the given offset, or `None` if out of range.
    pub fn get_slice(&self, offset: usize, len: usize) -> Option<&[u8]> {
        // checked_add prevents `offset + len` from wrapping on huge inputs.
        let end = offset.checked_add(len)?;
        if end <= self.capacity {
            Some(&self.data[offset..end])
        } else {
            None
        }
    }
    /// Get a mutable slice at the given offset, or `None` if out of range.
    pub fn get_slice_mut(&mut self, offset: usize, len: usize) -> Option<&mut [u8]> {
        let end = offset.checked_add(len)?;
        if end <= self.capacity {
            Some(&mut self.data[offset..end])
        } else {
            None
        }
    }
    /// Reset region for reuse (contents are NOT zeroed).
    pub fn reset(&mut self) {
        self.offset = 0;
        // Optional: zero memory
        // self.data.fill(0);
    }
    /// Get remaining capacity
    pub fn remaining(&self) -> usize {
        self.capacity - self.offset
    }
    /// Get used bytes
    pub fn used(&self) -> usize {
        self.offset
    }
    /// Get raw pointer
    pub fn as_ptr(&self) -> *const u8 {
        self.data.as_ptr()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_typed_array_transfer() {
        let mut xfer = TypedArrayTransfer::new();
        xfer.add_edge(1, 2, 1.0);
        xfer.add_edge(2, 3, 2.0);
        let stored = xfer.get_edges();
        assert_eq!(stored.len(), 2);
        assert_eq!(stored[0], (1, 2, 1.0));
        assert_eq!(stored[1], (2, 3, 2.0));
    }

    #[test]
    fn test_batch_queue() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0), (2, 3, 2.0)]);
        assert_eq!(ops.pending_count(), 1);
    }

    #[test]
    fn test_batch_execute() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0)]);
        ops.queue_delete_edges(vec![(3, 4)]);
        let outcome = ops.execute_batch();
        assert_eq!(outcome.len(), 2);
        assert_eq!(outcome[0].operation, "InsertEdges");
        assert_eq!(outcome[1].operation, "DeleteEdges");
        // Executing the batch drains the pending queue.
        assert_eq!(ops.pending_count(), 0);
    }

    #[test]
    fn test_batch_splitting() {
        let config = BatchConfig {
            max_batch_size: 10,
            ..Default::default()
        };
        let mut ops = WasmBatchOps::with_config(config);
        // 25 edges with a cap of 10 per batch -> ceil(25 / 10) = 3 batches.
        let edges: Vec<_> = (0..25).map(|i| (i, i + 1, 1.0)).collect();
        ops.queue_insert_edges(edges);
        assert_eq!(ops.pending_count(), 3);
    }

    #[test]
    fn test_distance_queries() {
        let mut ops = WasmBatchOps::new();
        ops.queue_distance_queries(vec![(1, 2), (2, 3), (1, 1)]);
        let outcome = ops.execute_batch();
        assert_eq!(outcome.len(), 1);
        assert_eq!(outcome[0].results.len(), 3);
        // Distance from a vertex to itself is zero.
        assert_eq!(outcome[0].results[2], 0.0);
    }

    #[test]
    fn test_wasm_memory_region() {
        let mut region = WasmMemoryRegion::new(1024);
        // 64-byte-aligned allocations succeed and respect alignment.
        let first = region.alloc(100, 64).expect("first allocation fits");
        assert_eq!(first % 64, 0);
        let second = region.alloc(200, 64);
        assert!(second.is_some());
        // The allocated range is readable.
        assert!(region.get_slice(first, 100).is_some());
        assert!(region.used() > 0);
        assert!(region.remaining() < 1024);
        region.reset();
        assert_eq!(region.used(), 0);
    }

    #[test]
    fn test_batch_stats() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0), (2, 3, 2.0)]);
        let _ = ops.execute_batch();
        let snapshot = ops.stats();
        assert_eq!(snapshot.total_operations, 1);
        assert_eq!(snapshot.total_items, 2);
    }

    #[test]
    fn test_transfer_reset() {
        let mut xfer = TypedArrayTransfer::new();
        xfer.add_edge(1, 2, 1.0);
        assert!(!xfer.is_empty());
        xfer.reset();
        assert!(xfer.is_empty());
    }
}