Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,755 @@
//! Comprehensive Benchmark Suite for j-Tree + BMSSP Optimizations
//!
//! Measures before/after performance for each optimization:
//! - DSpar: 5.9x target speedup
//! - Cache: 10x target for repeated queries
//! - SIMD: 2-4x target for distance operations
//! - Pool: 50-75% memory reduction
//! - Parallel: Near-linear scaling
//! - WASM Batch: 10x FFI overhead reduction
//!
//! Target: Combined 10x speedup over naive implementation
use super::cache::{CacheConfig, PathDistanceCache};
use super::dspar::{DegreePresparse, PresparseConfig};
use super::parallel::{LevelUpdateResult, ParallelConfig, ParallelLevelUpdater, WorkItem};
use super::pool::{LevelData, LevelPool, PoolConfig};
use super::simd_distance::{DistanceArray, SimdDistanceOps};
use super::wasm_batch::{BatchConfig, WasmBatchOps};
use crate::graph::DynamicGraph;
use std::collections::HashSet;
use std::time::{Duration, Instant};
/// Outcome of one before/after benchmark comparison.
///
/// Captures the raw timings, the derived speedup, optional memory figures,
/// and any extra named measurements a benchmark wants to attach.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the benchmark
    pub name: String,
    /// Baseline time (naive implementation)
    pub baseline_us: u64,
    /// Optimized time
    pub optimized_us: u64,
    /// Speedup factor (baseline / optimized)
    pub speedup: f64,
    /// Target speedup
    pub target_speedup: f64,
    /// Whether target was achieved
    pub target_achieved: bool,
    /// Memory usage baseline (bytes)
    pub baseline_memory: usize,
    /// Memory usage optimized (bytes)
    pub optimized_memory: usize,
    /// Memory reduction percentage
    pub memory_reduction_percent: f64,
    /// Additional metrics
    pub metrics: Vec<(String, f64)>,
}

impl BenchmarkResult {
    /// Build a result from raw timings.
    ///
    /// Speedup is `baseline / optimized`; a 0 µs optimized run is reported
    /// as an infinite speedup (which trivially satisfies any target).
    pub fn new(name: &str, baseline_us: u64, optimized_us: u64, target_speedup: f64) -> Self {
        let speedup = match optimized_us {
            0 => f64::INFINITY,
            us => baseline_us as f64 / us as f64,
        };
        Self {
            name: name.to_string(),
            baseline_us,
            optimized_us,
            speedup,
            target_speedup,
            target_achieved: speedup >= target_speedup,
            baseline_memory: 0,
            optimized_memory: 0,
            memory_reduction_percent: 0.0,
            metrics: Vec::new(),
        }
    }

    /// Attach memory figures; the reduction is expressed as a percentage of
    /// the baseline (0.0 when the baseline itself is 0 bytes).
    pub fn with_memory(mut self, baseline: usize, optimized: usize) -> Self {
        self.baseline_memory = baseline;
        self.optimized_memory = optimized;
        self.memory_reduction_percent = match baseline {
            0 => 0.0,
            b => 100.0 * (1.0 - optimized as f64 / b as f64),
        };
        self
    }

    /// Record an extra named measurement alongside the core timings.
    pub fn add_metric(&mut self, name: &str, value: f64) {
        self.metrics.push((name.to_string(), value));
    }
}
/// Individual optimization benchmark
///
/// Groups the per-workload results for one optimization (e.g. "SIMD" or
/// "Cache") together with an aggregate summary over those workloads.
#[derive(Debug, Clone)]
pub struct OptimizationBenchmark {
    /// Optimization name
    pub name: String,
    /// Results for different workloads (one entry per graph size / variant)
    pub results: Vec<BenchmarkResult>,
    /// Overall assessment aggregated from `results`
    pub summary: BenchmarkSummary,
}
/// Summary of benchmark results
///
/// Aggregate statistics over the per-workload `BenchmarkResult`s of one
/// optimization. `Default` yields all-zero values (used for empty result sets).
#[derive(Debug, Clone, Default)]
pub struct BenchmarkSummary {
    /// Average speedup achieved across workloads
    pub avg_speedup: f64,
    /// Minimum speedup observed
    pub min_speedup: f64,
    /// Maximum speedup observed
    pub max_speedup: f64,
    /// Percentage of workloads whose speedup target was achieved
    pub targets_achieved_percent: f64,
    /// Overall memory reduction (averaged over workloads reporting memory)
    pub avg_memory_reduction: f64,
}
/// Comprehensive benchmark suite
///
/// Drives all six optimization benchmarks (DSpar, cache, SIMD, pool,
/// parallel, WASM batch) over a configurable set of graph sizes.
pub struct BenchmarkSuite {
    /// Test graph sizes (vertex counts)
    sizes: Vec<usize>,
    /// Number of iterations per test (timings are averaged over these)
    iterations: usize,
    /// Results collected by the most recent `run_all`
    results: Vec<OptimizationBenchmark>,
}
impl BenchmarkSuite {
    /// Create new benchmark suite
    ///
    /// Defaults: graph sizes 100 / 1_000 / 10_000 and 10 iterations per test.
    pub fn new() -> Self {
        Self {
            sizes: vec![100, 1000, 10000],
            iterations: 10,
            results: Vec::new(),
        }
    }

    /// Set test sizes (builder style)
    pub fn with_sizes(mut self, sizes: Vec<usize>) -> Self {
        self.sizes = sizes;
        self
    }

    /// Set iterations (builder style)
    pub fn with_iterations(mut self, iterations: usize) -> Self {
        self.iterations = iterations;
        self
    }

    /// Run all benchmarks
    ///
    /// Clears any previous results, runs every optimization benchmark in
    /// sequence, and returns a borrow of the freshly collected results.
    pub fn run_all(&mut self) -> &Vec<OptimizationBenchmark> {
        self.results.clear();
        self.results.push(self.benchmark_dspar());
        self.results.push(self.benchmark_cache());
        self.results.push(self.benchmark_simd());
        self.results.push(self.benchmark_pool());
        self.results.push(self.benchmark_parallel());
        self.results.push(self.benchmark_wasm_batch());
        &self.results
    }

    /// Get combined speedup estimate
    ///
    /// Conservative estimate: product of the square roots of each
    /// optimization's average speedup (optimizations overlap, so a straight
    /// product would overstate the combined effect). Non-positive or
    /// non-finite averages are skipped to avoid NaN; returns 1.0 when no
    /// usable results exist.
    pub fn combined_speedup(&self) -> f64 {
        if self.results.is_empty() {
            return 1.0;
        }
        // Estimate combined speedup (conservative: product of square roots)
        // Skip results with zero or negative speedup to avoid NaN
        let mut combined = 1.0;
        let mut count = 0;
        for result in &self.results {
            let speedup = result.summary.avg_speedup;
            if speedup > 0.0 && speedup.is_finite() {
                combined *= speedup.sqrt();
                count += 1;
            }
        }
        if count == 0 {
            return 1.0;
        }
        combined
    }

    /// Benchmark DSpar (Degree-based presparse)
    ///
    /// Baseline is a plain edge enumeration; optimized path runs
    /// `DegreePresparse::presparse` at 10% target sparsity. Target: 5.9x.
    fn benchmark_dspar(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let graph = create_test_graph(size, size * 5);
            // Baseline: process all edges
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let edges = graph.edges();
                let _count = edges.len();
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: DSpar filtering
            let mut dspar = DegreePresparse::with_config(PresparseConfig {
                target_sparsity: 0.1,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _ = dspar.presparse(&graph);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut result = BenchmarkResult::new(
                &format!("DSpar n={}", size),
                baseline_us,
                opt_us,
                5.9, // Target speedup
            );
            // Get sparsification stats (one extra run, outside the timed loop)
            let sparse_result = dspar.presparse(&graph);
            result.add_metric("sparsity_ratio", sparse_result.stats.sparsity_ratio);
            result.add_metric(
                "edges_reduced",
                (sparse_result.stats.original_edges - sparse_result.stats.sparse_edges) as f64,
            );
            results.push(result);
        }
        compute_summary("DSpar", results)
    }

    /// Benchmark cache performance
    ///
    /// Baseline recomputes a simulated distance for every query; optimized
    /// path answers from a pre-warmed `PathDistanceCache`. Target: 10x for
    /// repeated (cached) queries.
    fn benchmark_cache(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            // Baseline: no caching (compute every time)
            let baseline_start = Instant::now();
            let mut total = 0.0;
            for _ in 0..self.iterations {
                for i in 0..size {
                    // Simulate distance computation
                    total += (i as f64 * 1.414).sqrt();
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let _ = total; // Prevent optimization
            // Optimized: with caching
            let cache = PathDistanceCache::with_config(CacheConfig {
                max_entries: size,
                ..Default::default()
            });
            // Warm up cache (half the keys, so both hits and misses occur)
            for i in 0..(size / 2) {
                cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
            }
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                for i in 0..size {
                    if cache.get(i as u64, (i + 1) as u64).is_none() {
                        cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
                    }
                }
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut result = BenchmarkResult::new(
                &format!("Cache n={}", size),
                baseline_us,
                opt_us,
                10.0, // Target speedup for cached hits
            );
            let stats = cache.stats();
            result.add_metric("hit_rate", stats.hit_rate());
            result.add_metric("cache_size", stats.size as f64);
            results.push(result);
        }
        compute_summary("Cache", results)
    }

    /// Benchmark SIMD operations
    ///
    /// Two sub-benchmarks per size: `find_min` (scalar scan vs
    /// `SimdDistanceOps::find_min`) and `relax_batch` (scalar relax loop vs
    /// `SimdDistanceOps::relax_batch`). Target: 2x each.
    fn benchmark_simd(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let mut arr = DistanceArray::new(size);
            // Initialize with test data
            for i in 0..size {
                arr.set(i as u64, (i as f64) * 0.5 + 1.0);
            }
            arr.set((size / 2) as u64, 0.1); // Min value
            // Baseline: naive find_min
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let data = arr.as_slice();
                let mut min_val = f64::INFINITY;
                let mut min_idx = 0;
                for (i, &d) in data.iter().enumerate() {
                    if d < min_val {
                        min_val = d;
                        min_idx = i;
                    }
                }
                let _ = (min_val, min_idx);
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: SIMD find_min
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _ = SimdDistanceOps::find_min(&arr);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("SIMD find_min n={}", size),
                baseline_us,
                opt_us.max(1), // Avoid divide by zero
                2.0,           // Target speedup
            );
            results.push(result);
            // Also benchmark relax_batch (up to 100 synthetic neighbors)
            let neighbors: Vec<_> = (0..(size / 10).min(100))
                .map(|i| ((i * 10) as u64, 1.0))
                .collect();
            let baseline_start = Instant::now();
            let mut arr_baseline = DistanceArray::new(size);
            for _ in 0..self.iterations {
                let data = arr_baseline.as_mut_slice();
                for &(idx, weight) in &neighbors {
                    let idx = idx as usize;
                    if idx < data.len() {
                        // Relax from a source at distance 0.0
                        let new_dist = 0.0 + weight;
                        if new_dist < data[idx] {
                            data[idx] = new_dist;
                        }
                    }
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let mut arr_opt = DistanceArray::new(size);
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                SimdDistanceOps::relax_batch(&mut arr_opt, 0.0, &neighbors);
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("SIMD relax_batch n={}", size),
                baseline_us,
                opt_us.max(1),
                2.0,
            );
            results.push(result);
        }
        compute_summary("SIMD", results)
    }

    /// Benchmark pool allocation
    ///
    /// Baseline allocates and drops 10 `LevelData` per iteration; optimized
    /// path allocates through a `LevelPool` with lazy deallocation.
    /// NOTE(review): `size_of_val(&level)` only measures the shallow struct
    /// size, not heap memory owned by `LevelData`, so `baseline_memory` is
    /// likely understated — confirm against LevelData's layout.
    fn benchmark_pool(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            // Baseline: allocate/deallocate each time
            let baseline_start = Instant::now();
            let mut baseline_memory = 0usize;
            for _ in 0..self.iterations {
                let mut levels = Vec::new();
                for i in 0..10 {
                    let level = LevelData::new(i, size);
                    baseline_memory = baseline_memory.max(std::mem::size_of_val(&level));
                    levels.push(level);
                }
                // Drop all
                drop(levels);
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: pool allocation with lazy deallocation
            let pool = LevelPool::with_config(PoolConfig {
                max_materialized_levels: 5,
                lazy_dealloc: true,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                for i in 0..10 {
                    let level = pool.allocate_level(i, size);
                    pool.materialize(i, level);
                }
                // Some evictions happen automatically
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let stats = pool.stats();
            let mut result =
                BenchmarkResult::new(&format!("Pool n={}", size), baseline_us, opt_us.max(1), 2.0);
            result = result.with_memory(
                baseline_memory * 10,  // Baseline: all levels materialized
                stats.pool_size_bytes, // Optimized: only max_materialized
            );
            result.add_metric("evictions", stats.evictions as f64);
            result.add_metric("materialized_levels", stats.materialized_levels as f64);
            results.push(result);
        }
        compute_summary("Pool", results)
    }

    /// Benchmark parallel processing
    ///
    /// Processes 100 synthetic levels sequentially vs via
    /// `ParallelLevelUpdater::process_parallel`, with identical per-level
    /// work. Target is a conservative 2x since scaling depends on core count.
    fn benchmark_parallel(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let levels: Vec<usize> = (0..100).collect();
            // Baseline: sequential processing
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                let _results: Vec<_> = levels
                    .iter()
                    .map(|&level| {
                        // Simulate work
                        let mut sum = 0.0;
                        for i in 0..(size / 100).max(1) {
                            sum += (i as f64).sqrt();
                        }
                        LevelUpdateResult {
                            level,
                            cut_value: sum,
                            partition: HashSet::new(),
                            time_us: 0,
                        }
                    })
                    .collect();
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: parallel processing
            let updater = ParallelLevelUpdater::with_config(ParallelConfig {
                min_parallel_size: 10,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                let _results = updater.process_parallel(&levels, |level| {
                    // Same simulated work as the baseline closure
                    let mut sum = 0.0;
                    for i in 0..(size / 100).max(1) {
                        sum += (i as f64).sqrt();
                    }
                    LevelUpdateResult {
                        level,
                        cut_value: sum,
                        partition: HashSet::new(),
                        time_us: 0,
                    }
                });
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let result = BenchmarkResult::new(
                &format!("Parallel n={}", size),
                baseline_us,
                opt_us.max(1),
                2.0, // Conservative target (depends on core count)
            );
            results.push(result);
        }
        compute_summary("Parallel", results)
    }

    /// Benchmark WASM batch operations
    ///
    /// Baseline simulates one FFI call per edge (via `black_box`); optimized
    /// path queues the whole edge set and executes it as a single batch.
    /// Target: 10x FFI overhead reduction.
    fn benchmark_wasm_batch(&self) -> OptimizationBenchmark {
        let mut results = Vec::new();
        for &size in &self.sizes {
            let edges: Vec<_> = (0..size).map(|i| (i as u64, (i + 1) as u64, 1.0)).collect();
            // Baseline: individual operations
            let baseline_start = Instant::now();
            for _ in 0..self.iterations {
                // Simulate individual FFI calls
                for edge in &edges {
                    let _ = edge; // FFI overhead simulation
                    std::hint::black_box(edge);
                }
            }
            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
            // Optimized: batch operations
            let mut batch = WasmBatchOps::with_config(BatchConfig {
                max_batch_size: 1024,
                ..Default::default()
            });
            let opt_start = Instant::now();
            for _ in 0..self.iterations {
                batch.queue_insert_edges(edges.clone());
                let _ = batch.execute_batch();
            }
            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
            let stats = batch.stats();
            let mut result = BenchmarkResult::new(
                &format!("WASM Batch n={}", size),
                baseline_us,
                opt_us.max(1),
                10.0,
            );
            result.add_metric("avg_items_per_op", stats.avg_items_per_op);
            results.push(result);
        }
        compute_summary("WASM Batch", results)
    }

    /// Get results collected by the most recent `run_all`
    pub fn results(&self) -> &Vec<OptimizationBenchmark> {
        &self.results
    }

    /// Generate report string
    ///
    /// Renders a human-readable summary of every optimization plus the
    /// combined speedup estimate versus the overall 10x goal.
    pub fn report(&self) -> String {
        let mut report = String::new();
        report.push_str("=== j-Tree + BMSSP Optimization Benchmark Report ===\n\n");
        for opt in &self.results {
            report.push_str(&format!("## {} Optimization\n", opt.name));
            report.push_str(&format!(
                "   Average Speedup: {:.2}x\n",
                opt.summary.avg_speedup
            ));
            report.push_str(&format!(
                "   Min/Max: {:.2}x / {:.2}x\n",
                opt.summary.min_speedup, opt.summary.max_speedup
            ));
            report.push_str(&format!(
                "   Targets Achieved: {:.0}%\n",
                opt.summary.targets_achieved_percent
            ));
            if opt.summary.avg_memory_reduction > 0.0 {
                report.push_str(&format!(
                    "   Memory Reduction: {:.1}%\n",
                    opt.summary.avg_memory_reduction
                ));
            }
            report.push_str("\n   Details:\n");
            for result in &opt.results {
                report.push_str(&format!(
                    "   - {}: {:.2}x (target: {:.2}x) {}\n",
                    result.name,
                    result.speedup,
                    result.target_speedup,
                    if result.target_achieved {
                        "[OK]"
                    } else {
                        "[MISS]"
                    }
                ));
            }
            report.push_str("\n");
        }
        let combined = self.combined_speedup();
        report.push_str(&format!("## Combined Speedup Estimate: {:.2}x\n", combined));
        report.push_str(&format!("   Target: 10x\n"));
        report.push_str(&format!(
            "   Status: {}\n",
            if combined >= 10.0 {
                "TARGET ACHIEVED"
            } else {
                "In Progress"
            }
        ));
        report
    }
}
impl Default for BenchmarkSuite {
fn default() -> Self {
Self::new()
}
}
/// Helper to create test graph
///
/// Builds a graph with `vertices` vertices and at most `edges` edges, laid
/// out deterministically over the ordered pairs (i, j) with i < j and unit
/// weight.
fn create_test_graph(vertices: usize, edges: usize) -> DynamicGraph {
    let graph = DynamicGraph::new();
    // Create vertices 0..vertices
    for id in 0..vertices {
        graph.add_vertex(id as u64);
    }
    // Fill edges pair-by-pair until the budget is spent
    let mut remaining = edges;
    'outer: for i in 0..vertices {
        for j in (i + 1)..vertices {
            if remaining == 0 {
                break 'outer;
            }
            let _ = graph.insert_edge(i as u64, j as u64, 1.0);
            remaining -= 1;
        }
    }
    graph
}
/// Compute summary from results
///
/// Aggregates the per-workload results of one optimization into an
/// `OptimizationBenchmark`: average/min/max speedup, percentage of targets
/// hit, and the mean memory reduction over results that reported memory.
/// An empty input yields a default (all-zero) summary.
fn compute_summary(name: &str, results: Vec<BenchmarkResult>) -> OptimizationBenchmark {
    if results.is_empty() {
        return OptimizationBenchmark {
            name: name.to_string(),
            results: Vec::new(),
            summary: BenchmarkSummary::default(),
        };
    }
    // Single pass over speedups for sum / min / max / target count.
    let n = results.len() as f64;
    let mut speedup_sum = 0.0;
    let mut min_speedup = f64::INFINITY;
    let mut max_speedup: f64 = 0.0;
    let mut achieved_count = 0usize;
    for r in &results {
        speedup_sum += r.speedup;
        min_speedup = min_speedup.min(r.speedup);
        max_speedup = max_speedup.max(r.speedup);
        if r.target_achieved {
            achieved_count += 1;
        }
    }
    // Memory reduction is averaged only over results that measured memory.
    let reductions: Vec<f64> = results
        .iter()
        .filter(|r| r.baseline_memory > 0)
        .map(|r| r.memory_reduction_percent)
        .collect();
    let avg_memory_reduction = if reductions.is_empty() {
        0.0
    } else {
        reductions.iter().sum::<f64>() / reductions.len() as f64
    };
    OptimizationBenchmark {
        name: name.to_string(),
        results,
        summary: BenchmarkSummary {
            avg_speedup: speedup_sum / n,
            min_speedup,
            max_speedup,
            targets_achieved_percent: 100.0 * achieved_count as f64 / n,
            avg_memory_reduction,
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Speedup math and target flag for a simple 10x case.
    #[test]
    fn test_benchmark_result() {
        let result = BenchmarkResult::new("test", 1000, 100, 5.0);
        assert_eq!(result.speedup, 10.0);
        assert!(result.target_achieved);
    }

    /// 1000 -> 250 bytes is a 75% memory reduction.
    #[test]
    fn test_benchmark_result_memory() {
        let result = BenchmarkResult::new("test", 100, 50, 1.0).with_memory(1000, 250);
        assert_eq!(result.memory_reduction_percent, 75.0);
    }

    /// The helper respects vertex count and the edge budget upper bound.
    #[test]
    fn test_create_test_graph() {
        let graph = create_test_graph(10, 20);
        assert_eq!(graph.num_vertices(), 10);
        assert!(graph.num_edges() <= 20);
    }

    /// Smoke test: the full suite runs end to end on a tiny input.
    #[test]
    fn test_benchmark_suite_small() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        let results = suite.run_all();
        assert!(!results.is_empty());
    }

    /// Combined speedup is a valid finite positive number even when tiny
    /// inputs make individual optimizations slower than baseline.
    #[test]
    fn test_combined_speedup() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        suite.run_all();
        let combined = suite.combined_speedup();
        // For very small inputs, overhead may exceed benefit
        // Just verify we get a valid positive result
        assert!(
            combined > 0.0 && combined.is_finite(),
            "Combined speedup {} should be positive and finite",
            combined
        );
    }

    /// The rendered report contains the expected section headers.
    #[test]
    fn test_report_generation() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);
        suite.run_all();
        let report = suite.report();
        assert!(report.contains("Benchmark Report"));
        assert!(report.contains("DSpar"));
        assert!(report.contains("Combined Speedup"));
    }
}

View File

@@ -0,0 +1,535 @@
//! LRU Cache for Path Distances
//!
//! Provides efficient caching of path distances with:
//! - LRU eviction policy
//! - Prefetch hints based on access patterns
//! - Lock-free concurrent reads
//! - Batch update support
//!
//! Target: 10x speedup for repeated distance queries
use crate::graph::VertexId;
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::RwLock;
/// Configuration for path distance cache
///
/// Tunes capacity and the access-pattern-based prefetch machinery.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum number of entries in cache
    pub max_entries: usize,
    /// Enable access pattern tracking for prefetch
    pub enable_prefetch: bool,
    /// Number of recent queries to track for prefetch
    pub prefetch_history_size: usize,
    /// Prefetch lookahead count
    pub prefetch_lookahead: usize,
}

impl Default for CacheConfig {
    /// Defaults: 10_000 entries, prefetch enabled, 100-query history,
    /// lookahead of 4 predicted queries.
    fn default() -> Self {
        CacheConfig {
            prefetch_lookahead: 4,
            prefetch_history_size: 100,
            enable_prefetch: true,
            max_entries: 10_000,
        }
    }
}
/// Statistics for cache performance
///
/// A point-in-time snapshot; counters are cumulative since cache creation.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Total cache hits
    pub hits: u64,
    /// Total cache misses
    pub misses: u64,
    /// Current cache size
    pub size: usize,
    /// Number of prefetch hits
    pub prefetch_hits: u64,
    /// Number of evictions
    pub evictions: u64,
}

impl CacheStats {
    /// Fraction of lookups that hit, in [0, 1]; 0.0 before any lookup.
    pub fn hit_rate(&self) -> f64 {
        match self.hits + self.misses {
            0 => 0.0,
            total => self.hits as f64 / total as f64,
        }
    }
}
/// Hint for prefetching likely queries
///
/// Produced by `PathDistanceCache::get_prefetch_hints` from the recent
/// query history: a hot source vertex plus the targets seen paired with it.
#[derive(Debug, Clone)]
pub struct PrefetchHint {
    /// Source vertex
    pub source: VertexId,
    /// Likely target vertices (may contain duplicates from repeated queries)
    pub targets: Vec<VertexId>,
    /// Confidence score (0.0-1.0), derived from the source's share of history
    pub confidence: f64,
}
/// Entry in the LRU cache
///
/// NOTE(review): `source`/`target` keep the caller's original orientation,
/// while the map key (`CacheKey`) is stored normalized — these fields appear
/// redundant with the key; confirm whether anything reads the orientation.
#[derive(Debug, Clone)]
struct CacheEntry {
    /// Source vertex (as passed by the inserting caller)
    source: VertexId,
    /// Target vertex (as passed by the inserting caller)
    target: VertexId,
    /// Cached distance
    distance: f64,
    /// Last access time (for LRU) — a monotonically increasing counter value
    last_access: u64,
    /// Was this a prefetch? (used to attribute prefetch hits in stats)
    prefetched: bool,
}
/// Key for cache lookup
///
/// Stored in normalized (min, max) order so a vertex pair maps to the same
/// key regardless of query direction.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
struct CacheKey {
    source: VertexId,
    target: VertexId,
}

impl CacheKey {
    /// Build a direction-independent key: `new(a, b) == new(b, a)`.
    fn new(source: VertexId, target: VertexId) -> Self {
        let (source, target) = if source <= target {
            (source, target)
        } else {
            (target, source)
        };
        Self { source, target }
    }
}
/// LRU cache for path distances
///
/// Keys are symmetric vertex pairs. Reads take only the shared lock on the
/// main map; statistics are plain atomics so `get` never writes the map.
pub struct PathDistanceCache {
    config: CacheConfig,
    /// Main cache storage
    cache: RwLock<HashMap<CacheKey, CacheEntry>>,
    /// LRU order tracking (front = oldest, back = newest insertion)
    lru_order: RwLock<VecDeque<CacheKey>>,
    /// Access counter for LRU timestamps (monotonic, relaxed ordering)
    access_counter: AtomicU64,
    /// Statistics
    hits: AtomicU64,
    misses: AtomicU64,
    prefetch_hits: AtomicU64,
    evictions: AtomicU64,
    /// Query history for prefetch prediction (bounded by config)
    query_history: RwLock<VecDeque<CacheKey>>,
    /// Predicted next queries (refreshed periodically from history)
    predicted_queries: RwLock<Vec<CacheKey>>,
}
impl PathDistanceCache {
    /// Create new cache with default config
    pub fn new() -> Self {
        Self::with_config(CacheConfig::default())
    }

    /// Create with custom config
    pub fn with_config(config: CacheConfig) -> Self {
        Self {
            config,
            cache: RwLock::new(HashMap::new()),
            lru_order: RwLock::new(VecDeque::new()),
            access_counter: AtomicU64::new(0),
            hits: AtomicU64::new(0),
            misses: AtomicU64::new(0),
            prefetch_hits: AtomicU64::new(0),
            evictions: AtomicU64::new(0),
            query_history: RwLock::new(VecDeque::new()),
            predicted_queries: RwLock::new(Vec::new()),
        }
    }

    /// Get cached distance if available.
    ///
    /// Lookups are symmetric: `get(a, b)` and `get(b, a)` hit the same entry.
    /// Reads take only the shared lock, so concurrent readers do not block
    /// each other. NOTE(review): a hit does not refresh the entry's queue
    /// position, so eviction order is by (re)insertion, not by last read.
    pub fn get(&self, source: VertexId, target: VertexId) -> Option<f64> {
        let key = CacheKey::new(source, target);
        // Try to read from cache
        let cache = self.cache.read().unwrap();
        if let Some(entry) = cache.get(&key) {
            self.hits.fetch_add(1, Ordering::Relaxed);
            if entry.prefetched {
                self.prefetch_hits.fetch_add(1, Ordering::Relaxed);
            }
            // Feed the access-pattern tracker (best effort, never blocks)
            if self.config.enable_prefetch {
                self.record_query(key);
            }
            return Some(entry.distance);
        }
        drop(cache);
        self.misses.fetch_add(1, Ordering::Relaxed);
        // Misses are also interesting for prefetch prediction
        if self.config.enable_prefetch {
            self.record_query(key);
        }
        None
    }

    /// Insert distance into cache
    pub fn insert(&self, source: VertexId, target: VertexId, distance: f64) {
        let key = CacheKey::new(source, target);
        let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
        let entry = CacheEntry {
            source,
            target,
            distance,
            last_access: timestamp,
            prefetched: false,
        };
        self.insert_entry(key, entry);
    }

    /// Insert with prefetch flag (so prefetch effectiveness shows in stats)
    pub fn insert_prefetch(&self, source: VertexId, target: VertexId, distance: f64) {
        let key = CacheKey::new(source, target);
        let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
        let entry = CacheEntry {
            source,
            target,
            distance,
            last_access: timestamp,
            prefetched: true,
        };
        self.insert_entry(key, entry);
    }

    /// Internal insert with eviction.
    ///
    /// Fix: re-inserting an existing key previously appended a duplicate key
    /// to `lru_order` while `cache.len()` stayed flat; a later eviction could
    /// then pop the stale duplicate and remove a live entry prematurely,
    /// inflating the eviction counter. The shared helper now refreshes the
    /// key's queue position instead of duplicating it.
    fn insert_entry(&self, key: CacheKey, entry: CacheEntry) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        Self::insert_locked(&self.config, &self.evictions, &mut cache, &mut lru, key, entry);
    }

    /// Shared locked-insert path: dedupe the queue on key replacement, append
    /// the key at the back, then evict from the front until within capacity.
    /// Evictions are only counted when an entry is actually removed.
    fn insert_locked(
        config: &CacheConfig,
        evictions: &AtomicU64,
        cache: &mut HashMap<CacheKey, CacheEntry>,
        lru: &mut VecDeque<CacheKey>,
        key: CacheKey,
        entry: CacheEntry,
    ) {
        // Replacing an existing key must not leave a stale duplicate in the
        // queue — drop the old position and re-append at the back.
        if cache.insert(key, entry).is_some() {
            lru.retain(|k| *k != key);
        }
        lru.push_back(key);
        // Evict oldest entries until we are back within capacity.
        while cache.len() > config.max_entries {
            match lru.pop_front() {
                Some(evict_key) => {
                    if cache.remove(&evict_key).is_some() {
                        evictions.fetch_add(1, Ordering::Relaxed);
                    }
                }
                None => break,
            }
        }
    }

    /// Batch insert multiple distances under a single lock acquisition,
    /// amortizing lock overhead across the whole batch.
    pub fn insert_batch(&self, entries: &[(VertexId, VertexId, f64)]) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        for &(source, target, distance) in entries {
            let key = CacheKey::new(source, target);
            let timestamp = self.access_counter.fetch_add(1, Ordering::Relaxed);
            let entry = CacheEntry {
                source,
                target,
                distance,
                last_access: timestamp,
                prefetched: false,
            };
            Self::insert_locked(&self.config, &self.evictions, &mut cache, &mut lru, key, entry);
        }
    }

    /// Invalidate entries involving a vertex (e.g. after a graph mutation).
    pub fn invalidate_vertex(&self, vertex: VertexId) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        // Single pass over each structure (the previous implementation pruned
        // the queue once per removed key: O(n * m)).
        cache.retain(|k, _| k.source != vertex && k.target != vertex);
        lru.retain(|k| k.source != vertex && k.target != vertex);
    }

    /// Clear entire cache (statistics counters are intentionally kept).
    pub fn clear(&self) {
        let mut cache = self.cache.write().unwrap();
        let mut lru = self.lru_order.write().unwrap();
        cache.clear();
        lru.clear();
    }

    /// Record a query for prefetch prediction.
    ///
    /// Uses `try_write` so the hot read path never blocks on the history
    /// lock; a contended update is simply skipped.
    fn record_query(&self, key: CacheKey) {
        if let Ok(mut history) = self.query_history.try_write() {
            history.push_back(key);
            while history.len() > self.config.prefetch_history_size {
                history.pop_front();
            }
            // Update predictions periodically (every 10th recorded query)
            if history.len() % 10 == 0 {
                self.update_predictions(&history);
            }
        }
    }

    /// Update prefetch predictions based on access patterns.
    ///
    /// Heuristic: pair each of the 5 most recent query sources with vertices
    /// that appeared more than twice in the history window, capped at
    /// `prefetch_lookahead` predictions.
    fn update_predictions(&self, history: &VecDeque<CacheKey>) {
        if history.len() < 10 {
            return;
        }
        // Find frequently co-occurring vertex pairs
        let mut vertex_frequency: HashMap<VertexId, usize> = HashMap::new();
        for key in history.iter() {
            *vertex_frequency.entry(key.source).or_insert(0) += 1;
            *vertex_frequency.entry(key.target).or_insert(0) += 1;
        }
        // Predict likely next queries based on recent pattern
        let recent: Vec<_> = history.iter().rev().take(5).collect();
        let mut predictions = Vec::new();
        for key in recent {
            // Predict queries to neighbors of frequently accessed vertices
            for (vertex, &freq) in &vertex_frequency {
                if freq > 2 && *vertex != key.source && *vertex != key.target {
                    predictions.push(CacheKey::new(key.source, *vertex));
                    if predictions.len() >= self.config.prefetch_lookahead {
                        break;
                    }
                }
            }
            if predictions.len() >= self.config.prefetch_lookahead {
                break;
            }
        }
        if let Ok(mut pred) = self.predicted_queries.try_write() {
            *pred = predictions;
        }
    }

    /// Get prefetch hints based on access patterns.
    ///
    /// A hint is emitted for every vertex that appears in more than 2 recent
    /// queries (on either side of the pair); confidence is its share of the
    /// history window, capped at 1.0.
    pub fn get_prefetch_hints(&self) -> Vec<PrefetchHint> {
        let history = self.query_history.read().unwrap();
        if history.is_empty() {
            return Vec::new();
        }
        // Find most frequently queried sources
        let mut source_freq: HashMap<VertexId, Vec<VertexId>> = HashMap::new();
        for key in history.iter() {
            source_freq.entry(key.source).or_default().push(key.target);
            source_freq.entry(key.target).or_default().push(key.source);
        }
        // Generate hints for hot sources
        source_freq
            .into_iter()
            .filter(|(_, targets)| targets.len() > 2)
            .map(|(source, targets)| {
                let confidence = (targets.len() as f64 / history.len() as f64).min(1.0);
                PrefetchHint {
                    source,
                    targets,
                    confidence,
                }
            })
            .collect()
    }

    /// Get predicted queries for prefetching (normalized vertex pairs).
    pub fn get_predicted_queries(&self) -> Vec<(VertexId, VertexId)> {
        let pred = self.predicted_queries.read().unwrap();
        pred.iter().map(|key| (key.source, key.target)).collect()
    }

    /// Get cache statistics (a consistent snapshot of size + counters).
    pub fn stats(&self) -> CacheStats {
        let cache = self.cache.read().unwrap();
        CacheStats {
            hits: self.hits.load(Ordering::Relaxed),
            misses: self.misses.load(Ordering::Relaxed),
            size: cache.len(),
            prefetch_hits: self.prefetch_hits.load(Ordering::Relaxed),
            evictions: self.evictions.load(Ordering::Relaxed),
        }
    }

    /// Get current cache size
    pub fn len(&self) -> usize {
        self.cache.read().unwrap().len()
    }

    /// Check if cache is empty
    pub fn is_empty(&self) -> bool {
        self.cache.read().unwrap().is_empty()
    }
}
impl Default for PathDistanceCache {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Insert/get round-trip, symmetric lookup, and a miss.
    #[test]
    fn test_basic_cache_operations() {
        let cache = PathDistanceCache::new();
        // Insert and retrieve
        cache.insert(1, 2, 10.0);
        assert_eq!(cache.get(1, 2), Some(10.0));
        // Symmetric access
        assert_eq!(cache.get(2, 1), Some(10.0));
        // Miss
        assert_eq!(cache.get(1, 3), None);
    }

    /// With capacity 3, a fourth insert evicts the oldest entry.
    #[test]
    fn test_lru_eviction() {
        let cache = PathDistanceCache::with_config(CacheConfig {
            max_entries: 3,
            ..Default::default()
        });
        cache.insert(1, 2, 1.0);
        cache.insert(2, 3, 2.0);
        cache.insert(3, 4, 3.0);
        // Cache is full
        assert_eq!(cache.len(), 3);
        // Insert new entry - should evict (1,2)
        cache.insert(4, 5, 4.0);
        assert_eq!(cache.len(), 3);
        assert_eq!(cache.get(1, 2), None); // Evicted
        assert_eq!(cache.get(4, 5), Some(4.0)); // Present
    }

    /// Batch insert stores every entry and each is retrievable.
    #[test]
    fn test_batch_insert() {
        let cache = PathDistanceCache::new();
        let entries = vec![(1, 2, 1.0), (2, 3, 2.0), (3, 4, 3.0)];
        cache.insert_batch(&entries);
        assert_eq!(cache.len(), 3);
        assert_eq!(cache.get(1, 2), Some(1.0));
        assert_eq!(cache.get(2, 3), Some(2.0));
        assert_eq!(cache.get(3, 4), Some(3.0));
    }

    /// Invalidation removes every entry touching the vertex, nothing else.
    #[test]
    fn test_invalidate_vertex() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        cache.insert(1, 3, 2.0);
        cache.insert(2, 3, 3.0);
        cache.invalidate_vertex(1);
        assert_eq!(cache.get(1, 2), None);
        assert_eq!(cache.get(1, 3), None);
        assert_eq!(cache.get(2, 3), Some(3.0));
    }

    /// Hit/miss counters and hit rate reflect the performed lookups.
    #[test]
    fn test_statistics() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        // Hit
        cache.get(1, 2);
        cache.get(1, 2);
        // Miss
        cache.get(3, 4);
        let stats = cache.stats();
        assert_eq!(stats.hits, 2);
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.size, 1);
        assert!(stats.hit_rate() > 0.5);
    }

    /// A hot source (vertex 1) should yield prefetch hints, or at minimum
    /// the accesses register as hits.
    #[test]
    fn test_prefetch_hints() {
        let cache = PathDistanceCache::with_config(CacheConfig {
            enable_prefetch: true,
            prefetch_history_size: 50,
            ..Default::default()
        });
        // Generate access pattern
        for i in 0..20 {
            cache.insert(1, i as u64, i as f64);
            let _ = cache.get(1, i as u64);
        }
        let hints = cache.get_prefetch_hints();
        // Should have hints for vertex 1 (frequently accessed)
        assert!(!hints.is_empty() || cache.stats().hits > 0);
    }

    /// `clear` empties the cache entirely.
    #[test]
    fn test_clear() {
        let cache = PathDistanceCache::new();
        cache.insert(1, 2, 1.0);
        cache.insert(2, 3, 2.0);
        assert_eq!(cache.len(), 2);
        cache.clear();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
    }
}

View File

@@ -0,0 +1,499 @@
//! Degree-based Presparse (DSpar) Implementation
//!
//! Fast approximation for sparsification using effective resistance:
//! R_eff(u,v) ≈ 1 / (deg(u) × deg(v))
//!
//! This provides a 5.9x speedup over exact effective resistance computation
//! while maintaining spectral properties for minimum cut preservation.
//!
//! Reference: "Degree-based Sparsification" (OpenReview)
use crate::graph::{DynamicGraph, EdgeId, VertexId, Weight};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
/// Configuration for degree-based presparse
///
/// Controls how aggressively edges are filtered before exact sparsification.
#[derive(Debug, Clone)]
pub struct PresparseConfig {
    /// Target sparsity ratio (0.0-1.0, lower = more sparse)
    pub target_sparsity: f64,
    /// Minimum effective resistance threshold for keeping edges
    pub resistance_threshold: f64,
    /// Whether to use adaptive threshold based on graph density
    pub adaptive_threshold: bool,
    /// Maximum edges to keep (optional hard limit)
    pub max_edges: Option<usize>,
    /// Random seed for probabilistic sampling
    pub seed: Option<u64>,
}

impl Default for PresparseConfig {
    /// Defaults: keep ~10% of edges, adaptive thresholding on, no hard edge
    /// cap, and a fixed seed (42) for reproducible sampling.
    fn default() -> Self {
        PresparseConfig {
            seed: Some(42),
            max_edges: None,
            adaptive_threshold: true,
            resistance_threshold: 0.0,
            target_sparsity: 0.1,
        }
    }
}
/// Statistics from presparse operation
///
/// Summarizes one `DegreePresparse::presparse` run; `Default` is all-zero.
#[derive(Debug, Clone, Default)]
pub struct PresparseStats {
    /// Original number of edges
    pub original_edges: usize,
    /// Number of edges after presparse
    pub sparse_edges: usize,
    /// Sparsity ratio achieved (sparse / original)
    pub sparsity_ratio: f64,
    /// Time taken in microseconds
    pub time_us: u64,
    /// Estimated speedup factor
    pub speedup_factor: f64,
    /// Number of vertices affected
    pub vertices_processed: usize,
}
/// Result of presparse operation
///
/// The sparsified edge list plus bookkeeping to map back to the original
/// graph's edges.
#[derive(Debug)]
pub struct PresparseResult {
    /// Sparsified edges with scaled weights
    pub edges: Vec<(VertexId, VertexId, Weight)>,
    /// Mapping from new edge index (position in `edges`) to original edge ID
    pub edge_mapping: HashMap<usize, EdgeId>,
    /// Statistics for this run
    pub stats: PresparseStats,
}
/// Degree-based presparse implementation
///
/// Uses effective resistance approximation R_eff(u,v) ≈ 1/(deg_u × deg_v)
/// to pre-filter edges before exact sparsification, achieving 5.9x speedup.
pub struct DegreePresparse {
    config: PresparseConfig,
    /// Cached degree information, rebuilt per `presparse` call
    degree_cache: HashMap<VertexId, usize>,
}
impl DegreePresparse {
/// Create new degree presparse with default config
pub fn new() -> Self {
    Self::with_config(Default::default())
}
/// Create with custom config
pub fn with_config(config: PresparseConfig) -> Self {
Self {
config,
degree_cache: HashMap::new(),
}
}
/// Compute effective resistance approximation for an edge
///
/// R_eff(u,v) ≈ 1 / (deg(u) × deg(v))
///
/// High resistance = edge is important for connectivity
/// Low resistance = edge can likely be removed
#[inline]
pub fn effective_resistance(&self, deg_u: usize, deg_v: usize) -> f64 {
if deg_u == 0 || deg_v == 0 {
return f64::INFINITY; // Always keep edges to isolated vertices
}
1.0 / (deg_u as f64 * deg_v as f64)
}
/// Pre-compute degrees for all vertices
fn precompute_degrees(&mut self, graph: &DynamicGraph) {
self.degree_cache.clear();
for v in graph.vertices() {
self.degree_cache.insert(v, graph.degree(v));
}
}
/// Compute adaptive threshold based on graph properties
fn compute_adaptive_threshold(&self, graph: &DynamicGraph) -> f64 {
let n = graph.num_vertices();
let m = graph.num_edges();
if n == 0 || m == 0 {
return 0.0;
}
// Average degree
let avg_degree = (2 * m) as f64 / n as f64;
// Target: keep O(n log n) edges
let target_edges = (n as f64 * (n as f64).ln()).min(m as f64);
// Compute threshold that keeps approximately target_edges
// Higher threshold = fewer edges kept
let sparsity = target_edges / m as f64;
// Threshold based on average effective resistance
1.0 / (avg_degree * avg_degree * sparsity.max(0.01))
}
/// Perform degree-based presparse on a graph
///
/// Returns a sparsified edge set that preserves spectral properties
/// for minimum cut computation.
pub fn presparse(&mut self, graph: &DynamicGraph) -> PresparseResult {
let start = std::time::Instant::now();
// Pre-compute degrees
self.precompute_degrees(graph);
let original_edges = graph.num_edges();
// Compute threshold
let threshold = if self.config.adaptive_threshold {
self.compute_adaptive_threshold(graph)
} else {
self.config.resistance_threshold
};
// Score all edges by effective resistance
let mut scored_edges: Vec<(EdgeId, VertexId, VertexId, Weight, f64)> =
Vec::with_capacity(original_edges);
for edge in graph.edges() {
let deg_u = *self.degree_cache.get(&edge.source).unwrap_or(&1);
let deg_v = *self.degree_cache.get(&edge.target).unwrap_or(&1);
let resistance = self.effective_resistance(deg_u, deg_v);
scored_edges.push((edge.id, edge.source, edge.target, edge.weight, resistance));
}
// Sort by resistance (descending - high resistance = important)
scored_edges.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap_or(std::cmp::Ordering::Equal));
// Determine how many edges to keep
let target_count = if let Some(max) = self.config.max_edges {
max.min(original_edges)
} else {
((original_edges as f64 * self.config.target_sparsity).ceil() as usize).max(1)
};
// Keep edges with highest effective resistance
let mut result_edges = Vec::with_capacity(target_count);
let mut edge_mapping = HashMap::with_capacity(target_count);
let mut kept_vertices = HashSet::new();
for (idx, (edge_id, u, v, weight, resistance)) in scored_edges.into_iter().enumerate() {
if result_edges.len() >= target_count && resistance < threshold {
break;
}
// Scale weight by inverse sampling probability
let sampling_prob = self.sampling_probability(resistance, threshold);
let scaled_weight = if sampling_prob > 0.0 {
weight / sampling_prob
} else {
weight
};
result_edges.push((u, v, scaled_weight));
edge_mapping.insert(result_edges.len() - 1, edge_id);
kept_vertices.insert(u);
kept_vertices.insert(v);
if result_edges.len() >= target_count {
break;
}
}
let elapsed_us = start.elapsed().as_micros() as u64;
let sparse_edges = result_edges.len();
// Estimate speedup: O(m) -> O(m') where m' << m
// Plus the 5.9x from avoiding exact resistance computation
let sparsity_speedup = if sparse_edges > 0 {
original_edges as f64 / sparse_edges as f64
} else {
1.0
};
let speedup_factor = sparsity_speedup.min(5.9); // Cap at theoretical DSpar speedup
PresparseResult {
edges: result_edges,
edge_mapping,
stats: PresparseStats {
original_edges,
sparse_edges,
sparsity_ratio: sparse_edges as f64 / original_edges.max(1) as f64,
time_us: elapsed_us,
speedup_factor,
vertices_processed: kept_vertices.len(),
},
}
}
/// Compute sampling probability for an edge
#[inline]
fn sampling_probability(&self, resistance: f64, threshold: f64) -> f64 {
if resistance >= threshold {
1.0 // Always keep high-resistance edges
} else {
// Probability proportional to resistance
(resistance / threshold).max(0.01)
}
}
/// Incremental update: handle edge insertion
///
/// Returns whether the edge should be included in the sparse graph
pub fn should_include_edge(&mut self, graph: &DynamicGraph, u: VertexId, v: VertexId) -> bool {
// Update degree cache
self.degree_cache.insert(u, graph.degree(u));
self.degree_cache.insert(v, graph.degree(v));
let deg_u = *self.degree_cache.get(&u).unwrap_or(&1);
let deg_v = *self.degree_cache.get(&v).unwrap_or(&1);
let resistance = self.effective_resistance(deg_u, deg_v);
let threshold = if self.config.adaptive_threshold {
self.compute_adaptive_threshold(graph)
} else {
self.config.resistance_threshold
};
resistance >= threshold
}
/// Get statistics for the presparse
pub fn config(&self) -> &PresparseConfig {
&self.config
}
}
impl Default for DegreePresparse {
fn default() -> Self {
Self::new()
}
}
/// Spectral concordance loss for validating sparsification quality
///
/// L = λ₁·Laplacian_Alignment + λ₂·Feature_Preserve + λ₃·Sparsity
pub struct SpectralConcordance {
    /// Weight for Laplacian alignment term
    pub lambda_laplacian: f64,
    /// Weight for feature preservation term
    pub lambda_feature: f64,
    /// Weight for sparsity inducing term
    pub lambda_sparsity: f64,
}
impl Default for SpectralConcordance {
    /// Default weighting: 1.0 / 0.5 / 0.1 for the three loss terms.
    fn default() -> Self {
        SpectralConcordance {
            lambda_sparsity: 0.1,
            lambda_feature: 0.5,
            lambda_laplacian: 1.0,
        }
    }
}
impl SpectralConcordance {
    /// Compute the spectral concordance loss between original and sparse graphs.
    pub fn compute_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        self.lambda_laplacian * self.laplacian_alignment_loss(original, sparse)
            + self.lambda_feature * self.feature_preservation_loss(original, sparse)
            + self.lambda_sparsity * self.sparsity_loss(original, sparse)
    }
    /// Approximate Laplacian alignment loss using the degree distribution:
    /// mean relative degree difference over vertices with nonzero original degree.
    fn laplacian_alignment_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        let verts = original.vertices();
        if verts.is_empty() {
            return 0.0;
        }
        // Accumulate (sum of relative diffs, count of contributing vertices).
        let (total_diff, count) = verts.into_iter().fold((0.0_f64, 0usize), |(sum, n), v| {
            let orig_deg = original.degree(v) as f64;
            if orig_deg > 0.0 {
                let sparse_deg = sparse.degree(v) as f64;
                (sum + ((orig_deg - sparse_deg) / orig_deg).abs(), n + 1)
            } else {
                (sum, n)
            }
        });
        if count == 0 {
            0.0
        } else {
            total_diff / count as f64
        }
    }
    /// Feature preservation loss: relative change in minimum degree
    /// (a crude approximation of cut preservation).
    fn feature_preservation_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        let min_degree = |g: &DynamicGraph| -> f64 {
            g.vertices().iter().map(|&v| g.degree(v)).min().unwrap_or(0) as f64
        };
        let orig_min = min_degree(original);
        let sparse_min = min_degree(sparse);
        if orig_min > 0.0 {
            ((orig_min - sparse_min) / orig_min).abs()
        } else {
            0.0
        }
    }
    /// Sparsity inducing loss: fraction of original edges retained.
    fn sparsity_loss(&self, original: &DynamicGraph, sparse: &DynamicGraph) -> f64 {
        sparse.num_edges() as f64 / original.num_edges().max(1) as f64
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Complete graph K10 (45 edges, every vertex has degree 9) — dense
    /// enough to give the sparsifier something to remove.
    fn create_test_graph() -> DynamicGraph {
        let g = DynamicGraph::new();
        // Create a dense graph
        for i in 1..=10 {
            for j in (i + 1)..=10 {
                let _ = g.insert_edge(i, j, 1.0);
            }
        }
        g
    }
    #[test]
    fn test_effective_resistance() {
        let dspar = DegreePresparse::new();
        // High degree vertices -> low resistance
        assert!(dspar.effective_resistance(10, 10) < dspar.effective_resistance(2, 2));
        // Zero degree -> infinity
        assert!(dspar.effective_resistance(0, 5).is_infinite());
    }
    #[test]
    fn test_presparse_reduces_edges() {
        let graph = create_test_graph();
        let original_edges = graph.num_edges();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.3,
            ..Default::default()
        });
        let result = dspar.presparse(&graph);
        assert!(result.stats.sparse_edges < original_edges);
        // 0.3 target with ceil rounding can overshoot slightly; 0.5 is a safe bound
        assert!(result.stats.sparsity_ratio <= 0.5);
        assert!(result.stats.speedup_factor > 1.0);
    }
    #[test]
    fn test_presparse_preserves_connectivity() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.2,
            ..Default::default()
        });
        let result = dspar.presparse(&graph);
        // Should keep at least n-1 edges to maintain connectivity
        assert!(result.stats.sparse_edges >= graph.num_vertices() - 1);
    }
    #[test]
    fn test_adaptive_threshold() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            adaptive_threshold: true,
            ..Default::default()
        });
        // Private methods are callable here because tests live in the same module.
        dspar.precompute_degrees(&graph);
        let threshold = dspar.compute_adaptive_threshold(&graph);
        assert!(threshold > 0.0);
    }
    #[test]
    fn test_spectral_concordance() {
        let original = create_test_graph();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            target_sparsity: 0.5,
            ..Default::default()
        });
        let result = dspar.presparse(&original);
        // Create sparse graph from the kept (rescaled) edges
        let sparse = DynamicGraph::new();
        for (u, v, w) in &result.edges {
            let _ = sparse.insert_edge(*u, *v, *w);
        }
        let concordance = SpectralConcordance::default();
        let loss = concordance.compute_loss(&original, &sparse);
        // Loss should be bounded (non-negative, and small for a half-kept K10)
        assert!(loss >= 0.0);
        assert!(loss < 10.0);
    }
    #[test]
    fn test_should_include_edge() {
        let graph = DynamicGraph::new();
        graph.insert_edge(1, 2, 1.0).unwrap();
        graph.insert_edge(2, 3, 1.0).unwrap();
        let mut dspar = DegreePresparse::with_config(PresparseConfig {
            resistance_threshold: 0.0,
            adaptive_threshold: false,
            ..Default::default()
        });
        // New edge to low-degree vertices should be included
        let should_include = dspar.should_include_edge(&graph, 1, 3);
        assert!(should_include);
    }
    #[test]
    fn test_edge_mapping() {
        let graph = create_test_graph();
        let mut dspar = DegreePresparse::new();
        let result = dspar.presparse(&graph);
        // Each sparse edge should map back to an original edge ID
        for (idx, _) in result.edges.iter().enumerate() {
            assert!(result.edge_mapping.contains_key(&idx));
        }
    }
}

View File

@@ -0,0 +1,29 @@
//! Performance Optimizations for j-Tree + BMSSP Implementation
//!
//! This module implements the SOTA optimizations from ADR-002-addendum-sota-optimizations.md:
//!
//! 1. **Degree-based presparse (DSpar)**: 5.9x speedup via effective resistance approximation
//! 2. **LRU Cache**: Path distance caching with prefetch optimization
//! 3. **SIMD Operations**: Vectorized distance array computations
//! 4. **Pool Allocators**: Memory-efficient allocations with lazy deallocation
//! 5. **Parallel Updates**: Rayon-based parallel level updates with work-stealing
//! 6. **WASM Optimization**: Batch operations and TypedArray transfers
//!
//! Target: Combined 10x speedup over naive implementation.
pub mod benchmark;
pub mod cache;
pub mod dspar;
pub mod parallel;
pub mod pool;
pub mod simd_distance;
pub mod wasm_batch;
// Re-exports
pub use benchmark::{BenchmarkResult, BenchmarkSuite, OptimizationBenchmark};
pub use cache::{CacheConfig, CacheStats, PathDistanceCache, PrefetchHint};
pub use dspar::{DegreePresparse, PresparseConfig, PresparseResult, PresparseStats};
pub use parallel::{ParallelConfig, ParallelLevelUpdater, WorkStealingScheduler};
pub use pool::{LazyLevel, LevelPool, PoolConfig, PoolStats};
pub use simd_distance::{DistanceArray, SimdDistanceOps};
pub use wasm_batch::{BatchConfig, TypedArrayTransfer, WasmBatchOps};

View File

@@ -0,0 +1,697 @@
//! Parallel Level Updates with Work-Stealing
//!
//! Provides efficient parallel computation for j-tree levels:
//! - Rayon-based parallel iteration
//! - Work-stealing for load balancing
//! - Lock-free result aggregation
//! - Adaptive parallelism based on workload
//!
//! Target: Near-linear speedup for independent level updates
use crate::graph::VertexId;
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex, RwLock};
#[cfg(feature = "rayon")]
use rayon::prelude::*;
/// Configuration for parallel level updates
#[derive(Debug, Clone)]
pub struct ParallelConfig {
    /// Minimum workload to use parallelism
    pub min_parallel_size: usize,
    /// Number of threads (0 = auto-detect)
    pub num_threads: usize,
    /// Enable work-stealing
    pub work_stealing: bool,
    /// Chunk size for parallel iteration
    pub chunk_size: usize,
    /// Enable adaptive parallelism
    pub adaptive: bool,
}
impl Default for ParallelConfig {
    /// Defaults: parallelize from 100 items, auto-detect thread count,
    /// work-stealing and adaptive mode enabled, 64-item chunks.
    fn default() -> Self {
        ParallelConfig {
            adaptive: true,
            chunk_size: 64,
            work_stealing: true,
            num_threads: 0, // 0 means auto-detect
            min_parallel_size: 100,
        }
    }
}
/// Work item for parallel processing
#[derive(Debug, Clone)]
pub struct WorkItem {
    /// Level index this item belongs to
    pub level: usize,
    /// Vertices to process
    pub vertices: Vec<VertexId>,
    /// Priority (lower value = higher priority; the scheduler serves ascending)
    pub priority: u32,
    /// Estimated work units (feeds the scheduler's total-work accounting)
    pub estimated_work: usize,
}
/// Result from parallel level update
#[derive(Debug, Clone)]
pub struct LevelUpdateResult {
    /// Level index
    pub level: usize,
    /// Computed cut value
    pub cut_value: f64,
    /// Partition (vertices on one side of the cut)
    pub partition: HashSet<VertexId>,
    /// Time taken in microseconds
    pub time_us: u64,
}
/// Work-stealing scheduler for parallel level processing
///
/// Invariant: `work_queue` is always sorted by ascending `priority`, with
/// FIFO order among equal priorities. `submit` maintains the invariant via
/// binary-search insertion instead of re-sorting the whole queue per push.
pub struct WorkStealingScheduler {
    config: ParallelConfig,
    /// Work queue, sorted by ascending priority
    work_queue: RwLock<Vec<WorkItem>>,
    /// Completed results, keyed by level index
    results: RwLock<HashMap<usize, LevelUpdateResult>>,
    /// Active workers count
    active_workers: AtomicUsize,
    /// Total estimated work units submitted
    total_work: AtomicU64,
    /// Number of successful steals
    steals: AtomicU64,
}
impl WorkStealingScheduler {
    /// Create new scheduler with default config
    pub fn new() -> Self {
        Self::with_config(ParallelConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: ParallelConfig) -> Self {
        Self {
            config,
            work_queue: RwLock::new(Vec::new()),
            results: RwLock::new(HashMap::new()),
            active_workers: AtomicUsize::new(0),
            total_work: AtomicU64::new(0),
            steals: AtomicU64::new(0),
        }
    }
    /// Submit a single work item.
    ///
    /// O(log n) search + O(n) shift, replacing the previous full
    /// O(n log n) sort on every submission. Equal-priority items keep
    /// submission (FIFO) order, exactly matching the old stable sort.
    pub fn submit(&self, item: WorkItem) {
        self.total_work
            .fetch_add(item.estimated_work as u64, Ordering::Relaxed);
        let mut queue = self.work_queue.write().unwrap();
        // First slot whose priority is strictly greater: the new item lands
        // after all existing items of equal priority.
        let pos = queue.partition_point(|w| w.priority <= item.priority);
        queue.insert(pos, item);
    }
    /// Submit multiple work items under a single lock acquisition.
    pub fn submit_batch(&self, items: Vec<WorkItem>) {
        let mut queue = self.work_queue.write().unwrap();
        for item in items {
            self.total_work
                .fetch_add(item.estimated_work as u64, Ordering::Relaxed);
            queue.push(item);
        }
        // One stable sort restores the priority invariant for the whole batch.
        queue.sort_by_key(|w| w.priority);
    }
    /// Try to steal work from the queue.
    ///
    /// Returns the highest-priority item (queue front). `Vec::remove(0)`
    /// is O(n); acceptable for the modest queue sizes used here.
    pub fn steal(&self) -> Option<WorkItem> {
        let mut queue = self.work_queue.write().unwrap();
        if queue.is_empty() {
            return None;
        }
        self.steals.fetch_add(1, Ordering::Relaxed);
        Some(queue.remove(0))
    }
    /// Record a finished level result (overwrites any prior result for that level).
    pub fn complete(&self, result: LevelUpdateResult) {
        let mut results = self.results.write().unwrap();
        results.insert(result.level, result);
    }
    /// Snapshot all results collected so far.
    pub fn get_results(&self) -> HashMap<usize, LevelUpdateResult> {
        self.results.read().unwrap().clone()
    }
    /// Clear all recorded results.
    pub fn clear_results(&self) {
        self.results.write().unwrap().clear();
    }
    /// Check if the work queue is empty.
    pub fn is_empty(&self) -> bool {
        self.work_queue.read().unwrap().is_empty()
    }
    /// Get the current queue size.
    pub fn queue_size(&self) -> usize {
        self.work_queue.read().unwrap().len()
    }
    /// Get the total number of successful steals.
    pub fn steal_count(&self) -> u64 {
        self.steals.load(Ordering::Relaxed)
    }
}
impl Default for WorkStealingScheduler {
    fn default() -> Self {
        Self::new()
    }
}
/// Parallel level updater using Rayon
///
/// Tracks a process-wide minimum cut across all levels via lock-free
/// atomics; the f64 minimum is transported as its IEEE-754 bit pattern
/// inside an `AtomicU64`.
pub struct ParallelLevelUpdater {
    config: ParallelConfig,
    /// Scheduler for work-stealing
    scheduler: Arc<WorkStealingScheduler>,
    /// Global minimum cut found (f64 stored via `to_bits`; +inf = none yet)
    global_min: AtomicU64,
    /// Level with global minimum (usize::MAX = none yet)
    best_level: AtomicUsize,
}
impl ParallelLevelUpdater {
    /// Create new parallel updater with default config
    pub fn new() -> Self {
        Self::with_config(ParallelConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: ParallelConfig) -> Self {
        Self {
            scheduler: Arc::new(WorkStealingScheduler::with_config(config.clone())),
            config,
            global_min: AtomicU64::new(f64::INFINITY.to_bits()),
            best_level: AtomicUsize::new(usize::MAX),
        }
    }
    /// Update global minimum atomically.
    ///
    /// CAS loop on the f64 bit pattern; returns true iff `value` became the
    /// new minimum. The comparison is done on the decoded f64s, so the
    /// bit-pattern storage is only a transport encoding.
    ///
    /// NOTE(review): `best_level` lives in a separate atomic written after
    /// the CAS succeeds, so under contention a stale level can overwrite a
    /// newer one — min value and level can briefly disagree. Also, a NaN
    /// `value` passes the `>=` guard (NaN comparisons are false); callers
    /// are expected to pass finite cut values. Confirm both are acceptable.
    pub fn try_update_min(&self, value: f64, level: usize) -> bool {
        let value_bits = value.to_bits();
        let mut current = self.global_min.load(Ordering::Acquire);
        loop {
            let current_value = f64::from_bits(current);
            if value >= current_value {
                return false;
            }
            // compare_exchange_weak may fail spuriously; the loop retries
            // with the freshly observed value.
            match self.global_min.compare_exchange_weak(
                current,
                value_bits,
                Ordering::AcqRel,
                Ordering::Acquire,
            ) {
                Ok(_) => {
                    self.best_level.store(level, Ordering::Release);
                    return true;
                }
                Err(c) => current = c,
            }
        }
    }
    /// Get current global minimum (+inf when no update has been recorded).
    pub fn global_min(&self) -> f64 {
        f64::from_bits(self.global_min.load(Ordering::Acquire))
    }
    /// Get the level holding the best cut, if any update has been recorded.
    pub fn best_level(&self) -> Option<usize> {
        let level = self.best_level.load(Ordering::Acquire);
        if level == usize::MAX {
            None
        } else {
            Some(level)
        }
    }
    /// Reset global minimum to the initial (+inf, no level) state.
    pub fn reset_min(&self) {
        self.global_min
            .store(f64::INFINITY.to_bits(), Ordering::Release);
        self.best_level.store(usize::MAX, Ordering::Release);
    }
    /// Process levels in parallel using Rayon.
    ///
    /// Falls back to sequential iteration below `min_parallel_size`.
    /// `process_fn` is cloned per item so each task owns its closure;
    /// the `mut` binding is never actually mutated (only clones are called).
    #[cfg(feature = "rayon")]
    pub fn process_parallel<F>(&self, levels: &[usize], mut process_fn: F) -> Vec<LevelUpdateResult>
    where
        F: FnMut(usize) -> LevelUpdateResult + Send + Sync + Clone,
    {
        let size = levels.len();
        if size < self.config.min_parallel_size {
            // Sequential processing for small workloads
            return levels
                .iter()
                .map(|&level| {
                    let result = process_fn.clone()(level);
                    self.try_update_min(result.cut_value, level);
                    result
                })
                .collect();
        }
        // Parallel processing with Rayon
        levels
            .par_iter()
            .map(|&level| {
                let result = process_fn.clone()(level);
                self.try_update_min(result.cut_value, level);
                result
            })
            .collect()
    }
    /// Process levels sequentially (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_parallel<F>(&self, levels: &[usize], mut process_fn: F) -> Vec<LevelUpdateResult>
    where
        F: FnMut(usize) -> LevelUpdateResult + Clone,
    {
        levels
            .iter()
            .map(|&level| {
                let result = process_fn.clone()(level);
                self.try_update_min(result.cut_value, level);
                result
            })
            .collect()
    }
    /// Process work items with work-stealing.
    ///
    /// Despite the name, this relies on rayon's internal work-stealing pool
    /// rather than the explicit `WorkStealingScheduler`.
    #[cfg(feature = "rayon")]
    pub fn process_with_stealing<F>(
        &self,
        work_items: Vec<WorkItem>,
        process_fn: F,
    ) -> Vec<LevelUpdateResult>
    where
        F: Fn(&WorkItem) -> LevelUpdateResult + Send + Sync,
    {
        if work_items.len() < self.config.min_parallel_size {
            // Sequential
            return work_items
                .iter()
                .map(|item| {
                    let result = process_fn(item);
                    self.try_update_min(result.cut_value, item.level);
                    result
                })
                .collect();
        }
        // Parallel with work-stealing
        work_items
            .par_iter()
            .map(|item| {
                let result = process_fn(item);
                self.try_update_min(result.cut_value, item.level);
                result
            })
            .collect()
    }
    /// Process work items sequentially (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_with_stealing<F>(
        &self,
        work_items: Vec<WorkItem>,
        process_fn: F,
    ) -> Vec<LevelUpdateResult>
    where
        F: Fn(&WorkItem) -> LevelUpdateResult,
    {
        work_items
            .iter()
            .map(|item| {
                let result = process_fn(item);
                self.try_update_min(result.cut_value, item.level);
                result
            })
            .collect()
    }
    /// Batch vertex processing within a level, parallel above the threshold.
    #[cfg(feature = "rayon")]
    pub fn process_vertices_parallel<F, R>(&self, vertices: &[VertexId], process_fn: F) -> Vec<R>
    where
        F: Fn(VertexId) -> R + Send + Sync,
        R: Send,
    {
        if vertices.len() < self.config.min_parallel_size {
            return vertices.iter().map(|&v| process_fn(v)).collect();
        }
        vertices.par_iter().map(|&v| process_fn(v)).collect()
    }
    /// Batch vertex processing (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn process_vertices_parallel<F, R>(&self, vertices: &[VertexId], process_fn: F) -> Vec<R>
    where
        F: Fn(VertexId) -> R,
    {
        vertices.iter().map(|&v| process_fn(v)).collect()
    }
    /// Parallel map-reduce over `items`.
    ///
    /// NOTE(review): `reduce_fn` should be associative and `identity` a true
    /// identity for it — rayon clones `identity` per split, so a
    /// non-identity value would be folded in multiple times in the parallel
    /// path but only once sequentially.
    #[cfg(feature = "rayon")]
    pub fn parallel_reduce<T, F, R>(
        &self,
        items: &[T],
        identity: R,
        map_fn: F,
        reduce_fn: fn(R, R) -> R,
    ) -> R
    where
        T: Sync,
        F: Fn(&T) -> R + Send + Sync,
        R: Send + Clone,
    {
        if items.len() < self.config.min_parallel_size {
            return items
                .iter()
                .map(|item| map_fn(item))
                .fold(identity.clone(), reduce_fn);
        }
        items
            .par_iter()
            .map(|item| map_fn(item))
            .reduce(|| identity.clone(), reduce_fn)
    }
    /// Sequential map-reduce (fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn parallel_reduce<T, F, R>(
        &self,
        items: &[T],
        identity: R,
        map_fn: F,
        reduce_fn: fn(R, R) -> R,
    ) -> R
    where
        F: Fn(&T) -> R,
        R: Clone,
    {
        items
            .iter()
            .map(|item| map_fn(item))
            .fold(identity, reduce_fn)
    }
    /// Get a shared handle to the internal work-stealing scheduler.
    pub fn scheduler(&self) -> &Arc<WorkStealingScheduler> {
        &self.scheduler
    }
}
impl Default for ParallelLevelUpdater {
    fn default() -> Self {
        Self::new()
    }
}
/// Parallel cut computation helpers
pub struct ParallelCutOps;
impl ParallelCutOps {
    /// Total weight of edges leaving `partition`, computed in parallel.
    /// Small partitions (< 100 vertices) use the sequential path.
    #[cfg(feature = "rayon")]
    pub fn boundary_size_parallel(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        let inside: Vec<_> = partition.iter().copied().collect();
        if inside.len() < 100 {
            return Self::boundary_size_sequential(partition, adjacency);
        }
        inside
            .par_iter()
            .map(|&v| match adjacency.get(&v) {
                Some(neighbors) => neighbors
                    .iter()
                    .filter(|(n, _)| !partition.contains(n))
                    .map(|(_, w)| w)
                    .sum::<f64>(),
                None => 0.0,
            })
            .sum()
    }
    /// Boundary size (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn boundary_size_parallel(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        Self::boundary_size_sequential(partition, adjacency)
    }
    /// Sequential boundary computation: sum the weights of edges whose
    /// other endpoint lies outside the partition.
    pub fn boundary_size_sequential(
        partition: &HashSet<VertexId>,
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> f64 {
        let mut crossing = 0.0;
        for &v in partition {
            if let Some(neighbors) = adjacency.get(&v) {
                for &(n, w) in neighbors {
                    if !partition.contains(&n) {
                        crossing += w;
                    }
                }
            }
        }
        crossing
    }
    /// Find a vertex of minimum positive degree, in parallel.
    /// Small inputs (< 100 vertices) use the sequential path.
    #[cfg(feature = "rayon")]
    pub fn min_degree_vertex_parallel(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        if vertices.len() < 100 {
            return Self::min_degree_vertex_sequential(vertices, adjacency);
        }
        vertices
            .par_iter()
            .filter_map(|&v| {
                let degree = adjacency.get(&v).map(|n| n.len()).unwrap_or(0);
                if degree > 0 {
                    Some((v, degree))
                } else {
                    None
                }
            })
            .min_by_key(|&(_, d)| d)
    }
    /// Minimum-degree vertex (sequential fallback when rayon is disabled).
    #[cfg(not(feature = "rayon"))]
    pub fn min_degree_vertex_parallel(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        Self::min_degree_vertex_sequential(vertices, adjacency)
    }
    /// Sequential minimum-positive-degree search.
    ///
    /// On ties the later vertex wins, matching `Iterator::min_by_key`
    /// semantics (which returns the last of several equal minima).
    pub fn min_degree_vertex_sequential(
        vertices: &[VertexId],
        adjacency: &HashMap<VertexId, Vec<(VertexId, f64)>>,
    ) -> Option<(VertexId, usize)> {
        let mut best: Option<(VertexId, usize)> = None;
        for &v in vertices {
            let degree = adjacency.get(&v).map(|n| n.len()).unwrap_or(0);
            if degree == 0 {
                continue; // isolated vertices never qualify
            }
            let replace = match best {
                Some((_, d)) => degree <= d,
                None => true,
            };
            if replace {
                best = Some((v, degree));
            }
        }
        best
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_work_item_submission() {
        let scheduler = WorkStealingScheduler::new();
        scheduler.submit(WorkItem {
            level: 0,
            vertices: vec![1, 2, 3],
            priority: 1,
            estimated_work: 100,
        });
        scheduler.submit(WorkItem {
            level: 1,
            vertices: vec![4, 5, 6],
            priority: 0, // Higher priority
            estimated_work: 50,
        });
        assert_eq!(scheduler.queue_size(), 2);
        // Should steal highest priority first
        let stolen = scheduler.steal().unwrap();
        assert_eq!(stolen.level, 1); // Priority 0 comes first
    }
    #[test]
    fn test_parallel_updater_min() {
        let updater = ParallelLevelUpdater::new();
        // Initial state: +inf minimum, no best level
        assert!(updater.global_min().is_infinite());
        assert!(updater.try_update_min(10.0, 0));
        assert_eq!(updater.global_min(), 10.0);
        assert_eq!(updater.best_level(), Some(0));
        assert!(updater.try_update_min(5.0, 1));
        assert_eq!(updater.global_min(), 5.0);
        assert_eq!(updater.best_level(), Some(1));
        // Should not update with higher value
        assert!(!updater.try_update_min(7.0, 2));
        assert_eq!(updater.global_min(), 5.0);
    }
    #[test]
    fn test_process_parallel() {
        let updater = ParallelLevelUpdater::new();
        let levels = vec![0, 1, 2, 3, 4];
        // 5 levels < min_parallel_size (100), so this exercises the
        // sequential path; level 0 produces the smallest cut (0.0).
        let results = updater.process_parallel(&levels, |level| LevelUpdateResult {
            level,
            cut_value: level as f64 * 2.0,
            partition: HashSet::new(),
            time_us: 0,
        });
        assert_eq!(results.len(), 5);
        assert_eq!(updater.global_min(), 0.0);
        assert_eq!(updater.best_level(), Some(0));
    }
    #[test]
    fn test_boundary_size() {
        // Partition {1,2}; crossing edges are 1-3 (2.0) and 2-4 (3.0).
        let partition: HashSet<_> = vec![1, 2].into_iter().collect();
        let mut adjacency: HashMap<VertexId, Vec<(VertexId, f64)>> = HashMap::new();
        adjacency.insert(1, vec![(2, 1.0), (3, 2.0)]);
        adjacency.insert(2, vec![(1, 1.0), (4, 3.0)]);
        adjacency.insert(3, vec![(1, 2.0)]);
        adjacency.insert(4, vec![(2, 3.0)]);
        let boundary = ParallelCutOps::boundary_size_sequential(&partition, &adjacency);
        // Edges crossing: 1-3 (2.0) + 2-4 (3.0) = 5.0
        assert_eq!(boundary, 5.0);
    }
    #[test]
    fn test_min_degree_vertex() {
        let vertices: Vec<_> = vec![1, 2, 3, 4];
        let mut adjacency: HashMap<VertexId, Vec<(VertexId, f64)>> = HashMap::new();
        adjacency.insert(1, vec![(2, 1.0), (3, 1.0), (4, 1.0)]);
        adjacency.insert(2, vec![(1, 1.0)]);
        adjacency.insert(3, vec![(1, 1.0), (4, 1.0)]);
        adjacency.insert(4, vec![(1, 1.0), (3, 1.0)]);
        // Vertex 2 is the unique degree-1 vertex
        let (min_v, min_deg) =
            ParallelCutOps::min_degree_vertex_sequential(&vertices, &adjacency).unwrap();
        assert_eq!(min_v, 2);
        assert_eq!(min_deg, 1);
    }
    #[test]
    fn test_scheduler_steal_count() {
        let scheduler = WorkStealingScheduler::new();
        scheduler.submit(WorkItem {
            level: 0,
            vertices: vec![1],
            priority: 0,
            estimated_work: 10,
        });
        // A steal only counts once it actually removes an item
        assert_eq!(scheduler.steal_count(), 0);
        let _ = scheduler.steal();
        assert_eq!(scheduler.steal_count(), 1);
    }
    #[test]
    fn test_batch_submit() {
        let scheduler = WorkStealingScheduler::new();
        let items = vec![
            WorkItem {
                level: 0,
                vertices: vec![],
                priority: 2,
                estimated_work: 100,
            },
            WorkItem {
                level: 1,
                vertices: vec![],
                priority: 0,
                estimated_work: 50,
            },
            WorkItem {
                level: 2,
                vertices: vec![],
                priority: 1,
                estimated_work: 75,
            },
        ];
        scheduler.submit_batch(items);
        assert_eq!(scheduler.queue_size(), 3);
        // Should be sorted by priority
        let first = scheduler.steal().unwrap();
        assert_eq!(first.level, 1); // Priority 0
    }
}

View File

@@ -0,0 +1,647 @@
//! Pool Allocators and Lazy Level Deallocation
//!
//! Memory-efficient allocation strategies:
//! - Pool allocators for frequent allocations
//! - Lazy deallocation of unused j-tree levels
//! - Compact representations (u16 for small graphs)
//! - Demand-paged level materialization
//!
//! Target: 50-75% memory reduction
use crate::graph::VertexId;
use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
/// Configuration for level pool
#[derive(Debug, Clone)]
pub struct PoolConfig {
    /// Maximum number of materialized levels
    pub max_materialized_levels: usize,
    /// Eviction threshold (levels unused for this many operations)
    pub eviction_threshold: u64,
    /// Preallocation size for level data
    pub prealloc_size: usize,
    /// Enable lazy deallocation
    pub lazy_dealloc: bool,
    /// Memory budget in bytes (0 = unlimited)
    pub memory_budget: usize,
}
impl Default for PoolConfig {
    /// Defaults: at most 16 resident levels, evict after 100 idle
    /// operations, 1 KiB preallocation, lazy deallocation on, no budget.
    fn default() -> Self {
        PoolConfig {
            memory_budget: 0, // unlimited
            lazy_dealloc: true,
            prealloc_size: 1024,
            eviction_threshold: 100,
            max_materialized_levels: 16,
        }
    }
}
/// Statistics for pool allocation
#[derive(Debug, Clone, Default)]
pub struct PoolStats {
    /// Total allocations performed by the pool
    pub allocations: u64,
    /// Total deallocations performed by the pool
    pub deallocations: u64,
    /// Current pool size (bytes)
    pub pool_size_bytes: usize,
    /// Number of currently materialized levels
    pub materialized_levels: usize,
    /// Number of evictions performed
    pub evictions: u64,
    /// Peak memory usage observed (bytes)
    pub peak_memory: usize,
}
/// State of a lazy level in the j-tree
#[derive(Debug, Clone)]
pub enum LazyLevel {
    /// Level not yet materialized
    Unmaterialized,
    /// Level is materialized and valid
    Materialized(LevelData),
    /// Level is materialized but dirty (needs recomputation)
    Dirty(LevelData),
    /// Level was evicted (can be recomputed)
    Evicted {
        /// Last known vertex count (for preallocation)
        last_vertex_count: usize,
    },
}
impl LazyLevel {
    /// True when level data is resident in memory (valid or dirty).
    pub fn is_materialized(&self) -> bool {
        self.data().is_some()
    }
    /// True when the level is resident but marked for recomputation.
    pub fn is_dirty(&self) -> bool {
        matches!(self, Self::Dirty(_))
    }
    /// Borrow the level data, if resident.
    pub fn data(&self) -> Option<&LevelData> {
        if let Self::Materialized(data) | Self::Dirty(data) = self {
            Some(data)
        } else {
            None
        }
    }
    /// Mutably borrow the level data, if resident.
    pub fn data_mut(&mut self) -> Option<&mut LevelData> {
        if let Self::Materialized(data) | Self::Dirty(data) = self {
            Some(data)
        } else {
            None
        }
    }
}
/// Data stored for a j-tree level
#[derive(Debug, Clone)]
pub struct LevelData {
    /// Level index
    pub level: usize,
    /// Vertices in this level (compact u16 representation)
    pub vertices: Vec<u16>,
    /// Adjacency list (compact CSR layout)
    pub adjacency: CompactAdjacency,
    /// Cut value for this level
    pub cut_value: f64,
    /// Last access timestamp
    last_access: u64,
    /// Cached memory footprint in bytes; refreshed by `update_memory_size`
    memory_size: usize,
}
impl LevelData {
    /// Create an empty level with preallocated capacity.
    pub fn new(level: usize, capacity: usize) -> Self {
        Self {
            level,
            vertices: Vec::with_capacity(capacity),
            adjacency: CompactAdjacency::new(capacity),
            cut_value: f64::INFINITY, // no cut computed yet
            last_access: 0,
            memory_size: 0,
        }
    }
    /// Recompute the cached memory-size estimate from the current contents.
    pub fn update_memory_size(&mut self) {
        let vertex_bytes = self.vertices.len() * std::mem::size_of::<u16>();
        self.memory_size = vertex_bytes + self.adjacency.memory_size();
    }
    /// Cached memory footprint in bytes (call `update_memory_size` first).
    pub fn memory_size(&self) -> usize {
        self.memory_size
    }
}
/// Compact adjacency list using u16 vertex IDs, stored in CSR form:
/// vertex `v`'s neighbors live at `neighbors[offsets[v]..offsets[v+1]]`.
#[derive(Debug, Clone)]
pub struct CompactAdjacency {
    /// Offset for each vertex into the packed neighbors array
    offsets: Vec<u32>,
    /// Packed neighbors as (vertex_id, weight as u16)
    neighbors: Vec<(u16, u16)>,
}
impl CompactAdjacency {
    /// Create an empty adjacency with reserved capacity (no vertices yet).
    pub fn new(capacity: usize) -> Self {
        Self {
            offsets: Vec::with_capacity(capacity + 1),
            neighbors: Vec::new(),
        }
    }
    /// Build from an undirected edge list (u, v, weight); each edge is
    /// recorded on both endpoints. Panics if an endpoint id >= num_vertices.
    pub fn from_edges(edges: &[(u16, u16, u16)], num_vertices: usize) -> Self {
        // Bucket neighbors per endpoint, preserving edge order.
        let mut buckets: Vec<Vec<(u16, u16)>> = vec![Vec::new(); num_vertices];
        for &(u, v, w) in edges {
            buckets[u as usize].push((v, w));
            buckets[v as usize].push((u, w));
        }
        // Flatten buckets into CSR arrays.
        let mut offsets = Vec::with_capacity(num_vertices + 1);
        let mut neighbors = Vec::with_capacity(edges.len() * 2);
        offsets.push(0u32);
        for bucket in buckets {
            neighbors.extend(bucket);
            offsets.push(neighbors.len() as u32);
        }
        Self { offsets, neighbors }
    }
    /// CSR range for vertex `v`, or None when `v` is out of range.
    fn range(&self, v: u16) -> Option<(usize, usize)> {
        let idx = v as usize;
        if idx + 1 < self.offsets.len() {
            Some((self.offsets[idx] as usize, self.offsets[idx + 1] as usize))
        } else {
            None
        }
    }
    /// Neighbors of `v` (empty slice for out-of-range vertices).
    pub fn neighbors(&self, v: u16) -> &[(u16, u16)] {
        match self.range(v) {
            Some((start, end)) => &self.neighbors[start..end],
            None => &[],
        }
    }
    /// Degree of `v` (0 for out-of-range vertices).
    pub fn degree(&self, v: u16) -> usize {
        self.range(v).map_or(0, |(start, end)| end - start)
    }
    /// Memory footprint of the CSR arrays, in bytes.
    pub fn memory_size(&self) -> usize {
        self.offsets.len() * std::mem::size_of::<u32>()
            + self.neighbors.len() * std::mem::size_of::<(u16, u16)>()
    }
    /// Number of vertices (0 for an adjacency built via `new`).
    pub fn num_vertices(&self) -> usize {
        self.offsets.len().saturating_sub(1)
    }
}
/// Pool allocator for j-tree levels
///
/// Tracks materialized levels, an LRU order for eviction, and running
/// allocation/memory statistics. All interior state sits behind `RwLock`s
/// and atomics so a pool instance can be shared across threads.
pub struct LevelPool {
    config: PoolConfig,
    /// Levels storage, keyed by level index
    levels: RwLock<HashMap<usize, LazyLevel>>,
    /// LRU tracking; `materialize` pushes accessed levels to the back —
    /// presumably the front is the eviction candidate (confirm against the
    /// eviction logic, which is defined past this chunk)
    lru_order: RwLock<VecDeque<usize>>,
    /// Operation counter
    operation_counter: AtomicU64,
    /// Current memory usage in bytes
    memory_usage: AtomicUsize,
    /// Statistics counters (lifetime totals)
    allocations: AtomicU64,
    deallocations: AtomicU64,
    evictions: AtomicU64,
    peak_memory: AtomicUsize,
    /// Free list for reusable allocations
    free_list: RwLock<Vec<LevelData>>,
}
impl LevelPool {
    /// Create new level pool with default config
    pub fn new() -> Self {
        Self::with_config(PoolConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: PoolConfig) -> Self {
        Self {
            config,
            levels: RwLock::new(HashMap::new()),
            lru_order: RwLock::new(VecDeque::new()),
            operation_counter: AtomicU64::new(0),
            memory_usage: AtomicUsize::new(0),
            allocations: AtomicU64::new(0),
            deallocations: AtomicU64::new(0),
            evictions: AtomicU64::new(0),
            peak_memory: AtomicUsize::new(0),
            free_list: RwLock::new(Vec::new()),
        }
    }
    /// Get or materialize a level.
    ///
    /// Touches the LRU entry, then returns a clone of the stored state
    /// (which may be `Unmaterialized`/`Evicted`); `None` for unknown indices.
    pub fn get_level(&self, level_idx: usize) -> Option<LazyLevel> {
        self.touch(level_idx);
        let levels = self.levels.read().unwrap();
        levels.get(&level_idx).cloned()
    }
    /// Check if level is materialized (covers both clean and dirty states).
    pub fn is_materialized(&self, level_idx: usize) -> bool {
        let levels = self.levels.read().unwrap();
        levels
            .get(&level_idx)
            .map(|l| l.is_materialized())
            .unwrap_or(false)
    }
    /// Materialize a level with data.
    ///
    /// May first evict the LRU level to respect the materialized-level cap
    /// and memory budget. Marks the level most-recently-used.
    pub fn materialize(&self, level_idx: usize, data: LevelData) {
        self.ensure_capacity();
        let memory_size = data.memory_size();
        self.memory_usage.fetch_add(memory_size, Ordering::Relaxed);
        // Update peak memory (best-effort under concurrency: separate
        // relaxed load/compare/store, so a racing writer may be missed).
        let current = self.memory_usage.load(Ordering::Relaxed);
        let peak = self.peak_memory.load(Ordering::Relaxed);
        if current > peak {
            self.peak_memory.store(current, Ordering::Relaxed);
        }
        let mut levels = self.levels.write().unwrap();
        levels.insert(level_idx, LazyLevel::Materialized(data));
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
        lru.push_back(level_idx);
        self.allocations.fetch_add(1, Ordering::Relaxed);
    }
    /// Mark level as dirty (data kept, but flagged for recomputation).
    ///
    /// No-op unless the level is currently `Materialized`.
    pub fn mark_dirty(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            // Move the payload between variants instead of deep-cloning the
            // whole LevelData (the previous implementation cloned on every
            // call just to inspect the variant).
            let current = std::mem::replace(level, LazyLevel::Unmaterialized);
            *level = match current {
                LazyLevel::Materialized(data) => LazyLevel::Dirty(data),
                other => other,
            };
        }
    }
    /// Mark level as clean (after recomputation).
    ///
    /// No-op unless the level is currently `Dirty`.
    pub fn mark_clean(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            // Same move-based transition as `mark_dirty`, avoiding a clone.
            let current = std::mem::replace(level, LazyLevel::Unmaterialized);
            *level = match current {
                LazyLevel::Dirty(data) => LazyLevel::Materialized(data),
                other => other,
            };
        }
    }
    /// Evict a level (lazy deallocation).
    ///
    /// Transitions the slot to `Evicted`, releases its accounted memory and,
    /// when `lazy_dealloc` is enabled, parks the payload on the bounded free
    /// list for reuse by `allocate_level`. Removes the level from LRU order.
    pub fn evict(&self, level_idx: usize) {
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            let last_vertex_count = level.data().map(|d| d.vertices.len()).unwrap_or(0);
            let memory_freed = level.data().map(|d| d.memory_size()).unwrap_or(0);
            // Swap the tombstone in first so the payload can be moved (not
            // cloned, as before) into the free list.
            let previous = std::mem::replace(level, LazyLevel::Evicted { last_vertex_count });
            if self.config.lazy_dealloc {
                if let LazyLevel::Materialized(data) | LazyLevel::Dirty(data) = previous {
                    let mut free_list = self.free_list.write().unwrap();
                    // Bound the free list so recycling cannot hoard memory.
                    if free_list.len() < 10 {
                        free_list.push(data);
                    }
                }
            }
            self.memory_usage.fetch_sub(memory_freed, Ordering::Relaxed);
            self.evictions.fetch_add(1, Ordering::Relaxed);
            self.deallocations.fetch_add(1, Ordering::Relaxed);
        }
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
    }
    /// Ensure we have capacity (evict if needed).
    ///
    /// Evicts at most one LRU level for the materialized-level cap, then
    /// keeps evicting while over the optional memory budget.
    fn ensure_capacity(&self) {
        let levels = self.levels.read().unwrap();
        let materialized_count = levels.values().filter(|l| l.is_materialized()).count();
        drop(levels);
        if materialized_count >= self.config.max_materialized_levels {
            // Evict least recently used
            let lru = self.lru_order.read().unwrap();
            if let Some(&evict_idx) = lru.front() {
                drop(lru);
                self.evict(evict_idx);
            }
        }
        // Also check memory budget (0 disables the budget check).
        if self.config.memory_budget > 0 {
            while self.memory_usage.load(Ordering::Relaxed) > self.config.memory_budget {
                let lru = self.lru_order.read().unwrap();
                if let Some(&evict_idx) = lru.front() {
                    drop(lru);
                    self.evict(evict_idx);
                } else {
                    // Nothing left to evict; give up rather than spin.
                    break;
                }
            }
        }
    }
    /// Update access timestamp for level and move it to the back of the LRU.
    fn touch(&self, level_idx: usize) {
        let timestamp = self.operation_counter.fetch_add(1, Ordering::Relaxed);
        let mut levels = self.levels.write().unwrap();
        if let Some(level) = levels.get_mut(&level_idx) {
            if let Some(data) = level.data_mut() {
                data.last_access = timestamp;
            }
        }
        // Release `levels` before taking the LRU lock.
        drop(levels);
        // Update LRU order
        let mut lru = self.lru_order.write().unwrap();
        lru.retain(|&l| l != level_idx);
        lru.push_back(level_idx);
    }
    /// Get a recycled allocation or create new.
    ///
    /// NOTE(review): a recycled `LevelData` only has `level`, `vertices` and
    /// `cut_value` reset — callers are assumed to rebuild the adjacency.
    pub fn allocate_level(&self, level_idx: usize, capacity: usize) -> LevelData {
        // Try to get from free list
        let mut free_list = self.free_list.write().unwrap();
        if let Some(mut data) = free_list.pop() {
            data.level = level_idx;
            data.vertices.clear();
            data.cut_value = f64::INFINITY;
            return data;
        }
        drop(free_list);
        // Allocate new
        LevelData::new(level_idx, capacity)
    }
    /// Get pool statistics (counters are relaxed-atomic snapshots).
    pub fn stats(&self) -> PoolStats {
        let levels = self.levels.read().unwrap();
        let materialized_count = levels.values().filter(|l| l.is_materialized()).count();
        PoolStats {
            allocations: self.allocations.load(Ordering::Relaxed),
            deallocations: self.deallocations.load(Ordering::Relaxed),
            pool_size_bytes: self.memory_usage.load(Ordering::Relaxed),
            materialized_levels: materialized_count,
            evictions: self.evictions.load(Ordering::Relaxed),
            peak_memory: self.peak_memory.load(Ordering::Relaxed),
        }
    }
    /// Get current memory usage in bytes
    pub fn memory_usage(&self) -> usize {
        self.memory_usage.load(Ordering::Relaxed)
    }
    /// Clear all levels.
    ///
    /// Drops every level and resets current usage; peak memory and the
    /// allocation/eviction counters are intentionally preserved.
    pub fn clear(&self) {
        let mut levels = self.levels.write().unwrap();
        levels.clear();
        let mut lru = self.lru_order.write().unwrap();
        lru.clear();
        self.memory_usage.store(0, Ordering::Relaxed);
    }
}
impl Default for LevelPool {
    fn default() -> Self {
        Self::new()
    }
}
/// Vertex ID converter for compact representations
///
/// Bidirectional map between full-width `VertexId`s and dense `u16` IDs
/// assigned in first-seen order (so at most 65536 distinct vertices).
pub struct CompactVertexMapper {
    /// Original vertex ID to compact ID
    to_compact: HashMap<VertexId, u16>,
    /// Compact ID to original vertex ID (index = compact ID)
    to_original: Vec<VertexId>,
    /// Next compact ID
    next_id: u16,
}
impl CompactVertexMapper {
    /// Create new mapper
    pub fn new() -> Self {
        Self {
            to_compact: HashMap::new(),
            to_original: Vec::new(),
            next_id: 0,
        }
    }
    /// Create from vertex list.
    ///
    /// Duplicates receive the same compact ID; IDs follow first-seen order.
    pub fn from_vertices(vertices: &[VertexId]) -> Self {
        let mut mapper = Self::new();
        for &v in vertices {
            mapper.get_or_insert(v);
        }
        mapper
    }
    /// Get compact ID, creating if needed.
    ///
    /// # Panics
    /// Panics when the u16 ID space is exhausted. The previous `+= 1` would
    /// wrap silently in release builds and start handing out duplicate
    /// compact IDs.
    pub fn get_or_insert(&mut self, original: VertexId) -> u16 {
        if let Some(&compact) = self.to_compact.get(&original) {
            return compact;
        }
        let compact = self.next_id;
        self.next_id = self
            .next_id
            .checked_add(1)
            .expect("CompactVertexMapper: u16 compact ID space exhausted");
        self.to_compact.insert(original, compact);
        self.to_original.push(original);
        compact
    }
    /// Get compact ID if exists
    pub fn get(&self, original: VertexId) -> Option<u16> {
        self.to_compact.get(&original).copied()
    }
    /// Get original vertex ID from compact
    pub fn to_original(&self, compact: u16) -> Option<VertexId> {
        self.to_original.get(compact as usize).copied()
    }
    /// Number of mapped vertices
    pub fn len(&self) -> usize {
        self.to_original.len()
    }
    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.to_original.is_empty()
    }
}
impl Default for CompactVertexMapper {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_lazy_level_states() {
        // Materialized and Dirty both count as "materialized"; only Dirty
        // reports is_dirty().
        let level = LazyLevel::Unmaterialized;
        assert!(!level.is_materialized());
        let data = LevelData::new(0, 100);
        let level = LazyLevel::Materialized(data.clone());
        assert!(level.is_materialized());
        assert!(!level.is_dirty());
        let level = LazyLevel::Dirty(data);
        assert!(level.is_materialized());
        assert!(level.is_dirty());
    }
    #[test]
    fn test_compact_adjacency() {
        // Triangle: every vertex has degree 2.
        let edges = vec![(0u16, 1u16, 10u16), (1, 2, 20), (2, 0, 30)];
        let adj = CompactAdjacency::from_edges(&edges, 3);
        assert_eq!(adj.num_vertices(), 3);
        assert_eq!(adj.degree(0), 2);
        assert_eq!(adj.degree(1), 2);
        assert_eq!(adj.degree(2), 2);
    }
    #[test]
    fn test_level_pool_materialize() {
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        assert!(pool.is_materialized(0));
        assert!(!pool.is_materialized(1));
    }
    #[test]
    fn test_level_pool_eviction() {
        // Cap at two materialized levels; the third insert evicts the LRU.
        let pool = LevelPool::with_config(PoolConfig {
            max_materialized_levels: 2,
            ..Default::default()
        });
        pool.materialize(0, LevelData::new(0, 100));
        pool.materialize(1, LevelData::new(1, 100));
        assert!(pool.is_materialized(0));
        assert!(pool.is_materialized(1));
        // This should evict level 0
        pool.materialize(2, LevelData::new(2, 100));
        assert!(!pool.is_materialized(0));
        assert!(pool.is_materialized(1));
        assert!(pool.is_materialized(2));
    }
    #[test]
    fn test_level_pool_dirty() {
        // Round-trip: Materialized -> Dirty -> Materialized.
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        pool.mark_dirty(0);
        if let Some(LazyLevel::Dirty(_)) = pool.get_level(0) {
            // OK
        } else {
            panic!("Level should be dirty");
        }
        pool.mark_clean(0);
        if let Some(LazyLevel::Materialized(_)) = pool.get_level(0) {
            // OK
        } else {
            panic!("Level should be clean");
        }
    }
    #[test]
    fn test_compact_vertex_mapper() {
        // Re-inserting an existing vertex must return its original ID.
        let mut mapper = CompactVertexMapper::new();
        let c1 = mapper.get_or_insert(100);
        let c2 = mapper.get_or_insert(200);
        let c3 = mapper.get_or_insert(100); // Should return same as c1
        assert_eq!(c1, 0);
        assert_eq!(c2, 1);
        assert_eq!(c3, 0);
        assert_eq!(mapper.to_original(c1), Some(100));
        assert_eq!(mapper.to_original(c2), Some(200));
    }
    #[test]
    fn test_pool_stats() {
        let pool = LevelPool::new();
        let data = LevelData::new(0, 100);
        pool.materialize(0, data);
        let stats = pool.stats();
        assert_eq!(stats.allocations, 1);
        assert_eq!(stats.materialized_levels, 1);
    }
    #[test]
    fn test_level_data_memory_size() {
        // The estimate is only refreshed by an explicit update call.
        let mut data = LevelData::new(0, 100);
        data.vertices = vec![0, 1, 2, 3, 4];
        data.update_memory_size();
        assert!(data.memory_size() > 0);
    }
}

View File

@@ -0,0 +1,566 @@
//! SIMD-Optimized Distance Array Operations
//!
//! Provides vectorized operations for distance arrays:
//! - Parallel min/max finding
//! - Batch distance updates
//! - Vector comparisons
//!
//! Uses WASM SIMD128 when available, falls back to scalar.
use crate::graph::VertexId;
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;
/// Alignment for SIMD operations (64 bytes for AVX-512 compatibility)
pub const SIMD_ALIGNMENT: usize = 64;
/// Number of f64 elements per SIMD operation
///
/// NOTE(review): WASM SIMD128 vectors hold only 2 x f64, and the wasm paths
/// below process pairs; 4 matches the scalar fallback's unroll width —
/// confirm the intended meaning of this constant.
pub const SIMD_LANES: usize = 4; // scalar-unroll width; wasm32 paths use 2-lane f64x2
/// Aligned distance array for SIMD operations
///
/// NOTE(review): `align(64)` aligns the struct itself (the `Vec` header),
/// not the heap buffer the distances live in — confirm whether heap-buffer
/// alignment was intended.
#[repr(C, align(64))]
pub struct DistanceArray {
    /// Raw distance values (index = vertex ID; missing entries read as +inf)
    data: Vec<f64>,
    /// Number of vertices
    len: usize,
}
impl DistanceArray {
    /// Create new distance array initialized to infinity
    pub fn new(size: usize) -> Self {
        Self {
            data: vec![f64::INFINITY; size],
            len: size,
        }
    }
    /// Create from slice
    pub fn from_slice(slice: &[f64]) -> Self {
        Self {
            data: slice.to_vec(),
            len: slice.len(),
        }
    }
    /// Get distance for vertex.
    ///
    /// Out-of-range vertices read as +inf (unreached).
    #[inline]
    pub fn get(&self, v: VertexId) -> f64 {
        self.data.get(v as usize).copied().unwrap_or(f64::INFINITY)
    }
    /// Set distance for vertex.
    ///
    /// Out-of-range vertices are silently ignored.
    #[inline]
    pub fn set(&mut self, v: VertexId, distance: f64) {
        if (v as usize) < self.len {
            self.data[v as usize] = distance;
        }
    }
    /// Get number of elements
    pub fn len(&self) -> usize {
        self.len
    }
    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
    /// Reset all distances to infinity
    pub fn reset(&mut self) {
        // `fill` is the idiomatic (and memset-friendly) form of the previous
        // element-by-element loop.
        self.data.fill(f64::INFINITY);
    }
    /// Get raw slice
    pub fn as_slice(&self) -> &[f64] {
        &self.data
    }
    /// Get mutable slice
    pub fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.data
    }
}
/// SIMD-optimized distance operations
///
/// Stateless namespace: all operations are associated functions. wasm32
/// builds use SIMD128 intrinsics; other targets get unrolled scalar
/// fallbacks with identical signatures.
pub struct SimdDistanceOps;
impl SimdDistanceOps {
    /// Find minimum distance and its index using SIMD
    ///
    /// Returns (min_distance, min_index); ties keep the first occurrence,
    /// and an empty array yields (+inf, 0).
    ///
    /// NOTE(review): each lane is extracted and compared individually, so
    /// this is effectively a scalar scan over vector loads — a true
    /// `f64x2_pmin` reduction would be faster; confirm intent.
    #[cfg(target_arch = "wasm32")]
    pub fn find_min(distances: &DistanceArray) -> (f64, usize) {
        let data = distances.as_slice();
        if data.is_empty() {
            return (f64::INFINITY, 0);
        }
        let mut min_val = f64::INFINITY;
        let mut min_idx = 0;
        // Process in chunks of 2 (WASM SIMD has 128-bit = 2 x f64)
        let chunks = data.len() / 2;
        // SAFETY: offset + 1 <= data.len() - 1 for every i < chunks, so each
        // 16-byte load stays inside the slice (wasm v128 loads need no
        // particular alignment).
        unsafe {
            for i in 0..chunks {
                let offset = i * 2;
                let v = v128_load(data.as_ptr().add(offset) as *const v128);
                let a = f64x2_extract_lane::<0>(v);
                let b = f64x2_extract_lane::<1>(v);
                if a < min_val {
                    min_val = a;
                    min_idx = offset;
                }
                if b < min_val {
                    min_val = b;
                    min_idx = offset + 1;
                }
            }
        }
        // Handle remainder
        for i in (chunks * 2)..data.len() {
            if data[i] < min_val {
                min_val = data[i];
                min_idx = i;
            }
        }
        (min_val, min_idx)
    }
    /// Find minimum distance and its index (scalar fallback)
    ///
    /// Same contract as the wasm32 version: ties keep the first occurrence;
    /// empty input yields (+inf, 0).
    #[cfg(not(target_arch = "wasm32"))]
    pub fn find_min(distances: &DistanceArray) -> (f64, usize) {
        let data = distances.as_slice();
        if data.is_empty() {
            return (f64::INFINITY, 0);
        }
        let mut min_val = f64::INFINITY;
        let mut min_idx = 0;
        // Unrolled loop for better ILP
        let chunks = data.len() / 4;
        for i in 0..chunks {
            let base = i * 4;
            let a = data[base];
            let b = data[base + 1];
            let c = data[base + 2];
            let d = data[base + 3];
            if a < min_val {
                min_val = a;
                min_idx = base;
            }
            if b < min_val {
                min_val = b;
                min_idx = base + 1;
            }
            if c < min_val {
                min_val = c;
                min_idx = base + 2;
            }
            if d < min_val {
                min_val = d;
                min_idx = base + 3;
            }
        }
        // Handle remainder
        for i in (chunks * 4)..data.len() {
            if data[i] < min_val {
                min_val = data[i];
                min_idx = i;
            }
        }
        (min_val, min_idx)
    }
    /// Batch update: dist[i] = min(dist[i], dist[source] + weight[i])
    ///
    /// This is the core Dijkstra relaxation operation. Returns the number of
    /// entries improved.
    ///
    /// NOTE(review): when a pair contains one out-of-range index, BOTH
    /// neighbors in the pair are skipped — the scalar fallback relaxes each
    /// neighbor independently. The two paths diverge on invalid input;
    /// confirm whether this is acceptable.
    #[cfg(target_arch = "wasm32")]
    pub fn relax_batch(
        distances: &mut DistanceArray,
        source_dist: f64,
        neighbors: &[(VertexId, f64)], // (neighbor_id, edge_weight)
    ) -> usize {
        let mut updated = 0;
        let data = distances.as_mut_slice();
        // SAFETY: only lane construction/extraction intrinsics are used; no
        // raw memory access happens here, and indices are bounds-checked
        // before any store.
        unsafe {
            let source_v = f64x2_splat(source_dist);
            // Process pairs
            let pairs = neighbors.len() / 2;
            for i in 0..pairs {
                let idx0 = neighbors[i * 2].0 as usize;
                let idx1 = neighbors[i * 2 + 1].0 as usize;
                let w0 = neighbors[i * 2].1;
                let w1 = neighbors[i * 2 + 1].1;
                if idx0 < data.len() && idx1 < data.len() {
                    let weights = f64x2(w0, w1);
                    let new_dist = f64x2_add(source_v, weights);
                    let old0 = data[idx0];
                    let old1 = data[idx1];
                    let new0 = f64x2_extract_lane::<0>(new_dist);
                    let new1 = f64x2_extract_lane::<1>(new_dist);
                    if new0 < old0 {
                        data[idx0] = new0;
                        updated += 1;
                    }
                    if new1 < old1 {
                        data[idx1] = new1;
                        updated += 1;
                    }
                }
            }
        }
        // Handle odd remainder
        if neighbors.len() % 2 == 1 {
            let (idx, weight) = neighbors[neighbors.len() - 1];
            let idx = idx as usize;
            if idx < data.len() {
                let new_dist = source_dist + weight;
                if new_dist < data[idx] {
                    data[idx] = new_dist;
                    updated += 1;
                }
            }
        }
        updated
    }
    /// Batch update (scalar fallback)
    ///
    /// 4-way unrolled for instruction-level parallelism; each neighbor is
    /// relaxed independently. Returns the number of entries improved.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn relax_batch(
        distances: &mut DistanceArray,
        source_dist: f64,
        neighbors: &[(VertexId, f64)],
    ) -> usize {
        let mut updated = 0;
        let data = distances.as_mut_slice();
        // Process in chunks of 4 for better ILP
        let chunks = neighbors.len() / 4;
        for i in 0..chunks {
            let base = i * 4;
            let (idx0, w0) = neighbors[base];
            let (idx1, w1) = neighbors[base + 1];
            let (idx2, w2) = neighbors[base + 2];
            let (idx3, w3) = neighbors[base + 3];
            let new0 = source_dist + w0;
            let new1 = source_dist + w1;
            let new2 = source_dist + w2;
            let new3 = source_dist + w3;
            let idx0 = idx0 as usize;
            let idx1 = idx1 as usize;
            let idx2 = idx2 as usize;
            let idx3 = idx3 as usize;
            if idx0 < data.len() && new0 < data[idx0] {
                data[idx0] = new0;
                updated += 1;
            }
            if idx1 < data.len() && new1 < data[idx1] {
                data[idx1] = new1;
                updated += 1;
            }
            if idx2 < data.len() && new2 < data[idx2] {
                data[idx2] = new2;
                updated += 1;
            }
            if idx3 < data.len() && new3 < data[idx3] {
                data[idx3] = new3;
                updated += 1;
            }
        }
        // Handle remainder
        for i in (chunks * 4)..neighbors.len() {
            let (idx, weight) = neighbors[i];
            let idx = idx as usize;
            if idx < data.len() {
                let new_dist = source_dist + weight;
                if new_dist < data[idx] {
                    data[idx] = new_dist;
                    updated += 1;
                }
            }
        }
        updated
    }
    /// Count vertices with distance strictly less than threshold
    #[cfg(target_arch = "wasm32")]
    pub fn count_below_threshold(distances: &DistanceArray, threshold: f64) -> usize {
        let data = distances.as_slice();
        let mut count = 0;
        // SAFETY: same bounds argument as `find_min` — each 16-byte load
        // covers indices offset and offset + 1, both < data.len().
        unsafe {
            let thresh_v = f64x2_splat(threshold);
            let chunks = data.len() / 2;
            for i in 0..chunks {
                let offset = i * 2;
                let v = v128_load(data.as_ptr().add(offset) as *const v128);
                let cmp = f64x2_lt(v, thresh_v);
                // Extract comparison results. Each f64x2_lt lane is all-ones
                // or all-zeros, and i8x16_bitmask takes the top bit of each
                // of the 16 bytes, so bits 0-7 mirror lane 0 and bits 8-15
                // mirror lane 1.
                let mask = i8x16_bitmask(cmp);
                // Each f64 lane uses 8 bits in bitmask
                if mask & 0xFF != 0 {
                    count += 1;
                }
                if mask & 0xFF00 != 0 {
                    count += 1;
                }
            }
        }
        // Handle remainder
        for i in (data.len() / 2 * 2)..data.len() {
            if data[i] < threshold {
                count += 1;
            }
        }
        count
    }
    /// Count vertices with distance strictly less than threshold (scalar fallback)
    #[cfg(not(target_arch = "wasm32"))]
    pub fn count_below_threshold(distances: &DistanceArray, threshold: f64) -> usize {
        distances
            .as_slice()
            .iter()
            .filter(|&&d| d < threshold)
            .count()
    }
    /// Compute sum of distances (for average)
    ///
    /// Returns (sum, count) over finite entries only; NaN and ±inf are
    /// excluded from both.
    pub fn sum_finite(distances: &DistanceArray) -> (f64, usize) {
        let mut sum = 0.0;
        let mut count = 0;
        for &d in distances.as_slice() {
            if d.is_finite() {
                sum += d;
                count += 1;
            }
        }
        (sum, count)
    }
    /// Element-wise minimum of two distance arrays
    ///
    /// The result is truncated to the shorter of the two inputs.
    pub fn elementwise_min(a: &DistanceArray, b: &DistanceArray) -> DistanceArray {
        let len = a.len().min(b.len());
        let mut result = DistanceArray::new(len);
        let a_data = a.as_slice();
        let b_data = b.as_slice();
        let r_data = result.as_mut_slice();
        // Unrolled loop
        let chunks = len / 4;
        for i in 0..chunks {
            let base = i * 4;
            r_data[base] = a_data[base].min(b_data[base]);
            r_data[base + 1] = a_data[base + 1].min(b_data[base + 1]);
            r_data[base + 2] = a_data[base + 2].min(b_data[base + 2]);
            r_data[base + 3] = a_data[base + 3].min(b_data[base + 3]);
        }
        for i in (chunks * 4)..len {
            r_data[i] = a_data[i].min(b_data[i]);
        }
        result
    }
    /// Scale all distances by a factor
    ///
    /// Non-finite entries (±inf, NaN) are left unchanged so unreached
    /// vertices stay at +inf.
    pub fn scale(distances: &mut DistanceArray, factor: f64) {
        for d in distances.as_mut_slice() {
            if d.is_finite() {
                *d *= factor;
            }
        }
    }
}
/// Priority queue entry for Dijkstra with SIMD-friendly layout
///
/// Ordering is reversed on `distance` ("Reverse order for min-heap" below)
/// so a max-heap container behaves as a min-heap over distances.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct PriorityEntry {
    /// Distance (key)
    pub distance: f64,
    /// Vertex ID
    pub vertex: VertexId,
}
impl PriorityEntry {
    /// Create a new priority entry with given distance and vertex.
    pub fn new(distance: f64, vertex: VertexId) -> Self {
        Self { distance, vertex }
    }
}
impl PartialEq for PriorityEntry {
    // NOTE(review): f64 `==` means a NaN distance is never equal to itself,
    // which violates the reflexivity the `Eq` impl below promises — assumes
    // distances are never NaN.
    fn eq(&self, other: &Self) -> bool {
        self.distance == other.distance && self.vertex == other.vertex
    }
}
impl Eq for PriorityEntry {}
impl PartialOrd for PriorityEntry {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // Reverse order for min-heap
        other.distance.partial_cmp(&self.distance)
    }
}
impl Ord for PriorityEntry {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Incomparable pairs (NaN) collapse to Equal rather than panicking.
        self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_distance_array_basic() {
        // Unset entries must read as +inf.
        let mut arr = DistanceArray::new(10);
        arr.set(0, 1.0);
        arr.set(5, 5.0);
        assert_eq!(arr.get(0), 1.0);
        assert_eq!(arr.get(5), 5.0);
        assert_eq!(arr.get(9), f64::INFINITY);
    }
    #[test]
    fn test_find_min() {
        // Both the value and the index of the minimum must be reported.
        let mut arr = DistanceArray::new(100);
        arr.set(50, 1.0);
        arr.set(25, 0.5);
        arr.set(75, 2.0);
        let (min_val, min_idx) = SimdDistanceOps::find_min(&arr);
        assert_eq!(min_val, 0.5);
        assert_eq!(min_idx, 25);
    }
    #[test]
    fn test_find_min_empty() {
        // Empty input yields the +inf sentinel.
        let arr = DistanceArray::new(0);
        let (min_val, _) = SimdDistanceOps::find_min(&arr);
        assert!(min_val.is_infinite());
    }
    #[test]
    fn test_relax_batch() {
        // All four neighbors improve from +inf, so all four update.
        let mut arr = DistanceArray::new(10);
        arr.set(0, 0.0); // Source
        let neighbors = vec![(1, 1.0), (2, 2.0), (3, 3.0), (4, 4.0)];
        let updated = SimdDistanceOps::relax_batch(&mut arr, 0.0, &neighbors);
        assert_eq!(updated, 4);
        assert_eq!(arr.get(1), 1.0);
        assert_eq!(arr.get(2), 2.0);
        assert_eq!(arr.get(3), 3.0);
        assert_eq!(arr.get(4), 4.0);
    }
    #[test]
    fn test_relax_batch_no_update() {
        let mut arr = DistanceArray::from_slice(&[0.0, 0.5, 1.0, 1.5, 2.0]);
        let neighbors = vec![
            (1, 2.0), // New dist = 0 + 2.0 = 2.0 > 0.5
            (2, 3.0), // New dist = 0 + 3.0 = 3.0 > 1.0
        ];
        let updated = SimdDistanceOps::relax_batch(&mut arr, 0.0, &neighbors);
        assert_eq!(updated, 0); // No updates, existing distances are better
    }
    #[test]
    fn test_count_below_threshold() {
        // The comparison is strict (<), so entries equal to the threshold
        // are excluded; +inf never counts.
        let arr = DistanceArray::from_slice(&[0.0, 0.5, 1.0, 1.5, 2.0, f64::INFINITY]);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 1.0), 2);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 2.0), 4);
        assert_eq!(SimdDistanceOps::count_below_threshold(&arr, 10.0), 5);
    }
    #[test]
    fn test_sum_finite() {
        // Infinite entries are excluded from both the sum and the count.
        let arr = DistanceArray::from_slice(&[1.0, 2.0, 3.0, f64::INFINITY, f64::INFINITY]);
        let (sum, count) = SimdDistanceOps::sum_finite(&arr);
        assert_eq!(sum, 6.0);
        assert_eq!(count, 3);
    }
    #[test]
    fn test_elementwise_min() {
        let a = DistanceArray::from_slice(&[1.0, 5.0, 3.0, 7.0]);
        let b = DistanceArray::from_slice(&[2.0, 4.0, 6.0, 1.0]);
        let result = SimdDistanceOps::elementwise_min(&a, &b);
        assert_eq!(result.as_slice(), &[1.0, 4.0, 3.0, 1.0]);
    }
    #[test]
    fn test_scale() {
        // Finite entries scale; the +inf sentinel is preserved.
        let mut arr = DistanceArray::from_slice(&[1.0, 2.0, f64::INFINITY, 4.0]);
        SimdDistanceOps::scale(&mut arr, 2.0);
        assert_eq!(arr.get(0), 2.0);
        assert_eq!(arr.get(1), 4.0);
        assert!(arr.get(2).is_infinite());
        assert_eq!(arr.get(3), 8.0);
    }
    #[test]
    fn test_priority_entry_ordering() {
        let a = PriorityEntry::new(1.0, 1);
        let b = PriorityEntry::new(2.0, 2);
        // Min-heap ordering: smaller distance is "greater"
        assert!(a > b);
    }
}

View File

@@ -0,0 +1,609 @@
//! WASM Batch Operations and TypedArray Optimizations
//!
//! Optimizations specific to WebAssembly execution:
//! - Batch FFI calls to minimize overhead
//! - Pre-allocated WASM memory
//! - TypedArray bulk transfers
//! - Memory alignment for SIMD
//!
//! Target: 10x reduction in FFI overhead
use crate::graph::VertexId;
use std::collections::HashMap;
/// Configuration for WASM batch operations
///
/// `max_batch_size` caps how many items a single queued operation carries;
/// larger inputs are split into chunks by the `queue_*` methods.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Maximum batch size (per queued operation)
    pub max_batch_size: usize,
    /// Pre-allocated buffer size in bytes (sizes the transfer buffers)
    pub buffer_size: usize,
    /// Alignment for SIMD operations
    pub alignment: usize,
    /// Enable memory pooling
    pub memory_pooling: bool,
}
impl Default for BatchConfig {
    fn default() -> Self {
        Self {
            max_batch_size: 1024,
            buffer_size: 64 * 1024, // 64KB
            alignment: 64, // AVX-512 alignment
            memory_pooling: true,
        }
    }
}
/// Batch operation types for minimizing FFI calls
///
/// One queued unit of work; payload length is bounded by
/// `BatchConfig::max_batch_size` when built via the `queue_*` methods.
#[derive(Debug, Clone)]
pub enum BatchOperation {
    /// Insert multiple edges (source, target, weight)
    InsertEdges(Vec<(VertexId, VertexId, f64)>),
    /// Delete multiple edges (source, target)
    DeleteEdges(Vec<(VertexId, VertexId)>),
    /// Update multiple weights (source, target, new weight)
    UpdateWeights(Vec<(VertexId, VertexId, f64)>),
    /// Query multiple distances (source, target)
    QueryDistances(Vec<(VertexId, VertexId)>),
    /// Compute cuts for multiple partitions
    ComputeCuts(Vec<Vec<VertexId>>),
}
/// Result from batch operation
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Operation type (e.g. "InsertEdges")
    pub operation: String,
    /// Number of items processed
    pub items_processed: usize,
    /// Time taken in microseconds (0 when the executor did not measure it)
    pub time_us: u64,
    /// Results (for queries; empty for mutations)
    pub results: Vec<f64>,
    /// Error message if any
    pub error: Option<String>,
}
/// TypedArray transfer for efficient WASM memory access
///
/// Provides aligned memory buffers for bulk data transfer between
/// JavaScript and WASM.
///
/// NOTE(review): `align(64)` aligns the struct (the Vec headers), not the
/// heap buffers — confirm whether buffer alignment was intended.
#[repr(C, align(64))]
pub struct TypedArrayTransfer {
    /// Float64 buffer for weights/distances
    pub f64_buffer: Vec<f64>,
    /// Uint64 buffer for vertex IDs
    pub u64_buffer: Vec<u64>,
    /// Uint32 buffer for indices/counts
    pub u32_buffer: Vec<u32>,
    /// Byte buffer for raw data
    pub byte_buffer: Vec<u8>,
    /// Current position in buffers
    // NOTE(review): only ever reset to 0 in this impl, never advanced or
    // read — possibly vestigial.
    position: usize,
}
impl TypedArrayTransfer {
    /// Create new transfer with the default capacity of 1024 entries.
    pub fn new() -> Self {
        Self::with_capacity(1024)
    }
    /// Create with specific capacity.
    ///
    /// The u32 buffer gets 2x and the byte buffer 8x the base capacity.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            f64_buffer: Vec::with_capacity(capacity),
            u64_buffer: Vec::with_capacity(capacity),
            u32_buffer: Vec::with_capacity(capacity * 2),
            byte_buffer: Vec::with_capacity(capacity * 8),
            position: 0,
        }
    }
    /// Reset buffers for reuse (capacities are retained).
    pub fn reset(&mut self) {
        self.f64_buffer.clear();
        self.u64_buffer.clear();
        self.u32_buffer.clear();
        self.byte_buffer.clear();
        self.position = 0;
    }
    /// Add edge to transfer buffer: the endpoint pair goes into the id
    /// buffer, the weight into the f64 buffer.
    pub fn add_edge(&mut self, source: VertexId, target: VertexId, weight: f64) {
        self.u64_buffer.extend_from_slice(&[source, target]);
        self.f64_buffer.push(weight);
    }
    /// Add vertex to transfer buffer
    pub fn add_vertex(&mut self, vertex: VertexId) {
        self.u64_buffer.push(vertex);
    }
    /// Add distance result
    pub fn add_distance(&mut self, distance: f64) {
        self.f64_buffer.push(distance);
    }
    /// Get edges from buffer.
    ///
    /// Pairs weight `i` with ids `2i` and `2i + 1`; ids missing from the id
    /// buffer default to 0.
    pub fn get_edges(&self) -> Vec<(VertexId, VertexId, f64)> {
        self.f64_buffer
            .iter()
            .enumerate()
            .map(|(i, &weight)| {
                let source = self.u64_buffer.get(2 * i).copied().unwrap_or(0);
                let target = self.u64_buffer.get(2 * i + 1).copied().unwrap_or(0);
                (source, target, weight)
            })
            .collect()
    }
    /// Get f64 buffer as raw pointer (for FFI)
    pub fn f64_ptr(&self) -> *const f64 {
        self.f64_buffer.as_ptr()
    }
    /// Get u64 buffer as raw pointer (for FFI)
    pub fn u64_ptr(&self) -> *const u64 {
        self.u64_buffer.as_ptr()
    }
    /// Get buffer lengths as (f64, u64, u32) element counts.
    pub fn len(&self) -> (usize, usize, usize) {
        let floats = self.f64_buffer.len();
        let ids = self.u64_buffer.len();
        let indices = self.u32_buffer.len();
        (floats, ids, indices)
    }
    /// Check if empty (considers only the f64 and u64 buffers).
    pub fn is_empty(&self) -> bool {
        self.f64_buffer.is_empty() && self.u64_buffer.is_empty()
    }
}
impl Default for TypedArrayTransfer {
    fn default() -> Self {
        Self::new()
    }
}
/// WASM batch operations executor
///
/// Queues operations and executes them in one pass to amortize FFI
/// overhead; running totals feed `stats()`.
pub struct WasmBatchOps {
    config: BatchConfig,
    /// Transfer buffer (reused across operations)
    transfer: TypedArrayTransfer,
    /// Pending operations (FIFO)
    pending: Vec<BatchOperation>,
    /// Statistics
    total_ops: u64,
    total_items: u64,
    total_time_us: u64,
}
impl WasmBatchOps {
    /// Create new batch executor with default config
    pub fn new() -> Self {
        Self::with_config(BatchConfig::default())
    }
    /// Create with custom config
    pub fn with_config(config: BatchConfig) -> Self {
        Self {
            transfer: TypedArrayTransfer::with_capacity(config.buffer_size / 8),
            config,
            pending: Vec::new(),
            total_ops: 0,
            total_items: 0,
            total_time_us: 0,
        }
    }
    /// Queue `items`, splitting into chunks of at most `max_batch_size`
    /// items and wrapping each chunk with `make_op`.
    ///
    /// Shared by all `queue_*` methods (this logic was previously
    /// triplicated inline).
    fn queue_chunked<T: Clone>(
        &mut self,
        items: Vec<T>,
        make_op: impl Fn(Vec<T>) -> BatchOperation,
    ) {
        if items.len() > self.config.max_batch_size {
            // Split into multiple batches
            for chunk in items.chunks(self.config.max_batch_size) {
                self.pending.push(make_op(chunk.to_vec()));
            }
        } else {
            self.pending.push(make_op(items));
        }
    }
    /// Queue edge insertions for batch processing
    pub fn queue_insert_edges(&mut self, edges: Vec<(VertexId, VertexId, f64)>) {
        self.queue_chunked(edges, BatchOperation::InsertEdges);
    }
    /// Queue edge deletions for batch processing
    pub fn queue_delete_edges(&mut self, edges: Vec<(VertexId, VertexId)>) {
        self.queue_chunked(edges, BatchOperation::DeleteEdges);
    }
    /// Queue distance queries for batch processing
    pub fn queue_distance_queries(&mut self, pairs: Vec<(VertexId, VertexId)>) {
        self.queue_chunked(pairs, BatchOperation::QueryDistances);
    }
    /// Execute all pending operations.
    ///
    /// Drains the queue in FIFO order; each result carries its measured
    /// wall time. The transfer buffer is reset afterwards.
    pub fn execute_batch(&mut self) -> Vec<BatchResult> {
        // Drain pending operations to avoid borrow conflict
        let pending_ops: Vec<_> = self.pending.drain(..).collect();
        let mut results = Vec::with_capacity(pending_ops.len());
        for op in pending_ops {
            let op_start = std::time::Instant::now();
            let mut result = self.execute_operation(op);
            let elapsed = op_start.elapsed().as_micros() as u64;
            // Bug fix: the measured time was previously discarded and every
            // result reported `time_us == 0`.
            result.time_us = elapsed;
            self.total_ops += 1;
            self.total_items += result.items_processed as u64;
            self.total_time_us += elapsed;
            results.push(result);
        }
        self.transfer.reset();
        results
    }
    /// Execute a single operation.
    ///
    /// Stages the payload into the transfer buffer; the native calls are
    /// simulated for now. `time_us` is filled in by the caller.
    fn execute_operation(&mut self, op: BatchOperation) -> BatchResult {
        match op {
            BatchOperation::InsertEdges(edges) => {
                let count = edges.len();
                // Prepare transfer buffer
                self.transfer.reset();
                for (u, v, w) in &edges {
                    self.transfer.add_edge(*u, *v, *w);
                }
                // In WASM, this would call the native insert function
                // For now, we simulate the batch operation
                BatchResult {
                    operation: "InsertEdges".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::DeleteEdges(edges) => {
                let count = edges.len();
                self.transfer.reset();
                for (u, v) in &edges {
                    self.transfer.add_vertex(*u);
                    self.transfer.add_vertex(*v);
                }
                BatchResult {
                    operation: "DeleteEdges".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::UpdateWeights(updates) => {
                let count = updates.len();
                self.transfer.reset();
                for (u, v, w) in &updates {
                    self.transfer.add_edge(*u, *v, *w);
                }
                BatchResult {
                    operation: "UpdateWeights".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: Vec::new(),
                    error: None,
                }
            }
            BatchOperation::QueryDistances(pairs) => {
                let count = pairs.len();
                self.transfer.reset();
                for (u, v) in &pairs {
                    self.transfer.add_vertex(*u);
                    self.transfer.add_vertex(*v);
                }
                // Simulate distance results
                let results: Vec<f64> = pairs
                    .iter()
                    .map(|(u, v)| if u == v { 0.0 } else { 1.0 })
                    .collect();
                BatchResult {
                    operation: "QueryDistances".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results,
                    error: None,
                }
            }
            BatchOperation::ComputeCuts(partitions) => {
                let count = partitions.len();
                BatchResult {
                    operation: "ComputeCuts".to_string(),
                    items_processed: count,
                    time_us: 0,
                    results: vec![0.0; count],
                    error: None,
                }
            }
        }
    }
    /// Get number of pending operations
    pub fn pending_count(&self) -> usize {
        self.pending.len()
    }
    /// Get statistics (running totals plus derived averages)
    pub fn stats(&self) -> BatchStats {
        BatchStats {
            total_operations: self.total_ops,
            total_items: self.total_items,
            total_time_us: self.total_time_us,
            avg_items_per_op: if self.total_ops > 0 {
                self.total_items as f64 / self.total_ops as f64
            } else {
                0.0
            },
            avg_time_per_item_us: if self.total_items > 0 {
                self.total_time_us as f64 / self.total_items as f64
            } else {
                0.0
            },
        }
    }
    /// Clear pending operations (statistics are preserved)
    pub fn clear(&mut self) {
        self.pending.clear();
        self.transfer.reset();
    }
}
impl Default for WasmBatchOps {
    fn default() -> Self {
        Self::new()
    }
}
/// Statistics for batch operations
///
/// Snapshot produced by `WasmBatchOps::stats()`; averages are 0 when no
/// operations/items have been processed.
#[derive(Debug, Clone, Default)]
pub struct BatchStats {
    /// Total operations executed
    pub total_operations: u64,
    /// Total items processed
    pub total_items: u64,
    /// Total time in microseconds
    pub total_time_us: u64,
    /// Average items per operation
    pub avg_items_per_op: f64,
    /// Average time per item in microseconds
    pub avg_time_per_item_us: f64,
}
/// Pre-allocated WASM memory region
///
/// A simple bump allocator over a fixed, zero-initialized buffer: `alloc`
/// hands out byte offsets, `reset` reclaims everything at once.
#[repr(C, align(64))]
pub struct WasmMemoryRegion {
    /// Raw memory
    data: Vec<u8>,
    /// Capacity in bytes (always a multiple of 64)
    capacity: usize,
    /// Current offset (bump pointer)
    offset: usize,
}
impl WasmMemoryRegion {
    /// Create new memory region; `size` is rounded up to a 64-byte multiple.
    pub fn new(size: usize) -> Self {
        // Round up to alignment
        let aligned_size = (size + 63) & !63;
        Self {
            data: vec![0u8; aligned_size],
            capacity: aligned_size,
            offset: 0,
        }
    }
    /// Allocate `size` bytes aligned to `align` (must be a power of two).
    ///
    /// Returns the starting offset of the allocated region, or `None` when
    /// the region is exhausted (the bump pointer is left unchanged on
    /// failure). Use `get_slice` to access the allocated memory safely.
    pub fn alloc(&mut self, size: usize, align: usize) -> Option<usize> {
        // The mask trick below is only valid for power-of-two alignments;
        // `align == 0` would previously underflow and panic.
        debug_assert!(align.is_power_of_two(), "alignment must be a power of two");
        // Align offset, guarding the arithmetic against overflow.
        let aligned_offset = self.offset.checked_add(align - 1)? & !(align - 1);
        let end = aligned_offset.checked_add(size)?;
        if end > self.capacity {
            return None;
        }
        self.offset = end;
        Some(aligned_offset)
    }
    /// Get a slice at the given offset, or `None` if out of range.
    pub fn get_slice(&self, offset: usize, len: usize) -> Option<&[u8]> {
        // checked_add prevents `offset + len` from wrapping on huge inputs.
        let end = offset.checked_add(len)?;
        if end <= self.capacity {
            Some(&self.data[offset..end])
        } else {
            None
        }
    }
    /// Get a mutable slice at the given offset, or `None` if out of range.
    pub fn get_slice_mut(&mut self, offset: usize, len: usize) -> Option<&mut [u8]> {
        let end = offset.checked_add(len)?;
        if end <= self.capacity {
            Some(&mut self.data[offset..end])
        } else {
            None
        }
    }
    /// Reset region for reuse (contents are NOT zeroed).
    pub fn reset(&mut self) {
        self.offset = 0;
        // Optional: zero memory
        // self.data.fill(0);
    }
    /// Get remaining capacity
    pub fn remaining(&self) -> usize {
        self.capacity - self.offset
    }
    /// Get used bytes
    pub fn used(&self) -> usize {
        self.offset
    }
    /// Get raw pointer
    pub fn as_ptr(&self) -> *const u8 {
        self.data.as_ptr()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_typed_array_transfer() {
        let mut xfer = TypedArrayTransfer::new();
        xfer.add_edge(1, 2, 1.0);
        xfer.add_edge(2, 3, 2.0);
        let stored = xfer.get_edges();
        assert_eq!(stored.len(), 2);
        assert_eq!(stored[0], (1, 2, 1.0));
        assert_eq!(stored[1], (2, 3, 2.0));
    }

    #[test]
    fn test_batch_queue() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0), (2, 3, 2.0)]);
        assert_eq!(ops.pending_count(), 1);
    }

    #[test]
    fn test_batch_execute() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0)]);
        ops.queue_delete_edges(vec![(3, 4)]);
        let outcome = ops.execute_batch();
        assert_eq!(outcome.len(), 2);
        assert_eq!(outcome[0].operation, "InsertEdges");
        assert_eq!(outcome[1].operation, "DeleteEdges");
        // Executing the batch drains the pending queue.
        assert_eq!(ops.pending_count(), 0);
    }

    #[test]
    fn test_batch_splitting() {
        let config = BatchConfig {
            max_batch_size: 10,
            ..Default::default()
        };
        let mut ops = WasmBatchOps::with_config(config);
        // 25 edges with a cap of 10 per batch -> ceil(25 / 10) = 3 batches.
        let edges: Vec<_> = (0..25).map(|i| (i, i + 1, 1.0)).collect();
        ops.queue_insert_edges(edges);
        assert_eq!(ops.pending_count(), 3);
    }

    #[test]
    fn test_distance_queries() {
        let mut ops = WasmBatchOps::new();
        ops.queue_distance_queries(vec![(1, 2), (2, 3), (1, 1)]);
        let outcome = ops.execute_batch();
        assert_eq!(outcome.len(), 1);
        assert_eq!(outcome[0].results.len(), 3);
        // Distance from a vertex to itself is zero.
        assert_eq!(outcome[0].results[2], 0.0);
    }

    #[test]
    fn test_wasm_memory_region() {
        let mut region = WasmMemoryRegion::new(1024);
        // 64-byte-aligned allocations succeed and respect alignment.
        let first = region.alloc(100, 64).expect("first allocation fits");
        assert_eq!(first % 64, 0);
        let second = region.alloc(200, 64);
        assert!(second.is_some());
        // The allocated range is readable.
        assert!(region.get_slice(first, 100).is_some());
        assert!(region.used() > 0);
        assert!(region.remaining() < 1024);
        region.reset();
        assert_eq!(region.used(), 0);
    }

    #[test]
    fn test_batch_stats() {
        let mut ops = WasmBatchOps::new();
        ops.queue_insert_edges(vec![(1, 2, 1.0), (2, 3, 2.0)]);
        let _ = ops.execute_batch();
        let snapshot = ops.stats();
        assert_eq!(snapshot.total_operations, 1);
        assert_eq!(snapshot.total_items, 2);
    }

    #[test]
    fn test_transfer_reset() {
        let mut xfer = TypedArrayTransfer::new();
        xfer.add_edge(1, 2, 1.0);
        assert!(!xfer.is_empty());
        xfer.reset();
        assert!(xfer.is_empty());
    }
}