Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ruvector_graph::cypher::parser::parse_cypher;
/// Benchmarks parsing of a minimal `MATCH ... RETURN` query.
fn bench_simple_match(c: &mut Criterion) {
    const QUERY: &str = "MATCH (n:Person) RETURN n";

    c.bench_function("parse simple MATCH", |bencher| {
        // The query is hidden behind `black_box` so it cannot be const-folded.
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of a relationship `MATCH` that also carries `WHERE`,
/// `ORDER BY` and `LIMIT` clauses.
fn bench_complex_match(c: &mut Criterion) {
    const QUERY: &str = "MATCH (a:Person)-[r:KNOWS]->(b:Person) WHERE a.age > 30 AND b.name = 'Alice' RETURN a.name, b.name, r.since ORDER BY r.since DESC LIMIT 10";

    c.bench_function("parse complex MATCH with WHERE", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of a `CREATE` statement with an inline property map.
fn bench_create_query(c: &mut Criterion) {
    const QUERY: &str = "CREATE (n:Person {name: 'Bob', age: 30, email: 'bob@example.com'})";

    c.bench_function("parse CREATE query", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of a pattern whose relationship points at several
/// comma-separated target nodes, plus a `WHERE` filter on the relationship.
fn bench_hyperedge_query(c: &mut Criterion) {
    const QUERY: &str = "MATCH (person)-[r:TRANSACTION]->(acc1:Account, acc2:Account, merchant:Merchant) WHERE r.amount > 1000 RETURN person, r, acc1, acc2, merchant";

    c.bench_function("parse hyperedge query", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of a `RETURN` clause made of aggregate function calls.
fn bench_aggregation_query(c: &mut Criterion) {
    const QUERY: &str =
        "MATCH (n:Person) RETURN COUNT(n), AVG(n.age), MAX(n.salary), COLLECT(n.name)";

    c.bench_function("parse aggregation query", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
// Bundle every Cypher parser benchmark above into the `benches` group; they
// are listed in the order they were defined.
criterion_group!(
benches,
bench_simple_match,
bench_complex_match,
bench_create_query,
bench_hyperedge_query,
bench_aggregation_query
);
// Generate the benchmark binary's entry point from the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,11 @@
// Placeholder benchmark for distributed query
// TODO: Implement comprehensive benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Stand-in benchmark for distributed queries: it only times passing the
/// constant `42` through `black_box` and exercises no project code yet.
fn distributed_query_benchmark(c: &mut Criterion) {
    c.bench_function("placeholder", |bencher| {
        bencher.iter(|| black_box(42));
    });
}
// Register the placeholder as the only member of `benches` and generate the
// benchmark binary's entry point from it.
criterion_group!(benches, distributed_query_benchmark);
criterion_main!(benches);

View File

@@ -0,0 +1,324 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use ruvector_graph::types::{EdgeId, NodeId, Properties, PropertyValue};
use ruvector_graph::{Edge, GraphDB, Node};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
/// Helper that builds the graph used by the benchmarks.
///
/// Returns a newly constructed `GraphDB` obtained from `GraphDB::new()`; the
/// benchmarks call this whenever they need a graph of their own.
fn create_test_graph() -> GraphDB {
GraphDB::new()
}
/// Benchmark: inserting nodes one `create_node` call at a time.
///
/// For each size (1, 10, 100, 1000) every timed iteration builds a fresh graph
/// and inserts that many `Person` nodes carrying a `name` and a `value`
/// property. Graph construction is part of the timed region.
fn bench_node_insertion_single(c: &mut Criterion) {
    let mut group = c.benchmark_group("node_insertion_single");

    for size in &[1, 10, 100, 1000] {
        group.throughput(Throughput::Elements(*size as u64));
        group.bench_with_input(BenchmarkId::from_parameter(size), size, |bencher, &count| {
            bencher.iter(|| {
                let graph = create_test_graph();
                for idx in 0..count {
                    let mut props = Properties::new();
                    let name = PropertyValue::String(format!("node_{}", idx));
                    props.insert("name".to_string(), name);
                    let value = PropertyValue::Integer(idx as i64);
                    props.insert("value".to_string(), value);

                    let node = Node::new(
                        NodeId(format!("node_{}", idx)),
                        vec!["Person".to_string()],
                        props,
                    );
                    black_box(graph.create_node(node).unwrap());
                }
            });
        });
    }

    group.finish();
}
/// Benchmark: inserting larger batches of nodes (100, 1000, 10000).
///
/// The nodes are still added through individual `create_node` calls on a
/// graph created inside the timed closure; only the batch sizes and the
/// `batch_node_` id prefix differ from the single-insertion benchmark.
fn bench_node_insertion_batch(c: &mut Criterion) {
    let mut group = c.benchmark_group("node_insertion_batch");

    for batch_size in &[100, 1000, 10000] {
        group.throughput(Throughput::Elements(*batch_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            batch_size,
            |bencher, &count| {
                bencher.iter(|| {
                    let graph = create_test_graph();
                    for idx in 0..count {
                        let mut props = Properties::new();
                        let name = PropertyValue::String(format!("node_{}", idx));
                        props.insert("name".to_string(), name);
                        let value = PropertyValue::Integer(idx as i64);
                        props.insert("value".to_string(), value);

                        let node = Node::new(
                            NodeId(format!("batch_node_{}", idx)),
                            vec!["Person".to_string()],
                            props,
                        );
                        black_box(graph.create_node(node).unwrap());
                    }
                });
            },
        );
    }

    group.finish();
}
/// Benchmark: inserting very large node sets (10000 and 100000 `User` nodes).
///
/// The sample size is lowered to 10 because each iteration is expensive. The
/// nodes are inserted with the same per-node `create_node` call used by the
/// other insertion benchmarks; no separate bulk API is invoked here.
fn bench_node_insertion_bulk(c: &mut Criterion) {
    let mut group = c.benchmark_group("node_insertion_bulk");
    // Fewer samples keep the total run time reasonable for large inputs.
    group.sample_size(10);

    for bulk_size in &[10000, 100000] {
        group.throughput(Throughput::Elements(*bulk_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(bulk_size),
            bulk_size,
            |bencher, &count| {
                bencher.iter(|| {
                    let graph = create_test_graph();
                    for idx in 0..count {
                        let mut props = Properties::new();
                        props.insert("id".to_string(), PropertyValue::Integer(idx as i64));
                        let name = PropertyValue::String(format!("user_{}", idx));
                        props.insert("name".to_string(), name);

                        let node = Node::new(
                            NodeId(format!("bulk_user_{}", idx)),
                            vec!["User".to_string()],
                            props,
                        );
                        black_box(graph.create_node(node).unwrap());
                    }
                });
            },
        );
    }

    group.finish();
}
/// Benchmark: Edge creation
fn bench_edge_creation(c: &mut Criterion) {
let mut group = c.benchmark_group("edge_creation");
// Setup: Create nodes once
let graph = Arc::new(create_test_graph());
let mut node_ids = Vec::new();
for i in 0..1000 {
let mut props = Properties::new();
props.insert("id".to_string(), PropertyValue::Integer(i as i64));
let node_id = NodeId(format!("edge_test_node_{}", i));
let node = Node::new(node_id.clone(), vec!["Person".to_string()], props);
graph.create_node(node).unwrap();
node_ids.push(node_id);
}
for num_edges in [100, 1000].iter() {
group.throughput(Throughput::Elements(*num_edges as u64));
group.bench_with_input(
BenchmarkId::from_parameter(num_edges),
num_edges,
|b, &num_edges| {
let graph = graph.clone();
let node_ids = node_ids.clone();
b.iter(|| {
for i in 0..num_edges {
let from = &node_ids[i % node_ids.len()];
let to = &node_ids[(i + 1) % node_ids.len()];
let mut props = Properties::new();
props.insert("weight".to_string(), PropertyValue::Float(i as f64));
let edge_id = EdgeId(format!("edge_{}", i));
let edge = Edge::new(
edge_id,
from.clone(),
to.clone(),
"KNOWS".to_string(),
props,
);
black_box(graph.create_edge(edge).unwrap());
}
});
},
);
}
group.finish();
}
/// Benchmark: Simple node lookup by ID
fn bench_query_node_lookup(c: &mut Criterion) {
let mut group = c.benchmark_group("query_node_lookup");
// Setup: Create 10k nodes (reduced for faster benchmark)
let graph = Arc::new(create_test_graph());
let mut node_ids = Vec::new();
for i in 0..10000 {
let mut props = Properties::new();
props.insert("id".to_string(), PropertyValue::Integer(i as i64));
let node_id = NodeId(format!("lookup_node_{}", i));
let node = Node::new(node_id.clone(), vec!["Person".to_string()], props);
graph.create_node(node).unwrap();
node_ids.push(node_id);
}
group.bench_function("lookup_by_id", |b| {
let graph = graph.clone();
let node_ids = node_ids.clone();
b.iter(|| {
let id = &node_ids[black_box(1234 % node_ids.len())];
black_box(graph.get_node(id).unwrap());
});
});
group.finish();
}
/// Benchmark: Edge lookup
fn bench_query_edge_lookup(c: &mut Criterion) {
let mut group = c.benchmark_group("query_edge_lookup");
// Setup: Create nodes and edges
let graph = Arc::new(create_test_graph());
let mut node_ids = Vec::new();
let mut edge_ids = Vec::new();
// Create 100 nodes
for i in 0..100 {
let mut props = Properties::new();
props.insert("id".to_string(), PropertyValue::Integer(i as i64));
let node_id = NodeId(format!("trav_node_{}", i));
let node = Node::new(node_id.clone(), vec!["Person".to_string()], props);
graph.create_node(node).unwrap();
node_ids.push(node_id);
}
// Create edges (each node has ~5 outgoing edges)
for i in 0..node_ids.len() {
for j in 0..5 {
let to_idx = (i + j + 1) % node_ids.len();
let edge_id = EdgeId(format!("trav_edge_{}_{}", i, j));
let edge = Edge::new(
edge_id.clone(),
node_ids[i].clone(),
node_ids[to_idx].clone(),
"KNOWS".to_string(),
Properties::new(),
);
graph.create_edge(edge).unwrap();
edge_ids.push(edge_id);
}
}
group.bench_function("edge_by_id", |b| {
let graph = graph.clone();
let edge_ids = edge_ids.clone();
b.iter(|| {
let id = &edge_ids[black_box(10 % edge_ids.len())];
black_box(graph.get_edge(id).unwrap());
});
});
group.finish();
}
/// Benchmark: Get nodes by label
fn bench_query_get_by_label(c: &mut Criterion) {
let mut group = c.benchmark_group("query_get_by_label");
let graph = Arc::new(create_test_graph());
// Create diverse nodes with different labels
for i in 0..1000 {
let mut props = Properties::new();
props.insert("id".to_string(), PropertyValue::Integer(i as i64));
let node_id = NodeId(format!("label_node_{}", i));
let label = if i % 3 == 0 {
"Person"
} else if i % 3 == 1 {
"Organization"
} else {
"Location"
};
let node = Node::new(node_id, vec![label.to_string()], props);
graph.create_node(node).unwrap();
}
group.bench_function("get_persons", |b| {
let graph = graph.clone();
b.iter(|| {
let nodes = graph.get_nodes_by_label("Person");
black_box(nodes.len());
});
});
group.finish();
}
/// Benchmark: time needed to populate a graph with 1000 or 10000 nodes.
///
/// `iter_custom` is used so that only the insertion loop is timed: the graph
/// is constructed before the clock starts and dropped after the elapsed time
/// has been recorded, so neither construction nor teardown is measured.
///
/// Despite the group name, this reports elapsed time, not memory consumption.
fn bench_memory_usage(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_usage");
    // Each sample inserts thousands of nodes, so keep the sample count low.
    group.sample_size(10);
    for num_nodes in [1000, 10000].iter() {
        group.throughput(Throughput::Elements(*num_nodes as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_nodes),
            num_nodes,
            |b, &num_nodes| {
                b.iter_custom(|iters| {
                    let mut total_duration = Duration::ZERO;
                    for _ in 0..iters {
                        let graph = create_test_graph();
                        let start = std::time::Instant::now();
                        // The closure pattern `&num_nodes` already yields a
                        // plain integer, so it must not be dereferenced again.
                        for i in 0..num_nodes {
                            let mut props = Properties::new();
                            props.insert("id".to_string(), PropertyValue::Integer(i as i64));
                            props.insert(
                                "name".to_string(),
                                PropertyValue::String(format!("node_{}", i)),
                            );
                            let node_id = NodeId(format!("mem_node_{}", i));
                            let node = Node::new(node_id, vec!["TestNode".to_string()], props);
                            graph.create_node(node).unwrap();
                        }
                        total_duration += start.elapsed();
                        // Drop explicitly after the measurement so teardown
                        // stays outside the timed region.
                        drop(graph);
                    }
                    total_duration
                });
            },
        );
    }
    group.finish();
}
// Bundle every graph benchmark defined above into the `benches` group; they
// are listed in the order they were defined.
criterion_group!(
benches,
bench_node_insertion_single,
bench_node_insertion_batch,
bench_node_insertion_bulk,
bench_edge_creation,
bench_query_node_lookup,
bench_query_edge_lookup,
bench_query_get_by_label,
bench_memory_usage
);
// Generate the benchmark binary's entry point from the `benches` group.
criterion_main!(benches);

View File

@@ -0,0 +1,11 @@
// Placeholder benchmark for graph traversal
// TODO: Implement comprehensive benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Stand-in benchmark for graph traversal: it only times passing the constant
/// `42` through `black_box` and exercises no project code yet.
fn graph_traversal_benchmark(c: &mut Criterion) {
    c.bench_function("placeholder", |bencher| {
        bencher.iter(|| black_box(42));
    });
}
// Register the placeholder as the only member of `benches` and generate the
// benchmark binary's entry point from it.
criterion_group!(benches, graph_traversal_benchmark);
criterion_main!(benches);

View File

@@ -0,0 +1,11 @@
// Placeholder benchmark for hybrid vector graph
// TODO: Implement comprehensive benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Stand-in benchmark for the hybrid vector graph: it only times passing the
/// constant `42` through `black_box` and exercises no project code yet.
fn hybrid_vector_graph_benchmark(c: &mut Criterion) {
    c.bench_function("placeholder", |bencher| {
        bencher.iter(|| black_box(42));
    });
}
// Register the placeholder as the only member of `benches` and generate the
// benchmark binary's entry point from it.
criterion_group!(benches, hybrid_vector_graph_benchmark);
criterion_main!(benches);

View File

@@ -0,0 +1,251 @@
//! Benchmarks for new capabilities
//!
//! Run with: cargo bench --package ruvector-graph --bench new_capabilities_bench
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use ruvector_graph::cypher::parser::parse_cypher;
use ruvector_graph::hybrid::semantic_search::{SemanticSearch, SemanticSearchConfig};
use ruvector_graph::hybrid::vector_index::{EmbeddingConfig, HybridIndex, VectorIndexType};
// ============================================================================
// Parser Benchmarks
// ============================================================================
/// Benchmarks parsing of a minimal labelled-node `MATCH`.
fn bench_simple_match(c: &mut Criterion) {
    const QUERY: &str = "MATCH (n:Person) RETURN n";

    c.bench_function("parser/simple_match", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of a single typed, directed relationship pattern.
fn bench_relationship_match(c: &mut Criterion) {
    const QUERY: &str = "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN a, r, b";

    c.bench_function("parser/relationship_match", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of relationship chains of increasing length (2, 3 and
/// 4 hops), one benchmark per chain length.
fn bench_chained_relationship(c: &mut Criterion) {
    // (benchmark id, query) pairs, ordered by hop count.
    const CASES: [(&str, &str); 3] = [
        ("2_hop", "MATCH (a)-[r]->(b)-[s]->(c) RETURN a, c"),
        ("3_hop", "MATCH (a)-[r]->(b)-[s]->(c)-[t]->(d) RETURN a, d"),
        (
            "4_hop",
            "MATCH (a)-[r]->(b)-[s]->(c)-[t]->(d)-[u]->(e) RETURN a, e",
        ),
    ];

    let mut group = c.benchmark_group("parser/chained_relationships");
    for &(label, query) in CASES.iter() {
        group.bench_function(label, |bencher| {
            bencher.iter(|| parse_cypher(black_box(query)))
        });
    }
    group.finish();
}
/// Benchmarks parsing of a chain that mixes an outgoing (`->`) and an
/// incoming (`<-`) relationship.
fn bench_mixed_direction_chain(c: &mut Criterion) {
    const QUERY: &str =
        "MATCH (a:Person)-[r:KNOWS]->(b:Person)<-[s:MANAGES]-(c:Manager) RETURN a, b, c";

    c.bench_function("parser/mixed_direction_chain", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
/// Benchmarks parsing of map literals in `RETURN`, from an empty map up to a
/// map with ten keys.
fn bench_map_literal(c: &mut Criterion) {
    // (benchmark id, query) pairs, ordered by number of map keys.
    const CASES: [(&str, &str); 4] = [
        ("empty", "MATCH (n) RETURN {}"),
        ("2_keys", "MATCH (n) RETURN {name: n.name, age: n.age}"),
        (
            "5_keys",
            "MATCH (n) RETURN {a: n.a, b: n.b, c: n.c, d: n.d, e: n.e}",
        ),
        (
            "10_keys",
            "MATCH (n) RETURN {a: n.a, b: n.b, c: n.c, d: n.d, e: n.e, f: n.f, g: n.g, h: n.h, i: n.i, j: n.j}",
        ),
    ];

    let mut group = c.benchmark_group("parser/map_literal");
    for &(label, query) in CASES.iter() {
        group.bench_function(label, |bencher| {
            bencher.iter(|| parse_cypher(black_box(query)))
        });
    }
    group.finish();
}
/// Benchmarks parsing of `REMOVE` clauses: a property, a single label, and
/// several labels at once.
fn bench_remove_statement(c: &mut Criterion) {
    // (benchmark id, query) pairs, one per `REMOVE` form.
    const CASES: [(&str, &str); 3] = [
        ("property", "MATCH (n:Person) REMOVE n.age RETURN n"),
        (
            "single_label",
            "MATCH (n:Person:Employee) REMOVE n:Employee RETURN n",
        ),
        ("multi_label", "MATCH (n:A:B:C:D) REMOVE n:B:C:D RETURN n"),
    ];

    let mut group = c.benchmark_group("parser/remove");
    for &(label, query) in CASES.iter() {
        group.bench_function(label, |bencher| {
            bencher.iter(|| parse_cypher(black_box(query)))
        });
    }
    group.finish();
}
/// Benchmarks parsing of a multi-line query that combines a mixed-direction
/// chain, a `WHERE` filter, a map literal in `RETURN`, `ORDER BY` and `LIMIT`.
fn bench_complex_query(c: &mut Criterion) {
    // Raw string: the line breaks are part of the text handed to the parser.
    const QUERY: &str = r#"
MATCH (p:Person)-[r:WORKS_AT]->(c:Company)<-[h:HEADQUARTERED]-(l:Location)
WHERE p.age > 30 AND c.revenue > 1000000
RETURN {
person: p.name,
company: c.name,
location: l.city
}
ORDER BY p.age DESC
LIMIT 10
"#;

    c.bench_function("parser/complex_query", |bencher| {
        bencher.iter(|| parse_cypher(black_box(QUERY)))
    });
}
// ============================================================================
// Semantic Search Benchmarks
// ============================================================================
/// Builds a `SemanticSearch` over a node index pre-filled with test vectors.
///
/// `num_vectors` embeddings of length `dimensions` are inserted under the ids
/// `node_0`, `node_1`, ... Embedding `i` is all zeros except for `1.0` at
/// position `i % dimensions` and `0.5` at position `(i + 1) % dimensions`.
///
/// # Panics
///
/// Panics if creating the index, initializing it, or adding an embedding
/// fails (each result is unwrapped), or if `dimensions` is 0 while
/// `num_vectors` is non-zero.
fn setup_semantic_search(num_vectors: usize, dimensions: usize) -> SemanticSearch {
    let index = HybridIndex::new(EmbeddingConfig {
        dimensions,
        ..Default::default()
    })
    .unwrap();
    index.initialize_index(VectorIndexType::Node).unwrap();

    for slot in 0..num_vectors {
        // Two non-zero components whose positions shift with the node index;
        // the 0.5 is written last, as the second of the two assignments.
        let mut embedding = vec![0.0f32; dimensions];
        embedding[slot % dimensions] = 1.0;
        embedding[(slot + 1) % dimensions] = 0.5;

        let node_id = format!("node_{}", slot);
        index.add_node_embedding(node_id, embedding).unwrap();
    }

    SemanticSearch::new(index, SemanticSearchConfig::default())
}
/// Benchmarks a top-10 similarity search over 100 vectors of 128 dimensions.
fn bench_semantic_search_small(c: &mut Criterion) {
    let search = setup_semantic_search(100, 128);

    // Unit vector along the first axis.
    let mut query = vec![0.0f32; 128];
    query[0] = 1.0;

    c.bench_function("semantic_search/100_vectors_128d", |bencher| {
        bencher.iter(|| search.find_similar_nodes(black_box(&query), 10))
    });
}
/// Benchmarks a top-10 similarity search over 1000 vectors of 128 dimensions.
fn bench_semantic_search_medium(c: &mut Criterion) {
    let search = setup_semantic_search(1000, 128);

    // Unit vector along the first axis.
    let mut query = vec![0.0f32; 128];
    query[0] = 1.0;

    c.bench_function("semantic_search/1000_vectors_128d", |bencher| {
        bencher.iter(|| search.find_similar_nodes(black_box(&query), 10))
    });
}
/// Benchmarks a top-10 similarity search over 500 vectors while varying the
/// embedding dimensionality (64 to 512). Index construction is untimed.
fn bench_semantic_search_dimensions(c: &mut Criterion) {
    let mut group = c.benchmark_group("semantic_search/dimensions");

    for dim in &[64, 128, 256, 384, 512] {
        let search = setup_semantic_search(500, *dim);

        // Unit vector along the first axis, sized to match the index.
        let mut query = vec![0.0f32; *dim];
        query[0] = 1.0;

        group.bench_with_input(BenchmarkId::from_parameter(dim), dim, |bencher, _| {
            bencher.iter(|| search.find_similar_nodes(black_box(&query), 10))
        });
    }

    group.finish();
}
/// Benchmarks similarity search over 1000 vectors of 128 dimensions while
/// varying the number of requested results (1 to 100).
fn bench_semantic_search_top_k(c: &mut Criterion) {
    let search = setup_semantic_search(1000, 128);

    // Unit vector along the first axis.
    let mut query = vec![0.0f32; 128];
    query[0] = 1.0;

    let mut group = c.benchmark_group("semantic_search/top_k");
    for k in &[1, 5, 10, 25, 50, 100] {
        group.bench_with_input(BenchmarkId::from_parameter(k), k, |bencher, &top_k| {
            bencher.iter(|| search.find_similar_nodes(black_box(&query), top_k))
        });
    }
    group.finish();
}
// ============================================================================
// Distance Conversion Benchmark (the fix we made)
// ============================================================================
fn bench_distance_conversion(c: &mut Criterion) {
let distances: Vec<f32> = (0..10000).map(|i| (i as f32) / 10000.0).collect();
c.bench_function("semantic_search/distance_conversion_10k", |b| {
b.iter(|| {
let _: Vec<f32> = distances.iter().map(|d| 1.0 - d).collect();
})
});
}
fn bench_similarity_filtering(c: &mut Criterion) {
let distances: Vec<f32> = (0..10000).map(|i| (i as f32) / 10000.0).collect();
let min_similarity = 0.7f32;
c.bench_function("semantic_search/similarity_filter_10k", |b| {
b.iter(|| {
let _: Vec<f32> = distances
.iter()
.map(|d| 1.0 - d)
.filter(|s| *s >= min_similarity)
.collect();
})
});
}
// Group of the Cypher parser benchmarks defined in this file.
criterion_group!(
parser_benches,
bench_simple_match,
bench_relationship_match,
bench_chained_relationship,
bench_mixed_direction_chain,
bench_map_literal,
bench_remove_statement,
bench_complex_query,
);
// Group of the semantic-search benchmarks plus the two distance/similarity
// conversion micro-benchmarks.
criterion_group!(
semantic_search_benches,
bench_semantic_search_small,
bench_semantic_search_medium,
bench_semantic_search_dimensions,
bench_semantic_search_top_k,
bench_distance_conversion,
bench_similarity_filtering,
);
// Generate the benchmark binary's entry point from both groups.
criterion_main!(parser_benches, semantic_search_benches);

View File

@@ -0,0 +1,11 @@
// Placeholder benchmark for query execution
// TODO: Implement comprehensive benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Stand-in benchmark for query execution: it only times passing the constant
/// `42` through `black_box` and exercises no project code yet.
fn query_execution_benchmark(c: &mut Criterion) {
    c.bench_function("placeholder", |bencher| {
        bencher.iter(|| black_box(42));
    });
}
// Register the placeholder as the only member of `benches` and generate the
// benchmark binary's entry point from it.
criterion_group!(benches, query_execution_benchmark);
criterion_main!(benches);

View File

@@ -0,0 +1,11 @@
// Placeholder benchmark for SIMD operations
// TODO: Implement comprehensive benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Stand-in benchmark for SIMD operations: it only times passing the constant
/// `42` through `black_box` and exercises no project code yet.
fn simd_operations_benchmark(c: &mut Criterion) {
    c.bench_function("placeholder", |bencher| {
        bencher.iter(|| black_box(42));
    });
}
// Register the placeholder as the only member of `benches` and generate the
// benchmark binary's entry point from it.
criterion_group!(benches, simd_operations_benchmark);
criterion_main!(benches);