Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

144
vendor/ruvector/examples/graph/README.md vendored Normal file
View File

@@ -0,0 +1,144 @@
# RuVector Graph Examples
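Graph database features including Cypher queries, distributed clustering, and hybrid search.

> **Note:** The example programs are currently templates: each one prints its steps, and the graph API calls remain commented out until the API is exposed by `ruvector-graph`.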
## Examples
| File | Description |
|------|-------------|
| `basic_graph.rs` | Graph creation and traversal |
| `cypher_queries.rs` | Cypher query language examples |
| `distributed_cluster.rs` | Multi-node graph clustering |
| `hybrid_search.rs` | Combined vector + graph search |
## Quick Start
```bash
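cargo run --example basic_graph
cargo run --example cypher_queries
cargo run --example hybrid_search
# The cluster example needs the `distributed` feature
cargo run --example distributed_cluster --features distributed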
```
## Basic Graph Operations
```rust
use ruvector_graph::{Graph, Node, Edge};
use serde_json::json;
let mut graph = Graph::new();
// Add nodes with embeddings
let n1 = graph.add_node(Node {
id: "user:1".to_string(),
embedding: vec![0.1; 128],
properties: json!({"name": "Alice"}),
});
let n2 = graph.add_node(Node {
id: "user:2".to_string(),
embedding: vec![0.2; 128],
properties: json!({"name": "Bob"}),
});
// Create relationship
graph.add_edge(Edge {
from: n1,
to: n2,
relation: "KNOWS".to_string(),
weight: 0.95,
});
```
## Cypher Queries
```rust
// Find connected nodes
let query = "MATCH (a:User)-[:KNOWS]->(b:User) RETURN b";
let results = graph.cypher(query)?;
// Pattern matching with vector similarity
let query = "
MATCH (u:User)
WHERE vector_similarity(u.embedding, $query_vec) > 0.8
RETURN u
";
let results = graph.cypher_with_params(query, params)?;
```
## Distributed Clustering
```rust
use ruvector_graph::{DistributedGraph, ClusterConfig, Partitioning};
let config = ClusterConfig {
nodes: vec!["node1:9000", "node2:9000"],
replication_factor: 2,
partitioning: Partitioning::Hash,
};
let cluster = DistributedGraph::connect(config)?;
// Data is automatically partitioned
cluster.add_node(node)?;
// Queries are distributed
let results = cluster.query("MATCH (n) RETURN n LIMIT 10")?;
```
## Hybrid Search
Combine vector similarity with graph traversal:
```rust
use ruvector_graph::{HybridQuery, HybridSearch};
let search = HybridSearch::new(graph, vector_index);
// Step 1: Find similar nodes by embedding
// Step 2: Expand via graph relationships
// Step 3: Re-rank by combined score
let results = search.query(HybridQuery {
embedding: query_vec,
relation_filter: vec!["KNOWS", "WORKS_WITH"],
depth: 2,
top_k: 10,
vector_weight: 0.6,
graph_weight: 0.4,
})?;
```
## Graph Algorithms
```rust
// PageRank
let scores = graph.pagerank(0.85, 100)?;
// Community detection (Louvain)
let communities = graph.detect_communities()?;
// Shortest path
let path = graph.shortest_path(from, to)?;
// Connected components
let components = graph.connected_components()?;
```
## Use Cases
| Use Case | Query Pattern |
|----------|---------------|
| Social Networks | `(user)-[:FOLLOWS]->(user)` |
| Knowledge Graphs | `(entity)-[:RELATED_TO]->(entity)` |
| Recommendations | Vector similarity + collaborative filtering |
| Fraud Detection | Subgraph pattern matching |
| Supply Chain | Path analysis and bottleneck detection |
## Performance
- **Index Types**: B-tree, hash, vector (HNSW)
- **Caching**: LRU cache for hot subgraphs
- **Partitioning**: Hash, range, or custom
- **Replication**: Configurable factor
## Related
- [Graph CLI Usage](../docs/graph-cli-usage.md)
- [Graph WASM Usage](../docs/graph_wasm_usage.html)

View File

@@ -0,0 +1,87 @@
//! Basic Graph Operations Example
//!
//! This example demonstrates fundamental graph database operations:
//! - Creating nodes with labels and properties
//! - Creating relationships between nodes
//! - Querying nodes and relationships
//! - Updating and deleting graph elements
/// Walks through the basic graph workflow, printing a heading for each step.
///
/// Every database call is still commented out (see the TODO below), so running
/// this example only prints the step headings and placeholder confirmations.
///
/// # Errors
///
/// Currently always returns `Ok(())`. The `Result` return type is there so the
/// `?` operators in the template code compile as soon as it is uncommented.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== RuVector Graph - Basic Operations ===\n");

    // TODO: Once the graph API is exposed in ruvector-graph, implement:

    println!("1. Creating Graph Database");
    // let db = GraphDatabase::open("./data/basic_graph.db")?;

    // The leading "\n" separates this step from the previous one, matching
    // every other step heading in this example.
    println!("\n2. Creating Nodes");
    // Create person nodes
    // let alice = db.create_node()
    //     .label("Person")
    //     .property("name", "Alice")
    //     .property("age", 30)
    //     .execute()?;
    // let bob = db.create_node()
    //     .label("Person")
    //     .property("name", "Bob")
    //     .property("age", 35)
    //     .execute()?;
    println!(" ✓ Created nodes: Alice, Bob");

    println!("\n3. Creating Relationships");
    // Create friendship relationship
    // NOTE: `type` is a reserved keyword in Rust, so a method with that name
    // has to be called as `r#type`; the final API may pick a different name.
    // let friendship = db.create_relationship()
    //     .from(alice)
    //     .to(bob)
    //     .r#type("FRIENDS_WITH")
    //     .property("since", 2020)
    //     .execute()?;
    println!(" ✓ Created relationship: Alice -[FRIENDS_WITH]-> Bob");

    println!("\n4. Querying Nodes");
    // Find all Person nodes
    // let people = db.query()
    //     .match_pattern("(p:Person)")
    //     .return_("p")
    //     .execute()?;
    // for person in people {
    //     println!(" Found: {:?}", person);
    // }

    println!("\n5. Traversing Relationships");
    // Find Alice's friends
    // let friends = db.query()
    //     .match_pattern("(a:Person {name: 'Alice'})-[:FRIENDS_WITH]->(friend)")
    //     .return_("friend")
    //     .execute()?;
    // println!(" Alice's friends: {:?}", friends);

    println!("\n6. Updating Properties");
    // Update Alice's age
    // db.update_node(alice)
    //     .property("age", 31)
    //     .execute()?;
    println!(" ✓ Updated Alice's age");

    println!("\n7. Deleting Elements");
    // Delete a relationship
    // db.delete_relationship(friendship)?;
    println!(" ✓ Deleted friendship relationship");

    println!("\n8. Statistics");
    // let stats = db.stats()?;
    // println!(" Total nodes: {}", stats.node_count);
    // println!(" Total relationships: {}", stats.relationship_count);
    // println!(" Node labels: {:?}", stats.labels);
    // println!(" Relationship types: {:?}", stats.relationship_types);

    println!("\n=== Example Complete ===");
    println!("\nNote: This is a template. Actual implementation pending graph API exposure.");

    Ok(())
}

View File

@@ -0,0 +1,122 @@
//! Cypher Query Examples
//!
//! This example demonstrates Neo4j-compatible Cypher queries:
//! - CREATE: Creating nodes and relationships
//! - MATCH: Pattern matching
//! - WHERE: Filtering results
//! - RETURN: Projecting results
//! - Aggregations and complex queries
/// Prints the heading of each Cypher query scenario covered by this example.
///
/// The queries themselves are kept as commented-out template code (see the
/// TODO below); nothing is executed against a database yet.
///
/// # Errors
///
/// Currently always returns `Ok(())`. The `Result` return type is there so the
/// `?` after each `db.execute_cypher(..)` call compiles once the template code
/// is uncommented.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== RuVector Graph - Cypher Queries ===\n");

    // TODO: Once the graph API is exposed, implement:

    println!("1. Simple CREATE Query");
    // let query = "CREATE (n:Person {name: 'Charlie', age: 28}) RETURN n";
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n2. Pattern Matching");
    // let query = r#"
    //     MATCH (p:Person)
    //     WHERE p.age > 25
    //     RETURN p.name, p.age
    //     ORDER BY p.age DESC
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n3. Creating Relationships");
    // let query = r#"
    //     MATCH (a:Person {name: 'Alice'})
    //     MATCH (b:Person {name: 'Charlie'})
    //     CREATE (a)-[r:KNOWS {since: 2023}]->(b)
    //     RETURN r
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n4. Traversal Queries");
    // let query = r#"
    //     MATCH (start:Person {name: 'Alice'})-[:KNOWS*1..3]->(end:Person)
    //     RETURN end.name, length((start)-[:KNOWS*]->(end)) as distance
    //     ORDER BY distance
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n5. Aggregation Queries");
    // let query = r#"
    //     MATCH (p:Person)
    //     RETURN
    //         count(p) as total_people,
    //         avg(p.age) as average_age,
    //         min(p.age) as youngest,
    //         max(p.age) as oldest
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n6. Shortest Path");
    // let query = r#"
    //     MATCH path = shortestPath(
    //         (a:Person {name: 'Alice'})-[:KNOWS*]-(b:Person {name: 'Bob'})
    //     )
    //     RETURN path, length(path) as distance
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n7. Pattern Comprehension");
    // let query = r#"
    //     MATCH (p:Person)
    //     RETURN p.name, [(p)-[:KNOWS]->(friend) | friend.name] as friends
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n8. Complex Multi-Pattern Query");
    // let query = r#"
    //     MATCH (p:Person)-[:LIVES_IN]->(city:City)
    //     MATCH (p)-[:WORKS_AT]->(company:Company)
    //     WHERE city.name = 'San Francisco' AND company.industry = 'Tech'
    //     RETURN p.name, company.name, p.salary
    //     ORDER BY p.salary DESC
    //     LIMIT 10
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n9. Updating Properties with Cypher");
    // let query = r#"
    //     MATCH (p:Person {name: 'Alice'})
    //     SET p.age = p.age + 1, p.updated_at = timestamp()
    //     RETURN p
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n10. Conditional Creation (MERGE)");
    // let query = r#"
    //     MERGE (p:Person {email: 'alice@example.com'})
    //     ON CREATE SET p.name = 'Alice', p.created_at = timestamp()
    //     ON MATCH SET p.last_seen = timestamp()
    //     RETURN p
    // "#;
    // let result = db.execute_cypher(query)?;
    // println!(" Query: {}", query);
    // println!(" Result: {:?}", result);

    println!("\n=== Example Complete ===");
    println!("\nNote: This is a template. Actual implementation pending graph API exposure.");

    Ok(())
}

View File

@@ -0,0 +1,155 @@
//! Distributed Graph Cluster Example
//!
//! This example demonstrates setting up a distributed graph database cluster:
//! - Multi-node cluster initialization
//! - Data sharding and partitioning
//! - RAFT consensus for consistency
//! - Replication and failover
//! - Distributed queries
/// Prints the steps of a distributed-cluster walkthrough.
///
/// Only compiled when the `distributed` feature is enabled. All cluster calls
/// are still commented-out template code (see the TODO below), so the
/// confirmations printed here are placeholders; the closing note says so
/// explicitly, as the other graph examples do.
///
/// # Errors
///
/// Currently always returns `Ok(())`; the `Result` return type lets the `?`
/// operators in the template code compile once it is uncommented.
#[cfg(feature = "distributed")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== RuVector Graph - Distributed Cluster ===\n");

    // TODO: Once the distributed graph API is exposed, implement:
    // NOTE(review): several template steps below use `.await`, which is only
    // valid inside an async function; `main` will presumably need to become
    // async (driven by an async runtime) before those steps are uncommented.

    println!("1. Initialize Cluster Configuration");
    // let config = ClusterConfig::builder()
    //     .cluster_name("ruvector-cluster")
    //     .replication_factor(3)
    //     .sharding_strategy(ShardingStrategy::ConsistentHash)
    //     .consensus(ConsensusProtocol::Raft)
    //     .build()?;
    println!(" ✓ Cluster configuration created");
    println!(" - Replication factor: 3");
    println!(" - Sharding: Consistent Hash");
    println!(" - Consensus: RAFT");

    println!("\n2. Start Cluster Nodes");
    // Start 3 nodes in the cluster
    // let node1 = GraphNode::new("node1", "127.0.0.1:7001", config.clone())?;
    // node1.start()?;
    // let node2 = GraphNode::new("node2", "127.0.0.1:7002", config.clone())?;
    // node2.join_cluster(&["127.0.0.1:7001"])?;
    // let node3 = GraphNode::new("node3", "127.0.0.1:7003", config.clone())?;
    // node3.join_cluster(&["127.0.0.1:7001"])?;
    println!(" ✓ Started 3 cluster nodes");
    println!(" - node1: 127.0.0.1:7001 (leader)");
    println!(" - node2: 127.0.0.1:7002 (follower)");
    println!(" - node3: 127.0.0.1:7003 (follower)");

    println!("\n3. Wait for Cluster Formation");
    // Wait for RAFT consensus to be established
    // tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
    // let status = node1.cluster_status()?;
    // println!(" Cluster status: {:?}", status);
    // assert_eq!(status.healthy_nodes, 3);

    println!("\n4. Distributed Data Insertion");
    // Connect to cluster (automatically routes to correct shard)
    // let client = GraphClient::connect(&["127.0.0.1:7001", "127.0.0.1:7002", "127.0.0.1:7003"])?;
    // Insert nodes - will be distributed across shards
    // for i in 0..1000 {
    //     client.create_node()
    //         .label("Person")
    //         .property("id", i)
    //         .property("name", format!("User{}", i))
    //         .execute_async().await?;
    // }
    println!(" ✓ Inserted 1000 nodes across cluster");

    println!("\n5. Check Data Distribution");
    // View how data is distributed across nodes
    // let node1_stats = node1.local_stats()?;
    // let node2_stats = node2.local_stats()?;
    // let node3_stats = node3.local_stats()?;
    // println!(" Data distribution:");
    // println!(" - node1: {} nodes", node1_stats.node_count);
    // println!(" - node2: {} nodes", node2_stats.node_count);
    // println!(" - node3: {} nodes", node3_stats.node_count);

    println!("\n6. Distributed Query Execution");
    // Execute query that spans multiple shards
    // let result = client.execute_cypher(r#"
    //     MATCH (p:Person)
    //     WHERE p.id >= 100 AND p.id < 200
    //     RETURN p.name
    //     ORDER BY p.id
    // "#).await?;
    // println!(" Query returned {} results", result.len());

    println!("\n7. Test Replication");
    // Verify data is replicated across nodes
    // let node1_data = node1.read_local("Person", "id", 42)?;
    // let node2_data = node2.read_replica("Person", "id", 42)?;
    // let node3_data = node3.read_replica("Person", "id", 42)?;
    // assert_eq!(node1_data, node2_data);
    // assert_eq!(node2_data, node3_data);
    println!(" ✓ Data successfully replicated");

    println!("\n8. Simulate Node Failure");
    // Stop one node and verify cluster continues
    // node3.stop()?;
    // tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
    // let status = node1.cluster_status()?;
    // println!(" Cluster status after node3 failure:");
    // println!(" - Healthy nodes: {}", status.healthy_nodes);
    // println!(" - Leader: node1");
    // println!(" - Cluster still operational: {}", status.is_healthy());

    println!("\n9. Test Failover");
    // Queries should still work with 2/3 nodes
    // let result = client.execute_cypher(r#"
    //     MATCH (p:Person)
    //     RETURN count(p) as total
    // "#).await?;
    // println!(" Query after failover: {} total nodes", result);

    println!("\n10. Node Recovery");
    // Restart failed node
    // node3.start()?;
    // node3.rejoin_cluster(&["127.0.0.1:7001"])?;
    // Wait for catch-up
    // tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
    // let status = node1.cluster_status()?;
    // println!(" Cluster fully recovered: {}", status.healthy_nodes == 3);

    println!("\n11. Performance Metrics");
    // let metrics = client.cluster_metrics()?;
    // println!(" Cluster performance:");
    // println!(" - Total throughput: {} ops/sec", metrics.total_ops_per_sec);
    // println!(" - Average latency: {}ms", metrics.avg_latency_ms);
    // println!(" - Cross-shard queries: {}%", metrics.cross_shard_query_pct);

    println!("\n12. Cleanup");
    // node1.stop()?;
    // node2.stop()?;
    // node3.stop()?;
    println!(" ✓ Cluster shutdown complete");

    println!("\n=== Example Complete ===");
    // The confirmations above are placeholders, so say so, using the same
    // closing note as the other graph examples.
    println!("\nNote: This is a template. Actual implementation pending graph API exposure.");

    Ok(())
}
#[cfg(not(feature = "distributed"))]
fn main() {
    // Fallback entry point used when the `distributed` feature is off: there
    // is nothing to demonstrate, so just tell the user how to enable it.
    let notice = [
        "=== RuVector Graph - Distributed Cluster ===\n",
        "This example requires the 'distributed' feature.",
        "Run with: cargo run --example distributed_cluster --features distributed",
    ];

    for line in notice.iter() {
        println!("{}", line);
    }
}

View File

@@ -0,0 +1,161 @@
//! Hybrid Vector-Graph Search Example
//!
//! This example demonstrates combining vector similarity search
//! with graph traversal for powerful hybrid queries:
//! - Vector embeddings on graph nodes
//! - Semantic similarity search
//! - Graph-constrained vector search
//! - Ranking by combined scores
/// Prints the steps of a hybrid vector + graph search walkthrough.
///
/// All database and search calls are still commented-out template code (see
/// the TODO below), so running this only prints step headings and placeholder
/// confirmations.
///
/// # Errors
///
/// Currently always returns `Ok(())`. The `Result` return type is there so the
/// `?` operators in the template code compile once it is uncommented.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== RuVector Graph - Hybrid Search ===\n");

    // TODO: Once the graph and vector APIs are integrated, implement:

    println!("1. Initialize Hybrid Database");
    // let db = GraphDatabase::open("./data/hybrid_db.db")?;
    // let vector_store = db.enable_vector_index("embeddings", 768)?;
    println!(" ✓ Created graph database with vector indexing");

    println!("\n2. Create Nodes with Vector Embeddings");
    // Create documents with text embeddings
    // let doc1 = db.create_node()
    //     .label("Document")
    //     .property("title", "Introduction to Rust")
    //     .property("content", "Rust is a systems programming language...")
    //     .embedding(embedding_model.encode("Introduction to Rust..."))
    //     .execute()?;
    // let doc2 = db.create_node()
    //     .label("Document")
    //     .property("title", "Advanced Rust Patterns")
    //     .property("content", "This article covers advanced patterns...")
    //     .embedding(embedding_model.encode("Advanced Rust Patterns..."))
    //     .execute()?;
    println!(" ✓ Created documents with vector embeddings");

    println!("\n3. Create Relationships");
    // Create citation relationships
    // NOTE: `type` is a reserved keyword in Rust, so a method with that name
    // has to be called as `r#type`; the final API may pick a different name.
    // db.create_relationship()
    //     .from(doc2)
    //     .to(doc1)
    //     .r#type("CITES")
    //     .execute()?;
    println!(" ✓ Created citation relationships");

    println!("\n4. Semantic Search (Vector Only)");
    // let query = "memory safety in programming";
    // let query_embedding = embedding_model.encode(query);
    // let results = db.vector_search()
    //     .index("embeddings")
    //     .query(query_embedding)
    //     .top_k(5)
    //     .execute()?;
    // println!(" Query: '{}'", query);
    // for (doc, score) in results {
    //     println!(" - {} (score: {:.4})", doc.property("title"), score);
    // }

    println!("\n5. Graph-Constrained Vector Search");
    // Find similar documents that are cited by "Advanced Rust Patterns"
    // let results = db.hybrid_query()
    //     .cypher(r#"
    //         MATCH (source:Document {title: 'Advanced Rust Patterns'})-[:CITES]->(cited:Document)
    //         RETURN cited
    //     "#)
    //     .vector_similarity(query_embedding, 0.7) // minimum similarity threshold
    //     .execute()?;
    // println!(" Cited documents similar to query:");
    // for doc in results {
    //     println!(" - {}", doc.property("title"));
    // }

    println!("\n6. Ranked Hybrid Search");
    // Combine vector similarity and graph metrics
    // let results = db.hybrid_query()
    //     .vector_search(query_embedding)
    //     .graph_metric("pagerank")
    //     .combine_scores(|vector_score, graph_score| {
    //         0.7 * vector_score + 0.3 * graph_score
    //     })
    //     .top_k(10)
    //     .execute()?;
    // println!(" Top results (vector + graph ranking):");
    // for (doc, combined_score, details) in results {
    //     println!(" - {} (score: {:.4}, vector: {:.4}, graph: {:.4})",
    //         doc.property("title"),
    //         combined_score,
    //         details.vector_score,
    //         details.graph_score
    //     );
    // }

    println!("\n7. Multi-Hop Vector Search");
    // Find documents connected through multiple hops, ranked by similarity
    // let results = db.hybrid_query()
    //     .cypher(r#"
    //         MATCH path = (start:Document {title: 'Introduction to Rust'})
    //             -[:CITES|:RELATED_TO*1..3]->
    //             (end:Document)
    //         RETURN end, length(path) as distance
    //     "#)
    //     .vector_similarity(query_embedding, 0.6)
    //     .penalty_per_hop(0.1) // Reduce score for each hop
    //     .execute()?;
    // println!(" Multi-hop search results:");
    // for (doc, score, distance) in results {
    //     println!(" - {} (score: {:.4}, hops: {})",
    //         doc.property("title"), score, distance
    //     );
    // }

    println!("\n8. Filtered Vector Search with Graph Constraints");
    // Complex hybrid query with multiple constraints
    // let results = db.hybrid_query()
    //     .cypher(r#"
    //         MATCH (doc:Document)-[:AUTHORED_BY]->(author:Person)
    //         WHERE author.institution = 'MIT'
    //         AND doc.year >= 2020
    //         RETURN doc
    //     "#)
    //     .vector_search(query_embedding)
    //     .top_k(5)
    //     .execute()?;
    // println!(" MIT papers since 2020 similar to query:");
    // for (doc, score) in results {
    //     println!(" - {} (score: {:.4})", doc.property("title"), score);
    // }

    println!("\n9. Performance Comparison");
    // Compare pure vector vs hybrid search
    // let start = std::time::Instant::now();
    // let vector_results = db.vector_search()
    //     .query(query_embedding)
    //     .top_k(100)
    //     .execute()?;
    // let vector_time = start.elapsed();
    // let start = std::time::Instant::now();
    // let hybrid_results = db.hybrid_query()
    //     .vector_search(query_embedding)
    //     .graph_filter(/* some constraint */)
    //     .top_k(100)
    //     .execute()?;
    // let hybrid_time = start.elapsed();
    // println!(" Vector-only: {:?}", vector_time);
    // println!(" Hybrid: {:?}", hybrid_time);
    // println!(" Overhead: {:.2}%", (hybrid_time.as_secs_f64() / vector_time.as_secs_f64() - 1.0) * 100.0);

    println!("\n=== Example Complete ===");
    println!("\nNote: This is a template. Actual implementation pending graph API exposure.");

    Ok(())
}