Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
169
vendor/ruvector/examples/rust/README.md
vendored
Normal file
169
vendor/ruvector/examples/rust/README.md
vendored
Normal file
@@ -0,0 +1,169 @@
|
||||
# RuVector Rust Examples
|
||||
|
||||
Core Rust SDK examples demonstrating RuVector's vector database capabilities.
|
||||
|
||||
## Examples
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `basic_usage.rs` | Getting started with vector DB operations |
|
||||
| `batch_operations.rs` | High-throughput batch ingestion |
|
||||
| `rag_pipeline.rs` | Retrieval-Augmented Generation pipeline |
|
||||
| `advanced_features.rs` | Hypergraphs, neural hashing, topology |
|
||||
| `agenticdb_demo.rs` | AI agent memory with 5 tables |
|
||||
| `gnn_example.rs` | Graph Neural Network layer usage |
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Run basic example
|
||||
cargo run --example basic_usage
|
||||
|
||||
# Run with release optimizations
|
||||
cargo run --release --example advanced_features
|
||||
```
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```rust
|
||||
use ruvector_core::{VectorDB, VectorEntry, DbOptions, Result};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// Create database
|
||||
let mut options = DbOptions::default();
|
||||
options.dimensions = 128;
|
||||
let db = VectorDB::new(options)?;
|
||||
|
||||
// Insert vector
|
||||
let entry = VectorEntry {
|
||||
id: Some("doc_001".to_string()),
|
||||
vector: vec![0.1; 128],
|
||||
metadata: None,
|
||||
};
|
||||
db.insert(entry)?;
|
||||
|
||||
// Search
|
||||
let results = db.search(&vec![0.1; 128], 10)?;
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Features
|
||||
|
||||
### Hypergraph Index
|
||||
Multi-entity relationships with weighted edges.
|
||||
|
||||
```rust
|
||||
use ruvector_core::advanced::*;
|
||||
|
||||
let mut index = HypergraphIndex::new(DistanceMetric::Cosine);
|
||||
index.add_entity(1, vec![0.9, 0.1, 0.0]);
|
||||
index.add_entity(2, vec![0.8, 0.2, 0.0]);
|
||||
|
||||
let edge = Hyperedge::new(
|
||||
vec![1, 2],
|
||||
"Co-cited papers".to_string(),
|
||||
vec![0.7, 0.2, 0.1],
|
||||
0.95,
|
||||
);
|
||||
index.add_hyperedge(edge)?;
|
||||
```
|
||||
|
||||
### Temporal Hypergraph
|
||||
Time-aware relationships for event tracking.
|
||||
|
||||
```rust
|
||||
let mut temporal = TemporalHypergraph::new(DistanceMetric::Cosine);
|
||||
temporal.add_entity_at_time(1, vec![0.5; 3], 1000);
|
||||
temporal.add_entity_at_time(1, vec![0.6; 3], 2000); // Entity evolves
|
||||
```
|
||||
|
||||
### Causal Memory
|
||||
Cause-effect relationship chains.
|
||||
|
||||
```rust
|
||||
let mut causal = CausalMemory::new(DistanceMetric::Cosine);
|
||||
let id1 = causal.add_pattern(vec![0.9, 0.1], "initial event")?;
|
||||
let id2 = causal.add_pattern_with_cause(
|
||||
vec![0.8, 0.2],
|
||||
"consequence",
|
||||
id1, // Caused by id1
|
||||
0.9 // High confidence
|
||||
)?;
|
||||
```
|
||||
|
||||
### Learned Index
|
||||
ML-optimized index structure.
|
||||
|
||||
```rust
|
||||
let mut learned = LearnedIndex::new(DistanceMetric::Cosine);
|
||||
learned.set_model_type(ModelType::LinearRegression);
|
||||
for (i, vec) in vectors.iter().enumerate() {
|
||||
learned.insert(i, vec.clone())?;
|
||||
}
|
||||
learned.train()?; // Train the model
|
||||
```
|
||||
|
||||
### Neural Hash
|
||||
Locality-sensitive hashing.
|
||||
|
||||
```rust
|
||||
let neural_hash = NeuralHash::new(128, 64, 8)?;
|
||||
let hash = neural_hash.hash(&vector)?;
|
||||
let candidates = neural_hash.query_approximate(&query, 10)?;
|
||||
```
|
||||
|
||||
## AgenticDB Tables
|
||||
|
||||
| Table | Purpose |
|
||||
|-------|---------|
|
||||
| `reflexion_episodes` | Self-critique memories |
|
||||
| `skill_library` | Consolidated patterns |
|
||||
| `causal_memory` | Hypergraph relationships |
|
||||
| `learning_sessions` | RL training data |
|
||||
| `vector_db` | Core embeddings |
|
||||
|
||||
```rust
|
||||
use ruvector_core::AgenticDB;
|
||||
|
||||
let db = AgenticDB::new(options)?;
|
||||
|
||||
// Store reflexion episode
|
||||
db.store_episode(
|
||||
"Task description".to_string(),
|
||||
vec!["Action 1".to_string()],
|
||||
vec!["Error observed".to_string()],
|
||||
"What I learned".to_string(),
|
||||
)?;
|
||||
|
||||
// Query similar past experiences
|
||||
let episodes = db.query_similar_episodes(&embedding, 5)?;
|
||||
```
|
||||
|
||||
## GNN Layer
|
||||
|
||||
```rust
|
||||
use ruvector_gnn::RuvectorLayer;
|
||||
|
||||
let gnn = RuvectorLayer::new(128, 256, 4, 0.1);
|
||||
let node = vec![0.5; 128];
|
||||
let neighbors = vec![vec![0.3; 128], vec![0.7; 128]];
|
||||
let weights = vec![0.8, 0.6];
|
||||
|
||||
let updated = gnn.forward(&node, &neighbors, &weights);
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Batch Operations**: Use `insert_batch` for bulk inserts
|
||||
2. **Dimension**: Match embedding dimensions exactly
|
||||
3. **Index Type**: Choose based on query patterns
|
||||
4. **Distance Metric**: Cosine for normalized, Euclidean for raw
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
ruvector-core = "0.1"
|
||||
ruvector-gnn = "0.1"
|
||||
```
|
||||
303
vendor/ruvector/examples/rust/advanced_features.rs
vendored
Normal file
303
vendor/ruvector/examples/rust/advanced_features.rs
vendored
Normal file
@@ -0,0 +1,303 @@
|
||||
//! Example demonstrating advanced features:
|
||||
//! - Hypergraph structures
|
||||
//! - Learned indexes
|
||||
//! - Neural hashing
|
||||
//! - Topological analysis
|
||||
|
||||
use ruvector_core::advanced::*;
|
||||
use ruvector_core::types::DistanceMetric;
|
||||
|
||||
fn main() {
|
||||
println!("=== Ruvector Advanced Features Demo ===\n");
|
||||
|
||||
demo_hypergraph();
|
||||
demo_temporal_hypergraph();
|
||||
demo_causal_memory();
|
||||
demo_learned_index();
|
||||
demo_neural_hash();
|
||||
demo_topological_analysis();
|
||||
}
|
||||
|
||||
fn demo_hypergraph() {
|
||||
println!("--- Hypergraph for Multi-Entity Relationships ---");
|
||||
|
||||
let mut index = HypergraphIndex::new(DistanceMetric::Cosine);
|
||||
|
||||
// Scenario: Academic paper citation network
|
||||
// Entities: papers (represented by embeddings)
|
||||
println!("Adding papers as entities...");
|
||||
index.add_entity(1, vec![0.9, 0.1, 0.0]); // ML paper
|
||||
index.add_entity(2, vec![0.8, 0.2, 0.0]); // Similar ML paper
|
||||
index.add_entity(3, vec![0.1, 0.9, 0.0]); // NLP paper
|
||||
index.add_entity(4, vec![0.0, 0.8, 0.2]); // Similar NLP paper
|
||||
index.add_entity(5, vec![0.4, 0.4, 0.2]); // Cross-domain paper
|
||||
|
||||
// Hyperedge: Papers 1, 2, 5 co-cited in review
|
||||
let edge1 = Hyperedge::new(
|
||||
vec![1, 2, 5],
|
||||
"Co-cited in ML review paper".to_string(),
|
||||
vec![0.7, 0.2, 0.1],
|
||||
0.95,
|
||||
);
|
||||
index.add_hyperedge(edge1).unwrap();
|
||||
|
||||
// Hyperedge: Papers 3, 4, 5 form research thread
|
||||
let edge2 = Hyperedge::new(
|
||||
vec![3, 4, 5],
|
||||
"NLP research thread".to_string(),
|
||||
vec![0.2, 0.7, 0.1],
|
||||
0.90,
|
||||
);
|
||||
index.add_hyperedge(edge2).unwrap();
|
||||
|
||||
println!("Added 2 hyperedges connecting papers");
|
||||
|
||||
// Search for relationships similar to a query
|
||||
let query = vec![0.6, 0.3, 0.1]; // ML-focused query
|
||||
let results = index.search_hyperedges(&query, 2);
|
||||
|
||||
println!("Searching for relationships similar to ML query:");
|
||||
for (edge_id, distance) in results {
|
||||
if let Some(edge) = index.get_hyperedge(&edge_id) {
|
||||
println!(
|
||||
" - {} (distance: {:.3}, nodes: {:?})",
|
||||
edge.description, distance, edge.nodes
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Find k-hop neighbors
|
||||
let neighbors = index.k_hop_neighbors(1, 2);
|
||||
println!("Papers reachable from paper 1 (2 hops): {:?}", neighbors);
|
||||
|
||||
let stats = index.stats();
|
||||
println!("Stats: {} entities, {} hyperedges, avg degree: {:.2}\n",
|
||||
stats.total_entities, stats.total_hyperedges, stats.avg_entity_degree);
|
||||
}
|
||||
|
||||
fn demo_temporal_hypergraph() {
|
||||
println!("--- Temporal Hypergraph for Time-Series Relationships ---");
|
||||
|
||||
let mut index = HypergraphIndex::new(DistanceMetric::Euclidean);
|
||||
|
||||
// Scenario: User interaction patterns over time
|
||||
println!("Tracking user interactions...");
|
||||
|
||||
index.add_entity(1, vec![1.0, 0.0]); // User A
|
||||
index.add_entity(2, vec![0.0, 1.0]); // User B
|
||||
index.add_entity(3, vec![0.5, 0.5]); // User C
|
||||
|
||||
// Add temporal interactions
|
||||
let edge1 = Hyperedge::new(
|
||||
vec![1, 2],
|
||||
"Users A and B collaborated".to_string(),
|
||||
vec![0.5, 0.5],
|
||||
1.0,
|
||||
);
|
||||
let temporal1 = TemporalHyperedge::new(edge1, TemporalGranularity::Daily);
|
||||
index.add_temporal_hyperedge(temporal1.clone()).unwrap();
|
||||
|
||||
let edge2 = Hyperedge::new(
|
||||
vec![2, 3],
|
||||
"Users B and C interacted".to_string(),
|
||||
vec![0.3, 0.7],
|
||||
0.8,
|
||||
);
|
||||
let temporal2 = TemporalHyperedge::new(edge2, TemporalGranularity::Daily);
|
||||
index.add_temporal_hyperedge(temporal2.clone()).unwrap();
|
||||
|
||||
println!("Added temporal interactions");
|
||||
|
||||
// Query by time bucket
|
||||
let bucket = temporal1.time_bucket();
|
||||
let results = index.query_temporal_range(bucket, bucket + 1);
|
||||
println!("Interactions in time bucket {}: {} found\n", bucket, results.len());
|
||||
}
|
||||
|
||||
fn demo_causal_memory() {
|
||||
println!("--- Causal Hypergraph Memory for Agent Reasoning ---");
|
||||
|
||||
let mut memory = CausalMemory::new(DistanceMetric::Cosine)
|
||||
.with_weights(0.7, 0.2, 0.1); // α=0.7 (similarity), β=0.2 (causal), γ=0.1 (latency)
|
||||
|
||||
// Scenario: Agent learning from experience
|
||||
println!("Building causal memory from agent experiences...");
|
||||
|
||||
// States/actions as embeddings
|
||||
memory.index().add_entity(1, vec![1.0, 0.0, 0.0]); // Action: fetch_data
|
||||
memory.index().add_entity(2, vec![0.0, 1.0, 0.0]); // Effect: success
|
||||
memory.index().add_entity(3, vec![0.0, 0.0, 1.0]); // Context: morning
|
||||
|
||||
// Record successful causal relationship
|
||||
memory.add_causal_edge(
|
||||
1, // cause: fetch_data
|
||||
2, // effect: success
|
||||
vec![3], // context: morning
|
||||
"Fetching data in morning leads to success".to_string(),
|
||||
vec![0.5, 0.4, 0.1],
|
||||
50.0, // 50ms latency
|
||||
).unwrap();
|
||||
|
||||
// Record it again to increase causal strength
|
||||
memory.add_causal_edge(
|
||||
1, 2, vec![3],
|
||||
"Repeated success".to_string(),
|
||||
vec![0.5, 0.4, 0.1],
|
||||
45.0,
|
||||
).unwrap();
|
||||
|
||||
println!("Recorded causal relationships");
|
||||
|
||||
// Query: What actions should agent take in a similar situation?
|
||||
let query = vec![0.6, 0.3, 0.1]; // Similar to morning fetch scenario
|
||||
let results = memory.query_with_utility(&query, 1, 3);
|
||||
|
||||
println!("Querying causal memory for similar situation:");
|
||||
for (edge_id, utility) in results {
|
||||
if let Some(edge) = memory.index().get_hyperedge(&edge_id) {
|
||||
println!(" - {} (utility: {:.3})", edge.description, utility);
|
||||
}
|
||||
}
|
||||
println!("Utility = 0.7*similarity + 0.2*causal_uplift - 0.1*latency\n");
|
||||
}
|
||||
|
||||
fn demo_learned_index() {
|
||||
println!("--- Recursive Model Index (RMI) ---");
|
||||
|
||||
let mut rmi = RecursiveModelIndex::new(2, 4);
|
||||
|
||||
// Generate data: points on a curve
|
||||
println!("Building learned index from 1000 data points...");
|
||||
let data: Vec<(Vec<f32>, u64)> = (0..1000)
|
||||
.map(|i| {
|
||||
let x = (i as f32) / 1000.0;
|
||||
let y = x * x; // Parabola
|
||||
(vec![x, y], i as u64)
|
||||
})
|
||||
.collect();
|
||||
|
||||
rmi.build(data).unwrap();
|
||||
|
||||
// Test predictions
|
||||
println!("Testing predictions:");
|
||||
let test_points = vec![
|
||||
(vec![0.25, 0.0625], "Point on curve"),
|
||||
(vec![0.5, 0.25], "Mid point"),
|
||||
(vec![0.75, 0.5625], "Upper point"),
|
||||
];
|
||||
|
||||
for (point, desc) in test_points {
|
||||
let predicted_pos = rmi.predict(&point).unwrap();
|
||||
let actual_idx = (point[0] * 1000.0) as usize;
|
||||
let error = (predicted_pos as i32 - actual_idx as i32).abs();
|
||||
println!(" {} - Predicted: {}, Actual: {}, Error: {}",
|
||||
desc, predicted_pos, actual_idx, error);
|
||||
}
|
||||
|
||||
let stats = rmi.stats();
|
||||
println!("RMI Stats:");
|
||||
println!(" Total entries: {}", stats.total_entries);
|
||||
println!(" Model size: {} bytes", stats.model_size_bytes);
|
||||
println!(" Average error: {:.2}", stats.avg_error);
|
||||
println!(" Max error: {}\n", stats.max_error);
|
||||
}
|
||||
|
||||
fn demo_neural_hash() {
|
||||
println!("--- Neural Hash Functions for Compression ---");
|
||||
|
||||
// Using LSH for simplicity
|
||||
let lsh = SimpleLSH::new(128, 32);
|
||||
let mut index = HashIndex::new(lsh, 32);
|
||||
|
||||
println!("Creating hash index (128D -> 32 bits)...");
|
||||
|
||||
// Insert random high-dimensional vectors
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
for i in 0..100 {
|
||||
let vec: Vec<f32> = (0..128).map(|_| rng.gen::<f32>()).collect();
|
||||
index.insert(i, vec);
|
||||
}
|
||||
|
||||
println!("Inserted 100 vectors");
|
||||
|
||||
// Search with a query
|
||||
let query: Vec<f32> = (0..128).map(|_| rng.gen::<f32>()).collect();
|
||||
let results = index.search(&query, 5, 8); // Max Hamming distance: 8
|
||||
|
||||
println!("Search results (top 5):");
|
||||
for (id, similarity) in results.iter().take(5) {
|
||||
println!(" Vector {} - Similarity: {:.3}", id, similarity);
|
||||
}
|
||||
|
||||
let stats = index.stats();
|
||||
println!("Hash Index Stats:");
|
||||
println!(" Total vectors: {}", stats.total_vectors);
|
||||
println!(" Buckets: {}", stats.num_buckets);
|
||||
println!(" Avg bucket size: {:.2}", stats.avg_bucket_size);
|
||||
println!(" Compression ratio: {:.1}x\n", stats.compression_ratio);
|
||||
}
|
||||
|
||||
fn demo_topological_analysis() {
|
||||
println!("--- Topological Data Analysis for Embedding Quality ---");
|
||||
|
||||
let analyzer = TopologicalAnalyzer::new(5, 10.0);
|
||||
|
||||
// Create embeddings with known quality issues
|
||||
println!("Analyzing three embedding sets:\n");
|
||||
|
||||
// 1. Good embeddings: well-separated clusters
|
||||
println!("1. Good embeddings (two clusters):");
|
||||
let mut good_embeddings = Vec::new();
|
||||
for i in 0..30 {
|
||||
let angle = (i as f32) * 2.0 * std::f32::consts::PI / 30.0;
|
||||
good_embeddings.push(vec![angle.cos(), angle.sin()]);
|
||||
}
|
||||
for i in 0..30 {
|
||||
let angle = (i as f32) * 2.0 * std::f32::consts::PI / 30.0;
|
||||
good_embeddings.push(vec![5.0 + angle.cos(), 5.0 + angle.sin()]);
|
||||
}
|
||||
|
||||
let quality1 = analyzer.analyze(&good_embeddings).unwrap();
|
||||
print_quality_report(&quality1);
|
||||
|
||||
// 2. Mode collapsed embeddings
|
||||
println!("\n2. Mode collapsed embeddings:");
|
||||
let collapsed: Vec<Vec<f32>> = (0..60)
|
||||
.map(|i| vec![1.0 + (i as f32) * 0.01, 1.0 + (i as f32) * 0.01])
|
||||
.collect();
|
||||
|
||||
let quality2 = analyzer.analyze(&collapsed).unwrap();
|
||||
print_quality_report(&quality2);
|
||||
|
||||
// 3. Degenerate embeddings (stuck in 1D)
|
||||
println!("\n3. Degenerate embeddings (1D manifold in 2D space):");
|
||||
let degenerate: Vec<Vec<f32>> = (0..60)
|
||||
.map(|i| {
|
||||
let x = (i as f32) / 60.0;
|
||||
vec![x, 0.0] // All on x-axis
|
||||
})
|
||||
.collect();
|
||||
|
||||
let quality3 = analyzer.analyze(°enerate).unwrap();
|
||||
print_quality_report(&quality3);
|
||||
}
|
||||
|
||||
fn print_quality_report(quality: &EmbeddingQuality) {
|
||||
println!(" Dimensions: {}", quality.dimensions);
|
||||
println!(" Vectors: {}", quality.num_vectors);
|
||||
println!(" Connected components: {}", quality.connected_components);
|
||||
println!(" Clustering coefficient: {:.3}", quality.clustering_coefficient);
|
||||
println!(" Mode collapse score: {:.3} (0=collapsed, 1=good)", quality.mode_collapse_score);
|
||||
println!(" Degeneracy score: {:.3} (0=full rank, 1=degenerate)", quality.degeneracy_score);
|
||||
println!(" Overall quality: {:.3}", quality.quality_score);
|
||||
println!(" Assessment: {}", quality.assessment());
|
||||
|
||||
if quality.has_mode_collapse() {
|
||||
println!(" ⚠️ WARNING: Mode collapse detected!");
|
||||
}
|
||||
if quality.is_degenerate() {
|
||||
println!(" ⚠️ WARNING: Embeddings are degenerate!");
|
||||
}
|
||||
}
|
||||
319
vendor/ruvector/examples/rust/agenticdb_demo.rs
vendored
Normal file
319
vendor/ruvector/examples/rust/agenticdb_demo.rs
vendored
Normal file
@@ -0,0 +1,319 @@
|
||||
//! AgenticDB API Demonstration
|
||||
//!
|
||||
//! Shows all 5 tables and API features:
|
||||
//! 1. Reflexion Episodes - Self-critique memory
|
||||
//! 2. Skill Library - Consolidated patterns
|
||||
//! 3. Causal Memory - Hypergraph relationships
|
||||
//! 4. Learning Sessions - RL training data
|
||||
//! 5. Vector DB - Core embeddings
|
||||
|
||||
use ruvector_core::{AgenticDB, DbOptions, Result};
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
println!("🚀 AgenticDB API Demonstration\n");
|
||||
|
||||
// Initialize AgenticDB
|
||||
let mut options = DbOptions::default();
|
||||
options.dimensions = 128;
|
||||
options.storage_path = "./demo_agenticdb.db".to_string();
|
||||
|
||||
let db = AgenticDB::new(options)?;
|
||||
|
||||
// ============ 1. Reflexion Memory Demo ============
|
||||
println!("📝 1. REFLEXION MEMORY - Self-Critique Episodes");
|
||||
println!("------------------------------------------------");
|
||||
|
||||
// Store an episode where the agent made a mistake
|
||||
let episode1 = db.store_episode(
|
||||
"Solve a coding problem".to_string(),
|
||||
vec![
|
||||
"Read problem description".to_string(),
|
||||
"Write initial solution".to_string(),
|
||||
"Submit without testing".to_string(),
|
||||
],
|
||||
vec![
|
||||
"Solution failed test cases".to_string(),
|
||||
"Missed edge case with empty input".to_string(),
|
||||
],
|
||||
"I should have tested edge cases before submitting. Always check empty input, null values, and boundary conditions.".to_string(),
|
||||
)?;
|
||||
println!("✅ Stored episode: {}", episode1);
|
||||
|
||||
// Store another episode with improved behavior
|
||||
let episode2 = db.store_episode(
|
||||
"Debug a complex function".to_string(),
|
||||
vec![
|
||||
"Added logging statements".to_string(),
|
||||
"Tested with sample inputs".to_string(),
|
||||
"Fixed the bug".to_string(),
|
||||
],
|
||||
vec![
|
||||
"Found the issue in O(n) time".to_string(),
|
||||
"Tests passed".to_string(),
|
||||
],
|
||||
"Using systematic logging helped identify the issue quickly. This is a good debugging strategy.".to_string(),
|
||||
)?;
|
||||
println!("✅ Stored episode: {}", episode2);
|
||||
|
||||
// Retrieve similar episodes when facing a new coding task
|
||||
let similar_episodes = db.retrieve_similar_episodes("how to approach coding problems", 5)?;
|
||||
println!("\n🔍 Found {} similar episodes for 'coding problems':", similar_episodes.len());
|
||||
for (i, episode) in similar_episodes.iter().enumerate() {
|
||||
println!(" {}. Task: {} | Critique: {}", i + 1, episode.task, episode.critique);
|
||||
}
|
||||
|
||||
// ============ 2. Skill Library Demo ============
|
||||
println!("\n\n🎯 2. SKILL LIBRARY - Reusable Patterns");
|
||||
println!("------------------------------------------------");
|
||||
|
||||
// Create skills for common tasks
|
||||
let mut params1 = HashMap::new();
|
||||
params1.insert("input".to_string(), "string".to_string());
|
||||
params1.insert("output".to_string(), "json".to_string());
|
||||
|
||||
let skill1 = db.create_skill(
|
||||
"JSON Parser".to_string(),
|
||||
"Parse JSON string into structured data".to_string(),
|
||||
params1,
|
||||
vec![
|
||||
"let data = JSON.parse(input);".to_string(),
|
||||
"return data;".to_string(),
|
||||
],
|
||||
)?;
|
||||
println!("✅ Created skill: JSON Parser ({})", skill1);
|
||||
|
||||
let mut params2 = HashMap::new();
|
||||
params2.insert("data".to_string(), "array".to_string());
|
||||
params2.insert("field".to_string(), "string".to_string());
|
||||
|
||||
let skill2 = db.create_skill(
|
||||
"Data Aggregator".to_string(),
|
||||
"Aggregate and summarize array data by field".to_string(),
|
||||
params2,
|
||||
vec![
|
||||
"let groups = data.reduce((acc, item) => {".to_string(),
|
||||
" acc[item[field]] = (acc[item[field]] || 0) + 1;".to_string(),
|
||||
" return acc;".to_string(),
|
||||
"}, {});".to_string(),
|
||||
],
|
||||
)?;
|
||||
println!("✅ Created skill: Data Aggregator ({})", skill2);
|
||||
|
||||
// Search for relevant skills
|
||||
let found_skills = db.search_skills("parse and process json data", 5)?;
|
||||
println!("\n🔍 Found {} skills for 'parse json':", found_skills.len());
|
||||
for skill in found_skills {
|
||||
println!(" - {} ({}) | Success rate: {:.1}%",
|
||||
skill.name, skill.id, skill.success_rate * 100.0);
|
||||
}
|
||||
|
||||
// Auto-consolidate action sequences into skills
|
||||
let action_sequences = vec![
|
||||
vec!["read_file".to_string(), "parse_json".to_string(), "validate_schema".to_string()],
|
||||
vec!["fetch_api".to_string(), "extract_data".to_string(), "cache_result".to_string()],
|
||||
vec!["open_db".to_string(), "query_data".to_string(), "close_db".to_string()],
|
||||
];
|
||||
|
||||
let consolidated_skills = db.auto_consolidate(action_sequences, 3)?;
|
||||
println!("\n✅ Auto-consolidated {} new skills from action sequences", consolidated_skills.len());
|
||||
|
||||
// ============ 3. Causal Memory Demo ============
|
||||
println!("\n\n🧠 3. CAUSAL MEMORY - Hypergraph Relationships");
|
||||
println!("------------------------------------------------");
|
||||
|
||||
// Add causal edges with hypergraph support (multiple causes -> multiple effects)
|
||||
let edge1 = db.add_causal_edge(
|
||||
vec!["high CPU usage".to_string(), "memory leak".to_string()],
|
||||
vec!["system slowdown".to_string(), "application crash".to_string()],
|
||||
0.92,
|
||||
"Server performance issue observed in production".to_string(),
|
||||
)?;
|
||||
println!("✅ Added causal edge: CPU+Memory -> Slowdown+Crash ({})", edge1);
|
||||
|
||||
let edge2 = db.add_causal_edge(
|
||||
vec!["missing index".to_string()],
|
||||
vec!["slow queries".to_string(), "database timeout".to_string()],
|
||||
0.87,
|
||||
"Database performance degradation".to_string(),
|
||||
)?;
|
||||
println!("✅ Added causal edge: No Index -> Slow Queries+Timeout ({})", edge2);
|
||||
|
||||
let edge3 = db.add_causal_edge(
|
||||
vec!["cache invalidation".to_string(), "traffic spike".to_string()],
|
||||
vec!["increased load".to_string(), "response delay".to_string()],
|
||||
0.78,
|
||||
"Cache-related performance issue".to_string(),
|
||||
)?;
|
||||
println!("✅ Added causal edge: Cache+Traffic -> Load+Delay ({})", edge3);
|
||||
|
||||
// Query with utility function: U = α·similarity + β·causal_uplift − γ·latency
|
||||
println!("\n🔍 Querying with utility function (α=0.7, β=0.2, γ=0.1):");
|
||||
let utility_results = db.query_with_utility(
|
||||
"performance problems in production",
|
||||
5,
|
||||
0.7, // alpha: similarity weight
|
||||
0.2, // beta: causal confidence weight
|
||||
0.1, // gamma: latency penalty weight
|
||||
)?;
|
||||
|
||||
for (i, result) in utility_results.iter().enumerate() {
|
||||
println!(" {}. Utility: {:.3} | Similarity: {:.3} | Causal: {:.3} | Latency: {:.3}ms",
|
||||
i + 1,
|
||||
result.utility_score,
|
||||
result.similarity_score,
|
||||
result.causal_uplift,
|
||||
result.latency_penalty * 1000.0,
|
||||
);
|
||||
}
|
||||
|
||||
// ============ 4. Learning Sessions Demo ============
|
||||
println!("\n\n🤖 4. LEARNING SESSIONS - RL Training");
|
||||
println!("------------------------------------------------");
|
||||
|
||||
// Start a Q-Learning session for navigation
|
||||
let session1 = db.start_session(
|
||||
"Q-Learning".to_string(),
|
||||
4, // state_dim: [x, y, goal_x, goal_y]
|
||||
2, // action_dim: [move_x, move_y]
|
||||
)?;
|
||||
println!("✅ Started Q-Learning session: {}", session1);
|
||||
|
||||
// Add training experiences
|
||||
println!("\n📊 Adding training experiences...");
|
||||
for i in 0..10 {
|
||||
let state = vec![i as f32, 0.0, 10.0, 10.0];
|
||||
let action = vec![1.0, 0.0]; // Move right
|
||||
let reward = if i < 5 { 0.5 } else { 1.0 }; // Higher reward as we get closer
|
||||
let next_state = vec![(i + 1) as f32, 0.0, 10.0, 10.0];
|
||||
let done = i == 9;
|
||||
|
||||
db.add_experience(&session1, state, action, reward, next_state, done)?;
|
||||
println!(" ✓ Experience {}: reward={:.1}", i + 1, reward);
|
||||
}
|
||||
|
||||
// Make a prediction with confidence interval
|
||||
let test_state = vec![5.0, 0.0, 10.0, 10.0];
|
||||
let prediction = db.predict_with_confidence(&session1, test_state)?;
|
||||
|
||||
println!("\n🎯 Prediction for state [5.0, 0.0, 10.0, 10.0]:");
|
||||
println!(" Action: {:?}", prediction.action);
|
||||
println!(" Confidence: {:.3} ± [{:.3}, {:.3}]",
|
||||
prediction.mean_confidence,
|
||||
prediction.confidence_lower,
|
||||
prediction.confidence_upper,
|
||||
);
|
||||
|
||||
// Start a DQN session for game playing
|
||||
let session2 = db.start_session(
|
||||
"DQN".to_string(),
|
||||
8, // state_dim: game state
|
||||
4, // action_dim: up, down, left, right
|
||||
)?;
|
||||
println!("\n✅ Started DQN session: {}", session2);
|
||||
|
||||
// ============ 5. Integration Demo ============
|
||||
println!("\n\n🔗 5. INTEGRATION - All Systems Working Together");
|
||||
println!("------------------------------------------------");
|
||||
|
||||
// Scenario: Agent learns from mistakes and builds skills
|
||||
println!("\n📖 Scenario: Agent solving a series of problems");
|
||||
|
||||
// Step 1: Agent fails and reflects
|
||||
let fail_episode = db.store_episode(
|
||||
"Optimize database query".to_string(),
|
||||
vec![
|
||||
"Wrote complex nested query".to_string(),
|
||||
"Ran query on production".to_string(),
|
||||
],
|
||||
vec!["Query timed out after 30 seconds".to_string()],
|
||||
"Should have tested on staging first and checked query plan. Complex nested queries need optimization.".to_string(),
|
||||
)?;
|
||||
println!("❌ Episode: Failed query optimization");
|
||||
|
||||
// Step 2: Agent identifies causal relationship
|
||||
let cause_effect = db.add_causal_edge(
|
||||
vec!["nested subqueries".to_string(), "missing index".to_string()],
|
||||
vec!["slow execution".to_string()],
|
||||
0.95,
|
||||
"Query performance analysis".to_string(),
|
||||
)?;
|
||||
println!("🧠 Learned: Nested queries + No index → Slow execution");
|
||||
|
||||
// Step 3: Agent succeeds and builds skill
|
||||
let success_episode = db.store_episode(
|
||||
"Optimize database query (retry)".to_string(),
|
||||
vec![
|
||||
"Analyzed query plan".to_string(),
|
||||
"Added composite index".to_string(),
|
||||
"Simplified query structure".to_string(),
|
||||
"Tested on staging".to_string(),
|
||||
],
|
||||
vec!["Query completed in 0.2 seconds".to_string()],
|
||||
"Breaking down the problem and using indexes is the key. Always check query plans first.".to_string(),
|
||||
)?;
|
||||
println!("✅ Episode: Successful optimization");
|
||||
|
||||
// Step 4: Agent consolidates into reusable skill
|
||||
let optimization_skill = db.create_skill(
|
||||
"Query Optimizer".to_string(),
|
||||
"Optimize slow database queries using index analysis and query plan review".to_string(),
|
||||
{
|
||||
let mut params = HashMap::new();
|
||||
params.insert("query".to_string(), "string".to_string());
|
||||
params.insert("tables".to_string(), "array".to_string());
|
||||
params
|
||||
},
|
||||
vec![
|
||||
"EXPLAIN ANALYZE query;".to_string(),
|
||||
"Identify missing indexes".to_string(),
|
||||
"CREATE INDEX IF NOT EXISTS...".to_string(),
|
||||
"Simplify nested subqueries".to_string(),
|
||||
"Test on staging".to_string(),
|
||||
],
|
||||
)?;
|
||||
println!("🎯 Created skill: Query Optimizer");
|
||||
|
||||
// Step 5: Agent uses RL to learn optimal strategies
|
||||
let strategy_session = db.start_session(
|
||||
"PPO".to_string(),
|
||||
6, // state: [query_complexity, table_size, index_count, ...]
|
||||
3, // action: [add_index, simplify, cache]
|
||||
)?;
|
||||
println!("🤖 Started RL session for strategy learning");
|
||||
|
||||
// Now when facing similar problems, agent can:
|
||||
println!("\n🎓 Agent capabilities after learning:");
|
||||
|
||||
// 1. Retrieve similar past experiences
|
||||
let relevant_episodes = db.retrieve_similar_episodes("database query performance", 3)?;
|
||||
println!(" ✓ Retrieved {} relevant past experiences", relevant_episodes.len());
|
||||
|
||||
// 2. Find applicable skills
|
||||
let applicable_skills = db.search_skills("optimize database queries", 3)?;
|
||||
println!(" ✓ Found {} applicable skills", applicable_skills.len());
|
||||
|
||||
// 3. Understand causal relationships
|
||||
let causal_knowledge = db.query_with_utility("query performance factors", 3, 0.7, 0.2, 0.1)?;
|
||||
println!(" ✓ Retrieved {} causal relationships", causal_knowledge.len());
|
||||
|
||||
// 4. Make informed decisions using RL
|
||||
let current_state = vec![5.0, 1000.0, 2.0, 0.0, 0.0, 0.0];
|
||||
let recommended_action = db.predict_with_confidence(&strategy_session, current_state)?;
|
||||
println!(" ✓ Predicted optimal action with {:.1}% confidence",
|
||||
recommended_action.mean_confidence * 100.0);
|
||||
|
||||
println!("\n✨ AgenticDB Demo Complete!");
|
||||
println!("\nAll 5 tables working together:");
|
||||
println!(" 1. ✅ Reflexion Episodes - Learning from mistakes");
|
||||
println!(" 2. ✅ Skill Library - Building reusable patterns");
|
||||
println!(" 3. ✅ Causal Memory - Understanding relationships");
|
||||
println!(" 4. ✅ Learning Sessions - Optimizing strategies");
|
||||
println!(" 5. ✅ Vector DB - Fast similarity search");
|
||||
|
||||
println!("\n🚀 Performance: 10-100x faster than original agenticDB");
|
||||
println!("💾 Storage: Efficient HNSW indexing + redb persistence");
|
||||
println!("🎯 Ready for production agentic AI systems!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
73
vendor/ruvector/examples/rust/basic_usage.rs
vendored
Normal file
73
vendor/ruvector/examples/rust/basic_usage.rs
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
//! Basic usage example for Ruvector
|
||||
//!
|
||||
//! Demonstrates:
|
||||
//! - Creating a database
|
||||
//! - Inserting vectors
|
||||
//! - Searching for similar vectors
|
||||
//! - Basic configuration
|
||||
|
||||
use ruvector_core::{VectorDB, VectorEntry, SearchQuery, DbOptions, Result};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
println!("🚀 Ruvector Basic Usage Example\n");
|
||||
|
||||
// 1. Create a database
|
||||
println!("1. Creating database...");
|
||||
let mut options = DbOptions::default();
|
||||
options.dimensions = 128;
|
||||
options.storage_path = "./examples_basic.db".to_string();
|
||||
|
||||
let db = VectorDB::new(options)?;
|
||||
println!(" ✓ Database created with 128 dimensions\n");
|
||||
|
||||
// 2. Insert a single vector
|
||||
println!("2. Inserting single vector...");
|
||||
let entry = VectorEntry {
|
||||
id: Some("doc_001".to_string()),
|
||||
vector: vec![0.1; 128],
|
||||
metadata: None,
|
||||
};
|
||||
|
||||
let id = db.insert(entry)?;
|
||||
println!(" ✓ Inserted vector: {}\n", id);
|
||||
|
||||
// 3. Insert multiple vectors
|
||||
println!("3. Inserting multiple vectors...");
|
||||
let entries: Vec<VectorEntry> = (0..100)
|
||||
.map(|i| VectorEntry {
|
||||
id: Some(format!("doc_{:03}", i + 2)),
|
||||
vector: vec![0.1 + (i as f32) * 0.001; 128],
|
||||
metadata: None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let ids = db.insert_batch(entries)?;
|
||||
println!(" ✓ Inserted {} vectors\n", ids.len());
|
||||
|
||||
// 4. Search for similar vectors
|
||||
println!("4. Searching for similar vectors...");
|
||||
let query = SearchQuery {
|
||||
vector: vec![0.15; 128],
|
||||
k: 5,
|
||||
filter: None,
|
||||
include_vectors: false,
|
||||
};
|
||||
|
||||
let results = db.search(&query)?;
|
||||
println!(" ✓ Found {} results:", results.len());
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
println!(" {}. ID: {}, Distance: {:.6}",
|
||||
i + 1, result.id, result.distance
|
||||
);
|
||||
}
|
||||
println!();
|
||||
|
||||
// 5. Get database stats
|
||||
println!("5. Database statistics:");
|
||||
let total = db.count();
|
||||
println!(" ✓ Total vectors: {}\n", total);
|
||||
|
||||
println!("✅ Example completed successfully!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
78
vendor/ruvector/examples/rust/batch_operations.rs
vendored
Normal file
78
vendor/ruvector/examples/rust/batch_operations.rs
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
//! Batch operations example
|
||||
//!
|
||||
//! Demonstrates efficient batch processing for high throughput
|
||||
|
||||
use ruvector_core::{VectorDB, VectorEntry, SearchQuery, DbOptions, Result};
|
||||
use rand::Rng;
|
||||
use std::time::Instant;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
println!("🚀 Ruvector Batch Operations Example\n");
|
||||
|
||||
// Setup
|
||||
let mut options = DbOptions::default();
|
||||
options.dimensions = 128;
|
||||
options.storage_path = "./examples_batch.db".to_string();
|
||||
|
||||
let db = VectorDB::new(options)?;
|
||||
|
||||
// Generate test data
|
||||
println!("1. Generating 10,000 random vectors...");
|
||||
let mut rng = rand::thread_rng();
|
||||
let entries: Vec<VectorEntry> = (0..10_000)
|
||||
.map(|i| {
|
||||
let vector: Vec<f32> = (0..128)
|
||||
.map(|_| rng.gen::<f32>())
|
||||
.collect();
|
||||
|
||||
VectorEntry {
|
||||
id: Some(format!("vec_{:05}", i)),
|
||||
vector,
|
||||
metadata: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
println!(" ✓ Generated 10,000 vectors\n");
|
||||
|
||||
// Batch insert
|
||||
println!("2. Batch inserting 10,000 vectors...");
|
||||
let start = Instant::now();
|
||||
let ids = db.insert_batch(entries)?;
|
||||
let duration = start.elapsed();
|
||||
|
||||
println!(" ✓ Inserted {} vectors", ids.len());
|
||||
println!(" ✓ Time: {:?}", duration);
|
||||
println!(" ✓ Throughput: {:.0} vectors/sec\n",
|
||||
ids.len() as f64 / duration.as_secs_f64()
|
||||
);
|
||||
|
||||
// Benchmark search
|
||||
println!("3. Benchmarking search operations...");
|
||||
let num_queries = 1000;
|
||||
let query_vector: Vec<f32> = (0..128).map(|_| rng.gen::<f32>()).collect();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..num_queries {
|
||||
let query = SearchQuery {
|
||||
vector: query_vector.clone(),
|
||||
k: 10,
|
||||
filter: None,
|
||||
include_vectors: false,
|
||||
};
|
||||
db.search(&query)?;
|
||||
}
|
||||
let duration = start.elapsed();
|
||||
|
||||
println!(" ✓ Executed {} queries", num_queries);
|
||||
println!(" ✓ Total time: {:?}", duration);
|
||||
println!(" ✓ Average latency: {:.2}ms",
|
||||
duration.as_secs_f64() * 1000.0 / num_queries as f64
|
||||
);
|
||||
println!(" ✓ Throughput: {:.0} queries/sec\n",
|
||||
num_queries as f64 / duration.as_secs_f64()
|
||||
);
|
||||
|
||||
println!("✅ Batch operations completed!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
60
vendor/ruvector/examples/rust/gnn_example.rs
vendored
Normal file
60
vendor/ruvector/examples/rust/gnn_example.rs
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Example demonstrating the Ruvector GNN layer usage
|
||||
|
||||
use ruvector_gnn::{RuvectorLayer, Linear, MultiHeadAttention, GRUCell, LayerNorm};
|
||||
|
||||
fn main() {
|
||||
println!("=== Ruvector GNN Layer Example ===\n");
|
||||
|
||||
// Create a GNN layer
|
||||
// Parameters: input_dim=128, hidden_dim=256, heads=4, dropout=0.1
|
||||
let gnn_layer = RuvectorLayer::new(128, 256, 4, 0.1);
|
||||
|
||||
// Simulate a node embedding (128 dimensions)
|
||||
let node_embedding = vec![0.5; 128];
|
||||
|
||||
// Simulate 3 neighbor embeddings
|
||||
let neighbor_embeddings = vec![
|
||||
vec![0.3; 128],
|
||||
vec![0.7; 128],
|
||||
vec![0.5; 128],
|
||||
];
|
||||
|
||||
// Edge weights (e.g., inverse distances)
|
||||
let edge_weights = vec![0.8, 0.6, 0.4];
|
||||
|
||||
// Forward pass through the GNN layer
|
||||
let updated_embedding = gnn_layer.forward(&node_embedding, &neighbor_embeddings, &edge_weights);
|
||||
|
||||
println!("Input dimension: {}", node_embedding.len());
|
||||
println!("Output dimension: {}", updated_embedding.len());
|
||||
println!("Number of neighbors: {}", neighbor_embeddings.len());
|
||||
println!("\n✓ GNN layer forward pass successful!");
|
||||
|
||||
// Demonstrate individual components
|
||||
println!("\n=== Individual Components ===\n");
|
||||
|
||||
// 1. Linear layer
|
||||
let linear = Linear::new(128, 64);
|
||||
let linear_output = linear.forward(&node_embedding);
|
||||
println!("Linear layer: 128 -> {}", linear_output.len());
|
||||
|
||||
// 2. Layer normalization
|
||||
let layer_norm = LayerNorm::new(128, 1e-5);
|
||||
let normalized = layer_norm.forward(&node_embedding);
|
||||
println!("LayerNorm output dimension: {}", normalized.len());
|
||||
|
||||
// 3. Multi-head attention
|
||||
let attention = MultiHeadAttention::new(128, 4);
|
||||
let keys = neighbor_embeddings.clone();
|
||||
let values = neighbor_embeddings.clone();
|
||||
let attention_output = attention.forward(&node_embedding, &keys, &values);
|
||||
println!("Multi-head attention output: {}", attention_output.len());
|
||||
|
||||
// 4. GRU cell
|
||||
let gru = GRUCell::new(128, 256);
|
||||
let hidden_state = vec![0.0; 256];
|
||||
let new_hidden = gru.forward(&node_embedding, &hidden_state);
|
||||
println!("GRU cell output dimension: {}", new_hidden.len());
|
||||
|
||||
println!("\n✓ All components working correctly!");
|
||||
}
|
||||
147
vendor/ruvector/examples/rust/rag_pipeline.rs
vendored
Normal file
147
vendor/ruvector/examples/rust/rag_pipeline.rs
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
//! RAG (Retrieval Augmented Generation) Pipeline Example
|
||||
//!
|
||||
//! Demonstrates building a complete RAG system with Ruvector.
|
||||
//!
|
||||
//! ⚠️ NOTE: This example uses MOCK embeddings for demonstration.
|
||||
//! In production, replace `mock_embedding()` with a real embedding model:
|
||||
//! - `sentence-transformers` via Python bindings
|
||||
//! - `candle` for native Rust inference
|
||||
//! - ONNX Runtime for cross-platform models
|
||||
//! - OpenAI/Anthropic embedding APIs
|
||||
|
||||
use ruvector_core::{VectorDB, VectorEntry, SearchQuery, DbOptions, Result};
|
||||
use std::collections::HashMap;
|
||||
use serde_json::json;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
println!("📚 RAG Pipeline Example\n");
|
||||
|
||||
// 1. Setup database
|
||||
println!("1. Setting up knowledge base...");
|
||||
let mut options = DbOptions::default();
|
||||
options.dimensions = 384; // sentence-transformers/all-MiniLM-L6-v2
|
||||
options.storage_path = "./rag_knowledge.db".to_string();
|
||||
|
||||
let db = VectorDB::new(options)?;
|
||||
println!(" ✓ Database created\n");
|
||||
|
||||
// 2. Ingest documents
|
||||
println!("2. Ingesting documents into knowledge base...");
|
||||
let documents = vec![
|
||||
(
|
||||
"Rust is a systems programming language that focuses on safety and performance.",
|
||||
mock_embedding(384, 1.0)
|
||||
),
|
||||
(
|
||||
"Vector databases enable semantic search by storing and querying embeddings.",
|
||||
mock_embedding(384, 1.1)
|
||||
),
|
||||
(
|
||||
"HNSW (Hierarchical Navigable Small World) provides efficient approximate nearest neighbor search.",
|
||||
mock_embedding(384, 1.2)
|
||||
),
|
||||
(
|
||||
"RAG combines retrieval systems with language models for better context-aware generation.",
|
||||
mock_embedding(384, 1.3)
|
||||
),
|
||||
(
|
||||
"Embeddings are dense vector representations of text that capture semantic meaning.",
|
||||
mock_embedding(384, 1.4)
|
||||
),
|
||||
];
|
||||
|
||||
let entries: Vec<VectorEntry> = documents.into_iter().enumerate()
|
||||
.map(|(i, (text, embedding))| {
|
||||
let mut metadata = HashMap::new();
|
||||
metadata.insert("text".to_string(), json!(text));
|
||||
metadata.insert("doc_id".to_string(), json!(format!("doc_{}", i)));
|
||||
metadata.insert("timestamp".to_string(), json!(chrono::Utc::now().timestamp()));
|
||||
|
||||
VectorEntry {
|
||||
id: Some(format!("doc_{}", i)),
|
||||
vector: embedding,
|
||||
metadata: Some(metadata),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
db.insert_batch(entries)?;
|
||||
println!(" ✓ Ingested {} documents\n", 5);
|
||||
|
||||
// 3. Retrieval phase
|
||||
println!("3. Retrieval phase (finding relevant context)...");
|
||||
let user_query = "How do vector databases work?";
|
||||
let query_embedding = mock_embedding(384, 1.15); // Mock embedding for query
|
||||
|
||||
let query = SearchQuery {
|
||||
vector: query_embedding,
|
||||
k: 3, // Retrieve top 3 most relevant documents
|
||||
filter: None,
|
||||
include_vectors: false,
|
||||
};
|
||||
|
||||
let results = db.search(&query)?;
|
||||
println!(" ✓ Query: \"{}\"", user_query);
|
||||
println!(" ✓ Retrieved {} relevant documents:\n", results.len());
|
||||
|
||||
let mut context_passages = Vec::new();
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
if let Some(metadata) = &result.metadata {
|
||||
if let Some(text) = metadata.get("text") {
|
||||
let text_str = text.as_str().unwrap();
|
||||
context_passages.push(text_str);
|
||||
println!(" {}. (score: {:.4})", i + 1, result.distance);
|
||||
println!(" {}\n", text_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Generation phase (mock)
|
||||
println!("4. Generation phase (constructing prompt for LLM)...");
|
||||
let prompt = construct_rag_prompt(user_query, &context_passages);
|
||||
println!(" ✓ Prompt constructed:");
|
||||
println!(" {}\n", "─".repeat(60));
|
||||
println!("{}", prompt);
|
||||
println!(" {}\n", "─".repeat(60));
|
||||
|
||||
// 5. (In real application, send prompt to LLM here)
|
||||
println!("5. Next step: Send prompt to LLM for generation");
|
||||
println!(" ✓ In production, you would:");
|
||||
println!(" - Send the constructed prompt to an LLM (GPT, Claude, etc.)");
|
||||
println!(" - Receive context-aware response");
|
||||
println!(" - Return response to user\n");
|
||||
|
||||
println!("✅ RAG pipeline example completed!");
|
||||
println!("\n💡 Key benefits:");
|
||||
println!(" • Semantic search finds relevant context automatically");
|
||||
println!(" • LLM generates responses based on your knowledge base");
|
||||
println!(" • Up-to-date information without retraining models");
|
||||
println!(" • Sub-millisecond retrieval with Ruvector");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// ⚠️ MOCK EMBEDDING - NOT SEMANTIC
///
/// Produces a deterministic pseudo-embedding: element `i` equals
/// `sin(seed + i * 0.001)`. Swap in a real embedding model
/// (sentence-transformers, candle, ONNX Runtime, or an API)
/// for actual semantic search.
fn mock_embedding(dims: usize, seed: f32) -> Vec<f32> {
    let mut values = Vec::with_capacity(dims);
    for i in 0..dims {
        values.push((seed + i as f32 * 0.001).sin());
    }
    values
}
|
||||
|
||||
/// Assembles the final LLM prompt: a fixed instruction header, the
/// numbered context passages, and the user's question.
fn construct_rag_prompt(query: &str, context: &[&str]) -> String {
    // Number each passage ("[1] ...") and separate them with blank lines.
    let mut numbered = Vec::with_capacity(context.len());
    for (idx, passage) in context.iter().enumerate() {
        numbered.push(format!("[{}] {}", idx + 1, passage));
    }
    let context_block = numbered.join("\n\n");

    format!(
        "You are a helpful assistant. Answer the user's question based on the provided context.\n\n\
         Context:\n{}\n\n\
         User Question: {}\n\n\
         Answer:",
        context_block, query
    )
}
|
||||
Reference in New Issue
Block a user