//! Semantic search example using the RuVector ONNX embeddings integration.
//!
//! Indexes a small corpus of sample documents and runs natural-language
//! queries against it.

use anyhow::Result;
|
|
use ruvector_onnx_embeddings::{
|
|
Embedder, RuVectorEmbeddings, IndexConfig, Distance,
|
|
};
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
println!("=== Semantic Search with RuVector ONNX Embeddings ===\n");
|
|
|
|
// Initialize embedder
|
|
let embedder = Embedder::default_model().await?;
|
|
println!("Loaded model with dimension: {}", embedder.dimension());
|
|
|
|
// Create index with custom configuration
|
|
let config = IndexConfig {
|
|
distance: Distance::Cosine,
|
|
max_elements: 100_000,
|
|
ef_search: 100,
|
|
};
|
|
|
|
let index = RuVectorEmbeddings::new("semantic_docs", embedder, config)?;
|
|
|
|
// Sample document corpus
|
|
let documents = vec![
|
|
("doc1", "Rust provides memory safety without garbage collection through its ownership system."),
|
|
("doc2", "Python's simplicity makes it ideal for beginners learning programming."),
|
|
("doc3", "JavaScript dominates web development with frameworks like React and Vue."),
|
|
("doc4", "Machine learning models can be trained using TensorFlow or PyTorch."),
|
|
("doc5", "Docker containers provide consistent deployment environments."),
|
|
("doc6", "Kubernetes orchestrates containerized applications at scale."),
|
|
("doc7", "GraphQL offers a more efficient alternative to REST APIs."),
|
|
("doc8", "PostgreSQL is a powerful open-source relational database."),
|
|
("doc9", "Redis provides in-memory data storage for caching."),
|
|
("doc10", "Elasticsearch enables full-text search across large datasets."),
|
|
];
|
|
|
|
// Index documents with metadata
|
|
println!("Indexing {} documents...", documents.len());
|
|
for (id, content) in &documents {
|
|
let metadata = serde_json::json!({ "doc_id": id });
|
|
index.insert(content, Some(metadata))?;
|
|
}
|
|
|
|
println!("Index contains {} vectors\n", index.len());
|
|
|
|
// Perform semantic searches
|
|
let queries = vec![
|
|
"How can I ensure memory safety in my code?",
|
|
"What's the best language for web applications?",
|
|
"How do I deploy applications in containers?",
|
|
"I need a fast database for caching",
|
|
];
|
|
|
|
for query in queries {
|
|
println!("🔍 Query: \"{}\"\n", query);
|
|
|
|
let results = index.search(query, 3)?;
|
|
|
|
for (rank, result) in results.iter().enumerate() {
|
|
println!(" {}. [Score: {:.4}]", rank + 1, result.score);
|
|
println!(" {}", result.text);
|
|
if let Some(meta) = &result.metadata {
|
|
if let Some(doc_id) = meta.get("doc_id") {
|
|
println!(" ({})", doc_id);
|
|
}
|
|
}
|
|
println!();
|
|
}
|
|
|
|
println!("{}\n", "-".repeat(70));
|
|
}
|
|
|
|
// Find similar documents
|
|
println!("=== Finding Similar Documents ===\n");
|
|
let query_doc = documents[0].1; // Rust document
|
|
println!("Finding documents similar to:\n\"{}\"\n", query_doc);
|
|
|
|
let similar = index.search(query_doc, 4)?;
|
|
for (i, result) in similar.iter().skip(1).enumerate() {
|
|
// Skip first (self)
|
|
println!(" {}. [Score: {:.4}] {}", i + 1, result.score, result.text);
|
|
}
|
|
|
|
Ok(())
|
|
}
|