Files
wifi-densepose/vendor/ruvector/examples/onnx-embeddings/examples/semantic_search.rs

88 lines
3.1 KiB
Rust

//! Semantic search example using RuVector integration
use anyhow::Result;
use ruvector_onnx_embeddings::{
Embedder, RuVectorEmbeddings, IndexConfig, Distance,
};
/// Entry point: builds a small semantic index and runs example searches against it.
///
/// Steps, in order:
/// 1. Load the default embedding model and print its dimension.
/// 2. Create a `RuVectorEmbeddings` index named `"semantic_docs"` configured for
///    cosine distance.
/// 3. Insert ten sample sentences, each tagged with a `doc_id` metadata field.
/// 4. Run four natural-language queries and print the top three hits for each.
/// 5. Search using the first document's own text and list every hit after the first.
///
/// # Errors
///
/// Propagates any error returned by model loading, index creation, insertion,
/// or search.
#[tokio::main]
async fn main() -> Result<()> {
    println!("=== Semantic Search with RuVector ONNX Embeddings ===\n");

    // Load the model first; its dimension is reported before the embedder is
    // handed over to the index.
    let model = Embedder::default_model().await?;
    println!("Loaded model with dimension: {}", model.dimension());

    // Index settings for this example.
    let index_config = IndexConfig {
        distance: Distance::Cosine,
        max_elements: 100_000,
        ef_search: 100,
    };
    let index = RuVectorEmbeddings::new("semantic_docs", model, index_config)?;

    // Sample corpus as (document id, text) pairs.
    let documents = [
        ("doc1", "Rust provides memory safety without garbage collection through its ownership system."),
        ("doc2", "Python's simplicity makes it ideal for beginners learning programming."),
        ("doc3", "JavaScript dominates web development with frameworks like React and Vue."),
        ("doc4", "Machine learning models can be trained using TensorFlow or PyTorch."),
        ("doc5", "Docker containers provide consistent deployment environments."),
        ("doc6", "Kubernetes orchestrates containerized applications at scale."),
        ("doc7", "GraphQL offers a more efficient alternative to REST APIs."),
        ("doc8", "PostgreSQL is a powerful open-source relational database."),
        ("doc9", "Redis provides in-memory data storage for caching."),
        ("doc10", "Elasticsearch enables full-text search across large datasets."),
    ];

    // Insert every document, attaching its id as JSON metadata so it can be
    // shown next to search hits later.
    println!("Indexing {} documents...", documents.len());
    for (id, content) in &documents {
        let tag = serde_json::json!({ "doc_id": id });
        index.insert(content, Some(tag))?;
    }
    println!("Index contains {} vectors\n", index.len());

    // Free-form questions to run against the index.
    let queries = [
        "How can I ensure memory safety in my code?",
        "What's the best language for web applications?",
        "How do I deploy applications in containers?",
        "I need a fast database for caching",
    ];

    // Built once; printed after each query's results.
    let separator = "-".repeat(70);

    for query in queries {
        println!("🔍 Query: \"{}\"\n", query);

        let hits = index.search(query, 3)?;
        // Ranks are displayed 1-based.
        for (rank, hit) in (1usize..).zip(hits.iter()) {
            println!(" {}. [Score: {:.4}]", rank, hit.score);
            println!(" {}", hit.text);

            // The id line is printed only when metadata exists and carries `doc_id`.
            let doc_id = hit.metadata.as_ref().and_then(|meta| meta.get("doc_id"));
            if let Some(doc_id) = doc_id {
                println!(" ({})", doc_id);
            }
            println!();
        }

        println!("{}\n", separator);
    }

    // Document-to-document similarity, using the first corpus entry as the query.
    println!("=== Finding Similar Documents ===\n");
    let reference_text = documents[0].1; // Rust document
    println!("Finding documents similar to:\n\"{}\"\n", reference_text);

    let neighbours = index.search(reference_text, 4)?;
    // The first hit is dropped on the assumption that it is the query document
    // itself. NOTE(review): nothing here verifies that the top hit is the self-match.
    // Enumerating before skipping keeps the printed numbering starting at 1.
    for (position, hit) in neighbours.iter().enumerate().skip(1) {
        println!(" {}. [Score: {:.4}] {}", position, hit.score, hit.text);
    }

    Ok(())
}