Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
32
vendor/ruvector/examples/onnx-embeddings/examples/basic.rs
vendored
Normal file
32
vendor/ruvector/examples/onnx-embeddings/examples/basic.rs
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
//! Basic embedding example demonstrating single text embedding
|
||||
|
||||
use anyhow::Result;
|
||||
use ruvector_onnx_embeddings::{Embedder, EmbedderConfig, PretrainedModel};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Create embedder with a specific model
|
||||
let config = EmbedderConfig::pretrained(PretrainedModel::AllMiniLmL6V2);
|
||||
let mut embedder = Embedder::new(config).await?;
|
||||
|
||||
// Embed text
|
||||
let text = "Hello, RuVector!";
|
||||
let embedding = embedder.embed_one(text)?;
|
||||
|
||||
println!("Text: {}", text);
|
||||
println!("Embedding dimension: {}", embedding.len());
|
||||
println!("First 10 values: {:?}", &embedding[..10]);
|
||||
|
||||
// Compute similarity
|
||||
let similar_text = "Greetings, RuVector!";
|
||||
let different_text = "The weather is sunny.";
|
||||
|
||||
let sim1 = embedder.similarity(text, similar_text)?;
|
||||
let sim2 = embedder.similarity(text, different_text)?;
|
||||
|
||||
println!("\nSimilarity scores:");
|
||||
println!(" '{}' <-> '{}': {:.4}", text, similar_text, sim1);
|
||||
println!(" '{}' <-> '{}': {:.4}", text, different_text, sim2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
53
vendor/ruvector/examples/onnx-embeddings/examples/batch.rs
vendored
Normal file
53
vendor/ruvector/examples/onnx-embeddings/examples/batch.rs
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
//! Batch embedding example with parallel processing
|
||||
|
||||
use anyhow::Result;
|
||||
use ruvector_onnx_embeddings::{
|
||||
EmbedderBuilder, PretrainedModel, PoolingStrategy,
|
||||
};
|
||||
use std::time::Instant;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Create embedder with custom settings
|
||||
let mut embedder = EmbedderBuilder::new()
|
||||
.pretrained(PretrainedModel::AllMiniLmL6V2)
|
||||
.pooling(PoolingStrategy::Mean)
|
||||
.normalize(true)
|
||||
.batch_size(32)
|
||||
.max_length(256)
|
||||
.build()
|
||||
.await?;
|
||||
|
||||
// Generate test data
|
||||
let texts: Vec<String> = (0..100)
|
||||
.map(|i| format!("This is test sentence number {} for batch embedding.", i))
|
||||
.collect();
|
||||
|
||||
println!("Embedding {} texts...", texts.len());
|
||||
|
||||
// Sequential embedding
|
||||
let start = Instant::now();
|
||||
let output = embedder.embed(&texts)?;
|
||||
let seq_time = start.elapsed();
|
||||
|
||||
println!("Sequential: {:?} ({:.2} texts/sec)",
|
||||
seq_time,
|
||||
texts.len() as f64 / seq_time.as_secs_f64()
|
||||
);
|
||||
|
||||
// Parallel embedding
|
||||
let start = Instant::now();
|
||||
let output_parallel = embedder.embed_parallel(&texts)?;
|
||||
let par_time = start.elapsed();
|
||||
|
||||
println!("Parallel: {:?} ({:.2} texts/sec)",
|
||||
par_time,
|
||||
texts.len() as f64 / par_time.as_secs_f64()
|
||||
);
|
||||
|
||||
println!("\nSpeedup: {:.2}x", seq_time.as_secs_f64() / par_time.as_secs_f64());
|
||||
println!("Total embeddings: {}", output.len());
|
||||
println!("Dimension: {}", output.dimension);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
87
vendor/ruvector/examples/onnx-embeddings/examples/semantic_search.rs
vendored
Normal file
87
vendor/ruvector/examples/onnx-embeddings/examples/semantic_search.rs
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
//! Semantic search example using RuVector integration
|
||||
|
||||
use anyhow::Result;
|
||||
use ruvector_onnx_embeddings::{
|
||||
Embedder, RuVectorEmbeddings, IndexConfig, Distance,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
println!("=== Semantic Search with RuVector ONNX Embeddings ===\n");
|
||||
|
||||
// Initialize embedder
|
||||
let embedder = Embedder::default_model().await?;
|
||||
println!("Loaded model with dimension: {}", embedder.dimension());
|
||||
|
||||
// Create index with custom configuration
|
||||
let config = IndexConfig {
|
||||
distance: Distance::Cosine,
|
||||
max_elements: 100_000,
|
||||
ef_search: 100,
|
||||
};
|
||||
|
||||
let index = RuVectorEmbeddings::new("semantic_docs", embedder, config)?;
|
||||
|
||||
// Sample document corpus
|
||||
let documents = vec![
|
||||
("doc1", "Rust provides memory safety without garbage collection through its ownership system."),
|
||||
("doc2", "Python's simplicity makes it ideal for beginners learning programming."),
|
||||
("doc3", "JavaScript dominates web development with frameworks like React and Vue."),
|
||||
("doc4", "Machine learning models can be trained using TensorFlow or PyTorch."),
|
||||
("doc5", "Docker containers provide consistent deployment environments."),
|
||||
("doc6", "Kubernetes orchestrates containerized applications at scale."),
|
||||
("doc7", "GraphQL offers a more efficient alternative to REST APIs."),
|
||||
("doc8", "PostgreSQL is a powerful open-source relational database."),
|
||||
("doc9", "Redis provides in-memory data storage for caching."),
|
||||
("doc10", "Elasticsearch enables full-text search across large datasets."),
|
||||
];
|
||||
|
||||
// Index documents with metadata
|
||||
println!("Indexing {} documents...", documents.len());
|
||||
for (id, content) in &documents {
|
||||
let metadata = serde_json::json!({ "doc_id": id });
|
||||
index.insert(content, Some(metadata))?;
|
||||
}
|
||||
|
||||
println!("Index contains {} vectors\n", index.len());
|
||||
|
||||
// Perform semantic searches
|
||||
let queries = vec![
|
||||
"How can I ensure memory safety in my code?",
|
||||
"What's the best language for web applications?",
|
||||
"How do I deploy applications in containers?",
|
||||
"I need a fast database for caching",
|
||||
];
|
||||
|
||||
for query in queries {
|
||||
println!("🔍 Query: \"{}\"\n", query);
|
||||
|
||||
let results = index.search(query, 3)?;
|
||||
|
||||
for (rank, result) in results.iter().enumerate() {
|
||||
println!(" {}. [Score: {:.4}]", rank + 1, result.score);
|
||||
println!(" {}", result.text);
|
||||
if let Some(meta) = &result.metadata {
|
||||
if let Some(doc_id) = meta.get("doc_id") {
|
||||
println!(" ({})", doc_id);
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
println!("{}\n", "-".repeat(70));
|
||||
}
|
||||
|
||||
// Find similar documents
|
||||
println!("=== Finding Similar Documents ===\n");
|
||||
let query_doc = documents[0].1; // Rust document
|
||||
println!("Finding documents similar to:\n\"{}\"\n", query_doc);
|
||||
|
||||
let similar = index.search(query_doc, 4)?;
|
||||
for (i, result) in similar.iter().skip(1).enumerate() {
|
||||
// Skip first (self)
|
||||
println!(" {}. [Score: {:.4}] {}", i + 1, result.score, result.text);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user