Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
145
vendor/ruvector/crates/ruvector-node/examples/advanced.mjs
vendored
Normal file
145
vendor/ruvector/crates/ruvector-node/examples/advanced.mjs
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Advanced example demonstrating HNSW indexing and batch operations
|
||||
*/
|
||||
|
||||
import { VectorDB } from '../index.js';
|
||||
|
||||
// Generate random vector
/**
 * Build a `dim`-length Float32Array of uniform pseudo-random values in [0, 1).
 *
 * @param {number} dim - Number of components (0 yields an empty vector).
 * @returns {Float32Array} Freshly allocated random vector.
 */
function randomVector(dim) {
  // TypedArray.from with a map function replaces the original
  // `new Float32Array(dim).fill(0).map(...)`: the `.fill(0)` was redundant
  // (typed arrays are zero-initialized) and this avoids the extra pass.
  return Float32Array.from({ length: dim }, () => Math.random());
}
|
||||
|
||||
/**
 * Advanced demo driver: creates an HNSW-indexed VectorDB, bulk-loads 10,000
 * random 128-d vectors in batches, benchmarks query latency/QPS, runs a
 * metadata-filtered search, and fires 100 mixed concurrent operations.
 *
 * All progress and timing output goes to stdout; data persists to
 * './advanced-example.db'.
 *
 * NOTE(review): the semantics of the VectorDB options (hnswConfig,
 * quantization, filter) come from '../index.js', which is not visible here —
 * confirm against the ruvector-node API docs.
 */
async function main() {
  console.log('🚀 Ruvector Advanced Example\n');

  // Create database with HNSW indexing
  const db = new VectorDB({
    dimensions: 128,
    distanceMetric: 'Cosine',
    storagePath: './advanced-example.db',
    hnswConfig: {
      m: 32, // Number of connections per node
      efConstruction: 200, // Construction quality
      efSearch: 100, // Search quality
      maxElements: 100000,
    },
    quantization: {
      type: 'scalar', // 4x compression
    },
  });

  console.log('✅ Created database with HNSW indexing');

  // Batch insert
  console.log('\n📝 Inserting 10,000 vectors in batches...');

  const batchSize = 1000;
  const totalVectors = 10000;
  const startTime = Date.now();

  // 10 iterations of 1,000 vectors each; metadata tags every vector with its
  // batch number, global index, and a round-robin category A/B/C (used later
  // by the filtered search).
  for (let i = 0; i < totalVectors / batchSize; i++) {
    const batch = Array.from({ length: batchSize }, (_, j) => ({
      vector: randomVector(128),
      metadata: {
        batch: i,
        index: i * batchSize + j,
        category: ['A', 'B', 'C'][j % 3],
      },
    }));

    await db.insertBatch(batch);

    // '\r' rewrites the same line so the percentage updates in place.
    const progress = ((i + 1) / (totalVectors / batchSize)) * 100;
    process.stdout.write(`\r Progress: ${progress.toFixed(0)}%`);
  }

  const insertTime = Date.now() - startTime;
  console.log(`\n Inserted ${totalVectors} vectors in ${insertTime}ms`);
  console.log(` Throughput: ${((totalVectors / insertTime) * 1000).toFixed(0)} vectors/sec`);

  // Verify database size
  // NOTE(review): db.len() presumably returns the current entry count —
  // confirm against the VectorDB API.
  const count = await db.len();
  console.log(`\n📊 Database contains ${count} vectors`);

  // Benchmark search performance
  console.log('\n🔍 Benchmarking search performance...');

  const numQueries = 100;
  const searchStart = Date.now();

  // Sequential queries so the elapsed time measures per-query latency rather
  // than parallel throughput.
  for (let i = 0; i < numQueries; i++) {
    const results = await db.search({
      vector: randomVector(128),
      k: 10,
    });

    // Print a small sample of hits from the first query only.
    if (i === 0) {
      console.log(`\n First query results:`);
      results.slice(0, 3).forEach((r, idx) => {
        console.log(` ${idx + 1}. Score: ${r.score.toFixed(6)}, Category: ${r.metadata?.category}`);
      });
    }
  }

  const searchTime = Date.now() - searchStart;
  const avgLatency = searchTime / numQueries;
  const qps = (numQueries / searchTime) * 1000;

  console.log(`\n Completed ${numQueries} queries in ${searchTime}ms`);
  console.log(` Average latency: ${avgLatency.toFixed(2)}ms`);
  console.log(` QPS: ${qps.toFixed(0)} queries/sec`);

  // Search with metadata filter
  // Restricts results to vectors tagged category 'A' during the batch insert.
  console.log('\n🎯 Searching with metadata filter...');

  const filteredResults = await db.search({
    vector: randomVector(128),
    k: 20,
    filter: { category: 'A' },
  });

  console.log(` Found ${filteredResults.length} results in category 'A'`);
  filteredResults.slice(0, 3).forEach((r, i) => {
    console.log(` ${i + 1}. Score: ${r.score.toFixed(6)}, Index: ${r.metadata?.index}`);
  });

  // Concurrent operations
  // 50 searches and 50 inserts launched together and awaited as one batch —
  // exercises read/write interleaving in the underlying store.
  console.log('\n⚡ Testing concurrent operations...');

  const concurrentStart = Date.now();

  const promises = [
    // Concurrent searches
    ...Array.from({ length: 50 }, () =>
      db.search({
        vector: randomVector(128),
        k: 10,
      })
    ),
    // Concurrent inserts
    ...Array.from({ length: 50 }, (_, i) =>
      db.insert({
        vector: randomVector(128),
        metadata: { concurrent: true, index: i },
      })
    ),
  ];

  await Promise.all(promises);

  const concurrentTime = Date.now() - concurrentStart;
  console.log(` Completed 100 concurrent operations in ${concurrentTime}ms`);

  // Final stats
  const finalCount = await db.len();
  console.log(`\n📊 Final database size: ${finalCount} vectors`);

  console.log('\n✨ Advanced example complete!');
}
|
||||
|
||||
// Entry point: surface any failure on stderr and exit non-zero so shells
// and CI pipelines can detect it.
const onFatalError = (err) => {
  console.error('Error:', err);
  process.exit(1);
};

main().catch(onFatalError);
|
||||
156
vendor/ruvector/crates/ruvector-node/examples/semantic-search.mjs
vendored
Normal file
156
vendor/ruvector/crates/ruvector-node/examples/semantic-search.mjs
vendored
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Semantic search example with text embeddings
|
||||
*
|
||||
* Note: This example assumes you have a way to generate embeddings.
|
||||
* In practice, you would use an embedding model like sentence-transformers
|
||||
* or OpenAI's API to generate actual embeddings.
|
||||
*/
|
||||
|
||||
import { VectorDB } from '../index.js';
|
||||
|
||||
// Mock embedding function (in practice, use a real embedding model)
/**
 * Produce a deterministic pseudo-embedding for `text`.
 *
 * The text is folded into a 32-bit hash (djb2-style `hash * 31 + charCode`,
 * written as `(acc << 5) - acc`), which then seeds a sine series, and the
 * resulting vector is normalized to unit length.
 *
 * @param {string} text - Input text; identical text always yields the same vector.
 * @param {number} [dim=384] - Embedding dimensionality.
 * @returns {Float32Array} Unit-length vector, or an all-zero vector when the
 *   raw values are all zero (e.g. empty text), to avoid NaN from 0/0.
 */
function mockEmbedding(text, dim = 384) {
  // Simple deterministic "embedding" based on text
  const hash = text.split('').reduce((acc, char) => {
    return ((acc << 5) - acc) + char.charCodeAt(0);
  }, 0);

  const vector = new Float32Array(dim);
  for (let i = 0; i < dim; i++) {
    vector[i] = Math.sin(hash * (i + 1) * 0.1);
  }

  // Normalize.
  // Bug fix: an empty string hashes to 0, so every component is sin(0) = 0
  // and `norm` is 0 — the original unconditionally divided and returned an
  // all-NaN vector. Skip normalization for a zero vector instead.
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  if (norm > 0) {
    for (let i = 0; i < dim; i++) {
      vector[i] /= norm;
    }
  }

  return vector;
}
|
||||
|
||||
/**
 * Semantic-search demo driver: indexes ten tiny documents under mock
 * embeddings, runs free-text queries, a category-filtered query, then
 * replaces one document (delete + re-insert) and searches again to show
 * the ranking change.
 *
 * Output goes to stdout; data persists to './semantic-search.db'.
 *
 * NOTE(review): VectorDB option/filter semantics come from '../index.js',
 * which is not visible here — confirm against the ruvector-node API docs.
 */
async function main() {
  console.log('🚀 Ruvector Semantic Search Example\n');

  // Sample documents
  // Two categories ('animals' / 'tech') so the filtered search below has
  // something to discriminate on.
  const documents = [
    { id: 'doc1', text: 'The cat sat on the mat', category: 'animals' },
    { id: 'doc2', text: 'The dog played in the park', category: 'animals' },
    { id: 'doc3', text: 'Python is a programming language', category: 'tech' },
    { id: 'doc4', text: 'JavaScript is used for web development', category: 'tech' },
    { id: 'doc5', text: 'Machine learning models learn from data', category: 'tech' },
    { id: 'doc6', text: 'The bird flew over the tree', category: 'animals' },
    { id: 'doc7', text: 'Rust is a systems programming language', category: 'tech' },
    { id: 'doc8', text: 'The fish swam in the ocean', category: 'animals' },
    { id: 'doc9', text: 'Neural networks are inspired by the brain', category: 'tech' },
    { id: 'doc10', text: 'The horse galloped across the field', category: 'animals' },
  ];

  // Create database
  // 384 dims matches the default of mockEmbedding above.
  const db = new VectorDB({
    dimensions: 384,
    distanceMetric: 'Cosine',
    storagePath: './semantic-search.db',
  });

  console.log('✅ Created vector database');

  // Index documents
  console.log('\n📝 Indexing documents...');

  // Each entry keeps the original text and category in metadata so search
  // results can be displayed without a second lookup.
  const entries = documents.map((doc) => ({
    id: doc.id,
    vector: mockEmbedding(doc.text),
    metadata: {
      text: doc.text,
      category: doc.category,
    },
  }));

  await db.insertBatch(entries);
  console.log(` Indexed ${documents.length} documents`);

  // Search queries
  const queries = [
    'animals in nature',
    'programming languages',
    'artificial intelligence',
    'pets and animals',
  ];

  console.log('\n🔍 Running semantic searches...\n');

  // Top-3 nearest documents per query, printed with category and score.
  for (const query of queries) {
    console.log(`Query: "${query}"`);

    const results = await db.search({
      vector: mockEmbedding(query),
      k: 3,
    });

    console.log(' Top results:');
    results.forEach((result, i) => {
      console.log(` ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
      console.log(` Score: ${result.score.toFixed(4)}`);
    });
    console.log();
  }

  // Category-filtered search
  // Same k-NN query, but restricted to documents whose metadata category
  // is 'tech'.
  console.log('🎯 Filtered search (tech category only)...\n');

  const techQuery = 'coding and software';
  console.log(`Query: "${techQuery}"`);

  const techResults = await db.search({
    vector: mockEmbedding(techQuery),
    k: 3,
    filter: { category: 'tech' },
  });

  console.log(' Top results:');
  techResults.forEach((result, i) => {
    console.log(` ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
    console.log(` Score: ${result.score.toFixed(4)}`);
  });

  // Update a document
  // Modeled as delete + insert with the same id; the vector and metadata
  // text both change.
  // NOTE(review): assumes VectorDB has no in-place update API — confirm.
  console.log('\n📝 Updating a document...');

  await db.delete('doc3');
  await db.insert({
    id: 'doc3',
    vector: mockEmbedding('Python is great for machine learning and AI'),
    metadata: {
      text: 'Python is great for machine learning and AI',
      category: 'tech',
    },
  });

  console.log(' Updated doc3');

  // Search again to see the change
  const updatedResults = await db.search({
    vector: mockEmbedding('artificial intelligence'),
    k: 3,
  });

  console.log('\n Results after update:');
  updatedResults.forEach((result, i) => {
    console.log(` ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
    console.log(` Score: ${result.score.toFixed(4)}`);
  });

  console.log('\n✨ Semantic search example complete!');
  console.log('\n💡 Tip: In production, use real embeddings from models like:');
  console.log(' - sentence-transformers (e.g., all-MiniLM-L6-v2)');
  console.log(' - OpenAI embeddings (text-embedding-ada-002)');
  console.log(' - Cohere embeddings');
}
|
||||
|
||||
// Run the example; report any failure and signal it via the exit code.
function reportFailure(err) {
  console.error('Error:', err);
  process.exit(1);
}

main().catch(reportFailure);
|
||||
85
vendor/ruvector/crates/ruvector-node/examples/simple.mjs
vendored
Normal file
85
vendor/ruvector/crates/ruvector-node/examples/simple.mjs
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Simple example demonstrating basic Ruvector operations
|
||||
*/
|
||||
|
||||
import { VectorDB } from '../index.js';
|
||||
|
||||
/**
 * Walk through the basic VectorDB lifecycle: create, insert, count, search,
 * fetch by id, delete, and re-count. All output goes to stdout; data
 * persists to './simple-example.db'.
 */
async function main() {
  console.log('🚀 Ruvector Simple Example\n');

  // A tiny 3-dimensional database persisted next to this script.
  const db = new VectorDB({
    dimensions: 3,
    distanceMetric: 'Cosine',
    storagePath: './simple-example.db',
  });

  console.log('✅ Created vector database');

  // Seed three known vectors so the similarity ranking is predictable.
  console.log('\n📝 Inserting vectors...');

  const seedEntries = [
    { id: 'vec1', vector: new Float32Array([1.0, 0.0, 0.0]), metadata: { text: 'First vector' } },
    { id: 'vec2', vector: new Float32Array([0.0, 1.0, 0.0]), metadata: { text: 'Second vector' } },
    { id: 'vec3', vector: new Float32Array([0.5, 0.5, 0.0]), metadata: { text: 'Third vector' } },
  ];

  // Insert sequentially, collecting the ids the database reports back.
  const insertedIds = [];
  for (const seed of seedEntries) {
    insertedIds.push(await db.insert(seed));
  }

  console.log(` Inserted: ${insertedIds[0]}, ${insertedIds[1]}, ${insertedIds[2]}`);

  // Get database stats
  const count = await db.len();
  console.log(`\n📊 Database contains ${count} vectors`);

  // k-NN query: vec1 should be the closest match to [1, 0, 0].
  console.log('\n🔍 Searching for similar vectors...');

  const results = await db.search({
    vector: new Float32Array([1.0, 0.0, 0.0]),
    k: 3,
  });

  console.log(` Found ${results.length} results:`);
  for (const [i, result] of results.entries()) {
    console.log(` ${i + 1}. ID: ${result.id}, Score: ${result.score.toFixed(4)}`);
    console.log(` Metadata: ${JSON.stringify(result.metadata)}`);
  }

  // Point lookup by id.
  console.log('\n🎯 Getting vector by ID...');
  const entry = await db.get('vec2');
  if (entry) {
    console.log(` Found: ${entry.id}`);
    console.log(` Vector: [${Array.from(entry.vector).join(', ')}]`);
    console.log(` Metadata: ${JSON.stringify(entry.metadata)}`);
  }

  // Remove one vector and show the count shrink.
  console.log('\n🗑️ Deleting vector...');
  const deleted = await db.delete('vec1');
  console.log(` Deleted: ${deleted}`);

  const newCount = await db.len();
  console.log(` Database now contains ${newCount} vectors`);

  console.log('\n✨ Example complete!');
}
|
||||
|
||||
// ES modules (.mjs) support top-level await: run the example and convert
// any rejection into a logged error plus a non-zero exit code.
try {
  await main();
} catch (err) {
  console.error('Error:', err);
  process.exit(1);
}
|
||||
Reference in New Issue
Block a user