Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env node
/**
* Advanced example demonstrating HNSW indexing and batch operations
*/
import { VectorDB } from '../index.js';
/**
 * Generate a random vector.
 *
 * @param {number} dim - Number of components in the vector.
 * @returns {Float32Array} A vector of length `dim` whose entries each come
 *   from a separate `Math.random()` call.
 */
function randomVector(dim) {
  const components = new Float32Array(dim);
  for (let slot = 0; slot < components.length; slot += 1) {
    components[slot] = Math.random();
  }
  return components;
}
/**
 * Advanced example: creates a VectorDB with HNSW settings, bulk-inserts
 * random vectors in batches, times a series of searches, runs a
 * metadata-filtered search, and finally issues searches and inserts
 * concurrently. Progress and timings are written to stdout.
 *
 * @returns {Promise<void>} Resolves once every step has completed.
 */
async function main() {
  console.log('🚀 Ruvector Advanced Example\n');

  // One definition of the vector width, shared by the database config and
  // every generated vector so the two cannot drift apart.
  const dimensions = 128;

  // Create database with HNSW indexing
  const db = new VectorDB({
    dimensions,
    distanceMetric: 'Cosine',
    storagePath: './advanced-example.db',
    hnswConfig: {
      m: 32, // Number of connections per node
      efConstruction: 200, // Construction quality
      efSearch: 100, // Search quality
      maxElements: 100000,
    },
    quantization: {
      type: 'scalar', // 4x compression
    },
  });
  console.log('✅ Created database with HNSW indexing');

  // Batch insert
  console.log('\n📝 Inserting 10,000 vectors in batches...');
  const batchSize = 1000;
  const totalVectors = 10000;
  const batchCount = totalVectors / batchSize;
  const categories = ['A', 'B', 'C'];
  const insertStartedAt = Date.now();
  for (let batchNo = 0; batchNo < batchCount; batchNo += 1) {
    const batch = [];
    for (let offset = 0; offset < batchSize; offset += 1) {
      batch.push({
        vector: randomVector(dimensions),
        metadata: {
          batch: batchNo,
          index: batchNo * batchSize + offset,
          category: categories[offset % 3],
        },
      });
    }
    await db.insertBatch(batch);
    const percentDone = ((batchNo + 1) / batchCount) * 100;
    // "\r" rewinds to the start of the line so the percentage updates in place.
    process.stdout.write(`\r Progress: ${percentDone.toFixed(0)}%`);
  }
  const insertTime = Date.now() - insertStartedAt;
  console.log(`\n Inserted ${totalVectors} vectors in ${insertTime}ms`);
  const vectorsPerSecond = (totalVectors / insertTime) * 1000;
  console.log(` Throughput: ${vectorsPerSecond.toFixed(0)} vectors/sec`);

  // Verify database size
  const count = await db.len();
  console.log(`\n📊 Database contains ${count} vectors`);

  // Benchmark search performance
  console.log('\n🔍 Benchmarking search performance...');
  const numQueries = 100;
  const searchStartedAt = Date.now();
  for (let queryNo = 0; queryNo < numQueries; queryNo += 1) {
    const hits = await db.search({
      vector: randomVector(dimensions),
      k: 10,
    });
    // Only the first query's hits are printed; the rest are just timed.
    if (queryNo === 0) {
      console.log(`\n First query results:`);
      hits.slice(0, 3).forEach((hit, rank) => {
        console.log(` ${rank + 1}. Score: ${hit.score.toFixed(6)}, Category: ${hit.metadata?.category}`);
      });
    }
  }
  const searchTime = Date.now() - searchStartedAt;
  const avgLatency = searchTime / numQueries;
  const qps = (numQueries / searchTime) * 1000;
  console.log(`\n Completed ${numQueries} queries in ${searchTime}ms`);
  console.log(` Average latency: ${avgLatency.toFixed(2)}ms`);
  console.log(` QPS: ${qps.toFixed(0)} queries/sec`);

  // Search with metadata filter
  console.log('\n🎯 Searching with metadata filter...');
  const filteredResults = await db.search({
    vector: randomVector(dimensions),
    k: 20,
    filter: { category: 'A' },
  });
  console.log(` Found ${filteredResults.length} results in category 'A'`);
  filteredResults.slice(0, 3).forEach((hit, rank) => {
    console.log(` ${rank + 1}. Score: ${hit.score.toFixed(6)}, Index: ${hit.metadata?.index}`);
  });

  // Concurrent operations
  console.log('\n⚡ Testing concurrent operations...');
  const concurrentStartedAt = Date.now();
  // All 50 searches are started first, then all 50 inserts; nothing is
  // awaited until every operation has been issued.
  const pendingSearches = Array.from({ length: 50 }, () =>
    db.search({
      vector: randomVector(dimensions),
      k: 10,
    })
  );
  const pendingInserts = Array.from({ length: 50 }, (_, insertNo) =>
    db.insert({
      vector: randomVector(dimensions),
      metadata: { concurrent: true, index: insertNo },
    })
  );
  await Promise.all([...pendingSearches, ...pendingInserts]);
  const concurrentTime = Date.now() - concurrentStartedAt;
  console.log(` Completed 100 concurrent operations in ${concurrentTime}ms`);

  // Final stats
  const finalCount = await db.len();
  console.log(`\n📊 Final database size: ${finalCount} vectors`);
  console.log('\n✨ Advanced example complete!');
}
// Entry point: run the example; if it rejects, log the error and exit with
// status 1.
main().catch((error) => {
  console.error('Error:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,156 @@
#!/usr/bin/env node
/**
* Semantic search example with text embeddings
*
* Note: This example assumes you have a way to generate embeddings.
* In practice, you would use an embedding model like sentence-transformers
* or OpenAI's API to generate actual embeddings.
*/
import { VectorDB } from '../index.js';
/**
 * Mock embedding function (in practice, use a real embedding model).
 *
 * Produces a deterministic pseudo-embedding: the text is reduced to a single
 * integer hash, each component is `sin(hash * (i + 1) * 0.1)`, and the vector
 * is then scaled to unit length.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dim=384] - Number of components in the returned vector.
 * @returns {Float32Array} Vector of length `dim`. It is unit-length except
 *   when every component is zero (e.g. empty `text`, whose hash is 0); that
 *   vector is returned as all zeros instead of being divided by a zero norm.
 */
function mockEmbedding(text, dim = 384) {
  // Simple deterministic "embedding" based on text: a shift-and-subtract
  // string hash over the UTF-16 code units.
  const hash = text.split('').reduce((acc, char) => {
    return ((acc << 5) - acc) + char.charCodeAt(0);
  }, 0);
  const vector = new Float32Array(dim);
  for (let i = 0; i < dim; i++) {
    vector[i] = Math.sin(hash * (i + 1) * 0.1);
  }
  // Normalize. A zero norm means every component is 0; dividing would turn
  // the whole vector into NaN, so leave it untouched in that case.
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  if (norm > 0) {
    for (let i = 0; i < dim; i++) {
      vector[i] /= norm;
    }
  }
  return vector;
}
/**
 * Semantic search example: indexes ten sample documents using mock
 * embeddings, runs several text queries, runs a category-filtered query,
 * replaces one document, and searches again to show the effect. All output
 * goes to the console.
 *
 * @returns {Promise<void>} Resolves once every step has completed.
 */
async function main() {
  console.log('🚀 Ruvector Semantic Search Example\n');

  // Prints one numbered line plus a score line for each search hit.
  const printHits = (hits) => {
    hits.forEach((hit, position) => {
      console.log(` ${position + 1}. [${hit.metadata?.category}] ${hit.metadata?.text}`);
      console.log(` Score: ${hit.score.toFixed(4)}`);
    });
  };

  // Sample documents
  const documents = [
    { id: 'doc1', text: 'The cat sat on the mat', category: 'animals' },
    { id: 'doc2', text: 'The dog played in the park', category: 'animals' },
    { id: 'doc3', text: 'Python is a programming language', category: 'tech' },
    { id: 'doc4', text: 'JavaScript is used for web development', category: 'tech' },
    { id: 'doc5', text: 'Machine learning models learn from data', category: 'tech' },
    { id: 'doc6', text: 'The bird flew over the tree', category: 'animals' },
    { id: 'doc7', text: 'Rust is a systems programming language', category: 'tech' },
    { id: 'doc8', text: 'The fish swam in the ocean', category: 'animals' },
    { id: 'doc9', text: 'Neural networks are inspired by the brain', category: 'tech' },
    { id: 'doc10', text: 'The horse galloped across the field', category: 'animals' },
  ];

  // Create database
  const db = new VectorDB({
    dimensions: 384,
    distanceMetric: 'Cosine',
    storagePath: './semantic-search.db',
  });
  console.log('✅ Created vector database');

  // Index documents
  console.log('\n📝 Indexing documents...');
  const entries = documents.map(({ id, text, category }) => ({
    id,
    vector: mockEmbedding(text),
    metadata: { text, category },
  }));
  await db.insertBatch(entries);
  console.log(` Indexed ${documents.length} documents`);

  // Search queries
  const queries = [
    'animals in nature',
    'programming languages',
    'artificial intelligence',
    'pets and animals',
  ];
  console.log('\n🔍 Running semantic searches...\n');
  for (const query of queries) {
    console.log(`Query: "${query}"`);
    const hits = await db.search({
      vector: mockEmbedding(query),
      k: 3,
    });
    console.log(' Top results:');
    printHits(hits);
    console.log();
  }

  // Category-filtered search
  console.log('🎯 Filtered search (tech category only)...\n');
  const techQuery = 'coding and software';
  console.log(`Query: "${techQuery}"`);
  const techResults = await db.search({
    vector: mockEmbedding(techQuery),
    k: 3,
    filter: { category: 'tech' },
  });
  console.log(' Top results:');
  printHits(techResults);

  // Update a document: the old entry is deleted, then a new one is inserted
  // under the same id.
  console.log('\n📝 Updating a document...');
  const revisedText = 'Python is great for machine learning and AI';
  await db.delete('doc3');
  await db.insert({
    id: 'doc3',
    vector: mockEmbedding(revisedText),
    metadata: {
      text: revisedText,
      category: 'tech',
    },
  });
  console.log(' Updated doc3');

  // Search again to see the change
  const updatedResults = await db.search({
    vector: mockEmbedding('artificial intelligence'),
    k: 3,
  });
  console.log('\n Results after update:');
  printHits(updatedResults);

  console.log('\n✨ Semantic search example complete!');
  console.log('\n💡 Tip: In production, use real embeddings from models like:');
  console.log(' - sentence-transformers (e.g., all-MiniLM-L6-v2)');
  console.log(' - OpenAI embeddings (text-embedding-ada-002)');
  console.log(' - Cohere embeddings');
}
// Entry point: run the example; if it rejects, log the error and exit with
// status 1.
main().catch((error) => {
  console.error('Error:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env node
/**
* Simple example demonstrating basic Ruvector operations
*/
import { VectorDB } from '../index.js';
/**
 * Simple example: creates a 3-dimensional VectorDB, inserts three vectors,
 * reports the count, searches for the nearest vectors, fetches one entry by
 * id, and deletes another. All output goes to the console.
 *
 * @returns {Promise<void>} Resolves once every step has completed.
 */
async function main() {
  console.log('🚀 Ruvector Simple Example\n');

  // Create a vector database
  const db = new VectorDB({
    dimensions: 3,
    distanceMetric: 'Cosine',
    storagePath: './simple-example.db',
  });
  console.log('✅ Created vector database');

  // Insert vectors
  console.log('\n📝 Inserting vectors...');
  const seedEntries = [
    { id: 'vec1', components: [1.0, 0.0, 0.0], text: 'First vector' },
    { id: 'vec2', components: [0.0, 1.0, 0.0], text: 'Second vector' },
    { id: 'vec3', components: [0.5, 0.5, 0.0], text: 'Third vector' },
  ];
  const insertedIds = [];
  // Inserts are awaited one at a time, in the order listed above.
  for (const { id, components, text } of seedEntries) {
    const insertedId = await db.insert({
      id,
      vector: new Float32Array(components),
      metadata: { text },
    });
    insertedIds.push(insertedId);
  }
  const [id1, id2, id3] = insertedIds;
  console.log(` Inserted: ${id1}, ${id2}, ${id3}`);

  // Get database stats
  const count = await db.len();
  console.log(`\n📊 Database contains ${count} vectors`);

  // Search for similar vectors
  console.log('\n🔍 Searching for similar vectors...');
  const hits = await db.search({
    vector: new Float32Array([1.0, 0.0, 0.0]),
    k: 3,
  });
  console.log(` Found ${hits.length} results:`);
  hits.forEach((hit, position) => {
    console.log(` ${position + 1}. ID: ${hit.id}, Score: ${hit.score.toFixed(4)}`);
    console.log(` Metadata: ${JSON.stringify(hit.metadata)}`);
  });

  // Get a specific vector
  console.log('\n🎯 Getting vector by ID...');
  const entry = await db.get('vec2');
  // Nothing is printed when the lookup yields a falsy value.
  if (entry) {
    const componentList = Array.from(entry.vector).join(', ');
    console.log(` Found: ${entry.id}`);
    console.log(` Vector: [${componentList}]`);
    console.log(` Metadata: ${JSON.stringify(entry.metadata)}`);
  }

  // Delete a vector
  console.log('\n🗑 Deleting vector...');
  const deleted = await db.delete('vec1');
  console.log(` Deleted: ${deleted}`);
  const newCount = await db.len();
  console.log(` Database now contains ${newCount} vectors`);
  console.log('\n✨ Example complete!');
}
// Entry point: run the example; if it rejects, log the error and exit with
// status 1.
main().catch((error) => {
  console.error('Error:', error);
  process.exit(1);
});