Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/crates/ruvector-node/examples/advanced.mjs
+++ b/vendor/ruvector/crates/ruvector-node/examples/advanced.mjs
@@ -0,0 +1,145 @@
+#!/usr/bin/env node
+
+/**
+ * Advanced example demonstrating HNSW indexing and batch operations
+ */
+
+import { VectorDB } from '../index.js';
+
+// Build a Float32Array of `dim` samples from Math.random(); a new typed array is already zero-filled, so no fill pass is needed
+function randomVector(dim) {
+  return new Float32Array(dim).map(() => Math.random());
+}
+
+async function main() { // Demo driver: create the DB, bulk-insert, benchmark search, filter, then run mixed concurrent ops
+  console.log('🚀 Ruvector Advanced Example\n');
+
+  // Create a 128-dimensional cosine database at ./advanced-example.db with HNSW and scalar-quantization options
+  const db = new VectorDB({
+    dimensions: 128,
+    distanceMetric: 'Cosine',
+    storagePath: './advanced-example.db',
+    hnswConfig: {
+      m: 32, // Number of connections per node
+      efConstruction: 200, // Construction quality
+      efSearch: 100, // Search quality
+      maxElements: 100000, // above the 10,050 vectors this script inserts in total
+    },
+    quantization: {
+      type: 'scalar', // 4x compression
+    },
+  });
+
+  console.log('✅ Created database with HNSW indexing');
+
+  // Batch insert: 10 batches of 1,000 random vectors, each batch awaited before the next starts
+  console.log('\n📝 Inserting 10,000 vectors in batches...');
+
+  const batchSize = 1000;
+  const totalVectors = 10000;
+  const startTime = Date.now(); // the measured time below also includes generating the random vectors
+
+  for (let i = 0; i < totalVectors / batchSize; i++) {
+    const batch = Array.from({ length: batchSize }, (_, j) => ({
+      vector: randomVector(128),
+      metadata: {
+        batch: i,
+        index: i * batchSize + j, // position across all batches
+        category: ['A', 'B', 'C'][j % 3], // cycles A, B, C by position within the batch
+      },
+    }));
+
+    await db.insertBatch(batch);
+
+    const progress = ((i + 1) / (totalVectors / batchSize)) * 100;
+    process.stdout.write(`\r  Progress: ${progress.toFixed(0)}%`); // '\r' rewrites the same terminal line
+  }
+
+  const insertTime = Date.now() - startTime;
+  console.log(`\n  Inserted ${totalVectors} vectors in ${insertTime}ms`);
+  console.log(`  Throughput: ${((totalVectors / insertTime) * 1000).toFixed(0)} vectors/sec`);
+
+  // Read back the stored vector count (printed only, not asserted)
+  const count = await db.len();
+  console.log(`\n📊 Database contains ${count} vectors`);
+
+  // Benchmark search: 100 sequential k=10 queries, each with a fresh random query vector
+  console.log('\n🔍 Benchmarking search performance...');
+
+  const numQueries = 100;
+  const searchStart = Date.now();
+
+  for (let i = 0; i < numQueries; i++) {
+    const results = await db.search({
+      vector: randomVector(128),
+      k: 10,
+    });
+
+    if (i === 0) { // print a sample (top 3) from the first query only
+      console.log(`\n  First query results:`);
+      results.slice(0, 3).forEach((r, idx) => {
+        console.log(`    ${idx + 1}. Score: ${r.score.toFixed(6)}, Category: ${r.metadata?.category}`);
+      });
+    }
+  }
+
+  const searchTime = Date.now() - searchStart;
+  const avgLatency = searchTime / numQueries; // milliseconds per query
+  const qps = (numQueries / searchTime) * 1000; // queries per second
+
+  console.log(`\n  Completed ${numQueries} queries in ${searchTime}ms`);
+  console.log(`  Average latency: ${avgLatency.toFixed(2)}ms`);
+  console.log(`  QPS: ${qps.toFixed(0)} queries/sec`);
+
+  // Search with a metadata filter object ({ category: 'A' }); the matching semantics are defined by VectorDB, not here
+  console.log('\n🎯 Searching with metadata filter...');
+
+  const filteredResults = await db.search({
+    vector: randomVector(128),
+    k: 20,
+    filter: { category: 'A' },
+  });
+
+  console.log(`  Found ${filteredResults.length} results in category 'A'`);
+  filteredResults.slice(0, 3).forEach((r, i) => {
+    console.log(`    ${i + 1}. Score: ${r.score.toFixed(6)}, Index: ${r.metadata?.index}`);
+  });
+
+  // Concurrent operations: start 50 searches and 50 single inserts without awaiting, then await them together
+  console.log('\n⚡ Testing concurrent operations...');
+
+  const concurrentStart = Date.now();
+
+  const promises = [
+    // Concurrent searches
+    ...Array.from({ length: 50 }, () =>
+      db.search({
+        vector: randomVector(128),
+        k: 10,
+      })
+    ),
+    // Concurrent inserts
+    ...Array.from({ length: 50 }, (_, i) =>
+      db.insert({
+        vector: randomVector(128),
+        metadata: { concurrent: true, index: i },
+      })
+    ),
+  ];
+
+  await Promise.all(promises); // rejects as soon as any one of the 100 operations rejects
+
+  const concurrentTime = Date.now() - concurrentStart;
+  console.log(`  Completed 100 concurrent operations in ${concurrentTime}ms`);
+
+  // Final stats: vector count after the 50 extra inserts above
+  const finalCount = await db.len();
+  console.log(`\n📊 Final database size: ${finalCount} vectors`);
+
+  console.log('\n✨ Advanced example complete!');
+}
+
+main().catch((failure) => { // any rejection escaping main() lands here
+  console.error('Error:', failure);
+  process.exit(1); // non-zero status so callers can detect the failed run
+});
--- a/vendor/ruvector/crates/ruvector-node/examples/semantic-search.mjs
+++ b/vendor/ruvector/crates/ruvector-node/examples/semantic-search.mjs
@@ -0,0 +1,156 @@
+#!/usr/bin/env node
+
+/**
+ * Semantic search example with text embeddings
+ *
+ * Note: This example assumes you have a way to generate embeddings.
+ * In practice, you would use an embedding model like sentence-transformers
+ * or OpenAI's API to generate actual embeddings.
+ */
+
+import { VectorDB } from '../index.js';
+
+// Mock embedding: deterministic vector derived from a hash of `text` (in practice, use a real embedding model)
+function mockEmbedding(text, dim = 384) {
+  // Fold the UTF-16 code units into one number; `(acc << 5) - acc` is acc * 31 with the shift truncated to 32 bits
+  const hash = text.split('').reduce((acc, char) => {
+    return ((acc << 5) - acc) + char.charCodeAt(0);
+  }, 0);
+
+  const vector = new Float32Array(dim);
+  for (let i = 0; i < dim; i++) {
+    vector[i] = Math.sin(hash * (i + 1) * 0.1);
+  }
+
+  // Normalize to unit length; skip when norm is 0 (zero hash, e.g. empty text) so the result is zeros rather than NaN
+  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
+  if (norm > 0) {
+    for (let i = 0; i < dim; i++) vector[i] /= norm;
+  }
+
+  return vector;
+}
+
+async function main() { // Demo driver: index ten sample documents, run plain and filtered searches, then replace one document
+  console.log('🚀 Ruvector Semantic Search Example\n');
+
+  // Sample corpus: ten short sentences, five tagged 'animals' and five tagged 'tech'
+  const documents = [
+    { id: 'doc1', text: 'The cat sat on the mat', category: 'animals' },
+    { id: 'doc2', text: 'The dog played in the park', category: 'animals' },
+    { id: 'doc3', text: 'Python is a programming language', category: 'tech' },
+    { id: 'doc4', text: 'JavaScript is used for web development', category: 'tech' },
+    { id: 'doc5', text: 'Machine learning models learn from data', category: 'tech' },
+    { id: 'doc6', text: 'The bird flew over the tree', category: 'animals' },
+    { id: 'doc7', text: 'Rust is a systems programming language', category: 'tech' },
+    { id: 'doc8', text: 'The fish swam in the ocean', category: 'animals' },
+    { id: 'doc9', text: 'Neural networks are inspired by the brain', category: 'tech' },
+    { id: 'doc10', text: 'The horse galloped across the field', category: 'animals' },
+  ];
+
+  // Create database; 384 matches the default `dim` of mockEmbedding used for every vector below
+  const db = new VectorDB({
+    dimensions: 384,
+    distanceMetric: 'Cosine',
+    storagePath: './semantic-search.db',
+  });
+
+  console.log('✅ Created vector database');
+
+  // Index documents: embed each text and keep the text and category as metadata
+  console.log('\n📝 Indexing documents...');
+
+  const entries = documents.map((doc) => ({
+    id: doc.id,
+    vector: mockEmbedding(doc.text),
+    metadata: {
+      text: doc.text,
+      category: doc.category,
+    },
+  }));
+
+  await db.insertBatch(entries);
+  console.log(`  Indexed ${documents.length} documents`);
+
+  // Free-text queries, embedded with the same mock function as the documents
+  const queries = [
+    'animals in nature',
+    'programming languages',
+    'artificial intelligence',
+    'pets and animals',
+  ];
+
+  console.log('\n🔍 Running semantic searches...\n');
+
+  for (const query of queries) { // NOTE: mock embeddings are hash-based, so the rankings are not semantically meaningful
+    console.log(`Query: "${query}"`);
+
+    const results = await db.search({
+      vector: mockEmbedding(query),
+      k: 3,
+    });
+
+    console.log('  Top results:');
+    results.forEach((result, i) => {
+      console.log(`    ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
+      console.log(`       Score: ${result.score.toFixed(4)}`);
+    });
+    console.log();
+  }
+
+  // Category-filtered search: same call shape plus a metadata filter object
+  console.log('🎯 Filtered search (tech category only)...\n');
+
+  const techQuery = 'coding and software';
+  console.log(`Query: "${techQuery}"`);
+
+  const techResults = await db.search({
+    vector: mockEmbedding(techQuery),
+    k: 3,
+    filter: { category: 'tech' },
+  });
+
+  console.log('  Top results:');
+  techResults.forEach((result, i) => {
+    console.log(`    ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
+    console.log(`       Score: ${result.score.toFixed(4)}`);
+  });
+
+  // Update a document by deleting the old entry and inserting a new one under the same id
+  console.log('\n📝 Updating a document...');
+
+  await db.delete('doc3');
+  await db.insert({
+    id: 'doc3',
+    vector: mockEmbedding('Python is great for machine learning and AI'),
+    metadata: {
+      text: 'Python is great for machine learning and AI',
+      category: 'tech',
+    },
+  });
+
+  console.log('  Updated doc3');
+
+  // Re-run the 'artificial intelligence' query (also in the list above) to see the change
+  const updatedResults = await db.search({
+    vector: mockEmbedding('artificial intelligence'),
+    k: 3,
+  });
+
+  console.log('\n  Results after update:');
+  updatedResults.forEach((result, i) => {
+    console.log(`    ${i + 1}. [${result.metadata?.category}] ${result.metadata?.text}`);
+    console.log(`       Score: ${result.score.toFixed(4)}`);
+  });
+
+  console.log('\n✨ Semantic search example complete!');
+  console.log('\n💡 Tip: In production, use real embeddings from models like:');
+  console.log('   - sentence-transformers (e.g., all-MiniLM-L6-v2)');
+  console.log('   - OpenAI embeddings (text-embedding-ada-002)');
+  console.log('   - Cohere embeddings');
+}
+
+main().catch((failure) => { // any rejection escaping main() lands here
+  console.error('Error:', failure);
+  process.exit(1); // non-zero status so callers can detect the failed run
+});
--- a/vendor/ruvector/crates/ruvector-node/examples/simple.mjs
+++ b/vendor/ruvector/crates/ruvector-node/examples/simple.mjs
@@ -0,0 +1,85 @@
+#!/usr/bin/env node
+
+/**
+ * Simple example demonstrating basic Ruvector operations
+ */
+
+import { VectorDB } from '../index.js';
+
+async function main() { // Demo driver: create a small DB, insert three vectors, then search, get by id, and delete
+  console.log('🚀 Ruvector Simple Example\n');
+
+  // Create a 3-dimensional, cosine-metric vector database stored at ./simple-example.db
+  const db = new VectorDB({
+    dimensions: 3,
+    distanceMetric: 'Cosine',
+    storagePath: './simple-example.db',
+  });
+
+  console.log('✅ Created vector database');
+
+  // Insert three vectors with explicit ids; the value insert() resolves to is printed below (presumably the stored id)
+  console.log('\n📝 Inserting vectors...');
+
+  const id1 = await db.insert({
+    id: 'vec1',
+    vector: new Float32Array([1.0, 0.0, 0.0]),
+    metadata: { text: 'First vector' },
+  });
+
+  const id2 = await db.insert({
+    id: 'vec2',
+    vector: new Float32Array([0.0, 1.0, 0.0]),
+    metadata: { text: 'Second vector' },
+  });
+
+  const id3 = await db.insert({
+    id: 'vec3',
+    vector: new Float32Array([0.5, 0.5, 0.0]),
+    metadata: { text: 'Third vector' },
+  });
+
+  console.log(`  Inserted: ${id1}, ${id2}, ${id3}`);
+
+  // Get the current vector count
+  const count = await db.len();
+  console.log(`\n📊 Database contains ${count} vectors`);
+
+  // Search with a query identical to vec1's vector, asking for up to 3 neighbours
+  console.log('\n🔍 Searching for similar vectors...');
+
+  const results = await db.search({
+    vector: new Float32Array([1.0, 0.0, 0.0]),
+    k: 3,
+  });
+
+  console.log(`  Found ${results.length} results:`);
+  results.forEach((result, i) => {
+    console.log(`    ${i + 1}. ID: ${result.id}, Score: ${result.score.toFixed(4)}`);
+    console.log(`       Metadata: ${JSON.stringify(result.metadata)}`);
+  });
+
+  // Fetch one entry by id; the guard below skips printing when get() resolves to a falsy value
+  console.log('\n🎯 Getting vector by ID...');
+  const entry = await db.get('vec2');
+  if (entry) {
+    console.log(`  Found: ${entry.id}`);
+    console.log(`  Vector: [${Array.from(entry.vector).join(', ')}]`);
+    console.log(`  Metadata: ${JSON.stringify(entry.metadata)}`);
+  }
+
+  // Delete vec1 and print whatever delete() resolves to, then re-read the count
+  console.log('\n🗑️  Deleting vector...');
+  const deleted = await db.delete('vec1');
+  console.log(`  Deleted: ${deleted}`);
+
+  const newCount = await db.len();
+  console.log(`  Database now contains ${newCount} vectors`);
+
+  console.log('\n✨ Example complete!');
+}
+
+main().catch((failure) => { // any rejection escaping main() lands here
+  console.error('Error:', failure);
+  process.exit(1); // non-zero status so callers can detect the failed run
+});