Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/examples/nodejs/README.md
+++ b/vendor/ruvector/examples/nodejs/README.md
@@ -0,0 +1,210 @@
+# RuVector Node.js Examples
+
+JavaScript/TypeScript examples for integrating RuVector with Node.js applications.
+
+## Examples
+
+| File | Description |
+|------|-------------|
+| `basic_usage.js` | Getting started with the JS SDK |
+| `semantic_search.js` | Semantic search implementation |
+
+## Quick Start
+
+```bash
+npm install ruvector
+node basic_usage.js
+node semantic_search.js
+```
+
+## Basic Usage
+
+```javascript
+const { VectorDB } = require('ruvector');
+
+async function main() {
+    // Initialize database
+    const db = new VectorDB({
+        dimensions: 128,
+        storagePath: './my_vectors.db'
+    });
+    await db.initialize();
+
+    // Insert vectors
+    await db.insert({
+        id: 'doc_001',
+        vector: new Float32Array(128).fill(0.1),
+        metadata: { title: 'Document 1' }
+    });
+
+    // Search
+    const results = await db.search({
+        vector: new Float32Array(128).fill(0.1),
+        topK: 10
+    });
+
+    console.log('Results:', results);
+}
+
+main().catch(console.error);
+```
+
+## Semantic Search
+
+```javascript
+const { VectorDB } = require('ruvector');
+const { encode } = require('your-embedding-model');
+
+async function semanticSearch() {
+    const db = new VectorDB({ dimensions: 384 });
+    await db.initialize();
+
+    // Index documents
+    const documents = [
+        'Machine learning is a subset of AI',
+        'Neural networks power modern AI',
+        'Deep learning uses multiple layers'
+    ];
+
+    for (const doc of documents) {
+        const embedding = await encode(doc);
+        await db.insert({
+            id: doc.slice(0, 20),
+            vector: embedding,
+            metadata: { text: doc }
+        });
+    }
+
+    // Search by meaning
+    const query = 'How does artificial intelligence work?';
+    const queryVec = await encode(query);
+
+    const results = await db.search({
+        vector: queryVec,
+        topK: 5
+    });
+
+    results.forEach(r => {
+        console.log(`${r.score.toFixed(3)}: ${r.metadata.text}`);
+    });
+}
+```
+
+## Batch Operations
+
+```javascript
+// Batch insert for efficiency
+const entries = documents.map((doc, i) => ({
+    id: `doc_${i}`,
+    vector: embeddings[i],
+    metadata: { text: doc }
+}));
+
+await db.insertBatch(entries);
+
+// Batch search
+const queries = ['query1', 'query2', 'query3'];
+const queryVectors = await Promise.all(queries.map(encode));
+
+const batchResults = await db.searchBatch(
+    queryVectors.map(v => ({ vector: v, topK: 5 }))
+);
+```
+
+## Filtering
+
+```javascript
+// Metadata filtering
+const results = await db.search({
+    vector: queryVec,
+    topK: 10,
+    filter: {
+        category: { $eq: 'technology' },
+        date: { $gte: '2024-01-01' }
+    }
+});
+```
+
+## TypeScript
+
+```typescript
+import { VectorDB, VectorEntry, SearchResult } from 'ruvector';
+
+interface DocMetadata {
+    title: string;
+    author: string;
+    date: string;
+}
+
+const db = new VectorDB<DocMetadata>({
+    dimensions: 384
+});
+
+const entry: VectorEntry<DocMetadata> = {
+    id: 'doc_001',
+    vector: new Float32Array(384),
+    metadata: {
+        title: 'TypeScript Guide',
+        author: 'Dev Team',
+        date: '2024-01-01'
+    }
+};
+
+await db.insert(entry);
+```
+
+## Express.js Integration
+
+```javascript
+const express = require('express');
+const { VectorDB } = require('ruvector');
+
+const app = express();
+const db = new VectorDB({ dimensions: 384 });
+
+app.post('/search', express.json(), async (req, res) => {
+    const { query, topK = 10 } = req.body;
+    const queryVec = await encode(query);
+
+    const results = await db.search({
+        vector: queryVec,
+        topK
+    });
+
+    res.json(results);
+});
+
+app.listen(3000);
+```
+
+## Configuration Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `dimensions` | number | required | Vector dimensions |
+| `storagePath` | string | `:memory:` | Database file path |
+| `metric` | string | `cosine` | Distance metric |
+| `indexType` | string | `hnsw` | Index algorithm |
+
+## Error Handling
+
+```javascript
+try {
+    await db.insert(entry);
+} catch (error) {
+    if (error.code === 'DIMENSION_MISMATCH') {
+        console.error('Vector dimension mismatch');
+    } else if (error.code === 'DUPLICATE_ID') {
+        console.error('ID already exists');
+    } else {
+        throw error;
+    }
+}
+```
+
+## Performance Tips
+
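+1. Use batch operations (`insertBatch`, `searchBatch`) instead of per-item calls for bulk work
+2. Keep vector dimensions consistent with the `dimensions` value the database was created with
+3. Choose the index type (`indexType`) that suits your query patterns
+4. Consider in-memory mode (the default `:memory:` storage path) when persistence is not required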
--- a/vendor/ruvector/examples/nodejs/basic_usage.js
+++ b/vendor/ruvector/examples/nodejs/basic_usage.js
@@ -0,0 +1,68 @@
+/**
+ * Basic usage example for Ruvector (Node.js)
+ *
+ * Demonstrates:
+ * - Creating a database
+ * - Inserting vectors
+ * - Searching for similar vectors
+ */
+
+const { VectorDB } = require('ruvector');
+
+async function main() {
+    console.log('🚀 Ruvector Basic Usage Example (Node.js)\n');
+
+    // 1. Create a database
+    console.log('1. Creating database...');
+    const db = new VectorDB({
+        dimensions: 128,
+        storagePath: './examples_basic_node.db',
+        distanceMetric: 'cosine'
+    });
+    console.log('   ✓ Database created with 128 dimensions\n');
+
+    // 2. Insert a single vector
+    console.log('2. Inserting single vector...');
+    const vector = new Float32Array(128).fill(0.1);
+    const id = await db.insert({
+        id: 'doc_001',
+        vector: vector,
+        metadata: { text: 'Example document' }
+    });
+    console.log(`   ✓ Inserted vector: ${id}\n`);
+
+    // 3. Insert multiple vectors
+    console.log('3. Inserting multiple vectors...');
+    const entries = Array.from({ length: 100 }, (_, i) => ({
+        id: `doc_${String(i + 2).padStart(3, '0')}`,
+        vector: new Float32Array(128).fill(0.1 + i * 0.001),
+        metadata: { index: i + 2 }
+    }));
+
+    const ids = await db.insertBatch(entries);
+    console.log(`   ✓ Inserted ${ids.length} vectors\n`);
+
+    // 4. Search for similar vectors
+    console.log('4. Searching for similar vectors...');
+    const queryVector = new Float32Array(128).fill(0.15);
+    const results = await db.search({
+        vector: queryVector,
+        k: 5,
+        includeMetadata: true
+    });
+
+    console.log(`   ✓ Found ${results.length} results:`);
+    results.forEach((result, i) => {
+        console.log(`     ${i + 1}. ID: ${result.id}, Distance: ${result.distance.toFixed(6)}`);
+    });
+    console.log();
+
+    // 5. Get database stats
+    console.log('5. Database statistics:');
+    const total = db.count();
+    console.log(`   ✓ Total vectors: ${total}\n`);
+
+    console.log('✅ Example completed successfully!');
+}
+
+main().catch(console.error);
--- a/vendor/ruvector/examples/nodejs/semantic_search.js
+++ b/vendor/ruvector/examples/nodejs/semantic_search.js
@@ -0,0 +1,150 @@
+/**
+ * Semantic Search Example (Node.js)
+ *
+ * Demonstrates building a semantic search system with Ruvector
+ */
+
+const { VectorDB } = require('ruvector');
+
+// Mock embedding function (in production, use a real embedding model)
+function mockEmbedding(text, dims = 384) {
+    // Simple hash-based mock embedding
+    let hash = 0;
+    for (let i = 0; i < text.length; i++) {
+        hash = ((hash << 5) - hash) + text.charCodeAt(i);
+        hash = hash & hash;
+    }
+
+    const embedding = new Float32Array(dims);
+    for (let i = 0; i < dims; i++) {
+        embedding[i] = Math.sin((hash + i) * 0.01);
+    }
+    return embedding;
+}
+
+async function main() {
+    console.log('🔍 Semantic Search Example\n');
+
+    // 1. Setup database
+    console.log('1. Setting up search index...');
+    const db = new VectorDB({
+        dimensions: 384,
+        storagePath: './semantic_search.db',
+        distanceMetric: 'cosine',
+        hnsw: {
+            m: 32,
+            efConstruction: 200,
+            efSearch: 100
+        }
+    });
+    console.log('   ✓ Database created\n');
+
+    // 2. Index documents
+    console.log('2. Indexing documents...');
+    const documents = [
+        {
+            id: 'doc_001',
+            text: 'The quick brown fox jumps over the lazy dog',
+            category: 'animals'
+        },
+        {
+            id: 'doc_002',
+            text: 'Machine learning is a subset of artificial intelligence',
+            category: 'technology'
+        },
+        {
+            id: 'doc_003',
+            text: 'Python is a popular programming language for data science',
+            category: 'technology'
+        },
+        {
+            id: 'doc_004',
+            text: 'The cat sat on the mat while birds sang outside',
+            category: 'animals'
+        },
+        {
+            id: 'doc_005',
+            text: 'Neural networks are inspired by biological neurons',
+            category: 'technology'
+        },
+        {
+            id: 'doc_006',
+            text: 'Dogs are loyal companions and great pets',
+            category: 'animals'
+        },
+        {
+            id: 'doc_007',
+            text: 'Deep learning requires large amounts of training data',
+            category: 'technology'
+        },
+        {
+            id: 'doc_008',
+            text: 'Birds migrate south during winter months',
+            category: 'animals'
+        }
+    ];
+
+    const entries = documents.map(doc => ({
+        id: doc.id,
+        vector: mockEmbedding(doc.text),
+        metadata: {
+            text: doc.text,
+            category: doc.category
+        }
+    }));
+
+    await db.insertBatch(entries);
+    console.log(`   ✓ Indexed ${documents.length} documents\n`);
+
+    // 3. Perform semantic searches
+    const queries = [
+        'artificial intelligence and neural networks',
+        'pets and domestic animals',
+        'programming and software development'
+    ];
+
+    for (const query of queries) {
+        console.log(`Query: "${query}"`);
+        console.log('─'.repeat(60));
+
+        const queryEmbedding = mockEmbedding(query);
+        const results = await db.search({
+            vector: queryEmbedding,
+            k: 3,
+            includeMetadata: true
+        });
+
+        results.forEach((result, i) => {
+            console.log(`${i + 1}. ${result.metadata.text}`);
+            console.log(`   Category: ${result.metadata.category}, Similarity: ${(1 - result.distance).toFixed(4)}`);
+        });
+        console.log();
+    }
+
+    // 4. Filtered semantic search
+    console.log('Filtered search (category: technology)');
+    console.log('─'.repeat(60));
+
+    const techQuery = mockEmbedding('computers and algorithms');
+    const filteredResults = await db.search({
+        vector: techQuery,
+        k: 3,
+        filter: { category: 'technology' },
+        includeMetadata: true
+    });
+
+    filteredResults.forEach((result, i) => {
+        console.log(`${i + 1}. ${result.metadata.text}`);
+        console.log(`   Similarity: ${(1 - result.distance).toFixed(4)}`);
+    });
+    console.log();
+
+    console.log('✅ Semantic search example completed!');
+    console.log('\n💡 In production:');
+    console.log('   • Use a real embedding model (OpenAI, Sentence Transformers, etc.)');
+    console.log('   • Add more documents to your knowledge base');
+    console.log('   • Implement filters for category, date, author, etc.');
+    console.log('   • Add hybrid search (vector + keyword) for better results');
+}
+
+main().catch(console.error);