Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,210 @@
# RuVector Node.js Examples
JavaScript/TypeScript examples for integrating RuVector with Node.js applications.
## Examples
| File | Description |
|------|-------------|
| `basic_usage.js` | Getting started with the JS SDK |
| `semantic_search.js` | Semantic search implementation |
## Quick Start
```bash
npm install ruvector
node basic_usage.js
node semantic_search.js
```
## Basic Usage
```javascript
const { VectorDB } = require('ruvector');
async function main() {
// Initialize database
const db = new VectorDB({
dimensions: 128,
storagePath: './my_vectors.db'
});
await db.initialize();
// Insert vectors
await db.insert({
id: 'doc_001',
vector: new Float32Array(128).fill(0.1),
metadata: { title: 'Document 1' }
});
// Search
const results = await db.search({
vector: new Float32Array(128).fill(0.1),
topK: 10
});
console.log('Results:', results);
}
main().catch(console.error);
```
## Semantic Search
```javascript
const { VectorDB } = require('ruvector');
const { encode } = require('your-embedding-model');
async function semanticSearch() {
const db = new VectorDB({ dimensions: 384 });
await db.initialize();
// Index documents
const documents = [
'Machine learning is a subset of AI',
'Neural networks power modern AI',
'Deep learning uses multiple layers'
];
for (const doc of documents) {
const embedding = await encode(doc);
await db.insert({
id: doc.slice(0, 20),
vector: embedding,
metadata: { text: doc }
});
}
// Search by meaning
const query = 'How does artificial intelligence work?';
const queryVec = await encode(query);
const results = await db.search({
vector: queryVec,
topK: 5
});
results.forEach(r => {
console.log(`${r.score.toFixed(3)}: ${r.metadata.text}`);
});
}
```
## Batch Operations
```javascript
// Batch insert for efficiency
const entries = documents.map((doc, i) => ({
id: `doc_${i}`,
vector: embeddings[i],
metadata: { text: doc }
}));
await db.insertBatch(entries);
// Batch search
const queries = ['query1', 'query2', 'query3'];
const queryVectors = await Promise.all(queries.map(encode));
const batchResults = await db.searchBatch(
queryVectors.map(v => ({ vector: v, topK: 5 }))
);
```
## Filtering
```javascript
// Metadata filtering
const results = await db.search({
vector: queryVec,
topK: 10,
filter: {
category: { $eq: 'technology' },
date: { $gte: '2024-01-01' }
}
});
```
## TypeScript
```typescript
import { VectorDB, VectorEntry, SearchResult } from 'ruvector';
interface DocMetadata {
title: string;
author: string;
date: string;
}
const db = new VectorDB<DocMetadata>({
dimensions: 384
});
const entry: VectorEntry<DocMetadata> = {
id: 'doc_001',
vector: new Float32Array(384),
metadata: {
title: 'TypeScript Guide',
author: 'Dev Team',
date: '2024-01-01'
}
};
await db.insert(entry);
```
## Express.js Integration
```javascript
const express = require('express');
const { VectorDB } = require('ruvector');
// Placeholder module name: substitute the embedding model you actually use.
const { encode } = require('your-embedding-model');

const app = express();
const db = new VectorDB({ dimensions: 384 });
// NOTE: other snippets in this README call `await db.initialize()` before
// first use; do the same here before accepting traffic if your setup needs it.

// POST /search with a JSON body: { "query": "<text>", "topK": 10 }
app.post('/search', express.json(), async (req, res) => {
  // Guard against a missing body so the destructuring below cannot throw.
  const { query, topK = 10 } = req.body || {};

  if (typeof query !== 'string' || query.length === 0) {
    res.status(400).json({ error: 'query must be a non-empty string' });
    return;
  }

  try {
    const queryVec = await encode(query);
    const results = await db.search({
      vector: queryVec,
      topK
    });
    res.json(results);
  } catch (error) {
    // Express 4 does not forward rejections from async handlers, so an
    // uncaught failure here would leave the request hanging. Answer explicitly.
    console.error(error);
    res.status(500).json({ error: 'search failed' });
  }
});

app.listen(3000);
```
## Configuration Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `dimensions` | number | required | Vector dimensions |
| `storagePath` | string | `:memory:` | Database file path |
| `distanceMetric` | string | `cosine` | Distance metric |
| `indexType` | string | `hnsw` | Index algorithm |
## Error Handling
```javascript
try {
await db.insert(entry);
} catch (error) {
if (error.code === 'DIMENSION_MISMATCH') {
console.error('Vector dimension mismatch');
} else if (error.code === 'DUPLICATE_ID') {
console.error('ID already exists');
} else {
throw error;
}
}
```
## Performance Tips
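1. Use batch operations (`insertBatch`, `searchBatch`) for bulk inserts and multi-query searches
2. Keep vector dimensions consistent with the `dimensions` value the database was created with
3. Choose the `indexType` that suits your query patterns
4. Consider in-memory mode (the default `:memory:` storage path) when speed matters more than persistence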

View File

@@ -0,0 +1,68 @@
/**
* Basic usage example for Ruvector (Node.js)
*
* Demonstrates:
* - Creating a database
* - Inserting vectors
* - Searching for similar vectors
*/
const { VectorDB } = require('ruvector');
/**
 * Runs the basic-usage walkthrough from start to finish:
 * create a database, insert one vector, insert a batch of 100 more,
 * run a nearest-neighbour query and print the total vector count.
 * Progress is reported on stdout after every step.
 */
async function main() {
  console.log('🚀 Ruvector Basic Usage Example (Node.js)\n');

  // Step 1: create the database
  console.log('1. Creating database...');
  const dbOptions = {
    dimensions: 128,
    storagePath: './examples_basic_node.db',
    distanceMetric: 'cosine'
  };
  const db = new VectorDB(dbOptions);
  console.log(' ✓ Database created with 128 dimensions\n');

  // Step 2: insert one vector
  console.log('2. Inserting single vector...');
  const firstVector = new Float32Array(128).fill(0.1);
  const insertedId = await db.insert({
    id: 'doc_001',
    vector: firstVector,
    metadata: { text: 'Example document' }
  });
  console.log(` ✓ Inserted vector: ${insertedId}\n`);

  // Step 3: insert a batch of 100 vectors
  console.log('3. Inserting multiple vectors...');
  const batch = [];
  for (let offset = 0; offset < 100; offset += 1) {
    // Numbering starts at 2 because 'doc_001' was inserted in step 2.
    const docNumber = offset + 2;
    batch.push({
      id: `doc_${String(docNumber).padStart(3, '0')}`,
      vector: new Float32Array(128).fill(0.1 + offset * 0.001),
      metadata: { index: docNumber }
    });
  }
  const batchIds = await db.insertBatch(batch);
  console.log(` ✓ Inserted ${batchIds.length} vectors\n`);

  // Step 4: query for the 5 nearest vectors
  console.log('4. Searching for similar vectors...');
  const searchRequest = {
    vector: new Float32Array(128).fill(0.15),
    k: 5,
    includeMetadata: true
  };
  const results = await db.search(searchRequest);
  console.log(` ✓ Found ${results.length} results:`);
  for (let rank = 0; rank < results.length; rank += 1) {
    const hit = results[rank];
    console.log(` ${rank + 1}. ID: ${hit.id}, Distance: ${hit.distance.toFixed(6)}`);
  }
  console.log();

  // Step 5: report how many vectors are stored
  console.log('5. Database statistics:');
  const total = db.count();
  console.log(` ✓ Total vectors: ${total}\n`);

  console.log('✅ Example completed successfully!');
}

// Any rejection from the walkthrough is logged to stderr.
main().catch(console.error);

View File

@@ -0,0 +1,150 @@
/**
* Semantic Search Example (Node.js)
*
* Demonstrates building a semantic search system with Ruvector
*/
const { VectorDB } = require('ruvector');
/**
 * Build a deterministic placeholder embedding for a piece of text.
 * This is a mock: in production, use a real embedding model instead.
 *
 * The text's UTF-16 code units are folded into a signed 32-bit rolling hash
 * (equivalent to `hash * 31 + code`, truncated to 32 bits each step), and
 * component `i` of the result is `sin((hash + i) * 0.01)`.
 *
 * @param {string} text - Text to derive the vector from.
 * @param {number} [dims=384] - Number of components in the returned vector.
 * @returns {Float32Array} Vector of length `dims`; the same text and dims always give the same vector.
 */
function mockEmbedding(text, dims = 384) {
  let seed = 0;
  for (let pos = 0; pos < text.length; pos += 1) {
    seed = (seed << 5) - seed + text.charCodeAt(pos);
    seed |= 0; // truncate to a signed 32-bit integer
  }

  // Allocate first so an invalid `dims` fails exactly as a plain allocation would.
  return new Float32Array(dims).map((_, idx) => Math.sin((seed + idx) * 0.01));
}
/**
 * Runs the semantic-search demo: builds an index over eight sample
 * documents using mock embeddings, runs three free-text queries, then runs
 * one query restricted to the 'technology' category. Results are printed
 * to stdout with similarity shown as `1 - distance`.
 */
async function main() {
  console.log('🔍 Semantic Search Example\n');

  // Step 1: create the database with explicit HNSW settings
  console.log('1. Setting up search index...');
  const hnswSettings = { m: 32, efConstruction: 200, efSearch: 100 };
  const db = new VectorDB({
    dimensions: 384,
    storagePath: './semantic_search.db',
    distanceMetric: 'cosine',
    hnsw: hnswSettings
  });
  console.log(' ✓ Database created\n');

  // Step 2: embed and index the sample documents
  console.log('2. Indexing documents...');
  const documents = [
    ['doc_001', 'The quick brown fox jumps over the lazy dog', 'animals'],
    ['doc_002', 'Machine learning is a subset of artificial intelligence', 'technology'],
    ['doc_003', 'Python is a popular programming language for data science', 'technology'],
    ['doc_004', 'The cat sat on the mat while birds sang outside', 'animals'],
    ['doc_005', 'Neural networks are inspired by biological neurons', 'technology'],
    ['doc_006', 'Dogs are loyal companions and great pets', 'animals'],
    ['doc_007', 'Deep learning requires large amounts of training data', 'technology'],
    ['doc_008', 'Birds migrate south during winter months', 'animals']
  ].map(([id, text, category]) => ({ id, text, category }));

  const entries = documents.map(({ id, text, category }) => ({
    id,
    vector: mockEmbedding(text),
    metadata: { text, category }
  }));
  await db.insertBatch(entries);
  console.log(` ✓ Indexed ${documents.length} documents\n`);

  const divider = '─'.repeat(60);

  // Step 3: unfiltered queries, top 3 hits each
  const queries = [
    'artificial intelligence and neural networks',
    'pets and domestic animals',
    'programming and software development'
  ];
  for (const queryText of queries) {
    console.log(`Query: "${queryText}"`);
    console.log(divider);

    const hits = await db.search({
      vector: mockEmbedding(queryText),
      k: 3,
      includeMetadata: true
    });
    for (let rank = 0; rank < hits.length; rank += 1) {
      const { metadata, distance } = hits[rank];
      console.log(`${rank + 1}. ${metadata.text}`);
      console.log(` Category: ${metadata.category}, Similarity: ${(1 - distance).toFixed(4)}`);
    }
    console.log();
  }

  // Step 4: the same kind of query, restricted by metadata
  console.log('Filtered search (category: technology)');
  console.log(divider);
  const techQuery = mockEmbedding('computers and algorithms');
  const filteredResults = await db.search({
    vector: techQuery,
    k: 3,
    filter: { category: 'technology' },
    includeMetadata: true
  });
  for (let rank = 0; rank < filteredResults.length; rank += 1) {
    const { metadata, distance } = filteredResults[rank];
    console.log(`${rank + 1}. ${metadata.text}`);
    console.log(` Similarity: ${(1 - distance).toFixed(4)}`);
  }
  console.log();

  console.log('✅ Semantic search example completed!');
  const productionTips = [
    '\n💡 In production:',
    ' • Use a real embedding model (OpenAI, Sentence Transformers, etc.)',
    ' • Add more documents to your knowledge base',
    ' • Implement filters for category, date, author, etc.',
    ' • Add hybrid search (vector + keyword) for better results'
  ];
  for (const tip of productionTips) {
    console.log(tip);
  }
}

// Any rejection from the demo is logged to stderr.
main().catch(console.error);