Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/npm/packages/ruvector-extensions/src/embeddings.ts
+++ b/npm/packages/ruvector-extensions/src/embeddings.ts
@@ -0,0 +1,926 @@
+/**
+ * @fileoverview Comprehensive embeddings integration module for ruvector-extensions
+ * Supports multiple providers: OpenAI, Cohere, Anthropic, and local HuggingFace models
+ *
+ * @module embeddings
+ * @author ruv.io Team <info@ruv.io>
+ * @license MIT
+ *
+ * @example
+ * ```typescript
+ * // OpenAI embeddings
+ * const openai = new OpenAIEmbeddings({ apiKey: 'sk-...' });
+ * const embeddings = await openai.embedTexts(['Hello world', 'Test']);
+ *
+ * // Auto-insert into VectorDB
+ * await embedAndInsert(db, openai, [
+ *   { id: '1', text: 'Hello world', metadata: { source: 'test' } }
+ * ]);
+ * ```
+ */
+
+// VectorDB type will be used as any for maximum compatibility
+type VectorDB = any;
+
+// ============================================================================
+// Core Types and Interfaces
+// ============================================================================
+
+/**
+ * Configuration for retry logic
+ */
+export interface RetryConfig {
+  /** Maximum number of retry attempts */
+  maxRetries: number;
+  /** Initial delay in milliseconds before first retry */
+  initialDelay: number;
+  /** Maximum delay in milliseconds between retries */
+  maxDelay: number;
+  /** Multiplier for exponential backoff */
+  backoffMultiplier: number;
+}
+
+/**
+ * Result of an embedding operation
+ */
+export interface EmbeddingResult {
+  /** The generated embedding vector */
+  embedding: number[];
+  /** Index of the text in the original batch */
+  index: number;
+  /** Optional token count used */
+  tokens?: number;
+}
+
+/**
+ * Batch result with embeddings and metadata
+ */
+export interface BatchEmbeddingResult {
+  /** Array of embedding results */
+  embeddings: EmbeddingResult[];
+  /** Total tokens used (if available) */
+  totalTokens?: number;
+  /** Provider-specific metadata */
+  metadata?: Record<string, unknown>;
+}
+
+/**
+ * Error details for failed embedding operations
+ */
+export interface EmbeddingError {
+  /** Error message */
+  message: string;
+  /** Original error object */
+  error: unknown;
+  /** Index of the text that failed (if applicable) */
+  index?: number;
+  /** Whether the error is retryable */
+  retryable: boolean;
+}
+
+/**
+ * Document to embed and insert into VectorDB
+ */
+export interface DocumentToEmbed {
+  /** Unique identifier for the document */
+  id: string;
+  /** Text content to embed */
+  text: string;
+  /** Optional metadata to store with the vector */
+  metadata?: Record<string, unknown>;
+}
+
+// ============================================================================
+// Abstract Base Class
+// ============================================================================
+
+/**
+ * Abstract base class for embedding providers
+ * All embedding providers must extend this class and implement its methods
+ */
+export abstract class EmbeddingProvider {
+  protected retryConfig: RetryConfig;
+
+  /**
+   * Creates a new embedding provider instance
+   * @param retryConfig - Configuration for retry logic
+   */
+  constructor(retryConfig?: Partial<RetryConfig>) {
+    this.retryConfig = {
+      maxRetries: 3,
+      initialDelay: 1000,
+      maxDelay: 10000,
+      backoffMultiplier: 2,
+      ...retryConfig,
+    };
+  }
+
+  /**
+   * Get the maximum batch size supported by this provider
+   */
+  abstract getMaxBatchSize(): number;
+
+  /**
+   * Get the dimension of embeddings produced by this provider
+   */
+  abstract getDimension(): number;
+
+  /**
+   * Embed a single text string
+   * @param text - Text to embed
+   * @returns Promise resolving to the embedding vector
+   */
+  async embedText(text: string): Promise<number[]> {
+    const result = await this.embedTexts([text]);
+    return result.embeddings[0].embedding;
+  }
+
+  /**
+   * Embed multiple texts with automatic batching
+   * @param texts - Array of texts to embed
+   * @returns Promise resolving to batch embedding results
+   */
+  abstract embedTexts(texts: string[]): Promise<BatchEmbeddingResult>;
+
+  /**
+   * Execute a function with retry logic
+   * @param fn - Function to execute
+   * @param context - Context description for error messages
+   * @returns Promise resolving to the function result
+   */
+  protected async withRetry<T>(
+    fn: () => Promise<T>,
+    context: string
+  ): Promise<T> {
+    let lastError: unknown;
+    let delay = this.retryConfig.initialDelay;
+
+    for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
+      try {
+        return await fn();
+      } catch (error) {
+        lastError = error;
+
+        // Check if error is retryable
+        if (!this.isRetryableError(error)) {
+          throw this.createEmbeddingError(error, context, false);
+        }
+
+        if (attempt < this.retryConfig.maxRetries) {
+          await this.sleep(delay);
+          delay = Math.min(
+            delay * this.retryConfig.backoffMultiplier,
+            this.retryConfig.maxDelay
+          );
+        }
+      }
+    }
+
+    throw this.createEmbeddingError(
+      lastError,
+      `${context} (after ${this.retryConfig.maxRetries} retries)`,
+      false
+    );
+  }
+
+  /**
+   * Determine if an error is retryable
+   * @param error - Error to check
+   * @returns True if the error should trigger a retry
+   */
+  protected isRetryableError(error: unknown): boolean {
+    if (error instanceof Error) {
+      const message = error.message.toLowerCase();
+      // Rate limits, timeouts, and temporary server errors are retryable
+      return (
+        message.includes('rate limit') ||
+        message.includes('timeout') ||
+        message.includes('503') ||
+        message.includes('429') ||
+        message.includes('connection')
+      );
+    }
+    return false;
+  }
+
+  /**
+   * Create a standardized embedding error
+   * @param error - Original error
+   * @param context - Context description
+   * @param retryable - Whether the error is retryable
+   * @returns Formatted error object
+   */
+  protected createEmbeddingError(
+    error: unknown,
+    context: string,
+    retryable: boolean
+  ): EmbeddingError {
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      message: `${context}: ${message}`,
+      error,
+      retryable,
+    };
+  }
+
+  /**
+   * Sleep for a specified duration
+   * @param ms - Milliseconds to sleep
+   */
+  protected sleep(ms: number): Promise<void> {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Split texts into batches based on max batch size
+   * @param texts - Texts to batch
+   * @returns Array of text batches
+   */
+  protected createBatches(texts: string[]): string[][] {
+    const batches: string[][] = [];
+    const batchSize = this.getMaxBatchSize();
+
+    for (let i = 0; i < texts.length; i += batchSize) {
+      batches.push(texts.slice(i, i + batchSize));
+    }
+
+    return batches;
+  }
+}
+
+// ============================================================================
+// OpenAI Embeddings Provider
+// ============================================================================
+
+/**
+ * Configuration for OpenAI embeddings
+ */
+export interface OpenAIEmbeddingsConfig {
+  /** OpenAI API key */
+  apiKey: string;
+  /** Model name (default: 'text-embedding-3-small') */
+  model?: string;
+  /** Embedding dimensions (only for text-embedding-3-* models) */
+  dimensions?: number;
+  /** Organization ID (optional) */
+  organization?: string;
+  /** Custom base URL (optional) */
+  baseURL?: string;
+  /** Retry configuration */
+  retryConfig?: Partial<RetryConfig>;
+}
+
+/**
+ * OpenAI embeddings provider
+ * Supports text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002
+ */
+export class OpenAIEmbeddings extends EmbeddingProvider {
+  private config: {
+    apiKey: string;
+    model: string;
+    organization?: string;
+    baseURL?: string;
+    dimensions?: number;
+  };
+  private openai: any;
+
+  /**
+   * Creates a new OpenAI embeddings provider
+   * @param config - Configuration options
+   * @throws Error if OpenAI SDK is not installed
+   */
+  constructor(config: OpenAIEmbeddingsConfig) {
+    super(config.retryConfig);
+
+    this.config = {
+      apiKey: config.apiKey,
+      model: config.model || 'text-embedding-3-small',
+      organization: config.organization,
+      baseURL: config.baseURL,
+      dimensions: config.dimensions,
+    };
+
+    try {
+      // Dynamic import to support optional peer dependency
+      const OpenAI = require('openai');
+      this.openai = new OpenAI({
+        apiKey: this.config.apiKey,
+        organization: this.config.organization,
+        baseURL: this.config.baseURL,
+      });
+    } catch (error) {
+      throw new Error(
+        'OpenAI SDK not found. Install it with: npm install openai'
+      );
+    }
+  }
+
+  getMaxBatchSize(): number {
+    // OpenAI supports up to 2048 inputs per request
+    return 2048;
+  }
+
+  getDimension(): number {
+    // Return configured dimensions or default based on model
+    if (this.config.dimensions) {
+      return this.config.dimensions;
+    }
+
+    switch (this.config.model) {
+      case 'text-embedding-3-small':
+        return 1536;
+      case 'text-embedding-3-large':
+        return 3072;
+      case 'text-embedding-ada-002':
+        return 1536;
+      default:
+        return 1536;
+    }
+  }
+
+  async embedTexts(texts: string[]): Promise<BatchEmbeddingResult> {
+    if (texts.length === 0) {
+      return { embeddings: [] };
+    }
+
+    const batches = this.createBatches(texts);
+    const allResults: EmbeddingResult[] = [];
+    let totalTokens = 0;
+
+    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
+      const batch = batches[batchIndex];
+      const baseIndex = batchIndex * this.getMaxBatchSize();
+
+      const response = await this.withRetry(
+        async () => {
+          const params: any = {
+            model: this.config.model,
+            input: batch,
+          };
+
+          if (this.config.dimensions) {
+            params.dimensions = this.config.dimensions;
+          }
+
+          return await this.openai.embeddings.create(params);
+        },
+        `OpenAI embeddings for batch ${batchIndex + 1}/${batches.length}`
+      );
+
+      totalTokens += response.usage?.total_tokens || 0;
+
+      for (const item of response.data) {
+        allResults.push({
+          embedding: item.embedding,
+          index: baseIndex + item.index,
+          tokens: response.usage?.total_tokens,
+        });
+      }
+    }
+
+    return {
+      embeddings: allResults,
+      totalTokens,
+      metadata: {
+        model: this.config.model,
+        provider: 'openai',
+      },
+    };
+  }
+}
+
+// ============================================================================
+// Cohere Embeddings Provider
+// ============================================================================
+
+/**
+ * Configuration for Cohere embeddings
+ */
+export interface CohereEmbeddingsConfig {
+  /** Cohere API key */
+  apiKey: string;
+  /** Model name (default: 'embed-english-v3.0') */
+  model?: string;
+  /** Input type: 'search_document', 'search_query', 'classification', or 'clustering' */
+  inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
+  /** Truncate input text if it exceeds model limits */
+  truncate?: 'NONE' | 'START' | 'END';
+  /** Retry configuration */
+  retryConfig?: Partial<RetryConfig>;
+}
+
+/**
+ * Cohere embeddings provider
+ * Supports embed-english-v3.0, embed-multilingual-v3.0, and other Cohere models
+ */
+export class CohereEmbeddings extends EmbeddingProvider {
+  private config: {
+    apiKey: string;
+    model: string;
+    inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
+    truncate?: 'NONE' | 'START' | 'END';
+  };
+  private cohere: any;
+
+  /**
+   * Creates a new Cohere embeddings provider
+   * @param config - Configuration options
+   * @throws Error if Cohere SDK is not installed
+   */
+  constructor(config: CohereEmbeddingsConfig) {
+    super(config.retryConfig);
+
+    this.config = {
+      apiKey: config.apiKey,
+      model: config.model || 'embed-english-v3.0',
+      inputType: config.inputType,
+      truncate: config.truncate,
+    };
+
+    try {
+      // Dynamic import to support optional peer dependency
+      const { CohereClient } = require('cohere-ai');
+      this.cohere = new CohereClient({
+        token: this.config.apiKey,
+      });
+    } catch (error) {
+      throw new Error(
+        'Cohere SDK not found. Install it with: npm install cohere-ai'
+      );
+    }
+  }
+
+  getMaxBatchSize(): number {
+    // Cohere supports up to 96 texts per request
+    return 96;
+  }
+
+  getDimension(): number {
+    // Cohere v3 models produce 1024-dimensional embeddings
+    if (this.config.model.includes('v3')) {
+      return 1024;
+    }
+    // Earlier models use different dimensions
+    return 4096;
+  }
+
+  async embedTexts(texts: string[]): Promise<BatchEmbeddingResult> {
+    if (texts.length === 0) {
+      return { embeddings: [] };
+    }
+
+    const batches = this.createBatches(texts);
+    const allResults: EmbeddingResult[] = [];
+
+    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
+      const batch = batches[batchIndex];
+      const baseIndex = batchIndex * this.getMaxBatchSize();
+
+      const response = await this.withRetry(
+        async () => {
+          const params: any = {
+            model: this.config.model,
+            texts: batch,
+          };
+
+          if (this.config.inputType) {
+            params.inputType = this.config.inputType;
+          }
+
+          if (this.config.truncate) {
+            params.truncate = this.config.truncate;
+          }
+
+          return await this.cohere.embed(params);
+        },
+        `Cohere embeddings for batch ${batchIndex + 1}/${batches.length}`
+      );
+
+      for (let i = 0; i < response.embeddings.length; i++) {
+        allResults.push({
+          embedding: response.embeddings[i],
+          index: baseIndex + i,
+        });
+      }
+    }
+
+    return {
+      embeddings: allResults,
+      metadata: {
+        model: this.config.model,
+        provider: 'cohere',
+      },
+    };
+  }
+}
+
+// ============================================================================
+// Anthropic Embeddings Provider
+// ============================================================================
+
+/**
+ * Configuration for Anthropic embeddings via Voyage AI
+ */
+export interface AnthropicEmbeddingsConfig {
+  /** Anthropic API key */
+  apiKey: string;
+  /** Model name (default: 'voyage-2') */
+  model?: string;
+  /** Input type for embeddings */
+  inputType?: 'document' | 'query';
+  /** Retry configuration */
+  retryConfig?: Partial<RetryConfig>;
+}
+
+/**
+ * Anthropic embeddings provider using Voyage AI
+ * Anthropic partners with Voyage AI for embeddings
+ */
+export class AnthropicEmbeddings extends EmbeddingProvider {
+  private config: {
+    apiKey: string;
+    model: string;
+    inputType?: 'document' | 'query';
+  };
+  private anthropic: any;
+
+  /**
+   * Creates a new Anthropic embeddings provider
+   * @param config - Configuration options
+   * @throws Error if Anthropic SDK is not installed
+   */
+  constructor(config: AnthropicEmbeddingsConfig) {
+    super(config.retryConfig);
+
+    this.config = {
+      apiKey: config.apiKey,
+      model: config.model || 'voyage-2',
+      inputType: config.inputType,
+    };
+
+    try {
+      const Anthropic = require('@anthropic-ai/sdk');
+      this.anthropic = new Anthropic({
+        apiKey: this.config.apiKey,
+      });
+    } catch (error) {
+      throw new Error(
+        'Anthropic SDK not found. Install it with: npm install @anthropic-ai/sdk'
+      );
+    }
+  }
+
+  getMaxBatchSize(): number {
+    // Process in smaller batches for Voyage API
+    return 128;
+  }
+
+  getDimension(): number {
+    // Voyage-2 produces 1024-dimensional embeddings
+    return 1024;
+  }
+
+  async embedTexts(texts: string[]): Promise<BatchEmbeddingResult> {
+    if (texts.length === 0) {
+      return { embeddings: [] };
+    }
+
+    const batches = this.createBatches(texts);
+    const allResults: EmbeddingResult[] = [];
+
+    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
+      const batch = batches[batchIndex];
+      const baseIndex = batchIndex * this.getMaxBatchSize();
+
+      // Note: As of early 2025, Anthropic uses Voyage AI for embeddings
+      // This is a placeholder for when official API is available
+      const response = await this.withRetry(
+        async () => {
+          // Use Voyage AI API through Anthropic's recommended integration
+          const httpResponse = await fetch('https://api.voyageai.com/v1/embeddings', {
+            method: 'POST',
+            headers: {
+              'Content-Type': 'application/json',
+              'Authorization': `Bearer ${this.config.apiKey}`,
+            },
+            body: JSON.stringify({
+              input: batch,
+              model: this.config.model,
+              input_type: this.config.inputType || 'document',
+            }),
+          });
+
+          if (!httpResponse.ok) {
+            const error = await httpResponse.text();
+            throw new Error(`Voyage API error: ${error}`);
+          }
+
+          return await httpResponse.json() as { data: Array<{ embedding: number[] }> };
+        },
+        `Anthropic/Voyage embeddings for batch ${batchIndex + 1}/${batches.length}`
+      );
+
+      for (let i = 0; i < response.data.length; i++) {
+        allResults.push({
+          embedding: response.data[i].embedding,
+          index: baseIndex + i,
+        });
+      }
+    }
+
+    return {
+      embeddings: allResults,
+      metadata: {
+        model: this.config.model,
+        provider: 'anthropic-voyage',
+      },
+    };
+  }
+}
+
+// ============================================================================
+// HuggingFace Local Embeddings Provider
+// ============================================================================
+
+/**
+ * Configuration for HuggingFace local embeddings
+ */
+export interface HuggingFaceEmbeddingsConfig {
+  /** Model name or path (default: 'sentence-transformers/all-MiniLM-L6-v2') */
+  model?: string;
+  /** Device to run on: 'cpu' or 'cuda' */
+  device?: 'cpu' | 'cuda';
+  /** Normalize embeddings to unit length */
+  normalize?: boolean;
+  /** Batch size for processing */
+  batchSize?: number;
+  /** Retry configuration */
+  retryConfig?: Partial<RetryConfig>;
+}
+
+/**
+ * HuggingFace local embeddings provider
+ * Runs embedding models locally using transformers.js
+ */
+export class HuggingFaceEmbeddings extends EmbeddingProvider {
+  private config: {
+    model: string;
+    normalize: boolean;
+    batchSize: number;
+  };
+  private pipeline: any;
+  private initialized: boolean = false;
+
+  /**
+   * Creates a new HuggingFace local embeddings provider
+   * @param config - Configuration options
+   */
+  constructor(config: HuggingFaceEmbeddingsConfig = {}) {
+    super(config.retryConfig);
+
+    this.config = {
+      model: config.model || 'Xenova/all-MiniLM-L6-v2',
+      normalize: config.normalize !== false,
+      batchSize: config.batchSize || 32,
+    };
+  }
+
+  getMaxBatchSize(): number {
+    return this.config.batchSize;
+  }
+
+  getDimension(): number {
+    // all-MiniLM-L6-v2 produces 384-dimensional embeddings
+    // This should be determined dynamically based on model
+    return 384;
+  }
+
+  /**
+   * Initialize the embedding pipeline
+   */
+  private async initialize(): Promise<void> {
+    if (this.initialized) return;
+
+    try {
+      // Dynamic import of transformers.js
+      const { pipeline } = await import('@xenova/transformers');
+
+      this.pipeline = await pipeline(
+        'feature-extraction',
+        this.config.model
+      );
+
+      this.initialized = true;
+    } catch (error) {
+      throw new Error(
+        'Transformers.js not found or failed to load. Install it with: npm install @xenova/transformers'
+      );
+    }
+  }
+
+  async embedTexts(texts: string[]): Promise<BatchEmbeddingResult> {
+    if (texts.length === 0) {
+      return { embeddings: [] };
+    }
+
+    await this.initialize();
+
+    const batches = this.createBatches(texts);
+    const allResults: EmbeddingResult[] = [];
+
+    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
+      const batch = batches[batchIndex];
+      const baseIndex = batchIndex * this.getMaxBatchSize();
+
+      const embeddings = await this.withRetry(
+        async () => {
+          const output = await this.pipeline(batch, {
+            pooling: 'mean',
+            normalize: this.config.normalize,
+          });
+
+          // Convert tensor to array
+          return output.tolist();
+        },
+        `HuggingFace embeddings for batch ${batchIndex + 1}/${batches.length}`
+      );
+
+      for (let i = 0; i < embeddings.length; i++) {
+        allResults.push({
+          embedding: embeddings[i],
+          index: baseIndex + i,
+        });
+      }
+    }
+
+    return {
+      embeddings: allResults,
+      metadata: {
+        model: this.config.model,
+        provider: 'huggingface-local',
+      },
+    };
+  }
+}
+
+// ============================================================================
+// Helper Functions
+// ============================================================================
+
+/**
+ * Embed texts and automatically insert them into a VectorDB
+ *
+ * @param db - VectorDB instance to insert into
+ * @param provider - Embedding provider to use
+ * @param documents - Documents to embed and insert
+ * @param options - Additional options
+ * @returns Promise resolving to array of inserted vector IDs
+ *
+ * @example
+ * ```typescript
+ * const openai = new OpenAIEmbeddings({ apiKey: 'sk-...' });
+ * const db = new VectorDB({ dimension: 1536 });
+ *
+ * const ids = await embedAndInsert(db, openai, [
+ *   { id: '1', text: 'Hello world', metadata: { source: 'test' } },
+ *   { id: '2', text: 'Another document', metadata: { source: 'test' } }
+ * ]);
+ *
+ * console.log('Inserted vector IDs:', ids);
+ * ```
+ */
+export async function embedAndInsert(
+  db: VectorDB,
+  provider: EmbeddingProvider,
+  documents: DocumentToEmbed[],
+  options: {
+    /** Whether to overwrite existing vectors with same ID */
+    overwrite?: boolean;
+    /** Progress callback */
+    onProgress?: (current: number, total: number) => void;
+  } = {}
+): Promise<string[]> {
+  if (documents.length === 0) {
+    return [];
+  }
+
+  // Verify dimension compatibility
+  const dbDimension = (db as any).dimension || db.getDimension?.();
+  const providerDimension = provider.getDimension();
+
+  if (dbDimension && dbDimension !== providerDimension) {
+    throw new Error(
+      `Dimension mismatch: VectorDB expects ${dbDimension} but provider produces ${providerDimension}`
+    );
+  }
+
+  // Extract texts
+  const texts = documents.map(doc => doc.text);
+
+  // Generate embeddings
+  const result = await provider.embedTexts(texts);
+
+  // Insert vectors
+  const insertedIds: string[] = [];
+
+  for (let i = 0; i < documents.length; i++) {
+    const doc = documents[i];
+    const embedding = result.embeddings.find(e => e.index === i);
+
+    if (!embedding) {
+      throw new Error(`Missing embedding for document at index ${i}`);
+    }
+
+    // Insert or update vector
+    if (options.overwrite) {
+      await db.upsert({
+        id: doc.id,
+        values: embedding.embedding,
+        metadata: doc.metadata,
+      });
+    } else {
+      await db.insert({
+        id: doc.id,
+        values: embedding.embedding,
+        metadata: doc.metadata,
+      });
+    }
+
+    insertedIds.push(doc.id);
+
+    // Call progress callback
+    if (options.onProgress) {
+      options.onProgress(i + 1, documents.length);
+    }
+  }
+
+  return insertedIds;
+}
+
+/**
+ * Embed a query and search for similar documents in VectorDB
+ *
+ * @param db - VectorDB instance to search
+ * @param provider - Embedding provider to use
+ * @param query - Query text to search for
+ * @param options - Search options
+ * @returns Promise resolving to search results
+ *
+ * @example
+ * ```typescript
+ * const openai = new OpenAIEmbeddings({ apiKey: 'sk-...' });
+ * const db = new VectorDB({ dimension: 1536 });
+ *
+ * const results = await embedAndSearch(db, openai, 'machine learning', {
+ *   topK: 5,
+ *   threshold: 0.7
+ * });
+ *
+ * console.log('Found documents:', results);
+ * ```
+ */
+export async function embedAndSearch(
+  db: VectorDB,
+  provider: EmbeddingProvider,
+  query: string,
+  options: {
+    /** Number of results to return */
+    topK?: number;
+    /** Minimum similarity threshold (0-1) */
+    threshold?: number;
+    /** Metadata filter */
+    filter?: Record<string, unknown>;
+  } = {}
+): Promise<any[]> {
+  // Generate query embedding
+  const queryEmbedding = await provider.embedText(query);
+
+  // Search VectorDB
+  const results = await db.search({
+    vector: queryEmbedding,
+    topK: options.topK || 10,
+    threshold: options.threshold,
+    filter: options.filter,
+  });
+
+  return results;
+}
+
+// ============================================================================
+// Exports
+// ============================================================================
+
+export default {
+  // Base class
+  EmbeddingProvider,
+
+  // Providers
+  OpenAIEmbeddings,
+  CohereEmbeddings,
+  AnthropicEmbeddings,
+  HuggingFaceEmbeddings,
+
+  // Helper functions
+  embedAndInsert,
+  embedAndSearch,
+};