Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
61
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.d.ts
vendored
Normal file
61
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.d.ts
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* WasmEmbedder - WASM-based Text Embedding
|
||||
*
|
||||
* Provides high-performance text embeddings using RuVector WASM bindings.
|
||||
* Supports batching, caching, and SIMD optimization.
|
||||
*/
|
||||
import type { Embedder } from '../memory/MemoryManager.js';
|
||||
export interface WasmEmbedderConfig {
|
||||
dimensions: number;
|
||||
modelPath?: string;
|
||||
cacheSize?: number;
|
||||
useSIMD?: boolean;
|
||||
batchSize?: number;
|
||||
}
|
||||
export interface EmbeddingCache {
|
||||
get(key: string): Float32Array | undefined;
|
||||
set(key: string, value: Float32Array): void;
|
||||
clear(): void;
|
||||
size(): number;
|
||||
}
|
||||
export declare class WasmEmbedder implements Embedder {
|
||||
private readonly config;
|
||||
private readonly cache;
|
||||
private wasmModule;
|
||||
private initialized;
|
||||
constructor(config: WasmEmbedderConfig);
|
||||
/**
|
||||
* Initialize the WASM module
|
||||
*/
|
||||
initialize(): Promise<void>;
|
||||
/**
|
||||
* Embed a single text string
|
||||
*/
|
||||
embed(text: string): Promise<Float32Array>;
|
||||
/**
|
||||
* Embed multiple texts in batch
|
||||
*/
|
||||
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
||||
/**
|
||||
* Get embedding dimensions
|
||||
*/
|
||||
dimension(): number;
|
||||
/**
|
||||
* Clear the embedding cache
|
||||
*/
|
||||
clearCache(): void;
|
||||
/**
|
||||
* Get cache statistics
|
||||
*/
|
||||
getCacheStats(): {
|
||||
size: number;
|
||||
maxSize: number;
|
||||
};
|
||||
private generateEmbedding;
|
||||
private generateEmbeddingBatch;
|
||||
private fallbackEmbed;
|
||||
private hashCode;
|
||||
}
|
||||
export declare function createWasmEmbedder(config: WasmEmbedderConfig): WasmEmbedder;
|
||||
export default WasmEmbedder;
|
||||
//# sourceMappingURL=WasmEmbedder.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"WasmEmbedder.d.ts","sourceRoot":"","sources":["WasmEmbedder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAO3D,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,YAAY,GAAG,SAAS,CAAC;IAC3C,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,GAAG,IAAI,CAAC;IAC5C,KAAK,IAAI,IAAI,CAAC;IACd,IAAI,IAAI,MAAM,CAAC;CAChB;AAkDD,qBAAa,YAAa,YAAW,QAAQ;IAC3C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,UAAU,CAAiB;IACnC,OAAO,CAAC,WAAW,CAAkB;gBAEzB,MAAM,EAAE,kBAAkB;IAWtC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBjC;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAoBhD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAoC1D;;OAEG;IACH,SAAS,IAAI,MAAM;IAInB;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,aAAa,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;YAWpC,iBAAiB;YAoBjB,sBAAsB;IAepC,OAAO,CAAC,aAAa;IAoBrB,OAAO,CAAC,QAAQ;CASjB;AAMD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,kBAAkB,GAAG,YAAY,CAE3E;AAED,eAAe,YAAY,CAAC"}
|
||||
254
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.js
vendored
Normal file
254
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.js
vendored
Normal file
@@ -0,0 +1,254 @@
|
||||
"use strict";
|
||||
/**
|
||||
* WasmEmbedder - WASM-based Text Embedding
|
||||
*
|
||||
* Provides high-performance text embeddings using RuVector WASM bindings.
|
||||
* Supports batching, caching, and SIMD optimization.
|
||||
*/
|
||||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
var desc = Object.getOwnPropertyDescriptor(m, k);
|
||||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
||||
desc = { enumerable: true, get: function() { return m[k]; } };
|
||||
}
|
||||
Object.defineProperty(o, k2, desc);
|
||||
}) : (function(o, m, k, k2) {
|
||||
if (k2 === undefined) k2 = k;
|
||||
o[k2] = m[k];
|
||||
}));
|
||||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
||||
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
||||
}) : function(o, v) {
|
||||
o["default"] = v;
|
||||
});
|
||||
var __importStar = (this && this.__importStar) || (function () {
|
||||
var ownKeys = function(o) {
|
||||
ownKeys = Object.getOwnPropertyNames || function (o) {
|
||||
var ar = [];
|
||||
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
||||
return ar;
|
||||
};
|
||||
return ownKeys(o);
|
||||
};
|
||||
return function (mod) {
|
||||
if (mod && mod.__esModule) return mod;
|
||||
var result = {};
|
||||
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
||||
__setModuleDefault(result, mod);
|
||||
return result;
|
||||
};
|
||||
})();
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.WasmEmbedder = void 0;
|
||||
exports.createWasmEmbedder = createWasmEmbedder;
|
||||
const errors_js_1 = require("../../core/errors.js");
|
||||
// ============================================================================
|
||||
// Simple LRU Cache Implementation
|
||||
// ============================================================================
|
||||
class LRUCache {
|
||||
constructor(maxSize = 10000) {
|
||||
this.cache = new Map();
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
get(key) {
|
||||
const value = this.cache.get(key);
|
||||
if (value) {
|
||||
// Move to end (most recently used)
|
||||
this.cache.delete(key);
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
set(key, value) {
|
||||
if (this.cache.has(key)) {
|
||||
this.cache.delete(key);
|
||||
}
|
||||
else if (this.cache.size >= this.maxSize) {
|
||||
// Remove oldest entry
|
||||
const firstKey = this.cache.keys().next().value;
|
||||
if (firstKey) {
|
||||
this.cache.delete(firstKey);
|
||||
}
|
||||
}
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
clear() {
|
||||
this.cache.clear();
|
||||
}
|
||||
size() {
|
||||
return this.cache.size;
|
||||
}
|
||||
}
|
||||
// ============================================================================
|
||||
// WasmEmbedder Implementation
|
||||
// ============================================================================
|
||||
class WasmEmbedder {
|
||||
constructor(config) {
|
||||
this.wasmModule = null;
|
||||
this.initialized = false;
|
||||
this.config = {
|
||||
dimensions: config.dimensions,
|
||||
modelPath: config.modelPath,
|
||||
cacheSize: config.cacheSize ?? 10000,
|
||||
useSIMD: config.useSIMD ?? true,
|
||||
batchSize: config.batchSize ?? 32,
|
||||
};
|
||||
this.cache = new LRUCache(this.config.cacheSize);
|
||||
}
|
||||
/**
|
||||
* Initialize the WASM module
|
||||
*/
|
||||
async initialize() {
|
||||
if (this.initialized)
|
||||
return;
|
||||
try {
|
||||
// Try to load @ruvector/ruvllm (WASM module)
|
||||
try {
|
||||
// Dynamic import - may not be available
|
||||
const ruvllm = await Promise.resolve().then(() => __importStar(require('@ruvector/ruvllm')));
|
||||
this.wasmModule = ruvllm;
|
||||
}
|
||||
catch {
|
||||
// Use fallback embedder if no WASM available
|
||||
console.warn('No WASM module available, using fallback embedder');
|
||||
}
|
||||
this.initialized = true;
|
||||
}
|
||||
catch (error) {
|
||||
throw new errors_js_1.WasmError(`Failed to initialize WASM embedder: ${error instanceof Error ? error.message : 'Unknown error'}`, { config: this.config });
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Embed a single text string
|
||||
*/
|
||||
async embed(text) {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
// Check cache
|
||||
const cached = this.cache.get(text);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
// Generate embedding
|
||||
const embedding = await this.generateEmbedding(text);
|
||||
// Cache result
|
||||
this.cache.set(text, embedding);
|
||||
return embedding;
|
||||
}
|
||||
/**
|
||||
* Embed multiple texts in batch
|
||||
*/
|
||||
async embedBatch(texts) {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
const results = [];
|
||||
const uncached = [];
|
||||
// Check cache for each text
|
||||
for (let i = 0; i < texts.length; i++) {
|
||||
const cached = this.cache.get(texts[i]);
|
||||
if (cached) {
|
||||
results[i] = cached;
|
||||
}
|
||||
else {
|
||||
uncached.push({ index: i, text: texts[i] });
|
||||
}
|
||||
}
|
||||
// Generate embeddings for uncached texts in batches
|
||||
const batchSize = this.config.batchSize;
|
||||
for (let i = 0; i < uncached.length; i += batchSize) {
|
||||
const batch = uncached.slice(i, i + batchSize);
|
||||
const batchTexts = batch.map(item => item.text);
|
||||
const embeddings = await this.generateEmbeddingBatch(batchTexts);
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
const embedding = embeddings[j];
|
||||
results[batch[j].index] = embedding;
|
||||
this.cache.set(batch[j].text, embedding);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
/**
|
||||
* Get embedding dimensions
|
||||
*/
|
||||
dimension() {
|
||||
return this.config.dimensions;
|
||||
}
|
||||
/**
|
||||
* Clear the embedding cache
|
||||
*/
|
||||
clearCache() {
|
||||
this.cache.clear();
|
||||
}
|
||||
/**
|
||||
* Get cache statistics
|
||||
*/
|
||||
getCacheStats() {
|
||||
return {
|
||||
size: this.cache.size(),
|
||||
maxSize: this.config.cacheSize,
|
||||
};
|
||||
}
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
async generateEmbedding(text) {
|
||||
if (this.wasmModule) {
|
||||
// Use WASM module if available
|
||||
const module = this.wasmModule;
|
||||
if (module.embed) {
|
||||
return module.embed(text);
|
||||
}
|
||||
if (module.RuvLLM) {
|
||||
return module.RuvLLM.embed(text);
|
||||
}
|
||||
}
|
||||
// Fallback: Generate deterministic pseudo-random embedding
|
||||
return this.fallbackEmbed(text);
|
||||
}
|
||||
async generateEmbeddingBatch(texts) {
|
||||
if (this.wasmModule) {
|
||||
const module = this.wasmModule;
|
||||
if (module.embedBatch) {
|
||||
return module.embedBatch(texts);
|
||||
}
|
||||
}
|
||||
// Fallback: Generate individually
|
||||
return Promise.all(texts.map(text => this.generateEmbedding(text)));
|
||||
}
|
||||
fallbackEmbed(text) {
|
||||
// Generate deterministic embedding based on text hash
|
||||
// This is for testing/development when WASM is not available
|
||||
const embedding = new Float32Array(this.config.dimensions);
|
||||
let hash = this.hashCode(text);
|
||||
for (let i = 0; i < this.config.dimensions; i++) {
|
||||
hash = ((hash * 1103515245) + 12345) & 0x7fffffff;
|
||||
embedding[i] = (hash / 0x7fffffff) * 2 - 1;
|
||||
}
|
||||
// Normalize
|
||||
const norm = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
|
||||
for (let i = 0; i < this.config.dimensions; i++) {
|
||||
embedding[i] /= norm;
|
||||
}
|
||||
return embedding;
|
||||
}
|
||||
hashCode(str) {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < str.length; i++) {
|
||||
const char = str.charCodeAt(i);
|
||||
hash = ((hash << 5) - hash) + char;
|
||||
hash = hash & hash;
|
||||
}
|
||||
return Math.abs(hash);
|
||||
}
|
||||
}
|
||||
exports.WasmEmbedder = WasmEmbedder;
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
function createWasmEmbedder(config) {
|
||||
return new WasmEmbedder(config);
|
||||
}
|
||||
exports.default = WasmEmbedder;
|
||||
//# sourceMappingURL=WasmEmbedder.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
285
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.ts
vendored
Normal file
285
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/WasmEmbedder.ts
vendored
Normal file
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* WasmEmbedder - WASM-based Text Embedding
|
||||
*
|
||||
* Provides high-performance text embeddings using RuVector WASM bindings.
|
||||
* Supports batching, caching, and SIMD optimization.
|
||||
*/
|
||||
|
||||
import type { Embedder } from '../memory/MemoryManager.js';
|
||||
import { WasmError } from '../../core/errors.js';
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
export interface WasmEmbedderConfig {
|
||||
dimensions: number;
|
||||
modelPath?: string;
|
||||
cacheSize?: number;
|
||||
useSIMD?: boolean;
|
||||
batchSize?: number;
|
||||
}
|
||||
|
||||
export interface EmbeddingCache {
|
||||
get(key: string): Float32Array | undefined;
|
||||
set(key: string, value: Float32Array): void;
|
||||
clear(): void;
|
||||
size(): number;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Simple LRU Cache Implementation
|
||||
// ============================================================================
|
||||
|
||||
class LRUCache implements EmbeddingCache {
|
||||
private cache: Map<string, Float32Array> = new Map();
|
||||
private readonly maxSize: number;
|
||||
|
||||
constructor(maxSize: number = 10000) {
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
get(key: string): Float32Array | undefined {
|
||||
const value = this.cache.get(key);
|
||||
if (value) {
|
||||
// Move to end (most recently used)
|
||||
this.cache.delete(key);
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
set(key: string, value: Float32Array): void {
|
||||
if (this.cache.has(key)) {
|
||||
this.cache.delete(key);
|
||||
} else if (this.cache.size >= this.maxSize) {
|
||||
// Remove oldest entry
|
||||
const firstKey = this.cache.keys().next().value;
|
||||
if (firstKey) {
|
||||
this.cache.delete(firstKey);
|
||||
}
|
||||
}
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.cache.clear();
|
||||
}
|
||||
|
||||
size(): number {
|
||||
return this.cache.size;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// WasmEmbedder Implementation
|
||||
// ============================================================================
|
||||
|
||||
export class WasmEmbedder implements Embedder {
|
||||
private readonly config: WasmEmbedderConfig;
|
||||
private readonly cache: EmbeddingCache;
|
||||
private wasmModule: unknown = null;
|
||||
private initialized: boolean = false;
|
||||
|
||||
constructor(config: WasmEmbedderConfig) {
|
||||
this.config = {
|
||||
dimensions: config.dimensions,
|
||||
modelPath: config.modelPath,
|
||||
cacheSize: config.cacheSize ?? 10000,
|
||||
useSIMD: config.useSIMD ?? true,
|
||||
batchSize: config.batchSize ?? 32,
|
||||
};
|
||||
this.cache = new LRUCache(this.config.cacheSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the WASM module
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
if (this.initialized) return;
|
||||
|
||||
try {
|
||||
// Try to load @ruvector/ruvllm (WASM module)
|
||||
try {
|
||||
// Dynamic import - may not be available
|
||||
const ruvllm = await import('@ruvector/ruvllm');
|
||||
this.wasmModule = ruvllm;
|
||||
} catch {
|
||||
// Use fallback embedder if no WASM available
|
||||
console.warn('No WASM module available, using fallback embedder');
|
||||
}
|
||||
|
||||
this.initialized = true;
|
||||
} catch (error) {
|
||||
throw new WasmError(
|
||||
`Failed to initialize WASM embedder: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
||||
{ config: this.config }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Embed a single text string
|
||||
*/
|
||||
async embed(text: string): Promise<Float32Array> {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
|
||||
// Check cache
|
||||
const cached = this.cache.get(text);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
// Generate embedding
|
||||
const embedding = await this.generateEmbedding(text);
|
||||
|
||||
// Cache result
|
||||
this.cache.set(text, embedding);
|
||||
|
||||
return embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Embed multiple texts in batch
|
||||
*/
|
||||
async embedBatch(texts: string[]): Promise<Float32Array[]> {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
|
||||
const results: Float32Array[] = [];
|
||||
const uncached: { index: number; text: string }[] = [];
|
||||
|
||||
// Check cache for each text
|
||||
for (let i = 0; i < texts.length; i++) {
|
||||
const cached = this.cache.get(texts[i]);
|
||||
if (cached) {
|
||||
results[i] = cached;
|
||||
} else {
|
||||
uncached.push({ index: i, text: texts[i] });
|
||||
}
|
||||
}
|
||||
|
||||
// Generate embeddings for uncached texts in batches
|
||||
const batchSize = this.config.batchSize!;
|
||||
for (let i = 0; i < uncached.length; i += batchSize) {
|
||||
const batch = uncached.slice(i, i + batchSize);
|
||||
const batchTexts = batch.map(item => item.text);
|
||||
|
||||
const embeddings = await this.generateEmbeddingBatch(batchTexts);
|
||||
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
const embedding = embeddings[j];
|
||||
results[batch[j].index] = embedding;
|
||||
this.cache.set(batch[j].text, embedding);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding dimensions
|
||||
*/
|
||||
dimension(): number {
|
||||
return this.config.dimensions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the embedding cache
|
||||
*/
|
||||
clearCache(): void {
|
||||
this.cache.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cache statistics
|
||||
*/
|
||||
getCacheStats(): { size: number; maxSize: number } {
|
||||
return {
|
||||
size: this.cache.size(),
|
||||
maxSize: this.config.cacheSize!,
|
||||
};
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
|
||||
private async generateEmbedding(text: string): Promise<Float32Array> {
|
||||
if (this.wasmModule) {
|
||||
// Use WASM module if available
|
||||
const module = this.wasmModule as {
|
||||
embed?: (text: string) => Float32Array;
|
||||
RuvLLM?: { embed: (text: string) => Promise<Float32Array> };
|
||||
};
|
||||
|
||||
if (module.embed) {
|
||||
return module.embed(text);
|
||||
}
|
||||
if (module.RuvLLM) {
|
||||
return module.RuvLLM.embed(text);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Generate deterministic pseudo-random embedding
|
||||
return this.fallbackEmbed(text);
|
||||
}
|
||||
|
||||
private async generateEmbeddingBatch(texts: string[]): Promise<Float32Array[]> {
|
||||
if (this.wasmModule) {
|
||||
const module = this.wasmModule as {
|
||||
embedBatch?: (texts: string[]) => Float32Array[];
|
||||
};
|
||||
|
||||
if (module.embedBatch) {
|
||||
return module.embedBatch(texts);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Generate individually
|
||||
return Promise.all(texts.map(text => this.generateEmbedding(text)));
|
||||
}
|
||||
|
||||
private fallbackEmbed(text: string): Float32Array {
|
||||
// Generate deterministic embedding based on text hash
|
||||
// This is for testing/development when WASM is not available
|
||||
const embedding = new Float32Array(this.config.dimensions);
|
||||
let hash = this.hashCode(text);
|
||||
|
||||
for (let i = 0; i < this.config.dimensions; i++) {
|
||||
hash = ((hash * 1103515245) + 12345) & 0x7fffffff;
|
||||
embedding[i] = (hash / 0x7fffffff) * 2 - 1;
|
||||
}
|
||||
|
||||
// Normalize
|
||||
const norm = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
|
||||
for (let i = 0; i < this.config.dimensions; i++) {
|
||||
embedding[i] /= norm;
|
||||
}
|
||||
|
||||
return embedding;
|
||||
}
|
||||
|
||||
private hashCode(str: string): number {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < str.length; i++) {
|
||||
const char = str.charCodeAt(i);
|
||||
hash = ((hash << 5) - hash) + char;
|
||||
hash = hash & hash;
|
||||
}
|
||||
return Math.abs(hash);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
|
||||
export function createWasmEmbedder(config: WasmEmbedderConfig): WasmEmbedder {
|
||||
return new WasmEmbedder(config);
|
||||
}
|
||||
|
||||
export default WasmEmbedder;
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,YAAY,CAAC;IAC1B,cAAc,EAAE,uBAAuB,CAAC;IACxC,KAAK,EAAE,cAAc,CAAC;CACvB;AAED,MAAM,WAAW,YAAY;IAC3B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAC3C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACrD,UAAU,IAAI,MAAM,CAAC;IACrB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC/C,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC;IAC5C,SAAS,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI,CAAC;CACxC;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,CAAC;IAC/C,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvE,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB"}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;GAEG"}
|
||||
40
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.ts
vendored
Normal file
40
vendor/ruvector/npm/packages/ruvbot/src/learning/embeddings/index.ts
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Embeddings Module - WASM-accelerated embedding generation
|
||||
*/
|
||||
|
||||
export interface EmbeddingEngine {
|
||||
wasmRuntime: WasmEmbedder;
|
||||
batchProcessor: BatchEmbeddingProcessor;
|
||||
cache: EmbeddingCache;
|
||||
}
|
||||
|
||||
export interface WasmEmbedder {
|
||||
initialize(): Promise<void>;
|
||||
embed(text: string): Promise<Float32Array>;
|
||||
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
||||
dimensions(): number;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface BatchEmbeddingProcessor {
|
||||
queue(text: string): Promise<EmbeddingPromise>;
|
||||
flush(): Promise<Map<string, Float32Array>>;
|
||||
configure(options: BatchOptions): void;
|
||||
}
|
||||
|
||||
export interface EmbeddingPromise {
|
||||
id: string;
|
||||
promise: Promise<Float32Array>;
|
||||
}
|
||||
|
||||
export interface BatchOptions {
|
||||
maxBatchSize: number;
|
||||
maxWaitMs: number;
|
||||
}
|
||||
|
||||
export interface EmbeddingCache {
|
||||
get(key: string): Promise<Float32Array | null>;
|
||||
set(key: string, embedding: Float32Array, ttl?: number): Promise<void>;
|
||||
delete(key: string): Promise<void>;
|
||||
clear(): Promise<void>;
|
||||
}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,uBAAuB,CAAC;AACtC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,2BAA2B,CAAC"}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;;;;;;;;;;;;;;AAEH,wDAAsC;AACtC,sDAAoC;AACpC,sDAAoC;AACpC,oDAAkC;AAClC,4DAA0C"}
|
||||
12
vendor/ruvector/npm/packages/ruvbot/src/learning/index.ts
vendored
Normal file
12
vendor/ruvector/npm/packages/ruvbot/src/learning/index.ts
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* Learning Context - Embeddings, Training, Patterns, Search
|
||||
*
|
||||
* Self-optimizing neural patterns and trajectory learning.
|
||||
* Includes hybrid search with BM25 + vector fusion.
|
||||
*/
|
||||
|
||||
export * from './embeddings/index.js';
|
||||
export * from './training/index.js';
|
||||
export * from './patterns/index.js';
|
||||
export * from './search/index.js';
|
||||
export * from './memory/MemoryManager.js';
|
||||
479
vendor/ruvector/npm/packages/ruvbot/src/learning/memory/MemoryManager.ts
vendored
Normal file
479
vendor/ruvector/npm/packages/ruvbot/src/learning/memory/MemoryManager.ts
vendored
Normal file
@@ -0,0 +1,479 @@
|
||||
/**
|
||||
* MemoryManager - HNSW-indexed Vector Memory with Multi-tenancy
|
||||
*
|
||||
* Provides persistent vector memory with:
|
||||
* - HNSW index for fast similarity search (150x-12,500x faster)
|
||||
* - Multi-tenant isolation via PostgreSQL RLS
|
||||
* - Memory types: episodic, semantic, procedural, working
|
||||
*/
|
||||
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Embedder interface for text-to-vector conversion
|
||||
*/
|
||||
export interface Embedder {
|
||||
/** Generate embedding for a single text */
|
||||
embed(text: string): Promise<Float32Array>;
|
||||
/** Generate embeddings for multiple texts in batch */
|
||||
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
||||
/** Get embedding dimension */
|
||||
dimension(): number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Vector index interface for similarity search
|
||||
*/
|
||||
export interface VectorIndex {
|
||||
/** Add a vector to the index */
|
||||
add(id: string, vector: Float32Array): Promise<void>;
|
||||
/** Remove a vector from the index (async) */
|
||||
remove(id: string): Promise<boolean>;
|
||||
/** Delete a vector from the index (sync) */
|
||||
delete(id: string): boolean;
|
||||
/** Search for similar vectors */
|
||||
search(query: Float32Array, topK: number): Promise<VectorSearchResult[]>;
|
||||
/** Get number of vectors in index */
|
||||
size(): number;
|
||||
/** Clear the index */
|
||||
clear(): void;
|
||||
}
|
||||
|
||||
export interface VectorSearchResult {
|
||||
id: string;
|
||||
score: number;
|
||||
distance: number;
|
||||
}
|
||||
|
||||
export type MemoryType = 'episodic' | 'semantic' | 'procedural' | 'working';
|
||||
|
||||
export interface MemoryEntry {
|
||||
id: string;
|
||||
tenantId: string;
|
||||
sessionId: string | null;
|
||||
type: MemoryType;
|
||||
key: string;
|
||||
value: unknown;
|
||||
embedding: Float32Array | null;
|
||||
metadata: MemoryMetadata;
|
||||
}
|
||||
|
||||
export interface MemoryMetadata {
|
||||
createdAt: Date;
|
||||
updatedAt: Date;
|
||||
expiresAt: Date | null;
|
||||
accessCount: number;
|
||||
importance: number;
|
||||
tags: string[];
|
||||
}
|
||||
|
||||
export interface MemoryManagerConfig {
|
||||
/** Embedding dimension (default: 384) */
|
||||
dimension: number;
|
||||
/** Maximum entries in index (default: 100000) */
|
||||
maxEntries: number;
|
||||
/** HNSW M parameter (default: 16) */
|
||||
hnswM?: number;
|
||||
/** HNSW ef_construction parameter (default: 200) */
|
||||
hnswEfConstruction?: number;
|
||||
/** Enable persistence (default: false) */
|
||||
persistence?: boolean;
|
||||
/** Database connection string */
|
||||
databaseUrl?: string;
|
||||
}
|
||||
|
||||
export interface MemorySearchOptions {
|
||||
topK?: number;
|
||||
threshold?: number;
|
||||
type?: MemoryType;
|
||||
tags?: string[];
|
||||
sessionId?: string;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Simple In-Memory HNSW Index (Placeholder)
|
||||
// ============================================================================
|
||||
|
||||
class SimpleVectorIndex implements VectorIndex {
|
||||
private vectors: Map<string, Float32Array> = new Map();
|
||||
private readonly dimension: number;
|
||||
|
||||
constructor(dimension: number) {
|
||||
this.dimension = dimension;
|
||||
}
|
||||
|
||||
async add(id: string, vector: Float32Array): Promise<void> {
|
||||
if (vector.length !== this.dimension) {
|
||||
throw new Error(`Dimension mismatch: expected ${this.dimension}, got ${vector.length}`);
|
||||
}
|
||||
this.vectors.set(id, vector);
|
||||
}
|
||||
|
||||
async remove(id: string): Promise<boolean> {
|
||||
return this.vectors.delete(id);
|
||||
}
|
||||
|
||||
delete(id: string): boolean {
|
||||
return this.vectors.delete(id);
|
||||
}
|
||||
|
||||
async search(query: Float32Array, topK: number): Promise<VectorSearchResult[]> {
|
||||
if (query.length !== this.dimension) {
|
||||
throw new Error(`Query dimension mismatch: expected ${this.dimension}, got ${query.length}`);
|
||||
}
|
||||
|
||||
const results: VectorSearchResult[] = [];
|
||||
|
||||
for (const [id, vector] of this.vectors) {
|
||||
const score = this.cosineSimilarity(query, vector);
|
||||
results.push({
|
||||
id,
|
||||
score,
|
||||
distance: 1 - score,
|
||||
});
|
||||
}
|
||||
|
||||
return results
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
size(): number {
|
||||
return this.vectors.size;
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.vectors.clear();
|
||||
}
|
||||
|
||||
private cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dotProduct += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
|
||||
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
|
||||
return denominator === 0 ? 0 : dotProduct / denominator;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// MemoryManager Implementation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * In-memory multi-tenant memory store with optional vector search.
 *
 * Responsibilities visible in this class:
 * - CRUD for MemoryEntry records keyed by a generated UUID.
 * - Secondary indexes: tenantId -> entry ids, sessionId -> entry ids.
 * - A vector index (SimpleVectorIndex) holding only entries that carry an
 *   embedding.
 * - An optional Embedder used to turn text into embeddings on store/search.
 *
 * NOTE(review): maxEntries, hnswM, hnswEfConstruction, persistence and
 * databaseUrl are captured in config but never read by this class —
 * presumably consumed by an HNSW/persistent backend elsewhere; confirm.
 */
export class MemoryManager {
  private readonly config: MemoryManagerConfig;
  // Vector index over entries that have an embedding.
  private readonly index: VectorIndex;
  // Primary store: entry id -> entry.
  private readonly entries: Map<string, MemoryEntry> = new Map();
  // tenantId -> ids of that tenant's entries.
  private readonly tenantIndex: Map<string, Set<string>> = new Map();
  // sessionId -> ids of that session's entries.
  private readonly sessionIndex: Map<string, Set<string>> = new Map();
  // Installed via setEmbedder(); required for text-based store/search.
  private embedder: Embedder | null = null;

  constructor(config: Partial<MemoryManagerConfig> = {}) {
    // Fill in defaults for any omitted config fields.
    this.config = {
      dimension: config.dimension ?? 384,
      maxEntries: config.maxEntries ?? 100000,
      hnswM: config.hnswM ?? 16,
      hnswEfConstruction: config.hnswEfConstruction ?? 200,
      persistence: config.persistence ?? false,
      databaseUrl: config.databaseUrl,
    };

    this.index = new SimpleVectorIndex(this.config.dimension);
  }

  /**
   * Set the embedder for text-to-vector conversion.
   * @throws Error when the embedder's dimension differs from the
   *   configured dimension (vectors would be incomparable).
   */
  setEmbedder(embedder: Embedder): void {
    if (embedder.dimension() !== this.config.dimension) {
      throw new Error(
        `Embedder dimension (${embedder.dimension()}) does not match ` +
        `configured dimension (${this.config.dimension})`
      );
    }
    this.embedder = embedder;
  }

  /**
   * Store a memory entry.
   *
   * When no explicit embedding is given but `options.text` is set and an
   * embedder is configured, the embedding is generated from the text.
   * The entry is always stored in the primary map; it is added to the
   * vector index only when an embedding exists.
   *
   * @returns the newly created entry (id is a fresh UUID).
   */
  async store(
    tenantId: string,
    key: string,
    value: unknown,
    options: {
      sessionId?: string;
      type?: MemoryType;
      embedding?: Float32Array;
      text?: string;
      tags?: string[];
      expiresAt?: Date;
      importance?: number;
    } = {}
  ): Promise<MemoryEntry> {
    const id = uuidv4();
    const now = new Date();

    // Generate embedding if text provided and embedder available
    let embedding = options.embedding ?? null;
    if (!embedding && options.text && this.embedder) {
      embedding = await this.embedder.embed(options.text);
    }

    const entry: MemoryEntry = {
      id,
      tenantId,
      sessionId: options.sessionId ?? null,
      type: options.type ?? 'semantic',
      key,
      value,
      embedding,
      metadata: {
        createdAt: now,
        updatedAt: now,
        expiresAt: options.expiresAt ?? null,
        accessCount: 0,
        importance: options.importance ?? 0.5,
        tags: options.tags ?? [],
      },
    };

    // Store entry
    this.entries.set(id, entry);

    // Update indexes
    this.updateTenantIndex(tenantId, id);
    if (entry.sessionId) {
      this.updateSessionIndex(entry.sessionId, id);
    }

    // Add to vector index if embedding exists
    if (embedding) {
      await this.index.add(id, embedding);
    }

    return entry;
  }

  /**
   * Retrieve a memory entry by ID.
   * Side effect on a hit: bumps accessCount and refreshes updatedAt.
   */
  async get(id: string): Promise<MemoryEntry | null> {
    const entry = this.entries.get(id);
    if (entry) {
      entry.metadata.accessCount++;
      entry.metadata.updatedAt = new Date();
    }
    return entry ?? null;
  }

  /**
   * Retrieve a memory entry by key and tenant (linear scan over the
   * tenant's entries; first match in insertion order wins).
   *
   * NOTE(review): unlike get(), this bumps accessCount but does not touch
   * updatedAt — confirm whether the asymmetry is intended.
   */
  async getByKey(key: string, tenantId: string): Promise<MemoryEntry | null> {
    const tenantIds = this.tenantIndex.get(tenantId);
    if (!tenantIds) return null;

    for (const id of tenantIds) {
      const entry = this.entries.get(id);
      if (entry && entry.key === key) {
        entry.metadata.accessCount++;
        return entry;
      }
    }
    return null;
  }

  /**
   * Search for similar memories using vector similarity.
   *
   * Over-fetches topK * 2 candidates from the (global, cross-tenant)
   * vector index, then filters by tenant, type, session, tags and score
   * threshold, stopping at topK accepted results.
   *
   * NOTE(review): candidates ranked beyond the topK*2 window are never
   * examined, so a tenant whose matches sit behind many filtered-out
   * neighbours can receive fewer than topK results.
   *
   * @param query raw text (requires a configured embedder) or a vector.
   * @throws Error for a text query when no embedder is configured.
   */
  async search(
    query: string | Float32Array,
    tenantId: string,
    options: MemorySearchOptions = {}
  ): Promise<{ entry: MemoryEntry; score: number }[]> {
    const topK = options.topK ?? 10;
    const threshold = options.threshold ?? 0;

    // Get query embedding
    let queryEmbedding: Float32Array;
    if (typeof query === 'string') {
      if (!this.embedder) {
        throw new Error('No embedder configured for text search');
      }
      queryEmbedding = await this.embedder.embed(query);
    } else {
      queryEmbedding = query;
    }

    // Search vector index
    const results = await this.index.search(queryEmbedding, topK * 2);

    // Filter by tenant and other criteria
    const filtered: { entry: MemoryEntry; score: number }[] = [];

    for (const result of results) {
      if (result.score < threshold) continue;

      const entry = this.entries.get(result.id);
      if (!entry || entry.tenantId !== tenantId) continue;

      // Apply additional filters
      if (options.type && entry.type !== options.type) continue;
      if (options.sessionId && entry.sessionId !== options.sessionId) continue;
      if (options.tags?.length) {
        const hasTag = options.tags.some(tag => entry.metadata.tags.includes(tag));
        if (!hasTag) continue;
      }

      filtered.push({ entry, score: result.score });

      if (filtered.length >= topK) break;
    }

    return filtered;
  }

  /**
   * Delete a memory entry, removing it from the tenant/session indexes and
   * (when it was embedded) the vector index.
   * @returns false when the id is unknown.
   */
  async delete(id: string): Promise<boolean> {
    const entry = this.entries.get(id);
    if (!entry) return false;

    // Remove from indexes
    this.tenantIndex.get(entry.tenantId)?.delete(id);
    if (entry.sessionId) {
      this.sessionIndex.get(entry.sessionId)?.delete(id);
    }

    // Remove from vector index
    if (entry.embedding) {
      await this.index.remove(id);
    }

    return this.entries.delete(id);
  }

  /**
   * List memories for a tenant (set insertion order, at most `limit`).
   */
  async listByTenant(tenantId: string, limit: number = 100): Promise<MemoryEntry[]> {
    const ids = this.tenantIndex.get(tenantId);
    if (!ids) return [];

    const entries: MemoryEntry[] = [];
    for (const id of ids) {
      const entry = this.entries.get(id);
      if (entry) entries.push(entry);
      if (entries.length >= limit) break;
    }
    return entries;
  }

  /**
   * List memories for a session (set insertion order, at most `limit`).
   */
  async listBySession(sessionId: string, limit: number = 100): Promise<MemoryEntry[]> {
    const ids = this.sessionIndex.get(sessionId);
    if (!ids) return [];

    const entries: MemoryEntry[] = [];
    for (const id of ids) {
      const entry = this.entries.get(id);
      if (entry) entries.push(entry);
      if (entries.length >= limit) break;
    }
    return entries;
  }

  /**
   * Clear all memories for a tenant.
   * @returns number of entries actually deleted.
   */
  async clearTenant(tenantId: string): Promise<number> {
    const ids = this.tenantIndex.get(tenantId);
    if (!ids) return 0;

    let count = 0;
    // Array.from snapshots the set: delete() mutates it while we iterate.
    for (const id of Array.from(ids)) {
      if (await this.delete(id)) count++;
    }
    return count;
  }

  /**
   * Expire old entries: deletes every entry whose expiresAt is in the past.
   * @returns number of entries removed.
   */
  async expire(): Promise<number> {
    const now = new Date();
    let count = 0;

    for (const [id, entry] of this.entries) {
      if (entry.metadata.expiresAt && entry.metadata.expiresAt < now) {
        await this.delete(id);
        count++;
      }
    }

    return count;
  }

  /**
   * Get memory statistics (size counters only; cheap to call).
   */
  stats(): {
    totalEntries: number;
    indexedEntries: number;
    tenants: number;
    sessions: number;
  } {
    return {
      totalEntries: this.entries.size,
      indexedEntries: this.index.size(),
      tenants: this.tenantIndex.size,
      sessions: this.sessionIndex.size,
    };
  }

  // ==========================================================================
  // Private Methods
  // ==========================================================================

  // Add entryId to the tenant's id set, creating the set on first use.
  private updateTenantIndex(tenantId: string, entryId: string): void {
    let ids = this.tenantIndex.get(tenantId);
    if (!ids) {
      ids = new Set();
      this.tenantIndex.set(tenantId, ids);
    }
    ids.add(entryId);
  }

  // Add entryId to the session's id set, creating the set on first use.
  private updateSessionIndex(sessionId: string, entryId: string): void {
    let ids = this.sessionIndex.get(sessionId);
    if (!ids) {
      ids = new Set();
      this.sessionIndex.set(sessionId, ids);
    }
    ids.add(entryId);
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
|
||||
export function createMemoryManager(config?: Partial<MemoryManagerConfig>): MemoryManager {
|
||||
return new MemoryManager(config);
|
||||
}
|
||||
|
||||
export default MemoryManager;
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,YAAY;IAC3B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,WAAW,CAAC,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACzF,UAAU,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnD,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClF,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC9D;AAED,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,WAAW,CAAC;IACzB,SAAS,EAAE,YAAY,CAAC;IACxB,qBAAqB,EAAE,MAAM,EAAE,CAAC;IAChC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,IAAI,CAAC;IAChB,UAAU,EAAE,IAAI,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,iBAAiB,GACjB,kBAAkB,GAClB,mBAAmB,CAAC;AAExB,MAAM,WAAW,mBAAmB;IAClC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC;IAC7B,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,cAAc,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,kBAAkB,CAAC;IACxD,KAAK,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAC/C,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;CAChE;AAED,MAAM,WAAW,kBAAkB;IACjC,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,EAAE,EAAE,CAAC;IAChC,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,MAAM,WAAW,WAAW;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,YAAY,CAAC;IACvB,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,QAAQ,EAAE,MAAM,CAAC;CAClB"}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;GAEG"}
|
||||
76
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.ts
vendored
Normal file
76
vendor/ruvector/npm/packages/ruvbot/src/learning/patterns/index.ts
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
 * Patterns Module - HNSW-indexed pattern matching
 *
 * Type contracts only; implementations live elsewhere.
 */

/** Index of learned patterns supporting similarity lookup and lifecycle updates. */
export interface PatternIndex {
  initialize(): Promise<void>;
  /** Find patterns similar to the query embedding. */
  findMatches(query: Float32Array, options?: PatternMatchOptions): Promise<PatternMatch[]>;
  addPattern(pattern: LearnedPattern): Promise<void>;
  /** Record one usage of a pattern and whether that usage succeeded. */
  updateStats(patternId: string, used: boolean, successful: boolean): Promise<void>;
  /** Retire a pattern; `reason` documents why (auditability). */
  deactivate(patternId: string, reason: string): Promise<void>;
}

/** A pattern distilled from past trajectories, with usage statistics. */
export interface LearnedPattern {
  id: string;
  tenantId: string;
  workspaceId?: string;
  patternType: PatternType;
  // Vector used for similarity matching.
  embedding: Float32Array;
  // Trajectories this pattern was learned from.
  exemplarTrajectoryIds: string[];
  suggestedResponse?: string;
  suggestedSkills?: string[];
  confidence: number;
  usageCount: number;
  successCount: number;
  successRate: number;
  isActive: boolean;
  createdAt: Date;
  lastUsedAt: Date;
  // Id of the pattern that replaced this one, when superseded.
  supersededBy?: string;
}

/** Kind of decision a pattern informs. */
export type PatternType =
  | 'response'
  | 'skill_selection'
  | 'memory_retrieval'
  | 'conversation_flow';

/** Options narrowing a PatternIndex.findMatches() call. */
export interface PatternMatchOptions {
  limit?: number;
  threshold?: number;
  patternTypes?: PatternType[];
  activeOnly?: boolean;
}

/** A matched pattern with its final score and raw similarity. */
export interface PatternMatch {
  pattern: LearnedPattern;
  score: number;
  rawSimilarity: number;
}

/** Maintenance operations over a pattern collection. */
export interface PatternOptimizer {
  analyze(patterns: LearnedPattern[]): OptimizationReport;
  prune(threshold: number): Promise<PruneResult>;
  merge(patterns: string[]): Promise<LearnedPattern>;
  cluster(patterns: LearnedPattern[]): Promise<PatternCluster[]>;
}

/** Result of PatternOptimizer.analyze(). */
export interface OptimizationReport {
  totalPatterns: number;
  activePatterns: number;
  lowConfidenceCount: number;
  // Groups of pattern ids that appear to duplicate one another.
  duplicateCandidates: string[][];
  recommendations: string[];
}

/** Result of PatternOptimizer.prune(). */
export interface PruneResult {
  prunedCount: number;
  prunedPatternIds: string[];
  reason: string;
}

/** A group of similar patterns with its centroid embedding. */
export interface PatternCluster {
  centroid: Float32Array;
  members: LearnedPattern[];
  // NOTE(review): presumably higher = tighter cluster — confirm scale.
  cohesion: number;
}
|
||||
88
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.d.ts
vendored
Normal file
88
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.d.ts
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
 * BM25Index - Full-Text Search with BM25 Scoring
 *
 * Implements the Okapi BM25 ranking algorithm for keyword-based search.
 * Used in hybrid search to complement vector similarity search.
 *
 * NOTE(review): generated declaration file (see the sourceMappingURL
 * trailer and the sibling BM25Index.d.ts.map) — regenerate from
 * BM25Index.ts rather than editing by hand.
 */
export interface BM25Config {
    /** Term-frequency saturation; the implementation defaults to 1.2. */
    k1: number;
    /** Document-length normalization; the implementation defaults to 0.75. */
    b: number;
}
export interface Document {
    id: string;
    content: string;
    /** Cached tokenization of `content`, filled in when the index adds it. */
    tokens?: string[];
}
export interface BM25Result {
    id: string;
    /** BM25 relevance score (higher is better). */
    score: number;
    /** Query terms (post-stemming) that matched this document. */
    matchedTerms: string[];
}
export declare class BM25Index {
    private readonly k1;
    private readonly b;
    private documents;
    private invertedIndex;
    private docFrequency;
    private docLengths;
    private avgDocLength;
    private readonly stopwords;
    constructor(config?: Partial<BM25Config>);
    /**
     * Add a document to the index
     */
    add(id: string, content: string): void;
    /**
     * Remove a document from the index
     */
    delete(id: string): boolean;
    /**
     * Search the index with BM25 scoring
     */
    search(query: string, topK?: number): BM25Result[];
    /**
     * Get document by ID
     */
    get(id: string): Document | undefined;
    /**
     * Check if document exists
     */
    has(id: string): boolean;
    /**
     * Get index size
     */
    size(): number;
    /**
     * Clear all documents
     */
    clear(): void;
    /**
     * Get index statistics
     */
    getStats(): {
        documentCount: number;
        uniqueTerms: number;
        avgDocLength: number;
        k1: number;
        b: number;
    };
    /**
     * Tokenize text into normalized terms
     */
    private tokenize;
    /**
     * Simple stemming (basic suffix removal)
     */
    private stem;
    /**
     * Count term frequency in tokens
     */
    private termFrequency;
    /**
     * Update average document length
     */
    private updateAvgDocLength;
}
export declare function createBM25Index(config?: Partial<BM25Config>): BM25Index;
export default BM25Index;
//# sourceMappingURL=BM25Index.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"BM25Index.d.ts","sourceRoot":"","sources":["BM25Index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,CAAC,EAAE,MAAM,CAAC;CACX;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAMD,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAS;IAC5B,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAS;IAG3B,OAAO,CAAC,SAAS,CAAoC;IAGrD,OAAO,CAAC,aAAa,CAAuC;IAG5D,OAAO,CAAC,YAAY,CAAkC;IAGtD,OAAO,CAAC,UAAU,CAAkC;IAGpD,OAAO,CAAC,YAAY,CAAa;IAGjC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAevB;gBAES,MAAM,GAAE,OAAO,CAAC,UAAU,CAAM;IAK5C;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI;IAyBtC;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IA6B3B;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAW,GAAG,UAAU,EAAE;IAgDtD;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS;IAIrC;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAIxB;;OAEG;IACH,IAAI,IAAI,MAAM;IAId;;OAEG;IACH,KAAK,IAAI,IAAI;IAQb;;OAEG;IACH,QAAQ,IAAI;QACV,aAAa,EAAE,MAAM,CAAC;QACtB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,EAAE,EAAE,MAAM,CAAC;QACX,CAAC,EAAE,MAAM,CAAC;KACX;IAcD;;OAEG;IACH,OAAO,CAAC,QAAQ;IAWhB;;OAEG;IACH,OAAO,CAAC,IAAI;IAaZ;;OAEG;IACH,OAAO,CAAC,aAAa;IAIrB;;OAEG;IACH,OAAO,CAAC,kBAAkB;CAW3B;AAMD,wBAAgB,eAAe,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,SAAS,CAEvE;AAED,eAAe,SAAS,CAAC"}
|
||||
249
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.js
vendored
Normal file
249
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.js
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
"use strict";
|
||||
/**
|
||||
* BM25Index - Full-Text Search with BM25 Scoring
|
||||
*
|
||||
* Implements the Okapi BM25 ranking algorithm for keyword-based search.
|
||||
* Used in hybrid search to complement vector similarity search.
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.BM25Index = void 0;
|
||||
exports.createBM25Index = createBM25Index;
|
||||
// ============================================================================
|
||||
// BM25Index Implementation
|
||||
// ============================================================================
|
||||
class BM25Index {
|
||||
constructor(config = {}) {
|
||||
// Document storage
|
||||
this.documents = new Map();
|
||||
// Inverted index: term -> Set of document IDs
|
||||
this.invertedIndex = new Map();
|
||||
// Document frequency: term -> number of documents containing term
|
||||
this.docFrequency = new Map();
|
||||
// Document lengths (number of tokens)
|
||||
this.docLengths = new Map();
|
||||
// Average document length
|
||||
this.avgDocLength = 0;
|
||||
// Stopwords to filter
|
||||
this.stopwords = new Set([
|
||||
'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were',
|
||||
'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
|
||||
'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
|
||||
'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for',
|
||||
'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
|
||||
'before', 'after', 'above', 'below', 'between', 'under', 'again',
|
||||
'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why',
|
||||
'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
|
||||
'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
|
||||
'very', 's', 't', 'just', 'don', 'now', 'i', 'me', 'my', 'myself',
|
||||
'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself',
|
||||
'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
|
||||
'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
|
||||
'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
|
||||
]);
|
||||
this.k1 = config.k1 ?? 1.2;
|
||||
this.b = config.b ?? 0.75;
|
||||
}
|
||||
/**
|
||||
* Add a document to the index
|
||||
*/
|
||||
add(id, content) {
|
||||
// Tokenize content
|
||||
const tokens = this.tokenize(content);
|
||||
// Store document
|
||||
const doc = { id, content, tokens };
|
||||
this.documents.set(id, doc);
|
||||
this.docLengths.set(id, tokens.length);
|
||||
// Update inverted index
|
||||
const uniqueTerms = new Set(tokens);
|
||||
for (const term of uniqueTerms) {
|
||||
if (!this.invertedIndex.has(term)) {
|
||||
this.invertedIndex.set(term, new Set());
|
||||
}
|
||||
this.invertedIndex.get(term).add(id);
|
||||
// Update document frequency
|
||||
this.docFrequency.set(term, (this.docFrequency.get(term) ?? 0) + 1);
|
||||
}
|
||||
// Update average document length
|
||||
this.updateAvgDocLength();
|
||||
}
|
||||
/**
|
||||
* Remove a document from the index
|
||||
*/
|
||||
delete(id) {
|
||||
const doc = this.documents.get(id);
|
||||
if (!doc)
|
||||
return false;
|
||||
// Remove from inverted index
|
||||
const uniqueTerms = new Set(doc.tokens ?? this.tokenize(doc.content));
|
||||
for (const term of uniqueTerms) {
|
||||
const termDocs = this.invertedIndex.get(term);
|
||||
if (termDocs) {
|
||||
termDocs.delete(id);
|
||||
if (termDocs.size === 0) {
|
||||
this.invertedIndex.delete(term);
|
||||
this.docFrequency.delete(term);
|
||||
}
|
||||
else {
|
||||
this.docFrequency.set(term, (this.docFrequency.get(term) ?? 1) - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Remove document
|
||||
this.documents.delete(id);
|
||||
this.docLengths.delete(id);
|
||||
// Update average document length
|
||||
this.updateAvgDocLength();
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Search the index with BM25 scoring
|
||||
*/
|
||||
search(query, topK = 10) {
|
||||
const queryTerms = this.tokenize(query);
|
||||
if (queryTerms.length === 0)
|
||||
return [];
|
||||
const scores = new Map();
|
||||
const N = this.documents.size;
|
||||
for (const term of queryTerms) {
|
||||
const docs = this.invertedIndex.get(term);
|
||||
if (!docs)
|
||||
continue;
|
||||
// Document frequency for IDF
|
||||
const df = this.docFrequency.get(term) ?? 0;
|
||||
// IDF with smoothing
|
||||
const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);
|
||||
for (const docId of docs) {
|
||||
const docLength = this.docLengths.get(docId) ?? 0;
|
||||
const doc = this.documents.get(docId);
|
||||
if (!doc)
|
||||
continue;
|
||||
// Term frequency in document
|
||||
const tf = this.termFrequency(term, doc.tokens ?? []);
|
||||
// BM25 score for this term
|
||||
const numerator = tf * (this.k1 + 1);
|
||||
const denominator = tf + this.k1 * (1 - this.b + this.b * (docLength / this.avgDocLength));
|
||||
const termScore = idf * (numerator / denominator);
|
||||
// Accumulate score
|
||||
if (!scores.has(docId)) {
|
||||
scores.set(docId, { score: 0, matchedTerms: [] });
|
||||
}
|
||||
const existing = scores.get(docId);
|
||||
existing.score += termScore;
|
||||
if (!existing.matchedTerms.includes(term)) {
|
||||
existing.matchedTerms.push(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sort by score and return top K
|
||||
return Array.from(scores.entries())
|
||||
.map(([id, { score, matchedTerms }]) => ({ id, score, matchedTerms }))
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, topK);
|
||||
}
|
||||
/**
|
||||
* Get document by ID
|
||||
*/
|
||||
get(id) {
|
||||
return this.documents.get(id);
|
||||
}
|
||||
/**
|
||||
* Check if document exists
|
||||
*/
|
||||
has(id) {
|
||||
return this.documents.has(id);
|
||||
}
|
||||
/**
|
||||
* Get index size
|
||||
*/
|
||||
size() {
|
||||
return this.documents.size;
|
||||
}
|
||||
/**
|
||||
* Clear all documents
|
||||
*/
|
||||
clear() {
|
||||
this.documents.clear();
|
||||
this.invertedIndex.clear();
|
||||
this.docFrequency.clear();
|
||||
this.docLengths.clear();
|
||||
this.avgDocLength = 0;
|
||||
}
|
||||
/**
|
||||
* Get index statistics
|
||||
*/
|
||||
getStats() {
|
||||
return {
|
||||
documentCount: this.documents.size,
|
||||
uniqueTerms: this.invertedIndex.size,
|
||||
avgDocLength: this.avgDocLength,
|
||||
k1: this.k1,
|
||||
b: this.b,
|
||||
};
|
||||
}
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
/**
|
||||
* Tokenize text into normalized terms
|
||||
*/
|
||||
tokenize(text) {
|
||||
return text
|
||||
.toLowerCase()
|
||||
// Split on non-alphanumeric characters
|
||||
.split(/[^a-z0-9]+/)
|
||||
// Filter empty strings and stopwords
|
||||
.filter(token => token.length > 1 && !this.stopwords.has(token))
|
||||
// Stem basic suffixes (simple Porter-like stemming)
|
||||
.map(token => this.stem(token));
|
||||
}
|
||||
/**
|
||||
* Simple stemming (basic suffix removal)
|
||||
*/
|
||||
stem(word) {
|
||||
// Very basic stemming - just remove common suffixes
|
||||
if (word.length > 5) {
|
||||
if (word.endsWith('ing'))
|
||||
return word.slice(0, -3);
|
||||
if (word.endsWith('ed'))
|
||||
return word.slice(0, -2);
|
||||
if (word.endsWith('es'))
|
||||
return word.slice(0, -2);
|
||||
if (word.endsWith('s') && !word.endsWith('ss'))
|
||||
return word.slice(0, -1);
|
||||
if (word.endsWith('ly'))
|
||||
return word.slice(0, -2);
|
||||
if (word.endsWith('tion'))
|
||||
return word.slice(0, -4) + 't';
|
||||
}
|
||||
return word;
|
||||
}
|
||||
/**
|
||||
* Count term frequency in tokens
|
||||
*/
|
||||
termFrequency(term, tokens) {
|
||||
return tokens.filter(t => t === term).length;
|
||||
}
|
||||
/**
|
||||
* Update average document length
|
||||
*/
|
||||
updateAvgDocLength() {
|
||||
if (this.docLengths.size === 0) {
|
||||
this.avgDocLength = 0;
|
||||
return;
|
||||
}
|
||||
let total = 0;
|
||||
for (const length of this.docLengths.values()) {
|
||||
total += length;
|
||||
}
|
||||
this.avgDocLength = total / this.docLengths.size;
|
||||
}
|
||||
}
|
||||
exports.BM25Index = BM25Index;
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
function createBM25Index(config) {
|
||||
return new BM25Index(config);
|
||||
}
|
||||
exports.default = BM25Index;
|
||||
//# sourceMappingURL=BM25Index.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
302
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.ts
vendored
Normal file
302
vendor/ruvector/npm/packages/ruvbot/src/learning/search/BM25Index.ts
vendored
Normal file
@@ -0,0 +1,302 @@
|
||||
/**
|
||||
* BM25Index - Full-Text Search with BM25 Scoring
|
||||
*
|
||||
* Implements the Okapi BM25 ranking algorithm for keyword-based search.
|
||||
* Used in hybrid search to complement vector similarity search.
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
export interface BM25Config {
|
||||
k1: number; // Term frequency saturation (default: 1.2)
|
||||
b: number; // Document length normalization (default: 0.75)
|
||||
}
|
||||
|
||||
export interface Document {
|
||||
id: string;
|
||||
content: string;
|
||||
tokens?: string[];
|
||||
}
|
||||
|
||||
export interface BM25Result {
|
||||
id: string;
|
||||
score: number;
|
||||
matchedTerms: string[];
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// BM25Index Implementation
|
||||
// ============================================================================
|
||||
|
||||
export class BM25Index {
|
||||
  // BM25 tuning parameters (defaults applied in the constructor).
  private readonly k1: number;
  private readonly b: number;

  // Document storage
  private documents: Map<string, Document> = new Map();

  // Inverted index: term -> Set of document IDs
  private invertedIndex: Map<string, Set<string>> = new Map();

  // Document frequency: term -> number of documents containing term
  private docFrequency: Map<string, number> = new Map();

  // Document lengths (number of tokens)
  private docLengths: Map<string, number> = new Map();

  // Average document length
  private avgDocLength: number = 0;

  // Stopwords to filter (lower-case; single-character tokens are already
  // dropped by tokenize's length > 1 filter)
  private readonly stopwords = new Set([
    'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were',
    'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
    'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
    'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for',
    'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
    'before', 'after', 'above', 'below', 'between', 'under', 'again',
    'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why',
    'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
    'very', 's', 't', 'just', 'don', 'now', 'i', 'me', 'my', 'myself',
    'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself',
    'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
    'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
    'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
  ]);
|
||||
|
||||
  /**
   * @param config Optional BM25 tuning: k1 (term-frequency saturation,
   *   default 1.2) and b (document-length normalization, default 0.75).
   */
  constructor(config: Partial<BM25Config> = {}) {
    this.k1 = config.k1 ?? 1.2;
    this.b = config.b ?? 0.75;
  }
|
||||
|
||||
/**
|
||||
* Add a document to the index
|
||||
*/
|
||||
add(id: string, content: string): void {
|
||||
// Tokenize content
|
||||
const tokens = this.tokenize(content);
|
||||
|
||||
// Store document
|
||||
const doc: Document = { id, content, tokens };
|
||||
this.documents.set(id, doc);
|
||||
this.docLengths.set(id, tokens.length);
|
||||
|
||||
// Update inverted index
|
||||
const uniqueTerms = new Set(tokens);
|
||||
for (const term of uniqueTerms) {
|
||||
if (!this.invertedIndex.has(term)) {
|
||||
this.invertedIndex.set(term, new Set());
|
||||
}
|
||||
this.invertedIndex.get(term)!.add(id);
|
||||
|
||||
// Update document frequency
|
||||
this.docFrequency.set(term, (this.docFrequency.get(term) ?? 0) + 1);
|
||||
}
|
||||
|
||||
// Update average document length
|
||||
this.updateAvgDocLength();
|
||||
}
|
||||
|
||||
  /**
   * Remove a document from the index.
   *
   * For each of the document's unique terms: drop the posting, then either
   * delete the term's bookkeeping entirely (no documents left) or decrement
   * its document frequency — order matters for keeping df consistent.
   *
   * @returns false when the id is unknown.
   */
  delete(id: string): boolean {
    const doc = this.documents.get(id);
    if (!doc) return false;

    // Remove from inverted index
    // (re-tokenize only if the cached tokens are missing)
    const uniqueTerms = new Set(doc.tokens ?? this.tokenize(doc.content));
    for (const term of uniqueTerms) {
      const termDocs = this.invertedIndex.get(term);
      if (termDocs) {
        termDocs.delete(id);
        if (termDocs.size === 0) {
          this.invertedIndex.delete(term);
          this.docFrequency.delete(term);
        } else {
          this.docFrequency.set(term, (this.docFrequency.get(term) ?? 1) - 1);
        }
      }
    }

    // Remove document
    this.documents.delete(id);
    this.docLengths.delete(id);

    // Update average document length
    this.updateAvgDocLength();

    return true;
  }
|
||||
|
||||
/**
|
||||
* Search the index with BM25 scoring
|
||||
*/
|
||||
search(query: string, topK: number = 10): BM25Result[] {
|
||||
const queryTerms = this.tokenize(query);
|
||||
if (queryTerms.length === 0) return [];
|
||||
|
||||
const scores = new Map<string, { score: number; matchedTerms: string[] }>();
|
||||
const N = this.documents.size;
|
||||
|
||||
for (const term of queryTerms) {
|
||||
const docs = this.invertedIndex.get(term);
|
||||
if (!docs) continue;
|
||||
|
||||
// Document frequency for IDF
|
||||
const df = this.docFrequency.get(term) ?? 0;
|
||||
// IDF with smoothing
|
||||
const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);
|
||||
|
||||
for (const docId of docs) {
|
||||
const docLength = this.docLengths.get(docId) ?? 0;
|
||||
const doc = this.documents.get(docId);
|
||||
if (!doc) continue;
|
||||
|
||||
// Term frequency in document
|
||||
const tf = this.termFrequency(term, doc.tokens ?? []);
|
||||
|
||||
// BM25 score for this term
|
||||
const numerator = tf * (this.k1 + 1);
|
||||
const denominator = tf + this.k1 * (1 - this.b + this.b * (docLength / this.avgDocLength));
|
||||
const termScore = idf * (numerator / denominator);
|
||||
|
||||
// Accumulate score
|
||||
if (!scores.has(docId)) {
|
||||
scores.set(docId, { score: 0, matchedTerms: [] });
|
||||
}
|
||||
const existing = scores.get(docId)!;
|
||||
existing.score += termScore;
|
||||
if (!existing.matchedTerms.includes(term)) {
|
||||
existing.matchedTerms.push(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score and return top K
|
||||
return Array.from(scores.entries())
|
||||
.map(([id, { score, matchedTerms }]) => ({ id, score, matchedTerms }))
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get document by ID
|
||||
*/
|
||||
get(id: string): Document | undefined {
|
||||
return this.documents.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if document exists
|
||||
*/
|
||||
has(id: string): boolean {
|
||||
return this.documents.has(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index size
|
||||
*/
|
||||
size(): number {
|
||||
return this.documents.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all documents
|
||||
*/
|
||||
clear(): void {
|
||||
this.documents.clear();
|
||||
this.invertedIndex.clear();
|
||||
this.docFrequency.clear();
|
||||
this.docLengths.clear();
|
||||
this.avgDocLength = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index statistics
|
||||
*/
|
||||
getStats(): {
|
||||
documentCount: number;
|
||||
uniqueTerms: number;
|
||||
avgDocLength: number;
|
||||
k1: number;
|
||||
b: number;
|
||||
} {
|
||||
return {
|
||||
documentCount: this.documents.size,
|
||||
uniqueTerms: this.invertedIndex.size,
|
||||
avgDocLength: this.avgDocLength,
|
||||
k1: this.k1,
|
||||
b: this.b,
|
||||
};
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
|
||||
/**
|
||||
* Tokenize text into normalized terms
|
||||
*/
|
||||
private tokenize(text: string): string[] {
|
||||
return text
|
||||
.toLowerCase()
|
||||
// Split on non-alphanumeric characters
|
||||
.split(/[^a-z0-9]+/)
|
||||
// Filter empty strings and stopwords
|
||||
.filter(token => token.length > 1 && !this.stopwords.has(token))
|
||||
// Stem basic suffixes (simple Porter-like stemming)
|
||||
.map(token => this.stem(token));
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple stemming (basic suffix removal)
|
||||
*/
|
||||
private stem(word: string): string {
|
||||
// Very basic stemming - just remove common suffixes
|
||||
if (word.length > 5) {
|
||||
if (word.endsWith('ing')) return word.slice(0, -3);
|
||||
if (word.endsWith('ed')) return word.slice(0, -2);
|
||||
if (word.endsWith('es')) return word.slice(0, -2);
|
||||
if (word.endsWith('s') && !word.endsWith('ss')) return word.slice(0, -1);
|
||||
if (word.endsWith('ly')) return word.slice(0, -2);
|
||||
if (word.endsWith('tion')) return word.slice(0, -4) + 't';
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
/**
|
||||
* Count term frequency in tokens
|
||||
*/
|
||||
private termFrequency(term: string, tokens: string[]): number {
|
||||
return tokens.filter(t => t === term).length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update average document length
|
||||
*/
|
||||
private updateAvgDocLength(): void {
|
||||
if (this.docLengths.size === 0) {
|
||||
this.avgDocLength = 0;
|
||||
return;
|
||||
}
|
||||
let total = 0;
|
||||
for (const length of this.docLengths.values()) {
|
||||
total += length;
|
||||
}
|
||||
this.avgDocLength = total / this.docLengths.size;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
|
||||
export function createBM25Index(config?: Partial<BM25Config>): BM25Index {
|
||||
return new BM25Index(config);
|
||||
}
|
||||
|
||||
export default BM25Index;
|
||||
85
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.d.ts
vendored
Normal file
85
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.d.ts
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* HybridSearch - Combined Vector + Keyword Search
|
||||
*
|
||||
* Implements Reciprocal Rank Fusion (RRF) to combine vector similarity
|
||||
* and BM25 keyword search for improved recall and precision.
|
||||
*/
|
||||
import { BM25Index } from './BM25Index.js';
|
||||
import type { Embedder, VectorIndex } from '../memory/MemoryManager.js';
|
||||
export interface HybridSearchConfig {
|
||||
vector: {
|
||||
enabled: boolean;
|
||||
weight: number;
|
||||
};
|
||||
keyword: {
|
||||
enabled: boolean;
|
||||
weight: number;
|
||||
k1?: number;
|
||||
b?: number;
|
||||
};
|
||||
fusion: {
|
||||
method: 'rrf' | 'linear' | 'weighted';
|
||||
k: number;
|
||||
candidateMultiplier: number;
|
||||
};
|
||||
}
|
||||
export interface HybridSearchResult {
|
||||
id: string;
|
||||
vectorScore: number;
|
||||
keywordScore: number;
|
||||
fusedScore: number;
|
||||
matchedTerms?: string[];
|
||||
}
|
||||
export interface HybridSearchOptions {
|
||||
topK?: number;
|
||||
threshold?: number;
|
||||
vectorOnly?: boolean;
|
||||
keywordOnly?: boolean;
|
||||
}
|
||||
export declare const DEFAULT_HYBRID_CONFIG: HybridSearchConfig;
|
||||
export declare class HybridSearch {
|
||||
private readonly config;
|
||||
private vectorIndex;
|
||||
private embedder;
|
||||
private bm25Index;
|
||||
private initialized;
|
||||
constructor(config?: Partial<HybridSearchConfig>);
|
||||
/**
|
||||
* Initialize with vector index and embedder
|
||||
*/
|
||||
initialize(vectorIndex: VectorIndex, embedder: Embedder): void;
|
||||
/**
|
||||
* Check if initialized
|
||||
*/
|
||||
isInitialized(): boolean;
|
||||
/**
|
||||
* Add document to both indices
|
||||
*/
|
||||
add(id: string, content: string, embedding?: Float32Array): Promise<void>;
|
||||
/**
|
||||
* Remove document from both indices
|
||||
*/
|
||||
delete(id: string): boolean;
|
||||
/**
|
||||
* Hybrid search combining vector and keyword
|
||||
*/
|
||||
search(query: string, options?: HybridSearchOptions): Promise<HybridSearchResult[]>;
|
||||
/**
|
||||
* Get statistics
|
||||
*/
|
||||
getStats(): {
|
||||
config: HybridSearchConfig;
|
||||
bm25Stats: ReturnType<BM25Index['getStats']>;
|
||||
vectorIndexSize: number;
|
||||
};
|
||||
/**
|
||||
* Clear both indices
|
||||
*/
|
||||
clear(): void;
|
||||
private vectorSearch;
|
||||
private keywordSearch;
|
||||
private fuseResults;
|
||||
}
|
||||
export declare function createHybridSearch(config?: Partial<HybridSearchConfig>): HybridSearch;
|
||||
export default HybridSearch;
|
||||
//# sourceMappingURL=HybridSearch.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"HybridSearch.d.ts","sourceRoot":"","sources":["HybridSearch.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAMxE,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE;QACN,OAAO,EAAE,OAAO,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,OAAO,EAAE;QACP,OAAO,EAAE,OAAO,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,CAAC,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;IACF,MAAM,EAAE;QACN,MAAM,EAAE,KAAK,GAAG,QAAQ,GAAG,UAAU,CAAC;QACtC,CAAC,EAAE,MAAM,CAAC;QACV,mBAAmB,EAAE,MAAM,CAAC;KAC7B,CAAC;CACH;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAMD,eAAO,MAAM,qBAAqB,EAAE,kBAgBnC,CAAC;AAMF,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,WAAW,CAA4B;IAC/C,OAAO,CAAC,QAAQ,CAAyB;IACzC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,WAAW,CAAkB;gBAEzB,MAAM,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAapD;;OAEG;IACH,UAAU,CAAC,WAAW,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAM9D;;OAEG;IACH,aAAa,IAAI,OAAO;IAIxB;;OAEG;IACG,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAiB/E;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAc3B;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,kBAAkB,EAAE,CAAC;IA0ChC;;OAEG;IACH,QAAQ,IAAI;QACV,MAAM,EAAE,kBAAkB,CAAC;QAC3B,SAAS,EAAE,UAAU,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;QAC7C,eAAe,EAAE,MAAM,CAAC;KACzB;IAQD;;OAEG;IACH,KAAK,IAAI,IAAI;YASC,YAAY;YAoBZ,aAAa;IAuB3B,OAAO,CAAC,WAAW;CAwEpB;AAMD,wBAAgB,kBAAkB,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,GAAG,YAAY,CAErF;AAED,eAAe,YAAY,CAAC"}
|
||||
241
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.js
vendored
Normal file
241
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.js
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
"use strict";
|
||||
/**
|
||||
* HybridSearch - Combined Vector + Keyword Search
|
||||
*
|
||||
* Implements Reciprocal Rank Fusion (RRF) to combine vector similarity
|
||||
* and BM25 keyword search for improved recall and precision.
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.HybridSearch = exports.DEFAULT_HYBRID_CONFIG = void 0;
|
||||
exports.createHybridSearch = createHybridSearch;
|
||||
const BM25Index_js_1 = require("./BM25Index.js");
|
||||
// ============================================================================
|
||||
// Default Configuration
|
||||
// ============================================================================
|
||||
exports.DEFAULT_HYBRID_CONFIG = {
|
||||
vector: {
|
||||
enabled: true,
|
||||
weight: 0.7,
|
||||
},
|
||||
keyword: {
|
||||
enabled: true,
|
||||
weight: 0.3,
|
||||
k1: 1.2,
|
||||
b: 0.75,
|
||||
},
|
||||
fusion: {
|
||||
method: 'rrf',
|
||||
k: 60,
|
||||
candidateMultiplier: 3,
|
||||
},
|
||||
};
|
||||
// ============================================================================
|
||||
// HybridSearch Implementation
|
||||
// ============================================================================
|
||||
class HybridSearch {
|
||||
constructor(config = {}) {
|
||||
this.vectorIndex = null;
|
||||
this.embedder = null;
|
||||
this.initialized = false;
|
||||
this.config = {
|
||||
vector: { ...exports.DEFAULT_HYBRID_CONFIG.vector, ...config.vector },
|
||||
keyword: { ...exports.DEFAULT_HYBRID_CONFIG.keyword, ...config.keyword },
|
||||
fusion: { ...exports.DEFAULT_HYBRID_CONFIG.fusion, ...config.fusion },
|
||||
};
|
||||
this.bm25Index = new BM25Index_js_1.BM25Index({
|
||||
k1: this.config.keyword.k1,
|
||||
b: this.config.keyword.b,
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Initialize with vector index and embedder
|
||||
*/
|
||||
initialize(vectorIndex, embedder) {
|
||||
this.vectorIndex = vectorIndex;
|
||||
this.embedder = embedder;
|
||||
this.initialized = true;
|
||||
}
|
||||
/**
|
||||
* Check if initialized
|
||||
*/
|
||||
isInitialized() {
|
||||
return this.initialized;
|
||||
}
|
||||
/**
|
||||
* Add document to both indices
|
||||
*/
|
||||
async add(id, content, embedding) {
|
||||
// Add to BM25 index
|
||||
if (this.config.keyword.enabled) {
|
||||
this.bm25Index.add(id, content);
|
||||
}
|
||||
// Add to vector index
|
||||
if (this.config.vector.enabled && this.vectorIndex) {
|
||||
if (!embedding && this.embedder) {
|
||||
embedding = await this.embedder.embed(content);
|
||||
}
|
||||
if (embedding) {
|
||||
await this.vectorIndex.add(id, embedding);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Remove document from both indices
|
||||
*/
|
||||
delete(id) {
|
||||
let deleted = false;
|
||||
if (this.config.keyword.enabled) {
|
||||
deleted = this.bm25Index.delete(id) || deleted;
|
||||
}
|
||||
if (this.config.vector.enabled && this.vectorIndex) {
|
||||
deleted = this.vectorIndex.delete(id) || deleted;
|
||||
}
|
||||
return deleted;
|
||||
}
|
||||
/**
|
||||
* Hybrid search combining vector and keyword
|
||||
*/
|
||||
async search(query, options = {}) {
|
||||
// Return empty results for empty query
|
||||
if (!query || query.trim().length === 0) {
|
||||
return [];
|
||||
}
|
||||
const { topK = 10, threshold = 0, vectorOnly = false, keywordOnly = false, } = options;
|
||||
const fetchK = topK * this.config.fusion.candidateMultiplier;
|
||||
// Parallel search on both indices
|
||||
const [vectorResults, keywordResults] = await Promise.all([
|
||||
this.vectorSearch(query, fetchK, vectorOnly || !this.config.keyword.enabled),
|
||||
this.keywordSearch(query, fetchK, keywordOnly || !this.config.vector.enabled),
|
||||
]);
|
||||
// If only one mode is enabled/requested, return those results
|
||||
if (vectorOnly || !this.config.keyword.enabled) {
|
||||
return vectorResults
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
if (keywordOnly || !this.config.vector.enabled) {
|
||||
return keywordResults
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
// Fuse results
|
||||
const fused = this.fuseResults(vectorResults, keywordResults);
|
||||
return fused
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
/**
|
||||
* Get statistics
|
||||
*/
|
||||
getStats() {
|
||||
return {
|
||||
config: this.config,
|
||||
bm25Stats: this.bm25Index.getStats(),
|
||||
vectorIndexSize: this.vectorIndex?.size() ?? 0,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Clear both indices
|
||||
*/
|
||||
clear() {
|
||||
this.bm25Index.clear();
|
||||
this.vectorIndex?.clear();
|
||||
}
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
async vectorSearch(query, topK, returnDirectly) {
|
||||
if (!this.config.vector.enabled || !this.vectorIndex || !this.embedder) {
|
||||
return [];
|
||||
}
|
||||
const queryEmbedding = await this.embedder.embed(query);
|
||||
const results = await this.vectorIndex.search(queryEmbedding, topK);
|
||||
return results.map((r) => ({
|
||||
id: r.id,
|
||||
vectorScore: r.score,
|
||||
keywordScore: 0,
|
||||
fusedScore: returnDirectly ? r.score : 0,
|
||||
}));
|
||||
}
|
||||
async keywordSearch(query, topK, returnDirectly) {
|
||||
if (!this.config.keyword.enabled) {
|
||||
return [];
|
||||
}
|
||||
const results = this.bm25Index.search(query, topK);
|
||||
// Normalize BM25 scores to 0-1 range
|
||||
const maxScore = results.length > 0 ? results[0].score : 1;
|
||||
return results.map(r => ({
|
||||
id: r.id,
|
||||
vectorScore: 0,
|
||||
keywordScore: maxScore > 0 ? r.score / maxScore : 0,
|
||||
fusedScore: returnDirectly ? (maxScore > 0 ? r.score / maxScore : 0) : 0,
|
||||
matchedTerms: r.matchedTerms,
|
||||
}));
|
||||
}
|
||||
fuseResults(vectorResults, keywordResults) {
|
||||
const { method, k } = this.config.fusion;
|
||||
const { weight: vectorWeight } = this.config.vector;
|
||||
const { weight: keywordWeight } = this.config.keyword;
|
||||
// Normalize weights
|
||||
const totalWeight = vectorWeight + keywordWeight;
|
||||
const normVectorWeight = vectorWeight / totalWeight;
|
||||
const normKeywordWeight = keywordWeight / totalWeight;
|
||||
// Create maps for quick lookup
|
||||
const vectorMap = new Map(vectorResults.map((r, i) => [r.id, { ...r, rank: i + 1 }]));
|
||||
const keywordMap = new Map(keywordResults.map((r, i) => [r.id, { ...r, rank: i + 1 }]));
|
||||
// Collect all unique IDs
|
||||
const allIds = new Set([
|
||||
...vectorResults.map(r => r.id),
|
||||
...keywordResults.map(r => r.id),
|
||||
]);
|
||||
// Calculate fused scores
|
||||
const fusedResults = [];
|
||||
for (const id of allIds) {
|
||||
const vectorResult = vectorMap.get(id);
|
||||
const keywordResult = keywordMap.get(id);
|
||||
const vectorScore = vectorResult?.vectorScore ?? 0;
|
||||
const keywordScore = keywordResult?.keywordScore ?? 0;
|
||||
let fusedScore;
|
||||
switch (method) {
|
||||
case 'rrf': {
|
||||
// Reciprocal Rank Fusion
|
||||
const vectorRRF = vectorResult ? 1 / (k + vectorResult.rank) : 0;
|
||||
const keywordRRF = keywordResult ? 1 / (k + keywordResult.rank) : 0;
|
||||
fusedScore = normVectorWeight * vectorRRF + normKeywordWeight * keywordRRF;
|
||||
break;
|
||||
}
|
||||
case 'linear': {
|
||||
// Linear combination of scores
|
||||
fusedScore = normVectorWeight * vectorScore + normKeywordWeight * keywordScore;
|
||||
break;
|
||||
}
|
||||
case 'weighted':
|
||||
default: {
|
||||
// Weighted average with presence bonus
|
||||
const presence = (vectorResult ? 1 : 0) + (keywordResult ? 1 : 0);
|
||||
const presenceBonus = presence === 2 ? 0.1 : 0;
|
||||
fusedScore = normVectorWeight * vectorScore + normKeywordWeight * keywordScore + presenceBonus;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fusedResults.push({
|
||||
id,
|
||||
vectorScore,
|
||||
keywordScore,
|
||||
fusedScore,
|
||||
matchedTerms: keywordResult?.matchedTerms,
|
||||
});
|
||||
}
|
||||
// Sort by fused score
|
||||
return fusedResults.sort((a, b) => b.fusedScore - a.fusedScore);
|
||||
}
|
||||
}
|
||||
exports.HybridSearch = HybridSearch;
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
function createHybridSearch(config) {
|
||||
return new HybridSearch(config);
|
||||
}
|
||||
exports.default = HybridSearch;
|
||||
//# sourceMappingURL=HybridSearch.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
347
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.ts
vendored
Normal file
347
vendor/ruvector/npm/packages/ruvbot/src/learning/search/HybridSearch.ts
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
/**
|
||||
* HybridSearch - Combined Vector + Keyword Search
|
||||
*
|
||||
* Implements Reciprocal Rank Fusion (RRF) to combine vector similarity
|
||||
* and BM25 keyword search for improved recall and precision.
|
||||
*/
|
||||
|
||||
import { BM25Index } from './BM25Index.js';
|
||||
import type { Embedder, VectorIndex } from '../memory/MemoryManager.js';
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
export interface HybridSearchConfig {
|
||||
vector: {
|
||||
enabled: boolean;
|
||||
weight: number; // 0.0-1.0
|
||||
};
|
||||
keyword: {
|
||||
enabled: boolean;
|
||||
weight: number; // 0.0-1.0
|
||||
k1?: number; // BM25 k1 parameter
|
||||
b?: number; // BM25 b parameter
|
||||
};
|
||||
fusion: {
|
||||
method: 'rrf' | 'linear' | 'weighted';
|
||||
k: number; // RRF constant (default: 60)
|
||||
candidateMultiplier: number; // Fetch more candidates for filtering
|
||||
};
|
||||
}
|
||||
|
||||
export interface HybridSearchResult {
|
||||
id: string;
|
||||
vectorScore: number;
|
||||
keywordScore: number;
|
||||
fusedScore: number;
|
||||
matchedTerms?: string[];
|
||||
}
|
||||
|
||||
export interface HybridSearchOptions {
|
||||
topK?: number;
|
||||
threshold?: number;
|
||||
vectorOnly?: boolean;
|
||||
keywordOnly?: boolean;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Default Configuration
|
||||
// ============================================================================
|
||||
|
||||
export const DEFAULT_HYBRID_CONFIG: HybridSearchConfig = {
|
||||
vector: {
|
||||
enabled: true,
|
||||
weight: 0.7,
|
||||
},
|
||||
keyword: {
|
||||
enabled: true,
|
||||
weight: 0.3,
|
||||
k1: 1.2,
|
||||
b: 0.75,
|
||||
},
|
||||
fusion: {
|
||||
method: 'rrf',
|
||||
k: 60,
|
||||
candidateMultiplier: 3,
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// HybridSearch Implementation
|
||||
// ============================================================================
|
||||
|
||||
export class HybridSearch {
|
||||
private readonly config: HybridSearchConfig;
|
||||
private vectorIndex: VectorIndex | null = null;
|
||||
private embedder: Embedder | null = null;
|
||||
private bm25Index: BM25Index;
|
||||
private initialized: boolean = false;
|
||||
|
||||
constructor(config: Partial<HybridSearchConfig> = {}) {
|
||||
this.config = {
|
||||
vector: { ...DEFAULT_HYBRID_CONFIG.vector, ...config.vector },
|
||||
keyword: { ...DEFAULT_HYBRID_CONFIG.keyword, ...config.keyword },
|
||||
fusion: { ...DEFAULT_HYBRID_CONFIG.fusion, ...config.fusion },
|
||||
};
|
||||
|
||||
this.bm25Index = new BM25Index({
|
||||
k1: this.config.keyword.k1,
|
||||
b: this.config.keyword.b,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize with vector index and embedder
|
||||
*/
|
||||
initialize(vectorIndex: VectorIndex, embedder: Embedder): void {
|
||||
this.vectorIndex = vectorIndex;
|
||||
this.embedder = embedder;
|
||||
this.initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if initialized
|
||||
*/
|
||||
isInitialized(): boolean {
|
||||
return this.initialized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add document to both indices
|
||||
*/
|
||||
async add(id: string, content: string, embedding?: Float32Array): Promise<void> {
|
||||
// Add to BM25 index
|
||||
if (this.config.keyword.enabled) {
|
||||
this.bm25Index.add(id, content);
|
||||
}
|
||||
|
||||
// Add to vector index
|
||||
if (this.config.vector.enabled && this.vectorIndex) {
|
||||
if (!embedding && this.embedder) {
|
||||
embedding = await this.embedder.embed(content);
|
||||
}
|
||||
if (embedding) {
|
||||
await this.vectorIndex.add(id, embedding);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove document from both indices
|
||||
*/
|
||||
delete(id: string): boolean {
|
||||
let deleted = false;
|
||||
|
||||
if (this.config.keyword.enabled) {
|
||||
deleted = this.bm25Index.delete(id) || deleted;
|
||||
}
|
||||
|
||||
if (this.config.vector.enabled && this.vectorIndex) {
|
||||
deleted = this.vectorIndex.delete(id) || deleted;
|
||||
}
|
||||
|
||||
return deleted;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hybrid search combining vector and keyword
|
||||
*/
|
||||
async search(
|
||||
query: string,
|
||||
options: HybridSearchOptions = {}
|
||||
): Promise<HybridSearchResult[]> {
|
||||
// Return empty results for empty query
|
||||
if (!query || query.trim().length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const {
|
||||
topK = 10,
|
||||
threshold = 0,
|
||||
vectorOnly = false,
|
||||
keywordOnly = false,
|
||||
} = options;
|
||||
|
||||
const fetchK = topK * this.config.fusion.candidateMultiplier;
|
||||
|
||||
// Parallel search on both indices
|
||||
const [vectorResults, keywordResults] = await Promise.all([
|
||||
this.vectorSearch(query, fetchK, vectorOnly || !this.config.keyword.enabled),
|
||||
this.keywordSearch(query, fetchK, keywordOnly || !this.config.vector.enabled),
|
||||
]);
|
||||
|
||||
// If only one mode is enabled/requested, return those results
|
||||
if (vectorOnly || !this.config.keyword.enabled) {
|
||||
return vectorResults
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
if (keywordOnly || !this.config.vector.enabled) {
|
||||
return keywordResults
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
// Fuse results
|
||||
const fused = this.fuseResults(vectorResults, keywordResults);
|
||||
|
||||
return fused
|
||||
.filter(r => r.fusedScore >= threshold)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get statistics
|
||||
*/
|
||||
getStats(): {
|
||||
config: HybridSearchConfig;
|
||||
bm25Stats: ReturnType<BM25Index['getStats']>;
|
||||
vectorIndexSize: number;
|
||||
} {
|
||||
return {
|
||||
config: this.config,
|
||||
bm25Stats: this.bm25Index.getStats(),
|
||||
vectorIndexSize: this.vectorIndex?.size() ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear both indices
|
||||
*/
|
||||
clear(): void {
|
||||
this.bm25Index.clear();
|
||||
this.vectorIndex?.clear();
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
// Private Methods
|
||||
// ==========================================================================
|
||||
|
||||
private async vectorSearch(
|
||||
query: string,
|
||||
topK: number,
|
||||
returnDirectly: boolean
|
||||
): Promise<HybridSearchResult[]> {
|
||||
if (!this.config.vector.enabled || !this.vectorIndex || !this.embedder) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const queryEmbedding = await this.embedder.embed(query);
|
||||
const results = await this.vectorIndex.search(queryEmbedding, topK);
|
||||
|
||||
return results.map((r: { id: string; score: number }) => ({
|
||||
id: r.id,
|
||||
vectorScore: r.score,
|
||||
keywordScore: 0,
|
||||
fusedScore: returnDirectly ? r.score : 0,
|
||||
}));
|
||||
}
|
||||
|
||||
private async keywordSearch(
|
||||
query: string,
|
||||
topK: number,
|
||||
returnDirectly: boolean
|
||||
): Promise<HybridSearchResult[]> {
|
||||
if (!this.config.keyword.enabled) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const results = this.bm25Index.search(query, topK);
|
||||
|
||||
// Normalize BM25 scores to 0-1 range
|
||||
const maxScore = results.length > 0 ? results[0].score : 1;
|
||||
|
||||
return results.map(r => ({
|
||||
id: r.id,
|
||||
vectorScore: 0,
|
||||
keywordScore: maxScore > 0 ? r.score / maxScore : 0,
|
||||
fusedScore: returnDirectly ? (maxScore > 0 ? r.score / maxScore : 0) : 0,
|
||||
matchedTerms: r.matchedTerms,
|
||||
}));
|
||||
}
|
||||
|
||||
private fuseResults(
|
||||
vectorResults: HybridSearchResult[],
|
||||
keywordResults: HybridSearchResult[]
|
||||
): HybridSearchResult[] {
|
||||
const { method, k } = this.config.fusion;
|
||||
const { weight: vectorWeight } = this.config.vector;
|
||||
const { weight: keywordWeight } = this.config.keyword;
|
||||
|
||||
// Normalize weights
|
||||
const totalWeight = vectorWeight + keywordWeight;
|
||||
const normVectorWeight = vectorWeight / totalWeight;
|
||||
const normKeywordWeight = keywordWeight / totalWeight;
|
||||
|
||||
// Create maps for quick lookup
|
||||
const vectorMap = new Map(vectorResults.map((r, i) => [r.id, { ...r, rank: i + 1 }]));
|
||||
const keywordMap = new Map(keywordResults.map((r, i) => [r.id, { ...r, rank: i + 1 }]));
|
||||
|
||||
// Collect all unique IDs
|
||||
const allIds = new Set([
|
||||
...vectorResults.map(r => r.id),
|
||||
...keywordResults.map(r => r.id),
|
||||
]);
|
||||
|
||||
// Calculate fused scores
|
||||
const fusedResults: HybridSearchResult[] = [];
|
||||
|
||||
for (const id of allIds) {
|
||||
const vectorResult = vectorMap.get(id);
|
||||
const keywordResult = keywordMap.get(id);
|
||||
|
||||
const vectorScore = vectorResult?.vectorScore ?? 0;
|
||||
const keywordScore = keywordResult?.keywordScore ?? 0;
|
||||
|
||||
let fusedScore: number;
|
||||
|
||||
switch (method) {
|
||||
case 'rrf': {
|
||||
// Reciprocal Rank Fusion
|
||||
const vectorRRF = vectorResult ? 1 / (k + vectorResult.rank) : 0;
|
||||
const keywordRRF = keywordResult ? 1 / (k + keywordResult.rank) : 0;
|
||||
fusedScore = normVectorWeight * vectorRRF + normKeywordWeight * keywordRRF;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'linear': {
|
||||
// Linear combination of scores
|
||||
fusedScore = normVectorWeight * vectorScore + normKeywordWeight * keywordScore;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'weighted':
|
||||
default: {
|
||||
// Weighted average with presence bonus
|
||||
const presence = (vectorResult ? 1 : 0) + (keywordResult ? 1 : 0);
|
||||
const presenceBonus = presence === 2 ? 0.1 : 0;
|
||||
fusedScore = normVectorWeight * vectorScore + normKeywordWeight * keywordScore + presenceBonus;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fusedResults.push({
|
||||
id,
|
||||
vectorScore,
|
||||
keywordScore,
|
||||
fusedScore,
|
||||
matchedTerms: keywordResult?.matchedTerms,
|
||||
});
|
||||
}
|
||||
|
||||
// Sort by fused score
|
||||
return fusedResults.sort((a, b) => b.fusedScore - a.fusedScore);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Factory Function
|
||||
// ============================================================================
|
||||
|
||||
export function createHybridSearch(config?: Partial<HybridSearchConfig>): HybridSearch {
|
||||
return new HybridSearch(config);
|
||||
}
|
||||
|
||||
export default HybridSearch;
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAC5D,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEvE,OAAO,EACL,YAAY,EACZ,kBAAkB,EAClB,qBAAqB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,mBAAmB,GACpB,MAAM,mBAAmB,CAAC"}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAEH,+CAA4D;AAAnD,yGAAA,SAAS,OAAA;AAAE,+GAAA,eAAe,OAAA;AAGnC,qDAI2B;AAHzB,+GAAA,YAAY,OAAA;AACZ,qHAAA,kBAAkB,OAAA;AAClB,wHAAA,qBAAqB,OAAA"}
|
||||
19
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.ts
vendored
Normal file
19
vendor/ruvector/npm/packages/ruvbot/src/learning/search/index.ts
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* Search module exports
|
||||
*
|
||||
* Provides hybrid search combining vector similarity and BM25 keyword search.
|
||||
*/
|
||||
|
||||
export { BM25Index, createBM25Index } from './BM25Index.js';
|
||||
export type { BM25Config, Document, BM25Result } from './BM25Index.js';
|
||||
|
||||
export {
|
||||
HybridSearch,
|
||||
createHybridSearch,
|
||||
DEFAULT_HYBRID_CONFIG,
|
||||
} from './HybridSearch.js';
|
||||
export type {
|
||||
HybridSearchConfig,
|
||||
HybridSearchResult,
|
||||
HybridSearchOptions,
|
||||
} from './HybridSearch.js';
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,mBAAmB,EAAE,mBAAmB,CAAC;IACzC,WAAW,EAAE,WAAW,CAAC;IACzB,eAAe,EAAE,eAAe,CAAC;CAClC;AAED,MAAM,WAAW,mBAAmB;IAClC,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjE,oBAAoB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,cAAc,GAAG,IAAI,CAAC;IACzE,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACnD,KAAK,CAAC,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC/E;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,YAAY,EAAE,CAAC;IACtB,eAAe,EAAE,cAAc,EAAE,CAAC;IAClC,SAAS,EAAE,IAAI,CAAC;IAChB,OAAO,EAAE,IAAI,CAAC;IACd,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,YAAY,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,OAAO,GAAG,UAAU,GAAG,UAAU,GAAG,SAAS,CAAC;AAE1D,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,YAAY,EAAE,iBAAiB,EAAE,EAAE,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IACnF,KAAK,CAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IACzE,QAAQ,CAAC,OAAO,EAAE,iBAAiB,EAAE,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;CACpE;AAED,MAAM,WAAW,iBAAkB,SAAQ,UAAU;IACnD,OAAO,EAAE,OAAO,CAAC;IACjB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,YAAY,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,YAAY;IAC
3B,UAAU,EAAE,MAAM,CAAC;IACnB,CAAC,EAAE,YAAY,CAAC;IAChB,CAAC,EAAE,YAAY,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,aAAa,CAAC,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IACtF,WAAW,CACT,UAAU,EAAE,YAAY,EACxB,UAAU,EAAE,YAAY,EACxB,MAAM,EAAE,YAAY,EACpB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,mBAAmB,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,YAAY,CAAC;IACvB,iBAAiB,EAAE,YAAY,CAAC;CACjC;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,YAAY,CAAC;IACtB,MAAM,EAAE,YAAY,CAAC;IACrB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,gBAAgB,EAAE,MAAM,CAAC;CAC1B"}
|
||||
1
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;GAEG"}
|
||||
115
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.ts
vendored
Normal file
115
vendor/ruvector/npm/packages/ruvbot/src/learning/training/index.ts
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
/**
|
||||
* Training Module - Trajectory-based learning with LoRA and EWC
|
||||
*/
|
||||
|
||||
/**
 * Aggregates the three components of the trajectory-based training pipeline:
 * trajectory collection, LoRA fine-tuning, and EWC consolidation.
 */
export interface TrainingPipeline {
  /** Records conversation trajectories for later training. */
  trajectoryCollector: TrajectoryCollector;
  /** Trains low-rank adapters from labeled trajectories. */
  loraTrainer: LoRATrainer;
  /** Blends old and new weights to limit forgetting when updating the model. */
  ewcConsolidator: EWCConsolidator;
}
|
||||
|
||||
/**
 * Collects per-session conversation trajectories (turns plus skill
 * executions) and supports labeling them with a quality verdict afterwards.
 */
export interface TrajectoryCollector {
  /** Begin recording a new session under the given id. */
  startSession(sessionId: string): void;
  /** Append one conversation turn to the session's trajectory. */
  recordTurn(sessionId: string, turn: TurnSnapshot): Promise<void>;
  /** Append one skill execution record to the session's trajectory. */
  recordSkillExecution(sessionId: string, execution: SkillExecution): void;
  /** Finish the session and return the assembled trajectory. */
  endSession(sessionId: string): Promise<Trajectory>;
  /** Attach a verdict (with optional free-text reason) to a stored trajectory. */
  label(trajectoryId: string, verdict: Verdict, reason?: string): Promise<void>;
}
|
||||
|
||||
/**
 * A complete recorded session: the ordered turns, the skills executed,
 * session timing, and (once labeled) the quality verdict.
 */
export interface Trajectory {
  id: string;
  sessionId: string;
  tenantId: string;
  /** Conversation turns captured during the session. */
  turns: TurnSnapshot[];
  /** Skill executions captured during the session. */
  skillExecutions: SkillExecution[];
  startTime: Date;
  endTime: Date;
  /** Present only after the trajectory has been labeled. */
  verdict?: Verdict;
  /** Optional free-text justification supplied with the verdict. */
  verdictReason?: string;
  /** Optional id of a stored embedding associated with this trajectory. */
  embeddingId?: string;
}
|
||||
|
||||
/** A single conversation turn captured for training. */
export interface TurnSnapshot {
  turnId: string;
  /** Who produced the turn. */
  role: 'user' | 'assistant' | 'system';
  content: string;
  /** Vector embedding of `content`. */
  contentEmbedding: Float32Array;
  tokenCount: number;
  /** Latency in milliseconds (presumably time to produce the turn — confirm with collector). */
  latencyMs: number;
  timestamp: Date;
}
|
||||
|
||||
/** Record of one skill invocation made during a session. */
export interface SkillExecution {
  skillId: string;
  /** Parameters the skill was invoked with. */
  params: Record<string, unknown>;
  /** Whether the invocation succeeded. */
  success: boolean;
  /** Execution time in milliseconds. */
  latencyMs: number;
}
|
||||
|
||||
/** Quality label assigned to a trajectory. */
export type Verdict = 'positive' | 'negative' | 'neutral';
|
||||
|
||||
/**
 * Trains low-rank adaptation (LoRA) weights from labeled trajectories and
 * merges them back into a base model.
 */
export interface LoRATrainer {
  /** Fit LoRA adapters on the given labeled trajectories. */
  train(trajectories: LabeledTrajectory[], config: LoRAConfig): Promise<LoRAWeights>;
  /** Fold LoRA adapters into the base model, producing merged weights. */
  merge(baseModel: ModelWeights, lora: LoRAWeights): Promise<ModelWeights>;
  /** Score adapters against a held-out labeled set. */
  evaluate(testSet: LabeledTrajectory[]): Promise<EvaluationMetrics>;
}
|
||||
|
||||
/** A trajectory whose verdict has been assigned (narrows `verdict` to required). */
export interface LabeledTrajectory extends Trajectory {
  verdict: Verdict;
  /** Optional confidence in the verdict, if the labeler provides one. */
  verdictConfidence?: number;
}
|
||||
|
||||
/** Hyperparameters for LoRA training. */
export interface LoRAConfig {
  /** Rank of the low-rank decomposition. */
  rank: number;
  /** LoRA scaling factor (alpha). */
  alpha: number;
  epochs: number;
  batchSize: number;
  learningRate: number;
}
|
||||
|
||||
/** Trained LoRA adapters plus the hyperparameters they were trained with. */
export interface LoRAWeights {
  rank: number;
  alpha: number;
  /** One adapter pair per adapted model layer. */
  layerAdapters: LayerAdapter[];
}
|
||||
|
||||
/**
 * Low-rank adapter factors for a single model layer.
 * A and B are flat Float32Arrays; their shapes/layout are defined by the
 * trainer implementation — NOTE(review): confirm which is the down- vs
 * up-projection before relying on ordering.
 */
export interface LayerAdapter {
  layerIndex: number;
  A: Float32Array;
  B: Float32Array;
}
|
||||
|
||||
/** Dense model weights, stored as one flat Float32Array per layer. */
export interface ModelWeights {
  layers: Float32Array[];
  hiddenSize: number;
  parameterCount: number;
}
|
||||
|
||||
/** Classification metrics produced by evaluating trained adapters. */
export interface EvaluationMetrics {
  accuracy: number;
  precision: number;
  recall: number;
  f1Score: number;
}
|
||||
|
||||
/**
 * Elastic Weight Consolidation (EWC): estimates parameter importance via
 * Fisher information and blends old and new weights to limit catastrophic
 * forgetting.
 */
export interface EWCConsolidator {
  /** Estimate Fisher information of the model over the given trajectories. */
  computeFisher(model: ModelWeights, trajectories: Trajectory[]): Promise<FisherMatrix>;
  /**
   * Combine old and new weights under an EWC penalty.
   * `lambda` controls how strongly the result is anchored to the old weights.
   */
  consolidate(
    oldWeights: ModelWeights,
    newWeights: ModelWeights,
    fisher: FisherMatrix,
    lambda: number
  ): Promise<ConsolidationResult>;
}
|
||||
|
||||
/** Diagonal Fisher information plus the parameter values it was computed at. */
export interface FisherMatrix {
  /** Diagonal approximation of the Fisher information matrix. */
  diagonal: Float32Array;
  /** Weights at the time the Fisher diagonal was computed. */
  parameterSnapshot: ModelWeights;
}
|
||||
|
||||
/** Output of EWC consolidation, including retention/gain diagnostics. */
export interface ConsolidationResult {
  /** The consolidated model weights. */
  weights: ModelWeights;
  /** Fisher state to carry into the next consolidation round. */
  fisher: FisherMatrix;
  /** Retention score for prior knowledge — exact scale defined by the implementation. */
  oldKnowledgeRetention: number;
  /** Gain score for newly learned knowledge — exact scale defined by the implementation. */
  newKnowledgeGain: number;
}
|
||||
Reference in New Issue
Block a user