// git-subtree-dir: vendor/ruvector
// git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
// Source listing: 1041 lines, 27 KiB, TypeScript
/**
 * Database Persistence Module for ruvector-extensions
 *
 * Provides comprehensive database persistence capabilities including:
 * - Multiple save formats (JSON, Binary/MessagePack, SQLite)
 * - Incremental saves (only changed data)
 * - Snapshot management (create, list, restore, delete)
 * - Export/import functionality
 * - Compression support
 * - Progress callbacks for large operations
 *
 * @module persistence
 */
|
|
|
|
import * as crypto from 'crypto';
import { promises as fs } from 'fs';
import { createReadStream, createWriteStream } from 'fs';
import * as path from 'path';
import { pipeline } from 'stream/promises';
import { promisify } from 'util';
import { brotliCompress, brotliDecompress, gunzip, gzip } from 'zlib';

import type { VectorEntry, DbOptions, DbStats } from 'ruvector';
|
|
|
|
// VectorDB is exported as a class (a value), not a type, so the precise
// instance type would be InstanceType<typeof VectorDB>. Until that is wired
// up, `any` is used as a placeholder and calls on the instance are unchecked.
type VectorDBInstance = any; // Will be replaced with actual instance type
|
|
|
|
// ============================================================================
// Types and Interfaces
// ============================================================================
|
|
|
|
/**
 * Supported persistence formats.
 *
 * Identifies the on-disk encoding requested for a save, export or snapshot.
 */
export type PersistenceFormat = 'json' | 'binary' | 'sqlite';
|
|
|
|
/**
 * Compression algorithms.
 *
 * `'none'` means files are written uncompressed.
 */
export type CompressionType = 'none' | 'gzip' | 'brotli';
|
|
|
|
/**
 * Progress callback for long-running operations.
 *
 * Receives a single object describing the operation and how far along it is.
 */
export type ProgressCallback = (progress: {
  /** Operation being performed */
  operation: string;
  /** Current progress (0-100) */
  percentage: number;
  /** Number of items processed */
  current: number;
  /** Total items to process */
  total: number;
  /** Human-readable message */
  message: string;
}) => void;
|
|
|
|
/**
 * Persistence configuration options.
 *
 * Only `baseDir` is required; the persistence manager's constructor fills in
 * the defaults noted on each optional field.
 */
export interface PersistenceOptions {
  /** Base directory for persistence files */
  baseDir: string;
  /** Default format for saves (defaults to 'json') */
  format?: PersistenceFormat;
  /** Compression algorithm to use (defaults to 'none') */
  compression?: CompressionType;
  /** Enable incremental saves (defaults to false) */
  incremental?: boolean;
  /** Auto-save interval in milliseconds (0 = disabled, the default) */
  autoSaveInterval?: number;
  /** Maximum number of snapshots to keep (defaults to 10) */
  maxSnapshots?: number;
  /** Batch size for large operations (defaults to 1000) */
  batchSize?: number;
}
|
|
|
|
/**
 * Database snapshot metadata.
 *
 * Stored as JSON next to each snapshot file and returned when snapshots are
 * created or listed.
 */
export interface SnapshotMetadata {
  /** Snapshot identifier */
  id: string;
  /** Human-readable name */
  name: string;
  /** Creation timestamp */
  timestamp: number;
  /** Vector count at snapshot time */
  vectorCount: number;
  /** Database dimension */
  dimension: number;
  /** Format used */
  format: PersistenceFormat;
  /** Whether compressed */
  compressed: boolean;
  /** File size in bytes */
  fileSize: number;
  /** Checksum for integrity */
  checksum: string;
  /** Additional metadata */
  metadata?: Record<string, any>;
}
|
|
|
|
/**
 * Serialized database state.
 *
 * This is the object written to (and parsed back from) a persistence file.
 */
export interface DatabaseState {
  /** Format version for compatibility */
  version: string;
  /** Database configuration */
  options: DbOptions;
  /** Database statistics */
  stats: DbStats;
  /** Vector entries */
  vectors: VectorEntry[];
  /** Index state (opaque) */
  indexState?: any;
  /** Additional metadata */
  metadata?: Record<string, any>;
  /** Timestamp of save */
  timestamp: number;
  /** Checksum for integrity */
  checksum?: string;
}
|
|
|
|
/**
 * Incremental save state.
 *
 * Remembers what the last save contained so a later incremental save can
 * detect which vector IDs were added or removed.
 */
interface IncrementalState {
  /** Last save timestamp */
  lastSave: number;
  /** Vector IDs at last save */
  vectorIds: Set<string>;
  /** Checksum of last save */
  checksum: string;
}
|
|
|
|
/**
 * Export options.
 */
export interface ExportOptions {
  /** Output file path */
  path: string;
  /** Export format */
  format?: PersistenceFormat;
  /** Enable compression */
  compress?: boolean;
  /** Include index state */
  includeIndex?: boolean;
  /** Progress callback */
  onProgress?: ProgressCallback;
}
|
|
|
|
/**
 * Import options.
 */
export interface ImportOptions {
  /** Input file path */
  path: string;
  /** Expected format (auto-detect if not specified) */
  format?: PersistenceFormat;
  /** Whether to clear database before import */
  clear?: boolean;
  /** Verify checksum */
  verifyChecksum?: boolean;
  /** Progress callback */
  onProgress?: ProgressCallback;
}
|
|
|
|
// ============================================================================
// Database Persistence Manager
// ============================================================================
|
|
|
|
/**
 * Main persistence manager for VectorDB instances.
 *
 * Serializes the wrapped database to disk, restores it, manages snapshots and
 * optionally saves on a timer. Initialization (creating directories, loading
 * incremental state, starting auto-save) is kicked off by the constructor and
 * runs asynchronously; a failure there is logged instead of thrown, and the
 * public methods that depend on it wait for it to settle first.
 *
 * Calls on the wrapped database are awaited, so both synchronous and
 * promise-returning database implementations work.
 *
 * @example
 * ```typescript
 * const db = new VectorDB({ dimension: 384 });
 * const persistence = new DatabasePersistence(db, {
 *   baseDir: './data',
 *   format: 'binary',
 *   compression: 'gzip',
 *   incremental: true
 * });
 *
 * // Save database
 * await persistence.save({ onProgress: (p) => console.log(p.message) });
 *
 * // Create snapshot
 * const snapshot = await persistence.createSnapshot('before-update');
 *
 * // Restore from snapshot
 * await persistence.restoreSnapshot(snapshot.id);
 * ```
 */
export class DatabasePersistence {
  private db: VectorDBInstance;
  private options: Required<PersistenceOptions>;
  private incrementalState: IncrementalState | null = null;
  private autoSaveTimer: NodeJS.Timeout | null = null;
  /** True while an auto-save tick is still running, so ticks never overlap. */
  private autoSaveInFlight = false;
  /** Settles (never rejects) once constructor-triggered initialization ends. */
  private ready: Promise<void>;

  /**
   * Create a new database persistence manager
   *
   * @param db - VectorDB instance to manage
   * @param options - Persistence configuration
   */
  constructor(db: VectorDBInstance, options: PersistenceOptions) {
    const requestedBatchSize = options.batchSize ?? 1000;

    this.db = db;
    this.options = {
      baseDir: options.baseDir,
      format: options.format || 'json',
      compression: options.compression || 'none',
      incremental: options.incremental ?? false,
      autoSaveInterval: options.autoSaveInterval ?? 0,
      maxSnapshots: options.maxSnapshots ?? 10,
      // A batch size below 1 (or NaN) would stall the batched insert loop,
      // so anything unusable falls back to the default.
      batchSize:
        Number.isFinite(requestedBatchSize) && requestedBatchSize >= 1
          ? Math.floor(requestedBatchSize)
          : 1000,
    };

    // A constructor cannot await; keep the promise so later calls can wait on
    // it, and log failures here so they never become unhandled rejections.
    this.ready = this.initialize().catch((error) => {
      console.error('Persistence initialization failed:', error);
    });
  }

  /**
   * Initialize persistence system
   */
  private async initialize(): Promise<void> {
    // Create base directory if it doesn't exist
    await fs.mkdir(this.options.baseDir, { recursive: true });
    await fs.mkdir(path.join(this.options.baseDir, 'snapshots'), { recursive: true });

    // Start auto-save if configured
    if (this.options.autoSaveInterval > 0) {
      this.startAutoSave();
    }

    // Load incremental state if exists
    if (this.options.incremental) {
      await this.loadIncrementalState();
    }
  }

  // ==========================================================================
  // Save Operations
  // ==========================================================================

  /**
   * Save database to disk
   *
   * When `compress` is true but no algorithm is configured, gzip is used.
   *
   * @param options - Save options
   * @returns Path to saved file
   */
  async save(options: {
    path?: string;
    format?: PersistenceFormat;
    compress?: boolean;
    onProgress?: ProgressCallback;
  } = {}): Promise<string> {
    // Wait so the initial incremental-state load cannot overwrite the state
    // recorded by this save.
    await this.ready;

    const format = options.format || this.options.format;
    const compress = options.compress ?? (this.options.compression !== 'none');
    const savePath = options.path || this.getDefaultSavePath(format, compress);

    const state = await this.serializeDatabase(options.onProgress);

    this.report(options.onProgress, 'save', 80, 4, 5, 'Writing to disk...');
    await this.writeStateToFile(state, savePath, format, compress);

    if (this.options.incremental) {
      await this.updateIncrementalState(state);
    }

    this.report(options.onProgress, 'save', 100, 5, 5, 'Save completed');
    return savePath;
  }

  /**
   * Save only changed data (incremental save)
   *
   * Changes are detected by comparing vector IDs with those recorded at the
   * last save; a vector whose contents changed under the same ID is not
   * detected. When a change is found a full save is performed.
   *
   * @param options - Save options
   * @returns Path to saved file or null if no changes
   */
  async saveIncremental(options: {
    path?: string;
    format?: PersistenceFormat;
    onProgress?: ProgressCallback;
  } = {}): Promise<string | null> {
    await this.ready;

    const previous = this.incrementalState;
    if (!previous) {
      // First save, do full save
      return this.save(options);
    }

    // Set lookups keep change detection linear in the number of IDs.
    const currentIds = new Set(await this.getAllVectorIds());
    let added = 0;
    for (const id of currentIds) {
      if (!previous.vectorIds.has(id)) added++;
    }
    let removed = 0;
    for (const id of previous.vectorIds) {
      if (!currentIds.has(id)) removed++;
    }

    if (added === 0 && removed === 0) {
      // No changes
      return null;
    }

    this.report(
      options.onProgress,
      'incremental-save',
      20,
      1,
      5,
      `Found ${added} new and ${removed} removed vectors`
    );

    // For now, do a full save with changes
    // In a production system, you'd implement delta encoding
    return this.save(options);
  }

  /**
   * Load database from disk
   *
   * The file is read and validated before the database is touched.
   *
   * @param options - Load options; `clear` (default true) controls whether
   *   existing data is removed before the loaded vectors are inserted
   * @throws Error if the file is malformed or checksum verification fails
   */
  async load(options: {
    path: string;
    format?: PersistenceFormat;
    verifyChecksum?: boolean;
    onProgress?: ProgressCallback;
    clear?: boolean;
  }): Promise<void> {
    const format = options.format || this.detectFormat(options.path);

    this.report(options.onProgress, 'load', 10, 1, 5, 'Reading from disk...');
    const state = await this.readStateFromFile(options.path, format);

    if (options.verifyChecksum && state.checksum) {
      this.report(options.onProgress, 'load', 30, 2, 5, 'Verifying checksum...');

      const computed = this.computeChecksum(state);
      if (computed !== state.checksum) {
        throw new Error('Checksum verification failed - file may be corrupted');
      }
    }

    await this.deserializeDatabase(state, options.onProgress, options.clear ?? true);

    this.report(options.onProgress, 'load', 100, 5, 5, 'Load completed');
  }

  // ==========================================================================
  // Snapshot Management
  // ==========================================================================

  /**
   * Create a snapshot of the current database state
   *
   * The snapshot is written directly rather than through `save()`, so taking
   * a snapshot does not mark pending changes as saved for incremental saves.
   *
   * @param name - Human-readable snapshot name
   * @param metadata - Additional metadata to store
   * @returns Snapshot metadata
   */
  async createSnapshot(
    name: string,
    metadata?: Record<string, any>
  ): Promise<SnapshotMetadata> {
    const id = crypto.randomUUID();
    const timestamp = Date.now();
    const stats = await this.db.stats();
    const format = this.options.format;
    const compressed = this.options.compression !== 'none';
    const snapshotsDir = path.join(this.options.baseDir, 'snapshots');
    const snapshotPath = path.join(snapshotsDir, `${id}.${format}`);

    const state = await this.serializeDatabase();
    await this.writeStateToFile(state, snapshotPath, format, compressed);

    const fileStats = await fs.stat(snapshotPath);
    const checksum = await this.computeFileChecksum(snapshotPath);

    const snapshotMetadata: SnapshotMetadata = {
      id,
      name,
      timestamp,
      vectorCount: stats.count,
      dimension: stats.dimension,
      format,
      compressed,
      fileSize: fileStats.size,
      checksum,
      metadata,
    };

    // Save metadata
    const metadataPath = path.join(snapshotsDir, `${id}.meta.json`);
    await fs.writeFile(metadataPath, JSON.stringify(snapshotMetadata, null, 2));

    // Clean up old snapshots
    await this.cleanupOldSnapshots();

    return snapshotMetadata;
  }

  /**
   * List all available snapshots
   *
   * A missing snapshots directory yields an empty list. Metadata files that
   * cannot be read or parsed are logged and skipped, so one damaged file does
   * not hide the others.
   *
   * @returns Array of snapshot metadata, sorted by timestamp (newest first)
   */
  async listSnapshots(): Promise<SnapshotMetadata[]> {
    await this.ready;

    const snapshotsDir = path.join(this.options.baseDir, 'snapshots');
    let files: string[];
    try {
      files = await fs.readdir(snapshotsDir);
    } catch (error) {
      if (this.isNotFound(error)) return [];
      throw error;
    }

    const metadataFiles = files.filter(f => f.endsWith('.meta.json'));
    const loaded = await Promise.all(
      metadataFiles.map(async (file): Promise<SnapshotMetadata | null> => {
        try {
          const content = await fs.readFile(path.join(snapshotsDir, file), 'utf-8');
          const parsed = JSON.parse(content);
          if (!parsed || typeof parsed.id !== 'string') {
            throw new Error('missing snapshot id');
          }
          return parsed as SnapshotMetadata;
        } catch (error) {
          console.error(`Skipping unreadable snapshot metadata ${file}:`, error);
          return null;
        }
      })
    );

    return loaded
      .filter((entry): entry is SnapshotMetadata => entry !== null)
      .sort((a, b) => b.timestamp - a.timestamp);
  }

  /**
   * Restore database from a snapshot
   *
   * @param snapshotId - Snapshot ID to restore
   * @param options - Restore options
   * @throws Error if the ID is invalid, the snapshot is missing, or the
   *   requested checksum verification fails
   */
  async restoreSnapshot(
    snapshotId: string,
    options: {
      verifyChecksum?: boolean;
      onProgress?: ProgressCallback;
    } = {}
  ): Promise<void> {
    const { metadata, snapshotPath } = await this.readSnapshotMetadata(snapshotId);

    if (options.verifyChecksum) {
      this.report(options.onProgress, 'restore', 10, 1, 5, 'Verifying snapshot integrity...');

      const checksum = await this.computeFileChecksum(snapshotPath);
      if (checksum !== metadata.checksum) {
        throw new Error('Snapshot checksum verification failed - file may be corrupted');
      }
    }

    await this.load({
      path: snapshotPath,
      format: metadata.format,
      verifyChecksum: false, // Already verified above if needed
      onProgress: options.onProgress,
    });
  }

  /**
   * Delete a snapshot
   *
   * Removal is best-effort: a file that is already gone is ignored and any
   * other removal failure is logged without throwing.
   *
   * @param snapshotId - Snapshot ID to delete
   * @throws Error if the ID is invalid or the snapshot metadata is missing
   */
  async deleteSnapshot(snapshotId: string): Promise<void> {
    const { metadataPath, snapshotPath } = await this.readSnapshotMetadata(snapshotId);

    const remove = async (target: string): Promise<void> => {
      try {
        await fs.unlink(target);
      } catch (error) {
        if (!this.isNotFound(error)) {
          console.error(`Failed to remove ${target}:`, error);
        }
      }
    };

    await Promise.all([remove(snapshotPath), remove(metadataPath)]);
  }

  // ==========================================================================
  // Export/Import
  // ==========================================================================

  /**
   * Export database to a file
   *
   * @param options - Export options
   */
  async export(options: ExportOptions): Promise<void> {
    const format = options.format || 'json';
    const compress = options.compress ?? false;

    const state = await this.serializeDatabase(options.onProgress);

    if (!options.includeIndex && state.indexState !== undefined) {
      delete state.indexState;
      // The checksum covers every field, so refresh it after dropping one.
      state.checksum = this.computeChecksum(state);
    }

    await this.writeStateToFile(state, options.path, format, compress);
  }

  /**
   * Import database from a file
   *
   * Existing data is cleared unless `clear` is explicitly `false`, in which
   * case the imported vectors are inserted alongside what is already there.
   * Clearing happens only after the file has been read and validated.
   *
   * @param options - Import options
   */
  async import(options: ImportOptions): Promise<void> {
    await this.load({
      path: options.path,
      format: options.format,
      verifyChecksum: options.verifyChecksum,
      onProgress: options.onProgress,
      clear: options.clear ?? true,
    });
  }

  // ==========================================================================
  // Auto-Save
  // ==========================================================================

  /**
   * Start automatic saves at configured interval
   *
   * Does nothing if auto-save is already running or the configured interval
   * is not positive. A tick that fires while the previous save is still in
   * progress is skipped.
   */
  startAutoSave(): void {
    if (this.autoSaveTimer) {
      return; // Already running
    }
    if (!(this.options.autoSaveInterval > 0)) {
      return; // 0 means disabled; a zero-delay interval would save continuously
    }

    this.autoSaveTimer = setInterval(() => {
      void this.runAutoSave();
    }, this.options.autoSaveInterval);
  }

  /**
   * Run one auto-save tick, logging (not throwing) any failure.
   */
  private async runAutoSave(): Promise<void> {
    if (this.autoSaveInFlight) {
      return;
    }
    this.autoSaveInFlight = true;
    try {
      if (this.options.incremental) {
        await this.saveIncremental();
      } else {
        await this.save();
      }
    } catch (error) {
      console.error('Auto-save failed:', error);
    } finally {
      this.autoSaveInFlight = false;
    }
  }

  /**
   * Stop automatic saves
   */
  stopAutoSave(): void {
    if (this.autoSaveTimer) {
      clearInterval(this.autoSaveTimer);
      this.autoSaveTimer = null;
    }
  }

  /**
   * Cleanup and shutdown
   */
  async shutdown(): Promise<void> {
    // Initialization may still be about to start the timer; let it finish so
    // the timer is not left running after shutdown.
    await this.ready;
    this.stopAutoSave();

    // Do final save if auto-save was enabled
    if (this.options.autoSaveInterval > 0) {
      await this.save();
    }
  }

  // ==========================================================================
  // Private Helper Methods
  // ==========================================================================

  /**
   * Invoke the progress callback, if one was supplied.
   */
  private report(
    onProgress: ProgressCallback | undefined,
    operation: string,
    percentage: number,
    current: number,
    total: number,
    message: string
  ): void {
    if (onProgress) {
      onProgress({ operation, percentage, current, total, message });
    }
  }

  /**
   * Whether an error is a "file or directory does not exist" error.
   */
  private isNotFound(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'ENOENT'
    );
  }

  /**
   * Pick the algorithm to use when compression is (or is not) requested.
   *
   * Falls back to gzip when compression is requested but none is configured,
   * so a compression request is never silently ignored.
   */
  private resolveCompression(compress: boolean): CompressionType {
    if (!compress) {
      return 'none';
    }
    return this.options.compression === 'none' ? 'gzip' : this.options.compression;
  }

  /**
   * Read a snapshot's metadata file and derive the snapshot's file paths.
   *
   * @throws Error if the ID could escape the snapshots directory, or if the
   *   metadata file cannot be read or parsed
   */
  private async readSnapshotMetadata(snapshotId: string): Promise<{
    metadata: SnapshotMetadata;
    metadataPath: string;
    snapshotPath: string;
  }> {
    // The ID becomes part of a file name; reject anything path-like.
    if (!snapshotId || snapshotId === '.' || snapshotId === '..' || /[\\/]/.test(snapshotId)) {
      throw new Error(`Invalid snapshot id: ${snapshotId}`);
    }

    const snapshotsDir = path.join(this.options.baseDir, 'snapshots');
    const metadataPath = path.join(snapshotsDir, `${snapshotId}.meta.json`);

    let metadata: SnapshotMetadata;
    try {
      const content = await fs.readFile(metadataPath, 'utf-8');
      metadata = JSON.parse(content);
    } catch {
      throw new Error(`Snapshot ${snapshotId} not found`);
    }

    const snapshotPath = path.join(snapshotsDir, `${snapshotId}.${metadata.format}`);
    return { metadata, metadataPath, snapshotPath };
  }

  /**
   * Serialize database to state object
   */
  private async serializeDatabase(
    onProgress?: ProgressCallback
  ): Promise<DatabaseState> {
    this.report(onProgress, 'serialize', 10, 1, 5, 'Collecting database statistics...');

    const stats = await this.db.stats();
    const vectors: VectorEntry[] = [];

    this.report(onProgress, 'serialize', 30, 2, 5, 'Extracting vectors...');

    // Extract all vectors
    const vectorIds = await this.getAllVectorIds();
    const total = vectorIds.length;
    for (let i = 0; i < total; i++) {
      const vector = await this.db.get(vectorIds[i]);
      if (vector) {
        vectors.push(vector);
      }

      if (onProgress && i % this.options.batchSize === 0) {
        const percentage = 30 + Math.floor((i / total) * 40);
        this.report(onProgress, 'serialize', percentage, i, total, `Extracted ${i}/${total} vectors...`);
      }
    }

    const state: DatabaseState = {
      version: '1.0.0',
      options: {
        dimension: stats.dimension,
        metric: stats.metric as any,
      },
      stats,
      vectors,
      timestamp: Date.now(),
    };

    this.report(onProgress, 'serialize', 90, 4, 5, 'Computing checksum...');
    state.checksum = this.computeChecksum(state);

    return state;
  }

  /**
   * Deserialize state object into database
   *
   * @param state - State to insert
   * @param onProgress - Optional progress callback
   * @param clear - Whether to clear existing data first (default true)
   */
  private async deserializeDatabase(
    state: DatabaseState,
    onProgress?: ProgressCallback,
    clear: boolean = true
  ): Promise<void> {
    if (clear) {
      this.report(onProgress, 'deserialize', 40, 2, 5, 'Clearing existing data...');
      await this.db.clear();
    }

    this.report(onProgress, 'deserialize', 50, 3, 5, 'Inserting vectors...');

    // Insert vectors in batches
    const total = state.vectors.length;
    const batchSize = Math.max(1, this.options.batchSize);
    for (let start = 0; start < total; start += batchSize) {
      const batch = state.vectors.slice(start, start + batchSize);
      await this.db.insertBatch(batch);

      // Report the count including the batch that was just inserted.
      const inserted = start + batch.length;
      const percentage = 50 + Math.floor((inserted / total) * 40);
      this.report(
        onProgress,
        'deserialize',
        percentage,
        inserted,
        total,
        `Inserted ${inserted}/${total} vectors...`
      );
    }

    this.report(onProgress, 'deserialize', 95, 4, 5, 'Rebuilding index...');

    // Rebuild index
    await this.db.buildIndex();
  }

  /**
   * Write state to file in specified format
   *
   * @throws Error for the 'sqlite' format (not implemented) or an unknown format
   */
  private async writeStateToFile(
    state: DatabaseState,
    filePath: string,
    format: PersistenceFormat,
    compress: boolean
  ): Promise<void> {
    await fs.mkdir(path.dirname(filePath), { recursive: true });

    let data: Buffer;

    switch (format) {
      case 'json':
        // Pretty-print only when uncompressed.
        data = Buffer.from(JSON.stringify(state, null, compress ? 0 : 2));
        break;

      case 'binary':
        // Use simple JSON for now - in production, use MessagePack
        data = Buffer.from(JSON.stringify(state));
        break;

      case 'sqlite':
        // SQLite implementation would go here
        throw new Error('SQLite format not yet implemented');

      default:
        throw new Error(`Unsupported format: ${format}`);
    }

    switch (this.resolveCompression(compress)) {
      case 'gzip':
        data = await promisify(gzip)(data);
        break;
      case 'brotli':
        data = await promisify(brotliCompress)(data);
        break;
      default:
        break;
    }

    await fs.writeFile(filePath, data);
  }

  /**
   * Read state from file in specified format
   *
   * Compression is detected from the content, not the file name: gzip by its
   * magic number, and brotli (which has none) by falling back to brotli
   * decompression when the bytes are not valid JSON as-is.
   *
   * @throws Error for the 'sqlite' format, an unknown format, undecodable
   *   content, or content without a `vectors` array
   */
  private async readStateFromFile(
    filePath: string,
    format: PersistenceFormat
  ): Promise<DatabaseState> {
    if (format === 'sqlite') {
      throw new Error('SQLite format not yet implemented');
    }
    if (format !== 'json' && format !== 'binary') {
      throw new Error(`Unsupported format: ${format}`);
    }

    const raw = await fs.readFile(filePath);

    const asIs = async (input: Buffer): Promise<Buffer> => input;
    const decoders: Array<(input: Buffer) => Promise<Buffer>> = this.isCompressed(raw)
      ? [promisify(gunzip), promisify(brotliDecompress)]
      : [asIs, promisify(brotliDecompress)];

    let parsed: unknown;
    let decoded = false;
    let firstError: unknown;
    for (const decode of decoders) {
      try {
        parsed = JSON.parse((await decode(raw)).toString());
        decoded = true;
        break;
      } catch (error) {
        // Keep the first failure; it describes the most likely encoding.
        if (firstError === undefined) firstError = error;
      }
    }
    if (!decoded) {
      throw firstError;
    }

    // Validate before the caller clears the database for this state.
    if (
      typeof parsed !== 'object' ||
      parsed === null ||
      !Array.isArray((parsed as { vectors?: unknown }).vectors)
    ) {
      throw new Error(`Invalid database file (missing "vectors" array): ${filePath}`);
    }

    return parsed as DatabaseState;
  }

  /**
   * Get all vector IDs from database
   *
   * NOTE: only vectors whose IDs are the decimal strings "0", "1", ... within
   * twice the reported count are found; vectors with other IDs are missed.
   */
  private async getAllVectorIds(): Promise<string[]> {
    // This is a workaround - in production, VectorDB should provide an iterator
    const stats = await this.db.stats();
    const ids: string[] = [];

    // Try to get vectors by attempting sequential IDs
    // This is inefficient and should be replaced with a proper API
    for (let i = 0; i < stats.count * 2; i++) {
      const vector = await this.db.get(String(i));
      if (vector) {
        ids.push(vector.id);
      }
      if (ids.length >= stats.count) {
        break;
      }
    }

    return ids;
  }

  /**
   * Compute checksum of state object
   *
   * @returns SHA-256 hex digest of the JSON form of the state, excluding its
   *   own `checksum` field
   */
  private computeChecksum(state: DatabaseState): string {
    const { checksum: _ignored, ...stateWithoutChecksum } = state;
    const data = JSON.stringify(stateWithoutChecksum);
    return crypto.createHash('sha256').update(data).digest('hex');
  }

  /**
   * Compute checksum of file
   *
   * @returns SHA-256 hex digest of the file's bytes, read as a stream
   */
  private async computeFileChecksum(filePath: string): Promise<string> {
    return new Promise((resolve, reject) => {
      const hash = crypto.createHash('sha256');
      const stream = createReadStream(filePath);

      stream.on('data', data => hash.update(data));
      stream.on('end', () => resolve(hash.digest('hex')));
      stream.on('error', reject);
    });
  }

  /**
   * Detect file format from extension
   *
   * A trailing compression suffix (as in `database.json.gzip`) is skipped so
   * the format extension underneath is used. Unknown extensions fall back to
   * the configured default format.
   */
  private detectFormat(filePath: string): PersistenceFormat {
    let ext = path.extname(filePath).toLowerCase();
    if (ext === '.gz' || ext === '.gzip' || ext === '.br' || ext === '.brotli') {
      ext = path.extname(filePath.slice(0, -ext.length)).toLowerCase();
    }

    if (ext === '.json') return 'json';
    if (ext === '.bin' || ext === '.msgpack') return 'binary';
    if (ext === '.db' || ext === '.sqlite') return 'sqlite';
    return this.options.format;
  }

  /**
   * Check if data is gzip-compressed
   *
   * Only gzip can be recognised this way; brotli has no magic number.
   */
  private isCompressed(data: Buffer): boolean {
    // Gzip magic number: 1f 8b
    return data[0] === 0x1f && data[1] === 0x8b;
  }

  /**
   * Get default save path
   */
  private getDefaultSavePath(format: PersistenceFormat, compress: boolean): string {
    const ext = format === 'json' ? 'json' : format === 'binary' ? 'bin' : 'db';
    const algorithm = this.resolveCompression(compress);
    const compressExt = algorithm === 'none' ? '' : `.${algorithm}`;
    return path.join(this.options.baseDir, `database.${ext}${compressExt}`);
  }

  /**
   * Load incremental state
   *
   * A missing state file is normal (nothing saved yet); any other failure is
   * logged and the state is left unset, so the next save is a full save.
   */
  private async loadIncrementalState(): Promise<void> {
    const statePath = path.join(this.options.baseDir, '.incremental.json');
    try {
      const content = await fs.readFile(statePath, 'utf-8');
      const data = JSON.parse(content);
      if (!data || !Array.isArray(data.vectorIds)) {
        throw new Error('malformed incremental state');
      }
      this.incrementalState = {
        lastSave: data.lastSave,
        vectorIds: new Set(data.vectorIds),
        checksum: data.checksum,
      };
    } catch (error) {
      if (!this.isNotFound(error)) {
        console.error('Ignoring unreadable incremental state:', error);
      }
    }
  }

  /**
   * Update incremental state after save
   */
  private async updateIncrementalState(state: DatabaseState): Promise<void> {
    const vectorIds = state.vectors.map(v => v.id);
    this.incrementalState = {
      lastSave: Date.now(),
      vectorIds: new Set(vectorIds),
      checksum: state.checksum || '',
    };

    const statePath = path.join(this.options.baseDir, '.incremental.json');
    await fs.writeFile(
      statePath,
      JSON.stringify({
        lastSave: this.incrementalState.lastSave,
        vectorIds: Array.from(this.incrementalState.vectorIds),
        checksum: this.incrementalState.checksum,
      })
    );
  }

  /**
   * Clean up old snapshots beyond max limit
   */
  private async cleanupOldSnapshots(): Promise<void> {
    const snapshots = await this.listSnapshots();
    // A negative limit would make slice() count from the end; treat it as 0.
    const keep = Math.max(0, this.options.maxSnapshots);

    if (snapshots.length <= keep) {
      return;
    }

    // The list is newest-first, so everything past the limit is the oldest.
    for (const snapshot of snapshots.slice(keep)) {
      await this.deleteSnapshot(snapshot.id);
    }
  }
}
|
|
|
|
// ============================================================================
// Utility Functions
// ============================================================================
|
|
|
|
/**
 * Format file size in human-readable format
 *
 * Divides by 1024 until the value drops below 1024 or the largest unit (TB)
 * is reached, then renders it with two decimal places.
 *
 * @param bytes - File size in bytes
 * @returns Formatted string (e.g., "1.50 MB")
 */
export function formatFileSize(bytes: number): string {
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  let size = bytes;
  let unitIndex = 0;

  while (size >= 1024 && unitIndex < units.length - 1) {
    size /= 1024;
    unitIndex++;
  }

  return `${size.toFixed(2)} ${units[unitIndex]}`;
}
|
|
|
|
/**
 * Format timestamp as ISO string
 *
 * @param timestamp - Unix timestamp in milliseconds
 * @returns ISO formatted date string
 * @throws RangeError if the timestamp does not represent a valid date
 */
export function formatTimestamp(timestamp: number): string {
  return new Date(timestamp).toISOString();
}
|
|
|
|
/**
 * Estimate memory usage of database state
 *
 * This is a rough figure: each vector is counted as `dimension * 4` bytes
 * plus a flat 100-byte metadata allowance, and the character length of the
 * state's JSON form is added on top as overhead.
 *
 * @param state - Database state
 * @returns Estimated memory usage in bytes
 */
export function estimateMemoryUsage(state: DatabaseState): number {
  // Rough estimation
  const vectorSize = state.stats.dimension * 4; // 4 bytes per float
  const metadataSize = 100; // Average metadata size
  const totalVectorSize = state.vectors.length * (vectorSize + metadataSize);
  const overheadSize = JSON.stringify(state).length;

  return totalVectorSize + overheadSize;
}
|