Files
wifi-densepose/vendor/ruvector/npm/packages/rudag/src/dag.ts

700 lines
18 KiB
TypeScript

/**
* High-level DAG API with WASM acceleration
* Provides a TypeScript-friendly interface to the WASM DAG implementation
*
* @security All inputs are validated to prevent injection attacks
* @performance Results are cached to minimize WASM calls
*/
import { createStorage, DagStorage, MemoryStorage, StoredDag } from './storage';
// WASM module type definitions
/**
* Shape of the loaded WASM module as this file consumes it.
* Only the `WasmDag` class export is used: its constructor and the two
* static deserializers.
*/
interface WasmDagModule {
WasmDag: {
/** Constructs a fresh DAG instance (used by `RuDag.init`). */
new(): WasmDagInstance;
/** Builds an instance from serialized bytes (used by `RuDag.fromBytes`). */
from_bytes(data: Uint8Array): WasmDagInstance;
/** Builds an instance from a JSON string (used by `RuDag.fromJSON`). */
from_json(json: string): WasmDagInstance;
};
}
/**
* Methods of a WASM-side DAG instance that `RuDag` calls.
* An instance must be released with `free()`; `RuDag.dispose` does this.
*/
interface WasmDagInstance {
/** Adds a node; the returned number is used by `RuDag.addNode` as the node ID. */
add_node(op: number, cost: number): number;
/** Adds an edge; the boolean is returned unchanged by `RuDag.addEdge`. */
add_edge(from: number, to: number): boolean;
/** Number of nodes currently in the DAG. */
node_count(): number;
/** Number of edges currently in the DAG. */
edge_count(): number;
/** Topological ordering; `RuDag.topoSort` converts it to a plain array. */
topo_sort(): Uint32Array;
/**
* Critical path, either as a JSON string or as an object.
* `RuDag.criticalPath` parses the string form and validates both forms.
*/
critical_path(): string | CriticalPath;
/** Per-node scores for the given mechanism (numeric `AttentionMechanism`). */
attention(mechanism: number): Float32Array;
/** Serializes the DAG to bytes (input format of `WasmDag.from_bytes`). */
to_bytes(): Uint8Array;
/** Serializes the DAG to a JSON string (input format of `WasmDag.from_json`). */
to_json(): string;
/** Releases the instance; it must not be used afterwards. */
free(): void;
}
/**
* Operator types for DAG nodes
*
* The numeric value is what gets passed to the WASM `add_node` call.
* `RuDag.addNode` accepts any integer in 0-255, so operator codes that are
* not listed here are also allowed.
*/
export enum DagOperator {
/** Table scan operation */
SCAN = 0,
/** Filter/WHERE clause */
FILTER = 1,
/** Column projection/SELECT */
PROJECT = 2,
/** Join operation */
JOIN = 3,
/** Aggregation (GROUP BY) */
AGGREGATE = 4,
/** Sort/ORDER BY */
SORT = 5,
/** Limit/TOP N */
LIMIT = 6,
/** Union of results */
UNION = 7,
/** Custom user-defined operator (highest value `RuDag.addNode` accepts) */
CUSTOM = 255,
}
/**
* Attention mechanism types for node scoring
*
* The numeric value is passed to the WASM `attention` call.
* `RuDag.attention` rejects anything outside the integer range 0-2 and
* defaults to `CRITICAL_PATH` when no mechanism is given.
*/
export enum AttentionMechanism {
/** Score by position in topological order */
TOPOLOGICAL = 0,
/** Score by distance from critical path */
CRITICAL_PATH = 1,
/** Equal scores for all nodes */
UNIFORM = 2,
}
/**
* Node representation in the DAG
*
* This is the JS-side record `RuDag` keeps for each node it adds; it is what
* `getNode()` / `getNodes()` return and what `save()` writes as node metadata.
*/
export interface DagNode {
/** Unique identifier for this node (the value returned by the WASM `add_node` call) */
id: number;
/** The operator type (e.g., SCAN, FILTER, JOIN); any integer 0-255 is accepted */
operator: DagOperator | number;
/** Execution cost estimate for this node (`RuDag.addNode` requires a non-negative finite number) */
cost: number;
/** Optional arbitrary metadata attached to the node; stored as given, never interpreted here */
metadata?: Record<string, unknown>;
}
/**
* Edge representation (directed connection between nodes)
*
* The edge points from node `from` to node `to`, matching the argument order
* of `RuDag.addEdge(from, to)`.
*/
export interface DagEdge {
/** Source node ID */
from: number;
/** Target node ID */
to: number;
}
/**
* Critical path result from DAG analysis
*
* Values coming back from WASM are checked by `isCriticalPath` before use:
* `path` must be an array of finite numbers and `cost` a finite number.
*/
export interface CriticalPath {
/** Node IDs in the critical path */
path: number[];
/** Total cost of the critical path */
cost: number;
}
/**
* DAG configuration options
*
* Every field is optional; `new RuDag()` with no options is valid.
*/
export interface RuDagOptions {
/**
* Custom ID for the DAG (auto-generated if not provided).
* An ID that fails validation (empty, longer than 256 characters, or
* containing anything other than letters, digits, `-` and `_`) is silently
* replaced by a generated one rather than rejected.
*/
id?: string;
/** Human-readable name */
name?: string;
/**
* Storage backend (IndexedDB/Memory/null for no persistence).
* Leaving this `undefined` makes the constructor call `createStorage()`;
* passing `null` explicitly disables persistence, so `save()` returns `null`.
*/
storage?: DagStorage | MemoryStorage | null;
/** Auto-save changes to storage (default: true) */
autoSave?: boolean;
/**
* Error handler for background save failures.
* When omitted, failures are reported with `console.warn` instead.
*/
onSaveError?: (error: unknown) => void;
}
// Module-level singleton state for the WASM loader:
//   wasmModule      - the loaded module once a load has succeeded
//   wasmLoadPromise - the in-flight (or completed) load, shared by concurrent callers
let wasmModule: WasmDagModule | null = null;
let wasmLoadPromise: Promise<WasmDagModule> | null = null;

/**
 * Load the WASM module once and hand the same module to every caller.
 *
 * The bundler build (`../pkg`) is tried first; if importing or initialising
 * it fails for any reason, the Node.js build (`../pkg-node`) is tried instead.
 * When both attempts fail the shared promise is cleared so that a later call
 * starts a fresh load.
 *
 * @returns The loaded WASM module
 * @throws {Error} If neither build can be loaded
 */
async function initWasm(): Promise<WasmDagModule> {
  if (wasmModule !== null) {
    return wasmModule;
  }
  if (wasmLoadPromise !== null) {
    // A load is already running (or finished); share it instead of starting another.
    return wasmLoadPromise;
  }

  const pendingLoad = (async (): Promise<WasmDagModule> => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let loaded: any;
    try {
      // Browser/bundler build: its default export, when present, is the async initialiser.
      loaded = await import('../pkg/ruvector_dag_wasm.js');
      if (typeof loaded.default === 'function') {
        await loaded.default();
      }
    } catch {
      try {
        // Node.js build: used as-is, no initialiser call.
        loaded = await import('../pkg-node/ruvector_dag_wasm.js');
      } catch (e) {
        wasmLoadPromise = null; // Allow retry on failure
        throw new Error(`Failed to load WASM module: ${e}`);
      }
    }
    wasmModule = loaded as WasmDagModule;
    return wasmModule;
  })();

  wasmLoadPromise = pendingLoad;
  return pendingLoad;
}
/**
 * Type guard: does `obj` have the shape of a `CriticalPath`?
 *
 * Accepts only plain objects (prototype is `Object.prototype` or `null`)
 * whose `path` is an array of finite numbers and whose `cost` is a finite
 * number. Extra properties are tolerated.
 *
 * @security Rejects objects with unexpected prototypes coming from untrusted WASM output
 * @param obj - Value to inspect (typically parsed WASM output)
 * @returns true when `obj` can safely be treated as a `CriticalPath`
 */
function isCriticalPath(obj: unknown): obj is CriticalPath {
  if (obj === null || typeof obj !== 'object') {
    return false;
  }

  // Class instances, arrays, Maps etc. are rejected here.
  const proto = Object.getPrototypeOf(obj);
  if (proto !== null && proto !== Object.prototype) {
    return false;
  }

  const record = obj as Record<string, unknown>;

  const steps = record.path;
  if (!Array.isArray(steps)) {
    return false;
  }
  const hasBadStep = steps.some(
    (step: unknown) => typeof step !== 'number' || !Number.isFinite(step),
  );
  if (hasBadStep) {
    return false;
  }

  const total = record.cost;
  return typeof total === 'number' && Number.isFinite(total);
}
/**
 * Check whether a DAG ID is safe to use as a storage key.
 *
 * A valid ID is a string of 1 to 256 characters drawn only from ASCII
 * letters, digits, dash and underscore.
 *
 * @security Rejects path separators, whitespace and other special characters
 * @param id - Candidate ID
 * @returns true when the ID satisfies every rule above
 */
function isValidDagId(id: string): boolean {
  // Runtime type check: callers from plain JavaScript may pass anything.
  if (typeof id !== 'string') {
    return false;
  }

  const length = id.length;
  if (length < 1 || length > 256) {
    return false;
  }

  // Only allow alphanumeric, dash, underscore
  return /^[a-zA-Z0-9_-]+$/.test(id);
}
/**
 * Return the caller's ID when it is usable, otherwise make one up.
 *
 * A missing, empty or invalid ID is not an error: it is replaced by a
 * generated ID of the form `dag-<epoch millis>-<base-36 random suffix>`.
 *
 * @param id - Caller-supplied ID, if any
 * @returns `id` unchanged when valid, otherwise a freshly generated ID
 */
function sanitizeOrGenerateId(id?: string): string {
  if (!id || !isValidDagId(id)) {
    // Generate safe ID: timestamp first, then up to six base-36 characters.
    const pieces = ['dag', Date.now(), Math.random().toString(36).slice(2, 8)];
    return pieces.join('-');
  }
  return id;
}
/**
 * RuDag - High-performance DAG with WASM acceleration and persistence
 *
 * Wraps a WASM-side DAG instance, keeps a JS-side record of every node added
 * through `addNode` (operator, cost, metadata), and optionally persists the
 * serialized DAG to a storage backend.
 *
 * Lifecycle: construct, `await init()`, use, then `dispose()` to release the
 * WASM instance and the storage connection.
 *
 * @example
 * ```typescript
 * const dag = await new RuDag({ name: 'my-query' }).init();
 * const scan = dag.addNode(DagOperator.SCAN, 10.0);
 * const filter = dag.addNode(DagOperator.FILTER, 2.0);
 * dag.addEdge(scan, filter);
 * const { path, cost } = dag.criticalPath();
 * dag.dispose();
 * ```
 */
export class RuDag {
  private wasm: WasmDagInstance | null = null;
  private nodes: Map<number, DagNode> = new Map();
  private storage: DagStorage | MemoryStorage | null;
  private readonly id: string;
  private name?: string;
  private autoSave: boolean;
  private initialized = false;
  private onSaveError?: (error: unknown) => void;

  // Caches for expensive WASM calls. `null` means "not computed since the
  // last structural change"; each cache is valid on its own.
  private _topoCache: number[] | null = null;
  private _criticalPathCache: CriticalPath | null = null;

  /**
   * Create an uninitialized DAG. Call `init()` before using it.
   * @param options - See `RuDagOptions`; an invalid `id` is replaced by a generated one
   */
  constructor(options: RuDagOptions = {}) {
    this.id = sanitizeOrGenerateId(options.id);
    this.name = options.name;
    // `undefined` selects the default backend; an explicit `null` disables persistence.
    this.storage = options.storage === undefined ? createStorage() : options.storage;
    this.autoSave = options.autoSave ?? true;
    this.onSaveError = options.onSaveError;
  }

  /**
   * Initialize the DAG with WASM module and storage
   * @returns This instance for chaining
   * @throws {Error} If WASM module fails to load
   * @throws {Error} If storage initialization fails
   */
  async init(): Promise<this> {
    if (this.initialized) return this;

    const mod = await initWasm();
    try {
      this.wasm = new mod.WasmDag();
    } catch (error) {
      throw new Error(`Failed to create WASM DAG instance: ${error}`);
    }

    try {
      if (this.storage) {
        await this.storage.init();
      }
    } catch (error) {
      // Cleanup WASM on storage failure
      if (this.wasm) {
        this.wasm.free();
        this.wasm = null;
      }
      throw new Error(`Failed to initialize storage: ${error}`);
    }

    this.initialized = true;
    return this;
  }

  /**
   * Ensure DAG is initialized
   * @returns The live WASM instance
   * @throws {Error} If DAG not initialized (or already disposed)
   */
  private ensureInit(): WasmDagInstance {
    if (!this.wasm) {
      throw new Error('DAG not initialized. Call init() first.');
    }
    return this.wasm;
  }

  /**
   * Handle background save errors: forward to the user's handler when one
   * was supplied, otherwise log a warning.
   */
  private handleSaveError(error: unknown): void {
    if (this.onSaveError) {
      this.onSaveError(error);
    } else {
      console.warn('[RuDag] Background save failed:', error);
    }
  }

  /**
   * Start a fire-and-forget save when auto-save is enabled.
   * Failures are routed to `handleSaveError` and never reach the caller.
   */
  private scheduleAutoSave(): void {
    if (!this.autoSave) return;
    void this.save().catch((e: unknown) => this.handleSaveError(e));
  }

  /**
   * Invalidate caches (called when DAG structure changes)
   */
  private invalidateCache(): void {
    this._topoCache = null;
    this._criticalPathCache = null;
  }

  /**
   * Add a node to the DAG
   * @param operator - The operator type (any integer 0-255)
   * @param cost - Execution cost estimate (must be non-negative)
   * @param metadata - Optional metadata
   * @returns The new node ID
   * @throws {Error} If cost or operator is invalid
   * @throws {Error} If the DAG is not initialized
   */
  addNode(operator: DagOperator | number, cost: number, metadata?: Record<string, unknown>): number {
    // Input validation
    if (!Number.isFinite(cost) || cost < 0) {
      throw new Error(`Invalid cost: ${cost}. Must be a non-negative finite number.`);
    }
    if (!Number.isInteger(operator) || operator < 0 || operator > 255) {
      throw new Error(`Invalid operator: ${operator}. Must be an integer 0-255.`);
    }

    const wasm = this.ensureInit();
    const id = wasm.add_node(operator, cost);
    this.nodes.set(id, { id, operator, cost, metadata });

    this.invalidateCache();
    this.scheduleAutoSave();
    return id;
  }

  /**
   * Add an edge between nodes
   * @param from - Source node ID
   * @param to - Target node ID
   * @returns true if edge was added, false if it would create a cycle
   * @throws {Error} If node IDs are invalid
   * @throws {Error} If the DAG is not initialized
   */
  addEdge(from: number, to: number): boolean {
    // Input validation
    if (!Number.isInteger(from) || from < 0) {
      throw new Error(`Invalid 'from' node ID: ${from}`);
    }
    if (!Number.isInteger(to) || to < 0) {
      throw new Error(`Invalid 'to' node ID: ${to}`);
    }
    if (from === to) {
      throw new Error('Self-loops are not allowed in a DAG');
    }

    const wasm = this.ensureInit();
    const success = wasm.add_edge(from, to);
    if (success) {
      // Only a successful insert changes the structure.
      this.invalidateCache();
      this.scheduleAutoSave();
    }
    return success;
  }

  /**
   * Get node count
   */
  get nodeCount(): number {
    return this.ensureInit().node_count();
  }

  /**
   * Get edge count
   */
  get edgeCount(): number {
    return this.ensureInit().edge_count();
  }

  /**
   * Get topological sort (cached)
   *
   * The result is cached until the next structural change, independently of
   * whether `criticalPath()` has been called.
   *
   * @returns Array of node IDs in topological order (a fresh copy on every call)
   */
  topoSort(): number[] {
    if (this._topoCache === null) {
      this._topoCache = Array.from(this.ensureInit().topo_sort());
    }
    return [...this._topoCache]; // Return copy to prevent mutation of the cache
  }

  /**
   * Find critical path (cached)
   * @returns Object with path (node IDs) and total cost (a fresh copy on every call)
   * @throws {Error} If WASM returns invalid data
   */
  criticalPath(): CriticalPath {
    if (this._criticalPathCache === null) {
      const result = this.ensureInit().critical_path();

      // The WASM side may hand back either a JSON string or an object.
      let parsed: unknown;
      if (typeof result === 'string') {
        try {
          parsed = JSON.parse(result);
        } catch (e) {
          throw new Error(`Invalid critical path JSON from WASM: ${e}`);
        }
      } else {
        parsed = result;
      }

      if (!isCriticalPath(parsed)) {
        throw new Error('Invalid critical path structure from WASM');
      }
      this._criticalPathCache = parsed;
    }

    const cached = this._criticalPathCache;
    return { ...cached, path: [...cached.path] };
  }

  /**
   * Compute attention scores for nodes
   * @param mechanism - Attention mechanism to use
   * @returns Array of scores (one per node)
   * @throws {Error} If the mechanism is not an integer in 0-2
   */
  attention(mechanism: AttentionMechanism = AttentionMechanism.CRITICAL_PATH): number[] {
    if (!Number.isInteger(mechanism) || mechanism < 0 || mechanism > 2) {
      throw new Error(`Invalid attention mechanism: ${mechanism}`);
    }
    const result = this.ensureInit().attention(mechanism);
    return Array.from(result);
  }

  /**
   * Get node by ID
   *
   * Only nodes added through `addNode` on this instance are known here.
   */
  getNode(id: number): DagNode | undefined {
    return this.nodes.get(id);
  }

  /**
   * Get all nodes
   *
   * Only nodes added through `addNode` on this instance are known here.
   */
  getNodes(): DagNode[] {
    return Array.from(this.nodes.values());
  }

  /**
   * Serialize to bytes (bincode format)
   */
  toBytes(): Uint8Array {
    return this.ensureInit().to_bytes();
  }

  /**
   * Serialize to JSON string
   */
  toJSON(): string {
    return this.ensureInit().to_json();
  }

  /**
   * Save DAG to storage
   * @returns StoredDag record or null if no storage configured
   */
  async save(): Promise<StoredDag | null> {
    if (!this.storage) return null;

    // Everything below up to the `storage.save` call runs synchronously, so
    // the snapshot is taken at the moment `save()` is invoked.
    const data = this.toBytes();
    return this.storage.save(this.id, data, {
      name: this.name,
      metadata: {
        nodeCount: this.nodeCount,
        edgeCount: this.edgeCount,
        nodes: Object.fromEntries(this.nodes),
      },
    });
  }

  /**
   * Run `action` against a storage backend, creating a default backend when
   * the caller did not supply one. A backend created here is always closed
   * afterwards; a caller-supplied backend is left open.
   */
  private static async withStorage<T>(
    storage: DagStorage | MemoryStorage | undefined,
    action: (store: DagStorage | MemoryStorage) => Promise<T>,
  ): Promise<T> {
    const isOwnedStorage = !storage;
    const store = storage ?? createStorage();
    try {
      await store.init();
      return await action(store);
    } finally {
      if (isOwnedStorage) store.close();
    }
  }

  /**
   * Shared tail of `fromBytes` / `fromJSON`: mark the DAG initialized and
   * bring up its storage, releasing the WASM instance if storage fails.
   */
  private static async attachStorage(dag: RuDag): Promise<RuDag> {
    dag.initialized = true;
    if (dag.storage) {
      try {
        await dag.storage.init();
      } catch (error) {
        dag.wasm?.free();
        dag.wasm = null;
        throw new Error(`Failed to initialize storage: ${error}`);
      }
    }
    return dag;
  }

  /**
   * Load DAG from storage by ID
   *
   * On success the storage backend is handed to the returned DAG, which
   * closes it in `dispose()`. On "not found" or on any failure, a backend
   * created by this call is closed before returning or throwing.
   *
   * NOTE(review): the JS-side node map is not repopulated from the stored
   * record, so `getNode()` / `getNodes()` are empty on a freshly loaded DAG.
   * Confirm whether the node metadata written by `save()` should be restored.
   *
   * @param id - DAG ID to load
   * @param storage - Storage backend (creates default if not provided)
   * @returns Loaded DAG or null if not found
   * @throws {Error} If ID contains invalid characters
   */
  static async load(id: string, storage?: DagStorage | MemoryStorage): Promise<RuDag | null> {
    if (!isValidDagId(id)) {
      throw new Error(`Invalid DAG ID: "${id}". Must be alphanumeric with dashes/underscores only.`);
    }

    const isOwnedStorage = !storage;
    const store = storage ?? createStorage();
    try {
      await store.init();
      const record = await store.get(id);
      if (record) {
        // `await` is required: without it a rejection from fromBytes would
        // skip the catch below and leak an owned storage connection.
        return await RuDag.fromBytes(record.data, {
          id: record.id,
          name: record.name,
          storage: store,
        });
      }
    } catch (error) {
      if (isOwnedStorage) store.close();
      throw error;
    }

    // Not found: nothing took ownership of the backend.
    if (isOwnedStorage) store.close();
    return null;
  }

  /**
   * Create DAG from bytes
   * @param data - Serialized DAG data
   * @param options - Configuration options
   * @throws {Error} If data is empty or invalid
   * @throws {Error} If storage initialization fails
   */
  static async fromBytes(data: Uint8Array, options: RuDagOptions = {}): Promise<RuDag> {
    if (!data || data.length === 0) {
      throw new Error('Cannot create DAG from empty or null data');
    }

    const mod = await initWasm();
    const dag = new RuDag(options);
    try {
      dag.wasm = mod.WasmDag.from_bytes(data);
    } catch (error) {
      throw new Error(`Failed to deserialize DAG from bytes: ${error}`);
    }
    return RuDag.attachStorage(dag);
  }

  /**
   * Create DAG from JSON
   * @param json - JSON string
   * @param options - Configuration options
   * @throws {Error} If JSON is empty or invalid
   * @throws {Error} If storage initialization fails
   */
  static async fromJSON(json: string, options: RuDagOptions = {}): Promise<RuDag> {
    if (!json || json.trim().length === 0) {
      throw new Error('Cannot create DAG from empty or null JSON');
    }

    const mod = await initWasm();
    const dag = new RuDag(options);
    try {
      dag.wasm = mod.WasmDag.from_json(json);
    } catch (error) {
      throw new Error(`Failed to deserialize DAG from JSON: ${error}`);
    }
    return RuDag.attachStorage(dag);
  }

  /**
   * List all stored DAGs
   * @param storage - Storage backend (creates default if not provided)
   */
  static async listStored(storage?: DagStorage | MemoryStorage): Promise<StoredDag[]> {
    return RuDag.withStorage(storage, (store) => store.list());
  }

  /**
   * Delete a stored DAG
   * @param id - DAG ID to delete
   * @param storage - Storage backend (creates default if not provided)
   * @returns The boolean reported by the storage backend's `delete`
   * @throws {Error} If ID contains invalid characters
   */
  static async deleteStored(id: string, storage?: DagStorage | MemoryStorage): Promise<boolean> {
    if (!isValidDagId(id)) {
      throw new Error(`Invalid DAG ID: "${id}". Must be alphanumeric with dashes/underscores only.`);
    }
    return RuDag.withStorage(storage, (store) => store.delete(id));
  }

  /**
   * Get storage statistics
   * @param storage - Storage backend (creates default if not provided)
   */
  static async storageStats(storage?: DagStorage | MemoryStorage): Promise<{ count: number; totalSize: number }> {
    return RuDag.withStorage(storage, (store) => store.stats());
  }

  /**
   * Get DAG ID
   */
  getId(): string {
    return this.id;
  }

  /**
   * Get DAG name
   */
  getName(): string | undefined {
    return this.name;
  }

  /**
   * Set DAG name
   * @param name - New name for the DAG
   */
  setName(name: string): void {
    this.name = name;
    this.scheduleAutoSave();
  }

  /**
   * Cleanup resources (WASM memory and storage connection)
   * Always call this when done with a DAG to prevent memory leaks
   *
   * After this call the instance behaves as uninitialized, and its storage
   * backend has been closed and detached.
   */
  dispose(): void {
    if (this.wasm) {
      this.wasm.free();
      this.wasm = null;
    }
    if (this.storage) {
      this.storage.close();
      this.storage = null;
    }
    this.nodes.clear();
    this.invalidateCache();
    this.initialized = false;
  }
}