/**
 * @ruvector/edge-net Benchmark Utilities
 *
 * Comprehensive benchmarking for model optimization
 *
 * @module @ruvector/edge-net/models/benchmark
 */

import { EventEmitter } from 'events';
import { ModelOptimizer, TARGET_MODELS, QUANTIZATION_CONFIGS } from './model-optimizer.js';

// ============================================
// BENCHMARK CONFIGURATION
// ============================================

/**
 * Benchmark profiles for different scenarios.
 *
 * Each profile supplies `iterations`, `warmupIterations`, `inputSizes`
 * (pairs of `[batchSize, seqLen]`) and `quantMethods` (keys looked up in
 * QUANTIZATION_CONFIGS). `measureAccuracy` enables the accuracy pass in
 * `ComprehensiveBenchmark#benchmarkQuantization`. `memoryLimit` is declared
 * on the 'edge-device' profile but is not read by any code in this file.
 */
export const BENCHMARK_PROFILES = {
  'quick': {
    iterations: 50,
    warmupIterations: 5,
    inputSizes: [[1, 128]],
    quantMethods: ['int8'],
  },
  'standard': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 512], [4, 256]],
    quantMethods: ['int8', 'int4', 'fp16'],
  },
  'comprehensive': {
    iterations: 500,
    warmupIterations: 50,
    inputSizes: [[1, 64], [1, 128], [1, 256], [1, 512], [1, 1024], [4, 256], [8, 128]],
    quantMethods: ['int8', 'int4', 'fp16', 'int8-fp16-mixed'],
  },
  'edge-device': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 256]],
    quantMethods: ['int4'],
    memoryLimit: 512, // MB
  },
  'accuracy-focus': {
    iterations: 200,
    warmupIterations: 20,
    inputSizes: [[1, 512]],
    quantMethods: ['fp16', 'int8'],
    measureAccuracy: true,
  },
};

// ============================================
// ACCURACY MEASUREMENT
// ============================================

/**
 * Accuracy metrics for quantized models.
 *
 * Collects pairs of (original, quantized) output vectors and derives
 * error/similarity metrics from them. All metrics compare only the first
 * `min(original.length, quantized.length)` elements of each pair.
 */
export class AccuracyMeter {
  constructor() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }

  /**
   * Add prediction pair for accuracy measurement.
   *
   * @param {ArrayLike<number>} original - Output of the original model.
   * @param {ArrayLike<number>} quantized - Output of the quantized model.
   * @param {*} [groundTruth=null] - Stored only when not null; none of the
   *   metrics in this class read it.
   */
  addPrediction(original, quantized, groundTruth = null) {
    this.originalOutputs.push(original);
    this.quantizedOutputs.push(quantized);
    if (groundTruth !== null) {
      this.groundTruth.push(groundTruth);
    }
  }

  /**
   * Compute Mean Squared Error, averaged over all recorded pairs.
   *
   * @returns {number} Mean of the per-pair MSE values; 0 when there is
   *   nothing to compare.
   */
  computeMSE() {
    let totalMSE = 0;
    let count = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      // An empty pair would contribute 0 / 0 = NaN and poison the average.
      if (len === 0) {
        continue;
      }

      let squaredError = 0;
      for (let j = 0; j < len; j++) {
        const diff = orig[j] - quant[j];
        squaredError += diff * diff;
      }

      totalMSE += squaredError / len;
      count++;
    }

    return count === 0 ? 0 : totalMSE / count;
  }

  /**
   * Compute cosine similarity between original and quantized outputs,
   * averaged over all recorded pairs.
   *
   * @returns {number} Mean cosine similarity; 1.0 when no pairs are recorded.
   */
  computeCosineSimilarity() {
    if (this.originalOutputs.length === 0) return 1.0;

    let totalSim = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      let dot = 0;
      let normA = 0;
      let normB = 0;
      for (let j = 0; j < len; j++) {
        dot += orig[j] * quant[j];
        normA += orig[j] * orig[j];
        normB += quant[j] * quant[j];
      }

      // The small epsilon keeps the division finite for all-zero vectors.
      totalSim += dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }

    return totalSim / this.originalOutputs.length;
  }

  /**
   * Compute max absolute error across every element of every pair.
   *
   * @returns {number} Largest |original - quantized|; 0 when nothing is recorded.
   */
  computeMaxError() {
    let maxError = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        maxError = Math.max(maxError, Math.abs(orig[j] - quant[j]));
      }
    }

    return maxError;
  }

  /**
   * Get comprehensive accuracy metrics.
   *
   * @returns {{mse: number, rmse: number, cosineSimilarity: number,
   *   maxError: number, samples: number, accuracyRetained: number}}
   *   `accuracyRetained` is the cosine similarity expressed as a percentage.
   */
  getMetrics() {
    const mse = this.computeMSE();
    // Computed once and reused for both similarity fields.
    const cosineSimilarity = this.computeCosineSimilarity();

    return {
      mse,
      rmse: Math.sqrt(mse),
      cosineSimilarity,
      maxError: this.computeMaxError(),
      samples: this.originalOutputs.length,
      accuracyRetained: cosineSimilarity * 100,
    };
  }

  /**
   * Reset meter, discarding all recorded data.
   */
  reset() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }
}

// ============================================
// LATENCY PROFILER
// ============================================

/**
 * Detailed latency profiling.
 *
 * Keeps, per label, a list of measured durations (from `performance.now()`)
 * plus the start time of the measurement currently in progress, if any.
 */
export class LatencyProfiler {
  constructor() {
    this.measurements = new Map();
  }

  /**
   * Start timing a section. Calling `start` again for a label that is
   * already running overwrites the earlier start time.
   *
   * @param {string} label - Section name.
   */
  start(label) {
    if (!this.measurements.has(label)) {
      this.measurements.set(label, {
        samples: [],
        running: null,
      });
    }
    this.measurements.get(label).running = performance.now();
  }

  /**
   * End timing a section and record the elapsed time as a sample.
   *
   * @param {string} label - Section name previously passed to `start`.
   * @returns {number} Elapsed duration, or 0 when the label is unknown or
   *   not currently running.
   */
  end(label) {
    const entry = this.measurements.get(label);
    if (entry && entry.running !== null) {
      const duration = performance.now() - entry.running;
      entry.samples.push(duration);
      entry.running = null;
      return duration;
    }
    return 0;
  }

  /**
   * Get statistics for a label.
   *
   * @param {string} label - Section name.
   * @returns {?{label: string, count: number, mean: number, median: number,
   *   min: number, max: number, p95: number, p99: number, std: number}}
   *   `null` when the label has no samples. `median`, `p95` and `p99` are
   *   picked by index from the sorted samples (no interpolation); `std` is
   *   the population standard deviation.
   */
  getStats(label) {
    const entry = this.measurements.get(label);
    if (!entry || entry.samples.length === 0) {
      return null;
    }

    // Sort a copy so the recorded order is left untouched.
    const samples = [...entry.samples].sort((a, b) => a - b);
    const count = samples.length;
    const sum = samples.reduce((a, b) => a + b, 0);
    const mean = sum / count;
    const variance = samples.reduce((acc, v) => acc + (v - mean) ** 2, 0) / count;

    return {
      label,
      count,
      mean,
      median: samples[Math.floor(count / 2)],
      min: samples[0],
      max: samples[count - 1],
      p95: samples[Math.floor(count * 0.95)],
      p99: samples[Math.floor(count * 0.99)],
      std: Math.sqrt(variance),
    };
  }

  /**
   * Get all statistics, keyed by label. Labels without samples map to `null`.
   *
   * @returns {Object<string, ?Object>}
   */
  getAllStats() {
    const stats = {};
    for (const label of this.measurements.keys()) {
      stats[label] = this.getStats(label);
    }
    return stats;
  }

  /**
   * Reset profiler, dropping every label and sample.
   */
  reset() {
    this.measurements.clear();
  }
}

// ============================================
// MEMORY PROFILER
// ============================================

/**
 * Memory usage profiler.
 *
 * Records labelled snapshots of memory usage (in MB) and tracks the highest
 * `heapUsed` value seen across snapshots.
 */
export class MemoryProfiler {
  constructor() {
    this.snapshots = [];
    this.peakMemory = 0;
  }

  /**
   * Take memory snapshot.
   *
   * @param {string} [label='snapshot'] - Name stored with the snapshot.
   * @returns {{label: string, timestamp: number, heapUsed: number,
   *   heapTotal: number, external: number, rss: number}} The stored snapshot.
   */
  snapshot(label = 'snapshot') {
    const memUsage = this.getMemoryUsage();
    const snapshot = {
      label,
      timestamp: Date.now(),
      ...memUsage,
    };

    this.snapshots.push(snapshot);
    this.peakMemory = Math.max(this.peakMemory, memUsage.heapUsed);

    return snapshot;
  }

  /**
   * Get current memory usage in MB.
   *
   * Uses `process.memoryUsage()` when available, then `performance.memory`,
   * and otherwise reports zeros.
   *
   * @returns {{heapUsed: number, heapTotal: number, external: number, rss: number}}
   */
  getMemoryUsage() {
    const BYTES_PER_MB = 1024 * 1024;

    if (typeof process !== 'undefined' && process.memoryUsage) {
      const usage = process.memoryUsage();
      return {
        heapUsed: usage.heapUsed / BYTES_PER_MB,
        heapTotal: usage.heapTotal / BYTES_PER_MB,
        external: usage.external / BYTES_PER_MB,
        rss: usage.rss / BYTES_PER_MB,
      };
    }

    // Browser fallback
    if (typeof performance !== 'undefined' && performance.memory) {
      return {
        heapUsed: performance.memory.usedJSHeapSize / BYTES_PER_MB,
        heapTotal: performance.memory.totalJSHeapSize / BYTES_PER_MB,
        external: 0,
        rss: 0,
      };
    }

    return { heapUsed: 0, heapTotal: 0, external: 0, rss: 0 };
  }

  /**
   * Get memory delta between two snapshots.
   *
   * When a label was used more than once, the earliest snapshot with that
   * label is the one compared.
   *
   * @param {string} startLabel
   * @param {string} endLabel
   * @returns {?{heapDelta: number, timeDelta: number}} `null` when either
   *   label has no snapshot.
   */
  getDelta(startLabel, endLabel) {
    const start = this.snapshots.find((s) => s.label === startLabel);
    const end = this.snapshots.find((s) => s.label === endLabel);

    if (!start || !end) return null;

    return {
      heapDelta: end.heapUsed - start.heapUsed,
      timeDelta: end.timestamp - start.timestamp,
    };
  }

  /**
   * Get profiler summary.
   *
   * @returns {{snapshots: number, peakMemoryMB: number,
   *   currentMemoryMB: number, history: Object[]}}
   */
  getSummary() {
    return {
      snapshots: this.snapshots.length,
      peakMemoryMB: this.peakMemory,
      currentMemoryMB: this.getMemoryUsage().heapUsed,
      // Copy so a summary handed out earlier does not keep growing as more
      // snapshots are taken later.
      history: [...this.snapshots],
    };
  }

  /**
   * Reset profiler.
   */
  reset() {
    this.snapshots = [];
    this.peakMemory = 0;
  }
}

// ============================================
// COMPREHENSIVE BENCHMARK RUNNER
// ============================================

/**
 * ComprehensiveBenchmark - Full benchmark suite for model optimization.
 *
 * Emits 'suite:start', 'suite:complete', 'benchmark:start' and
 * 'benchmark:complete' events as it runs.
 */
export class ComprehensiveBenchmark extends EventEmitter {
  /**
   * @param {Object} [options]
   * @param {Object} [options.optimizer] - Optimizer exposing
   *   `quantize(model, method)`; defaults to a new ModelOptimizer.
   */
  constructor(options = {}) {
    super();
    this.optimizer = options.optimizer || new ModelOptimizer();
    this.latencyProfiler = new LatencyProfiler();
    this.memoryProfiler = new MemoryProfiler();
    this.accuracyMeter = new AccuracyMeter();
    this.results = [];
  }

  /**
   * Run benchmark suite on a model.
   *
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES; an
   *   unknown name falls back to the 'standard' profile.
   * @returns {Promise<Object>} Suite results (also appended to `this.results`).
   * @throws {Error} When `model` is not a key of TARGET_MODELS.
   */
  async runSuite(model, profile = 'standard') {
    const profileConfig = BENCHMARK_PROFILES[profile] || BENCHMARK_PROFILES.standard;
    const modelConfig = TARGET_MODELS[model];

    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }

    this.emit('suite:start', { model, profile });

    const suiteResults = {
      model,
      profile,
      modelConfig,
      timestamp: new Date().toISOString(),
      benchmarks: [],
    };

    // Memory baseline
    this.memoryProfiler.snapshot('baseline');

    // Benchmark each quantization method (sequentially, so timings do not
    // overlap one another)
    for (const method of profileConfig.quantMethods) {
      const methodResult = await this.benchmarkQuantization(
        model,
        method,
        profileConfig
      );
      suiteResults.benchmarks.push(methodResult);
    }

    // Memory after benchmarks
    this.memoryProfiler.snapshot('after-benchmarks');

    // Add memory profile
    suiteResults.memory = this.memoryProfiler.getSummary();

    // Add summary
    suiteResults.summary = this.generateSummary(suiteResults);

    this.results.push(suiteResults);
    this.emit('suite:complete', suiteResults);

    return suiteResults;
  }

  /**
   * Benchmark a specific quantization method.
   *
   * NOTE(review): `latencyProfile` in the result comes from the shared
   * `this.latencyProfiler`, which is not reset between methods, so it also
   * contains samples recorded for earlier methods — confirm whether that is
   * intended.
   *
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @param {Object} config - Profile config providing `inputSizes`,
   *   `warmupIterations`, `iterations` and optionally `measureAccuracy`.
   * @returns {Promise<Object>} Per-method benchmark result.
   * @throws {Error} When `model` or `method` is unknown.
   */
  async benchmarkQuantization(model, method, config) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const modelConfig = TARGET_MODELS[model];

    // Fail fast with a clear message instead of a TypeError on a property
    // read after all the benchmarks have already been run.
    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }
    if (!quantConfig) {
      throw new Error(`Unknown quantization method: ${method}`);
    }

    this.emit('benchmark:start', { model, method });

    // Quantize model
    this.latencyProfiler.start('quantization');
    const quantResult = await this.optimizer.quantize(model, method);
    this.latencyProfiler.end('quantization');

    // Simulate inference benchmarks for each input size
    const inferenceBenchmarks = [];

    for (const inputSize of config.inputSizes) {
      const [batchSize, seqLen] = inputSize;
      const profilerLabel = `inference-${batchSize}x${seqLen}`;

      // This profiler sample spans the warmup and every measured iteration.
      this.latencyProfiler.start(profilerLabel);

      // Warmup
      for (let i = 0; i < config.warmupIterations; i++) {
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
      }

      // Measure
      const times = [];
      for (let i = 0; i < config.iterations; i++) {
        const start = performance.now();
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
        times.push(performance.now() - start);
      }

      this.latencyProfiler.end(profilerLabel);

      times.sort((a, b) => a - b);

      // With zero iterations there are no samples; report zeros rather than
      // throwing from reduce() or emitting NaN/undefined.
      const hasSamples = times.length > 0;
      const totalMs = times.reduce((acc, t) => acc + t, 0);
      const meanMs = hasSamples ? totalMs / times.length : 0;

      inferenceBenchmarks.push({
        inputSize: `${batchSize}x${seqLen}`,
        iterations: config.iterations,
        meanMs,
        medianMs: times[Math.floor(times.length / 2)] ?? 0,
        p95Ms: times[Math.floor(times.length * 0.95)] ?? 0,
        minMs: times[0] ?? 0,
        maxMs: times[times.length - 1] ?? 0,
        tokensPerSecond: hasSamples ? (seqLen * batchSize * 1000) / meanMs : 0,
      });
    }

    // Measure accuracy if requested
    let accuracyMetrics = null;
    if (config.measureAccuracy) {
      // Generate test outputs
      for (let i = 0; i < 100; i++) {
        const original = new Float32Array(modelConfig.hiddenSize).map(() => Math.random());
        const quantized = this.simulateQuantizedOutput(original, method);
        this.accuracyMeter.addPrediction(Array.from(original), Array.from(quantized));
      }
      accuracyMetrics = this.accuracyMeter.getMetrics();
      this.accuracyMeter.reset();
    }

    const result = {
      method,
      quantization: quantResult,
      inference: inferenceBenchmarks,
      accuracy: accuracyMetrics,
      latencyProfile: this.latencyProfiler.getAllStats(),
      compression: {
        original: modelConfig.originalSize,
        quantized: modelConfig.originalSize / quantConfig.compression,
        ratio: quantConfig.compression,
      },
      recommendation: this.getRecommendation(model, method, inferenceBenchmarks),
    };

    this.emit('benchmark:complete', result);

    return result;
  }

  /**
   * Simulate model inference by waiting for a computed delay and returning
   * random values.
   *
   * @param {Object} config - Model config; reads `originalSize` and `hiddenSize`.
   * @param {number} batchSize
   * @param {number} seqLen
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @returns {Promise<Float32Array>} `hiddenSize` random values.
   */
  async simulateInference(config, batchSize, seqLen, method) {
    // Base latency depends on model size and batch
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const baseLatency = (config.originalSize / 100) * (batchSize * seqLen / 512);
    // `||` rather than `??`: a speedup of 0 also falls back to 1, which
    // avoids dividing by zero below.
    const speedup = quantConfig?.speedup || 1;

    const latency = baseLatency / speedup;
    await new Promise((resolve) => setTimeout(resolve, latency));

    return new Float32Array(config.hiddenSize).map(() => Math.random());
  }

  /**
   * Simulate quantized output with added noise.
   *
   * Each element gets uniform noise in the range ±`accuracyLoss` of the
   * method's config (±0.01 when the config or the field is missing).
   *
   * @param {ArrayLike<number>} original
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @returns {Float32Array} Noisy copy with the same length as `original`.
   */
  simulateQuantizedOutput(original, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    // `??` so that an explicit accuracyLoss of 0 means "no noise" instead of
    // silently becoming 0.01.
    const noise = quantConfig?.accuracyLoss ?? 0.01;

    return new Float32Array(original.length).map((_, i) => {
      return original[i] + (Math.random() - 0.5) * 2 * noise;
    });
  }

  /**
   * Generate recommendation based on benchmark results.
   *
   * Scoring: +30 size target met; +30 / +20 for average latency below
   * 10 / 50; +25 / +15 for accuracy loss below 0.02 / 0.05; +15 for a
   * compression ratio of at least 4.
   *
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @param {Array<{meanMs: number}>} inferenceBenchmarks
   * @returns {{score: number, rating: string, reasons: string[], recommended: boolean}}
   */
  getRecommendation(model, method, inferenceBenchmarks) {
    const modelConfig = TARGET_MODELS[model];
    const quantConfig = QUANTIZATION_CONFIGS[method];

    // NaN when the list is empty; NaN fails both comparisons below, so no
    // latency points are awarded in that case.
    const avgLatency = inferenceBenchmarks.reduce((a, b) => a + b.meanMs, 0) / inferenceBenchmarks.length;
    const targetMet = (modelConfig.originalSize / quantConfig.compression) <= modelConfig.targetSize;

    let score = 0;
    const reasons = [];

    // Size target met
    if (targetMet) {
      score += 30;
      reasons.push('Meets size target');
    }

    // Good latency
    if (avgLatency < 10) {
      score += 30;
      reasons.push('Excellent latency (<10ms)');
    } else if (avgLatency < 50) {
      score += 20;
      reasons.push('Good latency (<50ms)');
    }

    // Low accuracy loss
    if (quantConfig.accuracyLoss < 0.02) {
      score += 25;
      reasons.push('Minimal accuracy loss (<2%)');
    } else if (quantConfig.accuracyLoss < 0.05) {
      score += 15;
      reasons.push('Acceptable accuracy loss (<5%)');
    }

    // Compression ratio
    if (quantConfig.compression >= 4) {
      score += 15;
      reasons.push('High compression (4x+)');
    }

    return {
      score,
      rating: score >= 80 ? 'Excellent' : score >= 60 ? 'Good' : score >= 40 ? 'Acceptable' : 'Poor',
      reasons,
      recommended: score >= 60,
    };
  }

  /**
   * Generate suite summary.
   *
   * The best method is the one with the highest recommendation score above
   * 0 (the first one wins ties). `avgLatencyMs` is the mean of the
   * per-method mean latencies, taken over methods that have inference rows;
   * it is 0 when none do.
   *
   * @param {Object} suiteResults - Reads `benchmarks`, `model` and `modelConfig`.
   * @returns {Object} Summary record.
   */
  generateSummary(suiteResults) {
    const benchmarks = suiteResults.benchmarks;

    // Find best method
    let bestMethod = null;
    let bestScore = 0;

    for (const b of benchmarks) {
      if (b.recommendation.score > bestScore) {
        bestScore = b.recommendation.score;
        bestMethod = b.method;
      }
    }

    // Calculate averages. Methods without inference rows are skipped so a
    // single empty list cannot turn the whole average into NaN.
    const perMethodMeans = benchmarks
      .filter((b) => b.inference.length > 0)
      .map((b) => b.inference.reduce((s, i) => s + i.meanMs, 0) / b.inference.length);
    const avgLatency = perMethodMeans.length > 0
      ? perMethodMeans.reduce((sum, m) => sum + m, 0) / perMethodMeans.length
      : 0;

    return {
      modelKey: suiteResults.model,
      modelType: suiteResults.modelConfig.type,
      originalSizeMB: suiteResults.modelConfig.originalSize,
      targetSizeMB: suiteResults.modelConfig.targetSize,
      bestMethod,
      bestScore,
      avgLatencyMs: avgLatency,
      methodsEvaluated: benchmarks.length,
      recommendation: bestMethod
        ? `Use ${bestMethod} quantization for optimal edge deployment`
        : 'No suitable method found',
    };
  }

  /**
   * Run benchmarks on all target models, one after another.
   *
   * A failing model does not abort the run: it is recorded as
   * `{ model, error }` with the error's message.
   *
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES.
   * @returns {Promise<{timestamp: string, profile: string, results: Object[],
   *   summary: Object}>}
   */
  async runAllModels(profile = 'standard') {
    const allResults = [];

    for (const modelKey of Object.keys(TARGET_MODELS)) {
      try {
        const result = await this.runSuite(modelKey, profile);
        allResults.push(result);
      } catch (error) {
        allResults.push({
          model: modelKey,
          error: error.message,
        });
      }
    }

    return {
      timestamp: new Date().toISOString(),
      profile,
      results: allResults,
      summary: this.generateOverallSummary(allResults),
    };
  }

  /**
   * Generate overall summary for all models.
   *
   * @param {Object[]} allResults - Entries with a truthy `error` count as failed.
   * @returns {{totalModels: number, successfulBenchmarks: number,
   *   failedBenchmarks: number, recommendations: Object[]}}
   */
  generateOverallSummary(allResults) {
    const successful = allResults.filter((r) => !r.error);

    return {
      totalModels: allResults.length,
      successfulBenchmarks: successful.length,
      failedBenchmarks: allResults.length - successful.length,
      recommendations: successful.map((r) => ({
        model: r.model,
        bestMethod: r.summary?.bestMethod,
        score: r.summary?.bestScore,
      })),
    };
  }

  /**
   * Export results as a plain object suitable for JSON serialization.
   *
   * @returns {{exported: string, results: Object[]}}
   */
  exportResults() {
    return {
      exported: new Date().toISOString(),
      results: this.results,
    };
  }

  /**
   * Reset benchmark state (profilers, accuracy meter and stored results).
   */
  reset() {
    this.latencyProfiler.reset();
    this.memoryProfiler.reset();
    this.accuracyMeter.reset();
    this.results = [];
  }
}

// ============================================
// EXPORTS
// ============================================

// BENCHMARK_PROFILES is already exported at its declaration above.
export default ComprehensiveBenchmark;