/**
 * @ruvector/edge-net Benchmark Utilities
 *
 * Comprehensive benchmarking for model optimization
 *
 * @module @ruvector/edge-net/models/benchmark
 */

import { EventEmitter } from 'events';
import { ModelOptimizer, TARGET_MODELS, QUANTIZATION_CONFIGS } from './model-optimizer.js';
|
|
|
|
// ============================================
// BENCHMARK CONFIGURATION
// ============================================

/**
 * Benchmark profiles for different scenarios.
 *
 * Each profile controls measured/warmup iteration counts, the set of
 * [batchSize, sequenceLength] input shapes to exercise, and which
 * quantization methods to evaluate.
 */
export const BENCHMARK_PROFILES = {
  // Fast smoke-test run: one small shape, int8 only.
  quick: {
    iterations: 50,
    warmupIterations: 5,
    inputSizes: [[1, 128]],
    quantMethods: ['int8'],
  },
  // Default balanced run across common shapes and methods.
  standard: {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 512], [4, 256]],
    quantMethods: ['int8', 'int4', 'fp16'],
  },
  // Exhaustive sweep over sequence lengths, batch sizes, and all methods.
  comprehensive: {
    iterations: 500,
    warmupIterations: 50,
    inputSizes: [[1, 64], [1, 128], [1, 256], [1, 512], [1, 1024], [4, 256], [8, 128]],
    quantMethods: ['int8', 'int4', 'fp16', 'int8-fp16-mixed'],
  },
  // Constrained-device run: small shapes, int4 only, with a memory cap.
  'edge-device': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 256]],
    quantMethods: ['int4'],
    memoryLimit: 512, // MB
  },
  // Accuracy-oriented run: long sequence, higher-fidelity methods.
  'accuracy-focus': {
    iterations: 200,
    warmupIterations: 20,
    inputSizes: [[1, 512]],
    quantMethods: ['fp16', 'int8'],
    measureAccuracy: true,
  },
};
|
|
|
|
// ============================================
// ACCURACY MEASUREMENT
// ============================================

/**
 * Accuracy metrics for quantized models.
 *
 * Collects paired outputs from the original and quantized model and
 * derives error metrics: MSE/RMSE, cosine similarity, and max abs error.
 */
export class AccuracyMeter {
  constructor() {
    // NOTE(review): `predictions` is never written by this class; kept for
    // interface compatibility — confirm whether external callers use it.
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }

  /**
   * Add a prediction pair for accuracy measurement.
   * @param {ArrayLike<number>} original - Output from the full-precision model.
   * @param {ArrayLike<number>} quantized - Output from the quantized model.
   * @param {*} [groundTruth=null] - Optional ground-truth label; stored only when provided.
   */
  addPrediction(original, quantized, groundTruth = null) {
    this.originalOutputs.push(original);
    this.quantizedOutputs.push(quantized);
    if (groundTruth !== null) {
      this.groundTruth.push(groundTruth);
    }
  }

  /**
   * Compute Mean Squared Error, averaged over all recorded pairs.
   * Each pair is compared element-wise up to the shorter length.
   * Zero-length pairs are skipped so they cannot produce NaN (fix: the
   * previous version divided by len even when len was 0).
   * @returns {number} Average per-pair MSE; 0 when there is no usable data.
   */
  computeMSE() {
    if (this.originalOutputs.length === 0) return 0;

    let totalMSE = 0;
    let count = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      const len = Math.min(orig.length, quant.length);
      if (len === 0) continue; // avoid 0/0 for degenerate pairs

      let mse = 0;
      for (let j = 0; j < len; j++) {
        const diff = orig[j] - quant[j];
        mse += diff * diff;
      }
      totalMSE += mse / len;
      count++;
    }

    return count === 0 ? 0 : totalMSE / count;
  }

  /**
   * Compute mean cosine similarity between original and quantized outputs.
   * The epsilon in the denominator prevents division by zero for zero vectors.
   * @returns {number} Average cosine similarity; 1.0 when there is no data.
   */
  computeCosineSimilarity() {
    if (this.originalOutputs.length === 0) return 1.0;

    let totalSim = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      let dot = 0, normA = 0, normB = 0;
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        dot += orig[j] * quant[j];
        normA += orig[j] * orig[j];
        normB += quant[j] * quant[j];
      }

      totalSim += dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }

    return totalSim / this.originalOutputs.length;
  }

  /**
   * Compute the maximum absolute element-wise error across all pairs.
   * @returns {number} Max abs error; 0 when there is no data.
   */
  computeMaxError() {
    let maxError = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        maxError = Math.max(maxError, Math.abs(orig[j] - quant[j]));
      }
    }

    return maxError;
  }

  /**
   * Get comprehensive accuracy metrics over all recorded pairs.
   * @returns {{mse: number, rmse: number, cosineSimilarity: number,
   *            maxError: number, samples: number, accuracyRetained: number}}
   */
  getMetrics() {
    const mse = this.computeMSE();
    // Compute once and reuse (fix: previously recomputed for accuracyRetained).
    const cosineSimilarity = this.computeCosineSimilarity();

    return {
      mse,
      rmse: Math.sqrt(mse),
      cosineSimilarity,
      maxError: this.computeMaxError(),
      samples: this.originalOutputs.length,
      accuracyRetained: cosineSimilarity * 100,
    };
  }

  /**
   * Reset the meter, discarding all recorded pairs and labels.
   */
  reset() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }
}
|
|
|
|
// ============================================
// LATENCY PROFILER
// ============================================

/**
 * Detailed latency profiling.
 *
 * Records named timing sections via start()/end() pairs and derives
 * order statistics (mean, median, percentiles, std) per label.
 */
export class LatencyProfiler {
  constructor() {
    // label -> { samples: number[] (completed durations, ms),
    //            running: number|null (performance.now() at start, or null) }
    this.measurements = new Map();
  }

  /**
   * Start timing a section. Calling start() again before end() for the
   * same label discards the previous in-flight measurement.
   * @param {string} label - Section name.
   */
  start(label) {
    if (!this.measurements.has(label)) {
      this.measurements.set(label, {
        samples: [],
        running: null,
      });
    }
    this.measurements.get(label).running = performance.now();
  }

  /**
   * End timing a section and record the elapsed duration.
   * @param {string} label - Section name passed to start().
   * @returns {number} Elapsed milliseconds, or 0 when no start() is pending.
   */
  end(label) {
    const entry = this.measurements.get(label);
    if (entry && entry.running !== null) {
      const duration = performance.now() - entry.running;
      entry.samples.push(duration);
      entry.running = null;
      return duration;
    }
    return 0;
  }

  /**
   * Get statistics for a label.
   * @param {string} label - Section name.
   * @returns {?{label: string, count: number, mean: number, median: number,
   *             min: number, max: number, p95: number, p99: number, std: number}}
   *          null when the label is unknown or has no completed samples.
   */
  getStats(label) {
    const entry = this.measurements.get(label);
    if (!entry || entry.samples.length === 0) {
      return null;
    }

    // Sort a copy so recorded samples keep insertion order.
    const samples = [...entry.samples].sort((a, b) => a - b);
    const sum = samples.reduce((a, b) => a + b, 0);
    // Hoisted: the mean was previously recomputed per element inside the
    // std-deviation reduce.
    const mean = sum / samples.length;

    return {
      label,
      count: samples.length,
      mean,
      median: samples[Math.floor(samples.length / 2)],
      min: samples[0],
      max: samples[samples.length - 1],
      p95: samples[Math.floor(samples.length * 0.95)],
      p99: samples[Math.floor(samples.length * 0.99)],
      std: Math.sqrt(samples.reduce((acc, v) => acc + (v - mean) ** 2, 0) / samples.length),
    };
  }

  /**
   * Get statistics for every recorded label.
   * @returns {Object<string, ?object>} Map of label -> getStats(label).
   */
  getAllStats() {
    const stats = {};
    for (const label of this.measurements.keys()) {
      stats[label] = this.getStats(label);
    }
    return stats;
  }

  /**
   * Reset the profiler, discarding all labels and samples.
   */
  reset() {
    this.measurements.clear();
  }
}
|
|
|
|
// ============================================
// MEMORY PROFILER
// ============================================

/**
 * Memory usage profiler.
 *
 * Takes labelled heap snapshots (in MB) and tracks the peak heap seen.
 * Works in Node (process.memoryUsage) with a browser fallback
 * (performance.memory); reports zeros when neither is available.
 */
export class MemoryProfiler {
  constructor() {
    this.snapshots = [];
    this.peakMemory = 0;
  }

  /**
   * Take a labelled memory snapshot and update the peak-heap watermark.
   * @param {string} [label='snapshot'] - Name for later lookup via getDelta().
   * @returns {object} The recorded snapshot (label, timestamp, usage in MB).
   */
  snapshot(label = 'snapshot') {
    const usage = this.getMemoryUsage();
    const entry = {
      label,
      timestamp: Date.now(),
      ...usage,
    };

    this.snapshots.push(entry);
    if (usage.heapUsed > this.peakMemory) {
      this.peakMemory = usage.heapUsed;
    }

    return entry;
  }

  /**
   * Read current memory usage in MB from whichever API is available.
   * @returns {{heapUsed: number, heapTotal: number, external: number, rss: number}}
   */
  getMemoryUsage() {
    const BYTES_PER_MB = 1024 * 1024;

    // Node.js path.
    if (typeof process !== 'undefined' && process.memoryUsage) {
      const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
      return {
        heapUsed: heapUsed / BYTES_PER_MB,
        heapTotal: heapTotal / BYTES_PER_MB,
        external: external / BYTES_PER_MB,
        rss: rss / BYTES_PER_MB,
      };
    }

    // Browser fallback (non-standard performance.memory).
    if (typeof performance !== 'undefined' && performance.memory) {
      return {
        heapUsed: performance.memory.usedJSHeapSize / BYTES_PER_MB,
        heapTotal: performance.memory.totalJSHeapSize / BYTES_PER_MB,
        external: 0,
        rss: 0,
      };
    }

    // No memory API available in this environment.
    return { heapUsed: 0, heapTotal: 0, external: 0, rss: 0 };
  }

  /**
   * Compute heap/time deltas between the first snapshots with the given labels.
   * @param {string} startLabel
   * @param {string} endLabel
   * @returns {?{heapDelta: number, timeDelta: number}} null if either label is missing.
   */
  getDelta(startLabel, endLabel) {
    const first = this.snapshots.find((s) => s.label === startLabel);
    const last = this.snapshots.find((s) => s.label === endLabel);

    if (!first || !last) return null;

    return {
      heapDelta: last.heapUsed - first.heapUsed,
      timeDelta: last.timestamp - first.timestamp,
    };
  }

  /**
   * Summarize the profiler state: snapshot count, peak and current heap (MB),
   * and the full snapshot history.
   */
  getSummary() {
    return {
      snapshots: this.snapshots.length,
      peakMemoryMB: this.peakMemory,
      currentMemoryMB: this.getMemoryUsage().heapUsed,
      history: this.snapshots,
    };
  }

  /**
   * Reset the profiler, clearing history and the peak watermark.
   */
  reset() {
    this.snapshots = [];
    this.peakMemory = 0;
  }
}
|
|
|
|
// ============================================
// COMPREHENSIVE BENCHMARK RUNNER
// ============================================

/**
 * ComprehensiveBenchmark - Full benchmark suite for model optimization.
 *
 * Combines quantization, simulated inference timing, optional accuracy
 * measurement, and memory profiling for the models in TARGET_MODELS.
 *
 * Events:
 *  - 'suite:start' / 'suite:complete'         — one full model suite
 *  - 'benchmark:start' / 'benchmark:complete' — one quantization method
 */
export class ComprehensiveBenchmark extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {ModelOptimizer} [options.optimizer] - Optimizer to use; a fresh one is created when omitted.
   */
  constructor(options = {}) {
    super();
    this.optimizer = options.optimizer || new ModelOptimizer();
    this.latencyProfiler = new LatencyProfiler();
    this.memoryProfiler = new MemoryProfiler();
    this.accuracyMeter = new AccuracyMeter();
    this.results = [];
  }

  /**
   * Run the benchmark suite on a model.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES; unknown names fall back to 'standard'.
   * @returns {Promise<object>} Suite results; also appended to this.results.
   * @throws {Error} When the model key is unknown.
   */
  async runSuite(model, profile = 'standard') {
    const profileConfig = BENCHMARK_PROFILES[profile] || BENCHMARK_PROFILES.standard;
    const modelConfig = TARGET_MODELS[model];

    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }

    this.emit('suite:start', { model, profile });

    const suiteResults = {
      model,
      profile,
      modelConfig,
      timestamp: new Date().toISOString(),
      benchmarks: [],
    };

    // Memory baseline before any quantization work.
    this.memoryProfiler.snapshot('baseline');

    // Benchmark each quantization method sequentially (they share the profilers).
    for (const method of profileConfig.quantMethods) {
      const methodResult = await this.benchmarkQuantization(
        model,
        method,
        profileConfig
      );
      suiteResults.benchmarks.push(methodResult);
    }

    // Memory after benchmarks.
    this.memoryProfiler.snapshot('after-benchmarks');

    suiteResults.memory = this.memoryProfiler.getSummary();
    suiteResults.summary = this.generateSummary(suiteResults);

    this.results.push(suiteResults);
    this.emit('suite:complete', suiteResults);

    return suiteResults;
  }

  /**
   * Benchmark a specific quantization method.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @param {object} config - Profile config (iterations, warmupIterations, inputSizes, measureAccuracy).
   * @returns {Promise<object>} Per-method benchmark result.
   */
  async benchmarkQuantization(model, method, config) {
    this.emit('benchmark:start', { model, method });

    const quantConfig = QUANTIZATION_CONFIGS[method];
    const modelConfig = TARGET_MODELS[model];

    // Quantize the model, timing the conversion itself.
    this.latencyProfiler.start('quantization');
    const quantResult = await this.optimizer.quantize(model, method);
    this.latencyProfiler.end('quantization');

    // Simulated inference benchmarks, one entry per input shape.
    const inferenceBenchmarks = [];

    for (const [batchSize, seqLen] of config.inputSizes) {
      this.latencyProfiler.start(`inference-${batchSize}x${seqLen}`);

      // Warmup iterations are executed but not measured.
      for (let i = 0; i < config.warmupIterations; i++) {
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
      }

      // Measured iterations.
      const times = [];
      for (let i = 0; i < config.iterations; i++) {
        const start = performance.now();
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
        times.push(performance.now() - start);
      }

      this.latencyProfiler.end(`inference-${batchSize}x${seqLen}`);

      times.sort((a, b) => a - b);
      // Hoisted: the mean was previously computed twice (for meanMs and
      // again inside tokensPerSecond).
      const meanMs = times.reduce((a, b) => a + b, 0) / times.length;

      inferenceBenchmarks.push({
        inputSize: `${batchSize}x${seqLen}`,
        iterations: config.iterations,
        meanMs,
        medianMs: times[Math.floor(times.length / 2)],
        p95Ms: times[Math.floor(times.length * 0.95)],
        minMs: times[0],
        maxMs: times[times.length - 1],
        tokensPerSecond: (seqLen * batchSize * 1000) / meanMs,
      });
    }

    // Measure accuracy if requested, using simulated output pairs.
    let accuracyMetrics = null;
    if (config.measureAccuracy) {
      for (let i = 0; i < 100; i++) {
        const original = new Float32Array(modelConfig.hiddenSize).map(() => Math.random());
        const quantized = this.simulateQuantizedOutput(original, method);
        this.accuracyMeter.addPrediction(Array.from(original), Array.from(quantized));
      }
      accuracyMetrics = this.accuracyMeter.getMetrics();
      this.accuracyMeter.reset();
    }

    const result = {
      method,
      quantization: quantResult,
      inference: inferenceBenchmarks,
      accuracy: accuracyMetrics,
      // NOTE(review): the latency profiler is shared and never reset between
      // methods, so this snapshot also contains samples from earlier methods
      // in the same suite — confirm that is intended.
      latencyProfile: this.latencyProfiler.getAllStats(),
      compression: {
        original: modelConfig.originalSize,
        quantized: modelConfig.originalSize / quantConfig.compression,
        ratio: quantConfig.compression,
      },
      recommendation: this.getRecommendation(model, method, inferenceBenchmarks),
    };

    this.emit('benchmark:complete', result);

    return result;
  }

  /**
   * Simulate model inference with a latency model scaled by model size,
   * batch, sequence length, and the method's configured speedup.
   * @returns {Promise<Float32Array>} A random vector of hiddenSize values.
   */
  async simulateInference(config, batchSize, seqLen, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    // Base latency grows with model size and with tokens per call.
    const baseLatency = (config.originalSize / 100) * (batchSize * seqLen / 512);
    const speedup = quantConfig?.speedup || 1;

    const latency = baseLatency / speedup;
    await new Promise(resolve => setTimeout(resolve, latency));

    return new Float32Array(config.hiddenSize).map(() => Math.random());
  }

  /**
   * Simulate a quantized output by adding uniform noise scaled by the
   * method's configured accuracy loss.
   * @param {Float32Array} original - Reference output.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @returns {Float32Array} Noisy copy of the input.
   */
  simulateQuantizedOutput(original, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const noise = quantConfig?.accuracyLoss || 0.01;

    return new Float32Array(original.length).map((_, i) => {
      return original[i] + (Math.random() - 0.5) * 2 * noise;
    });
  }

  /**
   * Score a quantization method (0-100) from size target, latency,
   * accuracy loss, and compression ratio, and derive a rating.
   * @returns {{score: number, rating: string, reasons: string[], recommended: boolean}}
   */
  getRecommendation(model, method, inferenceBenchmarks) {
    const modelConfig = TARGET_MODELS[model];
    const quantConfig = QUANTIZATION_CONFIGS[method];

    // NaN when inferenceBenchmarks is empty; all comparisons below are then
    // false, so no latency points are awarded — which is the safe outcome.
    const avgLatency = inferenceBenchmarks.reduce((a, b) => a + b.meanMs, 0) / inferenceBenchmarks.length;
    const targetMet = (modelConfig.originalSize / quantConfig.compression) <= modelConfig.targetSize;

    let score = 0;
    const reasons = [];

    // Size target met: +30.
    if (targetMet) {
      score += 30;
      reasons.push('Meets size target');
    }

    // Latency tiers: +30 / +20.
    if (avgLatency < 10) {
      score += 30;
      reasons.push('Excellent latency (<10ms)');
    } else if (avgLatency < 50) {
      score += 20;
      reasons.push('Good latency (<50ms)');
    }

    // Accuracy-loss tiers: +25 / +15.
    if (quantConfig.accuracyLoss < 0.02) {
      score += 25;
      reasons.push('Minimal accuracy loss (<2%)');
    } else if (quantConfig.accuracyLoss < 0.05) {
      score += 15;
      reasons.push('Acceptable accuracy loss (<5%)');
    }

    // Compression bonus: +15.
    if (quantConfig.compression >= 4) {
      score += 15;
      reasons.push('High compression (4x+)');
    }

    return {
      score,
      rating: score >= 80 ? 'Excellent' : score >= 60 ? 'Good' : score >= 40 ? 'Acceptable' : 'Poor',
      reasons,
      recommended: score >= 60,
    };
  }

  /**
   * Generate a suite summary: best-scoring method plus aggregate latency.
   * Guards against empty benchmark/inference lists so the summary never
   * contains NaN (fix: previous version divided by possibly-zero lengths).
   */
  generateSummary(suiteResults) {
    const benchmarks = suiteResults.benchmarks;

    // Pick the method with the highest recommendation score.
    let bestMethod = null;
    let bestScore = 0;

    for (const b of benchmarks) {
      if (b.recommendation.score > bestScore) {
        bestScore = b.recommendation.score;
        bestMethod = b.method;
      }
    }

    // Mean of each method's mean latency across its input shapes.
    const avgLatency = benchmarks.length === 0 ? 0 : benchmarks.reduce((sum, b) => {
      if (b.inference.length === 0) return sum;
      return sum + b.inference.reduce((s, i) => s + i.meanMs, 0) / b.inference.length;
    }, 0) / benchmarks.length;

    return {
      modelKey: suiteResults.model,
      modelType: suiteResults.modelConfig.type,
      originalSizeMB: suiteResults.modelConfig.originalSize,
      targetSizeMB: suiteResults.modelConfig.targetSize,
      bestMethod,
      bestScore,
      avgLatencyMs: avgLatency,
      methodsEvaluated: benchmarks.length,
      recommendation: bestMethod ? `Use ${bestMethod} quantization for optimal edge deployment` : 'No suitable method found',
    };
  }

  /**
   * Run benchmarks on every model in TARGET_MODELS.
   * Per-model failures are captured as { model, error } entries rather
   * than aborting the whole run.
   * @param {string} [profile='standard'] - Profile applied to each suite.
   * @returns {Promise<object>} Timestamped results plus an overall summary.
   */
  async runAllModels(profile = 'standard') {
    const allResults = [];

    for (const modelKey of Object.keys(TARGET_MODELS)) {
      try {
        const result = await this.runSuite(modelKey, profile);
        allResults.push(result);
      } catch (error) {
        allResults.push({
          model: modelKey,
          error: error.message,
        });
      }
    }

    return {
      timestamp: new Date().toISOString(),
      profile,
      results: allResults,
      summary: this.generateOverallSummary(allResults),
    };
  }

  /**
   * Generate an overall summary (success/failure counts and the best
   * method per successfully benchmarked model).
   */
  generateOverallSummary(allResults) {
    const successful = allResults.filter(r => !r.error);

    return {
      totalModels: allResults.length,
      successfulBenchmarks: successful.length,
      failedBenchmarks: allResults.length - successful.length,
      recommendations: successful.map(r => ({
        model: r.model,
        bestMethod: r.summary?.bestMethod,
        score: r.summary?.bestScore,
      })),
    };
  }

  /**
   * Export all accumulated suite results with an export timestamp.
   */
  exportResults() {
    return {
      exported: new Date().toISOString(),
      results: this.results,
    };
  }

  /**
   * Reset all profilers, the accuracy meter, and accumulated results.
   */
  reset() {
    this.latencyProfiler.reset();
    this.memoryProfiler.reset();
    this.accuracyMeter.reset();
    this.results = [];
  }
}

// ============================================
// EXPORTS
// ============================================

// BENCHMARK_PROFILES is already exported at its declaration above.
export default ComprehensiveBenchmark;
|