/**
 * @ruvector/edge-net Benchmark Utilities
 *
 * Comprehensive benchmarking for model optimization
 *
 * @module @ruvector/edge-net/models/benchmark
 */

import { EventEmitter } from 'events';
import { ModelOptimizer, TARGET_MODELS, QUANTIZATION_CONFIGS } from './model-optimizer.js';
|
|
|
|
// ============================================
// BENCHMARK CONFIGURATION
// ============================================

/**
 * Benchmark profiles for different scenarios.
 *
 * Each profile controls measured/warmup iteration counts, the set of
 * [batchSize, sequenceLength] input shapes to exercise, and which
 * quantization methods to evaluate.
 */
export const BENCHMARK_PROFILES = {
  // Fast smoke-test run: one small shape, int8 only.
  quick: {
    iterations: 50,
    warmupIterations: 5,
    inputSizes: [[1, 128]],
    quantMethods: ['int8'],
  },
  // Default balanced run across common shapes and methods.
  standard: {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 512], [4, 256]],
    quantMethods: ['int8', 'int4', 'fp16'],
  },
  // Exhaustive sweep over sequence lengths, batch sizes, and all methods.
  comprehensive: {
    iterations: 500,
    warmupIterations: 50,
    inputSizes: [[1, 64], [1, 128], [1, 256], [1, 512], [1, 1024], [4, 256], [8, 128]],
    quantMethods: ['int8', 'int4', 'fp16', 'int8-fp16-mixed'],
  },
  // Constrained-device run: small shapes, int4 only, with a memory cap.
  'edge-device': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 256]],
    quantMethods: ['int4'],
    memoryLimit: 512, // MB
  },
  // Accuracy-oriented run: long sequence, higher-fidelity methods.
  'accuracy-focus': {
    iterations: 200,
    warmupIterations: 20,
    inputSizes: [[1, 512]],
    quantMethods: ['fp16', 'int8'],
    measureAccuracy: true,
  },
};
|
|
|
|
// ============================================
// ACCURACY MEASUREMENT
// ============================================

/**
 * Accuracy metrics for quantized models.
 *
 * Collects paired outputs from the original and quantized model and
 * derives error metrics: MSE/RMSE, cosine similarity, and max abs error.
 */
export class AccuracyMeter {
  constructor() {
    // NOTE(review): `predictions` is never written by this class; kept for
    // interface compatibility — confirm whether external callers use it.
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }

  /**
   * Add a prediction pair for accuracy measurement.
   * @param {ArrayLike<number>} original - Output from the full-precision model.
   * @param {ArrayLike<number>} quantized - Output from the quantized model.
   * @param {*} [groundTruth=null] - Optional ground-truth label; stored only when provided.
   */
  addPrediction(original, quantized, groundTruth = null) {
    this.originalOutputs.push(original);
    this.quantizedOutputs.push(quantized);
    if (groundTruth !== null) {
      this.groundTruth.push(groundTruth);
    }
  }

  /**
   * Compute Mean Squared Error, averaged over all recorded pairs.
   * Each pair is compared element-wise up to the shorter length.
   * Zero-length pairs are skipped so they cannot produce NaN (fix: the
   * previous version divided by len even when len was 0).
   * @returns {number} Average per-pair MSE; 0 when there is no usable data.
   */
  computeMSE() {
    if (this.originalOutputs.length === 0) return 0;

    let totalMSE = 0;
    let count = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      const len = Math.min(orig.length, quant.length);
      if (len === 0) continue; // avoid 0/0 for degenerate pairs

      let mse = 0;
      for (let j = 0; j < len; j++) {
        const diff = orig[j] - quant[j];
        mse += diff * diff;
      }
      totalMSE += mse / len;
      count++;
    }

    return count === 0 ? 0 : totalMSE / count;
  }

  /**
   * Compute mean cosine similarity between original and quantized outputs.
   * The epsilon in the denominator prevents division by zero for zero vectors.
   * @returns {number} Average cosine similarity; 1.0 when there is no data.
   */
  computeCosineSimilarity() {
    if (this.originalOutputs.length === 0) return 1.0;

    let totalSim = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      let dot = 0, normA = 0, normB = 0;
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        dot += orig[j] * quant[j];
        normA += orig[j] * orig[j];
        normB += quant[j] * quant[j];
      }

      totalSim += dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }

    return totalSim / this.originalOutputs.length;
  }

  /**
   * Compute the maximum absolute element-wise error across all pairs.
   * @returns {number} Max abs error; 0 when there is no data.
   */
  computeMaxError() {
    let maxError = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        maxError = Math.max(maxError, Math.abs(orig[j] - quant[j]));
      }
    }

    return maxError;
  }

  /**
   * Get comprehensive accuracy metrics over all recorded pairs.
   * @returns {{mse: number, rmse: number, cosineSimilarity: number,
   *            maxError: number, samples: number, accuracyRetained: number}}
   */
  getMetrics() {
    const mse = this.computeMSE();
    // Compute once and reuse (fix: previously recomputed for accuracyRetained).
    const cosineSimilarity = this.computeCosineSimilarity();

    return {
      mse,
      rmse: Math.sqrt(mse),
      cosineSimilarity,
      maxError: this.computeMaxError(),
      samples: this.originalOutputs.length,
      accuracyRetained: cosineSimilarity * 100,
    };
  }

  /**
   * Reset the meter, discarding all recorded pairs and labels.
   */
  reset() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }
}
|
|
|
|
// ============================================
// LATENCY PROFILER
// ============================================

/**
 * Detailed latency profiling.
 *
 * Records named timing sections via start()/end() pairs and derives
 * order statistics (mean, median, percentiles, std) per label.
 */
export class LatencyProfiler {
  constructor() {
    // label -> { samples: number[] (completed durations, ms),
    //            running: number|null (performance.now() at start, or null) }
    this.measurements = new Map();
  }

  /**
   * Start timing a section. Calling start() again before end() for the
   * same label discards the previous in-flight measurement.
   * @param {string} label - Section name.
   */
  start(label) {
    if (!this.measurements.has(label)) {
      this.measurements.set(label, {
        samples: [],
        running: null,
      });
    }
    this.measurements.get(label).running = performance.now();
  }

  /**
   * End timing a section and record the elapsed duration.
   * @param {string} label - Section name passed to start().
   * @returns {number} Elapsed milliseconds, or 0 when no start() is pending.
   */
  end(label) {
    const entry = this.measurements.get(label);
    if (entry && entry.running !== null) {
      const duration = performance.now() - entry.running;
      entry.samples.push(duration);
      entry.running = null;
      return duration;
    }
    return 0;
  }

  /**
   * Get statistics for a label.
   * @param {string} label - Section name.
   * @returns {?{label: string, count: number, mean: number, median: number,
   *             min: number, max: number, p95: number, p99: number, std: number}}
   *          null when the label is unknown or has no completed samples.
   */
  getStats(label) {
    const entry = this.measurements.get(label);
    if (!entry || entry.samples.length === 0) {
      return null;
    }

    // Sort a copy so recorded samples keep insertion order.
    const samples = [...entry.samples].sort((a, b) => a - b);
    const sum = samples.reduce((a, b) => a + b, 0);
    // Hoisted: the mean was previously recomputed per element inside the
    // std-deviation reduce.
    const mean = sum / samples.length;

    return {
      label,
      count: samples.length,
      mean,
      median: samples[Math.floor(samples.length / 2)],
      min: samples[0],
      max: samples[samples.length - 1],
      p95: samples[Math.floor(samples.length * 0.95)],
      p99: samples[Math.floor(samples.length * 0.99)],
      std: Math.sqrt(samples.reduce((acc, v) => acc + (v - mean) ** 2, 0) / samples.length),
    };
  }

  /**
   * Get statistics for every recorded label.
   * @returns {Object<string, ?object>} Map of label -> getStats(label).
   */
  getAllStats() {
    const stats = {};
    for (const label of this.measurements.keys()) {
      stats[label] = this.getStats(label);
    }
    return stats;
  }

  /**
   * Reset the profiler, discarding all labels and samples.
   */
  reset() {
    this.measurements.clear();
  }
}
|
|
|
|
// ============================================
// MEMORY PROFILER
// ============================================

/**
 * Memory usage profiler.
 *
 * Takes labelled heap snapshots (in MB) and tracks the peak heap seen.
 * Works in Node (process.memoryUsage) with a browser fallback
 * (performance.memory); reports zeros when neither is available.
 */
export class MemoryProfiler {
  constructor() {
    this.snapshots = [];
    this.peakMemory = 0;
  }

  /**
   * Take a labelled memory snapshot and update the peak-heap watermark.
   * @param {string} [label='snapshot'] - Name for later lookup via getDelta().
   * @returns {object} The recorded snapshot (label, timestamp, usage in MB).
   */
  snapshot(label = 'snapshot') {
    const usage = this.getMemoryUsage();
    const entry = {
      label,
      timestamp: Date.now(),
      ...usage,
    };

    this.snapshots.push(entry);
    if (usage.heapUsed > this.peakMemory) {
      this.peakMemory = usage.heapUsed;
    }

    return entry;
  }

  /**
   * Read current memory usage in MB from whichever API is available.
   * @returns {{heapUsed: number, heapTotal: number, external: number, rss: number}}
   */
  getMemoryUsage() {
    const BYTES_PER_MB = 1024 * 1024;

    // Node.js path.
    if (typeof process !== 'undefined' && process.memoryUsage) {
      const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
      return {
        heapUsed: heapUsed / BYTES_PER_MB,
        heapTotal: heapTotal / BYTES_PER_MB,
        external: external / BYTES_PER_MB,
        rss: rss / BYTES_PER_MB,
      };
    }

    // Browser fallback (non-standard performance.memory).
    if (typeof performance !== 'undefined' && performance.memory) {
      return {
        heapUsed: performance.memory.usedJSHeapSize / BYTES_PER_MB,
        heapTotal: performance.memory.totalJSHeapSize / BYTES_PER_MB,
        external: 0,
        rss: 0,
      };
    }

    // No memory API available in this environment.
    return { heapUsed: 0, heapTotal: 0, external: 0, rss: 0 };
  }

  /**
   * Compute heap/time deltas between the first snapshots with the given labels.
   * @param {string} startLabel
   * @param {string} endLabel
   * @returns {?{heapDelta: number, timeDelta: number}} null if either label is missing.
   */
  getDelta(startLabel, endLabel) {
    const first = this.snapshots.find((s) => s.label === startLabel);
    const last = this.snapshots.find((s) => s.label === endLabel);

    if (!first || !last) return null;

    return {
      heapDelta: last.heapUsed - first.heapUsed,
      timeDelta: last.timestamp - first.timestamp,
    };
  }

  /**
   * Summarize the profiler state: snapshot count, peak and current heap (MB),
   * and the full snapshot history.
   */
  getSummary() {
    return {
      snapshots: this.snapshots.length,
      peakMemoryMB: this.peakMemory,
      currentMemoryMB: this.getMemoryUsage().heapUsed,
      history: this.snapshots,
    };
  }

  /**
   * Reset the profiler, clearing history and the peak watermark.
   */
  reset() {
    this.snapshots = [];
    this.peakMemory = 0;
  }
}
|
|
|
|
// ============================================
// COMPREHENSIVE BENCHMARK RUNNER
// ============================================

/**
 * ComprehensiveBenchmark - Full benchmark suite for model optimization.
 *
 * Combines quantization, simulated inference timing, optional accuracy
 * measurement, and memory profiling for the models in TARGET_MODELS.
 *
 * Events:
 *  - 'suite:start' / 'suite:complete'         — one full model suite
 *  - 'benchmark:start' / 'benchmark:complete' — one quantization method
 */
export class ComprehensiveBenchmark extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {ModelOptimizer} [options.optimizer] - Optimizer to use; a fresh one is created when omitted.
   */
  constructor(options = {}) {
    super();
    this.optimizer = options.optimizer || new ModelOptimizer();
    this.latencyProfiler = new LatencyProfiler();
    this.memoryProfiler = new MemoryProfiler();
    this.accuracyMeter = new AccuracyMeter();
    this.results = [];
  }

  /**
   * Run the benchmark suite on a model.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES; unknown names fall back to 'standard'.
   * @returns {Promise<object>} Suite results; also appended to this.results.
   * @throws {Error} When the model key is unknown.
   */
  async runSuite(model, profile = 'standard') {
    const profileConfig = BENCHMARK_PROFILES[profile] || BENCHMARK_PROFILES.standard;
    const modelConfig = TARGET_MODELS[model];

    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }

    this.emit('suite:start', { model, profile });

    const suiteResults = {
      model,
      profile,
      modelConfig,
      timestamp: new Date().toISOString(),
      benchmarks: [],
    };

    // Memory baseline before any quantization work.
    this.memoryProfiler.snapshot('baseline');

    // Benchmark each quantization method sequentially (they share the profilers).
    for (const method of profileConfig.quantMethods) {
      const methodResult = await this.benchmarkQuantization(
        model,
        method,
        profileConfig
      );
      suiteResults.benchmarks.push(methodResult);
    }

    // Memory after benchmarks.
    this.memoryProfiler.snapshot('after-benchmarks');

    suiteResults.memory = this.memoryProfiler.getSummary();
    suiteResults.summary = this.generateSummary(suiteResults);

    this.results.push(suiteResults);
    this.emit('suite:complete', suiteResults);

    return suiteResults;
  }

  /**
   * Benchmark a specific quantization method.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @param {object} config - Profile config (iterations, warmupIterations, inputSizes, measureAccuracy).
   * @returns {Promise<object>} Per-method benchmark result.
   */
  async benchmarkQuantization(model, method, config) {
    this.emit('benchmark:start', { model, method });

    const quantConfig = QUANTIZATION_CONFIGS[method];
    const modelConfig = TARGET_MODELS[model];

    // Quantize the model, timing the conversion itself.
    this.latencyProfiler.start('quantization');
    const quantResult = await this.optimizer.quantize(model, method);
    this.latencyProfiler.end('quantization');

    // Simulated inference benchmarks, one entry per input shape.
    const inferenceBenchmarks = [];

    for (const [batchSize, seqLen] of config.inputSizes) {
      this.latencyProfiler.start(`inference-${batchSize}x${seqLen}`);

      // Warmup iterations are executed but not measured.
      for (let i = 0; i < config.warmupIterations; i++) {
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
      }

      // Measured iterations.
      const times = [];
      for (let i = 0; i < config.iterations; i++) {
        const start = performance.now();
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
        times.push(performance.now() - start);
      }

      this.latencyProfiler.end(`inference-${batchSize}x${seqLen}`);

      times.sort((a, b) => a - b);
      // Hoisted: the mean was previously computed twice (for meanMs and
      // again inside tokensPerSecond).
      const meanMs = times.reduce((a, b) => a + b, 0) / times.length;

      inferenceBenchmarks.push({
        inputSize: `${batchSize}x${seqLen}`,
        iterations: config.iterations,
        meanMs,
        medianMs: times[Math.floor(times.length / 2)],
        p95Ms: times[Math.floor(times.length * 0.95)],
        minMs: times[0],
        maxMs: times[times.length - 1],
        tokensPerSecond: (seqLen * batchSize * 1000) / meanMs,
      });
    }

    // Measure accuracy if requested, using simulated output pairs.
    let accuracyMetrics = null;
    if (config.measureAccuracy) {
      for (let i = 0; i < 100; i++) {
        const original = new Float32Array(modelConfig.hiddenSize).map(() => Math.random());
        const quantized = this.simulateQuantizedOutput(original, method);
        this.accuracyMeter.addPrediction(Array.from(original), Array.from(quantized));
      }
      accuracyMetrics = this.accuracyMeter.getMetrics();
      this.accuracyMeter.reset();
    }

    const result = {
      method,
      quantization: quantResult,
      inference: inferenceBenchmarks,
      accuracy: accuracyMetrics,
      // NOTE(review): the latency profiler is shared and never reset between
      // methods, so this snapshot also contains samples from earlier methods
      // in the same suite — confirm that is intended.
      latencyProfile: this.latencyProfiler.getAllStats(),
      compression: {
        original: modelConfig.originalSize,
        quantized: modelConfig.originalSize / quantConfig.compression,
        ratio: quantConfig.compression,
      },
      recommendation: this.getRecommendation(model, method, inferenceBenchmarks),
    };

    this.emit('benchmark:complete', result);

    return result;
  }

  /**
   * Simulate model inference with a latency model scaled by model size,
   * batch, sequence length, and the method's configured speedup.
   * @returns {Promise<Float32Array>} A random vector of hiddenSize values.
   */
  async simulateInference(config, batchSize, seqLen, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    // Base latency grows with model size and with tokens per call.
    const baseLatency = (config.originalSize / 100) * (batchSize * seqLen / 512);
    const speedup = quantConfig?.speedup || 1;

    const latency = baseLatency / speedup;
    await new Promise(resolve => setTimeout(resolve, latency));

    return new Float32Array(config.hiddenSize).map(() => Math.random());
  }

  /**
   * Simulate a quantized output by adding uniform noise scaled by the
   * method's configured accuracy loss.
   * @param {Float32Array} original - Reference output.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @returns {Float32Array} Noisy copy of the input.
   */
  simulateQuantizedOutput(original, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const noise = quantConfig?.accuracyLoss || 0.01;

    return new Float32Array(original.length).map((_, i) => {
      return original[i] + (Math.random() - 0.5) * 2 * noise;
    });
  }

  /**
   * Score a quantization method (0-100) from size target, latency,
   * accuracy loss, and compression ratio, and derive a rating.
   * @returns {{score: number, rating: string, reasons: string[], recommended: boolean}}
   */
  getRecommendation(model, method, inferenceBenchmarks) {
    const modelConfig = TARGET_MODELS[model];
    const quantConfig = QUANTIZATION_CONFIGS[method];

    // NaN when inferenceBenchmarks is empty; all comparisons below are then
    // false, so no latency points are awarded — which is the safe outcome.
    const avgLatency = inferenceBenchmarks.reduce((a, b) => a + b.meanMs, 0) / inferenceBenchmarks.length;
    const targetMet = (modelConfig.originalSize / quantConfig.compression) <= modelConfig.targetSize;

    let score = 0;
    const reasons = [];

    // Size target met: +30.
    if (targetMet) {
      score += 30;
      reasons.push('Meets size target');
    }

    // Latency tiers: +30 / +20.
    if (avgLatency < 10) {
      score += 30;
      reasons.push('Excellent latency (<10ms)');
    } else if (avgLatency < 50) {
      score += 20;
      reasons.push('Good latency (<50ms)');
    }

    // Accuracy-loss tiers: +25 / +15.
    if (quantConfig.accuracyLoss < 0.02) {
      score += 25;
      reasons.push('Minimal accuracy loss (<2%)');
    } else if (quantConfig.accuracyLoss < 0.05) {
      score += 15;
      reasons.push('Acceptable accuracy loss (<5%)');
    }

    // Compression bonus: +15.
    if (quantConfig.compression >= 4) {
      score += 15;
      reasons.push('High compression (4x+)');
    }

    return {
      score,
      rating: score >= 80 ? 'Excellent' : score >= 60 ? 'Good' : score >= 40 ? 'Acceptable' : 'Poor',
      reasons,
      recommended: score >= 60,
    };
  }

  /**
   * Generate a suite summary: best-scoring method plus aggregate latency.
   * Guards against empty benchmark/inference lists so the summary never
   * contains NaN (fix: previous version divided by possibly-zero lengths).
   */
  generateSummary(suiteResults) {
    const benchmarks = suiteResults.benchmarks;

    // Pick the method with the highest recommendation score.
    let bestMethod = null;
    let bestScore = 0;

    for (const b of benchmarks) {
      if (b.recommendation.score > bestScore) {
        bestScore = b.recommendation.score;
        bestMethod = b.method;
      }
    }

    // Mean of each method's mean latency across its input shapes.
    const avgLatency = benchmarks.length === 0 ? 0 : benchmarks.reduce((sum, b) => {
      if (b.inference.length === 0) return sum;
      return sum + b.inference.reduce((s, i) => s + i.meanMs, 0) / b.inference.length;
    }, 0) / benchmarks.length;

    return {
      modelKey: suiteResults.model,
      modelType: suiteResults.modelConfig.type,
      originalSizeMB: suiteResults.modelConfig.originalSize,
      targetSizeMB: suiteResults.modelConfig.targetSize,
      bestMethod,
      bestScore,
      avgLatencyMs: avgLatency,
      methodsEvaluated: benchmarks.length,
      recommendation: bestMethod ? `Use ${bestMethod} quantization for optimal edge deployment` : 'No suitable method found',
    };
  }

  /**
   * Run benchmarks on every model in TARGET_MODELS.
   * Per-model failures are captured as { model, error } entries rather
   * than aborting the whole run.
   * @param {string} [profile='standard'] - Profile applied to each suite.
   * @returns {Promise<object>} Timestamped results plus an overall summary.
   */
  async runAllModels(profile = 'standard') {
    const allResults = [];

    for (const modelKey of Object.keys(TARGET_MODELS)) {
      try {
        const result = await this.runSuite(modelKey, profile);
        allResults.push(result);
      } catch (error) {
        allResults.push({
          model: modelKey,
          error: error.message,
        });
      }
    }

    return {
      timestamp: new Date().toISOString(),
      profile,
      results: allResults,
      summary: this.generateOverallSummary(allResults),
    };
  }

  /**
   * Generate an overall summary (success/failure counts and the best
   * method per successfully benchmarked model).
   */
  generateOverallSummary(allResults) {
    const successful = allResults.filter(r => !r.error);

    return {
      totalModels: allResults.length,
      successfulBenchmarks: successful.length,
      failedBenchmarks: allResults.length - successful.length,
      recommendations: successful.map(r => ({
        model: r.model,
        bestMethod: r.summary?.bestMethod,
        score: r.summary?.bestScore,
      })),
    };
  }

  /**
   * Export all accumulated suite results with an export timestamp.
   */
  exportResults() {
    return {
      exported: new Date().toISOString(),
      results: this.results,
    };
  }

  /**
   * Reset all profilers, the accuracy meter, and accumulated results.
   */
  reset() {
    this.latencyProfiler.reset();
    this.memoryProfiler.reset();
    this.accuracyMeter.reset();
    this.results = [];
  }
}

// ============================================
// EXPORTS
// ============================================

// BENCHMARK_PROFILES is already exported at its declaration above.
export default ComprehensiveBenchmark;
|