"use strict"; /** * Example: Running DSPy Benchmarks * * This script demonstrates how to use the benchmark suite * for comparing multiple models across various metrics. */ Object.defineProperty(exports, "__esModule", { value: true }); const dspy_benchmarks_js_1 = require("./dspy-benchmarks.js"); async function runFullBenchmarkSuite() { console.log('šŸŽÆ Running Full DSPy Benchmark Suite\n'); const suite = new dspy_benchmarks_js_1.BenchmarkSuite('./training/results/benchmarks'); // Option 1: Add common models suite.addCommonModels(); // Option 2: Add custom models // const customModel: ModelConfig = { // name: 'Custom Model', // provider: 'openrouter', // model: 'custom-model', // costPer1kTokens: 0.002, // maxTokens: 8192, // }; // suite.addModel(customModel); // Run comprehensive comparison const comparison = await suite.runModelComparison(1000); // Run additional analyses await suite.runScalabilityTest(); await suite.runCostAnalysis(); await suite.runQualityConvergence(10); await suite.runDiversityAnalysis(5000); // Generate reports await suite.generateJSONReport(comparison); await suite.generateMarkdownReport(comparison); console.log('\nāœ… All benchmarks completed!'); console.log('\nšŸ“Š Key Findings:'); console.log(` Overall Winner: ${comparison.winner.overall}`); console.log(` Best Quality: ${comparison.winner.quality}`); console.log(` Best Performance: ${comparison.winner.performance}`); console.log(` Most Cost-Effective: ${comparison.winner.cost}`); console.log(` Pareto Frontier: ${comparison.paretoFrontier.join(', ')}`); console.log('\nšŸ’” Recommendations by Use Case:'); for (const [useCase, model] of Object.entries(comparison.recommendations)) { console.log(` ${useCase}: ${model}`); } } async function runQuickComparison() { console.log('⚔ Running Quick Model Comparison\n'); const suite = new dspy_benchmarks_js_1.BenchmarkSuite(); // Add just a few models for quick testing suite.addModel({ name: 'GPT-4', provider: 'openai', model: 'gpt-4', costPer1kTokens: 0.03, maxTokens: 8192, }); suite.addModel({ name: 'Claude 3.5 Sonnet', provider: 'anthropic', model: 'claude-3.5-sonnet', costPer1kTokens: 0.015, maxTokens: 200000, }); suite.addModel({ name: 'Gemini Pro', provider: 'gemini', model: 'gemini-pro', costPer1kTokens: 0.0005, maxTokens: 32768, }); // Run comparison with smaller sample size const comparison = await suite.runModelComparison(500); // Generate reports await suite.generateJSONReport(comparison); await suite.generateMarkdownReport(comparison); console.log('\nāœ… Quick comparison completed!'); } async function runScalabilityOnly() { console.log('šŸ“ˆ Running Scalability Test Only\n'); const suite = new dspy_benchmarks_js_1.BenchmarkSuite(); suite.addCommonModels(); const results = await suite.runScalabilityTest(); console.log('\nšŸ“Š Scalability Summary:'); for (const result of results) { console.log(`\n${result.modelName}:`); console.log(` Scaling Efficiency: ${result.scalingEfficiency.toFixed(2)}x`); console.log(` Best Throughput: ${Math.max(...result.throughputs).toFixed(0)} samples/s`); console.log(` Cost at 100K: $${result.costs[result.costs.length - 1].toFixed(4)}`); } } async function runCostOptimization() { console.log('šŸ’° Running Cost Optimization Analysis\n'); const suite = new dspy_benchmarks_js_1.BenchmarkSuite(); suite.addCommonModels(); await suite.runModelComparison(1000); await suite.runCostAnalysis(); console.log('\nāœ… Cost analysis completed!'); } // Main execution async function main() { const mode = process.argv[2] || 'full'; switch (mode) { case 'full': await runFullBenchmarkSuite(); break; case 'quick': await runQuickComparison(); break; case 'scalability': await runScalabilityOnly(); break; case 'cost': await runCostOptimization(); break; default: console.log('Usage: node run-benchmarks.js [full|quick|scalability|cost]'); console.log('\nModes:'); console.log(' full - Run complete benchmark suite (default)'); console.log(' quick - Quick comparison with 3 models'); console.log(' scalability - Scalability test only'); console.log(' cost - Cost optimization analysis only'); process.exit(1); } } main().catch(console.error); //# sourceMappingURL=run-benchmarks.js.map