Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
179
vendor/ruvector/npm/packages/agentic-synth/training/dspy-multi-model-benchmark.d.ts
vendored
Normal file
179
vendor/ruvector/npm/packages/agentic-synth/training/dspy-multi-model-benchmark.d.ts
vendored
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
 * DSPy.ts Multi-Model Benchmarking System v1.0.0
 *
 * Comprehensive benchmarking suite comparing multiple models across:
 * - Quality metrics (f1Score, exactMatch, bleuScore, rougeScore)
 * - Optimization strategies (BootstrapFewShot, MIPROv2)
 * - Cost-effectiveness analysis
 * - Performance characteristics
 *
 * Real-world implementation using actual dspy.ts v2.1.1 features:
 * - ChainOfThought for reasoning
 * - ReAct for iterative improvement
 * - MultiChainComparison for ensemble decisions
 * - BootstrapFewShot & MIPROv2 optimizers
 *
 * @requires dspy.ts@2.1.1
 * @requires Environment: OPENAI_API_KEY, ANTHROPIC_API_KEY
 */
// Ambient stub for the dspy.ts `ChainOfThought` export, used below as the
// base of `SyntheticDataModule`. Declared as `any` because no type
// declarations for dspy.ts are available in this vendored file.
// NOTE(review): presumably a class constructor — confirm against dspy.ts@2.1.1.
declare const ChainOfThought: any;
/** Configuration for one model registered with the benchmark runner. */
interface ModelConfig {
  /** Display name identifying the model in results, rankings and reports. */
  name: string;
  /** API backend that serves the model. */
  provider: 'openai' | 'anthropic' | 'openrouter';
  /** Provider-specific model identifier (the API model string). */
  modelId: string;
  /** API key used to authenticate requests to the provider. */
  apiKey: string;
  /** Pricing per 1,000 tokens, split by direction (currency not visible here — presumably USD; confirm). */
  costPer1kTokens: {
    /** Cost per 1k prompt/input tokens. */
    input: number;
    /** Cost per 1k completion/output tokens. */
    output: number;
  };
  /** Maximum number of tokens to request per completion. */
  maxTokens: number;
}
/** All metrics captured for one model during a benchmark run. */
interface BenchmarkMetrics {
  /** Output-quality scores (scales not visible in this declaration — presumably 0..1; confirm). */
  quality: {
    /** F1 score. */
    f1: number;
    /** Exact-match rate. */
    exactMatch: number;
    /** BLEU score. */
    bleu: number;
    /** ROUGE score. */
    rouge: number;
    /** Combined quality score across the individual metrics. */
    overall: number;
  };
  /** Latency and throughput characteristics. */
  performance: {
    /** Average latency (unit not visible here — presumably ms; confirm). */
    avgLatency: number;
    /** Median (50th-percentile) latency. */
    p50: number;
    /** 95th-percentile latency. */
    p95: number;
    /** 99th-percentile latency. */
    p99: number;
    /** Requests processed per unit time. */
    throughput: number;
    /** Share of requests that succeeded (fraction vs percent not visible — confirm). */
    successRate: number;
  };
  /** Spend figures — presumably derived from ModelConfig.costPer1kTokens and token counts. */
  cost: {
    /** Total cost of the run. */
    totalCost: number;
    /** Average cost per evaluated sample. */
    costPerSample: number;
    /** Cost-efficiency measure: spend per point of quality. */
    costPerQualityPoint: number;
    /** Total input tokens consumed. */
    inputTokens: number;
    /** Total output tokens produced. */
    outputTokens: number;
  };
  /** Quality before and after each optimization strategy. */
  optimization: {
    /** Quality with no optimizer applied. */
    baselineQuality: number;
    /** Quality after BootstrapFewShot optimization. */
    bootstrapQuality: number;
    /** Quality after MIPROv2 optimization. */
    miproQuality: number;
    /** Gain of BootstrapFewShot over the baseline. */
    bootstrapImprovement: number;
    /** Gain of MIPROv2 over the baseline. */
    miproImprovement: number;
  };
}
/** Complete result of benchmarking a single model. */
interface BenchmarkResult {
  /** Name of the benchmarked model — presumably ModelConfig.name; confirm. */
  modelName: string;
  /** When the run completed (format not visible here — presumably ISO 8601; confirm). */
  timestamp: string;
  /** Quality / performance / cost / optimization metrics for this run. */
  metrics: BenchmarkMetrics;
  /** One entry per optimization round, tagged with the strategy that produced it. */
  optimizationHistory: {
    /** Optimization strategy for this entry. */
    method: 'baseline' | 'bootstrap' | 'mipro';
    /** Round number within the optimization run. */
    round: number;
    /** Quality achieved in this round. */
    quality: number;
    /** Time taken by this round (unit not visible here — confirm ms vs s). */
    duration: number;
  }[];
  /** Number of samples evaluated. */
  sampleSize: number;
  /** Total wall-clock duration of the whole benchmark (unit not visible — confirm). */
  duration: number;
}
interface ComparisonReport {
|
||||
summary: {
|
||||
winner: {
|
||||
quality: string;
|
||||
performance: string;
|
||||
cost: string;
|
||||
optimization: string;
|
||||
overall: string;
|
||||
};
|
||||
modelsCompared: number;
|
||||
totalSamples: number;
|
||||
totalDuration: number;
|
||||
};
|
||||
results: BenchmarkResult[];
|
||||
rankings: {
|
||||
quality: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
performance: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
cost: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
optimization: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
};
|
||||
recommendations: {
|
||||
production: string;
|
||||
research: string;
|
||||
costOptimized: string;
|
||||
balanced: string;
|
||||
};
|
||||
}
|
||||
/**
 * Synthetic Data Generator using Chain of Thought.
 *
 * Extends the (untyped) dspy.ts `ChainOfThought` module; only the shape is
 * declared here — the concrete generation logic lives in the implementation
 * file this declaration was emitted from.
 */
declare class SyntheticDataModule extends ChainOfThought {
  /** Takes no arguments; configuration presumably happens in the implementation — confirm. */
  constructor();
}
/**
 * Orchestrates benchmarking of multiple models and produces a
 * cross-model `ComparisonReport`, plus a saved markdown report.
 */
export declare class DSPyMultiModelBenchmark {
  // Registered model configurations (exact container type not visible here).
  private models;
  // Accumulated per-model benchmark results.
  private results;
  // Directory where generated reports are written.
  private outputDir;
  /**
   * @param outputDir - Destination directory for report files; a default is
   *   used when omitted (default value not visible in this declaration file).
   */
  constructor(outputDir?: string);
  /**
   * Register a model for benchmarking.
   * @param config - Connection, pricing and token-limit settings for the model.
   */
  addModel(config: ModelConfig): void;
  /**
   * Run comprehensive comparison across all registered models.
   * @param sampleSize - Samples per model; a default applies when omitted
   *   (value not visible here — confirm in implementation).
   * @returns Aggregated comparison report across all models.
   */
  runComparison(sampleSize?: number): Promise<ComparisonReport>;
  /**
   * Benchmark a single model.
   */
  private benchmarkModel;
  /**
   * Optimize with BootstrapFewShot.
   * @param module - Module to optimize.
   * @param schema - Target data schema (untyped in this declaration).
   * @param sampleSize - Number of training samples to use.
   * @returns The optimized module (new vs mutated instance not visible — confirm).
   */
  optimizeWithBootstrap(module: SyntheticDataModule, schema: any, sampleSize: number): Promise<SyntheticDataModule>;
  /**
   * Optimize with MIPROv2.
   * @param module - Module to optimize.
   * @param schema - Target data schema (untyped in this declaration).
   * @param sampleSize - Number of training samples to use.
   * @returns The optimized module.
   */
  optimizeWithMIPRO(module: SyntheticDataModule, schema: any, sampleSize: number): Promise<SyntheticDataModule>;
  /**
   * Evaluate module quality.
   */
  private evaluateModule;
  /**
   * Measure performance metrics (latency percentiles, throughput, success rate
   * — see BenchmarkMetrics.performance).
   */
  private measurePerformance;
  /**
   * Generate training dataset.
   */
  private generateTrainingSet;
  /**
   * Generate sample synthetic data.
   */
  private generateSampleData;
  /**
   * Calculate quality score for synthetic data.
   */
  private calculateQualityScore;
  /**
   * Calculate percentile.
   */
  private percentile;
  /**
   * Generate comparison report.
   */
  private generateComparisonReport;
  /**
   * Generate and save markdown report.
   * @param comparison - Report data to render.
   * @returns Presumably the path or content of the written report — confirm in implementation.
   */
  generateReport(comparison: ComparisonReport): Promise<string>;
}
export { ModelConfig, BenchmarkResult, ComparisonReport, BenchmarkMetrics };
|
||||
//# sourceMappingURL=dspy-multi-model-benchmark.d.ts.map
|
||||
Reference in New Issue
Block a user