Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
227
vendor/ruvector/npm/packages/agentic-synth/training/dspy-benchmarks.d.ts
vendored
Normal file
227
vendor/ruvector/npm/packages/agentic-synth/training/dspy-benchmarks.d.ts
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
/**
|
||||
* DSPy Benchmark Comparison Framework
|
||||
*
|
||||
* Comprehensive benchmarking suite for comparing multiple models across
|
||||
* quality, performance, cost, learning, and diversity metrics.
|
||||
*
|
||||
* Features:
|
||||
* - Multi-model comparison with statistical significance
|
||||
* - Scalability testing (100 to 100K samples)
|
||||
* - Cost-effectiveness analysis
|
||||
* - Quality convergence tracking
|
||||
* - Diversity analysis
|
||||
* - Pareto frontier optimization
|
||||
* - Use case recommendations
|
||||
*/
|
||||
interface ModelConfig {
|
||||
name: string;
|
||||
provider: 'openrouter' | 'gemini' | 'anthropic' | 'openai';
|
||||
model: string;
|
||||
costPer1kTokens: number;
|
||||
maxTokens: number;
|
||||
apiKey?: string;
|
||||
}
|
||||
interface QualityMetrics {
|
||||
accuracy: number;
|
||||
coherence: number;
|
||||
validity: number;
|
||||
consistency: number;
|
||||
completeness: number;
|
||||
overall: number;
|
||||
}
|
||||
interface PerformanceMetrics {
|
||||
latencyP50: number;
|
||||
latencyP95: number;
|
||||
latencyP99: number;
|
||||
avgLatency: number;
|
||||
minLatency: number;
|
||||
maxLatency: number;
|
||||
throughput: number;
|
||||
successRate: number;
|
||||
}
|
||||
interface CostMetrics {
|
||||
totalCost: number;
|
||||
costPerSample: number;
|
||||
costPerQualityPoint: number;
|
||||
tokensUsed: number;
|
||||
efficiency: number;
|
||||
}
|
||||
interface LearningMetrics {
|
||||
improvementRate: number;
|
||||
convergenceSpeed: number;
|
||||
learningCurve: number[];
|
||||
plateauGeneration: number;
|
||||
finalQuality: number;
|
||||
}
|
||||
interface DiversityMetrics {
|
||||
uniqueValues: number;
|
||||
patternVariety: number;
|
||||
distributionEntropy: number;
|
||||
coverageScore: number;
|
||||
noveltyRate: number;
|
||||
}
|
||||
interface BenchmarkResult {
|
||||
modelName: string;
|
||||
sampleSize: number;
|
||||
quality: QualityMetrics;
|
||||
performance: PerformanceMetrics;
|
||||
cost: CostMetrics;
|
||||
learning: LearningMetrics;
|
||||
diversity: DiversityMetrics;
|
||||
timestamp: string;
|
||||
duration: number;
|
||||
}
|
||||
interface ComparisonResult {
|
||||
models: string[];
|
||||
winner: {
|
||||
overall: string;
|
||||
quality: string;
|
||||
performance: string;
|
||||
cost: string;
|
||||
learning: string;
|
||||
diversity: string;
|
||||
};
|
||||
statisticalSignificance: {
|
||||
[key: string]: number;
|
||||
};
|
||||
paretoFrontier: string[];
|
||||
recommendations: {
|
||||
[useCase: string]: string;
|
||||
};
|
||||
}
|
||||
interface ScalabilityResult {
|
||||
modelName: string;
|
||||
sampleSizes: number[];
|
||||
latencies: number[];
|
||||
throughputs: number[];
|
||||
costs: number[];
|
||||
qualities: number[];
|
||||
scalingEfficiency: number;
|
||||
}
|
||||
declare class StatisticalAnalyzer {
|
||||
/**
|
||||
* Calculate mean of array
|
||||
*/
|
||||
static mean(values: number[]): number;
|
||||
/**
|
||||
* Calculate standard deviation
|
||||
*/
|
||||
static stdDev(values: number[]): number;
|
||||
/**
|
||||
* Calculate percentile
|
||||
*/
|
||||
static percentile(values: number[], p: number): number;
|
||||
/**
|
||||
* Perform t-test to determine statistical significance
|
||||
* Returns p-value
|
||||
*/
|
||||
static tTest(sample1: number[], sample2: number[]): number;
|
||||
/**
|
||||
* Simplified t-distribution CDF approximation
|
||||
*/
|
||||
private static tDistribution;
|
||||
/**
|
||||
* Calculate Shannon entropy for diversity measurement
|
||||
*/
|
||||
static entropy(values: any[]): number;
|
||||
}
|
||||
export declare class BenchmarkSuite {
|
||||
private models;
|
||||
private outputDir;
|
||||
private results;
|
||||
constructor(outputDir?: string);
|
||||
/**
|
||||
* Add a model configuration to the benchmark suite
|
||||
*/
|
||||
addModel(config: ModelConfig): void;
|
||||
/**
|
||||
* Add multiple common models for quick testing
|
||||
*/
|
||||
addCommonModels(): void;
|
||||
/**
|
||||
* Run comprehensive comparison across all models
|
||||
*/
|
||||
runModelComparison(sampleSize?: number): Promise<ComparisonResult>;
|
||||
/**
|
||||
* Test scalability from 100 to 100K samples
|
||||
*/
|
||||
runScalabilityTest(): Promise<ScalabilityResult[]>;
|
||||
/**
|
||||
* Analyze cost-effectiveness across models
|
||||
*/
|
||||
runCostAnalysis(): Promise<void>;
|
||||
/**
|
||||
* Measure quality convergence and learning rates
|
||||
*/
|
||||
runQualityConvergence(generations?: number): Promise<void>;
|
||||
/**
|
||||
* Analyze data diversity and variety
|
||||
*/
|
||||
runDiversityAnalysis(sampleSize?: number): Promise<void>;
|
||||
/**
|
||||
* Benchmark a single model
|
||||
*/
|
||||
private benchmarkModel;
|
||||
/**
|
||||
* Calculate quality metrics
|
||||
*/
|
||||
private calculateQualityMetrics;
|
||||
/**
|
||||
* Calculate performance metrics
|
||||
*/
|
||||
private calculatePerformanceMetrics;
|
||||
/**
|
||||
* Calculate cost metrics
|
||||
*/
|
||||
private calculateCostMetrics;
|
||||
/**
|
||||
* Calculate learning metrics
|
||||
*/
|
||||
private calculateLearningMetrics;
|
||||
/**
|
||||
* Calculate diversity metrics
|
||||
*/
|
||||
private calculateDiversityMetrics;
|
||||
/**
|
||||
* Compare results and generate comparison report
|
||||
*/
|
||||
private compareResults;
|
||||
/**
|
||||
* Calculate Pareto frontier for quality vs cost trade-off
|
||||
*/
|
||||
private calculateParetoFrontier;
|
||||
/**
|
||||
* Find generation where quality plateaus
|
||||
*/
|
||||
private findPlateauGeneration;
|
||||
/**
|
||||
* Generate comprehensive JSON report
|
||||
*/
|
||||
generateJSONReport(comparison: ComparisonResult): Promise<void>;
|
||||
/**
|
||||
* Generate comprehensive Markdown report
|
||||
*/
|
||||
generateMarkdownReport(comparison: ComparisonResult): Promise<void>;
|
||||
/**
|
||||
* Build markdown report content
|
||||
*/
|
||||
private buildMarkdownReport;
|
||||
/**
|
||||
* Generate summary statistics
|
||||
*/
|
||||
private generateSummary;
|
||||
/**
|
||||
* Generate conclusion for report
|
||||
*/
|
||||
private generateConclusion;
|
||||
/**
|
||||
* Save scalability results
|
||||
*/
|
||||
private saveScalabilityResults;
|
||||
/**
|
||||
* Save convergence data
|
||||
*/
|
||||
private saveConvergenceData;
|
||||
}
|
||||
export { ModelConfig, BenchmarkResult, ComparisonResult, ScalabilityResult, StatisticalAnalyzer };
|
||||
//# sourceMappingURL=dspy-benchmarks.d.ts.map
|
||||
Reference in New Issue
Block a user