Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
179
vendor/ruvector/npm/packages/agentic-synth/training/dspy-multi-model-benchmark.d.ts
vendored
Normal file
179
vendor/ruvector/npm/packages/agentic-synth/training/dspy-multi-model-benchmark.d.ts
vendored
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
 * DSPy.ts Multi-Model Benchmarking System v1.0.0
 *
 * Comprehensive benchmarking suite comparing multiple models across:
 * - Quality metrics (f1Score, exactMatch, bleuScore, rougeScore)
 * - Optimization strategies (BootstrapFewShot, MIPROv2)
 * - Cost-effectiveness analysis
 * - Performance characteristics
 *
 * Real-world implementation using actual dspy.ts v2.1.1 features:
 * - ChainOfThought for reasoning
 * - ReAct for iterative improvement
 * - MultiChainComparison for ensemble decisions
 * - BootstrapFewShot & MIPROv2 optimizers
 *
 * @requires dspy.ts@2.1.1
 * @requires Environment: OPENAI_API_KEY, ANTHROPIC_API_KEY
 */
// Ambient stub for the dspy.ts `ChainOfThought` export, used below as the
// base of `SyntheticDataModule`. Declared as `any` because no type
// declarations for dspy.ts are available in this vendored file.
// NOTE(review): presumably a class constructor — confirm against dspy.ts@2.1.1.
declare const ChainOfThought: any;
/** Configuration for one model registered with the benchmark runner. */
interface ModelConfig {
  /** Display name identifying the model in results, rankings and reports. */
  name: string;
  /** API backend that serves the model. */
  provider: 'openai' | 'anthropic' | 'openrouter';
  /** Provider-specific model identifier (the API model string). */
  modelId: string;
  /** API key used to authenticate requests to the provider. */
  apiKey: string;
  /** Pricing per 1,000 tokens, split by direction (currency not visible here — presumably USD; confirm). */
  costPer1kTokens: {
    /** Cost per 1k prompt/input tokens. */
    input: number;
    /** Cost per 1k completion/output tokens. */
    output: number;
  };
  /** Maximum number of tokens to request per completion. */
  maxTokens: number;
}
/** All metrics captured for one model during a benchmark run. */
interface BenchmarkMetrics {
  /** Output-quality scores (scales not visible in this declaration — presumably 0..1; confirm). */
  quality: {
    /** F1 score. */
    f1: number;
    /** Exact-match rate. */
    exactMatch: number;
    /** BLEU score. */
    bleu: number;
    /** ROUGE score. */
    rouge: number;
    /** Combined quality score across the individual metrics. */
    overall: number;
  };
  /** Latency and throughput characteristics. */
  performance: {
    /** Average latency (unit not visible here — presumably ms; confirm). */
    avgLatency: number;
    /** Median (50th-percentile) latency. */
    p50: number;
    /** 95th-percentile latency. */
    p95: number;
    /** 99th-percentile latency. */
    p99: number;
    /** Requests processed per unit time. */
    throughput: number;
    /** Share of requests that succeeded (fraction vs percent not visible — confirm). */
    successRate: number;
  };
  /** Spend figures — presumably derived from ModelConfig.costPer1kTokens and token counts. */
  cost: {
    /** Total cost of the run. */
    totalCost: number;
    /** Average cost per evaluated sample. */
    costPerSample: number;
    /** Cost-efficiency measure: spend per point of quality. */
    costPerQualityPoint: number;
    /** Total input tokens consumed. */
    inputTokens: number;
    /** Total output tokens produced. */
    outputTokens: number;
  };
  /** Quality before and after each optimization strategy. */
  optimization: {
    /** Quality with no optimizer applied. */
    baselineQuality: number;
    /** Quality after BootstrapFewShot optimization. */
    bootstrapQuality: number;
    /** Quality after MIPROv2 optimization. */
    miproQuality: number;
    /** Gain of BootstrapFewShot over the baseline. */
    bootstrapImprovement: number;
    /** Gain of MIPROv2 over the baseline. */
    miproImprovement: number;
  };
}
/** Complete result of benchmarking a single model. */
interface BenchmarkResult {
  /** Name of the benchmarked model — presumably ModelConfig.name; confirm. */
  modelName: string;
  /** When the run completed (format not visible here — presumably ISO 8601; confirm). */
  timestamp: string;
  /** Quality / performance / cost / optimization metrics for this run. */
  metrics: BenchmarkMetrics;
  /** One entry per optimization round, tagged with the strategy that produced it. */
  optimizationHistory: {
    /** Optimization strategy for this entry. */
    method: 'baseline' | 'bootstrap' | 'mipro';
    /** Round number within the optimization run. */
    round: number;
    /** Quality achieved in this round. */
    quality: number;
    /** Time taken by this round (unit not visible here — confirm ms vs s). */
    duration: number;
  }[];
  /** Number of samples evaluated. */
  sampleSize: number;
  /** Total wall-clock duration of the whole benchmark (unit not visible — confirm). */
  duration: number;
}
interface ComparisonReport {
|
||||
summary: {
|
||||
winner: {
|
||||
quality: string;
|
||||
performance: string;
|
||||
cost: string;
|
||||
optimization: string;
|
||||
overall: string;
|
||||
};
|
||||
modelsCompared: number;
|
||||
totalSamples: number;
|
||||
totalDuration: number;
|
||||
};
|
||||
results: BenchmarkResult[];
|
||||
rankings: {
|
||||
quality: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
performance: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
cost: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
optimization: {
|
||||
model: string;
|
||||
score: number;
|
||||
}[];
|
||||
};
|
||||
recommendations: {
|
||||
production: string;
|
||||
research: string;
|
||||
costOptimized: string;
|
||||
balanced: string;
|
||||
};
|
||||
}
|
||||
/**
 * Synthetic Data Generator using Chain of Thought.
 *
 * Extends the (untyped) dspy.ts `ChainOfThought` module; only the shape is
 * declared here — the concrete generation logic lives in the implementation
 * file this declaration was emitted from.
 */
declare class SyntheticDataModule extends ChainOfThought {
  /** Takes no arguments; configuration presumably happens in the implementation — confirm. */
  constructor();
}
/**
 * Orchestrates benchmarking of multiple models and produces a
 * cross-model `ComparisonReport`, plus a saved markdown report.
 */
export declare class DSPyMultiModelBenchmark {
  // Registered model configurations (exact container type not visible here).
  private models;
  // Accumulated per-model benchmark results.
  private results;
  // Directory where generated reports are written.
  private outputDir;
  /**
   * @param outputDir - Destination directory for report files; a default is
   *   used when omitted (default value not visible in this declaration file).
   */
  constructor(outputDir?: string);
  /**
   * Register a model for benchmarking.
   * @param config - Connection, pricing and token-limit settings for the model.
   */
  addModel(config: ModelConfig): void;
  /**
   * Run comprehensive comparison across all registered models.
   * @param sampleSize - Samples per model; a default applies when omitted
   *   (value not visible here — confirm in implementation).
   * @returns Aggregated comparison report across all models.
   */
  runComparison(sampleSize?: number): Promise<ComparisonReport>;
  /**
   * Benchmark a single model.
   */
  private benchmarkModel;
  /**
   * Optimize with BootstrapFewShot.
   * @param module - Module to optimize.
   * @param schema - Target data schema (untyped in this declaration).
   * @param sampleSize - Number of training samples to use.
   * @returns The optimized module (new vs mutated instance not visible — confirm).
   */
  optimizeWithBootstrap(module: SyntheticDataModule, schema: any, sampleSize: number): Promise<SyntheticDataModule>;
  /**
   * Optimize with MIPROv2.
   * @param module - Module to optimize.
   * @param schema - Target data schema (untyped in this declaration).
   * @param sampleSize - Number of training samples to use.
   * @returns The optimized module.
   */
  optimizeWithMIPRO(module: SyntheticDataModule, schema: any, sampleSize: number): Promise<SyntheticDataModule>;
  /**
   * Evaluate module quality.
   */
  private evaluateModule;
  /**
   * Measure performance metrics (latency percentiles, throughput, success rate
   * — see BenchmarkMetrics.performance).
   */
  private measurePerformance;
  /**
   * Generate training dataset.
   */
  private generateTrainingSet;
  /**
   * Generate sample synthetic data.
   */
  private generateSampleData;
  /**
   * Calculate quality score for synthetic data.
   */
  private calculateQualityScore;
  /**
   * Calculate percentile.
   */
  private percentile;
  /**
   * Generate comparison report.
   */
  private generateComparisonReport;
  /**
   * Generate and save markdown report.
   * @param comparison - Report data to render.
   * @returns Presumably the path or content of the written report — confirm in implementation.
   */
  generateReport(comparison: ComparisonReport): Promise<string>;
}
export { ModelConfig, BenchmarkResult, ComparisonReport, BenchmarkMetrics };
|
||||
//# sourceMappingURL=dspy-multi-model-benchmark.d.ts.map
|
||||
Reference in New Issue
Block a user