Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Multi-Model Comparison
|
||||
*
|
||||
* Compare multiple AI models (Gemini, Claude, GPT-4) to find the best
|
||||
* performer for your specific task. Includes benchmarking, cost tracking,
|
||||
* and performance metrics.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Running parallel model comparisons
|
||||
* - Benchmarking quality and speed
|
||||
* - Tracking costs per model
|
||||
* - Selecting the best model for production
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set API keys: GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/multi-model-comparison.ts
|
||||
*/
|
||||
import { Prediction } from 'dspy.ts';
|
||||
interface ModelConfig {
|
||||
name: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
apiKey: string;
|
||||
costPer1kTokens: number;
|
||||
capabilities: string[];
|
||||
}
|
||||
declare const models: ModelConfig[];
|
||||
interface BenchmarkResult {
|
||||
modelName: string;
|
||||
qualityScore: number;
|
||||
avgResponseTime: number;
|
||||
estimatedCost: number;
|
||||
successRate: number;
|
||||
outputs: Prediction[];
|
||||
errors: string[];
|
||||
}
|
||||
declare function benchmarkModel(config: ModelConfig): Promise<BenchmarkResult>;
|
||||
declare function runComparison(): Promise<BenchmarkResult[]>;
|
||||
export { runComparison, benchmarkModel, models };
|
||||
//# sourceMappingURL=multi-model-comparison.d.ts.map
|
||||
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"multi-model-comparison.d.ts","sourceRoot":"","sources":["multi-model-comparison.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAsB,UAAU,EAAE,MAAM,SAAS,CAAC;AAIzD,UAAU,WAAW;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAGD,QAAA,MAAM,MAAM,EAAE,WAAW,EAyBxB,CAAC;AAGF,UAAU,eAAe;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAwFD,iBAAe,cAAc,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,eAAe,CAAC,CA0E3E;AAGD,iBAAe,aAAa,+BA4F3B;AAUD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,EAAE,CAAC"}
|
||||
274
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/multi-model-comparison.js
vendored
Normal file
274
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/multi-model-comparison.js
vendored
Normal file
@@ -0,0 +1,274 @@
|
||||
"use strict";
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Multi-Model Comparison
|
||||
*
|
||||
* Compare multiple AI models (Gemini, Claude, GPT-4) to find the best
|
||||
* performer for your specific task. Includes benchmarking, cost tracking,
|
||||
* and performance metrics.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Running parallel model comparisons
|
||||
* - Benchmarking quality and speed
|
||||
* - Tracking costs per model
|
||||
* - Selecting the best model for production
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set API keys: GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/multi-model-comparison.ts
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.models = void 0;
|
||||
exports.runComparison = runComparison;
|
||||
exports.benchmarkModel = benchmarkModel;
|
||||
const dspy_ts_1 = require("dspy.ts");
|
||||
// Available models to compare
|
||||
const models = [
|
||||
{
|
||||
name: 'Gemini Flash',
|
||||
provider: 'google-genai',
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: process.env.GEMINI_API_KEY || '',
|
||||
costPer1kTokens: 0.001, // Very cheap
|
||||
capabilities: ['fast', 'cost-effective', 'reasoning']
|
||||
},
|
||||
{
|
||||
name: 'Claude Sonnet 4',
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
apiKey: process.env.ANTHROPIC_API_KEY || '',
|
||||
costPer1kTokens: 0.003, // Medium cost
|
||||
capabilities: ['high-quality', 'reasoning', 'code']
|
||||
},
|
||||
{
|
||||
name: 'GPT-4 Turbo',
|
||||
provider: 'openai',
|
||||
model: 'gpt-4-turbo-preview',
|
||||
apiKey: process.env.OPENAI_API_KEY || '',
|
||||
costPer1kTokens: 0.01, // More expensive
|
||||
capabilities: ['versatile', 'high-quality', 'creative']
|
||||
}
|
||||
];
|
||||
exports.models = models;
|
||||
// Test cases for comparison
|
||||
const testCases = [
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Wireless Noise-Cancelling Headphones',
|
||||
category: 'Electronics',
|
||||
price: 299
|
||||
},
|
||||
expectedFeatures: ['noise cancellation', 'wireless', 'battery life']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Organic Herbal Tea Collection',
|
||||
category: 'Beverages',
|
||||
price: 24
|
||||
},
|
||||
expectedFeatures: ['organic', 'herbal', 'health benefits']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Professional Camera Tripod',
|
||||
category: 'Photography',
|
||||
price: 149
|
||||
},
|
||||
expectedFeatures: ['stability', 'adjustable', 'professional']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Smart Fitness Tracker',
|
||||
category: 'Wearables',
|
||||
price: 79
|
||||
},
|
||||
expectedFeatures: ['fitness tracking', 'smart features', 'health monitoring']
|
||||
}
|
||||
];
|
||||
// Quality evaluation function
|
||||
function evaluateQuality(prediction, testCase) {
|
||||
let score = 0;
|
||||
const weights = {
|
||||
hasDescription: 0.3,
|
||||
descriptionLength: 0.2,
|
||||
hasFeatures: 0.2,
|
||||
featureCount: 0.15,
|
||||
relevance: 0.15
|
||||
};
|
||||
// Check if description exists and is well-formed
|
||||
if (prediction.description && typeof prediction.description === 'string') {
|
||||
score += weights.hasDescription;
|
||||
// Optimal length is 80-200 characters
|
||||
const length = prediction.description.length;
|
||||
if (length >= 80 && length <= 200) {
|
||||
score += weights.descriptionLength;
|
||||
}
|
||||
else if (length >= 50 && length <= 250) {
|
||||
score += weights.descriptionLength * 0.5;
|
||||
}
|
||||
}
|
||||
// Check features
|
||||
if (prediction.key_features && Array.isArray(prediction.key_features)) {
|
||||
score += weights.hasFeatures;
|
||||
// More features is better (up to 5)
|
||||
const featureCount = Math.min(prediction.key_features.length, 5);
|
||||
score += weights.featureCount * (featureCount / 5);
|
||||
}
|
||||
// Check relevance to expected features
|
||||
if (prediction.description) {
|
||||
const descLower = prediction.description.toLowerCase();
|
||||
const relevantFeatures = testCase.expectedFeatures.filter(feature => descLower.includes(feature.toLowerCase()));
|
||||
score += weights.relevance * (relevantFeatures.length / testCase.expectedFeatures.length);
|
||||
}
|
||||
return score;
|
||||
}
|
||||
// Run benchmark for a single model
|
||||
async function benchmarkModel(config) {
|
||||
console.log(`\n🔄 Testing ${config.name}...`);
|
||||
const result = {
|
||||
modelName: config.name,
|
||||
qualityScore: 0,
|
||||
avgResponseTime: 0,
|
||||
estimatedCost: 0,
|
||||
successRate: 0,
|
||||
outputs: [],
|
||||
errors: []
|
||||
};
|
||||
if (!config.apiKey) {
|
||||
console.log(` ⚠️ API key not found, skipping...`);
|
||||
result.errors.push('API key not configured');
|
||||
return result;
|
||||
}
|
||||
const lm = new dspy_ts_1.LM({
|
||||
provider: config.provider,
|
||||
model: config.model,
|
||||
apiKey: config.apiKey,
|
||||
temperature: 0.7
|
||||
});
|
||||
const signature = {
|
||||
input: 'product_name: string, category: string, price: number',
|
||||
output: 'description: string, key_features: string[]'
|
||||
};
|
||||
const generator = new dspy_ts_1.ChainOfThought(signature, { lm });
|
||||
const times = [];
|
||||
let totalScore = 0;
|
||||
let successCount = 0;
|
||||
// Run all test cases
|
||||
for (let i = 0; i < testCases.length; i++) {
|
||||
const testCase = testCases[i];
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
const prediction = await generator.forward(testCase.input);
|
||||
const duration = Date.now() - startTime;
|
||||
times.push(duration);
|
||||
result.outputs.push(prediction);
|
||||
const score = evaluateQuality(prediction, testCase);
|
||||
totalScore += score;
|
||||
successCount++;
|
||||
console.log(` ✓ Test ${i + 1}/${testCases.length} - Score: ${(score * 100).toFixed(0)}% - ${duration}ms`);
|
||||
}
|
||||
catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
|
||||
result.errors.push(`Test ${i + 1}: ${errorMsg}`);
|
||||
console.log(` ✗ Test ${i + 1}/${testCases.length} - Failed: ${errorMsg}`);
|
||||
}
|
||||
}
|
||||
// Calculate metrics
|
||||
result.avgResponseTime = times.length > 0
|
||||
? times.reduce((a, b) => a + b, 0) / times.length
|
||||
: 0;
|
||||
result.qualityScore = successCount > 0 ? totalScore / testCases.length : 0;
|
||||
result.successRate = successCount / testCases.length;
|
||||
// Estimate cost (rough approximation based on avg tokens)
|
||||
const avgTokens = 500; // Rough estimate
|
||||
result.estimatedCost = (avgTokens / 1000) * config.costPer1kTokens * testCases.length;
|
||||
return result;
|
||||
}
|
||||
// Main comparison function
|
||||
async function runComparison() {
|
||||
console.log('🏆 Multi-Model Comparison Benchmark\n');
|
||||
console.log('='.repeat(70));
|
||||
console.log('\nComparing models:');
|
||||
models.forEach((m, i) => {
|
||||
console.log(`${i + 1}. ${m.name} - $${m.costPer1kTokens}/1K tokens`);
|
||||
console.log(` Capabilities: ${m.capabilities.join(', ')}`);
|
||||
});
|
||||
console.log(`\nRunning ${testCases.length} test cases per model...\n`);
|
||||
console.log('='.repeat(70));
|
||||
// Run all benchmarks in parallel
|
||||
const results = await Promise.all(models.map(config => benchmarkModel(config)));
|
||||
// Display results
|
||||
console.log('\n' + '='.repeat(70));
|
||||
console.log('\n📊 BENCHMARK RESULTS\n');
|
||||
// Sort by quality score
|
||||
const sortedResults = [...results].sort((a, b) => b.qualityScore - a.qualityScore);
|
||||
console.log('┌─────────────────────┬──────────┬──────────┬──────────┬──────────┐');
|
||||
console.log('│ Model │ Quality │ Speed │ Cost │ Success │');
|
||||
console.log('├─────────────────────┼──────────┼──────────┼──────────┼──────────┤');
|
||||
sortedResults.forEach((result, index) => {
|
||||
const quality = `${(result.qualityScore * 100).toFixed(1)}%`;
|
||||
const speed = `${result.avgResponseTime.toFixed(0)}ms`;
|
||||
const cost = `$${result.estimatedCost.toFixed(4)}`;
|
||||
const success = `${(result.successRate * 100).toFixed(0)}%`;
|
||||
const modelName = result.modelName.padEnd(19);
|
||||
const qualityPad = quality.padStart(8);
|
||||
const speedPad = speed.padStart(8);
|
||||
const costPad = cost.padStart(8);
|
||||
const successPad = success.padStart(8);
|
||||
const medal = index === 0 ? '🥇' : index === 1 ? '🥈' : index === 2 ? '🥉' : ' ';
|
||||
console.log(`│ ${medal} ${modelName}│${qualityPad}│${speedPad}│${costPad}│${successPad}│`);
|
||||
});
|
||||
console.log('└─────────────────────┴──────────┴──────────┴──────────┴──────────┘\n');
|
||||
// Winner analysis
|
||||
const winner = sortedResults[0];
|
||||
console.log('🎯 WINNER: ' + winner.modelName);
|
||||
console.log(` Quality Score: ${(winner.qualityScore * 100).toFixed(1)}%`);
|
||||
console.log(` Avg Response: ${winner.avgResponseTime.toFixed(0)}ms`);
|
||||
console.log(` Total Cost: $${winner.estimatedCost.toFixed(4)}`);
|
||||
console.log(` Success Rate: ${(winner.successRate * 100).toFixed(0)}%\n`);
|
||||
// Recommendations
|
||||
console.log('💡 RECOMMENDATIONS:\n');
|
||||
const fastest = [...results].sort((a, b) => a.avgResponseTime - b.avgResponseTime)[0];
|
||||
const cheapest = [...results].sort((a, b) => a.estimatedCost - b.estimatedCost)[0];
|
||||
const mostReliable = [...results].sort((a, b) => b.successRate - a.successRate)[0];
|
||||
console.log(`⚡ Fastest: ${fastest.modelName} (${fastest.avgResponseTime.toFixed(0)}ms avg)`);
|
||||
console.log(`💰 Cheapest: ${cheapest.modelName} ($${cheapest.estimatedCost.toFixed(4)} total)`);
|
||||
console.log(`🎯 Most Reliable: ${mostReliable.modelName} (${(mostReliable.successRate * 100).toFixed(0)}% success)\n`);
|
||||
console.log('Use case suggestions:');
|
||||
console.log(' • High-volume/cost-sensitive → ' + cheapest.modelName);
|
||||
console.log(' • Latency-critical/real-time → ' + fastest.modelName);
|
||||
console.log(' • Quality-critical/production → ' + winner.modelName + '\n');
|
||||
// Error report
|
||||
const errorsExist = results.some(r => r.errors.length > 0);
|
||||
if (errorsExist) {
|
||||
console.log('⚠️ ERRORS:\n');
|
||||
results.forEach(result => {
|
||||
if (result.errors.length > 0) {
|
||||
console.log(`${result.modelName}:`);
|
||||
result.errors.forEach(err => console.log(` • ${err}`));
|
||||
console.log('');
|
||||
}
|
||||
});
|
||||
}
|
||||
console.log('='.repeat(70));
|
||||
console.log('\n✅ Benchmark complete!\n');
|
||||
console.log('Next steps:');
|
||||
console.log(' 1. Configure your production app with the winning model');
|
||||
console.log(' 2. Set up fallback chains for reliability');
|
||||
console.log(' 3. Monitor performance in production');
|
||||
console.log(' 4. Re-run benchmarks periodically as models improve\n');
|
||||
return results;
|
||||
}
|
||||
// Run the comparison
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
runComparison().catch(error => {
|
||||
console.error('❌ Benchmark failed:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
//# sourceMappingURL=multi-model-comparison.js.map
|
||||
File diff suppressed because one or more lines are too long
338
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/multi-model-comparison.ts
vendored
Normal file
338
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/multi-model-comparison.ts
vendored
Normal file
@@ -0,0 +1,338 @@
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Multi-Model Comparison
|
||||
*
|
||||
* Compare multiple AI models (Gemini, Claude, GPT-4) to find the best
|
||||
* performer for your specific task. Includes benchmarking, cost tracking,
|
||||
* and performance metrics.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Running parallel model comparisons
|
||||
* - Benchmarking quality and speed
|
||||
* - Tracking costs per model
|
||||
* - Selecting the best model for production
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set API keys: GEMINI_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/multi-model-comparison.ts
|
||||
*/
|
||||
|
||||
import { LM, ChainOfThought, Prediction } from 'dspy.ts';
|
||||
import { AgenticSynth } from '@ruvector/agentic-synth';
|
||||
|
||||
// Model configuration with pricing
|
||||
interface ModelConfig {
|
||||
name: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
apiKey: string;
|
||||
costPer1kTokens: number; // Approximate pricing
|
||||
capabilities: string[];
|
||||
}
|
||||
|
||||
// Available models to compare
|
||||
const models: ModelConfig[] = [
|
||||
{
|
||||
name: 'Gemini Flash',
|
||||
provider: 'google-genai',
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: process.env.GEMINI_API_KEY || '',
|
||||
costPer1kTokens: 0.001, // Very cheap
|
||||
capabilities: ['fast', 'cost-effective', 'reasoning']
|
||||
},
|
||||
{
|
||||
name: 'Claude Sonnet 4',
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
apiKey: process.env.ANTHROPIC_API_KEY || '',
|
||||
costPer1kTokens: 0.003, // Medium cost
|
||||
capabilities: ['high-quality', 'reasoning', 'code']
|
||||
},
|
||||
{
|
||||
name: 'GPT-4 Turbo',
|
||||
provider: 'openai',
|
||||
model: 'gpt-4-turbo-preview',
|
||||
apiKey: process.env.OPENAI_API_KEY || '',
|
||||
costPer1kTokens: 0.01, // More expensive
|
||||
capabilities: ['versatile', 'high-quality', 'creative']
|
||||
}
|
||||
];
|
||||
|
||||
// Benchmark results interface
|
||||
interface BenchmarkResult {
|
||||
modelName: string;
|
||||
qualityScore: number;
|
||||
avgResponseTime: number;
|
||||
estimatedCost: number;
|
||||
successRate: number;
|
||||
outputs: Prediction[];
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
// Test cases for comparison
|
||||
const testCases = [
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Wireless Noise-Cancelling Headphones',
|
||||
category: 'Electronics',
|
||||
price: 299
|
||||
},
|
||||
expectedFeatures: ['noise cancellation', 'wireless', 'battery life']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Organic Herbal Tea Collection',
|
||||
category: 'Beverages',
|
||||
price: 24
|
||||
},
|
||||
expectedFeatures: ['organic', 'herbal', 'health benefits']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Professional Camera Tripod',
|
||||
category: 'Photography',
|
||||
price: 149
|
||||
},
|
||||
expectedFeatures: ['stability', 'adjustable', 'professional']
|
||||
},
|
||||
{
|
||||
task: 'product_description',
|
||||
input: {
|
||||
product_name: 'Smart Fitness Tracker',
|
||||
category: 'Wearables',
|
||||
price: 79
|
||||
},
|
||||
expectedFeatures: ['fitness tracking', 'smart features', 'health monitoring']
|
||||
}
|
||||
];
|
||||
|
||||
// Quality evaluation function
|
||||
function evaluateQuality(prediction: Prediction, testCase: typeof testCases[0]): number {
|
||||
let score = 0;
|
||||
const weights = {
|
||||
hasDescription: 0.3,
|
||||
descriptionLength: 0.2,
|
||||
hasFeatures: 0.2,
|
||||
featureCount: 0.15,
|
||||
relevance: 0.15
|
||||
};
|
||||
|
||||
// Check if description exists and is well-formed
|
||||
if (prediction.description && typeof prediction.description === 'string') {
|
||||
score += weights.hasDescription;
|
||||
|
||||
// Optimal length is 80-200 characters
|
||||
const length = prediction.description.length;
|
||||
if (length >= 80 && length <= 200) {
|
||||
score += weights.descriptionLength;
|
||||
} else if (length >= 50 && length <= 250) {
|
||||
score += weights.descriptionLength * 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
// Check features
|
||||
if (prediction.key_features && Array.isArray(prediction.key_features)) {
|
||||
score += weights.hasFeatures;
|
||||
|
||||
// More features is better (up to 5)
|
||||
const featureCount = Math.min(prediction.key_features.length, 5);
|
||||
score += weights.featureCount * (featureCount / 5);
|
||||
}
|
||||
|
||||
// Check relevance to expected features
|
||||
if (prediction.description) {
|
||||
const descLower = prediction.description.toLowerCase();
|
||||
const relevantFeatures = testCase.expectedFeatures.filter(feature =>
|
||||
descLower.includes(feature.toLowerCase())
|
||||
);
|
||||
score += weights.relevance * (relevantFeatures.length / testCase.expectedFeatures.length);
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
// Run benchmark for a single model
|
||||
async function benchmarkModel(config: ModelConfig): Promise<BenchmarkResult> {
|
||||
console.log(`\n🔄 Testing ${config.name}...`);
|
||||
|
||||
const result: BenchmarkResult = {
|
||||
modelName: config.name,
|
||||
qualityScore: 0,
|
||||
avgResponseTime: 0,
|
||||
estimatedCost: 0,
|
||||
successRate: 0,
|
||||
outputs: [],
|
||||
errors: []
|
||||
};
|
||||
|
||||
if (!config.apiKey) {
|
||||
console.log(` ⚠️ API key not found, skipping...`);
|
||||
result.errors.push('API key not configured');
|
||||
return result;
|
||||
}
|
||||
|
||||
const lm = new LM({
|
||||
provider: config.provider as any,
|
||||
model: config.model,
|
||||
apiKey: config.apiKey,
|
||||
temperature: 0.7
|
||||
});
|
||||
|
||||
const signature = {
|
||||
input: 'product_name: string, category: string, price: number',
|
||||
output: 'description: string, key_features: string[]'
|
||||
};
|
||||
|
||||
const generator = new ChainOfThought(signature, { lm });
|
||||
|
||||
const times: number[] = [];
|
||||
let totalScore = 0;
|
||||
let successCount = 0;
|
||||
|
||||
// Run all test cases
|
||||
for (let i = 0; i < testCases.length; i++) {
|
||||
const testCase = testCases[i];
|
||||
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
const prediction = await generator.forward(testCase.input);
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
times.push(duration);
|
||||
result.outputs.push(prediction);
|
||||
|
||||
const score = evaluateQuality(prediction, testCase);
|
||||
totalScore += score;
|
||||
successCount++;
|
||||
|
||||
console.log(` ✓ Test ${i + 1}/${testCases.length} - Score: ${(score * 100).toFixed(0)}% - ${duration}ms`);
|
||||
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
|
||||
result.errors.push(`Test ${i + 1}: ${errorMsg}`);
|
||||
console.log(` ✗ Test ${i + 1}/${testCases.length} - Failed: ${errorMsg}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate metrics
|
||||
result.avgResponseTime = times.length > 0
|
||||
? times.reduce((a, b) => a + b, 0) / times.length
|
||||
: 0;
|
||||
result.qualityScore = successCount > 0 ? totalScore / testCases.length : 0;
|
||||
result.successRate = successCount / testCases.length;
|
||||
|
||||
// Estimate cost (rough approximation based on avg tokens)
|
||||
const avgTokens = 500; // Rough estimate
|
||||
result.estimatedCost = (avgTokens / 1000) * config.costPer1kTokens * testCases.length;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Main comparison function
|
||||
async function runComparison() {
|
||||
console.log('🏆 Multi-Model Comparison Benchmark\n');
|
||||
console.log('=' .repeat(70));
|
||||
console.log('\nComparing models:');
|
||||
models.forEach((m, i) => {
|
||||
console.log(`${i + 1}. ${m.name} - $${m.costPer1kTokens}/1K tokens`);
|
||||
console.log(` Capabilities: ${m.capabilities.join(', ')}`);
|
||||
});
|
||||
console.log(`\nRunning ${testCases.length} test cases per model...\n`);
|
||||
console.log('=' .repeat(70));
|
||||
|
||||
// Run all benchmarks in parallel
|
||||
const results = await Promise.all(
|
||||
models.map(config => benchmarkModel(config))
|
||||
);
|
||||
|
||||
// Display results
|
||||
console.log('\n' + '=' .repeat(70));
|
||||
console.log('\n📊 BENCHMARK RESULTS\n');
|
||||
|
||||
// Sort by quality score
|
||||
const sortedResults = [...results].sort((a, b) => b.qualityScore - a.qualityScore);
|
||||
|
||||
console.log('┌─────────────────────┬──────────┬──────────┬──────────┬──────────┐');
|
||||
console.log('│ Model │ Quality │ Speed │ Cost │ Success │');
|
||||
console.log('├─────────────────────┼──────────┼──────────┼──────────┼──────────┤');
|
||||
|
||||
sortedResults.forEach((result, index) => {
|
||||
const quality = `${(result.qualityScore * 100).toFixed(1)}%`;
|
||||
const speed = `${result.avgResponseTime.toFixed(0)}ms`;
|
||||
const cost = `$${result.estimatedCost.toFixed(4)}`;
|
||||
const success = `${(result.successRate * 100).toFixed(0)}%`;
|
||||
|
||||
const modelName = result.modelName.padEnd(19);
|
||||
const qualityPad = quality.padStart(8);
|
||||
const speedPad = speed.padStart(8);
|
||||
const costPad = cost.padStart(8);
|
||||
const successPad = success.padStart(8);
|
||||
|
||||
const medal = index === 0 ? '🥇' : index === 1 ? '🥈' : index === 2 ? '🥉' : ' ';
|
||||
|
||||
console.log(`│ ${medal} ${modelName}│${qualityPad}│${speedPad}│${costPad}│${successPad}│`);
|
||||
});
|
||||
|
||||
console.log('└─────────────────────┴──────────┴──────────┴──────────┴──────────┘\n');
|
||||
|
||||
// Winner analysis
|
||||
const winner = sortedResults[0];
|
||||
console.log('🎯 WINNER: ' + winner.modelName);
|
||||
console.log(` Quality Score: ${(winner.qualityScore * 100).toFixed(1)}%`);
|
||||
console.log(` Avg Response: ${winner.avgResponseTime.toFixed(0)}ms`);
|
||||
console.log(` Total Cost: $${winner.estimatedCost.toFixed(4)}`);
|
||||
console.log(` Success Rate: ${(winner.successRate * 100).toFixed(0)}%\n`);
|
||||
|
||||
// Recommendations
|
||||
console.log('💡 RECOMMENDATIONS:\n');
|
||||
|
||||
const fastest = [...results].sort((a, b) => a.avgResponseTime - b.avgResponseTime)[0];
|
||||
const cheapest = [...results].sort((a, b) => a.estimatedCost - b.estimatedCost)[0];
|
||||
const mostReliable = [...results].sort((a, b) => b.successRate - a.successRate)[0];
|
||||
|
||||
console.log(`⚡ Fastest: ${fastest.modelName} (${fastest.avgResponseTime.toFixed(0)}ms avg)`);
|
||||
console.log(`💰 Cheapest: ${cheapest.modelName} ($${cheapest.estimatedCost.toFixed(4)} total)`);
|
||||
console.log(`🎯 Most Reliable: ${mostReliable.modelName} (${(mostReliable.successRate * 100).toFixed(0)}% success)\n`);
|
||||
|
||||
console.log('Use case suggestions:');
|
||||
console.log(' • High-volume/cost-sensitive → ' + cheapest.modelName);
|
||||
console.log(' • Latency-critical/real-time → ' + fastest.modelName);
|
||||
console.log(' • Quality-critical/production → ' + winner.modelName + '\n');
|
||||
|
||||
// Error report
|
||||
const errorsExist = results.some(r => r.errors.length > 0);
|
||||
if (errorsExist) {
|
||||
console.log('⚠️ ERRORS:\n');
|
||||
results.forEach(result => {
|
||||
if (result.errors.length > 0) {
|
||||
console.log(`${result.modelName}:`);
|
||||
result.errors.forEach(err => console.log(` • ${err}`));
|
||||
console.log('');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
console.log('=' .repeat(70));
|
||||
console.log('\n✅ Benchmark complete!\n');
|
||||
console.log('Next steps:');
|
||||
console.log(' 1. Configure your production app with the winning model');
|
||||
console.log(' 2. Set up fallback chains for reliability');
|
||||
console.log(' 3. Monitor performance in production');
|
||||
console.log(' 4. Re-run benchmarks periodically as models improve\n');
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// Run the comparison
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
runComparison().catch(error => {
|
||||
console.error('❌ Benchmark failed:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
export { runComparison, benchmarkModel, models };
|
||||
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Self-Learning System
|
||||
*
|
||||
* Build an adaptive AI system that improves its output quality over time
|
||||
* through feedback loops and pattern recognition. This demonstrates how
|
||||
* to create systems that learn from their mistakes and successes.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Building feedback loops
|
||||
* - Tracking quality improvements
|
||||
* - Adaptive prompt engineering
|
||||
* - Learning from examples
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set GEMINI_API_KEY environment variable
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/self-learning-system.ts
|
||||
*/
|
||||
import { Prediction } from 'dspy.ts';
|
||||
interface LearningConfig {
|
||||
targetQualityThreshold: number;
|
||||
maxIterations: number;
|
||||
improvementRate: number;
|
||||
minImprovement: number;
|
||||
}
|
||||
interface Feedback {
|
||||
quality: number;
|
||||
strengths: string[];
|
||||
weaknesses: string[];
|
||||
suggestions: string[];
|
||||
}
|
||||
interface LearningEntry {
|
||||
iteration: number;
|
||||
quality: number;
|
||||
output: Prediction;
|
||||
feedback: Feedback;
|
||||
promptModifications: string[];
|
||||
timestamp: Date;
|
||||
}
|
||||
declare class SelfLearningGenerator {
|
||||
private lm;
|
||||
private history;
|
||||
private config;
|
||||
private basePrompt;
|
||||
private currentPromptAdditions;
|
||||
constructor(config?: Partial<LearningConfig>);
|
||||
private evaluateOutput;
|
||||
private adaptPrompt;
|
||||
private generate;
|
||||
learn(input: any, criteria?: any): Promise<void>;
|
||||
private displaySummary;
|
||||
getLearnedImprovements(): string[];
|
||||
getHistory(): LearningEntry[];
|
||||
}
|
||||
export { SelfLearningGenerator, LearningConfig, LearningEntry };
|
||||
//# sourceMappingURL=self-learning-system.d.ts.map
|
||||
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"self-learning-system.d.ts","sourceRoot":"","sources":["self-learning-system.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAsB,UAAU,EAAE,MAAM,SAAS,CAAC;AAGzD,UAAU,cAAc;IACtB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAGD,UAAU,QAAQ;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB;AAGD,UAAU,aAAa;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,EAAE,QAAQ,CAAC;IACnB,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,EAAE,IAAI,CAAC;CACjB;AAGD,cAAM,qBAAqB;IACzB,OAAO,CAAC,EAAE,CAAK;IACf,OAAO,CAAC,OAAO,CAAuB;IACtC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,sBAAsB,CAAgB;gBAElC,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM;IAmBhD,OAAO,CAAC,cAAc;IA6EtB,OAAO,CAAC,WAAW;YAuBL,QAAQ;IAiBhB,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,QAAQ,GAAE,GAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IA6F1D,OAAO,CAAC,cAAc;IA2CtB,sBAAsB,IAAI,MAAM,EAAE;IAKlC,UAAU,IAAI,aAAa,EAAE;CAG9B;AAiCD,OAAO,EAAE,qBAAqB,EAAE,cAAc,EAAE,aAAa,EAAE,CAAC"}
|
||||
300
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/self-learning-system.js
vendored
Normal file
300
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/self-learning-system.js
vendored
Normal file
@@ -0,0 +1,300 @@
|
||||
"use strict";
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Self-Learning System
|
||||
*
|
||||
* Build an adaptive AI system that improves its output quality over time
|
||||
* through feedback loops and pattern recognition. This demonstrates how
|
||||
* to create systems that learn from their mistakes and successes.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Building feedback loops
|
||||
* - Tracking quality improvements
|
||||
* - Adaptive prompt engineering
|
||||
* - Learning from examples
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set GEMINI_API_KEY environment variable
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/self-learning-system.ts
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.SelfLearningGenerator = void 0;
|
||||
const dspy_ts_1 = require("dspy.ts");
|
||||
// Self-learning generator: generates output, scores it with heuristic
// rules, and feeds concrete guidelines back into the next prompt until a
// target quality is reached or progress stalls.
class SelfLearningGenerator {
    /**
     * @param {object} [config] Partial learning configuration; missing fields
     *   fall back to defaults. Uses ?? (not ||) so explicit 0 values are kept.
     */
    constructor(config = {}) {
        this.history = [];
        this.currentPromptAdditions = [];
        this.config = {
            targetQualityThreshold: config.targetQualityThreshold ?? 0.9,
            maxIterations: config.maxIterations ?? 10,
            improvementRate: config.improvementRate ?? 0.15,
            minImprovement: config.minImprovement ?? 0.02
        };
        this.lm = new dspy_ts_1.LM({
            provider: 'google-genai',
            model: 'gemini-2.0-flash-exp',
            apiKey: process.env.GEMINI_API_KEY || '',
            temperature: 0.8 // Higher temperature for creativity during learning
        });
        this.basePrompt = '';
    }
    /**
     * Score a prediction against heuristic quality rules.
     * @param {object} prediction Model output ({ description, key_features }).
     * @param {object} criteria Reserved for caller-supplied criteria (currently unused).
     * @returns {{quality: number, strengths: string[], weaknesses: string[], suggestions: string[]}}
     *   quality is in [0, 1]; suggestions feed adaptPrompt().
     */
    evaluateOutput(prediction, criteria) {
        let quality = 0;
        const strengths = [];
        const weaknesses = [];
        const suggestions = [];
        // Description: reward optimal length (100-200 chars) and engaging wording.
        if (prediction.description) {
            const desc = prediction.description;
            const length = desc.length;
            if (length >= 100 && length <= 200) {
                quality += 0.3;
                strengths.push('Description length is optimal');
            }
            else if (length < 50) {
                weaknesses.push('Description too short');
                suggestions.push('Expand description with more details');
            }
            else if (length > 250) {
                weaknesses.push('Description too verbose');
                suggestions.push('Make description more concise');
            }
            else {
                quality += 0.15; // Acceptable but not ideal length.
            }
            // Check for emotional/engaging language
            const emotionalWords = ['amazing', 'powerful', 'innovative', 'premium', 'exceptional'];
            const hasEmotionalLanguage = emotionalWords.some(word => desc.toLowerCase().includes(word));
            if (hasEmotionalLanguage) {
                quality += 0.2;
                strengths.push('Uses engaging language');
            }
            else {
                weaknesses.push('Could be more engaging');
                suggestions.push('Add more descriptive and emotional words');
            }
        }
        else {
            weaknesses.push('Missing description');
            suggestions.push('Generate a complete description');
        }
        // Features: reward 4-6 items, each concise (10-50 chars).
        if (prediction.key_features && Array.isArray(prediction.key_features)) {
            const features = prediction.key_features;
            if (features.length >= 4 && features.length <= 6) {
                quality += 0.3;
                strengths.push('Optimal number of features');
            }
            else if (features.length < 3) {
                weaknesses.push('Too few features');
                suggestions.push('Include at least 4 key features');
            }
            else {
                quality += 0.15;
            }
            // Check feature quality (should be concise)
            const wellFormedFeatures = features.filter(f => f.length >= 10 && f.length <= 50);
            if (wellFormedFeatures.length === features.length) {
                quality += 0.2;
                strengths.push('All features are well-formed');
            }
            else {
                weaknesses.push('Some features need better formatting');
                suggestions.push('Keep features concise (10-50 chars)');
            }
        }
        else {
            weaknesses.push('Missing features');
            suggestions.push('Generate key features list');
        }
        return { quality, strengths, weaknesses, suggestions };
    }
    /**
     * Translate evaluator suggestions into prompt guidelines.
     * Uses an exact-match table: the previous substring checks misrouted
     * 'Make description more concise' to the feature-formatting branch (it
     * contains 'concise') and left the 'verbose' branch unreachable.
     * @returns {string[]} Deduplicated guidelines.
     */
    adaptPrompt(feedback) {
        const guidelineBySuggestion = {
            'Expand description with more details': 'Write detailed descriptions (100-200 characters)',
            'Generate a complete description': 'Write detailed descriptions (100-200 characters)',
            'Make description more concise': 'Keep descriptions concise and focused',
            'Add more descriptive and emotional words': 'Use descriptive, engaging language',
            'Include at least 4 key features': 'Include 4-6 specific, measurable key features',
            'Generate key features list': 'Include 4-6 specific, measurable key features',
            'Keep features concise (10-50 chars)': 'Format features as short, punchy statements'
        };
        const modifications = [];
        feedback.suggestions.forEach(suggestion => {
            const guideline = guidelineBySuggestion[suggestion];
            if (guideline) {
                modifications.push(guideline);
            }
        });
        // Remove duplicates
        return [...new Set(modifications)];
    }
    /**
     * Generate one prediction using the base signature plus any learned
     * guidelines accumulated so far.
     */
    async generate(input) {
        // Build enhanced signature with learned improvements
        const enhancedInstructions = this.currentPromptAdditions.length > 0
            ? '\n\nImportant guidelines:\n' + this.currentPromptAdditions.map((s, i) => `${i + 1}. ${s}`).join('\n')
            : '';
        const signature = {
            input: 'product_name: string, category: string, price: number',
            output: 'description: string, key_features: string[]',
            description: 'Generate compelling product descriptions' + enhancedInstructions
        };
        const generator = new dspy_ts_1.ChainOfThought(signature, { lm: this.lm });
        return await generator.forward(input);
    }
    /**
     * Main learning loop: generate, evaluate, adapt. Stops on target quality,
     * stalled improvement, or maxIterations.
     */
    async learn(input, criteria = {}) {
        console.log('🧠 Starting Self-Learning Session\n');
        console.log('='.repeat(70));
        console.log(`\nTarget Quality: ${(this.config.targetQualityThreshold * 100).toFixed(0)}%`);
        console.log(`Max Iterations: ${this.config.maxIterations}`);
        console.log(`Input: ${JSON.stringify(input, null, 2)}\n`);
        console.log('='.repeat(70) + '\n');
        let iteration = 0;
        let previousQuality = 0;
        while (iteration < this.config.maxIterations) {
            iteration++;
            console.log(`\n📊 Iteration ${iteration}/${this.config.maxIterations}`);
            console.log('─'.repeat(70));
            // Generate and time one output.
            const startTime = Date.now();
            const output = await this.generate(input);
            const duration = Date.now() - startTime;
            // Evaluate
            const feedback = this.evaluateOutput(output, criteria);
            // Record this iteration for the summary and getHistory().
            this.history.push({
                iteration,
                quality: feedback.quality,
                output,
                feedback,
                promptModifications: [...this.currentPromptAdditions],
                timestamp: new Date()
            });
            // Display results
            console.log(`\n⏱️ Generation time: ${duration}ms`);
            console.log(`\n📝 Output:`);
            console.log(`  Description: ${output.description || 'N/A'}`);
            if (output.key_features) {
                console.log(`  Features:`);
                output.key_features.forEach((f) => console.log(`    • ${f}`));
            }
            console.log(`\n📈 Quality: ${(feedback.quality * 100).toFixed(1)}%`);
            if (feedback.strengths.length > 0) {
                console.log(`\n✅ Strengths:`);
                feedback.strengths.forEach(s => console.log(`  • ${s}`));
            }
            if (feedback.weaknesses.length > 0) {
                console.log(`\n⚠️ Weaknesses:`);
                feedback.weaknesses.forEach(w => console.log(`  • ${w}`));
            }
            // Stop when the configured quality threshold is met.
            if (feedback.quality >= this.config.targetQualityThreshold) {
                console.log(`\n🎯 Target quality reached!`);
                break;
            }
            // Stop when improvement stalls (only meaningful after iteration 1).
            const improvement = feedback.quality - previousQuality;
            if (iteration > 1 && improvement < this.config.minImprovement) {
                console.log(`\n⚠️ Improvement too small (${(improvement * 100).toFixed(1)}%), stopping...`);
                break;
            }
            // Fold new guidelines into the prompt for the next iteration.
            const modifications = this.adaptPrompt(feedback);
            if (modifications.length > 0) {
                console.log(`\n🔧 Adapting strategy:`);
                modifications.forEach(m => console.log(`  • ${m}`));
                modifications.forEach(m => {
                    if (!this.currentPromptAdditions.includes(m)) {
                        this.currentPromptAdditions.push(m);
                    }
                });
            }
            previousQuality = feedback.quality;
            // Brief pause between iterations
            await new Promise(resolve => setTimeout(resolve, 1000));
        }
        // Final summary
        this.displaySummary();
    }
    // Print a summary of the session: quality progression and the
    // guidelines the system accumulated.
    displaySummary() {
        console.log('\n\n' + '='.repeat(70));
        console.log('\n🎓 LEARNING SUMMARY\n');
        if (this.history.length === 0) {
            console.log('No learning history available.\n');
            return;
        }
        const firstQuality = this.history[0].quality;
        const lastQuality = this.history[this.history.length - 1].quality;
        const improvement = lastQuality - firstQuality;
        // Guard against division by zero when the first iteration scored 0.
        const improvementPercent = firstQuality > 0 ? (improvement / firstQuality) * 100 : 0;
        console.log(`Total Iterations: ${this.history.length}`);
        console.log(`Starting Quality: ${(firstQuality * 100).toFixed(1)}%`);
        console.log(`Final Quality: ${(lastQuality * 100).toFixed(1)}%`);
        console.log(`Improvement: ${improvement >= 0 ? '+' : ''}${(improvement * 100).toFixed(1)}% (${improvementPercent >= 0 ? '+' : ''}${improvementPercent.toFixed(1)}%)`);
        console.log(`\n📊 Quality Progression:`);
        this.history.forEach(entry => {
            const bar = '█'.repeat(Math.floor(entry.quality * 50));
            const percent = (entry.quality * 100).toFixed(1);
            console.log(`  Iteration ${entry.iteration}: ${bar} ${percent}%`);
        });
        console.log(`\n🔧 Learned Improvements (${this.currentPromptAdditions.length}):`);
        this.currentPromptAdditions.forEach((mod, i) => {
            console.log(`  ${i + 1}. ${mod}`);
        });
        console.log('\n💡 Key Insights:');
        if (improvement > 0) {
            console.log(`  ✓ System successfully learned and improved`);
            console.log(`  ✓ Quality increased by ${(improvement * 100).toFixed(1)}%`);
        }
        console.log(`  ✓ Discovered ${this.currentPromptAdditions.length} optimization strategies`);
        console.log(`  ✓ These improvements can be applied to future generations\n`);
        console.log('='.repeat(70) + '\n');
    }
    // Snapshot of learned prompt guidelines (copy; callers may mutate safely).
    getLearnedImprovements() {
        return [...this.currentPromptAdditions];
    }
    // Snapshot of the full learning history (shallow copy).
    getHistory() {
        return [...this.history];
    }
}
|
||||
exports.SelfLearningGenerator = SelfLearningGenerator;
|
||||
// Demo entry point: run a single learning session on a sample product,
// then print the guidelines the system learned so they can be reused.
async function runSelfLearning() {
    const learner = new SelfLearningGenerator({
        targetQualityThreshold: 0.85,
        maxIterations: 8,
        improvementRate: 0.15,
        minImprovement: 0.03
    });
    // Sample input for the learning session.
    await learner.learn({
        product_name: 'Professional DSLR Camera',
        category: 'Photography',
        price: 1299
    });
    // Surface the accumulated prompt guidelines for reuse elsewhere.
    const learned = learner.getLearnedImprovements();
    console.log('📝 Learned improvements can be reused:\n');
    console.log(JSON.stringify(learned, null, 2) + '\n');
}
|
||||
// Run the example only when this file is the program entry point.
// This is a CommonJS module ("use strict" + exports/require), where
// `import.meta` is a syntax error — use the CommonJS idiom instead.
if (require.main === module) {
    runSelfLearning().catch(error => {
        console.error('❌ Learning failed:', error);
        process.exit(1);
    });
}
|
||||
//# sourceMappingURL=self-learning-system.js.map
|
||||
File diff suppressed because one or more lines are too long
370
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/self-learning-system.ts
vendored
Normal file
370
vendor/ruvector/npm/packages/agentic-synth-examples/examples/intermediate/self-learning-system.ts
vendored
Normal file
@@ -0,0 +1,370 @@
|
||||
/**
|
||||
* INTERMEDIATE TUTORIAL: Self-Learning System
|
||||
*
|
||||
* Build an adaptive AI system that improves its output quality over time
|
||||
* through feedback loops and pattern recognition. This demonstrates how
|
||||
* to create systems that learn from their mistakes and successes.
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Building feedback loops
|
||||
* - Tracking quality improvements
|
||||
* - Adaptive prompt engineering
|
||||
* - Learning from examples
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Set GEMINI_API_KEY environment variable
|
||||
* - npm install dspy.ts @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/intermediate/self-learning-system.ts
|
||||
*/
|
||||
|
||||
import { LM, ChainOfThought, Prediction } from 'dspy.ts';
|
||||
|
||||
// Configuration knobs for one learning session.
interface LearningConfig {
  targetQualityThreshold: number; // Stop when this quality is reached (0..1)
  maxIterations: number;          // Maximum learning iterations
  improvementRate: number;        // How aggressively to adjust (0.1 = 10% per iteration)
  minImprovement: number;         // Minimum improvement to continue
}

// Structured feedback produced by evaluating one generated output.
interface Feedback {
  quality: number;       // Aggregate heuristic score in [0, 1]
  strengths: string[];   // What the output did well
  weaknesses: string[];  // What the output got wrong
  suggestions: string[]; // Concrete fixes, consumed by prompt adaptation
}

// One iteration's record in the learning history.
interface LearningEntry {
  iteration: number;             // 1-based iteration counter
  quality: number;               // Quality score for this iteration
  output: Prediction;            // Raw model output
  feedback: Feedback;            // Evaluation of that output
  promptModifications: string[]; // Prompt additions in effect at this point
  timestamp: Date;               // When the iteration ran
}
|
||||
|
||||
// Self-learning generator class
|
||||
class SelfLearningGenerator {
|
||||
private lm: LM;
|
||||
private history: LearningEntry[] = [];
|
||||
private config: LearningConfig;
|
||||
private basePrompt: string;
|
||||
private currentPromptAdditions: string[] = [];
|
||||
|
||||
constructor(config: Partial<LearningConfig> = {}) {
|
||||
this.config = {
|
||||
targetQualityThreshold: config.targetQualityThreshold || 0.9,
|
||||
maxIterations: config.maxIterations || 10,
|
||||
improvementRate: config.improvementRate || 0.15,
|
||||
minImprovement: config.minImprovement || 0.02
|
||||
};
|
||||
|
||||
this.lm = new LM({
|
||||
provider: 'google-genai',
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: process.env.GEMINI_API_KEY || '',
|
||||
temperature: 0.8 // Higher temperature for creativity during learning
|
||||
});
|
||||
|
||||
this.basePrompt = '';
|
||||
}
|
||||
|
||||
// Evaluate the quality of generated output
|
||||
private evaluateOutput(prediction: Prediction, criteria: any): Feedback {
|
||||
let quality = 0;
|
||||
const strengths: string[] = [];
|
||||
const weaknesses: string[] = [];
|
||||
const suggestions: string[] = [];
|
||||
|
||||
// Check description quality
|
||||
if (prediction.description) {
|
||||
const desc = prediction.description;
|
||||
const length = desc.length;
|
||||
|
||||
if (length >= 100 && length <= 200) {
|
||||
quality += 0.3;
|
||||
strengths.push('Description length is optimal');
|
||||
} else if (length < 50) {
|
||||
weaknesses.push('Description too short');
|
||||
suggestions.push('Expand description with more details');
|
||||
} else if (length > 250) {
|
||||
weaknesses.push('Description too verbose');
|
||||
suggestions.push('Make description more concise');
|
||||
} else {
|
||||
quality += 0.15;
|
||||
}
|
||||
|
||||
// Check for emotional/engaging language
|
||||
const emotionalWords = ['amazing', 'powerful', 'innovative', 'premium', 'exceptional'];
|
||||
const hasEmotionalLanguage = emotionalWords.some(word =>
|
||||
desc.toLowerCase().includes(word)
|
||||
);
|
||||
|
||||
if (hasEmotionalLanguage) {
|
||||
quality += 0.2;
|
||||
strengths.push('Uses engaging language');
|
||||
} else {
|
||||
weaknesses.push('Could be more engaging');
|
||||
suggestions.push('Add more descriptive and emotional words');
|
||||
}
|
||||
} else {
|
||||
weaknesses.push('Missing description');
|
||||
suggestions.push('Generate a complete description');
|
||||
}
|
||||
|
||||
// Check features
|
||||
if (prediction.key_features && Array.isArray(prediction.key_features)) {
|
||||
const features = prediction.key_features;
|
||||
|
||||
if (features.length >= 4 && features.length <= 6) {
|
||||
quality += 0.3;
|
||||
strengths.push('Optimal number of features');
|
||||
} else if (features.length < 3) {
|
||||
weaknesses.push('Too few features');
|
||||
suggestions.push('Include at least 4 key features');
|
||||
} else {
|
||||
quality += 0.15;
|
||||
}
|
||||
|
||||
// Check feature quality (should be concise)
|
||||
const wellFormedFeatures = features.filter(f =>
|
||||
f.length >= 10 && f.length <= 50
|
||||
);
|
||||
|
||||
if (wellFormedFeatures.length === features.length) {
|
||||
quality += 0.2;
|
||||
strengths.push('All features are well-formed');
|
||||
} else {
|
||||
weaknesses.push('Some features need better formatting');
|
||||
suggestions.push('Keep features concise (10-50 chars)');
|
||||
}
|
||||
} else {
|
||||
weaknesses.push('Missing features');
|
||||
suggestions.push('Generate key features list');
|
||||
}
|
||||
|
||||
return { quality, strengths, weaknesses, suggestions };
|
||||
}
|
||||
|
||||
// Adapt prompt based on feedback
|
||||
private adaptPrompt(feedback: Feedback): string[] {
|
||||
const modifications: string[] = [];
|
||||
|
||||
// Add specific instructions based on weaknesses
|
||||
feedback.suggestions.forEach(suggestion => {
|
||||
if (suggestion.includes('short')) {
|
||||
modifications.push('Write detailed descriptions (100-200 characters)');
|
||||
} else if (suggestion.includes('verbose')) {
|
||||
modifications.push('Keep descriptions concise and focused');
|
||||
} else if (suggestion.includes('engaging')) {
|
||||
modifications.push('Use descriptive, engaging language');
|
||||
} else if (suggestion.includes('features')) {
|
||||
modifications.push('Include 4-6 specific, measurable key features');
|
||||
} else if (suggestion.includes('concise')) {
|
||||
modifications.push('Format features as short, punchy statements');
|
||||
}
|
||||
});
|
||||
|
||||
// Remove duplicates
|
||||
return [...new Set(modifications)];
|
||||
}
|
||||
|
||||
// Generate with current prompt
|
||||
private async generate(input: any): Promise<Prediction> {
|
||||
// Build enhanced signature with learned improvements
|
||||
const enhancedInstructions = this.currentPromptAdditions.length > 0
|
||||
? '\n\nImportant guidelines:\n' + this.currentPromptAdditions.map((s, i) => `${i + 1}. ${s}`).join('\n')
|
||||
: '';
|
||||
|
||||
const signature = {
|
||||
input: 'product_name: string, category: string, price: number',
|
||||
output: 'description: string, key_features: string[]',
|
||||
description: 'Generate compelling product descriptions' + enhancedInstructions
|
||||
};
|
||||
|
||||
const generator = new ChainOfThought(signature, { lm: this.lm });
|
||||
return await generator.forward(input);
|
||||
}
|
||||
|
||||
// Main learning loop
|
||||
async learn(input: any, criteria: any = {}): Promise<void> {
|
||||
console.log('🧠 Starting Self-Learning Session\n');
|
||||
console.log('=' .repeat(70));
|
||||
console.log(`\nTarget Quality: ${(this.config.targetQualityThreshold * 100).toFixed(0)}%`);
|
||||
console.log(`Max Iterations: ${this.config.maxIterations}`);
|
||||
console.log(`Input: ${JSON.stringify(input, null, 2)}\n`);
|
||||
console.log('=' .repeat(70) + '\n');
|
||||
|
||||
let iteration = 0;
|
||||
let previousQuality = 0;
|
||||
|
||||
while (iteration < this.config.maxIterations) {
|
||||
iteration++;
|
||||
console.log(`\n📊 Iteration ${iteration}/${this.config.maxIterations}`);
|
||||
console.log('─'.repeat(70));
|
||||
|
||||
// Generate output
|
||||
const startTime = Date.now();
|
||||
const output = await this.generate(input);
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
// Evaluate
|
||||
const feedback = this.evaluateOutput(output, criteria);
|
||||
|
||||
// Store in history
|
||||
this.history.push({
|
||||
iteration,
|
||||
quality: feedback.quality,
|
||||
output,
|
||||
feedback,
|
||||
promptModifications: [...this.currentPromptAdditions],
|
||||
timestamp: new Date()
|
||||
});
|
||||
|
||||
// Display results
|
||||
console.log(`\n⏱️ Generation time: ${duration}ms`);
|
||||
console.log(`\n📝 Output:`);
|
||||
console.log(` Description: ${output.description || 'N/A'}`);
|
||||
if (output.key_features) {
|
||||
console.log(` Features:`);
|
||||
output.key_features.forEach((f: string) => console.log(` • ${f}`));
|
||||
}
|
||||
|
||||
console.log(`\n📈 Quality: ${(feedback.quality * 100).toFixed(1)}%`);
|
||||
|
||||
if (feedback.strengths.length > 0) {
|
||||
console.log(`\n✅ Strengths:`);
|
||||
feedback.strengths.forEach(s => console.log(` • ${s}`));
|
||||
}
|
||||
|
||||
if (feedback.weaknesses.length > 0) {
|
||||
console.log(`\n⚠️ Weaknesses:`);
|
||||
feedback.weaknesses.forEach(w => console.log(` • ${w}`));
|
||||
}
|
||||
|
||||
// Check if target reached
|
||||
if (feedback.quality >= this.config.targetQualityThreshold) {
|
||||
console.log(`\n🎯 Target quality reached!`);
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for improvement
|
||||
const improvement = feedback.quality - previousQuality;
|
||||
if (iteration > 1 && improvement < this.config.minImprovement) {
|
||||
console.log(`\n⚠️ Improvement too small (${(improvement * 100).toFixed(1)}%), stopping...`);
|
||||
break;
|
||||
}
|
||||
|
||||
// Adapt for next iteration
|
||||
const modifications = this.adaptPrompt(feedback);
|
||||
if (modifications.length > 0) {
|
||||
console.log(`\n🔧 Adapting strategy:`);
|
||||
modifications.forEach(m => console.log(` • ${m}`));
|
||||
|
||||
// Add new modifications
|
||||
modifications.forEach(m => {
|
||||
if (!this.currentPromptAdditions.includes(m)) {
|
||||
this.currentPromptAdditions.push(m);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
previousQuality = feedback.quality;
|
||||
|
||||
// Brief pause between iterations
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
// Final summary
|
||||
this.displaySummary();
|
||||
}
|
||||
|
||||
// Display learning summary
|
||||
private displaySummary(): void {
|
||||
console.log('\n\n' + '=' .repeat(70));
|
||||
console.log('\n🎓 LEARNING SUMMARY\n');
|
||||
|
||||
if (this.history.length === 0) {
|
||||
console.log('No learning history available.\n');
|
||||
return;
|
||||
}
|
||||
|
||||
const firstQuality = this.history[0].quality;
|
||||
const lastQuality = this.history[this.history.length - 1].quality;
|
||||
const improvement = lastQuality - firstQuality;
|
||||
const improvementPercent = (improvement / firstQuality) * 100;
|
||||
|
||||
console.log(`Total Iterations: ${this.history.length}`);
|
||||
console.log(`Starting Quality: ${(firstQuality * 100).toFixed(1)}%`);
|
||||
console.log(`Final Quality: ${(lastQuality * 100).toFixed(1)}%`);
|
||||
console.log(`Improvement: ${improvement >= 0 ? '+' : ''}${(improvement * 100).toFixed(1)}% (${improvementPercent >= 0 ? '+' : ''}${improvementPercent.toFixed(1)}%)`);
|
||||
|
||||
console.log(`\n📊 Quality Progression:`);
|
||||
this.history.forEach(entry => {
|
||||
const bar = '█'.repeat(Math.floor(entry.quality * 50));
|
||||
const percent = (entry.quality * 100).toFixed(1);
|
||||
console.log(` Iteration ${entry.iteration}: ${bar} ${percent}%`);
|
||||
});
|
||||
|
||||
console.log(`\n🔧 Learned Improvements (${this.currentPromptAdditions.length}):`);
|
||||
this.currentPromptAdditions.forEach((mod, i) => {
|
||||
console.log(` ${i + 1}. ${mod}`);
|
||||
});
|
||||
|
||||
console.log('\n💡 Key Insights:');
|
||||
if (improvement > 0) {
|
||||
console.log(` ✓ System successfully learned and improved`);
|
||||
console.log(` ✓ Quality increased by ${(improvement * 100).toFixed(1)}%`);
|
||||
}
|
||||
console.log(` ✓ Discovered ${this.currentPromptAdditions.length} optimization strategies`);
|
||||
console.log(` ✓ These improvements can be applied to future generations\n`);
|
||||
|
||||
console.log('=' .repeat(70) + '\n');
|
||||
}
|
||||
|
||||
// Get the learned prompt modifications
|
||||
getLearnedImprovements(): string[] {
|
||||
return [...this.currentPromptAdditions];
|
||||
}
|
||||
|
||||
// Get learning history
|
||||
getHistory(): LearningEntry[] {
|
||||
return [...this.history];
|
||||
}
|
||||
}
|
||||
|
||||
// Main execution
|
||||
async function runSelfLearning() {
|
||||
const generator = new SelfLearningGenerator({
|
||||
targetQualityThreshold: 0.85,
|
||||
maxIterations: 8,
|
||||
improvementRate: 0.15,
|
||||
minImprovement: 0.03
|
||||
});
|
||||
|
||||
const testProduct = {
|
||||
product_name: 'Professional DSLR Camera',
|
||||
category: 'Photography',
|
||||
price: 1299
|
||||
};
|
||||
|
||||
await generator.learn(testProduct);
|
||||
|
||||
// Save learned improvements
|
||||
const improvements = generator.getLearnedImprovements();
|
||||
console.log('📝 Learned improvements can be reused:\n');
|
||||
console.log(JSON.stringify(improvements, null, 2) + '\n');
|
||||
}
|
||||
|
||||
// Run the example
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
runSelfLearning().catch(error => {
|
||||
console.error('❌ Learning failed:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
export { SelfLearningGenerator, LearningConfig, LearningEntry };
|
||||
Reference in New Issue
Block a user