"use strict"; /** * Self-Improving Data Generation with Feedback Loops * * This example demonstrates: * - Quality scoring and regeneration * - A/B testing data for model improvement * - Pattern learning from production data * - Adaptive schema evolution */ Object.defineProperty(exports, "__esModule", { value: true }); exports.qualityScoringLoop = qualityScoringLoop; exports.abTestingData = abTestingData; exports.patternLearningLoop = patternLearningLoop; exports.adaptiveSchemaEvolution = adaptiveSchemaEvolution; exports.activeLearningData = activeLearningData; exports.continuousEvaluationData = continuousEvaluationData; exports.runAllFeedbackLoopExamples = runAllFeedbackLoopExamples; const index_js_1 = require("../../src/index.js"); // ============================================================================ // Example 1: Quality Scoring and Regeneration // ============================================================================ /** * Generate data with quality scores and regenerate low-quality samples */ async function qualityScoringLoop() { console.log('\n⭐ Example 1: Quality Scoring and Regeneration\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', cacheStrategy: 'memory', }); // Initial generation with quality metrics const initialData = await synth.generateStructured({ count: 100, schema: { id: 'UUID', content: 'product description (2-3 sentences)', category: 'electronics | clothing | home | sports', price: 'number (10-1000)', // Quality metrics (would be computed by quality model) quality_score: 'number (0-1, overall quality)', metrics: { coherence: 'number (0-1)', relevance: 'number (0-1)', completeness: 'number (0-1)', grammar: 'number (0-1)', }, // Metadata generation_attempt: 'number (1)', timestamp: 'ISO timestamp', }, constraints: [ 'quality_score should be average of metrics', '20% of samples should have quality_score < 0.7 (for regeneration demo)', 'grammar score should be high (0.8-1.0)', ], }); console.log('Initial Generation:'); console.log(`- Total samples: ${initialData.data.length}`); console.log(`- Average quality: ${calculateAverage(initialData.data, 'quality_score')}`); // Identify low-quality samples const lowQuality = initialData.data.filter((d) => d.quality_score < 0.7); console.log(`- Low quality samples: ${lowQuality.length}`); if (lowQuality.length > 0) { // Regenerate low-quality samples with feedback const regenerated = await synth.generateStructured({ count: lowQuality.length, schema: { id: 'UUID', content: 'product description (2-3 sentences, improve coherence and completeness)', category: 'electronics | clothing | home | sports', price: 'number (10-1000)', // Quality metrics quality_score: 'number (0.7-1.0, improved quality)', metrics: { coherence: 'number (0.7-1.0)', relevance: 'number (0.7-1.0)', completeness: 'number (0.7-1.0)', grammar: 'number (0.9-1.0)', }, // Track regeneration generation_attempt: 'number (2)', previous_issues: ['array of issues that were fixed'], timestamp: 'ISO timestamp', }, constraints: [ 'All samples should have quality_score >= 0.7', 'Focus on improving coherence and completeness', 'Maintain high grammar scores', ], }); console.log('\nRegenerated Samples:'); console.log(`- Count: ${regenerated.data.length}`); console.log(`- Average quality: ${calculateAverage(regenerated.data, 'quality_score')}`); console.log(`- Quality improvement: ${calculateAverage(regenerated.data, 'quality_score') - calculateAverage(lowQuality, 'quality_score')}`); } console.log('\nāœ… Quality scoring loop complete'); } // ============================================================================ // Example 2: A/B Testing Data for Model Improvement // ============================================================================ /** * Generate A/B test data to improve model performance */ async function abTestingData() { console.log('\nšŸ”¬ Example 2: A/B Testing Data Generation\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', }); // Generate A/B test scenarios const abTests = await synth.generateStructured({ count: 200, schema: { test_id: 'UUID', variant: 'A | B', // Input features user_profile: { age: 'number (18-80)', location: 'US state', interests: ['array of 2-5 interests'], past_purchases: 'number (0-100)', }, // Model predictions model_variant: 'baseline_model | improved_model (based on variant)', prediction: 'number (0-1, predicted conversion probability)', confidence: 'number (0-1)', // Actual outcome actual_conversion: 'boolean', conversion_value: 'number (0-500) if converted', // Performance metrics prediction_error: 'number (absolute error)', calibration_error: 'number', // Metadata timestamp: 'ISO timestamp', feature_version: 'v1.0 | v1.1', }, constraints: [ 'Variant A should use baseline_model', 'Variant B should use improved_model', 'Variant B should have higher accuracy (lower prediction_error)', 'Variant B should have better calibration', 'Distribution of user_profile should be similar across variants', 'prediction should correlate with actual_conversion', ], }); // Analyze A/B test results const variantA = abTests.data.filter((d) => d.variant === 'A'); const variantB = abTests.data.filter((d) => d.variant === 'B'); console.log('A/B Test Results:'); console.log(`\nVariant A (Baseline):`); console.log(` - Samples: ${variantA.length}`); console.log(` - Avg prediction error: ${calculateAverage(variantA, 'prediction_error').toFixed(4)}`); console.log(` - Conversion rate: ${calculateConversionRate(variantA)}%`); console.log(`\nVariant B (Improved):`); console.log(` - Samples: ${variantB.length}`); console.log(` - Avg prediction error: ${calculateAverage(variantB, 'prediction_error').toFixed(4)}`); console.log(` - Conversion rate: ${calculateConversionRate(variantB)}%`); const improvement = (((calculateAverage(variantA, 'prediction_error') - calculateAverage(variantB, 'prediction_error')) / calculateAverage(variantA, 'prediction_error')) * 100); console.log(`\nImprovement: ${improvement.toFixed(2)}% reduction in error`); console.log('āœ… A/B testing data generated'); return abTests; } // ============================================================================ // Example 3: Pattern Learning from Production Data // ============================================================================ /** * Learn patterns from production data and generate similar synthetic data */ async function patternLearningLoop() { console.log('\n🧠 Example 3: Pattern Learning from Production\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', }); // Simulate production data patterns const productionPatterns = { common_sequences: [ ['login', 'browse', 'add_to_cart', 'checkout'], ['login', 'browse', 'search', 'view_product'], ['browse', 'search', 'add_to_cart', 'abandon'], ], time_distributions: { peak_hours: [9, 12, 18, 20], avg_session_duration: 420, // seconds bounce_rate: 0.35, }, user_segments: { frequent_buyers: 0.15, casual_browsers: 0.50, one_time_visitors: 0.35, }, }; // Generate synthetic data matching learned patterns const syntheticData = await synth.generateStructured({ count: 500, schema: { session_id: 'UUID', user_segment: 'frequent_buyer | casual_browser | one_time_visitor', // Event sequence following learned patterns events: [ { event_type: 'login | browse | search | add_to_cart | checkout | abandon | view_product', timestamp: 'ISO timestamp', duration: 'number (5-300, seconds)', }, ], // Session metrics total_duration: 'number (60-900, seconds, should match avg from patterns)', hour_of_day: 'number (0-23, biased toward peak hours)', bounced: 'boolean (35% true)', converted: 'boolean', // Pattern conformance matches_common_sequence: 'boolean (80% should be true)', pattern_id: 'number (0-2) if matches sequence', timestamp: 'ISO timestamp', }, constraints: [ 'User segment distribution should match: 15% frequent_buyer, 50% casual_browser, 35% one_time_visitor', 'Hour of day should be biased toward 9, 12, 18, 20', 'Event sequences should follow common patterns 80% of time', 'total_duration should be around 420 seconds on average', 'bounce_rate should be approximately 35%', 'frequent_buyers should have higher conversion rate', ], }); console.log('Pattern-Learned Synthetic Data:'); console.log(`- Total sessions: ${syntheticData.data.length}`); console.log(`- User segment distribution:`, getUserSegmentDist(syntheticData.data)); console.log(`- Avg session duration: ${calculateAverage(syntheticData.data, 'total_duration').toFixed(0)}s`); console.log(`- Bounce rate: ${calculateBounceRate(syntheticData.data)}%`); console.log(`- Pattern conformance: ${calculatePatternConformance(syntheticData.data)}%`); console.log('\nāœ… Pattern learning complete'); return syntheticData; } // ============================================================================ // Example 4: Adaptive Schema Evolution // ============================================================================ /** * Evolve data schema based on feedback and changing requirements */ async function adaptiveSchemaEvolution() { console.log('\nšŸ”„ Example 4: Adaptive Schema Evolution\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', }); // Version 1: Initial schema console.log('Schema V1 (Initial):'); const v1Data = await synth.generateStructured({ count: 50, schema: { id: 'UUID', name: 'full name', email: 'valid email', age: 'number (18-80)', schema_version: 'string (v1.0)', }, }); console.log(` - Generated ${v1Data.data.length} records`); console.log(` - Fields: id, name, email, age`); // Simulate feedback: need more demographic info console.log('\nFeedback: Need location and preferences'); // Version 2: Add fields based on feedback console.log('\nSchema V2 (Enhanced):'); const v2Data = await synth.generateStructured({ count: 50, schema: { id: 'UUID', name: 'full name', email: 'valid email', age: 'number (18-80)', // New fields location: { city: 'city name', state: 'US state', country: 'country name', }, preferences: ['array of 3-5 preference categories'], schema_version: 'string (v2.0)', }, }); console.log(` - Generated ${v2Data.data.length} records`); console.log(` - Fields: id, name, email, age, location, preferences`); // Simulate more feedback: need behavioral data console.log('\nFeedback: Need behavioral and engagement metrics'); // Version 3: Add behavioral tracking console.log('\nSchema V3 (Full Featured):'); const v3Data = await synth.generateStructured({ count: 50, schema: { id: 'UUID', name: 'full name', email: 'valid email', age: 'number (18-80)', location: { city: 'city name', state: 'US state', country: 'country name', }, preferences: ['array of 3-5 preference categories'], // New behavioral fields behavioral_metrics: { total_sessions: 'number (0-500)', avg_session_duration: 'number (60-3600, seconds)', last_active: 'ISO timestamp (within last 30 days)', engagement_score: 'number (0-100)', ltv: 'number (0-10000, lifetime value)', }, // New segmentation user_segment: 'high_value | medium_value | low_value | churned', predicted_churn: 'boolean', churn_risk_score: 'number (0-1)', schema_version: 'string (v3.0)', }, constraints: [ 'engagement_score should correlate with total_sessions', 'ltv should be higher for high_value segment', 'churned users should have old last_active dates', 'churn_risk_score should be high for predicted_churn=true', ], }); console.log(` - Generated ${v3Data.data.length} records`); console.log(` - Fields: All previous + behavioral_metrics, user_segment, churn predictions`); // Show schema evolution console.log('\nSchema Evolution Summary:'); console.log(' V1 → V2: Added location and preferences'); console.log(' V2 → V3: Added behavioral metrics and churn prediction'); console.log(' Field count: 5 → 7 → 12'); console.log('\nāœ… Adaptive schema evolution complete'); return { v1: v1Data, v2: v2Data, v3: v3Data }; } // ============================================================================ // Example 5: Active Learning Data Generation // ============================================================================ /** * Generate data for active learning - focus on uncertain/informative samples */ async function activeLearningData() { console.log('\nšŸŽÆ Example 5: Active Learning Data\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', }); // Generate samples with uncertainty scores const activeLearningData = await synth.generateStructured({ count: 300, schema: { sample_id: 'UUID', // Features features: { feature_1: 'number (0-100)', feature_2: 'number (0-100)', feature_3: 'number (0-100)', feature_4: 'number (0-100)', }, // Model predictions predicted_class: 'number (0-4)', prediction_confidence: 'number (0-1)', uncertainty_score: 'number (0-1, inverse of confidence)', // Active learning strategy query_strategy: 'uncertainty_sampling | query_by_committee | expected_model_change', should_label: 'boolean (true if high uncertainty)', // If labeled true_label: 'number (0-4) or null', was_useful: 'boolean or null (if labeled)', // Metadata iteration: 'number (1-10, active learning iteration)', timestamp: 'ISO timestamp', }, constraints: [ 'uncertainty_score should equal 1 - prediction_confidence', 'should_label should be true for samples with uncertainty > 0.6', '30% of samples should have high uncertainty (>0.6)', 'true_label should be provided if should_label is true', 'was_useful should correlate with uncertainty_score', ], }); const highUncertainty = activeLearningData.data.filter((d) => d.uncertainty_score > 0.6); const shouldLabel = activeLearningData.data.filter((d) => d.should_label); console.log('Active Learning Data:'); console.log(`- Total samples: ${activeLearningData.data.length}`); console.log(`- High uncertainty samples: ${highUncertainty.length}`); console.log(`- Samples to label: ${shouldLabel.length}`); console.log(`- Avg uncertainty: ${calculateAverage(activeLearningData.data, 'uncertainty_score').toFixed(3)}`); console.log(`- Strategy distribution:`, getStrategyDistribution(activeLearningData.data)); console.log('\nāœ… Active learning data generated'); return activeLearningData; } // ============================================================================ // Example 6: Continuous Model Evaluation Data // ============================================================================ /** * Generate evaluation data for continuous model monitoring */ async function continuousEvaluationData() { console.log('\nšŸ“Š Example 6: Continuous Evaluation Data\n'); const synth = (0, index_js_1.createSynth)({ provider: 'gemini', apiKey: process.env.GEMINI_API_KEY || 'demo-key', }); // Generate time-series evaluation data const evaluationData = await synth.generateTimeSeries({ count: 168, // One week, hourly interval: '1h', schema: { timestamp: 'ISO timestamp', hour: 'number (0-23)', // Model performance metrics accuracy: 'number (0.7-0.95)', precision: 'number (0.7-0.95)', recall: 'number (0.7-0.95)', f1_score: 'number (0.7-0.95)', // Data distribution metrics prediction_distribution: { class_0: 'number (0-1, proportion)', class_1: 'number (0-1, proportion)', }, confidence_distribution: { high: 'number (0-1, >0.8)', medium: 'number (0-1, 0.5-0.8)', low: 'number (0-1, <0.5)', }, // Drift detection feature_drift_score: 'number (0-1)', prediction_drift_score: 'number (0-1)', alert_triggered: 'boolean (true if drift > 0.3)', // System metrics inference_latency_ms: 'number (10-100)', throughput_qps: 'number (100-1000)', error_rate: 'number (0-0.05)', }, trend: 'stable', seasonality: true, constraints: [ 'Performance should degrade slightly during peak hours (9-17)', 'alert_triggered should be true when drift scores > 0.3', 'Drift should gradually increase over time (concept drift)', 'Latency should be higher during peak traffic', ], }); const alerts = evaluationData.data.filter((d) => d.alert_triggered); console.log('Continuous Evaluation Data:'); console.log(`- Time points: ${evaluationData.data.length}`); console.log(`- Average accuracy: ${calculateAverage(evaluationData.data, 'accuracy').toFixed(3)}`); console.log(`- Average drift score: ${calculateAverage(evaluationData.data, 'feature_drift_score').toFixed(3)}`); console.log(`- Drift alerts: ${alerts.length}`); console.log(`- Average latency: ${calculateAverage(evaluationData.data, 'inference_latency_ms').toFixed(1)}ms`); console.log('\nāœ… Continuous evaluation data generated'); return evaluationData; } // ============================================================================ // Utility Functions // ============================================================================ function calculateAverage(data, field) { const values = data.map((d) => d[field]).filter((v) => typeof v === 'number'); if (values.length === 0) return 0; return values.reduce((a, b) => a + b, 0) / values.length; } function calculateConversionRate(data) { const converted = data.filter((d) => d.actual_conversion).length; return (converted / data.length) * 100; } function calculateBounceRate(data) { const bounced = data.filter((d) => d.bounced).length; return (bounced / data.length) * 100; } function calculatePatternConformance(data) { const matching = data.filter((d) => d.matches_common_sequence).length; return (matching / data.length) * 100; } function getUserSegmentDist(data) { const dist = {}; data.forEach((d) => { dist[d.user_segment] = (dist[d.user_segment] || 0) + 1; }); return dist; } function getStrategyDistribution(data) { const dist = {}; data.forEach((d) => { dist[d.query_strategy] = (dist[d.query_strategy] || 0) + 1; }); return dist; } // ============================================================================ // Run All Examples // ============================================================================ async function runAllFeedbackLoopExamples() { console.log('šŸ”„ Self-Improving Feedback Loop Examples\n'); console.log('='.repeat(60)); try { await qualityScoringLoop(); console.log('='.repeat(60)); await abTestingData(); console.log('='.repeat(60)); await patternLearningLoop(); console.log('='.repeat(60)); await adaptiveSchemaEvolution(); console.log('='.repeat(60)); await activeLearningData(); console.log('='.repeat(60)); await continuousEvaluationData(); console.log('='.repeat(60)); console.log('\nāœ… All feedback loop examples completed!\n'); } catch (error) { console.error('āŒ Error:', error.message); } } // Run if executed directly if (import.meta.url === `file://${process.argv[1]}`) { runAllFeedbackLoopExamples().catch(console.error); } //# sourceMappingURL=feedback-loop.js.map