Files
wifi-densepose/npm/packages/agentic-synth/examples/self-learning/feedback-loop.js
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

521 lines
22 KiB
JavaScript

"use strict";
/**
* Self-Improving Data Generation with Feedback Loops
*
* This example demonstrates:
* - Quality scoring and regeneration
* - A/B testing data for model improvement
* - Pattern learning from production data
* - Adaptive schema evolution
*/
// Flag this CommonJS module with the `__esModule` marker (value: true).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API: every example function declared further down in this file is
// attached to `exports`. The assignments can precede the definitions because
// function declarations are hoisted.
exports.qualityScoringLoop = qualityScoringLoop;
exports.abTestingData = abTestingData;
exports.patternLearningLoop = patternLearningLoop;
exports.adaptiveSchemaEvolution = adaptiveSchemaEvolution;
exports.activeLearningData = activeLearningData;
exports.continuousEvaluationData = continuousEvaluationData;
exports.runAllFeedbackLoopExamples = runAllFeedbackLoopExamples;
// Package entry point; the examples in this file only call its `createSynth`.
const index_js_1 = require("../../src/index.js");
// ============================================================================
// Example 1: Quality Scoring and Regeneration
// ============================================================================
/**
* Generate data with quality scores and regenerate low-quality samples
*/
async function qualityScoringLoop() {
console.log('\n⭐ Example 1: Quality Scoring and Regeneration\n');
const synth = (0, index_js_1.createSynth)({
provider: 'gemini',
apiKey: process.env.GEMINI_API_KEY || 'demo-key',
cacheStrategy: 'memory',
});
// Initial generation with quality metrics
const initialData = await synth.generateStructured({
count: 100,
schema: {
id: 'UUID',
content: 'product description (2-3 sentences)',
category: 'electronics | clothing | home | sports',
price: 'number (10-1000)',
// Quality metrics (would be computed by quality model)
quality_score: 'number (0-1, overall quality)',
metrics: {
coherence: 'number (0-1)',
relevance: 'number (0-1)',
completeness: 'number (0-1)',
grammar: 'number (0-1)',
},
// Metadata
generation_attempt: 'number (1)',
timestamp: 'ISO timestamp',
},
constraints: [
'quality_score should be average of metrics',
'20% of samples should have quality_score < 0.7 (for regeneration demo)',
'grammar score should be high (0.8-1.0)',
],
});
console.log('Initial Generation:');
console.log(`- Total samples: ${initialData.data.length}`);
console.log(`- Average quality: ${calculateAverage(initialData.data, 'quality_score')}`);
// Identify low-quality samples
const lowQuality = initialData.data.filter((d) => d.quality_score < 0.7);
console.log(`- Low quality samples: ${lowQuality.length}`);
if (lowQuality.length > 0) {
// Regenerate low-quality samples with feedback
const regenerated = await synth.generateStructured({
count: lowQuality.length,
schema: {
id: 'UUID',
content: 'product description (2-3 sentences, improve coherence and completeness)',
category: 'electronics | clothing | home | sports',
price: 'number (10-1000)',
// Quality metrics
quality_score: 'number (0.7-1.0, improved quality)',
metrics: {
coherence: 'number (0.7-1.0)',
relevance: 'number (0.7-1.0)',
completeness: 'number (0.7-1.0)',
grammar: 'number (0.9-1.0)',
},
// Track regeneration
generation_attempt: 'number (2)',
previous_issues: ['array of issues that were fixed'],
timestamp: 'ISO timestamp',
},
constraints: [
'All samples should have quality_score >= 0.7',
'Focus on improving coherence and completeness',
'Maintain high grammar scores',
],
});
console.log('\nRegenerated Samples:');
console.log(`- Count: ${regenerated.data.length}`);
console.log(`- Average quality: ${calculateAverage(regenerated.data, 'quality_score')}`);
console.log(`- Quality improvement: ${calculateAverage(regenerated.data, 'quality_score') -
calculateAverage(lowQuality, 'quality_score')}`);
}
console.log('\n✅ Quality scoring loop complete');
}
// ============================================================================
// Example 2: A/B Testing Data for Model Improvement
// ============================================================================
/**
 * Example 2: generate A/B test records and compare the two variants.
 *
 * Requests 200 records tagged with variant `A` or `B`, splits them by
 * variant, and logs sample counts, average `prediction_error`, and conversion
 * rate for each side, followed by the relative error reduction of B over A.
 *
 * @returns {Promise<Object>} The result object returned by
 *   `generateStructured` (its `data` array holds the generated records).
 */
async function abTestingData() {
  console.log('\n🔬 Example 2: A/B Testing Data Generation\n');
  const synth = (0, index_js_1.createSynth)({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });
  // Generate A/B test scenarios
  const abTests = await synth.generateStructured({
    count: 200,
    schema: {
      test_id: 'UUID',
      variant: 'A | B',
      // Input features
      user_profile: {
        age: 'number (18-80)',
        location: 'US state',
        interests: ['array of 2-5 interests'],
        past_purchases: 'number (0-100)',
      },
      // Model predictions
      model_variant: 'baseline_model | improved_model (based on variant)',
      prediction: 'number (0-1, predicted conversion probability)',
      confidence: 'number (0-1)',
      // Actual outcome
      actual_conversion: 'boolean',
      conversion_value: 'number (0-500) if converted',
      // Performance metrics
      prediction_error: 'number (absolute error)',
      calibration_error: 'number',
      // Metadata
      timestamp: 'ISO timestamp',
      feature_version: 'v1.0 | v1.1',
    },
    constraints: [
      'Variant A should use baseline_model',
      'Variant B should use improved_model',
      'Variant B should have higher accuracy (lower prediction_error)',
      'Variant B should have better calibration',
      'Distribution of user_profile should be similar across variants',
      'prediction should correlate with actual_conversion',
    ],
  });
  // Analyze A/B test results
  const variantA = abTests.data.filter((d) => d.variant === 'A');
  const variantB = abTests.data.filter((d) => d.variant === 'B');
  // Each average is needed for both the per-variant report and the
  // improvement figure, so compute it once.
  const baselineError = calculateAverage(variantA, 'prediction_error');
  const improvedError = calculateAverage(variantB, 'prediction_error');
  console.log('A/B Test Results:');
  console.log(`\nVariant A (Baseline):`);
  console.log(` - Samples: ${variantA.length}`);
  console.log(` - Avg prediction error: ${baselineError.toFixed(4)}`);
  console.log(` - Conversion rate: ${calculateConversionRate(variantA)}%`);
  console.log(`\nVariant B (Improved):`);
  console.log(` - Samples: ${variantB.length}`);
  console.log(` - Avg prediction error: ${improvedError.toFixed(4)}`);
  console.log(` - Conversion rate: ${calculateConversionRate(variantB)}%`);
  // The relative reduction divides by the baseline error. When variant A is
  // empty or has zero error that division yields NaN/Infinity, so report
  // "n/a" instead of printing a meaningless percentage.
  const improvement = ((baselineError - improvedError) / baselineError) * 100;
  if (Number.isFinite(improvement)) {
    console.log(`\nImprovement: ${improvement.toFixed(2)}% reduction in error`);
  } else {
    console.log('\nImprovement: n/a (baseline error is zero or unavailable)');
  }
  console.log('✅ A/B testing data generated');
  return abTests;
}
// ============================================================================
// Example 3: Pattern Learning from Production Data
// ============================================================================
/**
 * Example 3: generate synthetic sessions that follow "learned" patterns.
 *
 * A hard-coded `productionPatterns` object stands in for statistics mined
 * from production. The generation request (segment labels, percentages, peak
 * hours, average duration, number of known sequences) is derived from that
 * object, so changing a pattern value changes the request as well. Summary
 * statistics of the generated sessions are logged.
 *
 * @returns {Promise<Object>} The result object returned by
 *   `generateStructured` (its `data` array holds the generated sessions).
 */
async function patternLearningLoop() {
  console.log('\n🧠 Example 3: Pattern Learning from Production\n');
  const synth = (0, index_js_1.createSynth)({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });
  // Simulate production data patterns
  const productionPatterns = {
    common_sequences: [
      ['login', 'browse', 'add_to_cart', 'checkout'],
      ['login', 'browse', 'search', 'view_product'],
      ['browse', 'search', 'add_to_cart', 'abandon'],
    ],
    time_distributions: {
      peak_hours: [9, 12, 18, 20],
      avg_session_duration: 420, // seconds
      bounce_rate: 0.35,
    },
    // Keys are the exact segment labels used in the generated records.
    user_segments: {
      frequent_buyer: 0.15,
      casual_browser: 0.50,
      one_time_visitor: 0.35,
    },
  };
  const {
    common_sequences: knownSequences,
    time_distributions: timing,
    user_segments: segmentShares,
  } = productionPatterns;
  // Math.round guards against float artefacts such as 0.07 * 100 = 7.000000000000001.
  const toPercent = (fraction) => Math.round(fraction * 100);
  const segmentLabels = Object.keys(segmentShares).join(' | ');
  const segmentMix = Object.entries(segmentShares)
    .map(([label, share]) => `${toPercent(share)}% ${label}`)
    .join(', ');
  const peakHours = timing.peak_hours.join(', ');
  const bouncePercent = toPercent(timing.bounce_rate);
  const lastPatternId = knownSequences.length - 1;
  // Generate synthetic data matching learned patterns
  const syntheticData = await synth.generateStructured({
    count: 500,
    schema: {
      session_id: 'UUID',
      user_segment: segmentLabels,
      // Event sequence following learned patterns
      events: [
        {
          event_type: 'login | browse | search | add_to_cart | checkout | abandon | view_product',
          timestamp: 'ISO timestamp',
          duration: 'number (5-300, seconds)',
        },
      ],
      // Session metrics
      total_duration: 'number (60-900, seconds, should match avg from patterns)',
      hour_of_day: 'number (0-23, biased toward peak hours)',
      bounced: `boolean (${bouncePercent}% true)`,
      converted: 'boolean',
      // Pattern conformance
      matches_common_sequence: 'boolean (80% should be true)',
      pattern_id: `number (0-${lastPatternId}) if matches sequence`,
      timestamp: 'ISO timestamp',
    },
    constraints: [
      `User segment distribution should match: ${segmentMix}`,
      `Hour of day should be biased toward ${peakHours}`,
      'Event sequences should follow common patterns 80% of time',
      `total_duration should be around ${timing.avg_session_duration} seconds on average`,
      `bounce_rate should be approximately ${bouncePercent}%`,
      'frequent_buyers should have higher conversion rate',
    ],
  });
  console.log('Pattern-Learned Synthetic Data:');
  console.log(`- Total sessions: ${syntheticData.data.length}`);
  console.log(`- User segment distribution:`, getUserSegmentDist(syntheticData.data));
  console.log(`- Avg session duration: ${calculateAverage(syntheticData.data, 'total_duration').toFixed(0)}s`);
  console.log(`- Bounce rate: ${calculateBounceRate(syntheticData.data)}%`);
  console.log(`- Pattern conformance: ${calculatePatternConformance(syntheticData.data)}%`);
  console.log('\n✅ Pattern learning complete');
  return syntheticData;
}
// ============================================================================
// Example 4: Adaptive Schema Evolution
// ============================================================================
/**
 * Example 4: grow a record schema across three versions.
 *
 * Generates 50 records for each of three schemas (V1 core fields, V2 adds
 * location and preferences, V3 adds behavioral metrics and churn fields) and
 * logs a summary. The reported field counts are computed from the schemas'
 * top-level keys rather than typed by hand.
 *
 * @returns {Promise<{v1: Object, v2: Object, v3: Object}>} The three result
 *   objects returned by `generateStructured`, keyed by schema version.
 */
async function adaptiveSchemaEvolution() {
  console.log('\n🔄 Example 4: Adaptive Schema Evolution\n');
  const synth = (0, index_js_1.createSynth)({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });
  // Each field group is a factory so every schema version gets its own fresh
  // nested objects instead of sharing references between requests.
  const coreFields = () => ({
    id: 'UUID',
    name: 'full name',
    email: 'valid email',
    age: 'number (18-80)',
  });
  const demographicFields = () => ({
    location: {
      city: 'city name',
      state: 'US state',
      country: 'country name',
    },
    preferences: ['array of 3-5 preference categories'],
  });
  const behavioralFields = () => ({
    behavioral_metrics: {
      total_sessions: 'number (0-500)',
      avg_session_duration: 'number (60-3600, seconds)',
      last_active: 'ISO timestamp (within last 30 days)',
      engagement_score: 'number (0-100)',
      ltv: 'number (0-10000, lifetime value)',
    },
    user_segment: 'high_value | medium_value | low_value | churned',
    predicted_churn: 'boolean',
    churn_risk_score: 'number (0-1)',
  });
  // `schema_version` is always placed last, matching the key order of the
  // hand-written schemas this replaces.
  const schemaV1 = { ...coreFields(), schema_version: 'string (v1.0)' };
  const schemaV2 = {
    ...coreFields(),
    ...demographicFields(),
    schema_version: 'string (v2.0)',
  };
  const schemaV3 = {
    ...coreFields(),
    ...demographicFields(),
    ...behavioralFields(),
    schema_version: 'string (v3.0)',
  };
  const countFields = (schema) => Object.keys(schema).length;

  // Version 1: Initial schema
  console.log('Schema V1 (Initial):');
  const v1Data = await synth.generateStructured({
    count: 50,
    schema: schemaV1,
  });
  console.log(` - Generated ${v1Data.data.length} records`);
  console.log(` - Fields: id, name, email, age`);
  // Simulate feedback: need more demographic info
  console.log('\nFeedback: Need location and preferences');
  // Version 2: Add fields based on feedback
  console.log('\nSchema V2 (Enhanced):');
  const v2Data = await synth.generateStructured({
    count: 50,
    schema: schemaV2,
  });
  console.log(` - Generated ${v2Data.data.length} records`);
  console.log(` - Fields: id, name, email, age, location, preferences`);
  // Simulate more feedback: need behavioral data
  console.log('\nFeedback: Need behavioral and engagement metrics');
  // Version 3: Add behavioral tracking
  console.log('\nSchema V3 (Full Featured):');
  const v3Data = await synth.generateStructured({
    count: 50,
    schema: schemaV3,
    constraints: [
      'engagement_score should correlate with total_sessions',
      'ltv should be higher for high_value segment',
      'churned users should have old last_active dates',
      'churn_risk_score should be high for predicted_churn=true',
    ],
  });
  console.log(` - Generated ${v3Data.data.length} records`);
  console.log(` - Fields: All previous + behavioral_metrics, user_segment, churn predictions`);
  // Show schema evolution
  console.log('\nSchema Evolution Summary:');
  console.log(' V1 → V2: Added location and preferences');
  console.log(' V2 → V3: Added behavioral metrics and churn prediction');
  // Top-level field counts (including schema_version) are derived from the
  // schemas; the previous literal claimed 12 for V3, which has 11.
  console.log(` Field count: ${countFields(schemaV1)} → ${countFields(schemaV2)} → ${countFields(schemaV3)}`);
  console.log('\n✅ Adaptive schema evolution complete');
  return { v1: v1Data, v2: v2Data, v3: v3Data };
}
// ============================================================================
// Example 5: Active Learning Data Generation
// ============================================================================
/**
 * Example 5: generate candidate samples for an active-learning loop.
 *
 * Requests 300 records that carry model predictions together with an
 * uncertainty score and a labelling decision, then logs how many records
 * exceed the 0.6 uncertainty threshold, how many are flagged for labelling,
 * the mean uncertainty, and the query-strategy histogram.
 *
 * @returns {Promise<Object>} The result object returned by
 *   `generateStructured` (its `data` array holds the generated samples).
 */
async function activeLearningData() {
  console.log('\n🎯 Example 5: Active Learning Data\n');
  const generator = (0, index_js_1.createSynth)({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  const request = {
    count: 300,
    schema: {
      sample_id: 'UUID',
      // Input features
      features: {
        feature_1: 'number (0-100)',
        feature_2: 'number (0-100)',
        feature_3: 'number (0-100)',
        feature_4: 'number (0-100)',
      },
      // What the model predicted and how sure it was
      predicted_class: 'number (0-4)',
      prediction_confidence: 'number (0-1)',
      uncertainty_score: 'number (0-1, inverse of confidence)',
      // Selection strategy and labelling decision
      query_strategy: 'uncertainty_sampling | query_by_committee | expected_model_change',
      should_label: 'boolean (true if high uncertainty)',
      // Populated only for labelled samples
      true_label: 'number (0-4) or null',
      was_useful: 'boolean or null (if labeled)',
      // Bookkeeping
      iteration: 'number (1-10, active learning iteration)',
      timestamp: 'ISO timestamp',
    },
    constraints: [
      'uncertainty_score should equal 1 - prediction_confidence',
      'should_label should be true for samples with uncertainty > 0.6',
      '30% of samples should have high uncertainty (>0.6)',
      'true_label should be provided if should_label is true',
      'was_useful should correlate with uncertainty_score',
    ],
  };
  const batch = await generator.generateStructured(request);
  const samples = batch.data;

  const uncertainSamples = samples.filter((sample) => sample.uncertainty_score > 0.6);
  const labelQueue = samples.filter((sample) => sample.should_label);

  console.log('Active Learning Data:');
  console.log(`- Total samples: ${samples.length}`);
  console.log(`- High uncertainty samples: ${uncertainSamples.length}`);
  console.log(`- Samples to label: ${labelQueue.length}`);
  const meanUncertainty = calculateAverage(samples, 'uncertainty_score');
  console.log(`- Avg uncertainty: ${meanUncertainty.toFixed(3)}`);
  console.log(`- Strategy distribution:`, getStrategyDistribution(samples));
  console.log('\n✅ Active learning data generated');
  return batch;
}
// ============================================================================
// Example 6: Continuous Model Evaluation Data
// ============================================================================
/**
 * Example 6: generate an hourly time series of model-monitoring metrics.
 *
 * Requests 168 points at a `1h` interval (one week) via `generateTimeSeries`,
 * then logs the number of points, the mean accuracy, mean feature-drift
 * score, number of points with `alert_triggered` set, and mean latency.
 *
 * @returns {Promise<Object>} The result object returned by
 *   `generateTimeSeries` (its `data` array holds the generated points).
 */
async function continuousEvaluationData() {
  console.log('\n📊 Example 6: Continuous Evaluation Data\n');
  const generator = (0, index_js_1.createSynth)({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  const pointSchema = {
    timestamp: 'ISO timestamp',
    hour: 'number (0-23)',
    // Quality of the model's predictions
    accuracy: 'number (0.7-0.95)',
    precision: 'number (0.7-0.95)',
    recall: 'number (0.7-0.95)',
    f1_score: 'number (0.7-0.95)',
    // Shape of the prediction and confidence distributions
    prediction_distribution: {
      class_0: 'number (0-1, proportion)',
      class_1: 'number (0-1, proportion)',
    },
    confidence_distribution: {
      high: 'number (0-1, >0.8)',
      medium: 'number (0-1, 0.5-0.8)',
      low: 'number (0-1, <0.5)',
    },
    // Drift signals
    feature_drift_score: 'number (0-1)',
    prediction_drift_score: 'number (0-1)',
    alert_triggered: 'boolean (true if drift > 0.3)',
    // Serving-side metrics
    inference_latency_ms: 'number (10-100)',
    throughput_qps: 'number (100-1000)',
    error_rate: 'number (0-0.05)',
  };
  const series = await generator.generateTimeSeries({
    count: 168, // 7 days x 24 hourly points
    interval: '1h',
    schema: pointSchema,
    trend: 'stable',
    seasonality: true,
    constraints: [
      'Performance should degrade slightly during peak hours (9-17)',
      'alert_triggered should be true when drift scores > 0.3',
      'Drift should gradually increase over time (concept drift)',
      'Latency should be higher during peak traffic',
    ],
  });

  const points = series.data;
  const alertCount = points.filter((point) => point.alert_triggered).length;
  const mean = (field) => calculateAverage(points, field);

  console.log('Continuous Evaluation Data:');
  console.log(`- Time points: ${points.length}`);
  console.log(`- Average accuracy: ${mean('accuracy').toFixed(3)}`);
  console.log(`- Average drift score: ${mean('feature_drift_score').toFixed(3)}`);
  console.log(`- Drift alerts: ${alertCount}`);
  console.log(`- Average latency: ${mean('inference_latency_ms').toFixed(1)}ms`);
  console.log('\n✅ Continuous evaluation data generated');
  return series;
}
// ============================================================================
// Utility Functions
// ============================================================================
/**
 * Computes the arithmetic mean of one top-level field across a list of records.
 *
 * Only values whose `typeof` is `'number'` and that are not NaN contribute;
 * everything else (missing fields, strings, null, NaN) is skipped.
 *
 * @param {Array<Object>} data - Records to read `field` from.
 * @param {string} field - Name of the property to average.
 * @returns {number} Mean of the usable values, or 0 when there are none.
 */
function calculateAverage(data, field) {
  let total = 0;
  let count = 0;
  for (const record of data) {
    const value = record[field];
    // NaN reports typeof 'number' but would turn the whole mean into NaN.
    if (typeof value === 'number' && !Number.isNaN(value)) {
      total += value;
      count += 1;
    }
  }
  return count === 0 ? 0 : total / count;
}
/**
 * Percentage of records whose `actual_conversion` field is truthy.
 *
 * @param {Array<Object>} data - Records to inspect.
 * @returns {number} Value in the range 0-100; 0 for an empty list.
 */
function calculateConversionRate(data) {
  // An empty list would otherwise evaluate 0 / 0 and report NaN.
  if (data.length === 0) {
    return 0;
  }
  const converted = data.filter((d) => d.actual_conversion).length;
  return (converted / data.length) * 100;
}
/**
 * Percentage of records whose `bounced` field is truthy.
 *
 * @param {Array<Object>} data - Records to inspect.
 * @returns {number} Value in the range 0-100; 0 for an empty list.
 */
function calculateBounceRate(data) {
  // An empty list would otherwise evaluate 0 / 0 and report NaN.
  if (data.length === 0) {
    return 0;
  }
  const bounced = data.filter((d) => d.bounced).length;
  return (bounced / data.length) * 100;
}
/**
 * Percentage of records whose `matches_common_sequence` field is truthy.
 *
 * @param {Array<Object>} data - Records to inspect.
 * @returns {number} Value in the range 0-100; 0 for an empty list.
 */
function calculatePatternConformance(data) {
  // An empty list would otherwise evaluate 0 / 0 and report NaN.
  if (data.length === 0) {
    return 0;
  }
  const matching = data.filter((d) => d.matches_common_sequence).length;
  return (matching / data.length) * 100;
}
/**
 * Counts how many records fall into each `user_segment`.
 *
 * Tallies are accumulated in a Map so that segment names which happen to
 * match inherited object members (`constructor`, `toString`, `__proto__`)
 * are counted like any other label instead of picking up the inherited value.
 *
 * @param {Array<Object>} data - Records carrying a `user_segment` field.
 * @returns {Object<string, number>} Plain object mapping segment name to count.
 */
function getUserSegmentDist(data) {
  const counts = new Map();
  for (const record of data) {
    // String() mirrors the key coercion a plain-object lookup would apply.
    const segment = String(record.user_segment);
    counts.set(segment, (counts.get(segment) || 0) + 1);
  }
  // Object.fromEntries defines own properties, so no key is lost to a setter.
  return Object.fromEntries(counts);
}
/**
 * Counts how many records use each `query_strategy`.
 *
 * Tallies are accumulated in a Map so that strategy names which happen to
 * match inherited object members (`constructor`, `toString`, `__proto__`)
 * are counted like any other label instead of picking up the inherited value.
 *
 * @param {Array<Object>} data - Records carrying a `query_strategy` field.
 * @returns {Object<string, number>} Plain object mapping strategy name to count.
 */
function getStrategyDistribution(data) {
  const counts = new Map();
  for (const record of data) {
    // String() mirrors the key coercion a plain-object lookup would apply.
    const strategy = String(record.query_strategy);
    counts.set(strategy, (counts.get(strategy) || 0) + 1);
  }
  // Object.fromEntries defines own properties, so no key is lost to a setter.
  return Object.fromEntries(counts);
}
// ============================================================================
// Run All Examples
// ============================================================================
/**
 * Runs the six feedback-loop examples one after another.
 *
 * A 60-character divider is printed before the first example and after each
 * one. If any example rejects, the remaining ones are skipped and the error's
 * `message` is written to stderr; the returned promise still resolves.
 *
 * @returns {Promise<void>}
 */
async function runAllFeedbackLoopExamples() {
  const divider = '='.repeat(60);
  console.log('🔄 Self-Improving Feedback Loop Examples\n');
  console.log(divider);
  try {
    const examples = [
      qualityScoringLoop,
      abTestingData,
      patternLearningLoop,
      adaptiveSchemaEvolution,
      activeLearningData,
      continuousEvaluationData,
    ];
    // Strictly sequential: each example finishes before the next one starts.
    for (const runExample of examples) {
      await runExample();
      console.log(divider);
    }
    console.log('\n✅ All feedback loop examples completed!\n');
  } catch (error) {
    console.error('❌ Error:', error.message);
  }
}
// Run every example only when this file is the process entry point (for
// example `node feedback-loop.js`), not when it is require()d by other code.
// This file is CommonJS (it uses `require` and `exports`), where the
// ESM-only `import.meta` syntax is rejected at parse time, so the CommonJS
// main-module check is used instead. The `typeof` guards keep the test inert
// if the code is ever evaluated by a loader that does not define
// `require`/`module`.
if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
  runAllFeedbackLoopExamples().catch(console.error);
}
//# sourceMappingURL=feedback-loop.js.map