Files
wifi-densepose/npm/packages/agentic-synth-examples/examples/advanced/production-pipeline.ts
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

445 lines
14 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* ADVANCED TUTORIAL: Production Pipeline
*
* Build a complete production-ready data generation pipeline with:
* - Error handling and retry logic
* - Monitoring and metrics
* - Rate limiting and cost controls
* - Batch processing and caching
* - Quality validation
*
* What you'll learn:
* - Production-grade error handling
* - Performance monitoring
* - Cost optimization
* - Scalability patterns
* - Deployment best practices
*
* Prerequisites:
* - Complete previous tutorials
* - Set GEMINI_API_KEY environment variable
* - npm install @ruvector/agentic-synth
*
* Run: npx tsx examples/advanced/production-pipeline.ts
*/
import { AgenticSynth, GenerationResult } from '@ruvector/agentic-synth';
import { writeFileSync, existsSync, mkdirSync } from 'fs';
import { join } from 'path';
// Pipeline configuration
interface PipelineConfig {
maxRetries: number;
retryDelay: number;
batchSize: number;
maxConcurrency: number;
qualityThreshold: number;
costBudget: number;
rateLimitPerMinute: number;
enableCaching: boolean;
outputDirectory: string;
}
// Metrics tracking
interface PipelineMetrics {
totalRequests: number;
successfulRequests: number;
failedRequests: number;
totalDuration: number;
totalCost: number;
averageQuality: number;
cacheHits: number;
retries: number;
errors: Array<{ timestamp: Date; error: string; context: any }>;
}
// Quality validator
interface QualityValidator {
validate(data: any): { valid: boolean; score: number; issues: string[] };
}
// Production-grade pipeline
class ProductionPipeline {
private config: PipelineConfig;
private synth: AgenticSynth;
private metrics: PipelineMetrics;
private requestsThisMinute: number = 0;
private minuteStartTime: number = Date.now();
constructor(config: Partial<PipelineConfig> = {}) {
this.config = {
maxRetries: config.maxRetries || 3,
retryDelay: config.retryDelay || 1000,
batchSize: config.batchSize || 10,
maxConcurrency: config.maxConcurrency || 3,
qualityThreshold: config.qualityThreshold || 0.7,
costBudget: config.costBudget || 10.0,
rateLimitPerMinute: config.rateLimitPerMinute || 60,
enableCaching: config.enableCaching !== false,
outputDirectory: config.outputDirectory || './output'
};
this.synth = new AgenticSynth({
provider: 'gemini',
apiKey: process.env.GEMINI_API_KEY,
model: 'gemini-2.0-flash-exp',
cacheStrategy: this.config.enableCaching ? 'memory' : 'none',
cacheTTL: 3600,
maxRetries: this.config.maxRetries,
timeout: 30000
});
this.metrics = {
totalRequests: 0,
successfulRequests: 0,
failedRequests: 0,
totalDuration: 0,
totalCost: 0,
averageQuality: 0,
cacheHits: 0,
retries: 0,
errors: []
};
// Ensure output directory exists
if (!existsSync(this.config.outputDirectory)) {
mkdirSync(this.config.outputDirectory, { recursive: true });
}
}
// Rate limiting check
private async checkRateLimit(): Promise<void> {
const now = Date.now();
const elapsedMinutes = (now - this.minuteStartTime) / 60000;
if (elapsedMinutes >= 1) {
// Reset counter for new minute
this.requestsThisMinute = 0;
this.minuteStartTime = now;
}
if (this.requestsThisMinute >= this.config.rateLimitPerMinute) {
const waitTime = 60000 - (now - this.minuteStartTime);
console.log(`⏳ Rate limit reached, waiting ${Math.ceil(waitTime / 1000)}s...`);
await new Promise(resolve => setTimeout(resolve, waitTime));
this.requestsThisMinute = 0;
this.minuteStartTime = Date.now();
}
}
// Cost check
private checkCostBudget(): void {
if (this.metrics.totalCost >= this.config.costBudget) {
throw new Error(`Cost budget exceeded: $${this.metrics.totalCost.toFixed(4)} >= $${this.config.costBudget}`);
}
}
// Generate with retry logic
private async generateWithRetry(
options: any,
attempt: number = 1
): Promise<GenerationResult> {
try {
await this.checkRateLimit();
this.checkCostBudget();
this.requestsThisMinute++;
this.metrics.totalRequests++;
const startTime = Date.now();
const result = await this.synth.generateStructured(options);
const duration = Date.now() - startTime;
this.metrics.totalDuration += duration;
this.metrics.successfulRequests++;
if (result.metadata.cached) {
this.metrics.cacheHits++;
}
// Estimate cost (rough approximation)
const estimatedCost = result.metadata.cached ? 0 : 0.0001;
this.metrics.totalCost += estimatedCost;
return result;
} catch (error) {
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
if (attempt < this.config.maxRetries) {
this.metrics.retries++;
console.log(`⚠️ Attempt ${attempt} failed, retrying... (${errorMsg})`);
await new Promise(resolve =>
setTimeout(resolve, this.config.retryDelay * attempt)
);
return this.generateWithRetry(options, attempt + 1);
} else {
this.metrics.failedRequests++;
this.metrics.errors.push({
timestamp: new Date(),
error: errorMsg,
context: options
});
throw error;
}
}
}
// Process a single batch
private async processBatch(
requests: any[],
validator?: QualityValidator
): Promise<GenerationResult[]> {
const results: GenerationResult[] = [];
// Process with concurrency control
for (let i = 0; i < requests.length; i += this.config.maxConcurrency) {
const batch = requests.slice(i, i + this.config.maxConcurrency);
const batchResults = await Promise.allSettled(
batch.map(req => this.generateWithRetry(req))
);
batchResults.forEach((result, idx) => {
if (result.status === 'fulfilled') {
const genResult = result.value;
// Validate quality if validator provided
if (validator) {
const validation = validator.validate(genResult.data);
if (validation.valid) {
results.push(genResult);
} else {
console.log(`⚠️ Quality validation failed (score: ${validation.score.toFixed(2)})`);
console.log(` Issues: ${validation.issues.join(', ')}`);
}
} else {
results.push(genResult);
}
} else {
console.error(`❌ Batch item ${i + idx} failed:`, result.reason);
}
});
}
return results;
}
// Main pipeline execution
async run(
requests: any[],
validator?: QualityValidator
): Promise<GenerationResult[]> {
console.log('🏭 Starting Production Pipeline\n');
console.log('=' .repeat(70));
console.log(`\nConfiguration:`);
console.log(` Total Requests: ${requests.length}`);
console.log(` Batch Size: ${this.config.batchSize}`);
console.log(` Max Concurrency: ${this.config.maxConcurrency}`);
console.log(` Max Retries: ${this.config.maxRetries}`);
console.log(` Cost Budget: $${this.config.costBudget}`);
console.log(` Rate Limit: ${this.config.rateLimitPerMinute}/min`);
console.log(` Caching: ${this.config.enableCaching ? 'Enabled' : 'Disabled'}`);
console.log(` Output: ${this.config.outputDirectory}`);
console.log('\n' + '=' .repeat(70) + '\n');
const startTime = Date.now();
const allResults: GenerationResult[] = [];
// Split into batches
const batches = [];
for (let i = 0; i < requests.length; i += this.config.batchSize) {
batches.push(requests.slice(i, i + this.config.batchSize));
}
console.log(`📦 Processing ${batches.length} batches...\n`);
// Process each batch
for (let i = 0; i < batches.length; i++) {
console.log(`\nBatch ${i + 1}/${batches.length} (${batches[i].length} items)`);
console.log('─'.repeat(70));
try {
const batchResults = await this.processBatch(batches[i], validator);
allResults.push(...batchResults);
console.log(`✓ Batch complete: ${batchResults.length}/${batches[i].length} successful`);
console.log(` Cost so far: $${this.metrics.totalCost.toFixed(4)}`);
console.log(` Cache hits: ${this.metrics.cacheHits}`);
} catch (error) {
console.error(`✗ Batch failed:`, error instanceof Error ? error.message : 'Unknown error');
if (error instanceof Error && error.message.includes('budget')) {
console.log('\n⚠ Cost budget exceeded, stopping pipeline...');
break;
}
}
}
const totalTime = Date.now() - startTime;
// Save results
await this.saveResults(allResults);
// Display metrics
this.displayMetrics(totalTime);
return allResults;
}
// Save results to disk
private async saveResults(results: GenerationResult[]): Promise<void> {
try {
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const filename = `generation-${timestamp}.json`;
const filepath = join(this.config.outputDirectory, filename);
const output = {
timestamp: new Date(),
results: results.map(r => r.data),
metadata: {
count: results.length,
metrics: this.metrics
}
};
writeFileSync(filepath, JSON.stringify(output, null, 2));
console.log(`\n💾 Results saved to: ${filepath}`);
// Save metrics separately
const metricsFile = join(this.config.outputDirectory, `metrics-${timestamp}.json`);
writeFileSync(metricsFile, JSON.stringify(this.metrics, null, 2));
console.log(`📊 Metrics saved to: ${metricsFile}`);
} catch (error) {
console.error('⚠️ Failed to save results:', error instanceof Error ? error.message : 'Unknown error');
}
}
// Display comprehensive metrics
private displayMetrics(totalTime: number): void {
console.log('\n\n' + '=' .repeat(70));
console.log('\n📊 PIPELINE METRICS\n');
const successRate = (this.metrics.successfulRequests / this.metrics.totalRequests) * 100;
const avgDuration = this.metrics.totalDuration / this.metrics.successfulRequests;
const cacheHitRate = (this.metrics.cacheHits / this.metrics.totalRequests) * 100;
console.log('Performance:');
console.log(` Total Time: ${(totalTime / 1000).toFixed(2)}s`);
console.log(` Avg Request Time: ${avgDuration.toFixed(0)}ms`);
console.log(` Throughput: ${(this.metrics.successfulRequests / (totalTime / 1000)).toFixed(2)} req/s`);
console.log('\nReliability:');
console.log(` Total Requests: ${this.metrics.totalRequests}`);
console.log(` Successful: ${this.metrics.successfulRequests} (${successRate.toFixed(1)}%)`);
console.log(` Failed: ${this.metrics.failedRequests}`);
console.log(` Retries: ${this.metrics.retries}`);
console.log('\nCost & Efficiency:');
console.log(` Total Cost: $${this.metrics.totalCost.toFixed(4)}`);
console.log(` Avg Cost/Request: $${(this.metrics.totalCost / this.metrics.totalRequests).toFixed(6)}`);
console.log(` Cache Hit Rate: ${cacheHitRate.toFixed(1)}%`);
console.log(` Cost Savings from Cache: $${(this.metrics.cacheHits * 0.0001).toFixed(4)}`);
if (this.metrics.errors.length > 0) {
console.log(`\n⚠ Errors (${this.metrics.errors.length}):`);
this.metrics.errors.slice(0, 5).forEach((err, i) => {
console.log(` ${i + 1}. ${err.error}`);
});
if (this.metrics.errors.length > 5) {
console.log(` ... and ${this.metrics.errors.length - 5} more`);
}
}
console.log('\n' + '=' .repeat(70) + '\n');
}
// Get metrics
getMetrics(): PipelineMetrics {
return { ...this.metrics };
}
}
// Example quality validator
class ProductQualityValidator implements QualityValidator {
validate(data: any[]): { valid: boolean; score: number; issues: string[] } {
const issues: string[] = [];
let score = 1.0;
if (!Array.isArray(data) || data.length === 0) {
return { valid: false, score: 0, issues: ['No data generated'] };
}
data.forEach((item, idx) => {
if (!item.description || item.description.length < 50) {
issues.push(`Item ${idx}: Description too short`);
score -= 0.1;
}
if (!item.key_features || !Array.isArray(item.key_features) || item.key_features.length < 3) {
issues.push(`Item ${idx}: Insufficient features`);
score -= 0.1;
}
});
score = Math.max(0, score);
const valid = score >= 0.7;
return { valid, score, issues };
}
}
// Main execution
async function runProductionPipeline() {
const pipeline = new ProductionPipeline({
maxRetries: 3,
retryDelay: 2000,
batchSize: 5,
maxConcurrency: 2,
qualityThreshold: 0.7,
costBudget: 1.0,
rateLimitPerMinute: 30,
enableCaching: true,
outputDirectory: join(process.cwd(), 'examples', 'output', 'production')
});
const validator = new ProductQualityValidator();
// Generate product data for e-commerce catalog
const requests = [
{
count: 2,
schema: {
id: { type: 'string', required: true },
name: { type: 'string', required: true },
description: { type: 'string', required: true },
key_features: { type: 'array', items: { type: 'string' }, required: true },
price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
category: { type: 'string', enum: ['Electronics', 'Clothing', 'Home', 'Sports'] }
}
}
];
// Duplicate requests to test batching
const allRequests = Array(5).fill(null).map(() => requests[0]);
const results = await pipeline.run(allRequests, validator);
console.log(`\n✅ Pipeline complete! Generated ${results.length} batches of products.\n`);
}
// Run the example
if (import.meta.url === `file://${process.argv[1]}`) {
runProductionPipeline().catch(error => {
console.error('❌ Pipeline failed:', error);
process.exit(1);
});
}
export { ProductionPipeline, ProductQualityValidator, PipelineConfig, PipelineMetrics };