Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,444 @@
|
||||
/**
|
||||
* ADVANCED TUTORIAL: Production Pipeline
|
||||
*
|
||||
* Build a complete production-ready data generation pipeline with:
|
||||
* - Error handling and retry logic
|
||||
* - Monitoring and metrics
|
||||
* - Rate limiting and cost controls
|
||||
* - Batch processing and caching
|
||||
* - Quality validation
|
||||
*
|
||||
* What you'll learn:
|
||||
* - Production-grade error handling
|
||||
* - Performance monitoring
|
||||
* - Cost optimization
|
||||
* - Scalability patterns
|
||||
* - Deployment best practices
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Complete previous tutorials
|
||||
* - Set GEMINI_API_KEY environment variable
|
||||
* - npm install @ruvector/agentic-synth
|
||||
*
|
||||
* Run: npx tsx examples/advanced/production-pipeline.ts
|
||||
*/
|
||||
|
||||
import { AgenticSynth, GenerationResult } from '@ruvector/agentic-synth';
import { writeFileSync, existsSync, mkdirSync } from 'fs';
import { join } from 'path';
import { pathToFileURL } from 'url';
|
||||
|
||||
// Pipeline configuration
|
||||
/**
 * Settings accepted (as a Partial) by the ProductionPipeline constructor.
 * Any field left out is replaced by a default inside that constructor.
 */
interface PipelineConfig {
|
||||
/**
 * Attempt ceiling used by generateWithRetry: a failed request is retried
 * while its 1-based attempt number is below this value. The same value is
 * also forwarded to the AgenticSynth constructor.
 */
maxRetries: number;
|
||||
/** Base delay between retries, in milliseconds; it is multiplied by the attempt number. */
retryDelay: number;
|
||||
/** Number of requests run() places in each batch. */
batchSize: number;
|
||||
/** Number of requests processBatch starts together and awaits as a group. */
maxConcurrency: number;
|
||||
/**
 * Stored on the config object. NOTE(review): nothing visible in this file
 * reads it — the example validator applies its own 0.7 cut-off.
 */
qualityThreshold: number;
|
||||
/** Spending cap compared against metrics.totalCost; log messages print it with a `$` prefix. */
costBudget: number;
|
||||
/** Requests allowed per 60-second window before checkRateLimit sleeps. */
rateLimitPerMinute: number;
|
||||
/** Selects the 'memory' cache strategy when true and 'none' when false. */
enableCaching: boolean;
|
||||
/** Directory (created by the constructor if missing) that receives the result and metrics JSON files. */
outputDirectory: string;
|
||||
}
|
||||
|
||||
// Metrics tracking
|
||||
/**
 * Counters the pipeline accumulates while running; the whole object is
 * written to disk by saveResults and summarised by displayMetrics.
 */
interface PipelineMetrics {
|
||||
/** Incremented once per generation attempt, so retried requests count more than once. */
totalRequests: number;
|
||||
/** Attempts whose generateStructured call resolved. */
successfulRequests: number;
|
||||
/** Requests that still failed after their last allowed attempt. */
failedRequests: number;
|
||||
/** Sum of wall-clock milliseconds spent in successful generateStructured calls. */
totalDuration: number;
|
||||
/** Estimated spend: a flat 0.0001 is added per successful non-cached result. */
totalCost: number;
|
||||
/** Average quality score; initialised to 0 by the pipeline constructor. */
averageQuality: number;
|
||||
/** Successful results whose metadata.cached flag was truthy. */
cacheHits: number;
|
||||
/** Number of times a failed attempt was followed by another attempt. */
retries: number;
|
||||
/** One entry per request that ultimately failed: when, the error message, and the request options as context. */
errors: Array<{ timestamp: Date; error: string; context: any }>;
|
||||
}
|
||||
|
||||
// Quality validator
|
||||
/**
 * Pluggable quality gate. processBatch calls validate() with the `data` of
 * each successful generation result and keeps the result only when `valid`
 * is true; `score` and `issues` are logged when a result is rejected.
 */
interface QualityValidator {
|
||||
/**
 * @param data Payload of one generation result.
 * @returns Verdict (`valid`), a numeric `score`, and human-readable `issues`.
 */
validate(data: any): { valid: boolean; score: number; issues: string[] };
|
||||
}
|
||||
|
||||
// Production-grade pipeline
|
||||
/**
 * Batch generation pipeline wrapped around an AgenticSynth client.
 *
 * Responsibilities:
 * - splits requests into batches (`batchSize`) and runs each batch in
 *   concurrency windows (`maxConcurrency`);
 * - throttles requests to `rateLimitPerMinute` using a fixed 60 s window;
 * - retries failed generations with a linearly growing delay;
 * - stops issuing requests once the estimated spend reaches `costBudget`;
 * - records metrics and writes results plus metrics as JSON files.
 *
 * Note: `config.qualityThreshold` is stored but not consulted here; the
 * accept/reject decision is taken from the validator's `valid` flag.
 */
class ProductionPipeline {
  /** Flat cost estimate charged for every successful, non-cached generation. */
  private static readonly ESTIMATED_COST_PER_REQUEST = 0.0001;

  private config: PipelineConfig;
  private synth: AgenticSynth;
  private metrics: PipelineMetrics;
  private requestsThisMinute: number = 0;
  private minuteStartTime: number = Date.now();
  /** How many validation scores have been folded into metrics.averageQuality. */
  private qualityScoreCount: number = 0;

  /**
   * @param config Partial settings; omitted fields fall back to defaults.
   *   `0` is honoured for maxRetries, retryDelay, qualityThreshold and
   *   costBudget. batchSize, maxConcurrency and rateLimitPerMinute must be
   *   positive, otherwise the default is used (a non-positive step would
   *   make the slicing loops spin forever).
   */
  constructor(config: Partial<PipelineConfig> = {}) {
    this.config = {
      maxRetries: config.maxRetries ?? 3,
      retryDelay: config.retryDelay ?? 1000,
      batchSize: ProductionPipeline.positiveOr(config.batchSize, 10),
      maxConcurrency: ProductionPipeline.positiveOr(config.maxConcurrency, 3),
      qualityThreshold: config.qualityThreshold ?? 0.7,
      costBudget: config.costBudget ?? 10.0,
      rateLimitPerMinute: ProductionPipeline.positiveOr(config.rateLimitPerMinute, 60),
      enableCaching: config.enableCaching !== false,
      outputDirectory: config.outputDirectory || './output'
    };

    this.synth = new AgenticSynth({
      provider: 'gemini',
      apiKey: process.env.GEMINI_API_KEY,
      model: 'gemini-2.0-flash-exp',
      cacheStrategy: this.config.enableCaching ? 'memory' : 'none',
      cacheTTL: 3600,
      maxRetries: this.config.maxRetries,
      timeout: 30000
    });

    this.metrics = {
      totalRequests: 0,
      successfulRequests: 0,
      failedRequests: 0,
      totalDuration: 0,
      totalCost: 0,
      averageQuality: 0,
      cacheHits: 0,
      retries: 0,
      errors: []
    };

    // Ensure output directory exists
    if (!existsSync(this.config.outputDirectory)) {
      mkdirSync(this.config.outputDirectory, { recursive: true });
    }
  }

  /** Returns `value` when it is a number greater than zero, otherwise `fallback`. */
  private static positiveOr(value: number | undefined, fallback: number): number {
    return typeof value === 'number' && value > 0 ? value : fallback;
  }

  /** Division that yields 0 instead of NaN/Infinity when the denominator is not positive. */
  private static ratio(numerator: number, denominator: number): number {
    return denominator > 0 ? numerator / denominator : 0;
  }

  /**
   * Sleeps until the current 60 s window ends when the per-minute quota has
   * been used up, then starts a fresh window.
   */
  private async checkRateLimit(): Promise<void> {
    const now = Date.now();
    const elapsedMinutes = (now - this.minuteStartTime) / 60000;

    if (elapsedMinutes >= 1) {
      // Reset counter for new minute
      this.requestsThisMinute = 0;
      this.minuteStartTime = now;
    }

    if (this.requestsThisMinute >= this.config.rateLimitPerMinute) {
      const waitTime = 60000 - (now - this.minuteStartTime);
      console.log(`⏳ Rate limit reached, waiting ${Math.ceil(waitTime / 1000)}s...`);
      await new Promise(resolve => setTimeout(resolve, waitTime));
      this.requestsThisMinute = 0;
      this.minuteStartTime = Date.now();
    }
  }

  /** True once the estimated spend has reached the configured budget. */
  private isBudgetExhausted(): boolean {
    return this.metrics.totalCost >= this.config.costBudget;
  }

  /**
   * @throws Error whose message contains "Cost budget exceeded" when the
   *   estimated spend has reached the budget.
   */
  private checkCostBudget(): void {
    if (this.isBudgetExhausted()) {
      throw new Error(`Cost budget exceeded: $${this.metrics.totalCost.toFixed(4)} >= $${this.config.costBudget}`);
    }
  }

  /** Folds one validation score into the running mean stored in metrics.averageQuality. */
  private recordQualityScore(score: number): void {
    if (!Number.isFinite(score)) {
      return; // a non-numeric score would poison the average
    }
    this.qualityScoreCount++;
    this.metrics.averageQuality += (score - this.metrics.averageQuality) / this.qualityScoreCount;
  }

  /**
   * Runs one generation, retrying on failure.
   *
   * @param options Request forwarded unchanged to `synth.generateStructured`.
   * @param attempt 1-based attempt number (callers normally omit it).
   * @returns The generation result of the first attempt that succeeds.
   * @throws The last generation error once `attempt` reaches `maxRetries`,
   *   or the budget error when the cost budget is already used up.
   */
  private async generateWithRetry(
    options: any,
    attempt: number = 1
  ): Promise<GenerationResult> {
    await this.checkRateLimit();
    // Checked outside the try block on purpose: running out of budget is not
    // a transient failure, so it must not be retried with back-off nor be
    // counted as a failed generation.
    this.checkCostBudget();

    try {
      this.requestsThisMinute++;
      this.metrics.totalRequests++;

      const startTime = Date.now();
      const result = await this.synth.generateStructured(options);
      const duration = Date.now() - startTime;

      this.metrics.totalDuration += duration;
      this.metrics.successfulRequests++;

      if (result.metadata.cached) {
        this.metrics.cacheHits++;
      }

      // Estimate cost (rough approximation): cached results are free.
      const estimatedCost = result.metadata.cached ? 0 : ProductionPipeline.ESTIMATED_COST_PER_REQUEST;
      this.metrics.totalCost += estimatedCost;

      return result;
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : 'Unknown error';

      if (attempt < this.config.maxRetries) {
        this.metrics.retries++;
        console.log(`⚠️ Attempt ${attempt} failed, retrying... (${errorMsg})`);

        // Linear back-off: retryDelay, 2 * retryDelay, ...
        await new Promise(resolve =>
          setTimeout(resolve, this.config.retryDelay * attempt)
        );

        return this.generateWithRetry(options, attempt + 1);
      }

      this.metrics.failedRequests++;
      this.metrics.errors.push({
        timestamp: new Date(),
        error: errorMsg,
        context: options
      });
      throw error;
    }
  }

  /**
   * Processes one batch in windows of `maxConcurrency` parallel requests.
   * Stops opening new windows as soon as the cost budget is exhausted and
   * returns whatever was collected up to that point.
   *
   * @param requests Requests belonging to this batch.
   * @param validator Optional quality gate; rejected results are dropped.
   * @returns Results that succeeded and (if a validator is given) passed it.
   */
  private async processBatch(
    requests: any[],
    validator?: QualityValidator
  ): Promise<GenerationResult[]> {
    const results: GenerationResult[] = [];
    const windowSize = this.config.maxConcurrency;

    // Process with concurrency control
    for (let i = 0; i < requests.length; i += windowSize) {
      if (this.isBudgetExhausted()) {
        // Further requests would only be rejected; run() reports the stop.
        break;
      }

      const batch = requests.slice(i, i + windowSize);
      const batchResults = await Promise.allSettled(
        batch.map(req => this.generateWithRetry(req))
      );

      batchResults.forEach((result, idx) => {
        if (result.status !== 'fulfilled') {
          console.error(`❌ Batch item ${i + idx} failed:`, result.reason);
          return;
        }

        const genResult = result.value;
        if (!validator) {
          results.push(genResult);
          return;
        }

        // Validate quality if validator provided
        const validation = validator.validate(genResult.data);
        this.recordQualityScore(validation.score);

        if (validation.valid) {
          results.push(genResult);
        } else {
          console.log(`⚠️ Quality validation failed (score: ${validation.score.toFixed(2)})`);
          console.log(` Issues: ${validation.issues.join(', ')}`);
        }
      });
    }

    return results;
  }

  /**
   * Runs the whole pipeline: batches the requests, processes them, saves the
   * results and metrics to disk and prints a metrics summary.
   *
   * @param requests Generation requests, each passed to the synth client as-is.
   * @param validator Optional quality gate applied to every successful result.
   * @returns All accepted results, in processing order.
   */
  async run(
    requests: any[],
    validator?: QualityValidator
  ): Promise<GenerationResult[]> {
    console.log('🏭 Starting Production Pipeline\n');
    console.log('='.repeat(70));
    console.log(`\nConfiguration:`);
    console.log(` Total Requests: ${requests.length}`);
    console.log(` Batch Size: ${this.config.batchSize}`);
    console.log(` Max Concurrency: ${this.config.maxConcurrency}`);
    console.log(` Max Retries: ${this.config.maxRetries}`);
    console.log(` Cost Budget: $${this.config.costBudget}`);
    console.log(` Rate Limit: ${this.config.rateLimitPerMinute}/min`);
    console.log(` Caching: ${this.config.enableCaching ? 'Enabled' : 'Disabled'}`);
    console.log(` Output: ${this.config.outputDirectory}`);
    console.log('\n' + '='.repeat(70) + '\n');

    const startTime = Date.now();
    const allResults: GenerationResult[] = [];

    // Split into batches
    const batches: any[][] = [];
    for (let i = 0; i < requests.length; i += this.config.batchSize) {
      batches.push(requests.slice(i, i + this.config.batchSize));
    }

    console.log(`📦 Processing ${batches.length} batches...\n`);

    // Process each batch
    for (const [index, batch] of batches.entries()) {
      // Budget errors raised per request are absorbed by Promise.allSettled,
      // so the budget has to be inspected here for the pipeline to stop.
      if (this.isBudgetExhausted()) {
        console.log('\n⚠️ Cost budget exceeded, stopping pipeline...');
        break;
      }

      console.log(`\nBatch ${index + 1}/${batches.length} (${batch.length} items)`);
      console.log('─'.repeat(70));

      try {
        const batchResults = await this.processBatch(batch, validator);
        allResults.push(...batchResults);

        console.log(`✓ Batch complete: ${batchResults.length}/${batch.length} successful`);
        console.log(` Cost so far: $${this.metrics.totalCost.toFixed(4)}`);
        console.log(` Cache hits: ${this.metrics.cacheHits}`);
      } catch (error) {
        console.error(`✗ Batch failed:`, error instanceof Error ? error.message : 'Unknown error');

        if (error instanceof Error && error.message.includes('budget')) {
          console.log('\n⚠️ Cost budget exceeded, stopping pipeline...');
          break;
        }
      }
    }

    const totalTime = Date.now() - startTime;

    // Save results
    await this.saveResults(allResults);

    // Display metrics
    this.displayMetrics(totalTime);

    return allResults;
  }

  /**
   * Writes `generation-<timestamp>.json` (result payloads plus metrics) and
   * `metrics-<timestamp>.json` into the output directory. Failures are
   * logged, not thrown, so a disk problem does not discard in-memory results.
   */
  private async saveResults(results: GenerationResult[]): Promise<void> {
    try {
      const now = new Date();
      // ':' and '.' are replaced so the timestamp is safe in a file name.
      const timestamp = now.toISOString().replace(/[:.]/g, '-');
      const filename = `generation-${timestamp}.json`;
      const filepath = join(this.config.outputDirectory, filename);

      const output = {
        timestamp: now,
        results: results.map(r => r.data),
        metadata: {
          count: results.length,
          metrics: this.metrics
        }
      };

      writeFileSync(filepath, JSON.stringify(output, null, 2));
      console.log(`\n💾 Results saved to: ${filepath}`);

      // Save metrics separately
      const metricsFile = join(this.config.outputDirectory, `metrics-${timestamp}.json`);
      writeFileSync(metricsFile, JSON.stringify(this.metrics, null, 2));
      console.log(`📊 Metrics saved to: ${metricsFile}`);
    } catch (error) {
      console.error('⚠️ Failed to save results:', error instanceof Error ? error.message : 'Unknown error');
    }
  }

  /**
   * Prints a performance / reliability / cost summary.
   *
   * @param totalTime Wall-clock duration of the run in milliseconds.
   */
  private displayMetrics(totalTime: number): void {
    const m = this.metrics;
    const ratio = ProductionPipeline.ratio;

    // Every quotient goes through ratio() so an empty or instantaneous run
    // prints zeros rather than NaN / Infinity.
    const successRate = ratio(m.successfulRequests, m.totalRequests) * 100;
    const avgDuration = ratio(m.totalDuration, m.successfulRequests);
    const cacheHitRate = ratio(m.cacheHits, m.totalRequests) * 100;
    const throughput = ratio(m.successfulRequests, totalTime / 1000);
    const avgCost = ratio(m.totalCost, m.totalRequests);
    const cacheSavings = m.cacheHits * ProductionPipeline.ESTIMATED_COST_PER_REQUEST;

    console.log('\n\n' + '='.repeat(70));
    console.log('\n📊 PIPELINE METRICS\n');

    console.log('Performance:');
    console.log(` Total Time: ${(totalTime / 1000).toFixed(2)}s`);
    console.log(` Avg Request Time: ${avgDuration.toFixed(0)}ms`);
    console.log(` Throughput: ${throughput.toFixed(2)} req/s`);

    console.log('\nReliability:');
    console.log(` Total Requests: ${m.totalRequests}`);
    console.log(` Successful: ${m.successfulRequests} (${successRate.toFixed(1)}%)`);
    console.log(` Failed: ${m.failedRequests}`);
    console.log(` Retries: ${m.retries}`);

    console.log('\nCost & Efficiency:');
    console.log(` Total Cost: $${m.totalCost.toFixed(4)}`);
    console.log(` Avg Cost/Request: $${avgCost.toFixed(6)}`);
    console.log(` Cache Hit Rate: ${cacheHitRate.toFixed(1)}%`);
    console.log(` Cost Savings from Cache: $${cacheSavings.toFixed(4)}`);

    if (m.errors.length > 0) {
      console.log(`\n⚠️ Errors (${m.errors.length}):`);
      m.errors.slice(0, 5).forEach((err, i) => {
        console.log(` ${i + 1}. ${err.error}`);
      });
      if (m.errors.length > 5) {
        console.log(` ... and ${m.errors.length - 5} more`);
      }
    }

    console.log('\n' + '='.repeat(70) + '\n');
  }

  /**
   * @returns A snapshot of the metrics. The `errors` array and its entries
   *   are copied, so mutating the snapshot cannot alter the pipeline's own
   *   bookkeeping.
   */
  getMetrics(): PipelineMetrics {
    return {
      ...this.metrics,
      errors: this.metrics.errors.map(entry => ({ ...entry }))
    };
  }
}
|
||||
|
||||
// Example quality validator
|
||||
/**
 * Example validator for generated product records.
 *
 * Each record is expected to carry a `description` string of at least 50
 * characters and a `key_features` array with at least 3 entries. Every
 * violated rule costs one tenth of the score; the data set passes while the
 * score stays at or above 0.7.
 */
class ProductQualityValidator implements QualityValidator {
  /**
   * @param data Generated records (anything that is not a non-empty array
   *   is rejected with score 0).
   * @returns `valid` verdict, `score` in [0, 1] in steps of 0.1, and one
   *   `issues` entry per violated rule.
   */
  validate(data: any[]): { valid: boolean; score: number; issues: string[] } {
    if (!Array.isArray(data) || data.length === 0) {
      return { valid: false, score: 0, issues: ['No data generated'] };
    }

    const issues: string[] = [];

    data.forEach((item, idx) => {
      // Optional chaining: a null/undefined record violates both rules
      // instead of throwing a TypeError.
      const description: unknown = item?.description;
      // typeof check: a numeric description has no length and would
      // otherwise slip through the `< 50` comparison.
      if (typeof description !== 'string' || description.length < 50) {
        issues.push(`Item ${idx}: Description too short`);
      }

      const features: unknown = item?.key_features;
      if (!Array.isArray(features) || features.length < 3) {
        issues.push(`Item ${idx}: Insufficient features`);
      }
    });

    // Work in integer tenths and divide once, so the score carries no
    // accumulated floating-point error when compared with the threshold.
    const score = Math.max(0, 10 - issues.length) / 10;
    const valid = score >= 0.7;

    return { valid, score, issues };
  }
}
|
||||
|
||||
// Main execution
|
||||
/**
 * Demo driver: builds a pipeline with conservative limits, submits the same
 * product-catalog request five times (enough to exercise batching and the
 * concurrency windows) and reports how many results were accepted.
 */
async function runProductionPipeline() {
  const outputDirectory = join(process.cwd(), 'examples', 'output', 'production');

  const pipeline = new ProductionPipeline({
    maxRetries: 3,
    retryDelay: 2000,
    batchSize: 5,
    maxConcurrency: 2,
    qualityThreshold: 0.7,
    costBudget: 1.0,
    rateLimitPerMinute: 30,
    enableCaching: true,
    outputDirectory
  });
  const validator = new ProductQualityValidator();

  // One e-commerce catalog request: two products per generation call.
  const productSchema = {
    id: { type: 'string', required: true },
    name: { type: 'string', required: true },
    description: { type: 'string', required: true },
    key_features: { type: 'array', items: { type: 'string' }, required: true },
    price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
    category: { type: 'string', enum: ['Electronics', 'Clothing', 'Home', 'Sports'] }
  };
  const productRequest = { count: 2, schema: productSchema };

  // The very same request object is reused for every slot.
  const allRequests = Array.from({ length: 5 }, () => productRequest);

  const results = await pipeline.run(allRequests, validator);

  console.log(`\n✅ Pipeline complete! Generated ${results.length} batches of products.\n`);
}
|
||||
|
||||
// Run the example
|
||||
// Start the demo only when this file is the process entry point.
// pathToFileURL() produces the canonical file URL for the script path
// (drive letters, back-slashes and characters that need percent-encoding
// are handled), which a hand-built `file://${path}` string does not match.
// The argv[1] guard covers hosts that start without a script path.
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  runProductionPipeline().catch(error => {
    console.error('❌ Pipeline failed:', error);
    process.exit(1);
  });
}

// Classes are runtime values; the interfaces exist only at type level and
// are exported as such so single-file transpilers can erase them safely.
export { ProductionPipeline, ProductQualityValidator };
export type { PipelineConfig, PipelineMetrics };
|
||||
Reference in New Issue
Block a user