/**
 * Distributed Processing Examples
 *
 * Demonstrates distributed computation patterns including map-reduce,
 * worker pools, message queues, event-driven architectures, and
 * saga pattern transactions for multi-agent systems.
 *
 * Integrates with:
 * - claude-flow: Distributed workflow orchestration
 * - Apache Kafka: Event streaming
 * - RabbitMQ: Message queuing
 * - Redis: Distributed caching
 */
import { createSynth } from '../../dist/index.js';

// ============================================================================
// Example 1: Map-Reduce Job Data
// ============================================================================

/**
 * Generate map-reduce job execution data for distributed processing
 */
export async function mapReduceJobData() {
  console.log('\n🗺 Example 1: Map-Reduce Job Execution\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
    cacheStrategy: 'memory',
  });

  // Generate map-reduce jobs
  const jobs = await synth.generateStructured({
    count: 20,
    schema: {
      job_id: 'UUID',
      job_name: 'descriptive job name (e.g., "Word Count Analysis")',
      input_size_mb: 'number (100-10000)',
      map_phase: {
        mapper_count: 'number (10-100)',
        tasks: [
          {
            task_id: 'UUID',
            mapper_id: 'mapper-{1-100}',
            input_split: 'split-{id}',
            input_size_mb: 'number (proportional to job input)',
            output_records: 'number (1000-100000)',
            execution_time_ms: 'number (1000-30000)',
            status: 'completed | failed | running',
          },
        ],
        start_time: 'ISO timestamp',
        end_time: 'ISO timestamp',
        duration_ms: 'number (sum of task times)',
      },
      shuffle_phase: {
        data_transferred_mb: 'number (input_size_mb * 0.8-1.2)',
        partitions: 'number (10-50)',
        transfer_time_ms: 'number (5000-60000)',
      },
      reduce_phase: {
        reducer_count: 'number (5-30)',
        tasks: [
          {
            task_id: 'UUID',
            reducer_id: 'reducer-{1-30}',
            partition_id: 'number',
            input_records: 'number (10000-500000)',
            output_records: 'number (100-10000)',
            execution_time_ms: 'number (2000-40000)',
            status: 'completed | failed | running',
          },
        ],
        start_time: 'ISO timestamp',
        end_time: 'ISO timestamp',
        duration_ms: 'number',
      },
      overall_status: 'completed | failed | running',
      total_duration_ms: 'number',
      efficiency_score: 'number (0.5-1.0)',
    },
    constraints: [
      'Map tasks should run in parallel',
      'Reduce phase starts after map phase completes',
      '95% of tasks should be completed',
      'Efficiency should correlate with parallelism',
    ],
  });

  // Analyze map-reduce performance
  const completedJobs = jobs.data.filter((j: any) => j.overall_status === 'completed');
  const avgEfficiency =
    completedJobs.reduce((sum: number, j: any) => sum + j.efficiency_score, 0) / completedJobs.length;

  console.log('Map-Reduce Analysis:');
  console.log(`- Total jobs: ${jobs.data.length}`);
  console.log(`- Completed: ${completedJobs.length}`);
  console.log(`- Average efficiency: ${(avgEfficiency * 100).toFixed(1)}%`);
  console.log(`- Total data processed: ${jobs.data.reduce((sum: number, j: any) => sum + j.input_size_mb, 0).toFixed(0)} MB`);

  // Calculate parallelism metrics
  const avgMappers = jobs.data.reduce((sum: number, j: any) => sum + j.map_phase.mapper_count, 0) / jobs.data.length;
  const avgReducers = jobs.data.reduce((sum: number, j: any) => sum + j.reduce_phase.reducer_count, 0) / jobs.data.length;

  console.log(`\nParallelism Metrics:`);
  console.log(`- Average mappers: ${avgMappers.toFixed(1)}`);
  console.log(`- Average reducers: ${avgReducers.toFixed(1)}`);

  // Integration with claude-flow
  console.log('\nClaude-Flow Integration:');
  console.log('npx claude-flow@alpha hooks pre-task --description "Map-Reduce job execution"');
  console.log('// Store results in coordination memory for analysis');

  return jobs;
}
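
// ============================================================================
// Sketch: In-Process Map-Reduce
// ============================================================================

/**
 * A minimal, framework-free sketch of the map -> shuffle -> reduce phases
 * that the generated job records above describe, using a word count as the
 * classic example. Illustrative only; it is not part of the agentic-synth
 * API, and real frameworks distribute these phases across machines.
 */
export function wordCountMapReduce(documents: string[]): Map<string, number> {
  // Map phase: emit (word, 1) pairs from each input split.
  const mapped: Array<[string, number]> = documents.flatMap((doc) =>
    doc
      .toLowerCase()
      .split(/\W+/)
      .filter(Boolean)
      .map((word): [string, number] => [word, 1])
  );

  // Shuffle phase: group intermediate pairs by key (partitioning by word).
  const shuffled = new Map<string, number[]>();
  for (const [word, count] of mapped) {
    const bucket = shuffled.get(word) ?? [];
    bucket.push(count);
    shuffled.set(word, bucket);
  }

  // Reduce phase: fold each key's values into a single output record.
  const reduced = new Map<string, number>();
  shuffled.forEach((counts, word) => {
    reduced.set(word, counts.reduce((sum, c) => sum + c, 0));
  });
  return reduced;
}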
// ============================================================================
// Example 2: Worker Pool Simulation
// ============================================================================

/**
 * Generate worker pool execution data
 */
export async function workerPoolSimulation() {
  console.log('\n👷 Example 2: Worker Pool Simulation\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  // Generate worker pool state over time
  const poolStates = await synth.generateTimeSeries({
    count: 100,
    interval: '1m',
    metrics: [
      'active_workers',
      'idle_workers',
      'queue_size',
      'tasks_per_minute',
      'avg_task_duration_ms',
      'worker_utilization',
    ],
    trend: 'mixed',
    seasonality: true,
  });

  // Generate individual worker performance
  const workerMetrics = await synth.generateStructured({
    count: 200,
    schema: {
      timestamp: 'ISO timestamp',
      worker_id: 'worker-{1-20}',
      state: 'idle | busy | initializing | terminating',
      current_task_id: 'UUID or null',
      tasks_completed: 'number (0-1000)',
      tasks_failed: 'number (0-50)',
      avg_execution_time_ms: 'number (100-5000)',
      cpu_usage: 'number (0-100)',
      memory_mb: 'number (100-2000)',
      uptime_seconds: 'number (0-86400)',
      last_heartbeat: 'ISO timestamp',
    },
    constraints: [
      'Busy workers should have current_task_id',
      'Idle workers should have null current_task_id',
      'High task count should correlate with low avg execution time',
    ],
  });

  // Generate task execution history
  const taskExecutions = await synth.generateEvents({
    count: 500,
    eventTypes: ['task_queued', 'task_assigned', 'task_started', 'task_completed', 'task_failed'],
    schema: {
      event_id: 'UUID',
      task_id: 'UUID',
      event_type: 'one of eventTypes',
      worker_id: 'worker-{1-20} or null',
      queue_wait_time_ms: 'number (0-10000)',
      execution_time_ms: 'number (100-5000)',
      priority: 'number (1-10)',
      timestamp: 'ISO timestamp',
    },
    distribution: 'poisson',
  });

  console.log('Worker Pool Analysis:');
  console.log(`- Time series points: ${poolStates.data.length}`);
  console.log(`- Worker metrics: ${workerMetrics.data.length}`);
  console.log(`- Task executions: ${taskExecutions.data.length}`);

  // Calculate pool efficiency
  const avgUtilization =
    poolStates.data.reduce((sum: number, p: any) => sum + p.worker_utilization, 0) / poolStates.data.length;

  console.log(`\nPool Efficiency:`);
  console.log(`- Average utilization: ${avgUtilization.toFixed(1)}%`);
  console.log(`- Active workers: ${workerMetrics.data.filter((w: any) => w.state === 'busy').length}`);
  console.log(`- Idle workers: ${workerMetrics.data.filter((w: any) => w.state === 'idle').length}`);

  // Integration patterns
  console.log('\nIntegration Pattern:');
  console.log('// Bull Queue (Redis-based)');
  console.log('const queue = new Queue("tasks", { redis: redisConfig });');
  console.log('// Process with worker pool');
  console.log('queue.process(concurrency, async (job) => { /* ... */ });');

  return { poolStates, workerMetrics, taskExecutions };
}
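
// ============================================================================
// Sketch: Bounded Worker Pool
// ============================================================================

/**
 * A minimal promise-based worker pool sketch matching the simulated metrics
 * above: a fixed number of workers drain a shared task queue concurrently.
 * Illustrative only; production pools (e.g. Bull/BullMQ, piscina) add
 * retries, backpressure, and persistence. Note that results are collected
 * in completion order, not submission order.
 */
export async function runWorkerPool<T, R>(
  tasks: T[],
  worker: (task: T) => Promise<R>,
  concurrency = 4
): Promise<R[]> {
  const queue = [...tasks];
  const results: R[] = [];

  // Each "worker" is a loop that pulls from the queue until it is empty.
  // Because JavaScript is single-threaded, the length check and shift()
  // are effectively atomic between awaits.
  const workers = Array.from({ length: concurrency }, async () => {
    while (queue.length > 0) {
      const task = queue.shift()!;
      results.push(await worker(task));
    }
  });

  await Promise.all(workers);
  return results;
}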
// ============================================================================
// Example 3: Message Queue Scenarios
// ============================================================================

/**
 * Generate message queue data (RabbitMQ, SQS, etc.)
 */
export async function messageQueueScenarios() {
  console.log('\n📬 Example 3: Message Queue Scenarios\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  // Generate queue metrics
  const queueMetrics = await synth.generateTimeSeries({
    count: 150,
    interval: '30s',
    metrics: [
      'messages_published',
      'messages_consumed',
      'queue_depth',
      'consumer_count',
      'publish_rate',
      'consume_rate',
      'avg_latency_ms',
    ],
    trend: 'up',
    seasonality: false,
  });

  // Generate message data
  const messages = await synth.generateStructured({
    count: 1000,
    schema: {
      message_id: 'UUID',
      queue_name: 'tasks | events | notifications | dead_letter',
      priority: 'number (0-9)',
      payload_size_bytes: 'number (100-10000)',
      content_type: 'application/json | text/plain | application/octet-stream',
      routing_key: 'routing key pattern',
      headers: {
        correlation_id: 'UUID',
        reply_to: 'queue name or null',
        timestamp: 'ISO timestamp',
        retry_count: 'number (0-5)',
      },
      published_at: 'ISO timestamp',
      consumed_at: 'ISO timestamp or null',
      acknowledged_at: 'ISO timestamp or null',
      status: 'pending | consumed | acknowledged | dead_letter | expired',
      time_in_queue_ms: 'number (0-60000)',
      consumer_id: 'consumer-{1-15} or null',
    },
    constraints: [
      'Consumed messages should have consumed_at timestamp',
      'Acknowledged messages should have acknowledged_at after consumed_at',
      '5% of messages should be in dead_letter queue',
      'Higher priority messages should have lower time_in_queue_ms',
    ],
  });

  // Generate consumer performance
  const consumers = await synth.generateStructured({
    count: 15,
    schema: {
      consumer_id: 'consumer-{1-15}',
      queue_subscriptions: ['array of 1-3 queue names'],
      messages_processed: 'number (0-200)',
      processing_rate_per_second: 'number (1-50)',
      error_count: 'number (0-20)',
      avg_processing_time_ms: 'number (100-2000)',
      prefetch_count: 'number (1-100)',
      status: 'active | idle | error | disconnected',
      last_activity: 'ISO timestamp',
    },
  });

  console.log('Message Queue Analysis:');
  console.log(`- Total messages: ${messages.data.length}`);
  console.log(`- Pending: ${messages.data.filter((m: any) => m.status === 'pending').length}`);
  console.log(`- Consumed: ${messages.data.filter((m: any) => m.status === 'consumed').length}`);
  console.log(`- Dead letter: ${messages.data.filter((m: any) => m.status === 'dead_letter').length}`);

  // Calculate throughput
  const totalProcessed = consumers.data.reduce((sum: number, c: any) => sum + c.messages_processed, 0);

  // Average latency over the messages that actually spent time in the queue
  const queuedMessages = messages.data.filter((m: any) => m.time_in_queue_ms);
  const avgLatency =
    queuedMessages.reduce((sum: number, m: any) => sum + m.time_in_queue_ms, 0) / queuedMessages.length;

  console.log(`\nThroughput Metrics:`);
  console.log(`- Total processed: ${totalProcessed}`);
  console.log(`- Active consumers: ${consumers.data.filter((c: any) => c.status === 'active').length}`);
  console.log(`- Average latency: ${avgLatency.toFixed(0)}ms`);

  // RabbitMQ integration example
  console.log('\nRabbitMQ Integration:');
  console.log('// Connect to RabbitMQ');
  console.log('const channel = await connection.createChannel();');
  console.log('await channel.assertQueue("tasks", { durable: true });');
  console.log('// Publish with agentic-synth data');

  return { queueMetrics, messages, consumers };
}
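
// ============================================================================
// Sketch: Publishing Generated Messages to RabbitMQ
// ============================================================================
// A hypothetical sketch of feeding the generated messages into RabbitMQ via
// amqplib (left commented out because amqplib is not a dependency of this
// package). The queue name and priority field mirror the schema above;
// adjust the connection URL to your broker.
//
//   import amqp from 'amqplib';
//
//   const connection = await amqp.connect('amqp://localhost');
//   const channel = await connection.createChannel();
//   await channel.assertQueue('tasks', { durable: true });
//   for (const message of messages.data) {
//     channel.sendToQueue('tasks', Buffer.from(JSON.stringify(message)), {
//       persistent: true,
//       priority: message.priority,
//     });
//   }
//   await channel.close();
//   await connection.close();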
// ============================================================================
// Example 4: Event-Driven Architecture
// ============================================================================

/**
 * Generate event-driven architecture data (Kafka, EventBridge)
 */
export async function eventDrivenArchitecture() {
  console.log('\n⚡ Example 4: Event-Driven Architecture\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  // Generate event streams
  const events = await synth.generateEvents({
    count: 2000,
    eventTypes: [
      'user.created',
      'user.updated',
      'order.placed',
      'order.confirmed',
      'order.shipped',
      'payment.processed',
      'inventory.updated',
      'notification.sent',
    ],
    schema: {
      event_id: 'UUID',
      event_type: 'one of eventTypes',
      event_version: 'v1 | v2',
      aggregate_id: 'UUID',
      aggregate_type: 'user | order | payment | inventory',
      payload: 'JSON object with event data',
      metadata: {
        causation_id: 'UUID (triggering event)',
        correlation_id: 'UUID (workflow id)',
        timestamp: 'ISO timestamp',
        source_service: 'service name',
        user_id: 'UUID or null',
      },
      partition_key: 'aggregate_id',
      sequence_number: 'number',
      published_at: 'ISO timestamp',
    },
    distribution: 'poisson',
    timeRange: {
      start: new Date(Date.now() - 3600000),
      end: new Date(),
    },
  });

  // Generate event handlers (subscribers)
  const handlers = await synth.generateStructured({
    count: 30,
    schema: {
      handler_id: 'UUID',
      handler_name: 'descriptive name',
      subscribed_events: ['array of 1-5 event types'],
      service_name: 'service name',
      processing_mode: 'sync | async | batch',
      events_processed: 'number (0-500)',
      events_failed: 'number (0-50)',
      avg_processing_time_ms: 'number (50-2000)',
      retry_policy: {
        max_retries: 'number (3-10)',
        backoff_strategy: 'exponential | linear | constant',
        dead_letter_queue: 'boolean',
      },
      status: 'active | degraded | failing | disabled',
    },
  });

  // Generate event projections (read models)
  const projections = await synth.generateStructured({
    count: 100,
    schema: {
      projection_id: 'UUID',
      projection_type: 'user_profile | order_summary | inventory_view',
      aggregate_id: 'UUID',
      version: 'number (1-100)',
      data: 'JSON object representing current state',
      last_event_id: 'UUID (from events)',
      last_updated: 'ISO timestamp',
      consistency_lag_ms: 'number (0-5000)',
    },
  });

  console.log('Event-Driven Architecture Analysis:');
  console.log(`- Total events: ${events.data.length}`);
  console.log(`- Event handlers: ${handlers.data.length}`);
  console.log(`- Projections: ${projections.data.length}`);

  // Event type distribution
  const eventTypes = new Map<string, number>();
  events.data.forEach((e: any) => {
    eventTypes.set(e.event_type, (eventTypes.get(e.event_type) || 0) + 1);
  });

  console.log('\nEvent Distribution:');
  eventTypes.forEach((count, type) => {
    console.log(`- ${type}: ${count}`);
  });

  // Calculate average consistency lag
  const avgLag =
    projections.data.reduce((sum: number, p: any) => sum + p.consistency_lag_ms, 0) / projections.data.length;

  console.log(`\nConsistency Metrics:`);
  console.log(`- Average lag: ${avgLag.toFixed(0)}ms`);
  console.log(`- Active handlers: ${handlers.data.filter((h: any) => h.status === 'active').length}`);

  // Kafka integration
  console.log('\nKafka Integration:');
  console.log('// Produce events');
  console.log('await producer.send({ topic: "events", messages: [...] });');
  console.log('// Consume with handler coordination');
  console.log('await consumer.run({ eachMessage: async ({ message }) => { /* ... */ } });');

  return { events, handlers, projections };
}
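
// ============================================================================
// Sketch: Producing Generated Events to Kafka
// ============================================================================
// A hypothetical sketch of publishing the generated events with kafkajs
// (left commented out because kafkajs is not a dependency of this package).
// Keying messages by partition_key, as in the schema above, keeps events for
// one aggregate ordered within a partition.
//
//   import { Kafka } from 'kafkajs';
//
//   const kafka = new Kafka({ clientId: 'agentic-synth', brokers: ['localhost:9092'] });
//   const producer = kafka.producer();
//   await producer.connect();
//   await producer.send({
//     topic: 'events',
//     messages: events.data.map((e: any) => ({
//       key: e.partition_key,
//       value: JSON.stringify(e),
//     })),
//   });
//   await producer.disconnect();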
// ============================================================================
// Example 5: Saga Pattern Transactions
// ============================================================================

/**
 * Generate saga pattern distributed transaction data
 */
export async function sagaPatternTransactions() {
  console.log('\n🔄 Example 5: Saga Pattern Transactions\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  // Generate saga executions
  const sagas = await synth.generateStructured({
    count: 100,
    schema: {
      saga_id: 'UUID',
      saga_type: 'order_fulfillment | payment_processing | user_registration | booking_reservation',
      orchestration_type: 'orchestration | choreography',
      initiator_service: 'service name',
      steps: [
        {
          step_id: 'UUID',
          step_name: 'descriptive step name',
          service: 'service name',
          operation: 'operation name',
          compensation_operation: 'compensation operation name',
          status: 'pending | executing | completed | failed | compensating | compensated',
          started_at: 'ISO timestamp or null',
          completed_at: 'ISO timestamp or null',
          duration_ms: 'number (100-5000)',
          retry_count: 'number (0-3)',
        },
      ],
      overall_status: 'in_progress | completed | failed | compensated',
      started_at: 'ISO timestamp',
      completed_at: 'ISO timestamp or null',
      total_duration_ms: 'number',
      compensation_triggered: 'boolean',
      compensation_reason: 'error description or null',
    },
    constraints: [
      'Sagas should have 3-8 steps',
      'Failed sagas should have compensation_triggered = true',
      'Compensated steps should execute in reverse order',
      '80% of sagas should complete successfully',
    ],
  });

  // Generate saga events
  const sagaEvents = await synth.generateEvents({
    count: 500,
    eventTypes: [
      'saga_started',
      'step_started',
      'step_completed',
      'step_failed',
      'compensation_started',
      'compensation_completed',
      'saga_completed',
      'saga_failed',
    ],
    schema: {
      event_id: 'UUID',
      saga_id: 'UUID (from sagas)',
      step_id: 'UUID or null',
      event_type: 'one of eventTypes',
      service: 'service name',
      payload: 'JSON object',
      timestamp: 'ISO timestamp',
    },
  });

  // Analyze saga patterns
  const successfulSagas = sagas.data.filter((s: any) => s.overall_status === 'completed');
  const failedSagas = sagas.data.filter((s: any) => s.overall_status === 'failed');
  const compensatedSagas = sagas.data.filter((s: any) => s.overall_status === 'compensated');

  console.log('Saga Pattern Analysis:');
  console.log(`- Total sagas: ${sagas.data.length}`);
  console.log(`- Successful: ${successfulSagas.length} (${((successfulSagas.length / sagas.data.length) * 100).toFixed(1)}%)`);
  console.log(`- Failed: ${failedSagas.length}`);
  console.log(`- Compensated: ${compensatedSagas.length}`);

  // Calculate average steps, and average duration over sagas that finished
  const avgSteps = sagas.data.reduce((sum: number, s: any) => sum + s.steps.length, 0) / sagas.data.length;
  const finishedSagas = sagas.data.filter((s: any) => s.completed_at);
  const avgDuration =
    finishedSagas.reduce((sum: number, s: any) => sum + s.total_duration_ms, 0) / finishedSagas.length;

  console.log(`\nTransaction Metrics:`);
  console.log(`- Average steps per saga: ${avgSteps.toFixed(1)}`);
  console.log(`- Average duration: ${avgDuration.toFixed(0)}ms`);
  console.log(`- Compensation rate: ${((compensatedSagas.length / sagas.data.length) * 100).toFixed(1)}%`);

  // Orchestration vs Choreography
  const orchestrated = sagas.data.filter((s: any) => s.orchestration_type === 'orchestration').length;
  const choreographed = sagas.data.filter((s: any) => s.orchestration_type === 'choreography').length;

  console.log(`\nOrchestration Style:`);
  console.log(`- Orchestration: ${orchestrated}`);
  console.log(`- Choreography: ${choreographed}`);

  // Integration with coordination frameworks
  console.log('\nIntegration Pattern:');
  console.log('// MassTransit Saga State Machine');
  console.log('class OrderSaga : MassTransitStateMachine<OrderState> { /* ... */ }');
  console.log('// Or custom orchestrator with agentic-synth test data');

  return { sagas, sagaEvents };
}
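
// ============================================================================
// Sketch: Minimal Saga Orchestrator
// ============================================================================

/**
 * A minimal orchestration-style saga sketch matching the generated data
 * above: steps execute in order, and on failure the already-completed steps
 * are compensated in reverse order. Illustrative only; real orchestrators
 * (e.g. MassTransit, Temporal) persist state and handle retries and timeouts.
 */
export interface SagaStep {
  name: string;
  execute: () => Promise<void>;
  compensate: () => Promise<void>;
}

export async function runSaga(steps: SagaStep[]): Promise<'completed' | 'compensated'> {
  const completed: SagaStep[] = [];
  for (const step of steps) {
    try {
      await step.execute();
      completed.push(step);
    } catch {
      // Compensation executes in reverse order of completion.
      for (const done of completed.reverse()) {
        await done.compensate();
      }
      return 'compensated';
    }
  }
  return 'completed';
}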
// ============================================================================
// Example 6: Stream Processing Pipeline
// ============================================================================

/**
 * Generate stream processing pipeline data (Kafka Streams, Flink)
 */
export async function streamProcessingPipeline() {
  console.log('\n🌊 Example 6: Stream Processing Pipeline\n');

  const synth = createSynth({
    provider: 'gemini',
    apiKey: process.env.GEMINI_API_KEY || 'demo-key',
  });

  // Generate pipeline topology
  const pipeline = await synth.generateStructured({
    count: 1,
    schema: {
      pipeline_id: 'UUID',
      pipeline_name: 'descriptive name',
      stages: [
        {
          stage_id: 'UUID',
          stage_type: 'source | transform | aggregate | filter | sink',
          stage_name: 'stage name',
          parallelism: 'number (1-20)',
          input_topics: ['array of topic names'],
          output_topics: ['array of topic names'],
          transformation: 'description of transformation',
          windowing: {
            type: 'tumbling | sliding | session | global',
            size_ms: 'number (1000-300000)',
            slide_ms: 'number or null',
          },
          state_store: 'store name or null',
        },
      ],
    },
  });

  // Generate processing metrics
  const metrics = await synth.generateTimeSeries({
    count: 200,
    interval: '30s',
    metrics: [
      'records_in',
      'records_out',
      'records_filtered',
      'processing_latency_ms',
      'watermark_lag_ms',
      'cpu_usage',
      'memory_mb',
    ],
    trend: 'mixed',
  });

  // Generate windowed aggregations
  const aggregations = await synth.generateStructured({
    count: 150,
    schema: {
      window_id: 'UUID',
      stage_id: 'UUID (from pipeline)',
      window_start: 'ISO timestamp',
      window_end: 'ISO timestamp',
      record_count: 'number (100-10000)',
      aggregate_values: {
        sum: 'number',
        avg: 'number',
        min: 'number',
        max: 'number',
        count: 'number',
      },
      emitted_at: 'ISO timestamp',
      late_arrivals: 'number (0-100)',
    },
  });

  console.log('Stream Processing Analysis:');
  console.log(`- Pipeline stages: ${pipeline.data[0].stages.length}`);
  console.log(`- Metric points: ${metrics.data.length}`);
  console.log(`- Window aggregations: ${aggregations.data.length}`);

  // Calculate throughput
  const avgRecordsIn = metrics.data.reduce((sum: number, m: any) => sum + m.records_in, 0) / metrics.data.length;
  const avgRecordsOut = metrics.data.reduce((sum: number, m: any) => sum + m.records_out, 0) / metrics.data.length;

  console.log(`\nThroughput:`);
  console.log(`- Input: ${avgRecordsIn.toFixed(0)} records/interval`);
  console.log(`- Output: ${avgRecordsOut.toFixed(0)} records/interval`);
  console.log(`- Filter rate: ${(((avgRecordsIn - avgRecordsOut) / avgRecordsIn) * 100).toFixed(1)}%`);

  console.log('\nApache Flink Integration:');
  console.log('// Define stream processing job');
  console.log('env.addSource(kafkaSource).keyBy(...).window(...).aggregate(...).addSink(kafkaSink);');

  return { pipeline, metrics, aggregations };
}
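
// ============================================================================
// Sketch: Tumbling-Window Aggregation
// ============================================================================

/**
 * A minimal tumbling-window aggregation sketch corresponding to the windowed
 * aggregations generated above: records are bucketed into fixed,
 * non-overlapping windows of `sizeMs` and counted per window. Illustrative
 * only; engines like Kafka Streams or Flink also track watermarks and handle
 * late arrivals.
 */
export function tumblingWindowCounts(
  records: Array<{ timestamp: number }>,
  sizeMs: number
): Map<number, number> {
  const windows = new Map<number, number>();
  for (const record of records) {
    // Window start is the record timestamp floored to the window size.
    const windowStart = Math.floor(record.timestamp / sizeMs) * sizeMs;
    windows.set(windowStart, (windows.get(windowStart) ?? 0) + 1);
  }
  return windows;
}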
// ============================================================================
// Run All Examples
// ============================================================================

export async function runAllDistributedProcessingExamples() {
  console.log('🚀 Running All Distributed Processing Examples\n');
  console.log('='.repeat(70));

  try {
    await mapReduceJobData();
    console.log('='.repeat(70));
    await workerPoolSimulation();
    console.log('='.repeat(70));
    await messageQueueScenarios();
    console.log('='.repeat(70));
    await eventDrivenArchitecture();
    console.log('='.repeat(70));
    await sagaPatternTransactions();
    console.log('='.repeat(70));
    await streamProcessingPipeline();
    console.log('='.repeat(70));
    console.log('\n✅ All distributed processing examples completed!\n');
  } catch (error: any) {
    console.error('❌ Error running examples:', error.message);
    throw error;
  }
}

// Run if executed directly
if (import.meta.url === `file://${process.argv[1]}`) {
  runAllDistributedProcessingExamples().catch(console.error);
}