Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/npm/packages/agentic-synth-examples/tests/dspy/benchmark.test.ts
+++ b/npm/packages/agentic-synth-examples/tests/dspy/benchmark.test.ts
@@ -0,0 +1,376 @@
+/**
+ * Tests for Multi-Model Benchmarking
+ */
+
+import { describe, it, expect, beforeEach } from 'vitest';
+import { MultiModelBenchmark } from '../../src/dspy/benchmark.js';
+import { ModelProvider } from '../../src/types/index.js';
+import type { BenchmarkConfig } from '../../src/dspy/benchmark.js';
+
+describe('MultiModelBenchmark', () => {
+  let config: BenchmarkConfig;
+  // Baseline fixture: reassigned to a fresh literal before each test (2 models, 2 tasks, 3 iterations).
+  beforeEach(() => {
+    config = {
+      models: [
+        {
+          provider: ModelProvider.GEMINI,
+          model: 'gemini-2.0-flash-exp',
+          apiKey: 'test-key-1'
+        },
+        {
+          provider: ModelProvider.CLAUDE,
+          model: 'claude-sonnet-4',
+          apiKey: 'test-key-2'
+        }
+      ],
+      tasks: ['code-generation', 'text-summarization'],
+      iterations: 3
+    };
+  });
+
+  describe('Initialization', () => {
+    it('should create benchmark with valid config', () => {
+      // Constructing from the shared baseline config must yield an instance
+      expect(new MultiModelBenchmark(config)).toBeDefined();
+    });
+
+    it('should accept timeout option', () => {
+      // The timeout field is supplied in addition to everything in the
+      // baseline config.
+      const timedBenchmark = new MultiModelBenchmark({ ...config, timeout: 5000 });
+
+      expect(timedBenchmark).toBeDefined();
+    });
+  });
+
+  describe('Benchmark Execution', () => {
+    it('should run complete benchmark and return results', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results, bestModel, bestProvider, summary } = await suite.run();
+
+      expect(results).toBeDefined();
+      expect(results.length).toBeGreaterThan(0);
+      expect(bestModel).toBeDefined();
+      expect(bestProvider).toBeDefined();
+      expect(summary).toBeDefined();
+    });
+
+    it('should test all model and task combinations', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results } = await suite.run();
+
+      // One result per (model, task, iteration): 2 × 2 × 3 = 12
+      expect(results.length).toBe(12);
+
+      // Both configured tasks appear among the results
+      const seenTasks = new Set(results.map(entry => entry.task));
+      expect(seenTasks.size).toBe(2);
+      expect(seenTasks.has('code-generation')).toBe(true);
+      expect(seenTasks.has('text-summarization')).toBe(true);
+
+      // Both configured providers appear among the results
+      const seenProviders = new Set(results.map(entry => entry.provider));
+      expect(seenProviders.size).toBe(2);
+    });
+
+    it('should run multiple iterations per task', async () => {
+      const suite = new MultiModelBenchmark({
+        ...config,
+        iterations: 5 // overrides the baseline of 3
+      });
+      const { results } = await suite.run();
+
+      // 2 models × 2 tasks × 5 iterations = 20 results
+      expect(results.length).toBe(20);
+    });
+  });
+
+  describe('Performance Metrics', () => {
+    it('should track latency for each test', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results } = await suite.run();
+
+      for (const entry of results) {
+        expect(entry.latency).toBeGreaterThan(0);
+        expect(entry.latency).toBeLessThan(2000); // Upper sanity bound on latency
+      }
+    });
+
+    it('should track cost for each test', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results, summary } = await suite.run();
+
+      for (const entry of results) {
+        expect(entry.cost).toBeGreaterThanOrEqual(0);
+      }
+
+      expect(summary.totalCost).toBeGreaterThan(0);
+    });
+
+    it('should track tokens used', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results } = await suite.run();
+
+      for (const entry of results) {
+        expect(entry.tokensUsed).toBeGreaterThanOrEqual(0);
+      }
+    });
+
+    it('should calculate quality scores', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results } = await suite.run();
+
+      for (const entry of results) {
+        expect(entry.score).toBeGreaterThanOrEqual(0);
+        expect(entry.score).toBeLessThanOrEqual(1);
+      }
+    });
+  });
+
+  describe('Result Aggregation', () => {
+    it('should generate summary statistics', async () => {
+      const benchmark = new MultiModelBenchmark(config);
+      const result = await benchmark.run();
+      // Baseline config: 2 models × 2 tasks × 3 iterations = 12 tests
+      expect(result.summary.totalTests).toBe(12);
+      expect(result.summary.avgScore).toBeGreaterThan(0);
+      expect(result.summary.avgLatency).toBeGreaterThan(0);
+      expect(result.summary.totalCost).toBeGreaterThan(0);
+      expect(result.summary.successRate).toBeGreaterThan(0);
+      expect(result.summary.successRate).toBeLessThanOrEqual(1);
+    });
+
+    it('should include model comparison in summary', async () => {
+      const benchmark = new MultiModelBenchmark(config);
+      const result = await benchmark.run();
+
+      expect(result.summary.modelComparison).toBeDefined();
+      expect(Array.isArray(result.summary.modelComparison)).toBe(true);
+      expect(result.summary.modelComparison.length).toBe(2); // one entry per configured model
+      // Every entry must expose the model name plus avg/min/max score fields
+      result.summary.modelComparison.forEach((comparison: any) => {
+        expect(comparison.model).toBeDefined();
+        expect(comparison.avgScore).toBeDefined();
+        expect(comparison.minScore).toBeDefined();
+        expect(comparison.maxScore).toBeDefined();
+      });
+    });
+
+    it('should identify best performing model', async () => {
+      const benchmark = new MultiModelBenchmark(config);
+      const result = await benchmark.run();
+
+      expect(result.bestModel).toBeDefined();
+      expect(result.bestProvider).toBeDefined();
+      expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(result.bestProvider);
+
+      // The winner must appear in the results, else the mean below is 0 / 0 = NaN
+      const bestModelResults = result.results.filter(
+        r => r.model === result.bestModel && r.provider === result.bestProvider
+      );
+      expect(bestModelResults.length).toBeGreaterThan(0);
+      const avgBestScore = bestModelResults.reduce((sum, r) => sum + r.score, 0) / bestModelResults.length;
+      // Loose sanity bound: the winner's mean score is at least 90% of the overall mean
+      expect(avgBestScore).toBeGreaterThanOrEqual(result.summary.avgScore * 0.9);
+    });
+  });
+
+  describe('Model Comparison', () => {
+    it('should directly compare two models', async () => {
+      const [firstModel, secondModel] = config.models;
+      const suite = new MultiModelBenchmark(config);
+      const outcome = await suite.compare(firstModel, secondModel, 'code-generation');
+
+      // A winner is reported and it is one of the two contenders' providers
+      expect(outcome.winner).toBeDefined();
+      expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(outcome.winner);
+
+      // Each side yields one result per configured iteration (3)
+      expect(outcome.model1Results.length).toBe(3);
+      expect(outcome.model2Results.length).toBe(3);
+      expect(outcome.comparison).toBeDefined();
+      expect(outcome.comparison.scoreImprovement).toBeGreaterThanOrEqual(0);
+    });
+
+    it('should calculate score improvement in comparison', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { comparison } = await suite.compare(
+        config.models[0],
+        config.models[1],
+        'text-summarization'
+      );
+
+      expect(comparison.model1Avg).toBeGreaterThan(0);
+      expect(comparison.model2Avg).toBeGreaterThan(0);
+      expect(typeof comparison.scoreImprovement).toBe('number');
+    });
+  });
+
+  describe('Error Handling', () => {
+    it('should handle API failures gracefully', async () => {
+      const benchmark = new MultiModelBenchmark(config);
+      const result = await benchmark.run();
+
+      // Individual calls may fail (reportedly a simulated ~5% failure rate — TODO
+      // confirm against the implementation); only the aggregate rate is asserted.
+      const successRate = result.summary.successRate;
+
+      expect(successRate).toBeGreaterThan(0.8); // At least 80% success
+      expect(successRate).toBeLessThanOrEqual(1.0);
+    });
+
+    it('should continue after individual test failures', async () => {
+      const benchmark = new MultiModelBenchmark(config);
+      const result = await benchmark.run();
+
+      // A full set of 12 results means no failure aborted the run early
+      expect(result.results.length).toBe(12);
+    });
+
+    it('should handle timeout scenarios', async () => {
+      const benchmark = new MultiModelBenchmark({
+        ...config,
+        timeout: 100 // Very short timeout
+      });
+
+      const result = await benchmark.run();
+      expect(result.results).toBeDefined();
+      // Only checks that run() resolves with results; per-test outcomes are not inspected
+    });
+  });
+
+  describe('Task Variations', () => {
+    it('should handle single task benchmark', async () => {
+      const onlyTask = 'code-generation';
+      const suite = new MultiModelBenchmark({ ...config, tasks: [onlyTask] });
+      const { results } = await suite.run();
+
+      // 2 models × 1 task × 3 iterations
+      expect(results.length).toBe(6);
+
+      expect(results.every(entry => entry.task === onlyTask)).toBe(true);
+    });
+
+    it('should handle multiple task types', async () => {
+      const suite = new MultiModelBenchmark({
+        ...config,
+        tasks: ['code-generation', 'text-summarization', 'data-analysis', 'creative-writing']
+      });
+      const { results } = await suite.run();
+      const distinctTasks = new Set(results.map(entry => entry.task));
+
+      // 2 models × 4 tasks × 3 iterations = 24 results
+      expect(results.length).toBe(24);
+      // Each of the four requested tasks is represented
+      expect(distinctTasks.size).toBe(4);
+    });
+  });
+
+  describe('Model Variations', () => {
+    it('should handle single model benchmark', async () => {
+      const [geminiOnly] = config.models;
+      const suite = new MultiModelBenchmark({ ...config, models: [geminiOnly] });
+      const { results } = await suite.run();
+
+      // 1 model × 2 tasks × 3 iterations
+      expect(results.length).toBe(6);
+
+      expect(results.every(entry => entry.provider === ModelProvider.GEMINI)).toBe(true);
+    });
+
+    it('should handle three or more models', async () => {
+      const suite = new MultiModelBenchmark({
+        ...config,
+        models: [
+          ...config.models,
+          { // third contender on top of the two baseline models
+            provider: ModelProvider.GPT4,
+            model: 'gpt-4-turbo',
+            apiKey: 'test-key-3'
+          }
+        ]
+      });
+      const { results } = await suite.run();
+      const distinctProviders = new Set(results.map(entry => entry.provider));
+
+      // 3 models × 2 tasks × 3 iterations = 18 results
+      expect(results.length).toBe(18);
+
+      expect(distinctProviders.size).toBe(3);
+    });
+  });
+
+  describe('Performance Analysis', () => {
+    it('should track consistency across iterations', async () => {
+      // Ten iterations per (provider, task) pair give the spread check more samples
+      const suite = new MultiModelBenchmark({ ...config, iterations: 10 });
+      const { results } = await suite.run();
+
+      // Bucket the scores under a "provider:task" key
+      const scoresByGroup = new Map<string, number[]>();
+      for (const entry of results) {
+        const groupKey = `${entry.provider}:${entry.task}`;
+        const bucket = scoresByGroup.get(groupKey) || [];
+        bucket.push(entry.score);
+        scoresByGroup.set(groupKey, bucket);
+      }
+
+      // Population standard deviation of every bucket must stay below 0.3
+      for (const scores of scoresByGroup.values()) {
+        const count = scores.length;
+        const mean = scores.reduce((total, value) => total + value, 0) / count;
+        const squaredError = scores.reduce((total, value) => total + (value - mean) ** 2, 0);
+        const stdDev = Math.sqrt(squaredError / count);
+
+        // Standard deviation should be reasonable (not random)
+        expect(stdDev).toBeLessThan(0.3);
+      }
+    });
+
+    it('should identify performance patterns', async () => {
+      const suite = new MultiModelBenchmark(config);
+      const { results } = await suite.run();
+
+      // task -> provider -> scores, so each task can be inspected per provider
+      const scoresByTask: Record<string, Record<string, number[]>> = {};
+      for (const entry of results) {
+        const perProvider = scoresByTask[entry.task] || (scoresByTask[entry.task] = {});
+        const scores = perProvider[entry.provider] || (perProvider[entry.provider] = []);
+        scores.push(entry.score);
+      }
+
+      // Each task should have results from both models
+      for (const perProvider of Object.values(scoresByTask)) {
+        expect(Object.keys(perProvider).length).toBe(2);
+      }
+    });
+  });
+
+  describe('Cost Analysis', () => {
+    it('should calculate total cost accurately', async () => {
+      const { results, summary } = await new MultiModelBenchmark(config).run();
+
+      // Re-add the individual costs and compare to 2 decimal places
+      const recomputedTotal = results.reduce((total, entry) => total + entry.cost, 0);
+      expect(summary.totalCost).toBeCloseTo(recomputedTotal, 2);
+    });
+
+    it('should track cost per model', async () => {
+      const { results } = await new MultiModelBenchmark(config).run();
+
+      // Accumulate the cost of every result under its "provider:model" key
+      const costByModel = new Map<string, number>();
+      for (const entry of results) {
+        const modelKey = `${entry.provider}:${entry.model}`;
+        costByModel.set(modelKey, (costByModel.get(modelKey) || 0) + entry.cost);
+      }
+
+      // Both models should have incurred costs
+      expect(costByModel.size).toBe(2);
+      for (const modelCost of costByModel.values()) {
+        expect(modelCost).toBeGreaterThan(0);
+      }
+    });
+  });
+});
--- a/npm/packages/agentic-synth-examples/tests/dspy/training-session.test.ts
+++ b/npm/packages/agentic-synth-examples/tests/dspy/training-session.test.ts
@@ -0,0 +1,363 @@
+/**
+ * Tests for DSPy Training Session
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { DSPyTrainingSession } from '../../src/dspy/training-session.js';
+import { ModelProvider } from '../../src/types/index.js';
+import type { TrainingSessionConfig } from '../../src/dspy/training-session.js';
+
+describe('DSPyTrainingSession', () => {
+  let config: TrainingSessionConfig;
+  // Baseline fixture: reassigned to a fresh literal before each test (2 models, 3 rounds, threshold 0.95).
+  beforeEach(() => {
+    config = {
+      models: [
+        {
+          provider: ModelProvider.GEMINI,
+          model: 'gemini-2.0-flash-exp',
+          apiKey: 'test-key-1'
+        },
+        {
+          provider: ModelProvider.CLAUDE,
+          model: 'claude-sonnet-4',
+          apiKey: 'test-key-2'
+        }
+      ],
+      optimizationRounds: 3,
+      convergenceThreshold: 0.95
+    };
+  });
+
+  describe('Initialization', () => {
+    it('should create training session with valid config', () => {
+      const freshSession = new DSPyTrainingSession(config);
+      expect(freshSession).toBeDefined();
+      expect(freshSession.getStatus().isRunning).toBe(false); // not running until run() is called
+    });
+
+    it('should accept custom budget', () => {
+      // A budget field is supplied in addition to everything in the
+      // baseline config.
+      const budgetedSession = new DSPyTrainingSession({ ...config, budget: 1.0 });
+
+      expect(budgetedSession).toBeDefined();
+    });
+
+    it('should accept maxConcurrent option', () => {
+      // A maxConcurrent field is supplied in addition to everything in the
+      // baseline config.
+      const concurrentSession = new DSPyTrainingSession({ ...config, maxConcurrent: 5 });
+
+      expect(concurrentSession).toBeDefined();
+    });
+  });
+
+  describe('Training Execution', () => {
+    it('should run training session and return report', async () => {
+      const session = new DSPyTrainingSession(config);
+      const report = await session.run('Generate product descriptions', {});
+
+      expect(report).toBeDefined();
+      expect(report.bestModel).toBeDefined();
+      expect(report.bestProvider).toBeDefined();
+      expect(report.bestScore).toBeGreaterThan(0);
+      expect(report.totalCost).toBeGreaterThan(0);
+      expect(report.iterations).toBe(3);
+      expect(report.results).toHaveLength(6); // 2 models × 3 rounds
+    });
+
+    it('should train multiple models in parallel', async () => {
+      const session = new DSPyTrainingSession({
+        ...config,
+        optimizationRounds: 2
+      });
+
+      const startTime = Date.now();
+      await session.run('Test prompt', {});
+      const duration = Date.now() - startTime;
+
+      // NOTE(review): this only bounds wall-clock time for 2 models × 2 rounds; nothing is
+      // compared against a sequential run, so it cannot by itself prove parallel execution.
+      expect(duration).toBeLessThan(1000); // 1 s wall-clock budget (Date.now() is in ms)
+    });
+
+    it('should show quality improvement over iterations', async () => {
+      const session = new DSPyTrainingSession(config);
+      const report = await session.run('Test improvement', {});
+
+      // Collect the round-1 and final-round results; the averages below pool all models
+      const firstRound = report.results.filter(r => r.iteration === 1);
+      const lastRound = report.results.filter(r => r.iteration === config.optimizationRounds);
+
+      const avgFirstScore = firstRound.reduce((sum, r) => sum + r.quality.score, 0) / firstRound.length;
+      const avgLastScore = lastRound.reduce((sum, r) => sum + r.quality.score, 0) / lastRound.length;
+
+      expect(avgLastScore).toBeGreaterThanOrEqual(avgFirstScore);
+      expect(report.qualityImprovement).toBeGreaterThanOrEqual(0);
+    });
+
+    it('should stop when convergence threshold is reached', async () => {
+      const session = new DSPyTrainingSession({
+        ...config,
+        optimizationRounds: 10,
+        convergenceThreshold: 0.7 // Lower threshold to ensure we hit it
+      });
+
+      let convergedEvent = false; // NOTE(review): set by the listener below but never asserted
+      session.on('converged', () => {
+        convergedEvent = true;
+      });
+
+      const report = await session.run('Test convergence', {});
+
+      // NOTE(review): `<= 10` also passes when all 10 rounds run, so early stopping is not verified
+      expect(report.iterations).toBeLessThanOrEqual(10);
+      expect(report.bestScore).toBeGreaterThanOrEqual(0.7);
+    });
+
+    it('should respect budget constraints', async () => {
+      const budget = 0.5;
+      const session = new DSPyTrainingSession({
+        ...config,
+        optimizationRounds: 10,
+        budget
+      });
+
+      let budgetExceeded = false; // NOTE(review): set by the listener below but never asserted
+      session.on('budget-exceeded', () => {
+        budgetExceeded = true;
+      });
+
+      const report = await session.run('Test budget', {});
+
+      expect(report.totalCost).toBeLessThanOrEqual(budget * 1.1); // Allow 10% margin
+    });
+  });
+
+  describe('Event Emissions', () => {
+    it('should emit start event', async () => {
+      const session = new DSPyTrainingSession(config);
+      let startEmitted = false;
+
+      session.on('start', (data) => {
+        startEmitted = true;
+        expect(data.models).toBe(2);
+        expect(data.rounds).toBe(3);
+      });
+
+      await session.run('Test events', {});
+      expect(startEmitted).toBe(true);
+    });
+
+    it('should emit iteration events', async () => {
+      const session = new DSPyTrainingSession(config);
+      const iterationResults: any[] = [];
+
+      session.on('iteration', (result) => {
+        iterationResults.push(result);
+      });
+
+      await session.run('Test iterations', {});
+
+      expect(iterationResults.length).toBe(6); // 2 models × 3 rounds
+      iterationResults.forEach(result => {
+        expect(result.modelProvider).toBeDefined();
+        expect(result.quality.score).toBeGreaterThan(0);
+        expect(result.cost).toBeGreaterThan(0);
+      });
+    });
+
+    it('should emit round events', async () => {
+      const session = new DSPyTrainingSession(config);
+      const rounds: number[] = [];
+
+      session.on('round', (data) => {
+        rounds.push(data.round);
+      });
+
+      await session.run('Test rounds', {});
+
+      expect(rounds).toEqual([1, 2, 3]);
+    });
+
+    it('should emit complete event', async () => {
+      const session = new DSPyTrainingSession(config);
+      let completeData: any = null;
+
+      session.on('complete', (report) => {
+        completeData = report;
+      });
+
+      await session.run('Test complete', {});
+      // completeData starts as null, which toBeDefined() accepts; assert non-null instead
+      expect(completeData).not.toBeNull();
+      expect(completeData.bestModel).toBeDefined();
+      expect(completeData.totalCost).toBeGreaterThan(0);
+    });
+
+    it('should emit error on failure', async () => {
+      const invalidConfig = {
+        ...config,
+        models: [] // Invalid: no models
+      };
+
+      const session = new DSPyTrainingSession(invalidConfig);
+      let errorEmitted = false;
+
+      session.on('error', () => {
+        errorEmitted = true;
+      });
+
+      try {
+        await session.run('Test error', {});
+      } catch {
+        // A rejection is tolerated here; only the 'error' event is asserted below
+      }
+
+      expect(errorEmitted).toBe(true);
+    });
+  });
+
+  describe('Status Tracking', () => {
+    it('should track running status', async () => {
+      const session = new DSPyTrainingSession(config);
+
+      // Not running before run() is called
+      expect(session.getStatus().isRunning).toBe(false);
+
+      // Only the before/after states are observed; mid-run status is not sampled
+      await session.run('Test status', {});
+
+      const { completedIterations, totalCost, results } = session.getStatus();
+
+      expect(completedIterations).toBe(3);
+      expect(totalCost).toBeGreaterThan(0);
+      expect(results).toHaveLength(6);
+    });
+
+    it('should track total cost', async () => {
+      const session = new DSPyTrainingSession(config);
+      await session.run('Test cost', {});
+
+      const { totalCost } = session.getStatus();
+      expect(totalCost).toBeGreaterThan(0);
+      expect(totalCost).toBeLessThan(1.0); // Upper sanity bound on spend
+    });
+  });
+
+  describe('Error Handling', () => {
+    it('should handle empty models array', async () => {
+      // With an empty model list, run() is expected to reject rather than
+      // resolve with a report.
+      const session = new DSPyTrainingSession({ ...config, models: [] });
+      const pendingRun = session.run('Test empty', {});
+      await expect(pendingRun).rejects.toThrow();
+    });
+
+    it('should handle invalid optimization rounds', async () => {
+      const session = new DSPyTrainingSession({
+        ...config,
+        optimizationRounds: 0 // zero rounds: expect an empty report, not a rejection
+      });
+      const { iterations, results } = await session.run('Test invalid rounds', {});
+
+      expect(iterations).toBe(0);
+      expect(results).toHaveLength(0);
+    });
+
+    it('should handle negative convergence threshold', async () => {
+      const session = new DSPyTrainingSession({
+        ...config,
+        convergenceThreshold: -1
+      });
+      const report = await session.run('Test negative threshold', {});
+
+      // Only completion is checked; NOTE(review): convergence behaviour for -1 is not asserted
+      expect(report).toBeDefined();
+    });
+  });
+
+  describe('Quality Metrics', () => {
+    it('should include quality metrics in results', async () => {
+      const session = new DSPyTrainingSession(config);
+      const { results } = await session.run('Test metrics', {});
+
+      for (const { quality } of results) {
+        expect(quality).toBeDefined();
+        expect(quality.score).toBeGreaterThan(0);
+        expect(quality.score).toBeLessThanOrEqual(1);
+        expect(quality.metrics).toBeDefined();
+        expect(quality.metrics.accuracy).toBeDefined();
+        expect(quality.metrics.consistency).toBeDefined();
+        expect(quality.metrics.relevance).toBeDefined();
+      }
+    });
+
+    it('should calculate quality improvement percentage', async () => {
+      const session = new DSPyTrainingSession(config);
+      const { qualityImprovement } = await session.run('Test improvement percentage', {});
+
+      expect(typeof qualityImprovement).toBe('number');
+      expect(qualityImprovement).toBeGreaterThanOrEqual(0);
+    });
+  });
+
+  describe('Model Comparison', () => {
+    it('should identify best performing model', async () => {
+      const session = new DSPyTrainingSession(config);
+      const { bestModel, bestProvider, results } = await session.run('Test best model', {});
+
+      expect(bestModel).toBeDefined();
+      expect(bestProvider).toBeDefined();
+      expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(bestProvider);
+
+      // The reported winner must correspond to at least one recorded result
+      const winnerResult = results.find(
+        entry => entry.model === bestModel && entry.modelProvider === bestProvider
+      );
+      expect(winnerResult).toBeDefined();
+    });
+
+    it('should handle three or more models', async () => {
+      const threeModelConfig = {
+        ...config,
+        models: [
+          ...config.models,
+          { // third contender on top of the two baseline models
+            provider: ModelProvider.GPT4,
+            model: 'gpt-4-turbo',
+            apiKey: 'test-key-3'
+          }
+        ]
+      };
+      const session = new DSPyTrainingSession(threeModelConfig);
+      const { results, bestProvider } = await session.run('Test multiple models', {});
+
+      // 3 models × 3 rounds
+      expect(results.length).toBe(9);
+      expect(bestProvider).toBeDefined();
+    });
+  });
+
+  describe('Duration Tracking', () => {
+    it('should track total duration', async () => {
+      const session = new DSPyTrainingSession(config);
+      const { totalDuration } = await session.run('Test duration', {});
+
+      expect(totalDuration).toBeGreaterThan(0);
+      expect(totalDuration).toBeLessThan(10000); // Should complete within 10 seconds
+    });
+
+    it('should track per-iteration duration', async () => {
+      const session = new DSPyTrainingSession(config);
+      const { results } = await session.run('Test iteration duration', {});
+
+      for (const entry of results) {
+        expect(entry.duration).toBeGreaterThan(0);
+        expect(entry.duration).toBeLessThan(5000); // Each iteration under 5 seconds
+      }
+    });
+  });
+});