Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
376
vendor/ruvector/npm/packages/agentic-synth-examples/tests/dspy/benchmark.test.ts
vendored
Normal file
376
vendor/ruvector/npm/packages/agentic-synth-examples/tests/dspy/benchmark.test.ts
vendored
Normal file
@@ -0,0 +1,376 @@
|
||||
/**
|
||||
* Tests for Multi-Model Benchmarking
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { MultiModelBenchmark } from '../../src/dspy/benchmark.js';
|
||||
import { ModelProvider } from '../../src/types/index.js';
|
||||
import type { BenchmarkConfig } from '../../src/dspy/benchmark.js';
|
||||
|
||||
describe('MultiModelBenchmark', () => {
|
||||
let config: BenchmarkConfig;
|
||||
|
||||
beforeEach(() => {
|
||||
config = {
|
||||
models: [
|
||||
{
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key-1'
|
||||
},
|
||||
{
|
||||
provider: ModelProvider.CLAUDE,
|
||||
model: 'claude-sonnet-4',
|
||||
apiKey: 'test-key-2'
|
||||
}
|
||||
],
|
||||
tasks: ['code-generation', 'text-summarization'],
|
||||
iterations: 3
|
||||
};
|
||||
});
|
||||
|
||||
describe('Initialization', () => {
|
||||
it('should create benchmark with valid config', () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
expect(benchmark).toBeDefined();
|
||||
});
|
||||
|
||||
it('should accept timeout option', () => {
|
||||
const benchmarkWithTimeout = new MultiModelBenchmark({
|
||||
...config,
|
||||
timeout: 5000
|
||||
});
|
||||
expect(benchmarkWithTimeout).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Benchmark Execution', () => {
|
||||
it('should run complete benchmark and return results', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
expect(result.results).toBeDefined();
|
||||
expect(result.results.length).toBeGreaterThan(0);
|
||||
expect(result.bestModel).toBeDefined();
|
||||
expect(result.bestProvider).toBeDefined();
|
||||
expect(result.summary).toBeDefined();
|
||||
});
|
||||
|
||||
it('should test all model and task combinations', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
// 2 models × 2 tasks × 3 iterations = 12 results
|
||||
expect(result.results.length).toBe(12);
|
||||
|
||||
// Verify all tasks are covered
|
||||
const tasks = new Set(result.results.map(r => r.task));
|
||||
expect(tasks.size).toBe(2);
|
||||
expect(tasks.has('code-generation')).toBe(true);
|
||||
expect(tasks.has('text-summarization')).toBe(true);
|
||||
|
||||
// Verify all models are covered
|
||||
const providers = new Set(result.results.map(r => r.provider));
|
||||
expect(providers.size).toBe(2);
|
||||
});
|
||||
|
||||
it('should run multiple iterations per task', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
iterations: 5
|
||||
});
|
||||
const result = await benchmark.run();
|
||||
|
||||
// 2 models × 2 tasks × 5 iterations = 20 results
|
||||
expect(result.results.length).toBe(20);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Performance Metrics', () => {
|
||||
it('should track latency for each test', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
result.results.forEach(r => {
|
||||
expect(r.latency).toBeGreaterThan(0);
|
||||
expect(r.latency).toBeLessThan(2000); // Reasonable latency limit
|
||||
});
|
||||
});
|
||||
|
||||
it('should track cost for each test', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
result.results.forEach(r => {
|
||||
expect(r.cost).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
expect(result.summary.totalCost).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should track tokens used', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
result.results.forEach(r => {
|
||||
expect(r.tokensUsed).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
|
||||
it('should calculate quality scores', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
result.results.forEach(r => {
|
||||
expect(r.score).toBeGreaterThanOrEqual(0);
|
||||
expect(r.score).toBeLessThanOrEqual(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Result Aggregation', () => {
|
||||
it('should generate summary statistics', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
expect(result.summary.totalTests).toBe(12);
|
||||
expect(result.summary.avgScore).toBeGreaterThan(0);
|
||||
expect(result.summary.avgLatency).toBeGreaterThan(0);
|
||||
expect(result.summary.totalCost).toBeGreaterThan(0);
|
||||
expect(result.summary.successRate).toBeGreaterThan(0);
|
||||
expect(result.summary.successRate).toBeLessThanOrEqual(1);
|
||||
});
|
||||
|
||||
it('should include model comparison in summary', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
expect(result.summary.modelComparison).toBeDefined();
|
||||
expect(Array.isArray(result.summary.modelComparison)).toBe(true);
|
||||
expect(result.summary.modelComparison.length).toBe(2); // 2 models
|
||||
|
||||
result.summary.modelComparison.forEach((comparison: any) => {
|
||||
expect(comparison.model).toBeDefined();
|
||||
expect(comparison.avgScore).toBeDefined();
|
||||
expect(comparison.minScore).toBeDefined();
|
||||
expect(comparison.maxScore).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
it('should identify best performing model', async () => {
  const benchmark = new MultiModelBenchmark(config);
  const result = await benchmark.run();

  expect(result.bestModel).toBeDefined();
  expect(result.bestProvider).toBeDefined();
  expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(result.bestProvider);

  // Collect every result row that belongs to the reported winner.
  const bestModelResults = result.results.filter(
    r => r.model === result.bestModel && r.provider === result.bestProvider
  );

  // Guard the division below: with no matching rows the average would be
  // 0 / 0 = NaN and the final comparison would fail with an opaque message.
  expect(bestModelResults.length).toBeGreaterThan(0);

  const avgBestScore =
    bestModelResults.reduce((sum, r) => sum + r.score, 0) / bestModelResults.length;

  // The winner's mean score must reach at least 90% of the overall mean score.
  // (This is a tolerance check, not a strict "above average" check.)
  expect(avgBestScore).toBeGreaterThanOrEqual(result.summary.avgScore * 0.9);
});
|
||||
});
|
||||
|
||||
describe('Model Comparison', () => {
|
||||
it('should directly compare two models', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.compare(
|
||||
config.models[0],
|
||||
config.models[1],
|
||||
'code-generation'
|
||||
);
|
||||
|
||||
expect(result.winner).toBeDefined();
|
||||
expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(result.winner);
|
||||
expect(result.model1Results.length).toBe(3); // 3 iterations
|
||||
expect(result.model2Results.length).toBe(3);
|
||||
expect(result.comparison).toBeDefined();
|
||||
expect(result.comparison.scoreImprovement).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('should calculate score improvement in comparison', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.compare(
|
||||
config.models[0],
|
||||
config.models[1],
|
||||
'text-summarization'
|
||||
);
|
||||
|
||||
expect(result.comparison.model1Avg).toBeGreaterThan(0);
|
||||
expect(result.comparison.model2Avg).toBeGreaterThan(0);
|
||||
expect(typeof result.comparison.scoreImprovement).toBe('number');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Error Handling', () => {
|
||||
it('should handle API failures gracefully', async () => {
  const benchmark = new MultiModelBenchmark(config);
  const result = await benchmark.run();

  // Some tests might fail (simulated 5% failure rate), so only a lower bound
  // on the aggregate success rate is asserted rather than an exact value.
  const successRate = result.summary.successRate;

  expect(successRate).toBeGreaterThan(0.8); // At least 80% success
  expect(successRate).toBeLessThanOrEqual(1.0);
});
|
||||
|
||||
it('should continue after individual test failures', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
// Should complete all tests even if some fail
|
||||
expect(result.results.length).toBe(12);
|
||||
});
|
||||
|
||||
it('should handle timeout scenarios', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
timeout: 100 // Very short timeout
|
||||
});
|
||||
|
||||
const result = await benchmark.run();
|
||||
expect(result.results).toBeDefined();
|
||||
// Tests should complete or fail, but not hang
|
||||
});
|
||||
});
|
||||
|
||||
describe('Task Variations', () => {
|
||||
it('should handle single task benchmark', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
tasks: ['code-generation']
|
||||
});
|
||||
const result = await benchmark.run();
|
||||
|
||||
expect(result.results.length).toBe(6); // 2 models × 1 task × 3 iterations
|
||||
expect(result.results.every(r => r.task === 'code-generation')).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle multiple task types', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
tasks: ['code-generation', 'text-summarization', 'data-analysis', 'creative-writing']
|
||||
});
|
||||
const result = await benchmark.run();
|
||||
|
||||
// 2 models × 4 tasks × 3 iterations = 24 results
|
||||
expect(result.results.length).toBe(24);
|
||||
|
||||
const tasks = new Set(result.results.map(r => r.task));
|
||||
expect(tasks.size).toBe(4);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Model Variations', () => {
|
||||
it('should handle single model benchmark', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
models: [config.models[0]]
|
||||
});
|
||||
const result = await benchmark.run();
|
||||
|
||||
expect(result.results.length).toBe(6); // 1 model × 2 tasks × 3 iterations
|
||||
expect(result.results.every(r => r.provider === ModelProvider.GEMINI)).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle three or more models', async () => {
|
||||
const benchmark = new MultiModelBenchmark({
|
||||
...config,
|
||||
models: [
|
||||
...config.models,
|
||||
{
|
||||
provider: ModelProvider.GPT4,
|
||||
model: 'gpt-4-turbo',
|
||||
apiKey: 'test-key-3'
|
||||
}
|
||||
]
|
||||
});
|
||||
const result = await benchmark.run();
|
||||
|
||||
// 3 models × 2 tasks × 3 iterations = 18 results
|
||||
expect(result.results.length).toBe(18);
|
||||
|
||||
const providers = new Set(result.results.map(r => r.provider));
|
||||
expect(providers.size).toBe(3);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Performance Analysis', () => {
|
||||
it('should track consistency across iterations', async () => {
  // Use more iterations than the default so each group has enough samples.
  const benchmark = new MultiModelBenchmark({ ...config, iterations: 10 });
  const result = await benchmark.run();

  // Bucket the scores per "provider:task" pair.
  const scoresByGroup = new Map<string, number[]>();
  for (const entry of result.results) {
    const groupKey = `${entry.provider}:${entry.task}`;
    let bucket = scoresByGroup.get(groupKey);
    if (!bucket) {
      bucket = [];
      scoresByGroup.set(groupKey, bucket);
    }
    bucket.push(entry.score);
  }

  // Each bucket's population standard deviation must stay below 0.3,
  // i.e. scores within one provider/task pair should be fairly consistent.
  for (const bucket of scoresByGroup.values()) {
    const count = bucket.length;
    const mean = bucket.reduce((total, value) => total + value, 0) / count;
    const variance =
      bucket.reduce((total, value) => total + Math.pow(value - mean, 2), 0) / count;

    expect(Math.sqrt(variance)).toBeLessThan(0.3);
  }
});
|
||||
|
||||
it('should identify performance patterns', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
// Verify we can identify which model is better for which task
|
||||
const taskPerformance = result.results.reduce((acc, r) => {
|
||||
if (!acc[r.task]) acc[r.task] = {};
|
||||
if (!acc[r.task][r.provider]) acc[r.task][r.provider] = [];
|
||||
acc[r.task][r.provider].push(r.score);
|
||||
return acc;
|
||||
}, {} as Record<string, Record<string, number[]>>);
|
||||
|
||||
// Each task should have results from both models
|
||||
Object.keys(taskPerformance).forEach(task => {
|
||||
expect(Object.keys(taskPerformance[task]).length).toBe(2);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Cost Analysis', () => {
|
||||
it('should calculate total cost accurately', async () => {
|
||||
const benchmark = new MultiModelBenchmark(config);
|
||||
const result = await benchmark.run();
|
||||
|
||||
const manualTotal = result.results.reduce((sum, r) => sum + r.cost, 0);
|
||||
expect(result.summary.totalCost).toBeCloseTo(manualTotal, 2);
|
||||
});
|
||||
|
||||
it('should track cost per model', async () => {
  const benchmark = new MultiModelBenchmark(config);
  const result = await benchmark.run();

  // Accumulate the cost of every result row under its "provider:model" key.
  const costTotals = new Map<string, number>();
  for (const entry of result.results) {
    const modelKey = `${entry.provider}:${entry.model}`;
    const runningTotal = costTotals.get(modelKey) || 0;
    costTotals.set(modelKey, runningTotal + entry.cost);
  }

  // Both configured models must appear, and each must have a positive total.
  expect(costTotals.size).toBe(2);
  for (const total of costTotals.values()) {
    expect(total).toBeGreaterThan(0);
  }
});
|
||||
});
|
||||
});
|
||||
363
vendor/ruvector/npm/packages/agentic-synth-examples/tests/dspy/training-session.test.ts
vendored
Normal file
363
vendor/ruvector/npm/packages/agentic-synth-examples/tests/dspy/training-session.test.ts
vendored
Normal file
@@ -0,0 +1,363 @@
|
||||
/**
|
||||
* Tests for DSPy Training Session
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
import { DSPyTrainingSession } from '../../src/dspy/training-session.js';
|
||||
import { ModelProvider } from '../../src/types/index.js';
|
||||
import type { TrainingSessionConfig } from '../../src/dspy/training-session.js';
|
||||
|
||||
describe('DSPyTrainingSession', () => {
|
||||
let config: TrainingSessionConfig;
|
||||
|
||||
beforeEach(() => {
|
||||
config = {
|
||||
models: [
|
||||
{
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key-1'
|
||||
},
|
||||
{
|
||||
provider: ModelProvider.CLAUDE,
|
||||
model: 'claude-sonnet-4',
|
||||
apiKey: 'test-key-2'
|
||||
}
|
||||
],
|
||||
optimizationRounds: 3,
|
||||
convergenceThreshold: 0.95
|
||||
};
|
||||
});
|
||||
|
||||
describe('Initialization', () => {
|
||||
it('should create training session with valid config', () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
expect(session).toBeDefined();
|
||||
expect(session.getStatus().isRunning).toBe(false);
|
||||
});
|
||||
|
||||
it('should accept custom budget', () => {
|
||||
const sessionWithBudget = new DSPyTrainingSession({
|
||||
...config,
|
||||
budget: 1.0
|
||||
});
|
||||
expect(sessionWithBudget).toBeDefined();
|
||||
});
|
||||
|
||||
it('should accept maxConcurrent option', () => {
|
||||
const sessionWithConcurrency = new DSPyTrainingSession({
|
||||
...config,
|
||||
maxConcurrent: 5
|
||||
});
|
||||
expect(sessionWithConcurrency).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Training Execution', () => {
|
||||
it('should run training session and return report', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Generate product descriptions', {});
|
||||
|
||||
expect(report).toBeDefined();
|
||||
expect(report.bestModel).toBeDefined();
|
||||
expect(report.bestProvider).toBeDefined();
|
||||
expect(report.bestScore).toBeGreaterThan(0);
|
||||
expect(report.totalCost).toBeGreaterThan(0);
|
||||
expect(report.iterations).toBe(3);
|
||||
expect(report.results).toHaveLength(6); // 2 models × 3 rounds
|
||||
});
|
||||
|
||||
it('should train multiple models in parallel', async () => {
|
||||
const session = new DSPyTrainingSession({
|
||||
...config,
|
||||
optimizationRounds: 2
|
||||
});
|
||||
|
||||
const startTime = Date.now();
|
||||
await session.run('Test prompt', {});
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
// Parallel execution should be faster than sequential
|
||||
// With 2 models and 2 rounds, parallel should be ~2x faster
|
||||
expect(duration).toBeLessThan(1000); // Should complete quickly
|
||||
});
|
||||
|
||||
it('should show quality improvement over iterations', async () => {
  const session = new DSPyTrainingSession(config);
  const report = await session.run('Test improvement', {});

  // Select the rows of the first round and of the last configured round.
  const firstRound = report.results.filter(r => r.iteration === 1);
  const lastRound = report.results.filter(r => r.iteration === config.optimizationRounds);

  // Guard the averages below: an empty round would yield 0 / 0 = NaN and the
  // comparison would fail without saying which round was missing.
  expect(firstRound.length).toBeGreaterThan(0);
  expect(lastRound.length).toBeGreaterThan(0);

  const avgFirstScore =
    firstRound.reduce((sum, r) => sum + r.quality.score, 0) / firstRound.length;
  const avgLastScore =
    lastRound.reduce((sum, r) => sum + r.quality.score, 0) / lastRound.length;

  // Average quality in the last round must not be lower than in the first.
  expect(avgLastScore).toBeGreaterThanOrEqual(avgFirstScore);
  expect(report.qualityImprovement).toBeGreaterThanOrEqual(0);
});
|
||||
|
||||
it('should stop when convergence threshold is reached', async () => {
  const session = new DSPyTrainingSession({
    ...config,
    optimizationRounds: 10,
    convergenceThreshold: 0.7 // Lower threshold to ensure we hit it
  });

  // Record whether the session signals convergence during the run.
  let convergedEvent = false;
  session.on('converged', () => {
    convergedEvent = true;
  });

  const report = await session.run('Test convergence', {});

  // The flag was previously recorded but never checked, so the test could not
  // fail for the behaviour named in its title.
  // NOTE(review): assumes 'converged' fires whenever the best score reaches
  // the threshold — confirm against the session implementation.
  expect(convergedEvent).toBe(true);

  // The run may use at most the configured 10 rounds and must reach the threshold.
  expect(report.iterations).toBeLessThanOrEqual(10);
  expect(report.bestScore).toBeGreaterThanOrEqual(0.7);
});
|
||||
|
||||
it('should respect budget constraints', async () => {
  const budget = 0.5;
  const session = new DSPyTrainingSession({
    ...config,
    optimizationRounds: 10,
    budget
  });

  const report = await session.run('Test budget', {});

  // Total spend may overshoot the configured budget by at most 10%.
  expect(report.totalCost).toBeLessThanOrEqual(budget * 1.1); // Allow 10% margin
});
|
||||
});
|
||||
|
||||
describe('Event Emissions', () => {
|
||||
it('should emit start event', async () => {
  const session = new DSPyTrainingSession(config);

  // Capture payloads instead of asserting inside the listener: an assertion
  // thrown from within an event callback propagates through the emitter and
  // may surface as a run failure rather than a clear test failure.
  const startPayloads: Array<{ models: unknown; rounds: unknown }> = [];
  session.on('start', (data) => {
    startPayloads.push(data);
  });

  await session.run('Test events', {});

  // At least one 'start' event must have fired, and every payload must report
  // the configured number of models (2) and optimization rounds (3).
  expect(startPayloads.length).toBeGreaterThan(0);
  for (const payload of startPayloads) {
    expect(payload.models).toBe(2);
    expect(payload.rounds).toBe(3);
  }
});
|
||||
|
||||
it('should emit iteration events', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const iterationResults: any[] = [];
|
||||
|
||||
session.on('iteration', (result) => {
|
||||
iterationResults.push(result);
|
||||
});
|
||||
|
||||
await session.run('Test iterations', {});
|
||||
|
||||
expect(iterationResults.length).toBe(6); // 2 models × 3 rounds
|
||||
iterationResults.forEach(result => {
|
||||
expect(result.modelProvider).toBeDefined();
|
||||
expect(result.quality.score).toBeGreaterThan(0);
|
||||
expect(result.cost).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
it('should emit round events', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const rounds: number[] = [];
|
||||
|
||||
session.on('round', (data) => {
|
||||
rounds.push(data.round);
|
||||
});
|
||||
|
||||
await session.run('Test rounds', {});
|
||||
|
||||
expect(rounds).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('should emit complete event', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
let completeData: any = null;
|
||||
|
||||
session.on('complete', (report) => {
|
||||
completeData = report;
|
||||
});
|
||||
|
||||
await session.run('Test complete', {});
|
||||
|
||||
expect(completeData).toBeDefined();
|
||||
expect(completeData.bestModel).toBeDefined();
|
||||
expect(completeData.totalCost).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should emit error on failure', async () => {
  const invalidConfig = {
    ...config,
    models: [] // Invalid: no models
  };

  const session = new DSPyTrainingSession(invalidConfig);

  // Record whether the session reports the failure through its 'error' event.
  let errorEmitted = false;
  session.on('error', () => {
    errorEmitted = true;
  });

  // Assert the rejection explicitly instead of swallowing it in an empty
  // catch; this matches the 'should handle empty models array' test, which
  // expects the same configuration to reject.
  await expect(session.run('Test error', {})).rejects.toThrow();

  expect(errorEmitted).toBe(true);
});
|
||||
});
|
||||
|
||||
describe('Status Tracking', () => {
|
||||
it('should track running status', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
|
||||
expect(session.getStatus().isRunning).toBe(false);
|
||||
|
||||
const runPromise = session.run('Test status', {});
|
||||
|
||||
// Check status during execution would require more complex async handling
|
||||
await runPromise;
|
||||
|
||||
const status = session.getStatus();
|
||||
expect(status.completedIterations).toBe(3);
|
||||
expect(status.totalCost).toBeGreaterThan(0);
|
||||
expect(status.results).toHaveLength(6);
|
||||
});
|
||||
|
||||
it('should track total cost', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
await session.run('Test cost', {});
|
||||
|
||||
const status = session.getStatus();
|
||||
expect(status.totalCost).toBeGreaterThan(0);
|
||||
expect(status.totalCost).toBeLessThan(1.0); // Reasonable cost limit
|
||||
});
|
||||
});
|
||||
|
||||
describe('Error Handling', () => {
|
||||
it('should handle empty models array', async () => {
|
||||
const session = new DSPyTrainingSession({
|
||||
...config,
|
||||
models: []
|
||||
});
|
||||
|
||||
await expect(session.run('Test empty', {})).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('should handle invalid optimization rounds', async () => {
|
||||
const session = new DSPyTrainingSession({
|
||||
...config,
|
||||
optimizationRounds: 0
|
||||
});
|
||||
|
||||
const report = await session.run('Test invalid rounds', {});
|
||||
expect(report.iterations).toBe(0);
|
||||
expect(report.results).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('should handle negative convergence threshold', async () => {
|
||||
const session = new DSPyTrainingSession({
|
||||
...config,
|
||||
convergenceThreshold: -1
|
||||
});
|
||||
|
||||
const report = await session.run('Test negative threshold', {});
|
||||
expect(report).toBeDefined();
|
||||
// Should still complete normally, just never converge
|
||||
});
|
||||
});
|
||||
|
||||
describe('Quality Metrics', () => {
|
||||
it('should include quality metrics in results', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Test metrics', {});
|
||||
|
||||
report.results.forEach(result => {
|
||||
expect(result.quality).toBeDefined();
|
||||
expect(result.quality.score).toBeGreaterThan(0);
|
||||
expect(result.quality.score).toBeLessThanOrEqual(1);
|
||||
expect(result.quality.metrics).toBeDefined();
|
||||
expect(result.quality.metrics.accuracy).toBeDefined();
|
||||
expect(result.quality.metrics.consistency).toBeDefined();
|
||||
expect(result.quality.metrics.relevance).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
it('should calculate quality improvement percentage', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Test improvement percentage', {});
|
||||
|
||||
expect(typeof report.qualityImprovement).toBe('number');
|
||||
expect(report.qualityImprovement).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Model Comparison', () => {
|
||||
it('should identify best performing model', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Test best model', {});
|
||||
|
||||
expect(report.bestModel).toBeDefined();
|
||||
expect(report.bestProvider).toBeDefined();
|
||||
expect([ModelProvider.GEMINI, ModelProvider.CLAUDE]).toContain(report.bestProvider);
|
||||
|
||||
// Verify best score matches the best model's score
|
||||
const bestResult = report.results.find(
|
||||
r => r.model === report.bestModel && r.modelProvider === report.bestProvider
|
||||
);
|
||||
expect(bestResult).toBeDefined();
|
||||
});
|
||||
|
||||
it('should handle three or more models', async () => {
|
||||
const multiModelConfig = {
|
||||
...config,
|
||||
models: [
|
||||
...config.models,
|
||||
{
|
||||
provider: ModelProvider.GPT4,
|
||||
model: 'gpt-4-turbo',
|
||||
apiKey: 'test-key-3'
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const session = new DSPyTrainingSession(multiModelConfig);
|
||||
const report = await session.run('Test multiple models', {});
|
||||
|
||||
expect(report.results.length).toBe(9); // 3 models × 3 rounds
|
||||
expect(report.bestProvider).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Duration Tracking', () => {
|
||||
it('should track total duration', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Test duration', {});
|
||||
|
||||
expect(report.totalDuration).toBeGreaterThan(0);
|
||||
expect(report.totalDuration).toBeLessThan(10000); // Should complete within 10 seconds
|
||||
});
|
||||
|
||||
it('should track per-iteration duration', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const report = await session.run('Test iteration duration', {});
|
||||
|
||||
report.results.forEach(result => {
|
||||
expect(result.duration).toBeGreaterThan(0);
|
||||
expect(result.duration).toBeLessThan(5000); // Each iteration under 5 seconds
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
430
vendor/ruvector/npm/packages/agentic-synth-examples/tests/generators/self-learning.test.ts
vendored
Normal file
430
vendor/ruvector/npm/packages/agentic-synth-examples/tests/generators/self-learning.test.ts
vendored
Normal file
@@ -0,0 +1,430 @@
|
||||
/**
|
||||
* Tests for Self-Learning Generator
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { SelfLearningGenerator } from '../../src/generators/self-learning.js';
|
||||
import type { SelfLearningConfig, GenerateOptions } from '../../src/generators/self-learning.js';
|
||||
|
||||
describe('SelfLearningGenerator', () => {
|
||||
let config: SelfLearningConfig;
|
||||
|
||||
beforeEach(() => {
|
||||
config = {
|
||||
task: 'code-generation',
|
||||
learningRate: 0.1,
|
||||
iterations: 5
|
||||
};
|
||||
});
|
||||
|
||||
describe('Initialization', () => {
|
||||
it('should create generator with valid config', () => {
|
||||
const generator = new SelfLearningGenerator(config);
|
||||
expect(generator).toBeDefined();
|
||||
});
|
||||
|
||||
it('should accept quality threshold', () => {
|
||||
const generatorWithThreshold = new SelfLearningGenerator({
|
||||
...config,
|
||||
qualityThreshold: 0.9
|
||||
});
|
||||
expect(generatorWithThreshold).toBeDefined();
|
||||
});
|
||||
|
||||
it('should accept maxAttempts option', () => {
|
||||
const generatorWithMax = new SelfLearningGenerator({
|
||||
...config,
|
||||
maxAttempts: 20
|
||||
});
|
||||
expect(generatorWithMax).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Generation and Learning', () => {
|
||||
it('should generate output with quality improvement', async () => {
|
||||
const generator = new SelfLearningGenerator(config);
|
||||
const result = await generator.generate({
|
||||
prompt: 'Generate a function to validate emails'
|
||||
});
|
||||
|
||||
expect(result.output).toBeDefined();
|
||||
expect(result.finalQuality).toBeGreaterThan(0);
|
||||
expect(result.finalQuality).toBeLessThanOrEqual(1);
|
||||
expect(result.improvement).toBeGreaterThanOrEqual(0);
|
||||
expect(result.iterations).toBe(5);
|
||||
expect(result.metrics).toHaveLength(5);
|
||||
});
|
||||
|
||||
it('should show quality improvement over iterations', async () => {
|
||||
const generator = new SelfLearningGenerator(config);
|
||||
const result = await generator.generate({
|
||||
prompt: 'Test improvement tracking'
|
||||
});
|
||||
|
||||
const firstQuality = result.metrics[0].quality;
|
||||
const lastQuality = result.metrics[result.metrics.length - 1].quality;
|
||||
|
||||
// Quality should generally improve (or at least not decrease significantly)
|
||||
expect(lastQuality).toBeGreaterThanOrEqual(firstQuality * 0.95);
|
||||
expect(result.improvement).toBeDefined();
|
||||
});
|
||||
|
||||
it('should track metrics for each iteration', async () => {
|
||||
const generator = new SelfLearningGenerator(config);
|
||||
const result = await generator.generate({
|
||||
prompt: 'Track iteration metrics'
|
||||
});
|
||||
|
||||
expect(result.metrics).toHaveLength(5);
|
||||
result.metrics.forEach((metric, index) => {
|
||||
expect(metric.iteration).toBe(index + 1);
|
||||
expect(metric.quality).toBeGreaterThan(0);
|
||||
expect(typeof metric.improvement).toBe('number');
|
||||
expect(Array.isArray(metric.feedback)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('should apply learning rate correctly', async () => {
|
||||
const highLearningRate = new SelfLearningGenerator({
|
||||
...config,
|
||||
learningRate: 0.5,
|
||||
iterations: 3
|
||||
});
|
||||
const lowLearningRate = new SelfLearningGenerator({
|
||||
...config,
|
||||
learningRate: 0.05,
|
||||
iterations: 3
|
||||
});
|
||||
|
||||
const highResult = await highLearningRate.generate({
|
||||
prompt: 'Test high learning rate'
|
||||
});
|
||||
const lowResult = await lowLearningRate.generate({
|
||||
prompt: 'Test low learning rate'
|
||||
});
|
||||
|
||||
// Higher learning rate should generally lead to faster improvement
|
||||
expect(highResult.improvement).toBeDefined();
|
||||
expect(lowResult.improvement).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Verifies that caller-supplied test predicates are accepted by `generate`
 * and that a pass rate in [0, 1] is reported for every metric entry.
 */
describe('Test Integration', () => {
  it('should evaluate against test cases', async () => {
    const generator = new SelfLearningGenerator(config);

    // Predicates handed to the generator alongside the prompt.
    const hasEnoughContent = (output: any) => output.content.length > 10;
    const hasDecentQuality = (output: any) => output.quality > 0.5;
    const hasMetadata = (output: any) => output.metadata !== undefined;

    const result = await generator.generate({
      prompt: 'Generate with tests',
      tests: [hasEnoughContent, hasDecentQuality, hasMetadata]
    });

    expect(result.finalQuality).toBeGreaterThan(0);
    for (const metric of result.metrics) {
      expect(metric.testsPassingRate).toBeDefined();
      expect(metric.testsPassingRate).toBeGreaterThanOrEqual(0);
      expect(metric.testsPassingRate).toBeLessThanOrEqual(1);
    }
  });

  it('should track test passing rate', async () => {
    const generator = new SelfLearningGenerator(config);

    // One predicate per quality floor: output.quality must exceed the floor.
    const qualityFloors = [0.6, 0.7];
    const tests = qualityFloors.map(floor => (output: any) => output.quality > floor);

    const result = await generator.generate({
      prompt: 'Track test pass rate',
      tests
    });

    // Every recorded iteration must carry a pass rate within [0, 1].
    for (const metric of result.metrics) {
      expect(metric.testsPassingRate).toBeGreaterThanOrEqual(0);
      expect(metric.testsPassingRate).toBeLessThanOrEqual(1);
    }
  });

  it('should handle failing tests gracefully', async () => {
    const generator = new SelfLearningGenerator(config);

    // Two distinct predicates that can never pass.
    const impossibleTests = Array.from({ length: 2 }, () => () => false);

    const result = await generator.generate({
      prompt: 'Handle test failures',
      tests: impossibleTests
    });

    // The run must still finish with an output and a positive quality.
    expect(result.output).toBeDefined();
    expect(result.finalQuality).toBeGreaterThan(0);
  });
});
|
||||
|
||||
/**
 * Verifies the events emitted by `SelfLearningGenerator.generate`.
 *
 * Every test records the event payloads inside the listener and asserts on
 * them only after `generate` has resolved, so a failed expectation is reported
 * by the test itself instead of being thrown from inside the emitter.
 */
describe('Event Emissions', () => {
  it('should emit start event', async () => {
    const generator = new SelfLearningGenerator(config);
    const startPayloads: any[] = [];

    generator.on('start', (data) => {
      startPayloads.push(data);
    });

    await generator.generate({ prompt: 'Test start event' });

    expect(startPayloads.length).toBeGreaterThan(0);
    // Expected values presumably mirror the shared suite config, which is
    // defined outside this block.
    for (const data of startPayloads) {
      expect(data.task).toBe('code-generation');
      expect(data.iterations).toBe(5);
    }
  });

  it('should emit improvement events', async () => {
    const generator = new SelfLearningGenerator(config);
    const improvements: any[] = [];

    generator.on('improvement', (metrics) => {
      improvements.push(metrics);
    });

    await generator.generate({ prompt: 'Test improvement events' });

    expect(improvements).toHaveLength(5);
    for (const metric of improvements) {
      expect(metric.iteration).toBeDefined();
      expect(metric.quality).toBeDefined();
    }
  });

  it('should emit complete event', async () => {
    const generator = new SelfLearningGenerator(config);
    let completeData: any = null;

    generator.on('complete', (data) => {
      completeData = data;
    });

    await generator.generate({ prompt: 'Test complete event' });

    // `toBeDefined()` alone passes for the initial `null`, so the null check
    // is what actually proves the listener ran.
    expect(completeData).toBeDefined();
    expect(completeData).not.toBeNull();
    expect(completeData.finalQuality).toBeDefined();
    expect(completeData.improvement).toBeDefined();
    expect(completeData.iterations).toBe(5);
  });

  it('should emit threshold-reached event', async () => {
    const qualityThreshold = 0.6;
    const generator = new SelfLearningGenerator({
      ...config,
      qualityThreshold,
      iterations: 10
    });
    const reachedPayloads: any[] = [];

    generator.on('threshold-reached', (data) => {
      reachedPayloads.push(data);
    });

    await generator.generate({ prompt: 'Test threshold' });

    // Threshold might or might not be reached depending on random variation,
    // so zero emissions is acceptable. Any emission that did occur must report
    // a quality at or above the configured threshold.
    for (const data of reachedPayloads) {
      expect(data.quality).toBeGreaterThanOrEqual(qualityThreshold);
    }
  });
});
|
||||
|
||||
/**
 * Checks the iteration count reported when a quality threshold is configured,
 * and that a caller-provided initial quality is accepted.
 */
describe('Quality Thresholds', () => {
  it('should stop when quality threshold is reached', async () => {
    const maxIterations = 10;
    const threshold = 0.7;
    const generator = new SelfLearningGenerator({
      ...config,
      qualityThreshold: threshold,
      iterations: maxIterations
    });

    const { iterations, finalQuality } = await generator.generate({
      prompt: 'Test early stopping'
    });

    // Never more than the configured budget.
    expect(iterations).toBeLessThanOrEqual(maxIterations);
    // If the final quality met the threshold, the run is expected to have
    // ended before using the whole budget.
    // NOTE(review): this would fail if the threshold were first met on the
    // very last iteration — confirm the generator cannot do that.
    if (finalQuality >= threshold) {
      expect(iterations).toBeLessThan(maxIterations);
    }
  });

  it('should use initial quality if provided', async () => {
    const generator = new SelfLearningGenerator(config);

    const outcome = await generator.generate({
      prompt: 'Test initial quality',
      initialQuality: 0.8
    });

    // Only the presence of an output is verified; the improvement figure
    // itself is not asserted here.
    expect(outcome.output).toBeDefined();
  });
});
|
||||
|
||||
/**
 * Covers `getHistory()` after one and several generations, and `reset()`
 * (both its effect on the history and the 'reset' event).
 */
describe('History Tracking', () => {
  it('should maintain learning history', async () => {
    const generator = new SelfLearningGenerator(config);
    await generator.generate({ prompt: 'First generation' });

    const entries = generator.getHistory();

    expect(entries).toHaveLength(5);
    // Iterations are numbered from 1 through 5.
    const firstEntry = entries[0];
    const fifthEntry = entries[4];
    expect(firstEntry.iteration).toBe(1);
    expect(fifthEntry.iteration).toBe(5);
  });

  it('should accumulate history across multiple generations', async () => {
    const generator = new SelfLearningGenerator(config);
    for (const prompt of ['First', 'Second']) {
      await generator.generate({ prompt });
    }

    const entries = generator.getHistory();
    expect(entries.length).toBe(10); // 5 + 5 iterations
  });

  it('should reset history when reset is called', async () => {
    const generator = new SelfLearningGenerator(config);
    await generator.generate({ prompt: 'Generate before reset' });

    const sizeBeforeReset = generator.getHistory().length;
    expect(sizeBeforeReset).toBe(5);

    generator.reset();

    expect(generator.getHistory()).toHaveLength(0);
  });

  it('should emit reset event', () => {
    const generator = new SelfLearningGenerator(config);
    let resetCount = 0;

    generator.on('reset', () => {
      resetCount += 1;
    });

    generator.reset();

    // At least one 'reset' notification must have been delivered.
    expect(resetCount > 0).toBe(true);
  });
});
|
||||
|
||||
/**
 * Checks the `feedback` entries attached to each metric returned by
 * `generate`.
 */
describe('Feedback Generation', () => {
  it('should generate relevant feedback', async () => {
    const generator = new SelfLearningGenerator(config);
    const { metrics } = await generator.generate({
      prompt: 'Test feedback generation'
    });

    for (const metric of metrics) {
      // Each metric carries a non-empty array of non-empty strings.
      expect(Array.isArray(metric.feedback)).toBe(true);
      expect(metric.feedback.length).toBeGreaterThan(0);
      for (const message of metric.feedback) {
        expect(typeof message).toBe('string');
        expect(message.length).toBeGreaterThan(0);
      }
    }
  });

  it('should provide contextual feedback based on quality', async () => {
    const generator = new SelfLearningGenerator(config);
    const { metrics } = await generator.generate({
      prompt: 'Test contextual feedback'
    });

    // Collect the distinct feedback entries seen across all iterations.
    const distinctFeedback = new Set<unknown>();
    for (const metric of metrics) {
      for (const message of metric.feedback) {
        distinctFeedback.add(message);
      }
    }

    // Only requires that at least one feedback entry exists.
    expect(distinctFeedback.size).toBeGreaterThan(0);
  });
});
|
||||
|
||||
/**
 * Boundary configurations: no iterations, a single iteration, and extreme
 * learning rates.
 */
describe('Edge Cases', () => {
  // Builds a generator from the shared config with the given overrides.
  const generatorWith = (overrides: Record<string, number>) =>
    new SelfLearningGenerator({ ...config, ...overrides });

  it('should handle zero iterations', async () => {
    const generator = generatorWith({ iterations: 0 });

    const outcome = await generator.generate({ prompt: 'Test zero iterations' });

    // With nothing to run there is no output and no metrics.
    expect(outcome.output).toBeNull();
    expect(outcome.metrics).toHaveLength(0);
  });

  it('should handle very high learning rate', async () => {
    const generator = generatorWith({ learningRate: 1.0 });

    const outcome = await generator.generate({ prompt: 'Test high learning rate' });

    expect(outcome.output).toBeDefined();
    // Quality must stay at or below 1.0 even at the maximum rate used here.
    expect(outcome.finalQuality).toBeLessThanOrEqual(1.0);
  });

  it('should handle very low learning rate', async () => {
    const generator = generatorWith({ learningRate: 0.001 });

    const outcome = await generator.generate({ prompt: 'Test low learning rate' });

    // Only the presence of an output is verified; the size of the improvement
    // is not asserted.
    expect(outcome.output).toBeDefined();
  });

  it('should handle single iteration', async () => {
    const generator = generatorWith({ iterations: 1 });

    const outcome = await generator.generate({ prompt: 'Single iteration test' });

    expect(outcome.iterations).toBe(1);
    expect(outcome.metrics).toHaveLength(1);
    expect(outcome.output).toBeDefined();
  });
});
|
||||
|
||||
/**
 * Wall-clock budgets for `generate`, measured with `Date.now()`.
 */
describe('Performance', () => {
  it('should complete within reasonable time', async () => {
    const generator = new SelfLearningGenerator(config);

    const startedAt = Date.now();
    await generator.generate({ prompt: 'Performance test' });
    const elapsedMs = Date.now() - startedAt;

    expect(elapsedMs).toBeLessThan(2000); // Should complete in under 2 seconds
  });

  it('should handle many iterations efficiently', async () => {
    const generator = new SelfLearningGenerator({ ...config, iterations: 20 });

    // Construction is deliberately outside the timed section.
    const startedAt = Date.now();
    await generator.generate({ prompt: 'Many iterations test' });
    const elapsedMs = Date.now() - startedAt;

    expect(elapsedMs).toBeLessThan(5000); // Even with 20 iterations
  });
});
|
||||
});
|
||||
453
vendor/ruvector/npm/packages/agentic-synth-examples/tests/generators/stock-market.test.ts
vendored
Normal file
453
vendor/ruvector/npm/packages/agentic-synth-examples/tests/generators/stock-market.test.ts
vendored
Normal file
@@ -0,0 +1,453 @@
|
||||
/**
|
||||
* Tests for Stock Market Simulator
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { StockMarketSimulator } from '../../src/generators/stock-market.js';
|
||||
import type { StockSimulatorConfig, GenerateOptions } from '../../src/generators/stock-market.js';
|
||||
|
||||
describe('StockMarketSimulator', () => {
|
||||
let config: StockSimulatorConfig;
|
||||
|
||||
// Rebuild the baseline config before every test: two symbols over
// 2024-01-01..2024-01-10 at medium volatility.
beforeEach(() => {
  const [startDate, endDate] = ['2024-01-01', '2024-01-10'];
  config = {
    symbols: ['AAPL', 'GOOGL'],
    startDate,
    endDate,
    volatility: 'medium'
  };
});
|
||||
|
||||
/**
 * Construction smoke tests: string dates, `Date` objects, and the 'low' and
 * 'high' volatility settings.
 */
describe('Initialization', () => {
  it('should create simulator with valid config', () => {
    expect(new StockMarketSimulator(config)).toBeDefined();
  });

  it('should accept Date objects', () => {
    const startDate = new Date('2024-01-01');
    const endDate = new Date('2024-01-10');

    const simulatorWithDates = new StockMarketSimulator({ ...config, startDate, endDate });

    expect(simulatorWithDates).toBeDefined();
  });

  it('should handle different volatility levels', () => {
    // Construct both simulators first, then check each one.
    const [lowVol, highVol] = (['low', 'high'] as const).map(
      volatility => new StockMarketSimulator({ ...config, volatility })
    );

    expect(lowVol).toBeDefined();
    expect(highVol).toBeDefined();
  });
});
|
||||
|
||||
/**
 * Checks which symbols and which calendar days appear in the generated data.
 */
describe('Data Generation', () => {
  it('should generate OHLCV data for all symbols', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    expect(data.length).toBeGreaterThan(0);

    // Both configured symbols must appear at least once.
    const seenSymbols = data.map(point => point.symbol);
    expect(seenSymbols.includes('AAPL')).toBe(true);
    expect(seenSymbols.includes('GOOGL')).toBe(true);
  });

  it('should generate correct number of trading days', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    // Only verifies that each symbol received at least one data point.
    const countFor = (symbol: string) => data.filter(point => point.symbol === symbol).length;

    expect(countFor('AAPL')).toBeGreaterThan(0);
    expect(countFor('GOOGL')).toBeGreaterThan(0);
  });

  it('should skip weekends by default', async () => {
    const simulator = new StockMarketSimulator({
      symbols: ['AAPL'],
      startDate: '2024-01-06', // Saturday
      endDate: '2024-01-08', // Monday
      volatility: 'medium'
    });
    const data = await simulator.generate();

    // Of Saturday, Sunday and Monday only one day may remain.
    expect(data.length).toBe(1);

    // NOTE(review): getDay() uses the local time zone — confirm this matches
    // how the simulator classifies weekends.
    const weekday = data[0].date.getDay();
    expect(weekday).not.toBe(0); // Not Sunday
    expect(weekday).not.toBe(6); // Not Saturday
  });

  it('should include weekends when configured', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      includeWeekends: true,
      startDate: '2024-01-06', // Saturday
      endDate: '2024-01-08' // Monday
    });
    const data = await simulator.generate();

    const aaplPoints = data.filter(point => point.symbol === 'AAPL');
    expect(aaplPoints.length).toBe(3); // Saturday, Sunday, Monday
  });
});
|
||||
|
||||
/**
 * Sanity checks on every generated data point: positivity, high/low ordering,
 * price bounds and precision, and volume bounds.
 */
describe('OHLCV Data Validation', () => {
  it('should generate valid OHLCV data', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    for (const { open, high, low, close, volume } of data) {
      // Every field is strictly positive.
      for (const value of [open, high, low, close, volume]) {
        expect(value).toBeGreaterThan(0);
      }

      // High should be highest
      for (const other of [open, close, low]) {
        expect(high).toBeGreaterThanOrEqual(other);
      }

      // Low should be lowest
      for (const other of [open, close, high]) {
        expect(low).toBeLessThanOrEqual(other);
      }
    }
  });

  it('should have reasonable price ranges', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    // Number of digits after the decimal point in the default string form.
    const decimalPlaces = (price: number) => price.toString().split('.')[1]?.length ?? 0;

    for (const { open, high, low, close } of data) {
      // Upper bound only; positivity is covered by the previous test.
      for (const price of [open, high, low, close]) {
        expect(price).toBeLessThan(10000);
      }

      // Price precision (2 decimal places)
      expect(decimalPlaces(open)).toBeLessThanOrEqual(2);
      expect(decimalPlaces(close)).toBeLessThanOrEqual(2);
    }
  });

  it('should have realistic volume', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    for (const { volume } of data) {
      expect(Number.isInteger(volume)).toBe(true);
      expect(volume).toBeGreaterThan(1000000); // At least 1M volume
      expect(volume).toBeLessThan(1000000000); // Less than 1B volume
    }
  });
});
|
||||
|
||||
/**
 * Runs the simulator over 2024-01-01..2024-01-30 under each market condition.
 *
 * The generated prices are random, so none of these tests asserts the
 * direction of the trend; they verify that data is produced and, for the
 * bullish case, that the overall price change is a well-formed number.
 */
describe('Market Conditions', () => {
  it('should generate bullish trends', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-01-30'
    });
    const data = await simulator.generate({ marketConditions: 'bullish' });

    // Same baseline check as the bearish and neutral tests below, so this test
    // cannot pass without asserting anything.
    expect(data.length).toBeGreaterThan(0);

    const aaplData = data
      .filter(d => d.symbol === 'AAPL')
      .sort((a, b) => a.date.getTime() - b.date.getTime());

    if (aaplData.length > 5) {
      const firstPrice = aaplData[0].close;
      const lastPrice = aaplData[aaplData.length - 1].close;

      // Percentage change from the first to the last close of the period.
      const priceChange = ((lastPrice - firstPrice) / firstPrice) * 100;

      // Randomness can outweigh the trend, so the sign is not asserted — only
      // that the change is a real number (no NaN or Infinity).
      expect(Number.isFinite(priceChange)).toBe(true);
    }
  });

  it('should generate bearish trends', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-01-30'
    });
    const data = await simulator.generate({ marketConditions: 'bearish' });

    // Bearish trends are applied but due to randomness, actual direction may vary
    expect(data.length).toBeGreaterThan(0);
  });

  it('should generate neutral market', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-01-30'
    });
    const data = await simulator.generate({ marketConditions: 'neutral' });

    // The balance of ups and downs is not measured; only that data exists.
    expect(data.length).toBeGreaterThan(0);
  });
});
|
||||
|
||||
/**
 * Generates data at 'low' and 'high' volatility and checks that both yield a
 * positive average daily price range for AAPL.
 */
describe('Volatility Levels', () => {
  it('should reflect different volatility in price movements', async () => {
    const lowVolSimulator = new StockMarketSimulator({ ...config, volatility: 'low' });
    const highVolSimulator = new StockMarketSimulator({ ...config, volatility: 'high' });

    const lowVolData = await lowVolSimulator.generate();
    const highVolData = await highVolSimulator.generate();

    // Both should generate data
    expect(lowVolData.length).toBeGreaterThan(0);
    expect(highVolData.length).toBeGreaterThan(0);

    // Mean of (high - low) / close, expressed as a percentage.
    const averageRangePct = (points: any[]) => {
      let total = 0;
      for (const point of points) {
        total += ((point.high - point.low) / point.close) * 100;
      }
      return total / points.length;
    };

    const onlyAapl = (points: typeof lowVolData) => points.filter(p => p.symbol === 'AAPL');
    const lowAvgRange = averageRangePct(onlyAapl(lowVolData));
    const highAvgRange = averageRangePct(onlyAapl(highVolData));

    // Because of randomness the two averages are not compared with each
    // other; each only has to be positive.
    expect(lowAvgRange).toBeGreaterThan(0);
    expect(highAvgRange).toBeGreaterThan(0);
  });
});
|
||||
|
||||
/**
 * Sentiment and news are opt-in: present when requested through the
 * `generate` options, absent otherwise.
 */
describe('Optional Features', () => {
  it('should include sentiment when requested', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate({ includeSentiment: true });

    // Every point carries a sentiment within [-1, 1].
    for (const { sentiment } of data) {
      expect(sentiment).toBeDefined();
      expect(sentiment).toBeGreaterThanOrEqual(-1);
      expect(sentiment).toBeLessThanOrEqual(1);
    }
  });

  it('should not include sentiment by default', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    // No point at all may carry a sentiment value.
    const sentimentCount = data.reduce(
      (count, point) => (point.sentiment !== undefined ? count + 1 : count),
      0
    );
    expect(sentimentCount).toBe(0);
  });

  it('should include news when requested', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-02-01' // Longer period for more news events
    });
    const data = await simulator.generate({ includeNews: true });

    // Should have some news events (10% probability per day)
    const withNews = data.filter(({ news }) => (news?.length ?? 0) > 0);
    expect(withNews.length).toBeGreaterThan(0);

    for (const point of withNews) {
      expect(Array.isArray(point.news)).toBe(true);

      // The filter above guarantees a non-empty list for these points.
      const headlines = point.news ?? [];
      expect(headlines.length).toBeGreaterThan(0);
      for (const headline of headlines) {
        expect(typeof headline).toBe('string');
        expect(headline.length).toBeGreaterThan(0);
      }
    }
  });

  it('should not include news by default', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    const withNews = data.filter(({ news }) => (news?.length ?? 0) > 0);
    expect(withNews.length).toBe(0);
  });
});
|
||||
|
||||
/**
 * Date-related guarantees of the generated series: range, ordering, and the
 * single-day case.
 */
describe('Date Handling', () => {
  it('should generate data in correct date range', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    // Bounds match the start and end dates of the baseline config.
    const rangeStartMs = new Date('2024-01-01').getTime();
    const rangeEndMs = new Date('2024-01-10').getTime();

    for (const point of data) {
      const timestamp = point.date.getTime();
      expect(timestamp).toBeGreaterThanOrEqual(rangeStartMs);
      expect(point.date.getTime()).toBeLessThanOrEqual(rangeEndMs);
    }
  });

  it('should sort data by date', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    // Each point must be no earlier than the one before it.
    data.slice(1).forEach((point, previousIndex) => {
      expect(point.date.getTime()).toBeGreaterThanOrEqual(data[previousIndex].date.getTime());
    });
  });

  it('should handle single day generation', async () => {
    const onlyDay = '2024-01-15';
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: onlyDay,
      endDate: onlyDay
    });
    const data = await simulator.generate();

    const aaplPoints = data.filter(point => point.symbol === 'AAPL');
    expect(aaplPoints.length).toBe(1);

    // Compare on the calendar-date part of the ISO timestamp.
    const [isoDay] = aaplPoints[0].date.toISOString().split('T');
    expect(isoDay).toBe('2024-01-15');
  });
});
|
||||
|
||||
/**
 * Exercises `getStatistics` on generated AAPL data and on an empty input.
 */
describe('Statistics', () => {
  it('should calculate market statistics', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-01-30'
    });
    const data = await simulator.generate();
    const aaplPoints = data.filter(point => point.symbol === 'AAPL');

    const stats = simulator.getStatistics(aaplPoints);

    expect(stats.totalDays).toBe(aaplPoints.length);
    expect(stats.avgPrice).toBeGreaterThan(0);
    expect(stats.minPrice).toBeGreaterThan(0);
    expect(stats.maxPrice).toBeGreaterThan(0);
    expect(stats.avgVolume).toBeGreaterThan(0);
    expect(typeof stats.priceChange).toBe('number');
    expect(stats.volatility).toBeGreaterThan(0);

    // Ordering invariant: min <= avg <= max.
    expect(stats.minPrice).toBeLessThanOrEqual(stats.avgPrice);
    expect(stats.avgPrice).toBeLessThanOrEqual(stats.maxPrice);
  });

  it('should handle empty data for statistics', async () => {
    const simulator = new StockMarketSimulator(config);

    // An empty series is expected to yield an empty statistics object.
    expect(simulator.getStatistics([])).toEqual({});
  });

  it('should calculate volatility correctly', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();
    const aaplPoints = data.filter(point => point.symbol === 'AAPL');

    const { volatility } = simulator.getStatistics(aaplPoints);

    expect(volatility).toBeGreaterThan(0);
    expect(volatility).toBeLessThan(100); // Reasonable volatility range
  });
});
|
||||
|
||||
/**
 * Behaviour with one, several, and the two default symbols.
 */
describe('Multiple Symbols', () => {
  it('should handle single symbol', async () => {
    const simulator = new StockMarketSimulator({ ...config, symbols: ['AAPL'] });
    const data = await simulator.generate();

    // No point may belong to any other symbol.
    const hasForeignSymbol = data.some(point => point.symbol !== 'AAPL');
    expect(!hasForeignSymbol).toBe(true);
  });

  it('should handle many symbols', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      symbols: ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA']
    });
    const data = await simulator.generate();

    const distinctSymbols = new Set(data.map(point => point.symbol));

    expect(distinctSymbols.size).toBe(5);
    // Spot-check the first and last requested symbols.
    expect(distinctSymbols.has('AAPL')).toBe(true);
    expect(distinctSymbols.has('TSLA')).toBe(true);
  });

  it('should generate independent data for each symbol', async () => {
    const simulator = new StockMarketSimulator(config);
    const data = await simulator.generate();

    const firstAapl = data.filter(point => point.symbol === 'AAPL')[0];
    const firstGoogl = data.filter(point => point.symbol === 'GOOGL')[0];

    // Prices should be different (independent generation)
    expect(firstAapl.close).not.toBe(firstGoogl.close);
  });
});
|
||||
|
||||
/**
 * Boundary inputs: a one-day range, a full-year range, and symbols outside
 * the usual set.
 */
describe('Edge Cases', () => {
  it('should handle very short time period', async () => {
    const singleDay = '2024-01-02';
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: singleDay,
      endDate: singleDay
    });

    const data = await simulator.generate();

    expect(data.length).toBeGreaterThan(0);
  });

  it('should handle long time periods', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-12-31'
    });

    const data = await simulator.generate();

    // Should have roughly 252 trading days * 2 symbols
    expect(data.length).toBeGreaterThan(400);
  });

  it('should handle unknown symbols gracefully', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      symbols: ['UNKNOWN', 'FAKE']
    });

    const data = await simulator.generate();

    // Data must still be produced, and every close must be positive.
    expect(data.length).toBeGreaterThan(0);
    for (const { close } of data) {
      expect(close).toBeGreaterThan(0);
    }
  });
});
|
||||
|
||||
/**
 * Wall-clock budget for generating three months of data for three symbols.
 */
describe('Performance', () => {
  it('should generate data efficiently', async () => {
    const simulator = new StockMarketSimulator({
      ...config,
      startDate: '2024-01-01',
      endDate: '2024-03-31',
      symbols: ['AAPL', 'GOOGL', 'MSFT']
    });

    // Only the generate() call is timed; construction happens beforehand.
    const startedAt = Date.now();
    await simulator.generate();
    const elapsedMs = Date.now() - startedAt;

    // Should complete quickly even with 3 months of data
    expect(elapsedMs).toBeLessThan(1000);
  });
});
|
||||
});
|
||||
498
vendor/ruvector/npm/packages/agentic-synth-examples/tests/integration.test.ts
vendored
Normal file
498
vendor/ruvector/npm/packages/agentic-synth-examples/tests/integration.test.ts
vendored
Normal file
@@ -0,0 +1,498 @@
|
||||
/**
|
||||
* Integration Tests
|
||||
* End-to-end workflows and package integration
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { DSPyTrainingSession, MultiModelBenchmark } from '../src/dspy/index.js';
|
||||
import { SelfLearningGenerator } from '../src/generators/self-learning.js';
|
||||
import { StockMarketSimulator } from '../src/generators/stock-market.js';
|
||||
import { ModelProvider } from '../src/types/index.js';
|
||||
|
||||
describe('Integration Tests', () => {
|
||||
/**
 * Confirms that the imported classes and the `ModelProvider` enum resolve,
 * and pins the string value of each provider.
 */
describe('Package Exports', () => {
  it('should export all main classes', () => {
    const mainClasses = [
      DSPyTrainingSession,
      MultiModelBenchmark,
      SelfLearningGenerator,
      StockMarketSimulator
    ];

    for (const exportedClass of mainClasses) {
      expect(exportedClass).toBeDefined();
    }
  });

  it('should export types and enums', () => {
    expect(ModelProvider).toBeDefined();

    // Each enum member paired with the string it must equal.
    const expectedValues: Array<[ModelProvider, string]> = [
      [ModelProvider.GEMINI, 'gemini'],
      [ModelProvider.CLAUDE, 'claude'],
      [ModelProvider.GPT4, 'gpt4'],
      [ModelProvider.LLAMA, 'llama']
    ];

    for (const [member, value] of expectedValues) {
      expect(member).toBe(value);
    }
  });
});
|
||||
|
||||
/**
 * One happy-path run per main component: training session, self-learning
 * generator, stock market simulator, and multi-model benchmark.
 */
describe('End-to-End Workflows', () => {
  it('should complete full DSPy training workflow', async () => {
    const session = new DSPyTrainingSession({
      models: [
        {
          provider: ModelProvider.GEMINI,
          model: 'gemini-2.0-flash-exp',
          apiKey: 'test-key'
        }
      ],
      optimizationRounds: 2,
      convergenceThreshold: 0.95
    });

    const report = await session.run('Generate test data', {});

    expect(report).toBeDefined();
    const { bestModel, totalCost, results } = report;
    expect(bestModel).toBeDefined();
    expect(totalCost).toBeGreaterThan(0);
    expect(results.length).toBe(2); // 2 rounds
  });

  it('should complete self-learning generation workflow', async () => {
    const generator = new SelfLearningGenerator({
      task: 'test-generation',
      learningRate: 0.1,
      iterations: 3
    });

    const outcome = await generator.generate({ prompt: 'Generate test content' });

    expect(outcome.output).toBeDefined();
    expect(outcome.finalQuality).toBeGreaterThan(0);
    // One metric entry per configured iteration.
    expect(outcome.metrics.length).toBe(3);
  });

  it('should complete stock market simulation workflow', async () => {
    const simulator = new StockMarketSimulator({
      symbols: ['AAPL'],
      startDate: '2024-01-01',
      endDate: '2024-01-05',
      volatility: 'medium'
    });

    const data = await simulator.generate();

    expect(data.length).toBeGreaterThan(0);
    // Inspect the first generated point.
    const firstPoint = data[0];
    expect(firstPoint.symbol).toBe('AAPL');
    expect(firstPoint.open).toBeGreaterThan(0);
  });

  it('should complete benchmark workflow', async () => {
    const benchmark = new MultiModelBenchmark({
      models: [
        {
          provider: ModelProvider.GEMINI,
          model: 'gemini-2.0-flash-exp',
          apiKey: 'test-key'
        }
      ],
      tasks: ['test-task'],
      iterations: 2
    });

    const outcome = await benchmark.run();

    expect(outcome.results.length).toBe(2); // 1 model × 1 task × 2 iterations
    expect(outcome.bestModel).toBeDefined();
    expect(outcome.summary).toBeDefined();
  });
});
|
||||
|
||||
/**
 * Scenarios that chain components: training output fed into a benchmark,
 * self-learning with test predicates and events, and simulator data fed into
 * `getStatistics`.
 */
describe('Cross-Component Integration', () => {
  it('should use training results in benchmark', async () => {
    // Train models
    const session = new DSPyTrainingSession({
      models: [
        {
          provider: ModelProvider.GEMINI,
          model: 'gemini-2.0-flash-exp',
          apiKey: 'test-key-1'
        },
        {
          provider: ModelProvider.CLAUDE,
          model: 'claude-sonnet-4',
          apiKey: 'test-key-2'
        }
      ],
      optimizationRounds: 2,
      convergenceThreshold: 0.95
    });
    const { bestProvider, bestModel } = await session.run('Test prompt', {});

    // Use trained models in benchmark
    const benchmark = new MultiModelBenchmark({
      models: [
        {
          provider: bestProvider,
          model: bestModel,
          apiKey: 'test-key'
        }
      ],
      tasks: ['validation'],
      iterations: 1
    });
    const benchmarkResult = await benchmark.run();

    expect(benchmarkResult.results.length).toBe(1);
    // With a single candidate, the benchmark winner must be the trained winner.
    expect(benchmarkResult.bestProvider).toBe(bestProvider);
  });

  it('should use self-learning with quality metrics', async () => {
    const generator = new SelfLearningGenerator({
      task: 'quality-test',
      learningRate: 0.2,
      iterations: 5,
      qualityThreshold: 0.8
    });

    // Count 'improvement' notifications delivered during the run.
    let improvementEvents = 0;
    generator.on('improvement', () => {
      improvementEvents += 1;
    });

    const passesQualityBar = (output: any) => output.quality > 0.5;
    const hasContent = (output: any) => output.content.length > 0;

    const result = await generator.generate({
      prompt: 'Generate with quality tracking',
      tests: [passesQualityBar, hasContent]
    });

    expect(result.finalQuality).toBeGreaterThan(0);
    expect(improvementEvents).toBeGreaterThan(0);
    // No metric entry may be missing its pass rate.
    const missingPassRate = result.metrics.some(m => m.testsPassingRate === undefined);
    expect(!missingPassRate).toBe(true);
  });

  it('should integrate stock market data with statistics', async () => {
    const simulator = new StockMarketSimulator({
      symbols: ['AAPL', 'GOOGL'],
      startDate: '2024-01-01',
      endDate: '2024-01-15',
      volatility: 'high'
    });

    const data = await simulator.generate({
      includeSentiment: true,
      includeNews: true,
      marketConditions: 'bullish'
    });

    expect(data.length).toBeGreaterThan(0);

    // Get statistics for each symbol
    const pointsFor = (symbol: string) => data.filter(point => point.symbol === symbol);
    const aaplPoints = pointsFor('AAPL');
    const googlPoints = pointsFor('GOOGL');

    const aaplStats = simulator.getStatistics(aaplPoints);
    const googlStats = simulator.getStatistics(googlPoints);

    expect(aaplStats.totalDays).toBeGreaterThan(0);
    expect(googlStats.totalDays).toBeGreaterThan(0);
    expect(aaplStats.volatility).toBeGreaterThan(0);
    expect(googlStats.volatility).toBeGreaterThan(0);

    // Check sentiment is included
    const anySentiment = data.some(point => point.sentiment !== undefined);
    expect(anySentiment).toBe(true);
  });
});
|
||||
|
||||
// Verifies which events the training session and the self-learning generator
// emit, and in what order / how many times.
describe('Event-Driven Coordination', () => {
  it('should coordinate events across DSPy training', async () => {
    const trainer = new DSPyTrainingSession({
      models: [
        { provider: ModelProvider.GEMINI, model: 'gemini-2.0-flash-exp', apiKey: 'test-key' }
      ],
      optimizationRounds: 3,
      convergenceThreshold: 0.95
    });

    // Append each observed event name to a log, in emission order.
    const events: string[] = [];
    const record = (label: string) => () => events.push(label);

    trainer.on('start', record('start'));
    trainer.on('round', record('round'));
    trainer.on('iteration', record('iteration'));
    trainer.on('complete', record('complete'));

    await trainer.run('Coordinate events', {});

    for (const expected of ['start', 'round', 'iteration', 'complete']) {
      expect(events).toContain(expected);
    }
    // The log must open with 'start' and close with 'complete'.
    expect(events[0]).toBe('start');
    expect(events[events.length - 1]).toBe('complete');
  });

  it('should coordinate events in self-learning', async () => {
    const learner = new SelfLearningGenerator({
      task: 'event-test',
      learningRate: 0.1,
      iterations: 3
    });

    const events: string[] = [];
    const record = (label: string) => () => events.push(label);

    learner.on('start', record('start'));
    learner.on('improvement', record('improvement'));
    learner.on('complete', record('complete'));

    await learner.generate({ prompt: 'Test events' });

    for (const expected of ['start', 'improvement', 'complete']) {
      expect(events).toContain(expected);
    }
    // Exactly three 'improvement' events are expected for the 3 configured iterations.
    const improvementCount = events.filter(label => label === 'improvement').length;
    expect(improvementCount).toBe(3);
  });
});
|
||||
|
||||
// Checks failure behaviour: invalid training config must reject, while a
// benchmark run must still resolve with a usable summary.
describe('Error Recovery', () => {
  it('should handle errors gracefully in training', async () => {
    // An empty model list is the invalid input under test.
    const trainer = new DSPyTrainingSession({
      models: [],
      optimizationRounds: 2,
      convergenceThreshold: 0.95
    });

    const pendingRun = trainer.run('Test error', {});
    await expect(pendingRun).rejects.toThrow();
  });

  it('should continue after partial failures in benchmark', async () => {
    const runner = new MultiModelBenchmark({
      models: [
        { provider: ModelProvider.GEMINI, model: 'gemini-2.0-flash-exp', apiKey: 'test-key' }
      ],
      tasks: ['task1', 'task2'],
      iterations: 3
    });

    const outcome = await runner.run();

    // NOTE(review): the original comment says the benchmark simulates a ~5%
    // failure rate; that is not visible from this test — confirm in the
    // implementation. The run must still resolve with results either way.
    expect(outcome.results).toBeDefined();
    expect(outcome.summary.successRate).toBeGreaterThan(0);
  });
});
|
||||
|
||||
// Wall-clock budget checks for larger configurations of each component.
// Durations are measured with Date.now() around the awaited call.
describe('Performance at Scale', () => {
  it('should handle multiple models and rounds efficiently', async () => {
    const trainer = new DSPyTrainingSession({
      models: [
        { provider: ModelProvider.GEMINI, model: 'gemini-2.0-flash-exp', apiKey: 'test-key-1' },
        { provider: ModelProvider.CLAUDE, model: 'claude-sonnet-4', apiKey: 'test-key-2' },
        { provider: ModelProvider.GPT4, model: 'gpt-4-turbo', apiKey: 'test-key-3' }
      ],
      optimizationRounds: 3,
      convergenceThreshold: 0.95
    });

    const startedAt = Date.now();
    const report = await trainer.run('Scale test', {});
    const elapsedMs = Date.now() - startedAt;

    // 3 configured models x 3 optimization rounds => 9 results expected.
    expect(report.results.length).toBe(9);
    // Budget: under 3 seconds.
    expect(elapsedMs).toBeLessThan(3000);
  });

  it('should handle long time series efficiently', async () => {
    const simulator = new StockMarketSimulator({
      symbols: ['AAPL', 'GOOGL', 'MSFT'],
      startDate: '2024-01-01',
      endDate: '2024-12-31',
      volatility: 'medium'
    });

    const startedAt = Date.now();
    const series = await simulator.generate();
    const elapsedMs = Date.now() - startedAt;

    // A full calendar year across three symbols should yield well over 500 points.
    expect(series.length).toBeGreaterThan(500);
    // Budget: under 2 seconds.
    expect(elapsedMs).toBeLessThan(2000);
  });

  it('should handle many learning iterations', async () => {
    const learner = new SelfLearningGenerator({
      task: 'scale-test',
      learningRate: 0.05,
      iterations: 20
    });

    const startedAt = Date.now();
    const outcome = await learner.generate({ prompt: 'Scale test prompt' });
    const elapsedMs = Date.now() - startedAt;

    // The reported iteration count and the metrics log must both match the config.
    expect(outcome.iterations).toBe(20);
    expect(outcome.metrics.length).toBe(20);
    // Budget: under 5 seconds.
    expect(elapsedMs).toBeLessThan(5000);
  });
});
|
||||
|
||||
// Cross-checks internal consistency of training reports and of generated
// stock data (date ordering and OHLC bounds).
describe('Data Consistency', () => {
  it('should maintain consistency in training results', async () => {
    const session = new DSPyTrainingSession({
      models: [
        { provider: ModelProvider.GEMINI, model: 'gemini-2.0-flash-exp', apiKey: 'test-key' }
      ],
      optimizationRounds: 3,
      convergenceThreshold: 0.95
    });

    const report = await session.run('Consistency test', {});

    // One model over three rounds: three results, all attributed to that provider.
    expect(report.results.length).toBe(3);
    expect(report.iterations).toBe(3);
    expect(report.results.every(r => r.modelProvider === ModelProvider.GEMINI)).toBe(true);

    // The reported total cost must equal the sum of per-result costs (within 0.01).
    const totalCost = report.results.reduce((sum, r) => sum + r.cost, 0);
    expect(Math.abs(totalCost - report.totalCost)).toBeLessThan(0.01);
  });

  it('should maintain data integrity in stock simulation', async () => {
    const simulator = new StockMarketSimulator({
      symbols: ['AAPL'],
      startDate: '2024-01-01',
      endDate: '2024-01-10',
      volatility: 'medium'
    });

    const data = await simulator.generate();

    // Guard against a vacuous pass: with no data points, neither loop below
    // would execute a single assertion and the test would succeed trivially.
    expect(data.length).toBeGreaterThan(0);

    // Dates must be strictly increasing from one point to the next.
    for (let i = 1; i < data.length; i++) {
      const prevDate = data[i - 1].date;
      const currDate = data[i].date;
      expect(currDate.getTime()).toBeGreaterThan(prevDate.getTime());
    }

    // For every point, high bounds open/close from above and low bounds them from below.
    for (const point of data) {
      expect(point.high).toBeGreaterThanOrEqual(point.open);
      expect(point.high).toBeGreaterThanOrEqual(point.close);
      expect(point.low).toBeLessThanOrEqual(point.open);
      expect(point.low).toBeLessThanOrEqual(point.close);
    }
  });
});
|
||||
|
||||
// End-to-end style workflows that chain the components together the way a
// consumer of the package might.
describe('Real-World Scenarios', () => {
  it('should support model selection workflow', async () => {
    // Step 1: train two candidate models.
    const trainer = new DSPyTrainingSession({
      models: [
        { provider: ModelProvider.GEMINI, model: 'gemini-2.0-flash-exp', apiKey: 'test-key-1' },
        { provider: ModelProvider.CLAUDE, model: 'claude-sonnet-4', apiKey: 'test-key-2' }
      ],
      optimizationRounds: 2,
      convergenceThreshold: 0.95
    });
    const { bestProvider, bestModel } = await trainer.run('Select best model', {});

    // Step 2: benchmark the model that training selected.
    const winnerBenchmark = new MultiModelBenchmark({
      models: [{ provider: bestProvider, model: bestModel, apiKey: 'test-key' }],
      tasks: ['validation', 'production'],
      iterations: 3
    });
    const { summary } = await winnerBenchmark.run();

    // Step 3: the selected model must clear minimum score and success-rate bars.
    expect(summary.avgScore).toBeGreaterThan(0.5);
    expect(summary.successRate).toBeGreaterThan(0.8);
  });

  it('should support data generation for testing', async () => {
    // Produce one month of synthetic data for two placeholder symbols.
    const simulator = new StockMarketSimulator({
      symbols: ['TEST1', 'TEST2'],
      startDate: '2024-01-01',
      endDate: '2024-01-31',
      volatility: 'low'
    });

    const testData = await simulator.generate({
      includeSentiment: true,
      marketConditions: 'neutral'
    });

    expect(testData.length).toBeGreaterThan(0);

    // Inspect the first symbol's series only.
    const firstSymbolRows = testData.filter(row => row.symbol === 'TEST1');
    const stats = simulator.getStatistics(firstSymbolRows);

    expect(stats.totalDays).toBeGreaterThan(10);
    expect(stats.avgPrice).toBeGreaterThan(0);
    // The 'low' volatility setting is expected to keep this figure below 10.
    expect(stats.volatility).toBeLessThan(10);
  });

  it('should support iterative improvement workflow', async () => {
    const learner = new SelfLearningGenerator({
      task: 'iterative-improvement',
      learningRate: 0.15,
      iterations: 5,
      qualityThreshold: 0.85
    });

    // First pass starts from a fixed baseline quality.
    const firstPass = await learner.generate({
      prompt: 'Initial generation',
      initialQuality: 0.5
    });

    // Second pass is seeded with the quality the first pass finished on.
    const secondPass = await learner.generate({
      prompt: 'Improved generation',
      initialQuality: firstPass.finalQuality
    });

    // The second pass may dip slightly, but must stay within 5% of the first pass's result.
    const minimumAcceptable = firstPass.finalQuality * 0.95;
    expect(secondPass.finalQuality).toBeGreaterThanOrEqual(minimumAcceptable);
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user