// File: wifi-densepose/vendor/ruvector/npm/packages/agentic-synth/tests/dspy-learning-session.test.ts
// (file-listing header — "837 lines, 22 KiB, TypeScript" — converted to a comment so the file parses)
/**
* DSPy Learning Session - Unit Tests
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import {
DSPyTrainingSession,
ModelProvider,
TrainingPhase,
ClaudeSonnetAgent,
GPT4Agent,
GeminiAgent,
LlamaAgent,
OptimizationEngine,
BenchmarkCollector,
type ModelConfig,
type DSPySignature,
type IterationResult,
type QualityMetrics,
type PerformanceMetrics
} from '../training/dspy-learning-session.js';
describe('DSPyTrainingSession', () => {
  // Intentionally `any`: tests below clone this into deliberately invalid shapes.
  let config: any;

  beforeEach(() => {
    // Small two-model config so full training runs stay fast in unit tests.
    config = {
      models: [
        {
          provider: ModelProvider.GEMINI,
          model: 'gemini-2.0-flash-exp',
          apiKey: 'test-key-gemini'
        },
        {
          provider: ModelProvider.CLAUDE,
          model: 'claude-sonnet-4',
          apiKey: 'test-key-claude'
        }
      ],
      optimizationRounds: 2,
      convergenceThreshold: 0.9,
      maxConcurrency: 2,
      enableCrossLearning: true,
      enableHooksIntegration: false,
      costBudget: 1.0,
      timeoutPerIteration: 5000,
      baselineIterations: 2,
      benchmarkSamples: 5
    };
  });

  describe('Constructor', () => {
    it('should create a training session with valid config', () => {
      const session = new DSPyTrainingSession(config);
      expect(session).toBeDefined();
      expect(session.getStatistics()).toBeDefined();
    });

    it('should throw error with invalid config', () => {
      // An empty model list is rejected at construction time.
      const invalidConfig = { ...config, models: [] };
      expect(() => new DSPyTrainingSession(invalidConfig)).toThrow();
    });

    it('should initialize with default values', () => {
      const minimalConfig = {
        models: [
          {
            provider: ModelProvider.GEMINI,
            model: 'gemini-2.0-flash-exp',
            apiKey: 'test-key'
          }
        ]
      };
      const session = new DSPyTrainingSession(minimalConfig);
      const stats = session.getStatistics();
      expect(stats.currentPhase).toBe(TrainingPhase.BASELINE);
      expect(stats.totalCost).toBe(0);
    });
  });

  describe('Event System', () => {
    it('should emit start event', async () => {
      const session = new DSPyTrainingSession(config);
      await new Promise<void>((resolve, reject) => {
        session.on('start', (data) => {
          expect(data.phase).toBe(TrainingPhase.BASELINE);
          resolve();
        });
        const optimizer = new OptimizationEngine();
        const signature = optimizer.createSignature('test', 'input', 'output');
        // FIX: run() was previously a floating promise — a rejection surfaced
        // only as an unhandled-rejection warning while the test hung until the
        // suite timeout. Route failures into reject() so the test fails fast
        // with the real error.
        Promise.resolve(session.run('test prompt', signature)).catch(reject);
      });
    });

    it('should emit phase transitions', async () => {
      const session = new DSPyTrainingSession(config);
      const phases: TrainingPhase[] = [];
      await new Promise<void>((resolve, reject) => {
        session.on('phase', (phase) => {
          phases.push(phase);
        });
        session.on('complete', () => {
          expect(phases.length).toBeGreaterThan(0);
          expect(phases).toContain(TrainingPhase.BASELINE);
          resolve();
        });
        const optimizer = new OptimizationEngine();
        const signature = optimizer.createSignature('test', 'input', 'output');
        // FIX: propagate run() rejections instead of leaving them unhandled.
        Promise.resolve(session.run('test prompt', signature)).catch(reject);
      });
    });

    it('should emit iteration events', async () => {
      const session = new DSPyTrainingSession(config);
      let iterationCount = 0;
      await new Promise<void>((resolve, reject) => {
        session.on('iteration', (result) => {
          iterationCount++;
          expect(result).toBeDefined();
          expect(result.modelProvider).toBeDefined();
          expect(result.quality).toBeDefined();
          expect(result.performance).toBeDefined();
        });
        session.on('complete', () => {
          expect(iterationCount).toBeGreaterThan(0);
          resolve();
        });
        const optimizer = new OptimizationEngine();
        const signature = optimizer.createSignature('test', 'input', 'output');
        // FIX: propagate run() rejections instead of leaving them unhandled.
        Promise.resolve(session.run('test prompt', signature)).catch(reject);
      });
    });
  });

  describe('Statistics', () => {
    it('should track session statistics', () => {
      const session = new DSPyTrainingSession(config);
      const initialStats = session.getStatistics();
      expect(initialStats.currentPhase).toBe(TrainingPhase.BASELINE);
      expect(initialStats.totalCost).toBe(0);
      expect(initialStats.duration).toBeGreaterThanOrEqual(0);
    });

    it('should update cost during training', async () => {
      const session = new DSPyTrainingSession(config);
      await new Promise<void>((resolve, reject) => {
        session.on('complete', () => {
          const stats = session.getStatistics();
          expect(stats.totalCost).toBeGreaterThan(0);
          resolve();
        });
        const optimizer = new OptimizationEngine();
        const signature = optimizer.createSignature('test', 'input', 'output');
        // FIX: propagate run() rejections instead of leaving them unhandled.
        Promise.resolve(session.run('test prompt', signature)).catch(reject);
      });
    });
  });

  describe('Stop Functionality', () => {
    it('should stop training session', async () => {
      const session = new DSPyTrainingSession(config);
      await new Promise<void>((resolve, reject) => {
        session.on('stopped', (stats) => {
          expect(stats).toBeDefined();
          expect(stats.currentPhase).toBeDefined();
          resolve();
        });
        // Give the run a moment to start before stopping it mid-flight.
        setTimeout(() => {
          session.stop();
        }, 100);
        const optimizer = new OptimizationEngine();
        const signature = optimizer.createSignature('test', 'input', 'output');
        // FIX: propagate run() rejections instead of leaving them unhandled.
        Promise.resolve(session.run('test prompt', signature)).catch(reject);
      });
    });
  });
});
describe('Model Agents', () => {
  describe('ClaudeSonnetAgent', () => {
    // Fresh agent per test, equivalent to a beforeEach-created instance.
    const buildAgent = (): ClaudeSonnetAgent =>
      new ClaudeSonnetAgent({
        provider: ModelProvider.CLAUDE,
        model: 'claude-sonnet-4',
        apiKey: 'test-key',
        temperature: 0.7
      });

    // Minimal signature shared by the tests below.
    const signatureOf = (input: string, output: string): DSPySignature => ({ input, output });

    it('should execute and return result', async () => {
      const agent = buildAgent();
      const result = await agent.execute('test prompt', signatureOf('test input', 'test output'));
      expect(result).toBeDefined();
      expect(result.modelProvider).toBe(ModelProvider.CLAUDE);
      expect(result.quality).toBeDefined();
      expect(result.performance).toBeDefined();
      expect(result.quality.score).toBeGreaterThanOrEqual(0);
      expect(result.quality.score).toBeLessThanOrEqual(1);
    });

    it('should track results', async () => {
      const agent = buildAgent();
      const signature = signatureOf('test input', 'test output');
      await agent.execute('test prompt 1', signature);
      await agent.execute('test prompt 2', signature);
      expect(agent.getResults().length).toBe(2);
    });

    it('should track total cost', async () => {
      const agent = buildAgent();
      await agent.execute('test prompt', signatureOf('test input', 'test output'));
      expect(agent.getTotalCost()).toBeGreaterThan(0);
    });
  });

  describe('GPT4Agent', () => {
    it('should execute with correct provider', async () => {
      const agent = new GPT4Agent({
        provider: ModelProvider.GPT4,
        model: 'gpt-4-turbo',
        apiKey: 'test-key'
      });
      const signature: DSPySignature = { input: 'test', output: 'test' };
      const result = await agent.execute('test', signature);
      expect(result.modelProvider).toBe(ModelProvider.GPT4);
    });
  });

  describe('GeminiAgent', () => {
    it('should execute with correct provider', async () => {
      const agent = new GeminiAgent({
        provider: ModelProvider.GEMINI,
        model: 'gemini-2.0-flash-exp',
        apiKey: 'test-key'
      });
      const signature: DSPySignature = { input: 'test', output: 'test' };
      const result = await agent.execute('test', signature);
      expect(result.modelProvider).toBe(ModelProvider.GEMINI);
    });
  });

  describe('LlamaAgent', () => {
    it('should execute with correct provider', async () => {
      const agent = new LlamaAgent({
        provider: ModelProvider.LLAMA,
        model: 'llama-3.1-70b',
        apiKey: 'test-key'
      });
      const signature: DSPySignature = { input: 'test', output: 'test' };
      const result = await agent.execute('test', signature);
      expect(result.modelProvider).toBe(ModelProvider.LLAMA);
    });
  });
});
describe('OptimizationEngine', () => {
let optimizer: OptimizationEngine;
beforeEach(() => {
optimizer = new OptimizationEngine();
});
describe('Signature Creation', () => {
it('should create basic signature', () => {
const signature = optimizer.createSignature(
'test',
'input',
'output'
);
expect(signature).toBeDefined();
expect(signature.input).toBe('input');
expect(signature.output).toBe('output');
expect(signature.examples).toEqual([]);
expect(signature.constraints).toEqual([]);
expect(signature.objectives).toEqual([]);
});
it('should create signature with options', () => {
const signature = optimizer.createSignature(
'test',
'input',
'output',
{
examples: [{ input: 'ex1', output: 'ex1' }],
constraints: ['min_length:10'],
objectives: ['maximize quality']
}
);
expect(signature.examples?.length).toBe(1);
expect(signature.constraints?.length).toBe(1);
expect(signature.objectives?.length).toBe(1);
});
});
describe('Prompt Optimization', () => {
it('should optimize prompt based on results', async () => {
const signature: DSPySignature = {
input: 'test input',
output: 'test output',
examples: [{ input: 'example', output: 'example output' }],
constraints: ['min_length:10'],
objectives: ['high quality']
};
const results: IterationResult[] = [
{
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.5,
accuracy: 0.5,
coherence: 0.5,
relevance: 0.5,
diversity: 0.5,
creativity: 0.5
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'base prompt',
output: 'base output',
optimizations: []
}
];
const optimized = await optimizer.optimizePrompt(
'base prompt',
results,
signature
);
expect(optimized).toBeDefined();
expect(optimized.length).toBeGreaterThan('base prompt'.length);
});
});
describe('Cross-Model Optimization', () => {
it('should perform cross-model optimization', async () => {
const allResults = new Map<ModelProvider, IterationResult[]>();
const result1: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.9,
accuracy: 0.9,
coherence: 0.9,
relevance: 0.9,
diversity: 0.9,
creativity: 0.9
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'good prompt',
output: 'good output',
optimizations: []
};
const result2: IterationResult = {
...result1,
modelProvider: ModelProvider.CLAUDE,
quality: {
score: 0.5,
accuracy: 0.5,
coherence: 0.5,
relevance: 0.5,
diversity: 0.5,
creativity: 0.5
},
prompt: 'poor prompt'
};
allResults.set(ModelProvider.GEMINI, [result1]);
allResults.set(ModelProvider.CLAUDE, [result2]);
const optimized = await optimizer.crossModelOptimization(allResults);
expect(optimized).toBeDefined();
expect(optimized.size).toBeGreaterThan(0);
});
});
});
describe('BenchmarkCollector', () => {
let collector: BenchmarkCollector;
beforeEach(() => {
collector = new BenchmarkCollector();
});
describe('Result Collection', () => {
it('should add results', () => {
const result: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.8,
accuracy: 0.8,
coherence: 0.8,
relevance: 0.8,
diversity: 0.8,
creativity: 0.8
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
};
collector.addResult(result);
const metrics = collector.getModelMetrics(ModelProvider.GEMINI);
expect(metrics.length).toBe(1);
expect(metrics[0]).toEqual(result);
});
it('should get metrics for specific model', () => {
const result1: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.8,
accuracy: 0.8,
coherence: 0.8,
relevance: 0.8,
diversity: 0.8,
creativity: 0.8
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
};
const result2 = { ...result1, modelProvider: ModelProvider.CLAUDE };
collector.addResult(result1);
collector.addResult(result2);
const geminiMetrics = collector.getModelMetrics(ModelProvider.GEMINI);
const claudeMetrics = collector.getModelMetrics(ModelProvider.CLAUDE);
expect(geminiMetrics.length).toBe(1);
expect(claudeMetrics.length).toBe(1);
});
});
describe('Statistics', () => {
it('should calculate aggregate statistics', () => {
const results: IterationResult[] = [
{
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.7,
accuracy: 0.7,
coherence: 0.7,
relevance: 0.7,
diversity: 0.7,
creativity: 0.7
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
},
{
iteration: 2,
phase: TrainingPhase.OPTIMIZATION,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.9,
accuracy: 0.9,
coherence: 0.9,
relevance: 0.9,
diversity: 0.9,
creativity: 0.9
},
performance: {
latency: 120,
throughput: 8,
tokensUsed: 120,
cost: 0.012,
memoryUsage: 55,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
}
];
results.forEach(r => collector.addResult(r));
const stats = collector.getAggregateStats(ModelProvider.GEMINI);
expect(stats).toBeDefined();
expect(stats?.totalIterations).toBe(2);
expect(stats?.avgQualityScore).toBeCloseTo(0.8, 1);
expect(stats?.avgLatency).toBeCloseTo(110, 0);
expect(stats?.totalCost).toBeCloseTo(0.022, 3);
});
it('should identify best model', () => {
const geminiResult: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.9,
accuracy: 0.9,
coherence: 0.9,
relevance: 0.9,
diversity: 0.9,
creativity: 0.9
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
};
const claudeResult = {
...geminiResult,
modelProvider: ModelProvider.CLAUDE,
quality: {
score: 0.7,
accuracy: 0.7,
coherence: 0.7,
relevance: 0.7,
diversity: 0.7,
creativity: 0.7
}
};
collector.addResult(geminiResult);
collector.addResult(claudeResult);
const bestModel = collector.getBestModel();
expect(bestModel).toBe(ModelProvider.GEMINI);
});
});
describe('Report Generation', () => {
it('should generate comprehensive report', () => {
const result: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.8,
accuracy: 0.8,
coherence: 0.8,
relevance: 0.8,
diversity: 0.8,
creativity: 0.8
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
};
collector.addResult(result);
const report = collector.generateReport();
expect(report).toContain('DSPy Training Session Report');
expect(report).toContain('Best Performing Model');
expect(report).toContain('Model Comparison');
expect(report).toContain('gemini');
});
it('should generate comparison data', () => {
const geminiResult: IterationResult = {
iteration: 1,
phase: TrainingPhase.BASELINE,
modelProvider: ModelProvider.GEMINI,
quality: {
score: 0.8,
accuracy: 0.8,
coherence: 0.8,
relevance: 0.8,
diversity: 0.8,
creativity: 0.8
},
performance: {
latency: 100,
throughput: 10,
tokensUsed: 100,
cost: 0.01,
memoryUsage: 50,
errorRate: 0
},
timestamp: new Date(),
prompt: 'test',
output: 'test',
optimizations: []
};
const claudeResult = { ...geminiResult, modelProvider: ModelProvider.CLAUDE };
collector.addResult(geminiResult);
collector.addResult(claudeResult);
const comparison = collector.getComparison();
expect(comparison).toBeDefined();
expect(comparison[ModelProvider.GEMINI]).toBeDefined();
expect(comparison[ModelProvider.CLAUDE]).toBeDefined();
});
});
});
describe('Quality Metrics Calculation', () => {
  it('should calculate quality scores correctly', async () => {
    const agent = new GeminiAgent({
      provider: ModelProvider.GEMINI,
      model: 'gemini-2.0-flash-exp',
      apiKey: 'test-key'
    });
    const signature: DSPySignature = {
      input: 'test input with keywords',
      output: 'test output',
      constraints: ['min_length:10']
    };
    const { quality } = await agent.execute('test prompt', signature);
    // The aggregate score is a normalized value in [0, 1]; every sub-metric
    // is non-negative.
    expect(quality.score).toBeGreaterThanOrEqual(0);
    expect(quality.score).toBeLessThanOrEqual(1);
    const subMetrics = [
      quality.accuracy,
      quality.coherence,
      quality.relevance,
      quality.diversity,
      quality.creativity
    ];
    for (const metric of subMetrics) {
      expect(metric).toBeGreaterThanOrEqual(0);
    }
  });
});
describe('Performance Metrics Calculation', () => {
it('should track latency correctly', async () => {
const config: ModelConfig = {
provider: ModelProvider.GEMINI,
model: 'gemini-2.0-flash-exp',
apiKey: 'test-key'
};
const agent = new GeminiAgent(config);
const signature: DSPySignature = {
input: 'test',
output: 'test'
};
const result = await agent.execute('test', signature);
expect(result.performance.latency).toBeGreaterThan(0);
expect(result.performance.throughput).toBeGreaterThan(0);
});
it('should calculate cost correctly', async () => {
const config: ModelConfig = {
provider: ModelProvider.GEMINI,
model: 'gemini-2.0-flash-exp',
apiKey: 'test-key'
};
const agent = new GeminiAgent(config);
const signature: DSPySignature = {
input: 'test',
output: 'test'
};
const result = await agent.execute('test prompt', signature);
expect(result.performance.cost).toBeGreaterThan(0);
expect(result.performance.tokensUsed).toBeGreaterThan(0);
});
});
describe('Integration Tests', () => {
  // End-to-end run with the smallest possible iteration counts; 10 s timeout.
  it('should complete full training pipeline', async () => {
    const config = {
      models: [
        {
          provider: ModelProvider.GEMINI,
          model: 'gemini-2.0-flash-exp',
          apiKey: 'test-key'
        }
      ],
      optimizationRounds: 1,
      baselineIterations: 1,
      benchmarkSamples: 2,
      enableCrossLearning: false,
      enableHooksIntegration: false
    };
    const session = new DSPyTrainingSession(config);
    const phases: TrainingPhase[] = [];
    session.on('phase', (phase) => phases.push(phase));
    await new Promise<void>((resolve, reject) => {
      session.on('complete', () => {
        expect(phases.length).toBeGreaterThan(0);
        resolve();
      });
      const optimizer = new OptimizationEngine();
      const signature = optimizer.createSignature('test', 'input', 'output');
      // FIX: run() was previously a floating promise — a rejection surfaced
      // only as an unhandled-rejection warning while the test hung until the
      // 10 s timeout. Route failures into reject() so the test fails fast.
      Promise.resolve(session.run('test prompt', signature)).catch(reject);
    });
  }, 10000);
});