Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
836
vendor/ruvector/npm/packages/agentic-synth/tests/dspy-learning-session.test.ts
vendored
Normal file
836
vendor/ruvector/npm/packages/agentic-synth/tests/dspy-learning-session.test.ts
vendored
Normal file
@@ -0,0 +1,836 @@
|
||||
/**
|
||||
* DSPy Learning Session - Unit Tests
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
import {
|
||||
DSPyTrainingSession,
|
||||
ModelProvider,
|
||||
TrainingPhase,
|
||||
ClaudeSonnetAgent,
|
||||
GPT4Agent,
|
||||
GeminiAgent,
|
||||
LlamaAgent,
|
||||
OptimizationEngine,
|
||||
BenchmarkCollector,
|
||||
type ModelConfig,
|
||||
type DSPySignature,
|
||||
type IterationResult,
|
||||
type QualityMetrics,
|
||||
type PerformanceMetrics
|
||||
} from '../training/dspy-learning-session.js';
|
||||
|
||||
describe('DSPyTrainingSession', () => {
|
||||
let config: any;
|
||||
|
||||
beforeEach(() => {
|
||||
config = {
|
||||
models: [
|
||||
{
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key-gemini'
|
||||
},
|
||||
{
|
||||
provider: ModelProvider.CLAUDE,
|
||||
model: 'claude-sonnet-4',
|
||||
apiKey: 'test-key-claude'
|
||||
}
|
||||
],
|
||||
optimizationRounds: 2,
|
||||
convergenceThreshold: 0.9,
|
||||
maxConcurrency: 2,
|
||||
enableCrossLearning: true,
|
||||
enableHooksIntegration: false,
|
||||
costBudget: 1.0,
|
||||
timeoutPerIteration: 5000,
|
||||
baselineIterations: 2,
|
||||
benchmarkSamples: 5
|
||||
};
|
||||
});
|
||||
|
||||
describe('Constructor', () => {
|
||||
it('should create a training session with valid config', () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
expect(session).toBeDefined();
|
||||
expect(session.getStatistics()).toBeDefined();
|
||||
});
|
||||
|
||||
it('should throw error with invalid config', () => {
|
||||
const invalidConfig = { ...config, models: [] };
|
||||
expect(() => new DSPyTrainingSession(invalidConfig)).toThrow();
|
||||
});
|
||||
|
||||
it('should initialize with default values', () => {
|
||||
const minimalConfig = {
|
||||
models: [
|
||||
{
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const session = new DSPyTrainingSession(minimalConfig);
|
||||
const stats = session.getStatistics();
|
||||
|
||||
expect(stats.currentPhase).toBe(TrainingPhase.BASELINE);
|
||||
expect(stats.totalCost).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Event System', () => {
|
||||
it('should emit start event', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('start', (data) => {
|
||||
expect(data.phase).toBe(TrainingPhase.BASELINE);
|
||||
resolve();
|
||||
});
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
});
|
||||
|
||||
it('should emit phase transitions', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const phases: TrainingPhase[] = [];
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('phase', (phase) => {
|
||||
phases.push(phase);
|
||||
});
|
||||
|
||||
session.on('complete', () => {
|
||||
expect(phases.length).toBeGreaterThan(0);
|
||||
expect(phases).toContain(TrainingPhase.BASELINE);
|
||||
resolve();
|
||||
});
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
});
|
||||
|
||||
it('should emit iteration events', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
let iterationCount = 0;
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('iteration', (result) => {
|
||||
iterationCount++;
|
||||
expect(result).toBeDefined();
|
||||
expect(result.modelProvider).toBeDefined();
|
||||
expect(result.quality).toBeDefined();
|
||||
expect(result.performance).toBeDefined();
|
||||
});
|
||||
|
||||
session.on('complete', () => {
|
||||
expect(iterationCount).toBeGreaterThan(0);
|
||||
resolve();
|
||||
});
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Statistics', () => {
|
||||
it('should track session statistics', () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
const initialStats = session.getStatistics();
|
||||
|
||||
expect(initialStats.currentPhase).toBe(TrainingPhase.BASELINE);
|
||||
expect(initialStats.totalCost).toBe(0);
|
||||
expect(initialStats.duration).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('should update cost during training', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('complete', () => {
|
||||
const stats = session.getStatistics();
|
||||
expect(stats.totalCost).toBeGreaterThan(0);
|
||||
resolve();
|
||||
});
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Stop Functionality', () => {
|
||||
it('should stop training session', async () => {
|
||||
const session = new DSPyTrainingSession(config);
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('stopped', (stats) => {
|
||||
expect(stats).toBeDefined();
|
||||
expect(stats.currentPhase).toBeDefined();
|
||||
resolve();
|
||||
});
|
||||
|
||||
setTimeout(() => {
|
||||
session.stop();
|
||||
}, 100);
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Model Agents', () => {
|
||||
describe('ClaudeSonnetAgent', () => {
|
||||
let agent: ClaudeSonnetAgent;
|
||||
let config: ModelConfig;
|
||||
|
||||
beforeEach(() => {
|
||||
config = {
|
||||
provider: ModelProvider.CLAUDE,
|
||||
model: 'claude-sonnet-4',
|
||||
apiKey: 'test-key',
|
||||
temperature: 0.7
|
||||
};
|
||||
agent = new ClaudeSonnetAgent(config);
|
||||
});
|
||||
|
||||
it('should execute and return result', async () => {
|
||||
const signature: DSPySignature = {
|
||||
input: 'test input',
|
||||
output: 'test output'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test prompt', signature);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.modelProvider).toBe(ModelProvider.CLAUDE);
|
||||
expect(result.quality).toBeDefined();
|
||||
expect(result.performance).toBeDefined();
|
||||
expect(result.quality.score).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.score).toBeLessThanOrEqual(1);
|
||||
});
|
||||
|
||||
it('should track results', async () => {
|
||||
const signature: DSPySignature = {
|
||||
input: 'test input',
|
||||
output: 'test output'
|
||||
};
|
||||
|
||||
await agent.execute('test prompt 1', signature);
|
||||
await agent.execute('test prompt 2', signature);
|
||||
|
||||
const results = agent.getResults();
|
||||
expect(results.length).toBe(2);
|
||||
});
|
||||
|
||||
it('should track total cost', async () => {
|
||||
const signature: DSPySignature = {
|
||||
input: 'test input',
|
||||
output: 'test output'
|
||||
};
|
||||
|
||||
await agent.execute('test prompt', signature);
|
||||
|
||||
const cost = agent.getTotalCost();
|
||||
expect(cost).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('GPT4Agent', () => {
|
||||
it('should execute with correct provider', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.GPT4,
|
||||
model: 'gpt-4-turbo',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new GPT4Agent(config);
|
||||
const signature: DSPySignature = {
|
||||
input: 'test',
|
||||
output: 'test'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test', signature);
|
||||
|
||||
expect(result.modelProvider).toBe(ModelProvider.GPT4);
|
||||
});
|
||||
});
|
||||
|
||||
describe('GeminiAgent', () => {
|
||||
it('should execute with correct provider', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new GeminiAgent(config);
|
||||
const signature: DSPySignature = {
|
||||
input: 'test',
|
||||
output: 'test'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test', signature);
|
||||
|
||||
expect(result.modelProvider).toBe(ModelProvider.GEMINI);
|
||||
});
|
||||
});
|
||||
|
||||
describe('LlamaAgent', () => {
|
||||
it('should execute with correct provider', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.LLAMA,
|
||||
model: 'llama-3.1-70b',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new LlamaAgent(config);
|
||||
const signature: DSPySignature = {
|
||||
input: 'test',
|
||||
output: 'test'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test', signature);
|
||||
|
||||
expect(result.modelProvider).toBe(ModelProvider.LLAMA);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('OptimizationEngine', () => {
|
||||
let optimizer: OptimizationEngine;
|
||||
|
||||
beforeEach(() => {
|
||||
optimizer = new OptimizationEngine();
|
||||
});
|
||||
|
||||
describe('Signature Creation', () => {
|
||||
it('should create basic signature', () => {
|
||||
const signature = optimizer.createSignature(
|
||||
'test',
|
||||
'input',
|
||||
'output'
|
||||
);
|
||||
|
||||
expect(signature).toBeDefined();
|
||||
expect(signature.input).toBe('input');
|
||||
expect(signature.output).toBe('output');
|
||||
expect(signature.examples).toEqual([]);
|
||||
expect(signature.constraints).toEqual([]);
|
||||
expect(signature.objectives).toEqual([]);
|
||||
});
|
||||
|
||||
it('should create signature with options', () => {
|
||||
const signature = optimizer.createSignature(
|
||||
'test',
|
||||
'input',
|
||||
'output',
|
||||
{
|
||||
examples: [{ input: 'ex1', output: 'ex1' }],
|
||||
constraints: ['min_length:10'],
|
||||
objectives: ['maximize quality']
|
||||
}
|
||||
);
|
||||
|
||||
expect(signature.examples?.length).toBe(1);
|
||||
expect(signature.constraints?.length).toBe(1);
|
||||
expect(signature.objectives?.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Prompt Optimization', () => {
|
||||
it('should optimize prompt based on results', async () => {
|
||||
const signature: DSPySignature = {
|
||||
input: 'test input',
|
||||
output: 'test output',
|
||||
examples: [{ input: 'example', output: 'example output' }],
|
||||
constraints: ['min_length:10'],
|
||||
objectives: ['high quality']
|
||||
};
|
||||
|
||||
const results: IterationResult[] = [
|
||||
{
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.5,
|
||||
accuracy: 0.5,
|
||||
coherence: 0.5,
|
||||
relevance: 0.5,
|
||||
diversity: 0.5,
|
||||
creativity: 0.5
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'base prompt',
|
||||
output: 'base output',
|
||||
optimizations: []
|
||||
}
|
||||
];
|
||||
|
||||
const optimized = await optimizer.optimizePrompt(
|
||||
'base prompt',
|
||||
results,
|
||||
signature
|
||||
);
|
||||
|
||||
expect(optimized).toBeDefined();
|
||||
expect(optimized.length).toBeGreaterThan('base prompt'.length);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Cross-Model Optimization', () => {
|
||||
it('should perform cross-model optimization', async () => {
|
||||
const allResults = new Map<ModelProvider, IterationResult[]>();
|
||||
|
||||
const result1: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.9,
|
||||
accuracy: 0.9,
|
||||
coherence: 0.9,
|
||||
relevance: 0.9,
|
||||
diversity: 0.9,
|
||||
creativity: 0.9
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'good prompt',
|
||||
output: 'good output',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
const result2: IterationResult = {
|
||||
...result1,
|
||||
modelProvider: ModelProvider.CLAUDE,
|
||||
quality: {
|
||||
score: 0.5,
|
||||
accuracy: 0.5,
|
||||
coherence: 0.5,
|
||||
relevance: 0.5,
|
||||
diversity: 0.5,
|
||||
creativity: 0.5
|
||||
},
|
||||
prompt: 'poor prompt'
|
||||
};
|
||||
|
||||
allResults.set(ModelProvider.GEMINI, [result1]);
|
||||
allResults.set(ModelProvider.CLAUDE, [result2]);
|
||||
|
||||
const optimized = await optimizer.crossModelOptimization(allResults);
|
||||
|
||||
expect(optimized).toBeDefined();
|
||||
expect(optimized.size).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('BenchmarkCollector', () => {
|
||||
let collector: BenchmarkCollector;
|
||||
|
||||
beforeEach(() => {
|
||||
collector = new BenchmarkCollector();
|
||||
});
|
||||
|
||||
describe('Result Collection', () => {
|
||||
it('should add results', () => {
|
||||
const result: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.8,
|
||||
accuracy: 0.8,
|
||||
coherence: 0.8,
|
||||
relevance: 0.8,
|
||||
diversity: 0.8,
|
||||
creativity: 0.8
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
collector.addResult(result);
|
||||
|
||||
const metrics = collector.getModelMetrics(ModelProvider.GEMINI);
|
||||
expect(metrics.length).toBe(1);
|
||||
expect(metrics[0]).toEqual(result);
|
||||
});
|
||||
|
||||
it('should get metrics for specific model', () => {
|
||||
const result1: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.8,
|
||||
accuracy: 0.8,
|
||||
coherence: 0.8,
|
||||
relevance: 0.8,
|
||||
diversity: 0.8,
|
||||
creativity: 0.8
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
const result2 = { ...result1, modelProvider: ModelProvider.CLAUDE };
|
||||
|
||||
collector.addResult(result1);
|
||||
collector.addResult(result2);
|
||||
|
||||
const geminiMetrics = collector.getModelMetrics(ModelProvider.GEMINI);
|
||||
const claudeMetrics = collector.getModelMetrics(ModelProvider.CLAUDE);
|
||||
|
||||
expect(geminiMetrics.length).toBe(1);
|
||||
expect(claudeMetrics.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Statistics', () => {
|
||||
it('should calculate aggregate statistics', () => {
|
||||
const results: IterationResult[] = [
|
||||
{
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.7,
|
||||
accuracy: 0.7,
|
||||
coherence: 0.7,
|
||||
relevance: 0.7,
|
||||
diversity: 0.7,
|
||||
creativity: 0.7
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
},
|
||||
{
|
||||
iteration: 2,
|
||||
phase: TrainingPhase.OPTIMIZATION,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.9,
|
||||
accuracy: 0.9,
|
||||
coherence: 0.9,
|
||||
relevance: 0.9,
|
||||
diversity: 0.9,
|
||||
creativity: 0.9
|
||||
},
|
||||
performance: {
|
||||
latency: 120,
|
||||
throughput: 8,
|
||||
tokensUsed: 120,
|
||||
cost: 0.012,
|
||||
memoryUsage: 55,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
}
|
||||
];
|
||||
|
||||
results.forEach(r => collector.addResult(r));
|
||||
|
||||
const stats = collector.getAggregateStats(ModelProvider.GEMINI);
|
||||
|
||||
expect(stats).toBeDefined();
|
||||
expect(stats?.totalIterations).toBe(2);
|
||||
expect(stats?.avgQualityScore).toBeCloseTo(0.8, 1);
|
||||
expect(stats?.avgLatency).toBeCloseTo(110, 0);
|
||||
expect(stats?.totalCost).toBeCloseTo(0.022, 3);
|
||||
});
|
||||
|
||||
it('should identify best model', () => {
|
||||
const geminiResult: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.9,
|
||||
accuracy: 0.9,
|
||||
coherence: 0.9,
|
||||
relevance: 0.9,
|
||||
diversity: 0.9,
|
||||
creativity: 0.9
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
const claudeResult = {
|
||||
...geminiResult,
|
||||
modelProvider: ModelProvider.CLAUDE,
|
||||
quality: {
|
||||
score: 0.7,
|
||||
accuracy: 0.7,
|
||||
coherence: 0.7,
|
||||
relevance: 0.7,
|
||||
diversity: 0.7,
|
||||
creativity: 0.7
|
||||
}
|
||||
};
|
||||
|
||||
collector.addResult(geminiResult);
|
||||
collector.addResult(claudeResult);
|
||||
|
||||
const bestModel = collector.getBestModel();
|
||||
expect(bestModel).toBe(ModelProvider.GEMINI);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Report Generation', () => {
|
||||
it('should generate comprehensive report', () => {
|
||||
const result: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.8,
|
||||
accuracy: 0.8,
|
||||
coherence: 0.8,
|
||||
relevance: 0.8,
|
||||
diversity: 0.8,
|
||||
creativity: 0.8
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
collector.addResult(result);
|
||||
|
||||
const report = collector.generateReport();
|
||||
|
||||
expect(report).toContain('DSPy Training Session Report');
|
||||
expect(report).toContain('Best Performing Model');
|
||||
expect(report).toContain('Model Comparison');
|
||||
expect(report).toContain('gemini');
|
||||
});
|
||||
|
||||
it('should generate comparison data', () => {
|
||||
const geminiResult: IterationResult = {
|
||||
iteration: 1,
|
||||
phase: TrainingPhase.BASELINE,
|
||||
modelProvider: ModelProvider.GEMINI,
|
||||
quality: {
|
||||
score: 0.8,
|
||||
accuracy: 0.8,
|
||||
coherence: 0.8,
|
||||
relevance: 0.8,
|
||||
diversity: 0.8,
|
||||
creativity: 0.8
|
||||
},
|
||||
performance: {
|
||||
latency: 100,
|
||||
throughput: 10,
|
||||
tokensUsed: 100,
|
||||
cost: 0.01,
|
||||
memoryUsage: 50,
|
||||
errorRate: 0
|
||||
},
|
||||
timestamp: new Date(),
|
||||
prompt: 'test',
|
||||
output: 'test',
|
||||
optimizations: []
|
||||
};
|
||||
|
||||
const claudeResult = { ...geminiResult, modelProvider: ModelProvider.CLAUDE };
|
||||
|
||||
collector.addResult(geminiResult);
|
||||
collector.addResult(claudeResult);
|
||||
|
||||
const comparison = collector.getComparison();
|
||||
|
||||
expect(comparison).toBeDefined();
|
||||
expect(comparison[ModelProvider.GEMINI]).toBeDefined();
|
||||
expect(comparison[ModelProvider.CLAUDE]).toBeDefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Quality Metrics Calculation', () => {
|
||||
it('should calculate quality scores correctly', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new GeminiAgent(config);
|
||||
|
||||
const signature: DSPySignature = {
|
||||
input: 'test input with keywords',
|
||||
output: 'test output',
|
||||
constraints: ['min_length:10']
|
||||
};
|
||||
|
||||
const result = await agent.execute('test prompt', signature);
|
||||
|
||||
expect(result.quality.score).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.score).toBeLessThanOrEqual(1);
|
||||
expect(result.quality.accuracy).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.coherence).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.relevance).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.diversity).toBeGreaterThanOrEqual(0);
|
||||
expect(result.quality.creativity).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Performance Metrics Calculation', () => {
|
||||
it('should track latency correctly', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new GeminiAgent(config);
|
||||
|
||||
const signature: DSPySignature = {
|
||||
input: 'test',
|
||||
output: 'test'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test', signature);
|
||||
|
||||
expect(result.performance.latency).toBeGreaterThan(0);
|
||||
expect(result.performance.throughput).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should calculate cost correctly', async () => {
|
||||
const config: ModelConfig = {
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
};
|
||||
const agent = new GeminiAgent(config);
|
||||
|
||||
const signature: DSPySignature = {
|
||||
input: 'test',
|
||||
output: 'test'
|
||||
};
|
||||
|
||||
const result = await agent.execute('test prompt', signature);
|
||||
|
||||
expect(result.performance.cost).toBeGreaterThan(0);
|
||||
expect(result.performance.tokensUsed).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Integration Tests', () => {
|
||||
it('should complete full training pipeline', async () => {
|
||||
const config = {
|
||||
models: [
|
||||
{
|
||||
provider: ModelProvider.GEMINI,
|
||||
model: 'gemini-2.0-flash-exp',
|
||||
apiKey: 'test-key'
|
||||
}
|
||||
],
|
||||
optimizationRounds: 1,
|
||||
baselineIterations: 1,
|
||||
benchmarkSamples: 2,
|
||||
enableCrossLearning: false,
|
||||
enableHooksIntegration: false
|
||||
};
|
||||
|
||||
const session = new DSPyTrainingSession(config);
|
||||
|
||||
const phases: TrainingPhase[] = [];
|
||||
session.on('phase', (phase) => phases.push(phase));
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
session.on('complete', () => {
|
||||
expect(phases.length).toBeGreaterThan(0);
|
||||
resolve();
|
||||
});
|
||||
|
||||
const optimizer = new OptimizationEngine();
|
||||
const signature = optimizer.createSignature('test', 'input', 'output');
|
||||
|
||||
session.run('test prompt', signature);
|
||||
});
|
||||
}, 10000);
|
||||
});
|
||||
Reference in New Issue
Block a user