Files
wifi-densepose/vendor/ruvector/npm/packages/ruvllm/scripts/real-model-compare.js

281 lines
15 KiB
JavaScript

#!/usr/bin/env node
/**
* Real Model Comparison - Qwen 0.5B vs RuvLTRA Claude Code
*
* Uses llama-embedding for actual model inference.
*/
const { execFileSync, execSync } = require('child_process');
const { existsSync } = require('fs');
const { join } = require('path');
const { homedir } = require('os');
// Model locations: both GGUF files live under ~/.ruvllm/models
const MODELS_DIR = join(homedir(), '.ruvllm', 'models');
const [QWEN_MODEL, RUVLTRA_MODEL] = [
  'qwen2.5-0.5b-instruct-q4_k_m.gguf',
  'ruvltra-claude-code-0.5b-q4_k_m.gguf',
].map((fileName) => join(MODELS_DIR, fileName));
// Keyword description per agent; each description is embedded and compared
// against task embeddings when routing. Entry order is the iteration order.
const AGENT_DESCRIPTIONS = Object.fromEntries([
  ['coder', 'implement create write build add code function class component feature'],
  ['researcher', 'research find investigate analyze explore search discover examine'],
  ['reviewer', 'review check evaluate assess inspect examine code quality'],
  ['tester', 'test unit integration e2e coverage mock assertion spec'],
  ['architect', 'design architecture schema system structure plan database'],
  ['security-architect', 'security vulnerability xss injection audit cve authentication'],
  ['debugger', 'debug fix bug error issue broken crash exception trace'],
  ['documenter', 'document readme jsdoc comment explain describe documentation'],
  ['refactorer', 'refactor extract rename consolidate clean restructure simplify'],
  ['optimizer', 'optimize performance slow fast cache speed memory latency'],
  ['devops', 'deploy ci cd kubernetes docker pipeline container infrastructure'],
  ['api-docs', 'openapi swagger api documentation graphql schema endpoint'],
  ['planner', 'plan estimate prioritize sprint roadmap schedule milestone'],
]);
// Routing test cases, written as [task, expected agent] pairs and expanded
// into { task, expected } records.
const ROUTING_TESTS = [
  ['Implement a binary search function in TypeScript', 'coder'],
  ['Write unit tests for the authentication module', 'tester'],
  ['Review the pull request for security vulnerabilities', 'reviewer'],
  ['Research best practices for React state management', 'researcher'],
  ['Design the database schema for user profiles', 'architect'],
  ['Fix the null pointer exception in the login handler', 'debugger'],
  ['Audit the API endpoints for XSS vulnerabilities', 'security-architect'],
  ['Write JSDoc comments for the utility functions', 'documenter'],
  ['Refactor the payment module to use async/await', 'refactorer'],
  ['Optimize the database queries for the dashboard', 'optimizer'],
  ['Set up the CI/CD pipeline for the microservices', 'devops'],
  ['Generate OpenAPI documentation for the REST API', 'api-docs'],
  ['Create a sprint plan for the next two weeks', 'planner'],
  ['Build a React component for user registration', 'coder'],
  ['Debug memory leak in the WebSocket handler', 'debugger'],
  ['Investigate slow API response times', 'researcher'],
  ['Check code for potential race conditions', 'reviewer'],
  ['Add integration tests for the payment gateway', 'tester'],
  ['Plan the architecture for real-time notifications', 'architect'],
  ['Cache the frequently accessed user data', 'optimizer'],
].map(([task, expected]) => ({ task, expected }));
// Similarity test pairs, written as [text1, text2, expected level] tuples and
// expanded into { text1, text2, expected } records.
const SIMILARITY_TESTS = [
  ['implement user authentication', 'create login functionality', 'high'],
  ['write unit tests', 'fix database bug', 'low'],
  ['optimize query performance', 'improve database speed', 'high'],
  ['design system architecture', 'plan software structure', 'high'],
  ['deploy to kubernetes', 'analyze user behavior', 'low'],
  ['refactor legacy code', 'restructure old module', 'high'],
  ['debug memory leak', 'fix memory consumption issue', 'high'],
  ['document api endpoints', 'write openapi spec', 'high'],
].map(([text1, text2, expected]) => ({ text1, text2, expected }));
/**
 * Get an embedding for `text` by running the `llama-embedding` CLI.
 *
 * The binary is executed directly with an argument array (no shell), so
 * quotes, `$`, backticks and backslashes in `modelPath` or `text` reach the
 * program as literal characters instead of being interpreted by a shell.
 *
 * @param {string} modelPath - Model file path, passed to the CLI via `-m`.
 * @param {string} text - Prompt to embed; newlines are replaced with spaces.
 * @returns {number[]|null} The `embedding` field of the last entry in the
 *   `data` array of the CLI's JSON output, or `null` when the command fails
 *   or its output cannot be parsed (the error message is logged).
 */
function getEmbedding(modelPath, text) {
  try {
    // Keep the prompt on a single line, as the previous shell command did.
    const prompt = text.replace(/\n/g, ' ');
    const output = execFileSync(
      'llama-embedding',
      ['-m', modelPath, '-p', prompt, '--embd-output-format', 'json'],
      {
        encoding: 'utf-8',
        maxBuffer: 10 * 1024 * 1024,
        // Discard stderr; replaces the shell-only `2>/dev/null` redirect.
        stdio: ['pipe', 'pipe', 'ignore'],
      }
    );
    const json = JSON.parse(output);
    // Return the last embedding (the full prompt embedding)
    return json.data[json.data.length - 1].embedding;
  } catch (err) {
    console.error(`Error getting embedding: ${err.message}`);
    return null;
  }
}
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]|null|undefined} a - First vector.
 * @param {number[]|null|undefined} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|). Returns 0 when either vector is
 *   missing or the lengths differ; when the magnitude product is 0 the
 *   divisor falls back to 1.
 */
function cosineSimilarity(a, b) {
  if (!a || !b) {
    return 0;
  }
  if (a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let index = 0; index < a.length; index += 1) {
    const valueA = a[index];
    const valueB = b[index];
    dotProduct += valueA * valueB;
    sumSquaresA += valueA * valueA;
    sumSquaresB += valueB * valueB;
  }

  // A zero (or NaN) magnitude product is replaced by 1 to avoid dividing by it.
  const magnitudeProduct = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return dotProduct / (magnitudeProduct || 1);
}
/**
 * Choose the agent whose embedding is most similar to the task embedding.
 *
 * @param {number[]|null} taskEmbedding - Embedding of the task text.
 * @param {Object<string, number[]|null>} agentEmbeddings - Embedding per agent name.
 * @returns {{agent: string, confidence: number}} The best-scoring agent and
 *   its cosine similarity. The first agent wins ties; when no agent scores
 *   above -1 the result is `{ agent: 'coder', confidence: -1 }`.
 */
function routeTask(taskEmbedding, agentEmbeddings) {
  const fallback = { agent: 'coder', confidence: -1 };
  return Object.entries(agentEmbeddings).reduce((best, [agent, embedding]) => {
    const confidence = cosineSimilarity(taskEmbedding, embedding);
    // Strictly greater, so an earlier agent keeps the lead on equal scores.
    return confidence > best.confidence ? { agent, confidence } : best;
  }, fallback);
}
/**
 * Run the routing benchmark for one model.
 *
 * Embeds every agent description, then embeds each ROUTING_TESTS task and
 * routes it to the most similar agent. Progress is printed as it goes.
 *
 * Embedding failures are never scored as hits: an agent whose description
 * could not be embedded is left out of the candidates, and a task that could
 * not be embedded (or that has no candidates to route to) is recorded as
 * incorrect with `actual: null` and `confidence: 0`.
 *
 * @param {string} modelPath - Model file path handed to getEmbedding.
 * @param {string} modelName - Display name used in progress output.
 * @returns {{accuracy: number, correct: number, total: number, results: Array}}
 *   Fraction of correctly routed tasks, the raw counts, and one record per
 *   test: `{ task, expected, actual, correct, confidence }`.
 */
function runRoutingBenchmark(modelPath, modelName) {
  console.log(`\n Computing agent embeddings for ${modelName}...`);
  // Pre-compute agent embeddings, keeping only the ones that succeeded
  const agentEmbeddings = {};
  for (const [agent, description] of Object.entries(AGENT_DESCRIPTIONS)) {
    process.stdout.write(` ${agent}... `);
    const embedding = getEmbedding(modelPath, description);
    if (embedding) {
      agentEmbeddings[agent] = embedding;
      console.log('done');
    } else {
      console.log('failed');
    }
  }
  const canRoute = Object.keys(agentEmbeddings).length > 0;

  console.log(` Running routing tests...`);
  let correct = 0;
  const results = [];
  for (const test of ROUTING_TESTS) {
    process.stdout.write(` "${test.task.slice(0, 40)}..." `);
    const taskEmbedding = canRoute ? getEmbedding(modelPath, test.task) : null;
    if (!taskEmbedding) {
      // Without this guard every similarity would be 0 and the task would be
      // "routed" to the first agent, inflating accuracy for that agent's tests.
      console.log(`embedding failed (expected: ${test.expected}) ✗`);
      results.push({ task: test.task, expected: test.expected, actual: null, correct: false, confidence: 0 });
      continue;
    }
    const { agent, confidence } = routeTask(taskEmbedding, agentEmbeddings);
    const isCorrect = agent === test.expected;
    if (isCorrect) correct++;
    console.log(`${agent} (expected: ${test.expected}) ${isCorrect ? '✓' : '✗'}`);
    results.push({ task: test.task, expected: test.expected, actual: agent, correct: isCorrect, confidence });
  }
  const accuracy = correct / ROUTING_TESTS.length;
  return { accuracy, correct, total: ROUTING_TESTS.length, results };
}
/**
 * Run the similarity benchmark for one model.
 *
 * Embeds both texts of every SIMILARITY_TESTS pair, computes their cosine
 * similarity and classifies the pair as 'high' when the similarity is above
 * 0.6, otherwise 'low'. Progress is printed as it goes.
 *
 * A pair for which either embedding failed is recorded as incorrect with
 * `similarity: null` and `predicted: null`, so a failure is never scored as a
 * correct 'low' prediction.
 *
 * @param {string} modelPath - Model file path handed to getEmbedding.
 * @param {string} modelName - Display name used in progress output.
 * @returns {{accuracy: number, correct: number, total: number, results: Array}}
 *   Fraction of correctly classified pairs, the raw counts, and one record per
 *   pair: `{ text1, text2, similarity, predicted, expected, correct }`.
 */
function runSimilarityBenchmark(modelPath, modelName) {
  // Single cut-off: similarity strictly above this is 'high', otherwise 'low'.
  const highThreshold = 0.6;
  console.log(`\n Running similarity tests for ${modelName}...`);
  let correct = 0;
  const results = [];
  for (const test of SIMILARITY_TESTS) {
    const { text1, text2, expected } = test;
    process.stdout.write(` "${text1}" vs "${text2}"... `);
    const emb1 = getEmbedding(modelPath, text1);
    const emb2 = getEmbedding(modelPath, text2);
    if (!emb1 || !emb2) {
      // A missing embedding would otherwise yield similarity 0, i.e. 'low'.
      console.log(`embedding failed (expected: ${expected}) ✗`);
      results.push({ text1, text2, similarity: null, predicted: null, expected, correct: false });
      continue;
    }
    const similarity = cosineSimilarity(emb1, emb2);
    const predicted = similarity > highThreshold ? 'high' : 'low';
    const isCorrect = predicted === expected;
    if (isCorrect) correct++;
    console.log(`${(similarity * 100).toFixed(1)}% (${predicted}, expected: ${expected}) ${isCorrect ? '✓' : '✗'}`);
    results.push({ text1, text2, similarity, predicted, expected, correct: isCorrect });
  }
  const accuracy = correct / SIMILARITY_TESTS.length;
  return { accuracy, correct, total: SIMILARITY_TESTS.length, results };
}
/**
 * Center `text` between `║` borders so the line is `innerWidth + 2` wide.
 */
function formatBannerLine(text, innerWidth) {
  const leftPad = Math.max(0, Math.floor((innerWidth - text.length) / 2));
  return `${`║${' '.repeat(leftPad)}${text}`.padEnd(innerWidth + 1)}║`;
}

/**
 * Format one score row of the results table; the higher score gets a `✓`.
 * `widths` holds the inner widths of the label, Qwen and RuvLTRA columns.
 */
function formatScoreRow(label, qwenScore, ruvltraScore, widths) {
  const [labelWidth, qwenWidth, ruvltraWidth] = widths;
  const cell = (score, isBest, width) => {
    const pct = `${(score * 100).toFixed(1)}%`;
    // 1 marker column + right-aligned value + 2 trailing spaces = width.
    return `${isBest ? '✓' : ' '}${pct.padStart(width - 3)}  `;
  };
  const qwenCell = cell(qwenScore, qwenScore > ruvltraScore, qwenWidth);
  const ruvltraCell = cell(ruvltraScore, ruvltraScore > qwenScore, ruvltraWidth);
  return `│${` ${label}`.padEnd(labelWidth)}│${qwenCell}│${ruvltraCell}│`;
}

/**
 * Main comparison.
 *
 * Verifies that both model files exist (printing a download hint and exiting
 * with status 1 otherwise), runs the routing and similarity benchmarks for
 * each model, and prints a comparison table plus a verdict. The overall score
 * weights routing accuracy at 0.6 and similarity accuracy at 0.4; equal
 * overall scores are reported as a tie rather than as a win for either model.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const heavyRule = '═══════════════════════════════════════════════════════════════════════════════════';
  const lightRule = '─────────────────────────────────────────────────────────────────';
  const bannerTop = '╔═══════════════════════════════════════════════════════════════════════════════════╗';
  const bannerBottom = '╚═══════════════════════════════════════════════════════════════════════════════════╝';
  const tableTop = '┌─────────────────────────────┬───────────────┬───────────────┐';
  const tableDivider = '├─────────────────────────────┼───────────────┼───────────────┤';
  const tableBottom = '└─────────────────────────────┴───────────────┴───────────────┘';

  // Text lines are padded from the border literals so they always line up.
  const bannerWidth = bannerTop.length - 2;
  console.log(`\n${bannerTop}`);
  console.log(formatBannerLine('REAL MODEL COMPARISON: Qwen 0.5B vs RuvLTRA Claude Code', bannerWidth));
  console.log(formatBannerLine('Using llama-embedding inference', bannerWidth));
  console.log(`${bannerBottom}\n`);

  // Check models exist
  const requiredModels = [
    {
      label: 'Qwen',
      path: QWEN_MODEL,
      hint: 'Download with: curl -L -o ~/.ruvllm/models/qwen2.5-0.5b-instruct-q4_k_m.gguf "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf"',
    },
    {
      label: 'RuvLTRA',
      path: RUVLTRA_MODEL,
      hint: 'Download with: ruvllm models download claude-code',
    },
  ];
  for (const { label, path, hint } of requiredModels) {
    if (!existsSync(path)) {
      console.error(`${label} model not found at: ${path}`);
      console.error(hint);
      process.exit(1);
    }
  }
  console.log('Models found:');
  console.log(` Qwen: ${QWEN_MODEL}`);
  console.log(` RuvLTRA: ${RUVLTRA_MODEL}`);

  // Run benchmarks for both models
  console.log(`\n${lightRule}`);
  console.log(' QWEN 0.5B BASE MODEL');
  console.log(lightRule);
  const qwenRouting = runRoutingBenchmark(QWEN_MODEL, 'Qwen 0.5B');
  const qwenSimilarity = runSimilarityBenchmark(QWEN_MODEL, 'Qwen 0.5B');
  console.log(`\n${lightRule}`);
  console.log(' RUVLTRA CLAUDE CODE MODEL');
  console.log(lightRule);
  const ruvltraRouting = runRoutingBenchmark(RUVLTRA_MODEL, 'RuvLTRA Claude Code');
  const ruvltraSimilarity = runSimilarityBenchmark(RUVLTRA_MODEL, 'RuvLTRA Claude Code');

  // Overall score: routing weighted 60%, similarity 40%
  const qwenOverall = qwenRouting.accuracy * 0.6 + qwenSimilarity.accuracy * 0.4;
  const ruvltraOverall = ruvltraRouting.accuracy * 0.6 + ruvltraSimilarity.accuracy * 0.4;

  // Results summary
  console.log(`\n${heavyRule}`);
  console.log(' COMPARISON RESULTS');
  console.log(`${heavyRule}\n`);
  const columnWidths = tableTop.slice(1, -1).split('┬').map((segment) => segment.length);
  const [labelWidth, qwenWidth, ruvltraWidth] = columnWidths;
  console.log(tableTop);
  console.log(`│${' Metric'.padEnd(labelWidth)}│${' Qwen Base'.padEnd(qwenWidth)}│${' RuvLTRA'.padEnd(ruvltraWidth)}│`);
  console.log(tableDivider);
  console.log(formatScoreRow('Routing Accuracy', qwenRouting.accuracy, ruvltraRouting.accuracy, columnWidths));
  console.log(formatScoreRow('Similarity Detection', qwenSimilarity.accuracy, ruvltraSimilarity.accuracy, columnWidths));
  console.log(tableDivider);
  console.log(formatScoreRow('Overall Score (60/40)', qwenOverall, ruvltraOverall, columnWidths));
  console.log(tableBottom);

  // Winner announcement; equal scores are a tie, not a win for either side
  let winner = 'None (tie)';
  if (ruvltraOverall > qwenOverall) {
    winner = 'RuvLTRA Claude Code';
  } else if (qwenOverall > ruvltraOverall) {
    winner = 'Qwen 0.5B Base';
  }
  const improvement = Math.abs(ruvltraOverall - qwenOverall) * 100;
  console.log(`\n${heavyRule}`);
  console.log(` WINNER: ${winner}`);
  console.log(heavyRule);
  if (ruvltraOverall > qwenOverall) {
    console.log(`\n RuvLTRA outperforms Qwen base by ${improvement.toFixed(1)} percentage points.`);
    console.log(' Fine-tuning for Claude Code workflows provides measurable improvements.');
  } else if (qwenOverall > ruvltraOverall) {
    console.log(`\n Qwen base outperforms RuvLTRA by ${improvement.toFixed(1)} percentage points.`);
    console.log(' Consider additional fine-tuning or different training approach.');
  } else {
    console.log('\n Both models perform equally. Fine-tuning may need adjustment.');
  }
  console.log('\n');
}
// Log any unexpected failure and report it to the caller with a non-zero
// exit status instead of exiting 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});