wifi-densepose/vendor/ruvector/npm/packages/ruvllm/scripts/optimized-model-compare.js

#!/usr/bin/env node
/**
 * Optimized Model Comparison
 *
 * Key insight: Shorter, more focused descriptions work better for embeddings.
 * This version tests:
 * 1. Focused discriminating keywords (no overlap)
 * 2. Multi-embedding approach (multiple short phrases per agent)
 * 3. Combining multiple description variants per agent (max similarity; no weighted voting is implemented)
 */

const { execSync, execFileSync } = require('child_process');
const { existsSync } = require('fs');
const { homedir } = require('os');
const { join } = require('path');

// Directory holding the local GGUF model files: ~/.ruvllm/models.
const MODELS_DIR = join(homedir(), '.ruvllm', 'models');
// Model reported as "QWEN 0.5B BASE" in the comparison output.
const QWEN_MODEL = join(MODELS_DIR, 'qwen2.5-0.5b-instruct-q4_k_m.gguf');
// Model reported as "RUVLTRA CLAUDE CODE" in the comparison output.
const RUVLTRA_MODEL = join(MODELS_DIR, 'ruvltra-claude-code-0.5b-q4_k_m.gguf');

// V1: Original keywords (baseline)
// Maps each agent name to one space-separated keyword string. Each string is
// embedded as a whole by runSingleBenchmark (run as 'V1-Original' in main).
const DESCRIPTIONS_V1 = {
  coder: 'implement create write build add code function class component feature',
  researcher: 'research find investigate analyze explore search discover examine',
  reviewer: 'review check evaluate assess inspect examine code quality',
  tester: 'test unit integration e2e coverage mock assertion spec',
  architect: 'design architecture schema system structure plan database',
  'security-architect': 'security vulnerability xss injection audit cve authentication',
  debugger: 'debug fix bug error issue broken crash exception trace',
  documenter: 'document readme jsdoc comment explain describe documentation',
  refactorer: 'refactor extract rename consolidate clean restructure simplify',
  optimizer: 'optimize performance slow fast cache speed memory latency',
  devops: 'deploy ci cd kubernetes docker pipeline container infrastructure',
  'api-docs': 'openapi swagger api documentation graphql schema endpoint',
  planner: 'plan estimate prioritize sprint roadmap schedule milestone',
};

// V4: Focused discriminating keywords - remove overlap, add unique identifiers
// Same agent keys as DESCRIPTIONS_V1, with one keyword string per agent.
// Run as 'V4-Focused' in main. NOTE(review): a few words still appear under
// more than one agent (e.g. 'documentation', 'spec'), so overlap is reduced
// rather than eliminated.
const DESCRIPTIONS_V4 = {
  coder: 'implement build create function component feature typescript react',
  researcher: 'research investigate explore discover best practices patterns',
  reviewer: 'review pull request code quality style check pr',
  tester: 'test unit integration e2e tests testing coverage spec',
  architect: 'design architecture schema database system structure diagram',
  'security-architect': 'security vulnerability xss injection csrf audit cve',
  debugger: 'debug fix bug error exception crash trace null pointer',
  documenter: 'jsdoc comments readme documentation describe explain',
  refactorer: 'refactor async await modernize restructure extract',
  optimizer: 'optimize cache performance speed latency slow fast',
  devops: 'deploy ci cd kubernetes docker pipeline infrastructure',
  'api-docs': 'openapi swagger rest api spec endpoint documentation',
  planner: 'sprint plan roadmap milestone estimate schedule prioritize',
};

// V5: Multi-phrase approach - multiple short embeddings per agent, use max similarity
// Maps each agent name to an array of four short phrases. runMultiBenchmark
// embeds every phrase separately and routeTaskMulti scores an agent by the
// highest similarity among its phrases (run as 'V5-Multi' in main).
const MULTI_DESCRIPTIONS = {
  coder: [
    'implement function',
    'build component',
    'create typescript code',
    'write feature',
  ],
  researcher: [
    'research best practices',
    'investigate issue',
    'explore solutions',
    'analyze patterns',
  ],
  reviewer: [
    'review pull request',
    'check code quality',
    'evaluate code',
    'assess implementation',
  ],
  tester: [
    'write unit tests',
    'add integration tests',
    'create test coverage',
    'test authentication',
  ],
  architect: [
    'design database schema',
    'plan architecture',
    'system structure',
    'microservices design',
  ],
  'security-architect': [
    'audit xss vulnerability',
    'security audit',
    'check injection',
    'cve vulnerability',
  ],
  debugger: [
    'fix bug',
    'debug error',
    'trace exception',
    'fix null pointer',
  ],
  documenter: [
    'write jsdoc comments',
    'create readme',
    'document functions',
    'explain code',
  ],
  refactorer: [
    'refactor to async await',
    'restructure code',
    'modernize legacy',
    'extract function',
  ],
  optimizer: [
    'cache data',
    'optimize query',
    'improve performance',
    'reduce latency',
  ],
  devops: [
    'deploy kubernetes',
    'setup ci cd',
    'docker container',
    'infrastructure pipeline',
  ],
  'api-docs': [
    'generate openapi',
    'swagger documentation',
    'rest api spec',
    'api endpoint docs',
  ],
  planner: [
    'create sprint plan',
    'estimate timeline',
    'prioritize tasks',
    'roadmap milestone',
  ],
};

// Labelled routing cases shared by every benchmark run: 20 task sentences,
// each paired with the agent name the router is expected to pick. Accuracy is
// the fraction of tasks whose routed agent strictly equals `expected`.
const ROUTING_TESTS = [
  { task: 'Implement a binary search function in TypeScript', expected: 'coder' },
  { task: 'Write unit tests for the authentication module', expected: 'tester' },
  { task: 'Review the pull request for security vulnerabilities', expected: 'reviewer' },
  { task: 'Research best practices for React state management', expected: 'researcher' },
  { task: 'Design the database schema for user profiles', expected: 'architect' },
  { task: 'Fix the null pointer exception in the login handler', expected: 'debugger' },
  { task: 'Audit the API endpoints for XSS vulnerabilities', expected: 'security-architect' },
  { task: 'Write JSDoc comments for the utility functions', expected: 'documenter' },
  { task: 'Refactor the payment module to use async/await', expected: 'refactorer' },
  { task: 'Optimize the database queries for the dashboard', expected: 'optimizer' },
  { task: 'Set up the CI/CD pipeline for the microservices', expected: 'devops' },
  { task: 'Generate OpenAPI documentation for the REST API', expected: 'api-docs' },
  { task: 'Create a sprint plan for the next two weeks', expected: 'planner' },
  { task: 'Build a React component for user registration', expected: 'coder' },
  { task: 'Debug memory leak in the WebSocket handler', expected: 'debugger' },
  { task: 'Investigate slow API response times', expected: 'researcher' },
  { task: 'Check code for potential race conditions', expected: 'reviewer' },
  { task: 'Add integration tests for the payment gateway', expected: 'tester' },
  { task: 'Plan the architecture for real-time notifications', expected: 'architect' },
  { task: 'Cache the frequently accessed user data', expected: 'optimizer' },
];

/**
 * Compute an embedding for `text` by running the `llama-embedding` CLI.
 *
 * The binary is spawned directly with an argument array (no shell), so
 * characters such as `$`, backticks, quotes or backslashes in `text` or
 * `modelPath` reach the CLI literally instead of being interpreted by a shell.
 *
 * @param {string} modelPath - Model file path passed to the CLI's `-m` flag.
 * @param {string} text - Prompt to embed; newlines are replaced by spaces.
 * @returns {number[]|null} The `embedding` field of the last entry in the
 *   CLI's JSON `data` array, or `null` if the CLI could not be run, exited
 *   with an error, or produced output that could not be parsed.
 */
function getEmbedding(modelPath, text) {
  try {
    const prompt = text.replace(/\n/g, ' ');
    const result = execFileSync(
      'llama-embedding',
      ['-m', modelPath, '-p', prompt, '--embd-output-format', 'json'],
      {
        encoding: 'utf-8',
        maxBuffer: 10 * 1024 * 1024,
        // Discard stderr (the CLI's diagnostics); only stdout carries the JSON.
        stdio: ['pipe', 'pipe', 'ignore'],
      }
    );
    const json = JSON.parse(result);
    return json.data[json.data.length - 1].embedding;
  } catch (err) {
    // Best-effort by design: callers treat null as "no embedding available".
    return null;
  }
}

/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {number[]|null|undefined} a - First vector.
 * @param {number[]|null|undefined} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector is missing
 *   or the lengths differ. A zero magnitude product is replaced by 1 so the
 *   division never yields NaN for all-zero vectors.
 */
function cosineSimilarity(a, b) {
  if (!a || !b) {
    return 0;
  }
  if (a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let idx = 0; idx < a.length; idx += 1) {
    const left = a[idx];
    const right = b[idx];
    dotProduct += left * right;
    squaredA += left * left;
    squaredB += right * right;
  }

  const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  return dotProduct / (magnitude || 1);
}

/**
 * Single-embedding routing: pick the agent whose one description embedding
 * is most similar to the task embedding.
 *
 * Starts from the default { agent: 'coder', confidence: -1 }; an agent only
 * replaces the current pick when its similarity is strictly higher, so ties
 * keep the earlier agent.
 */
function routeTaskSingle(taskEmbedding, agentEmbeddings) {
  const winner = { agent: 'coder', confidence: -1 };

  Object.keys(agentEmbeddings).forEach((name) => {
    const score = cosineSimilarity(taskEmbedding, agentEmbeddings[name]);
    if (score > winner.confidence) {
      winner.agent = name;
      winner.confidence = score;
    }
  });

  return winner;
}

/**
 * Multi-embedding routing: each agent is scored by the best similarity among
 * its phrase embeddings, and the highest-scoring agent wins.
 *
 * Starts from the default { agent: 'coder', confidence: -1 }; comparisons are
 * strict, so ties keep the earlier agent and an agent with no phrases never
 * replaces the default.
 */
function routeTaskMulti(taskEmbedding, multiAgentEmbeddings) {
  const winner = { agent: 'coder', confidence: -1 };

  Object.keys(multiAgentEmbeddings).forEach((name) => {
    // Best score for this agent across all of its phrase embeddings.
    const peak = multiAgentEmbeddings[name].reduce((highest, phraseEmbedding) => {
      const score = cosineSimilarity(taskEmbedding, phraseEmbedding);
      return score > highest ? score : highest;
    }, -1);

    if (peak > winner.confidence) {
      winner.agent = name;
      winner.confidence = peak;
    }
  });

  return winner;
}

/**
 * Run single-embedding benchmark
 *
 * Embeds one description per agent, then routes every ROUTING_TESTS task with
 * routeTaskSingle and counts how many land on the expected agent.
 *
 * A task whose embedding could not be computed (getEmbedding returned null)
 * is counted as incorrect instead of being routed: a null embedding scores 0
 * against every agent, which would otherwise hand the task to the first agent
 * and credit a spurious hit whenever that agent is the expected one.
 *
 * @param {string} modelPath - Model file passed to getEmbedding.
 * @param {Object<string, string>} descriptions - Agent name -> description text.
 * @param {string} version - Label used in progress output and in the result.
 * @returns {{accuracy: number, correct: number, total: number, version: string, failed: number}}
 *   `failed` is the number of tasks whose embedding could not be computed.
 */
function runSingleBenchmark(modelPath, descriptions, version) {
  process.stdout.write(`  [${version}] Computing embeddings... `);

  const agentEmbeddings = {};
  for (const [agent, desc] of Object.entries(descriptions)) {
    agentEmbeddings[agent] = getEmbedding(modelPath, desc);
  }
  console.log('done');

  let correct = 0;
  let failed = 0;
  for (const test of ROUTING_TESTS) {
    const taskEmb = getEmbedding(modelPath, test.task);
    if (!taskEmb) {
      // No embedding means no meaningful routing decision; score it as a miss.
      failed++;
      continue;
    }
    const { agent } = routeTaskSingle(taskEmb, agentEmbeddings);
    if (agent === test.expected) correct++;
  }

  if (failed > 0) {
    console.warn(`  [${version}] warning: ${failed}/${ROUTING_TESTS.length} task embeddings failed and were counted as incorrect`);
  }

  return { accuracy: correct / ROUTING_TESTS.length, correct, total: ROUTING_TESTS.length, version, failed };
}

/**
 * Run multi-embedding benchmark
 *
 * Embeds every phrase of every agent, then routes each ROUTING_TESTS task
 * with routeTaskMulti and records a per-task outcome.
 *
 * A task whose embedding could not be computed (getEmbedding returned null)
 * is recorded as incorrect with `got` set to '(embedding failed)' instead of
 * being routed: a null embedding scores 0 against every phrase, which would
 * otherwise hand the task to the first agent and credit a spurious hit
 * whenever that agent is the expected one.
 *
 * @param {string} modelPath - Model file passed to getEmbedding.
 * @param {Object<string, string[]>} multiDescriptions - Agent name -> phrases.
 * @param {string} version - Label used in progress output and in the result.
 * @returns {{accuracy: number, correct: number, total: number, version: string,
 *   results: Array<{task: string, expected: string, got: string, correct: boolean}>,
 *   failed: number}} `failed` is the number of tasks whose embedding could
 *   not be computed.
 */
function runMultiBenchmark(modelPath, multiDescriptions, version) {
  process.stdout.write(`  [${version}] Computing multi-embeddings... `);

  const multiAgentEmbeddings = {};
  for (const [agent, phrases] of Object.entries(multiDescriptions)) {
    multiAgentEmbeddings[agent] = phrases.map((p) => getEmbedding(modelPath, p));
  }
  console.log('done');

  let correct = 0;
  let failed = 0;
  const results = [];
  for (const test of ROUTING_TESTS) {
    const taskEmb = getEmbedding(modelPath, test.task);
    if (!taskEmb) {
      // No embedding means no meaningful routing decision; score it as a miss.
      failed++;
      results.push({ task: test.task, expected: test.expected, got: '(embedding failed)', correct: false });
      continue;
    }
    const { agent } = routeTaskMulti(taskEmb, multiAgentEmbeddings);
    const isCorrect = agent === test.expected;
    if (isCorrect) correct++;
    results.push({ task: test.task, expected: test.expected, got: agent, correct: isCorrect });
  }

  if (failed > 0) {
    console.warn(`  [${version}] warning: ${failed}/${ROUTING_TESTS.length} task embeddings failed and were counted as incorrect`);
  }

  return { accuracy: correct / ROUTING_TESTS.length, correct, total: ROUTING_TESTS.length, version, results, failed };
}

/**
 * Entry point: runs the V1, V4 and V5 routing benchmarks against both models
 * and prints a comparison table, the best overall combination, the per-task
 * V5 results for RuvLTRA, and the final margin between the two models.
 *
 * Exits with status 1 before running anything if either model file is
 * missing: every embedding for a missing model would fail, and the resulting
 * accuracy figures would be meaningless.
 */
async function main() {
  console.log('\n╔═══════════════════════════════════════════════════════════════════════════════════╗');
  console.log('║           OPTIMIZED MODEL COMPARISON: Focused & Multi-Embedding                   ║');
  console.log('╚═══════════════════════════════════════════════════════════════════════════════════╝\n');

  // Both models are benchmarked below, so both must be present.
  let modelsMissing = false;
  if (!existsSync(RUVLTRA_MODEL)) {
    console.error('RuvLTRA model not found.');
    modelsMissing = true;
  }
  if (!existsSync(QWEN_MODEL)) {
    console.error('Qwen model not found.');
    modelsMissing = true;
  }
  if (modelsMissing) {
    process.exit(1);
  }

  console.log('Strategies:');
  console.log('  V1: Original keywords (baseline)');
  console.log('  V4: Focused discriminating keywords');
  console.log('  V5: Multi-phrase (4 phrases per agent, max similarity)\n');

  // RuvLTRA tests
  console.log('─────────────────────────────────────────────────────────────────');
  console.log('                   RUVLTRA CLAUDE CODE');
  console.log('─────────────────────────────────────────────────────────────────');

  const ruvV1 = runSingleBenchmark(RUVLTRA_MODEL, DESCRIPTIONS_V1, 'V1-Original');
  const ruvV4 = runSingleBenchmark(RUVLTRA_MODEL, DESCRIPTIONS_V4, 'V4-Focused');
  const ruvV5 = runMultiBenchmark(RUVLTRA_MODEL, MULTI_DESCRIPTIONS, 'V5-Multi');

  // Qwen tests
  console.log('\n─────────────────────────────────────────────────────────────────');
  console.log('                      QWEN 0.5B BASE');
  console.log('─────────────────────────────────────────────────────────────────');

  const qwenV1 = runSingleBenchmark(QWEN_MODEL, DESCRIPTIONS_V1, 'V1-Original');
  const qwenV4 = runSingleBenchmark(QWEN_MODEL, DESCRIPTIONS_V4, 'V4-Focused');
  const qwenV5 = runMultiBenchmark(QWEN_MODEL, MULTI_DESCRIPTIONS, 'V5-Multi');

  // Results
  console.log('\n═══════════════════════════════════════════════════════════════════════════════════');
  console.log('                              RESULTS');
  console.log('═══════════════════════════════════════════════════════════════════════════════════\n');

  console.log('┌─────────────────────────┬───────────────┬───────────────┬───────────────┐');
  console.log('│ Strategy                │ RuvLTRA       │ Qwen Base     │ RuvLTRA Delta │');
  console.log('├─────────────────────────┼───────────────┼───────────────┼───────────────┤');

  // Format a 0..1 accuracy as a right-aligned percentage cell.
  const fmt = (v) => `${(v * 100).toFixed(1)}%`.padStart(12);
  // Format the signed difference to the baseline, in percentage points.
  const fmtDelta = (v, base) => {
    const delta = (v - base) * 100;
    const sign = delta >= 0 ? '+' : '';
    return `${sign}${delta.toFixed(1)}%`.padStart(12);
  };

  console.log(`│ V1: Original            │${fmt(ruvV1.accuracy)}  │${fmt(qwenV1.accuracy)}  │    baseline   │`);
  console.log(`│ V4: Focused             │${fmt(ruvV4.accuracy)}  │${fmt(qwenV4.accuracy)}  │${fmtDelta(ruvV4.accuracy, ruvV1.accuracy)}  │`);
  console.log(`│ V5: Multi-phrase        │${fmt(ruvV5.accuracy)}  │${fmt(qwenV5.accuracy)}  │${fmtDelta(ruvV5.accuracy, ruvV1.accuracy)}  │`);
  console.log('└─────────────────────────┴───────────────┴───────────────┴───────────────┘');

  // Best result
  const allResults = [
    { model: 'RuvLTRA', ...ruvV1 },
    { model: 'RuvLTRA', ...ruvV4 },
    { model: 'RuvLTRA', ...ruvV5 },
    { model: 'Qwen', ...qwenV1 },
    { model: 'Qwen', ...qwenV4 },
    { model: 'Qwen', ...qwenV5 },
  ];

  // On equal accuracy the later entry in the list wins.
  const best = allResults.reduce((a, b) => a.accuracy > b.accuracy ? a : b);

  console.log(`\n  BEST: ${best.model} + ${best.version} = ${(best.accuracy * 100).toFixed(1)}%`);

  // Show V5 detailed results
  console.log('\n─────────────────────────────────────────────────────────────────');
  console.log('                V5 MULTI-PHRASE DETAILED (RuvLTRA)');
  console.log('─────────────────────────────────────────────────────────────────');

  for (const r of ruvV5.results) {
    const mark = r.correct ? '✓' : '✗';
    const task = r.task.slice(0, 50).padEnd(50);
    const exp = r.expected.padEnd(18);
    const got = r.got.padEnd(18);
    console.log(`  ${mark} ${task} ${exp} ${r.correct ? '' : '→ ' + got}`);
  }

  // Final comparison
  const ruvBest = [ruvV1, ruvV4, ruvV5].reduce((a, b) => a.accuracy > b.accuracy ? a : b);
  const qwenBest = [qwenV1, qwenV4, qwenV5].reduce((a, b) => a.accuracy > b.accuracy ? a : b);

  // Name whichever model is actually ahead rather than always crediting RuvLTRA.
  const margin = (ruvBest.accuracy - qwenBest.accuracy) * 100;
  const leader = margin >= 0 ? 'RuvLTRA' : 'Qwen';

  console.log('\n═══════════════════════════════════════════════════════════════════════════════════');
  console.log('                           FINAL WINNER');
  console.log('═══════════════════════════════════════════════════════════════════════════════════');
  console.log(`\n  RuvLTRA best: ${ruvBest.version} = ${(ruvBest.accuracy * 100).toFixed(1)}%`);
  console.log(`  Qwen best:    ${qwenBest.version} = ${(qwenBest.accuracy * 100).toFixed(1)}%`);
  console.log(`\n  Margin: ${leader} leads by ${Math.abs(margin).toFixed(1)} points`);
  console.log('\n');
}

// Run the comparison; on an unexpected failure, log it and make the process
// exit with a non-zero status so callers can detect that the run failed.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});