Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/npm/packages/ruvllm/scripts/training/claude-hard-negatives.js
+++ b/npm/packages/ruvllm/scripts/training/claude-hard-negatives.js
@@ -0,0 +1,222 @@
+#!/usr/bin/env node
+/**
+ * Claude-Powered Hard Negative Generator for SOTA Agent Routing
+ *
+ * Uses Claude Opus 4.5 to generate high-quality confusing triplets
+ * that push embedding-only accuracy toward 100%.
+ */
+
+const fs = require('fs');
+const path = require('path');
+require('dotenv').config({ path: path.resolve(__dirname, '../../../../../.env') });
+
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
+if (!ANTHROPIC_API_KEY) {
+  console.error('Error: ANTHROPIC_API_KEY not found in .env');
+  process.exit(1);
+}
+
+// Agent types and their descriptions
+const AGENTS = {
+  coder: 'Implements code, builds features, writes functions',
+  researcher: 'Investigates problems, explores documentation, gathers information',
+  reviewer: 'Reviews pull requests, checks code quality, suggests improvements',
+  tester: 'Writes tests, validates behavior, ensures coverage',
+  architect: 'Designs systems, creates schemas, plans architecture',
+  'security-architect': 'Audits for vulnerabilities, checks security, reviews auth',
+  debugger: 'Fixes bugs, traces errors, diagnoses issues',
+  documenter: 'Writes documentation, adds comments, creates READMEs',
+  refactorer: 'Refactors code, modernizes patterns, improves structure',
+  optimizer: 'Optimizes performance, adds caching, improves speed',
+  devops: 'Deploys apps, sets up CI/CD, manages infrastructure',
+  'api-docs': 'Generates OpenAPI specs, documents endpoints, creates Swagger',
+  planner: 'Creates sprint plans, estimates timelines, prioritizes tasks'
+};
+
+// Confusing pairs - agent types that are easily mixed up
+const CONFUSING_PAIRS = [
+  ['coder', 'refactorer'],        // Both modify code
+  ['researcher', 'architect'],    // Both do analysis
+  ['reviewer', 'tester'],         // Both validate
+  ['debugger', 'optimizer'],      // Both fix issues
+  ['documenter', 'api-docs'],     // Both write docs
+  ['architect', 'planner'],       // Both plan
+  ['security-architect', 'reviewer'], // Both check code
+  ['coder', 'debugger'],          // Both write/fix code
+  ['tester', 'debugger'],         // Both find problems
+  ['optimizer', 'architect']      // Both improve systems
+];
+
+async function callClaude(prompt) {
+  const response = await fetch('https://api.anthropic.com/v1/messages', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'x-api-key': ANTHROPIC_API_KEY,
+      'anthropic-version': '2023-06-01'
+    },
+    body: JSON.stringify({
+      model: 'claude-opus-4-5-20251101',
+      max_tokens: 4096,
+      messages: [{
+        role: 'user',
+        content: prompt
+      }]
+    })
+  });
+
+  if (!response.ok) {
+    const error = await response.text();
+    throw new Error(`Claude API error: ${response.status} - ${error}`);
+  }
+
+  const data = await response.json();
+  return data.content[0].text;
+}
+
+async function generateHardNegatives(pair, count = 10) {
+  const [agent1, agent2] = pair;
+
+  const prompt = `You are helping train an AI routing model. Generate ${count} task descriptions that are AMBIGUOUS between "${agent1}" and "${agent2}" agents.
+
+Agent descriptions:
+- ${agent1}: ${AGENTS[agent1]}
+- ${agent2}: ${AGENTS[agent2]}
+
+Generate tasks that could reasonably be assigned to either agent but have a subtle preference for one.
+
+Format each line as JSON:
+{"anchor": "task description", "positive": "correct_agent", "negative": "wrong_agent", "isHard": true, "reason": "why this is confusing"}
+
+Requirements:
+1. Tasks should be realistic software development scenarios
+2. The distinction should be subtle but learnable
+3. Include edge cases and ambiguous wording
+4. Mix which agent is the positive/negative
+
+Generate exactly ${count} examples, one per line:`;
+
+  const response = await callClaude(prompt);
+
+  // Parse response - extract JSON lines
+  const lines = response.split('\n').filter(line => line.trim().startsWith('{'));
+  const triplets = [];
+
+  for (const line of lines) {
+    try {
+      const triplet = JSON.parse(line);
+      if (triplet.anchor && triplet.positive && triplet.negative) {
+        triplets.push({
+          anchor: triplet.anchor,
+          positive: triplet.positive,
+          negative: triplet.negative,
+          isHard: true
+        });
+      }
+    } catch (e) {
+      // Skip malformed JSON
+    }
+  }
+
+  return triplets;
+}
+
+async function evaluateWithGRPO(triplets, model = 'keyword-first') {
+  // GRPO-style evaluation: Use Claude to judge if predictions are correct
+  const prompt = `You are evaluating an AI agent router. For each task, determine which agent should handle it.
+
+Agents: ${Object.keys(AGENTS).join(', ')}
+
+Tasks to evaluate:
+${triplets.slice(0, 10).map((t, i) => `${i + 1}. "${t.anchor}"`).join('\n')}
+
+For each task, respond with the agent name that should handle it and your confidence (0-1).
+Format: 1. agent_name (0.95)`;
+
+  const response = await callClaude(prompt);
+  console.log('\nGRPO Evaluation (Claude as judge):');
+  console.log(response);
+
+  return response;
+}
+
+async function main() {
+  console.log('╔═══════════════════════════════════════════════════════════════════════════════════╗');
+  console.log('║     Claude-Powered Hard Negative Generator for SOTA Agent Routing                ║');
+  console.log('╚═══════════════════════════════════════════════════════════════════════════════════╝\n');
+
+  const args = process.argv.slice(2);
+  const outputPath = args.find(a => a.startsWith('--output='))?.split('=')[1]
+    || path.join(process.env.HOME, '.ruvllm/training/claude-hard-negatives.jsonl');
+  const tripletCount = parseInt(args.find(a => a.startsWith('--count='))?.split('=')[1] || '5');
+  const doGRPO = args.includes('--grpo');
+
+  console.log(`Configuration:`);
+  console.log(`  Output: ${outputPath}`);
+  console.log(`  Triplets per pair: ${tripletCount}`);
+  console.log(`  Confusing pairs: ${CONFUSING_PAIRS.length}`);
+  console.log(`  Total expected: ~${CONFUSING_PAIRS.length * tripletCount} triplets`);
+  console.log(`  GRPO evaluation: ${doGRPO}`);
+  console.log();
+
+  const allTriplets = [];
+
+  console.log('Generating hard negatives using Claude Opus 4.5...\n');
+
+  for (const pair of CONFUSING_PAIRS) {
+    console.log(`  Generating for ${pair[0]} vs ${pair[1]}...`);
+    try {
+      const triplets = await generateHardNegatives(pair, tripletCount);
+      allTriplets.push(...triplets);
+      console.log(`    ✓ Generated ${triplets.length} triplets`);
+    } catch (error) {
+      console.log(`    ✗ Error: ${error.message}`);
+    }
+
+    // Rate limiting - wait between requests
+    await new Promise(resolve => setTimeout(resolve, 1000));
+  }
+
+  console.log(`\nTotal triplets generated: ${allTriplets.length}`);
+
+  // Save triplets
+  const dir = path.dirname(outputPath);
+  if (!fs.existsSync(dir)) {
+    fs.mkdirSync(dir, { recursive: true });
+  }
+
+  const jsonl = allTriplets.map(t => JSON.stringify(t)).join('\n');
+  fs.writeFileSync(outputPath, jsonl);
+  console.log(`Saved to: ${outputPath}`);
+
+  // Optional GRPO evaluation
+  if (doGRPO && allTriplets.length > 0) {
+    console.log('\n─────────────────────────────────────────────────────────────────');
+    console.log('                     GRPO EVALUATION');
+    console.log('─────────────────────────────────────────────────────────────────\n');
+    await evaluateWithGRPO(allTriplets);
+  }
+
+  // Show sample
+  console.log('\n─────────────────────────────────────────────────────────────────');
+  console.log('                     SAMPLE TRIPLETS');
+  console.log('─────────────────────────────────────────────────────────────────\n');
+
+  for (const triplet of allTriplets.slice(0, 5)) {
+    console.log(`  Task: "${triplet.anchor}"`);
+    console.log(`  → Correct: ${triplet.positive}, Wrong: ${triplet.negative}`);
+    console.log();
+  }
+
+  console.log('═══════════════════════════════════════════════════════════════════════════════════');
+  console.log('                              NEXT STEPS');
+  console.log('═══════════════════════════════════════════════════════════════════════════════════\n');
+  console.log('1. Merge with existing triplets:');
+  console.log(`   cat ~/.ruvllm/training/ruvltra-finetuned/triplets.jsonl ${outputPath} > combined.jsonl`);
+  console.log('\n2. Run training with enhanced data:');
+  console.log('   cargo run --example train_contrastive --release -- --triplets combined.jsonl --epochs 30');
+  console.log('\n3. Benchmark embedding-only accuracy improvement');
+  console.log();
+}
+
+main().catch(console.error);