wifi-densepose/npm/packages/ruvllm/scripts/training/generate-ecosystem-triplets.js

#!/usr/bin/env node
/**
 * Ecosystem Training Data Generator for RuvLTRA
 *
 * Generates comprehensive triplet training data for the Claude Flow ecosystem:
 * - claude-flow: Multi-agent coordination and swarm orchestration
 * - agentic-flow: AI workflow orchestration and ONNX embeddings
 * - ruvector: High-performance vector database
 *
 * Features:
 * - Reads capability definitions from JSON files
 * - Generates 5-10 natural language prompts per capability
 * - Creates hard negatives for contrastive learning
 * - Outputs combined JSONL dataset for fine-tuning
 */

const fs = require('fs');
const path = require('path');

// ============================================================================
// CAPABILITY DEFINITIONS
// ============================================================================

/**
 * Claude Flow V3 Capabilities
 * Multi-agent swarm coordination, memory, hooks, workflows
 *
 * Shape: `{ name, description, version, categories }`. Every entry in
 * `categories` has a `description` and a `commands` map keyed by the
 * space-separated command string; every command carries a `description`,
 * a `keywords` list and a `parameters` list of flag names.
 *
 * Note: a category key is not always the first word of the commands it
 * holds (`hivemind` holds `hive-mind ...`, `security` holds `aidefence ...`),
 * so the two must not be compared as if they were the same identifier.
 */
const CLAUDE_FLOW_CAPABILITIES = {
  name: 'claude-flow',
  description: 'Multi-agent swarm coordination and orchestration framework',
  version: '3.0.0',
  categories: {
    swarm: {
      description: 'Multi-agent swarm coordination and topology management',
      commands: {
        'swarm init': {
          description: 'Initialize a swarm with specified topology',
          keywords: ['swarm', 'init', 'initialize', 'topology', 'multi-agent', 'coordination'],
          parameters: ['--topology', '--max-agents', '--strategy'],
        },
        'swarm status': {
          description: 'Get current swarm status and agent health',
          keywords: ['swarm', 'status', 'health', 'agents', 'monitoring'],
          parameters: ['--verbose'],
        },
        'swarm shutdown': {
          description: 'Gracefully shutdown the swarm',
          keywords: ['swarm', 'shutdown', 'stop', 'terminate', 'graceful'],
          parameters: ['--graceful', '--force'],
        },
      },
    },
    agent: {
      description: 'Agent lifecycle management',
      commands: {
        'agent spawn': {
          description: 'Spawn a new agent with specified type',
          keywords: ['agent', 'spawn', 'create', 'start', 'worker'],
          parameters: ['-t', '--type', '--name', '--model'],
        },
        'agent list': {
          description: 'List all active agents',
          keywords: ['agent', 'list', 'show', 'active', 'running'],
          parameters: ['--status', '--domain'],
        },
        'agent terminate': {
          description: 'Terminate a specific agent',
          keywords: ['agent', 'terminate', 'kill', 'stop', 'remove'],
          parameters: ['--force'],
        },
        'agent status': {
          description: 'Get status of a specific agent',
          keywords: ['agent', 'status', 'info', 'details', 'health'],
          parameters: ['--agentId'],
        },
      },
    },
    memory: {
      description: 'Persistent memory with vector search',
      commands: {
        'memory store': {
          description: 'Store a value in memory with optional namespace',
          keywords: ['memory', 'store', 'save', 'persist', 'key-value'],
          parameters: ['--key', '--value', '--namespace', '--ttl'],
        },
        'memory retrieve': {
          description: 'Retrieve a value from memory by key',
          keywords: ['memory', 'retrieve', 'get', 'fetch', 'read'],
          parameters: ['--key', '--namespace'],
        },
        'memory search': {
          description: 'Semantic vector search in memory',
          keywords: ['memory', 'search', 'query', 'find', 'semantic', 'vector'],
          parameters: ['--query', '--namespace', '--limit', '--threshold'],
        },
        'memory list': {
          description: 'List memory entries',
          keywords: ['memory', 'list', 'entries', 'keys', 'show'],
          parameters: ['--namespace', '--limit'],
        },
        'memory delete': {
          description: 'Delete a memory entry',
          keywords: ['memory', 'delete', 'remove', 'clear'],
          parameters: ['--key', '--namespace'],
        },
      },
    },
    hooks: {
      description: 'Self-learning hooks and background workers',
      commands: {
        'hooks pre-task': {
          description: 'Get agent suggestions before starting a task',
          keywords: ['hooks', 'pre-task', 'routing', 'suggestions', 'before'],
          parameters: ['--description', '--taskId'],
        },
        'hooks post-task': {
          description: 'Record task completion for learning',
          keywords: ['hooks', 'post-task', 'completion', 'learning', 'after'],
          parameters: ['--taskId', '--success', '--quality'],
        },
        'hooks route': {
          description: 'Route task to optimal agent',
          keywords: ['hooks', 'route', 'routing', 'optimal', 'agent'],
          parameters: ['--task', '--context'],
        },
        'hooks worker dispatch': {
          description: 'Dispatch a background worker',
          keywords: ['hooks', 'worker', 'dispatch', 'background', 'trigger'],
          parameters: ['--trigger', '--context', '--priority'],
        },
        'hooks metrics': {
          description: 'View learning metrics dashboard',
          keywords: ['hooks', 'metrics', 'dashboard', 'stats', 'learning'],
          parameters: ['--period', '--format'],
        },
        'hooks pretrain': {
          description: 'Bootstrap intelligence from repository',
          keywords: ['hooks', 'pretrain', 'bootstrap', 'intelligence', 'analyze'],
          parameters: ['--path', '--depth'],
        },
      },
    },
    workflow: {
      description: 'Workflow execution and templates',
      commands: {
        'workflow create': {
          description: 'Create a new workflow',
          keywords: ['workflow', 'create', 'new', 'define'],
          parameters: ['--name', '--steps', '--description'],
        },
        'workflow execute': {
          description: 'Execute a workflow',
          keywords: ['workflow', 'execute', 'run', 'start'],
          parameters: ['--workflowId', '--variables'],
        },
        'workflow status': {
          description: 'Get workflow execution status',
          keywords: ['workflow', 'status', 'progress', 'state'],
          parameters: ['--workflowId', '--verbose'],
        },
      },
    },
    hivemind: {
      description: 'Hive-mind collective consensus',
      commands: {
        'hive-mind init': {
          description: 'Initialize hive-mind collective',
          keywords: ['hive-mind', 'init', 'collective', 'consensus'],
          parameters: ['--topology', '--queenId'],
        },
        'hive-mind spawn': {
          description: 'Spawn workers and join to hive-mind',
          keywords: ['hive-mind', 'spawn', 'workers', 'join'],
          parameters: ['--count', '--prefix', '--role'],
        },
        'hive-mind consensus': {
          description: 'Propose or vote on consensus',
          keywords: ['hive-mind', 'consensus', 'vote', 'propose'],
          parameters: ['--action', '--proposalId', '--vote'],
        },
        'hive-mind broadcast': {
          description: 'Broadcast message to all workers',
          keywords: ['hive-mind', 'broadcast', 'message', 'all'],
          parameters: ['--message', '--priority'],
        },
      },
    },
    task: {
      description: 'Task creation and management',
      commands: {
        'task create': {
          description: 'Create a new task',
          keywords: ['task', 'create', 'new', 'add'],
          parameters: ['--type', '--description', '--priority'],
        },
        'task list': {
          description: 'List all tasks',
          keywords: ['task', 'list', 'show', 'all'],
          parameters: ['--status', '--priority'],
        },
        'task complete': {
          description: 'Mark task as complete',
          keywords: ['task', 'complete', 'done', 'finish'],
          parameters: ['--taskId', '--result'],
        },
      },
    },
    session: {
      description: 'Session state management',
      commands: {
        'session save': {
          description: 'Save current session state',
          keywords: ['session', 'save', 'persist', 'state'],
          parameters: ['--name', '--description'],
        },
        'session restore': {
          description: 'Restore a saved session',
          keywords: ['session', 'restore', 'load', 'resume'],
          parameters: ['--sessionId', '--name'],
        },
        'session list': {
          description: 'List saved sessions',
          keywords: ['session', 'list', 'saved', 'history'],
          parameters: ['--limit'],
        },
      },
    },
    neural: {
      description: 'Neural pattern training and prediction',
      commands: {
        'neural train': {
          description: 'Train a neural model',
          keywords: ['neural', 'train', 'model', 'learning'],
          parameters: ['--modelType', '--epochs', '--learningRate'],
        },
        'neural predict': {
          description: 'Make predictions using neural model',
          keywords: ['neural', 'predict', 'inference', 'model'],
          parameters: ['--input', '--modelId'],
        },
        'neural patterns': {
          description: 'Manage neural patterns',
          keywords: ['neural', 'patterns', 'store', 'search'],
          parameters: ['--action', '--patternId'],
        },
      },
    },
    security: {
      description: 'Security scanning and threat detection',
      commands: {
        'aidefence scan': {
          description: 'Scan input for AI manipulation threats',
          keywords: ['aidefence', 'scan', 'security', 'threats', 'injection'],
          parameters: ['--input', '--quick'],
        },
        'aidefence analyze': {
          description: 'Deep analysis for threat types',
          keywords: ['aidefence', 'analyze', 'deep', 'threats'],
          parameters: ['--input', '--searchSimilar'],
        },
        'aidefence is_safe': {
          description: 'Quick boolean safety check',
          keywords: ['aidefence', 'safe', 'check', 'validate'],
          parameters: ['--input'],
        },
      },
    },
    performance: {
      description: 'Performance profiling and optimization',
      commands: {
        'performance benchmark': {
          description: 'Run performance benchmarks',
          keywords: ['performance', 'benchmark', 'speed', 'test'],
          parameters: ['--suite', '--iterations'],
        },
        'performance profile': {
          description: 'Profile specific component',
          keywords: ['performance', 'profile', 'analyze', 'bottleneck'],
          parameters: ['--target', '--duration'],
        },
        'performance optimize': {
          description: 'Apply performance optimizations',
          keywords: ['performance', 'optimize', 'improve', 'speed'],
          parameters: ['--target', '--aggressive'],
        },
      },
    },
    embeddings: {
      description: 'Vector embeddings with ONNX',
      commands: {
        'embeddings generate': {
          description: 'Generate embeddings for text',
          keywords: ['embeddings', 'generate', 'embed', 'vector'],
          parameters: ['--text', '--hyperbolic'],
        },
        'embeddings compare': {
          description: 'Compare similarity between texts',
          keywords: ['embeddings', 'compare', 'similarity', 'distance'],
          parameters: ['--text1', '--text2', '--metric'],
        },
        'embeddings search': {
          description: 'Semantic search across stored embeddings',
          keywords: ['embeddings', 'search', 'semantic', 'query'],
          parameters: ['--query', '--topK', '--threshold'],
        },
      },
    },
    claims: {
      description: 'Issue claiming and coordination',
      commands: {
        'claims claim': {
          description: 'Claim an issue for work',
          keywords: ['claims', 'claim', 'issue', 'work', 'assign'],
          parameters: ['--issueId', '--claimant'],
        },
        'claims release': {
          description: 'Release a claim on an issue',
          keywords: ['claims', 'release', 'unclaim', 'free'],
          parameters: ['--issueId', '--reason'],
        },
        'claims handoff': {
          description: 'Handoff issue to another claimant',
          keywords: ['claims', 'handoff', 'transfer', 'pass'],
          parameters: ['--issueId', '--from', '--to'],
        },
        'claims board': {
          description: 'View claims board',
          keywords: ['claims', 'board', 'view', 'overview'],
          parameters: [],
        },
      },
    },
  },
};

/**
 * Agentic Flow Capabilities
 * AI workflow orchestration and ONNX embeddings
 *
 * Shape: `{ name, description, version, categories }`. Every entry in
 * `categories` has a `description` and a `commands` map keyed by the
 * command string; every command carries a `description`, a `keywords`
 * list and a `parameters` list of flag names.
 *
 * Note: some commands here are a single word (`embed`, `batch-embed`,
 * `similarity`), and category keys are not always the first word of their
 * commands (`embeddings` holds `embed`, `models` holds `model ...`).
 */
const AGENTIC_FLOW_CAPABILITIES = {
  name: 'agentic-flow',
  description: 'AI workflow orchestration with ONNX runtime and vector embeddings',
  version: '1.0.0',
  categories: {
    embeddings: {
      description: 'High-performance ONNX embeddings',
      commands: {
        'embed': {
          description: 'Generate embeddings for text using ONNX models',
          keywords: ['embed', 'embedding', 'vector', 'encode', 'onnx'],
          parameters: ['--text', '--model', '--normalize'],
        },
        'batch-embed': {
          description: 'Batch embed multiple texts efficiently',
          keywords: ['batch', 'embed', 'multiple', 'parallel'],
          parameters: ['--texts', '--concurrency'],
        },
        'similarity': {
          description: 'Compute similarity between embeddings',
          keywords: ['similarity', 'cosine', 'distance', 'compare'],
          parameters: ['--a', '--b', '--metric'],
        },
      },
    },
    models: {
      description: 'ONNX model management',
      commands: {
        'model load': {
          description: 'Load an ONNX model for inference',
          keywords: ['model', 'load', 'onnx', 'initialize'],
          parameters: ['--path', '--name'],
        },
        'model list': {
          description: 'List available models',
          keywords: ['model', 'list', 'available', 'show'],
          parameters: [],
        },
        'model info': {
          description: 'Get model information and metadata',
          keywords: ['model', 'info', 'metadata', 'details'],
          parameters: ['--name'],
        },
        'model quantize': {
          description: 'Quantize model for faster inference',
          keywords: ['model', 'quantize', 'compress', 'optimize'],
          parameters: ['--input', '--output', '--bits'],
        },
      },
    },
    pipeline: {
      description: 'Workflow pipeline orchestration',
      commands: {
        'pipeline create': {
          description: 'Create a new processing pipeline',
          keywords: ['pipeline', 'create', 'workflow', 'chain'],
          parameters: ['--name', '--steps'],
        },
        'pipeline run': {
          description: 'Execute a pipeline with input data',
          keywords: ['pipeline', 'run', 'execute', 'process'],
          parameters: ['--name', '--input'],
        },
        'pipeline visualize': {
          description: 'Visualize pipeline structure',
          keywords: ['pipeline', 'visualize', 'graph', 'diagram'],
          parameters: ['--name', '--format'],
        },
      },
    },
    cache: {
      description: 'Embedding cache management',
      commands: {
        'cache set': {
          description: 'Store embedding in cache',
          keywords: ['cache', 'set', 'store', 'save'],
          parameters: ['--key', '--embedding'],
        },
        'cache get': {
          description: 'Retrieve embedding from cache',
          keywords: ['cache', 'get', 'retrieve', 'fetch'],
          parameters: ['--key'],
        },
        'cache clear': {
          description: 'Clear embedding cache',
          keywords: ['cache', 'clear', 'flush', 'reset'],
          parameters: ['--namespace'],
        },
        'cache stats': {
          description: 'Get cache statistics',
          keywords: ['cache', 'stats', 'statistics', 'info'],
          parameters: [],
        },
      },
    },
    search: {
      description: 'Vector search operations',
      commands: {
        'search nearest': {
          description: 'Find nearest neighbors to query vector',
          keywords: ['search', 'nearest', 'neighbors', 'knn'],
          parameters: ['--query', '--k', '--threshold'],
        },
        'search range': {
          description: 'Range search within distance threshold',
          keywords: ['search', 'range', 'radius', 'threshold'],
          parameters: ['--query', '--radius'],
        },
        'search hybrid': {
          description: 'Hybrid search combining keyword and semantic',
          keywords: ['search', 'hybrid', 'combined', 'keyword', 'semantic'],
          parameters: ['--query', '--alpha'],
        },
      },
    },
  },
};

/**
 * RuVector Capabilities
 * High-performance vector database
 *
 * Shape: `{ name, description, version, categories }`. Every entry in
 * `categories` has a `description` and a `commands` map keyed by the
 * space-separated command string; every command carries a `description`,
 * a `keywords` list and a `parameters` list of flag names.
 *
 * Note: category keys are not always the first word of their commands
 * (`collections` holds `collection ...`, `vectors` holds `vector ...`,
 * `persistence` holds `snapshot ...`, `quantization` holds `quantize ...`).
 */
const RUVECTOR_CAPABILITIES = {
  name: 'ruvector',
  description: 'High-performance vector database with HNSW indexing',
  version: '0.1.0',
  categories: {
    collections: {
      description: 'Vector collection management',
      commands: {
        'collection create': {
          description: 'Create a new vector collection',
          keywords: ['collection', 'create', 'new', 'database'],
          parameters: ['--name', '--dimension', '--metric'],
        },
        'collection delete': {
          description: 'Delete a vector collection',
          keywords: ['collection', 'delete', 'drop', 'remove'],
          parameters: ['--name', '--confirm'],
        },
        'collection info': {
          description: 'Get collection information',
          keywords: ['collection', 'info', 'stats', 'details'],
          parameters: ['--name'],
        },
        'collection list': {
          description: 'List all collections',
          keywords: ['collection', 'list', 'all', 'show'],
          parameters: [],
        },
      },
    },
    vectors: {
      description: 'Vector CRUD operations',
      commands: {
        'vector insert': {
          description: 'Insert vectors into collection',
          keywords: ['vector', 'insert', 'add', 'upsert'],
          parameters: ['--collection', '--vectors', '--ids'],
        },
        'vector delete': {
          description: 'Delete vectors from collection',
          keywords: ['vector', 'delete', 'remove', 'drop'],
          parameters: ['--collection', '--ids'],
        },
        'vector get': {
          description: 'Get vectors by ID',
          keywords: ['vector', 'get', 'fetch', 'retrieve'],
          parameters: ['--collection', '--ids'],
        },
        'vector update': {
          description: 'Update existing vectors',
          keywords: ['vector', 'update', 'modify', 'change'],
          parameters: ['--collection', '--id', '--vector'],
        },
      },
    },
    search: {
      description: 'Vector search with HNSW',
      commands: {
        'search knn': {
          description: 'K-nearest neighbor search',
          keywords: ['search', 'knn', 'nearest', 'similar'],
          parameters: ['--collection', '--query', '--k'],
        },
        'search filter': {
          description: 'Filtered vector search with metadata',
          keywords: ['search', 'filter', 'metadata', 'conditional'],
          parameters: ['--collection', '--query', '--filter'],
        },
        'search batch': {
          description: 'Batch search multiple queries',
          keywords: ['search', 'batch', 'multiple', 'parallel'],
          parameters: ['--collection', '--queries', '--k'],
        },
      },
    },
    index: {
      description: 'HNSW index management',
      commands: {
        'index build': {
          description: 'Build HNSW index for collection',
          keywords: ['index', 'build', 'create', 'hnsw'],
          parameters: ['--collection', '--ef', '--m'],
        },
        'index rebuild': {
          description: 'Rebuild existing index',
          keywords: ['index', 'rebuild', 'refresh', 'reindex'],
          parameters: ['--collection'],
        },
        'index stats': {
          description: 'Get index statistics',
          keywords: ['index', 'stats', 'info', 'metrics'],
          parameters: ['--collection'],
        },
        'index optimize': {
          description: 'Optimize index for search performance',
          keywords: ['index', 'optimize', 'tune', 'improve'],
          parameters: ['--collection', '--target'],
        },
      },
    },
    persistence: {
      description: 'Data persistence and backup',
      commands: {
        'snapshot create': {
          description: 'Create a snapshot of collection',
          keywords: ['snapshot', 'create', 'backup', 'save'],
          parameters: ['--collection', '--path'],
        },
        'snapshot restore': {
          description: 'Restore collection from snapshot',
          keywords: ['snapshot', 'restore', 'load', 'recover'],
          parameters: ['--path', '--collection'],
        },
        'snapshot list': {
          description: 'List available snapshots',
          keywords: ['snapshot', 'list', 'backups', 'show'],
          parameters: ['--collection'],
        },
      },
    },
    quantization: {
      description: 'Vector quantization for memory efficiency',
      commands: {
        'quantize apply': {
          description: 'Apply quantization to collection',
          keywords: ['quantize', 'apply', 'compress', 'reduce'],
          parameters: ['--collection', '--type', '--bits'],
        },
        'quantize info': {
          description: 'Get quantization info',
          keywords: ['quantize', 'info', 'status', 'details'],
          parameters: ['--collection'],
        },
      },
    },
  },
};

// ============================================================================
// PROMPT TEMPLATES
// ============================================================================

/**
 * Natural language prompt variations for generating diverse training data.
 *
 * Templates are grouped by phrasing style. The placeholders that appear in
 * them are `{action}`, `{object}` and `{desc}`; not every template uses all
 * three.
 *
 * NOTE(review): nothing in the part of this file reviewed here reads this
 * table — generatePrompts builds its phrasings inline with template
 * literals. Confirm whether it is consumed elsewhere before changing it.
 */
const PROMPT_TEMPLATES = {
  // Direct commands
  direct: [
    '{action} {object}',
    '{action} the {object}',
    'Please {action} {object}',
    'I need to {action} {object}',
  ],
  // Request style
  request: [
    'Can you {action} {object}?',
    'Help me {action} {object}',
    'I want to {action} {object}',
    'Could you {action} {object} for me?',
  ],
  // Question style
  question: [
    'How do I {action} {object}?',
    'What\'s the best way to {action} {object}?',
    'How can I {action} {object}?',
    'Which command {desc}?',
  ],
  // Contextual
  contextual: [
    'I\'m trying to {desc}',
    'I need a way to {desc}',
    'My goal is to {desc}',
    'For this project, I need to {desc}',
  ],
  // Descriptive
  descriptive: [
    '{desc}',
    'Help with {desc}',
    'I want {desc}',
    'Need {desc}',
  ],
};

/**
 * Action verbs mapped to capability types.
 *
 * generatePrompts looks this table up with the second word of a command
 * (or the only word, for single-word commands). The first entry of the
 * matching list is used as the primary verb and the next two entries as
 * alternate phrasings; a word with no entry here is used verbatim.
 *
 * NOTE(review): the `swarm` and `agent` keys are first words of commands,
 * not second words, so none of the commands defined in this file appears
 * to reach them through that lookup — confirm whether that is intended.
 */
const ACTION_MAPPINGS = {
  // Swarm/Agent actions
  swarm: ['initialize', 'start', 'create', 'set up', 'configure', 'launch'],
  agent: ['spawn', 'create', 'start', 'launch', 'deploy', 'run'],
  terminate: ['stop', 'kill', 'terminate', 'shutdown', 'end', 'close'],
  status: ['check', 'get', 'view', 'show', 'monitor', 'inspect'],
  list: ['list', 'show', 'display', 'enumerate', 'get all'],

  // Memory actions
  store: ['store', 'save', 'persist', 'write', 'put', 'cache'],
  retrieve: ['retrieve', 'get', 'fetch', 'read', 'load'],
  search: ['search', 'find', 'query', 'look up', 'discover'],
  delete: ['delete', 'remove', 'clear', 'drop', 'erase'],

  // Workflow actions
  create: ['create', 'make', 'build', 'define', 'set up'],
  execute: ['execute', 'run', 'start', 'trigger', 'launch'],

  // Vector operations
  embed: ['embed', 'encode', 'vectorize', 'generate embedding for'],
  insert: ['insert', 'add', 'upsert', 'put', 'store'],
  knn: ['find similar', 'search for nearest', 'query neighbors', 'find k-nearest'],

  // Index operations
  build: ['build', 'create', 'construct', 'generate'],
  optimize: ['optimize', 'tune', 'improve', 'speed up'],
  rebuild: ['rebuild', 'regenerate', 'refresh', 'recreate'],
};

// ============================================================================
// HARD NEGATIVE PATTERNS
// ============================================================================

/**
 * Confusing command pairs for hard negative generation.
 * These are commands that sound similar but have different purposes.
 *
 * Each entry is `{ cmd1, cmd2, reason }`. The pair is symmetric: whichever
 * side corresponds to the command being processed, findHardNegatives
 * reports the other side as the negative and copies `reason` through.
 *
 * Entries in the first group use bare command names (no tool prefix);
 * entries in the other groups are prefixed with the tool name
 * (`claude-flow`, `agentic-flow` or `ruvector`).
 */
const CONFUSING_PAIRS = [
  // Claude Flow internal confusion
  { cmd1: 'memory store', cmd2: 'memory search', reason: 'both involve memory' },
  { cmd1: 'agent spawn', cmd2: 'hive-mind spawn', reason: 'both spawn workers' },
  { cmd1: 'swarm init', cmd2: 'hive-mind init', reason: 'both initialize coordination' },
  { cmd1: 'hooks route', cmd2: 'hooks pre-task', reason: 'both involve task routing' },
  { cmd1: 'workflow execute', cmd2: 'task create', reason: 'both start work' },
  { cmd1: 'session save', cmd2: 'memory store', reason: 'both persist data' },
  { cmd1: 'neural train', cmd2: 'hooks pretrain', reason: 'both involve training' },
  { cmd1: 'embeddings search', cmd2: 'memory search', reason: 'both search semantically' },
  { cmd1: 'performance profile', cmd2: 'performance benchmark', reason: 'both analyze performance' },
  { cmd1: 'claims claim', cmd2: 'task create', reason: 'both assign work' },

  // Cross-tool confusion
  { cmd1: 'claude-flow memory search', cmd2: 'ruvector search knn', reason: 'both do vector search' },
  { cmd1: 'claude-flow embeddings generate', cmd2: 'agentic-flow embed', reason: 'both generate embeddings' },
  { cmd1: 'ruvector collection create', cmd2: 'claude-flow memory store', reason: 'both store data' },
  { cmd1: 'agentic-flow cache set', cmd2: 'claude-flow memory store', reason: 'both cache data' },
  { cmd1: 'ruvector index build', cmd2: 'claude-flow hooks pretrain', reason: 'both build indexes' },
  { cmd1: 'agentic-flow search hybrid', cmd2: 'ruvector search filter', reason: 'both filtered search' },
  { cmd1: 'claude-flow swarm init', cmd2: 'agentic-flow pipeline create', reason: 'both orchestrate work' },

  // Category confusion
  { cmd1: 'ruvector vector insert', cmd2: 'agentic-flow cache set', reason: 'both store vectors' },
  { cmd1: 'claude-flow agent list', cmd2: 'agentic-flow model list', reason: 'both list resources' },
  { cmd1: 'ruvector snapshot create', cmd2: 'claude-flow session save', reason: 'both create backups' },
  { cmd1: 'agentic-flow model quantize', cmd2: 'ruvector quantize apply', reason: 'both quantize' },
];

// ============================================================================
// TRIPLET GENERATION
// ============================================================================

/**
 * Generate natural language prompts for a single capability command.
 *
 * The command string is split on spaces: the first word is used as the
 * noun ("category") and the second word — or the first, for single-word
 * commands — as the action used to look up verb variations in
 * ACTION_MAPPINGS. Candidates are produced in this order: direct
 * verb + noun phrasings, description-based phrasings, up to two alternate
 * verbs, keyword-based phrasings, and two literal command forms. They are
 * then whitespace-normalized, de-duplicated, filtered and capped at 10.
 *
 * @param {string} capability - Tool name, used for the tool-qualified prompt.
 * @param {string} command - Space-separated command, e.g. 'memory store'.
 * @param {{description?: string, keywords?: string[]}} config - Command
 *   definition; a missing description or keyword list is treated as empty.
 * @returns {string[]} At most 10 unique prompts, in generation order.
 */
function generatePrompts(capability, command, config) {
  const { description = '', keywords = [] } = config;
  const prompts = [];

  // Extract noun and action from the command
  const parts = command.split(' ');
  const category = parts[0];
  const action = parts[1] || parts[0];
  const categoryLower = category.toLowerCase();

  // Own-property lookup: an action such as 'constructor' must fall back to
  // itself rather than resolve to a member inherited from Object.prototype.
  const mapped = Object.prototype.hasOwnProperty.call(ACTION_MAPPINGS, action)
    ? ACTION_MAPPINGS[action]
    : null;
  const actionVariations = mapped || [action];
  const primaryAction = actionVariations[0];

  if (primaryAction.toLowerCase() !== categoryLower) {
    // Direct commands: "{action} {object}"
    prompts.push(
      `${primaryAction} ${category}`,
      `${primaryAction} the ${category}`,
      `I need to ${primaryAction} ${category}`,
      `Can you ${primaryAction} ${category}?`,
      `Help me ${primaryAction} ${category}`,
    );
  } else {
    // Verb and noun are the same word: use the verb alone to avoid
    // phrasings like "search search".
    prompts.push(
      `${primaryAction}`,
      `I need to ${primaryAction}`,
      `Help me ${primaryAction}`,
    );
  }

  // Description-based prompts; skipped when there is no description so that
  // dangling fragments such as "How do I ?" are never emitted.
  const descLower = description.toLowerCase();
  if (descLower.trim() !== '') {
    prompts.push(
      `I want to ${descLower}`,
      `How do I ${descLower}?`,
      `I need a way to ${descLower}`,
    );
  }

  // Up to two alternate verbs (skipped when identical to the noun)
  for (const actionVar of actionVariations.slice(1, 3)) {
    if (actionVar.toLowerCase() !== categoryLower) {
      prompts.push(`${actionVar} ${category}`, `I want to ${actionVar} ${category}`);
    }
  }

  // Keyword-based prompts, only for keywords not already used as verb/noun
  const usedWords = new Set([
    primaryAction.toLowerCase(),
    categoryLower,
    ...actionVariations.map((a) => a.toLowerCase()),
  ]);
  for (const keyword of keywords) {
    const kwLower = keyword.toLowerCase();
    if (usedWords.has(kwLower)) continue;

    prompts.push(`${keyword} in ${category}`, `I need ${keyword} functionality`);
    usedWords.add(kwLower);
    // Stop once the set of used words has grown past keywords.length + 3
    if (usedWords.size > keywords.length + 3) break;
  }

  // Tool-specific technical prompts
  prompts.push(`run ${command}`, `${capability} ${command}`);

  // Normalize whitespace BEFORE de-duplicating, so prompts that differ only
  // in spacing collapse into one entry and do not waste slots in the cap.
  const normalized = prompts.map((p) => p.trim().replace(/\s+/g, ' '));

  const isUsable = (p) => {
    if (p.length < 5) return false;
    if (p.includes('undefined')) return false;
    // Reject immediate word repetition (e.g. "status status")
    const words = p.toLowerCase().split(' ');
    for (let i = 0; i < words.length - 1; i++) {
      if (words[i] === words[i + 1] && words[i].length > 2) return false;
    }
    return true;
  };

  return [...new Set(normalized)].filter(isUsable).slice(0, 10);
}

/**
 * Find hard negatives for a command: other commands that are easy to
 * confuse with it.
 *
 * Candidates are collected from three sources, in this order:
 *   1. CONFUSING_PAIRS entries in which one side is exactly this command,
 *      either bare (`memory store`) or tool-qualified
 *      (`claude-flow memory store`).
 *   2. Commands of other tools sharing at least two keywords with it.
 *   3. Commands of the same tool, in a different category, whose action
 *      word (second word, or the only word) is the same.
 * The command itself is never returned. Results are de-duplicated by
 * command string (first reason wins) and capped at 5.
 *
 * @param {string} tool - Name of the tool that owns the command.
 * @param {string} command - Command string without the tool prefix.
 * @param {Array<{name: string, categories: Object}>} allCapabilities -
 *   Capability definitions for every tool.
 * @returns {Array<{command: string, reason: string}>} Up to 5 negatives.
 */
function findHardNegatives(tool, command, allCapabilities) {
  const negatives = [];
  const fullCommand = `${tool} ${command}`;

  const toolNames = allCapabilities.map((cap) => cap.name);
  const hasToolPrefix = (cmd) => toolNames.some((name) => cmd.startsWith(`${name} `));

  // Resolve the command's own definition once; it is invariant for every
  // loop below. The category is found by membership because category keys
  // do not always equal the first word of their commands.
  const sourceCapability = allCapabilities.find((cap) => cap.name === tool);
  const sourceEntry = sourceCapability
    ? Object.entries(sourceCapability.categories).find(([, category]) => category.commands[command])
    : undefined;
  const commandParts = command.split(' ');
  const sourceCategory = sourceEntry ? sourceEntry[0] : commandParts[0];
  const sourceAction = commandParts[1] || commandParts[0];
  const sourceKeywords = new Set(
    (sourceEntry && sourceEntry[1].commands[command].keywords) || [],
  );

  // 1. Predefined confusing pairs. Matching is exact: substring matching
  //    would let 'embed' hit 'embeddings search'.
  for (const { cmd1, cmd2, reason } of CONFUSING_PAIRS) {
    for (const [own, other] of [[cmd1, cmd2], [cmd2, cmd1]]) {
      const matchedBare = own === command;
      if (!matchedBare && own !== fullCommand) continue;

      // A bare pair is assumed to be internal to the current tool, so its
      // other side is qualified with that tool's name; callers expect
      // tool-qualified negatives.
      const qualified = matchedBare && !hasToolPrefix(other) ? `${tool} ${other}` : other;
      negatives.push({ command: qualified, reason });
      break;
    }
  }

  // 2. Commands from other tools with overlapping keywords
  if (sourceKeywords.size > 0) {
    for (const cap of allCapabilities) {
      if (cap.name === tool) continue;

      for (const category of Object.values(cap.categories)) {
        for (const [cmdName, cmdConfig] of Object.entries(category.commands)) {
          const overlap = (cmdConfig.keywords || []).filter((k) => sourceKeywords.has(k));
          if (overlap.length >= 2) {
            negatives.push({
              command: `${cap.name} ${cmdName}`,
              reason: `keyword overlap: ${overlap.join(', ')}`,
            });
          }
        }
      }
    }
  }

  // 3. Same tool, different category, same action word
  if (sourceCapability) {
    for (const [catName, category] of Object.entries(sourceCapability.categories)) {
      if (catName === sourceCategory) continue;

      for (const cmdName of Object.keys(category.commands)) {
        if (cmdName === command) continue;

        const [first, second] = cmdName.split(' ');
        const cmdAction = second || first;
        if (cmdAction === sourceAction) {
          negatives.push({
            command: `${tool} ${cmdName}`,
            reason: `same action '${cmdAction}' different category`,
          });
        }
      }
    }
  }

  // De-duplicate and limit. Seeding with the positive guarantees it can
  // never occupy one of the 5 slots.
  const seen = new Set([fullCommand]);
  return negatives.filter((n) => {
    if (seen.has(n.command)) return false;
    seen.add(n.command);
    return true;
  }).slice(0, 5);
}

/**
 * Look up the definition of a command within a tool's capabilities.
 *
 * @param {string} tool - Name of the tool to search.
 * @param {string} command - Command string used as the key in `commands`.
 * @param {Array<{name: string, categories: Object}>} allCapabilities -
 *   Capability definitions for every tool.
 * @returns {Object|null} The first matching command definition, or null
 *   when the tool is unknown or none of its categories has the command.
 */
function getCommandConfig(tool, command, allCapabilities) {
  const owner = allCapabilities.find(({ name }) => name === tool);
  if (!owner) {
    return null;
  }

  // `find` stops at the first category that defines the command.
  const holder = Object.values(owner.categories).find(
    (group) => group.commands[command],
  );
  return holder ? holder.commands[command] : null;
}

/**
 * Generate triplets for all capabilities.
 *
 * For every command of every capability, prompts are produced with
 * generatePrompts() and candidate negatives with findHardNegatives(). Each
 * prompt of at least 5 characters is paired with every usable hard negative
 * (`isHard: true`). When a command has no usable hard negative, each prompt is
 * instead paired with the first command of a randomly chosen other tool
 * (`isHard: false`).
 *
 * A hard negative is usable when it is a fully-qualified command (contains a
 * space and starts with a known tool name) and differs from the positive.
 *
 * @param {Array<object>} capabilities - Capability definitions
 *   (`name`, `categories.<cat>.commands.<cmd>`).
 * @returns {Array<{anchor: string, positive: string, negative: string,
 *   isHard: boolean, category: string, tool: string}>} Generated triplets.
 */
function generateTriplets(capabilities) {
  // Prefixes a negative must carry to count as a fully-qualified command.
  const toolPrefixes = ['claude-flow', 'agentic-flow', 'ruvector'];
  const triplets = [];

  for (const cap of capabilities) {
    // Candidate tools for the easy-negative fallback; invariant per capability.
    const otherCaps = capabilities.filter((c) => c.name !== cap.name);

    for (const [catName, category] of Object.entries(cap.categories)) {
      for (const [cmdName, cmdConfig] of Object.entries(category.commands)) {
        const fullCommand = `${cap.name} ${cmdName}`;
        const prompts = generatePrompts(cap.name, cmdName, cmdConfig);

        // Validate negatives once per command rather than once per prompt.
        // Deciding on the *filtered* list also keeps prompts from being dropped
        // when every candidate turns out to be unusable.
        const hardNegatives = findHardNegatives(cap.name, cmdName, capabilities)
          .map((negative) => negative.command)
          .filter((negCommand) =>
            typeof negCommand === 'string' &&
            negCommand.includes(' ') &&
            toolPrefixes.some((prefix) => negCommand.startsWith(prefix)) &&
            negCommand !== fullCommand);

        for (const prompt of prompts) {
          // Skip malformed prompts
          if (!prompt || prompt.length < 5) continue;

          if (hardNegatives.length > 0) {
            for (const negCommand of hardNegatives) {
              triplets.push({
                anchor: prompt,
                positive: fullCommand,
                negative: negCommand,
                isHard: true,
                category: catName,
                tool: cap.name,
              });
            }
            continue;
          }

          // Fallback: easy negative taken from a randomly chosen other tool.
          if (otherCaps.length === 0) continue;
          const randomCap = otherCaps[Math.floor(Math.random() * otherCaps.length)];
          const [firstCategory] = Object.values(randomCap.categories ?? {});
          const [firstCmdName] = Object.keys(firstCategory?.commands ?? {});
          // A tool without any command cannot supply a negative.
          if (firstCmdName === undefined) continue;

          const negCommand = `${randomCap.name} ${firstCmdName}`;
          // Skip if somehow equals positive
          if (negCommand === fullCommand) continue;

          triplets.push({
            anchor: prompt,
            positive: fullCommand,
            negative: negCommand,
            isHard: false,
            category: catName,
            tool: cap.name,
          });
        }
      }
    }
  }

  return triplets;
}

/**
 * Build the hand-curated triplets, grouped by ecosystem area.
 *
 * Every row below has the shape `[anchor, positive, ...negatives]`. A row is
 * expanded into one triplet per listed negative, and each resulting triplet is
 * flagged `isHard: true`. Output order follows the row order, group by group.
 *
 * @returns {Array<{anchor: string, positive: string, negative: string, isHard: boolean}>}
 */
function generateCategoryExamples() {
  // Swarm coordination
  const swarmRows = [
    // init
    ['Set up a multi-agent swarm for parallel processing', 'claude-flow swarm init', 'agentic-flow pipeline create', 'claude-flow agent spawn'],
    ['Initialize hierarchical agent coordination', 'claude-flow swarm init', 'claude-flow hive-mind init'],
    ['Create a mesh topology swarm with 10 agents', 'claude-flow swarm init', 'claude-flow hive-mind spawn'],
    ['Configure swarm consensus using raft protocol', 'claude-flow swarm init', 'claude-flow hive-mind consensus'],
    ['Start a queen-led hive-mind collective', 'claude-flow hive-mind init', 'claude-flow swarm init'],
    ['Coordinate multiple AI agents on a complex task', 'claude-flow swarm init', 'agentic-flow pipeline create'],
    ['Set up Byzantine fault-tolerant consensus', 'claude-flow hive-mind init', 'claude-flow swarm init'],
    ['Launch a distributed agent network', 'claude-flow swarm init', 'claude-flow agent spawn'],
    ['Create a star topology for centralized coordination', 'claude-flow swarm init', 'claude-flow hive-mind init'],
    // status and monitoring
    ['Check the health of all running agents', 'claude-flow swarm status', 'claude-flow agent status'],
    ['Monitor swarm performance and throughput', 'claude-flow swarm status', 'claude-flow performance benchmark'],
    ['Get swarm coordination status', 'claude-flow swarm status', 'claude-flow agent list'],
  ];

  // Agent management
  const agentRows = [
    ['Spawn a coder agent to implement features', 'claude-flow agent spawn', 'claude-flow hive-mind spawn', 'claude-flow task create'],
    ['Create a new worker agent for this task', 'claude-flow agent spawn', 'claude-flow hive-mind spawn'],
    ['Start a researcher agent to investigate', 'claude-flow agent spawn', 'claude-flow task create'],
    ['List all active agents in the system', 'claude-flow agent list', 'agentic-flow model list', 'ruvector collection list'],
    ['Show running agent processes', 'claude-flow agent list', 'claude-flow task list'],
    ['Kill a misbehaving agent', 'claude-flow agent terminate', 'claude-flow swarm shutdown'],
    ['Stop the agent that is stuck', 'claude-flow agent terminate', 'claude-flow task cancel'],
    ['Get details about a specific agent', 'claude-flow agent status', 'claude-flow swarm status'],
  ];

  // Memory operations
  const memoryRows = [
    // store
    ['Store learned patterns for future reference', 'claude-flow memory store', 'ruvector vector insert', 'agentic-flow cache set'],
    ['Save task completion metrics to memory', 'claude-flow memory store', 'claude-flow session save'],
    ['Persist agent decisions for analysis', 'claude-flow memory store', 'ruvector vector insert'],
    ['Cache successful code patterns', 'claude-flow memory store', 'agentic-flow cache set'],
    ['Remember this debugging solution', 'claude-flow memory store', 'agentic-flow cache set'],
    ['Store API response for later retrieval', 'claude-flow memory store', 'ruvector vector insert'],
    ['Save this configuration for reuse', 'claude-flow memory store', 'claude-flow session save'],
    // retrieve
    ['Get the stored pattern for authentication', 'claude-flow memory retrieve', 'claude-flow memory search', 'ruvector vector get'],
    ['Fetch previously saved configuration', 'claude-flow memory retrieve', 'claude-flow session restore'],
    ['Load cached data from memory', 'claude-flow memory retrieve', 'agentic-flow cache get'],
    // search
    ['Search memory for similar patterns', 'claude-flow memory search', 'ruvector search knn', 'agentic-flow search nearest'],
    ['Find relevant past solutions', 'claude-flow memory search', 'ruvector search filter'],
    ['Query semantic memory for debugging tips', 'claude-flow memory search', 'agentic-flow search hybrid'],
    ['Look up related patterns in storage', 'claude-flow memory search', 'ruvector search knn'],
  ];

  // Vector database (ruvector)
  const vectorRows = [
    // search
    ['Find k-nearest matches to this embedding', 'ruvector search knn', 'claude-flow memory search', 'agentic-flow search nearest'],
    ['Search vectors with metadata filters', 'ruvector search filter', 'claude-flow memory search', 'agentic-flow search hybrid'],
    ['Perform approximate nearest neighbor search', 'ruvector search knn', 'agentic-flow search nearest'],
    ['Query the vector database for similar items', 'ruvector search knn', 'claude-flow memory search'],
    ['Find similar embeddings in the collection', 'ruvector search knn', 'claude-flow embeddings search'],
    ['Batch search multiple query vectors', 'ruvector search batch', 'agentic-flow batch-embed'],
    // collections
    ['Create a new vector collection for documents', 'ruvector collection create', 'claude-flow memory store'],
    ['Set up a database for embedding storage', 'ruvector collection create', 'agentic-flow cache set'],
    ['Delete the old vector collection', 'ruvector collection delete', 'claude-flow memory delete'],
    ['Get information about the collection', 'ruvector collection info', 'agentic-flow model info'],
    // vector CRUD
    ['Insert embeddings into the database', 'ruvector vector insert', 'claude-flow memory store', 'agentic-flow cache set'],
    ['Add vectors to the collection', 'ruvector vector insert', 'claude-flow memory store'],
    ['Upsert vectors with metadata', 'ruvector vector insert', 'agentic-flow cache set'],
    ['Delete vectors by ID', 'ruvector vector delete', 'claude-flow memory delete'],
    // indexes
    ['Build HNSW index for faster search', 'ruvector index build', 'claude-flow hooks pretrain'],
    ['Create search index for vectors', 'ruvector index build', 'claude-flow neural train'],
    ['Optimize index for query performance', 'ruvector index optimize', 'claude-flow performance optimize'],
    ['Rebuild search index after updates', 'ruvector index rebuild', 'claude-flow hooks pretrain'],
    ['Configure HNSW parameters for accuracy', 'ruvector index build', 'ruvector quantize apply'],
    // persistence
    ['Create a snapshot backup of vectors', 'ruvector snapshot create', 'claude-flow session save'],
    ['Restore vectors from backup', 'ruvector snapshot restore', 'claude-flow session restore'],
  ];

  // Embeddings (agentic-flow)
  const embeddingRows = [
    ['Generate embeddings for these documents', 'agentic-flow embed', 'claude-flow embeddings generate'],
    ['Create vector representations of text', 'agentic-flow embed', 'claude-flow embeddings generate'],
    ['Encode sentences into embeddings', 'agentic-flow embed', 'claude-flow embeddings generate'],
    ['Vectorize code snippets for search', 'agentic-flow embed', 'ruvector vector insert'],
    ['Produce semantic embeddings from descriptions', 'agentic-flow embed', 'claude-flow embeddings generate'],
    ['Convert text to numerical vectors using ONNX', 'agentic-flow embed', 'claude-flow embeddings generate'],
    ['Batch embed multiple documents efficiently', 'agentic-flow batch-embed', 'ruvector search batch'],
    ['Embed large corpus in parallel', 'agentic-flow batch-embed', 'ruvector vector insert'],
    ['Compare similarity between two texts', 'agentic-flow similarity', 'claude-flow embeddings compare'],
    ['Calculate cosine distance between embeddings', 'agentic-flow similarity', 'claude-flow embeddings compare'],
    // models
    ['Load the ONNX model for inference', 'agentic-flow model load', 'claude-flow neural train'],
    ['List available embedding models', 'agentic-flow model list', 'claude-flow agent list'],
    ['Quantize the model for faster inference', 'agentic-flow model quantize', 'ruvector quantize apply'],
    // cache
    ['Cache the embedding for reuse', 'agentic-flow cache set', 'claude-flow memory store'],
    ['Get cached embedding', 'agentic-flow cache get', 'claude-flow memory retrieve'],
    ['Clear the embedding cache', 'agentic-flow cache clear', 'claude-flow memory delete'],
    // search
    ['Find nearest neighbors to query', 'agentic-flow search nearest', 'ruvector search knn'],
    ['Hybrid keyword and semantic search', 'agentic-flow search hybrid', 'ruvector search filter'],
    // pipelines
    ['Create an embedding pipeline', 'agentic-flow pipeline create', 'claude-flow workflow create'],
    ['Run the processing pipeline', 'agentic-flow pipeline run', 'claude-flow workflow execute'],
  ];

  // Hooks and learning
  const hookRows = [
    ['Route this task to the optimal agent', 'claude-flow hooks route', 'claude-flow agent spawn', 'claude-flow hooks pre-task'],
    ['Get agent suggestions before starting work', 'claude-flow hooks pre-task', 'claude-flow hooks route'],
    ['Record task completion for learning', 'claude-flow hooks post-task', 'claude-flow hooks metrics'],
    ['Analyze codebase to bootstrap intelligence', 'claude-flow hooks pretrain', 'claude-flow neural train', 'ruvector index build'],
    ['Track metrics from completed tasks', 'claude-flow hooks metrics', 'claude-flow performance benchmark'],
    ['Pre-train routing model on repository', 'claude-flow hooks pretrain', 'claude-flow neural train'],
    ['Dispatch a background worker for optimization', 'claude-flow hooks worker dispatch', 'claude-flow agent spawn'],
    ['Log task before starting', 'claude-flow hooks pre-task', 'claude-flow task create'],
  ];

  // Workflows and tasks
  const workflowRows = [
    ['Create a new workflow for code review', 'claude-flow workflow create', 'agentic-flow pipeline create', 'claude-flow task create'],
    ['Define a multi-step workflow', 'claude-flow workflow create', 'agentic-flow pipeline create'],
    ['Execute the deployment workflow', 'claude-flow workflow execute', 'agentic-flow pipeline run'],
    ['Run the CI/CD workflow', 'claude-flow workflow execute', 'claude-flow task create'],
    ['Check workflow execution status', 'claude-flow workflow status', 'claude-flow task status'],
    ['Create a task for the coder agent', 'claude-flow task create', 'claude-flow agent spawn'],
    ['Add a new task to the queue', 'claude-flow task create', 'claude-flow workflow create'],
    ['Mark this task as complete', 'claude-flow task complete', 'claude-flow hooks post-task'],
    ['List pending tasks', 'claude-flow task list', 'claude-flow agent list'],
  ];

  // Sessions and state
  const sessionRows = [
    ['Save the current session state', 'claude-flow session save', 'claude-flow memory store', 'ruvector snapshot create'],
    ['Persist session for later continuation', 'claude-flow session save', 'claude-flow memory store'],
    ['Restore previous session', 'claude-flow session restore', 'ruvector snapshot restore', 'claude-flow memory retrieve'],
    ['Continue where I left off', 'claude-flow session restore', 'claude-flow memory retrieve'],
    ['List saved sessions', 'claude-flow session list', 'claude-flow memory list'],
  ];

  // Neural and ML
  const neuralRows = [
    ['Train the routing model', 'claude-flow neural train', 'claude-flow hooks pretrain'],
    ['Train neural patterns for better routing', 'claude-flow neural train', 'claude-flow hooks pretrain'],
    ['Make prediction with neural model', 'claude-flow neural predict', 'claude-flow hooks route'],
    ['Get neural routing prediction', 'claude-flow neural predict', 'agentic-flow embed'],
    ['Store learned neural patterns', 'claude-flow neural patterns', 'claude-flow memory store'],
  ];

  // Performance and security
  const perfRows = [
    ['Benchmark system performance', 'claude-flow performance benchmark', 'claude-flow hooks metrics'],
    ['Profile slow operations', 'claude-flow performance profile', 'claude-flow performance benchmark'],
    ['Optimize for lower latency', 'claude-flow performance optimize', 'ruvector index optimize'],
    ['Scan input for security threats', 'claude-flow aidefence scan', 'claude-flow hooks pre-task'],
    ['Check if input is safe', 'claude-flow aidefence is_safe', 'claude-flow aidefence scan'],
    ['Analyze potential prompt injection', 'claude-flow aidefence analyze', 'claude-flow hooks route'],
  ];

  // Claims and coordination
  const claimsRows = [
    ['Claim this issue for work', 'claude-flow claims claim', 'claude-flow task create'],
    ['Assign this issue to me', 'claude-flow claims claim', 'claude-flow task create'],
    ['Release my claim on this issue', 'claude-flow claims release', 'claude-flow task complete'],
    ['Hand off issue to another agent', 'claude-flow claims handoff', 'claude-flow claims release'],
    ['View the claims board', 'claude-flow claims board', 'claude-flow task list'],
  ];

  // One triplet per (row, negative) pair, in row order.
  const expandRow = ([anchor, positive, ...negatives]) =>
    negatives.map((negative) => ({ anchor, positive, negative, isHard: true }));

  return [
    swarmRows, agentRows, memoryRows, vectorRows,
    embeddingRows, hookRows, workflowRows, sessionRows,
    neuralRows, perfRows, claimsRows,
  ].flatMap((rows) => rows.flatMap(expandRow));
}

// ============================================================================
// MAIN GENERATION
// ============================================================================

/**
 * Write each tool's capability definition to its own pretty-printed JSON file.
 *
 * @param {string} outputDir - Directory that receives the three JSON files.
 */
function saveCapabilities(outputDir) {
  const definitionsByFilename = {
    'claude-flow-capabilities.json': CLAUDE_FLOW_CAPABILITIES,
    'agentic-flow-capabilities.json': AGENTIC_FLOW_CAPABILITIES,
    'ruvector-capabilities.json': RUVECTOR_CAPABILITIES,
  };

  Object.entries(definitionsByFilename).forEach(([filename, definition]) => {
    const target = path.join(outputDir, filename);
    const serialized = JSON.stringify(definition, null, 2);
    fs.writeFileSync(target, serialized);
    console.log(`  Saved ${target}`);
  });
}

/**
 * Main entry point: generate the ecosystem triplet dataset and write it as JSONL.
 *
 * CLI flags (read from process.argv):
 *   --output=<path>       Destination JSONL file. Defaults to
 *                         `ecosystem-triplets.jsonl` next to this script.
 *   --save-capabilities   Also write the three capability definitions as JSON
 *                         files next to this script.
 *
 * Side effects: writes the output file (and optionally the capability JSONs)
 * and prints the configuration, statistics, sample triplets and follow-up
 * instructions to stdout. Only `anchor`, `positive` and `negative` are written
 * to the JSONL; `tool`, `category` and `isHard` are used for statistics only.
 */
function main() {
  console.log('\n' + '='.repeat(80));
  console.log('          ECOSYSTEM TRAINING DATA GENERATOR FOR RUVLTRA');
  console.log('='.repeat(80) + '\n');

  const args = process.argv.slice(2);
  const outputDir = path.dirname(path.resolve(__filename));
  const outputFlag = '--output=';
  const outputArg = args.find((a) => a.startsWith(outputFlag));
  // Take everything after the flag prefix; split('=')[1] would truncate a
  // path that itself contains '='. An empty value falls back to the default.
  const outputFile = outputArg?.slice(outputFlag.length) ||
    path.join(outputDir, 'ecosystem-triplets.jsonl');
  const saveCapabilityFiles = args.includes('--save-capabilities');

  console.log('Configuration:');
  console.log(`  Output: ${outputFile}`);
  console.log(`  Save capability JSONs: ${saveCapabilityFiles}`);
  console.log();

  // All capabilities
  const allCapabilities = [
    CLAUDE_FLOW_CAPABILITIES,
    AGENTIC_FLOW_CAPABILITIES,
    RUVECTOR_CAPABILITIES,
  ];

  // Save capability definitions if requested
  if (saveCapabilityFiles) {
    console.log('Saving capability definitions...');
    saveCapabilities(outputDir);
    console.log();
  }

  // Generate triplets
  console.log('Generating training triplets...');

  const triplets = generateTriplets(allCapabilities);
  const categoryExamples = generateCategoryExamples();

  // Combine all triplets
  const allTriplets = [...triplets, ...categoryExamples];

  // Shuffle for better training (Fisher-Yates). Sorting with a random
  // comparator is inconsistent from the sort's point of view, which makes the
  // resulting order engine-dependent and the permutation biased.
  for (let i = allTriplets.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [allTriplets[i], allTriplets[j]] = [allTriplets[j], allTriplets[i]];
  }

  // Statistics. Triplets that carry no `tool`/`category` are counted only in
  // `total` and `hardNegatives`.
  const stats = {
    total: allTriplets.length,
    byTool: {},
    byCategory: {},
    hardNegatives: 0,
  };

  for (const t of allTriplets) {
    if (t.tool) {
      stats.byTool[t.tool] = (stats.byTool[t.tool] || 0) + 1;
    }
    if (t.category) {
      stats.byCategory[t.category] = (stats.byCategory[t.category] || 0) + 1;
    }
    if (t.isHard) {
      stats.hardNegatives++;
    }
  }

  // Save triplets as JSONL. Every record, including the last, is terminated
  // with '\n' so the file can be concatenated with other JSONL files without
  // two records ending up on one line.
  const jsonlContent = allTriplets.map((t) => `${JSON.stringify({
    anchor: t.anchor,
    positive: t.positive,
    negative: t.negative,
  })}\n`).join('');

  fs.writeFileSync(outputFile, jsonlContent);

  // Print summary
  console.log('\n' + '-'.repeat(60));
  console.log('                    GENERATION SUMMARY');
  console.log('-'.repeat(60) + '\n');

  // Avoid printing "NaN%" when nothing was generated.
  const hardPercent = stats.total > 0
    ? ((stats.hardNegatives / stats.total) * 100).toFixed(1)
    : '0.0';

  console.log(`Total triplets generated: ${stats.total}`);
  console.log(`Hard negatives: ${stats.hardNegatives} (${hardPercent}%)`);
  console.log();

  console.log('Triplets by tool:');
  for (const [tool, count] of Object.entries(stats.byTool)) {
    console.log(`  ${tool.padEnd(20)} ${count}`);
  }
  console.log();

  // Only the first 10 categories are listed; the rest are summarized.
  const categoryCounts = Object.entries(stats.byCategory);
  console.log('Triplets by category:');
  for (const [category, count] of categoryCounts.slice(0, 10)) {
    console.log(`  ${category.padEnd(20)} ${count}`);
  }
  if (categoryCounts.length > 10) {
    console.log(`  ... and ${categoryCounts.length - 10} more categories`);
  }
  console.log();

  console.log(`Output saved to: ${outputFile}`);
  console.log();

  // Show sample triplets
  console.log('-'.repeat(60));
  console.log('                    SAMPLE TRIPLETS');
  console.log('-'.repeat(60) + '\n');

  for (const triplet of allTriplets.slice(0, 5)) {
    console.log(`  Anchor:   "${triplet.anchor}"`);
    console.log(`  Positive: ${triplet.positive}`);
    console.log(`  Negative: ${triplet.negative}`);
    console.log();
  }

  console.log('='.repeat(80));
  console.log('                         NEXT STEPS');
  console.log('='.repeat(80) + '\n');

  console.log('1. Merge with existing training data:');
  console.log(`   cat ~/.ruvllm/training/ruvltra-finetuned/triplets.jsonl ${outputFile} > combined.jsonl`);
  console.log();
  console.log('2. Train with contrastive loss:');
  console.log('   cargo run --example train_contrastive --release -- --triplets combined.jsonl --epochs 30');
  console.log();
  console.log('3. Evaluate routing accuracy improvement');
  console.log();
}

// Export for testing: the capability definitions, the confusing-pair table and
// the generator functions. getCommandConfig, saveCapabilities and main are
// intentionally absent from this list.
module.exports = {
  CLAUDE_FLOW_CAPABILITIES,
  AGENTIC_FLOW_CAPABILITIES,
  RUVECTOR_CAPABILITIES,
  generatePrompts,
  findHardNegatives,
  generateTriplets,
  generateCategoryExamples,
  CONFUSING_PAIRS,
};

// Run if called directly: main() is invoked only when this file is the entry
// script, so loading it with require() does not start a generation run.
if (require.main === module) {
  main();
}