wifi-densepose/vendor/ruvector/benchmarks/graph/src/comparison-runner.ts

/**
 * Comparison runner for RuVector vs Neo4j benchmarks
 * Executes benchmarks on both systems and compares results
 */

import { exec } from 'child_process';
import { promisify } from 'util';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { join } from 'path';

const execAsync = promisify(exec);

export interface BenchmarkMetrics {
  system: 'ruvector' | 'neo4j';
  scenario: string;
  operation: string;
  duration_ms: number;
  throughput_ops: number;
  memory_mb: number;
  cpu_percent: number;
  latency_p50: number;
  latency_p95: number;
  latency_p99: number;
}

export interface ComparisonResult {
  scenario: string;
  operation: string;
  ruvector: BenchmarkMetrics;
  neo4j: BenchmarkMetrics;
  speedup: number;
  memory_improvement: number;
  verdict: 'pass' | 'fail';
}

/**
 * Run RuVector benchmarks
 */
async function runRuVectorBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
  console.log(`Running RuVector benchmarks for ${scenario}...`);

  try {
    // Run Rust benchmarks
    const { stdout, stderr } = await execAsync(
      `cargo bench --bench graph_bench -- --save-baseline ${scenario}`,
      { cwd: '/home/user/ruvector/crates/ruvector-graph' }
    );

    console.log('RuVector benchmark output:', stdout);

    // Parse criterion output
    const metrics = parseCriterionOutput(stdout, 'ruvector', scenario);

    return metrics;
  } catch (error) {
    console.error('Error running RuVector benchmarks:', error);
    throw error;
  }
}

/**
 * Run Neo4j benchmarks
 */
async function runNeo4jBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
  console.log(`Running Neo4j benchmarks for ${scenario}...`);

  // Check if Neo4j is available
  try {
    await execAsync('which cypher-shell');
  } catch {
    console.warn('Neo4j not available, using baseline metrics');
    return loadBaselineMetrics('neo4j', scenario);
  }

  try {
    // Run equivalent Neo4j queries
    const queries = generateNeo4jQuery(scenario);
    const metrics: BenchmarkMetrics[] = [];

    for (const query of queries) {
      const start = Date.now();

      await execAsync(
        `cypher-shell -u neo4j -p password "${query.cypher}"`,
        { timeout: 300000 }
      );

      const duration = Date.now() - start;

      metrics.push({
        system: 'neo4j',
        scenario,
        operation: query.operation,
        duration_ms: duration,
        throughput_ops: query.count / (duration / 1000),
        memory_mb: 0, // Would need Neo4j metrics API
        cpu_percent: 0,
        latency_p50: duration,
        latency_p95: 0, // Cannot accurately estimate without percentile data
        latency_p99: 0  // Cannot accurately estimate without percentile data
      });
    }

    return metrics;
  } catch (error) {
    console.error('Error running Neo4j benchmarks:', error);
    return loadBaselineMetrics('neo4j', scenario);
  }
}

/**
 * Generate Neo4j Cypher queries for scenario
 */
function generateNeo4jQuery(scenario: string): Array<{ operation: string; cypher: string; count: number }> {
  const queries: Record<string, Array<{ operation: string; cypher: string; count: number }>> = {
    social_network: [
      {
        operation: 'node_creation',
        cypher: 'UNWIND range(1, 1000) AS i CREATE (u:User {id: i, name: "user_" + i})',
        count: 1000
      },
      {
        operation: 'edge_creation',
        cypher: 'MATCH (u1:User), (u2:User) WHERE u1.id < u2.id AND rand() < 0.01 CREATE (u1)-[:FRIENDS_WITH]->(u2)',
        count: 10000
      },
      {
        operation: '1hop_traversal',
        cypher: 'MATCH (u:User {id: 500})-[:FRIENDS_WITH]-(friend) RETURN count(friend)',
        count: 1
      },
      {
        operation: '2hop_traversal',
        cypher: 'MATCH (u:User {id: 500})-[:FRIENDS_WITH*..2]-(friend) RETURN count(DISTINCT friend)',
        count: 1
      },
      {
        operation: 'aggregation',
        cypher: 'MATCH (u:User) RETURN avg(u.age) AS avgAge',
        count: 1
      }
    ],
    knowledge_graph: [
      {
        operation: 'multi_hop',
        cypher: 'MATCH (p:Person)-[:WORKS_AT]->(o:Organization)-[:LOCATED_IN]->(l:Location) RETURN p.name, o.name, l.name LIMIT 100',
        count: 100
      },
      {
        operation: 'path_finding',
        cypher: 'MATCH path = shortestPath((e1:Entity)-[*]-(e2:Entity)) WHERE id(e1) = 0 AND id(e2) = 1000 RETURN length(path)',
        count: 1
      }
    ],
    temporal_events: [
      {
        operation: 'time_range_query',
        cypher: 'MATCH (e:Event) WHERE e.timestamp > datetime() - duration({days: 7}) RETURN count(e)',
        count: 1
      },
      {
        operation: 'state_transition',
        cypher: 'MATCH (e1:Event)-[:TRANSITIONS_TO]->(e2:Event) RETURN count(*)',
        count: 1
      }
    ]
  };

  return queries[scenario] || [];
}

/**
 * Parse Criterion benchmark output
 */
function parseCriterionOutput(output: string, system: 'ruvector' | 'neo4j', scenario: string): BenchmarkMetrics[] {
  const metrics: BenchmarkMetrics[] = [];

  // Parse criterion output format
  const lines = output.split('\n');
  let currentOperation = '';

  for (const line of lines) {
    // Match benchmark group names
    if (line.includes('Benchmarking')) {
      const match = line.match(/Benchmarking (.+)/);
      if (match) {
        currentOperation = match[1];
      }
    }

    // Match timing results
    if (line.includes('time:') && currentOperation) {
      const timeMatch = line.match(/time:\s+\[(.+?)\s+(.+?)\s+(.+?)\]/);
      if (timeMatch) {
        const p50 = parseFloat(timeMatch[2]);

        metrics.push({
          system,
          scenario,
          operation: currentOperation,
          duration_ms: p50,
          throughput_ops: 1000 / p50,
          memory_mb: 0,
          cpu_percent: 0,
          latency_p50: p50,
          latency_p95: 0, // Would need to parse from criterion percentile output
          latency_p99: 0  // Would need to parse from criterion percentile output
        });
      }
    }
  }

  return metrics;
}

/**
 * Load baseline metrics (pre-recorded Neo4j results)
 */
function loadBaselineMetrics(system: string, scenario: string): BenchmarkMetrics[] {
  const baselinePath = join(__dirname, '../data/baselines', `${system}_${scenario}.json`);

  if (existsSync(baselinePath)) {
    const data = readFileSync(baselinePath, 'utf-8');
    return JSON.parse(data);
  }

  // Error: no baseline data available
  throw new Error(
    `No baseline data available for ${system} ${scenario}. ` +
    `Cannot run comparison without actual measured data. ` +
    `Please run benchmarks on both systems first and save results to ${baselinePath}`
  );
}

/**
 * Compare RuVector vs Neo4j results
 */
function compareResults(
  ruvectorMetrics: BenchmarkMetrics[],
  neo4jMetrics: BenchmarkMetrics[]
): ComparisonResult[] {
  const results: ComparisonResult[] = [];

  // Match operations between systems
  for (const rvMetric of ruvectorMetrics) {
    const neoMetric = neo4jMetrics.find(m =>
      m.operation === rvMetric.operation ||
      m.operation.includes(rvMetric.operation.split('_')[0])
    );

    if (!neoMetric) continue;

    const speedup = neoMetric.duration_ms / rvMetric.duration_ms;
    const memoryImprovement = (neoMetric.memory_mb - rvMetric.memory_mb) / neoMetric.memory_mb;

    // Pass if RuVector is 10x faster OR uses 50% less memory
    const verdict = speedup >= 10 || memoryImprovement >= 0.5 ? 'pass' : 'fail';

    results.push({
      scenario: rvMetric.scenario,
      operation: rvMetric.operation,
      ruvector: rvMetric,
      neo4j: neoMetric,
      speedup,
      memory_improvement: memoryImprovement,
      verdict
    });
  }

  return results;
}

/**
 * Run comparison benchmark
 */
export async function runComparison(scenario: string): Promise<ComparisonResult[]> {
  console.log(`\n=== Running Comparison: ${scenario} ===\n`);

  // Run both benchmarks in parallel
  const [ruvectorMetrics, neo4jMetrics] = await Promise.all([
    runRuVectorBenchmarks(scenario),
    runNeo4jBenchmarks(scenario)
  ]);

  // Compare results
  const comparison = compareResults(ruvectorMetrics, neo4jMetrics);

  // Print summary
  console.log('\n=== Comparison Results ===\n');
  console.table(comparison.map(r => ({
    Operation: r.operation,
    'RuVector (ms)': r.ruvector.duration_ms.toFixed(2),
    'Neo4j (ms)': r.neo4j.duration_ms.toFixed(2),
    'Speedup': `${r.speedup.toFixed(2)}x`,
    'Verdict': r.verdict === 'pass' ? '✅ PASS' : '❌ FAIL'
  })));

  // Save results
  const outputPath = join(__dirname, '../results/graph', `${scenario}_comparison.json`);
  writeFileSync(outputPath, JSON.stringify(comparison, null, 2));
  console.log(`\nResults saved to: ${outputPath}`);

  return comparison;
}

/**
 * Run all comparisons
 */
export async function runAllComparisons(): Promise<void> {
  const scenarios = ['social_network', 'knowledge_graph', 'temporal_events'];

  for (const scenario of scenarios) {
    await runComparison(scenario);
  }

  console.log('\n=== All Comparisons Complete ===');
}

// Run if called directly
if (require.main === module) {
  const scenario = process.argv[2] || 'all';

  if (scenario === 'all') {
    runAllComparisons().catch(console.error);
  } else {
    runComparison(scenario).catch(console.error);
  }
}