/**
 * Comparison runner for RuVector vs Neo4j benchmarks.
 * Executes benchmarks on both systems and compares results.
 */

import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { join, dirname } from 'path';

const execAsync = promisify(exec);
// execFile passes arguments as an array, so no shell ever re-parses the
// scenario name or the Cypher text.
const execFileAsync = promisify(execFile);

/** Directory `cargo bench` runs in; override with RUVECTOR_GRAPH_DIR. */
const RUVECTOR_GRAPH_DIR =
  process.env.RUVECTOR_GRAPH_DIR || '/home/user/ruvector/crates/ruvector-graph';

/** Credentials handed to cypher-shell; override with NEO4J_USERNAME / NEO4J_PASSWORD. */
const NEO4J_USERNAME = process.env.NEO4J_USERNAME || 'neo4j';
const NEO4J_PASSWORD = process.env.NEO4J_PASSWORD || 'password';

/** Per-query timeout for cypher-shell, in milliseconds. */
const NEO4J_QUERY_TIMEOUT_MS = 300000;

/** Upper bound on captured child-process output, in bytes. */
const MAX_OUTPUT_BYTES = 64 * 1024 * 1024;

/** One measurement of one operation on one system. */
export interface BenchmarkMetrics {
  system: 'ruvector' | 'neo4j';
  scenario: string;
  operation: string;
  /** Time for the operation, in milliseconds. */
  duration_ms: number;
  /** Operations per second derived from `duration_ms`. */
  throughput_ops: number;
  /** Not collected by the runners in this file (they store 0). */
  memory_mb: number;
  /** Not collected by the runners in this file (they store 0). */
  cpu_percent: number;
  /** The runners in this file store the single measured duration here. */
  latency_p50: number;
  /** Not collected by the runners in this file (they store 0). */
  latency_p95: number;
  /** Not collected by the runners in this file (they store 0). */
  latency_p99: number;
}

/** Side-by-side result for one operation that exists on both systems. */
export interface ComparisonResult {
  scenario: string;
  operation: string;
  ruvector: BenchmarkMetrics;
  neo4j: BenchmarkMetrics;
  /** Neo4j duration divided by RuVector duration; 0 when not computable. */
  speedup: number;
  /** Fraction of Neo4j memory saved by RuVector; 0 when not computable. */
  memory_improvement: number;
  verdict: 'pass' | 'fail';
}

/**
 * Run the RuVector Criterion benchmarks and parse their report.
 *
 * @param scenario - Name passed to Criterion as the `--save-baseline` value.
 * @returns Metrics parsed from the benchmark's standard output.
 * @throws Re-throws any failure of the `cargo bench` process after logging it.
 */
async function runRuVectorBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
  console.log(`Running RuVector benchmarks for ${scenario}...`);

  try {
    // Run Rust benchmarks
    const { stdout } = await execFileAsync(
      'cargo',
      ['bench', '--bench', 'graph_bench', '--', '--save-baseline', scenario],
      { cwd: RUVECTOR_GRAPH_DIR, maxBuffer: MAX_OUTPUT_BYTES }
    );

    console.log('RuVector benchmark output:', stdout);

    // Parse criterion output
    return parseCriterionOutput(stdout, 'ruvector', scenario);
  } catch (error) {
    console.error('Error running RuVector benchmarks:', error);
    throw error;
  }
}

/**
 * Run the Neo4j queries for a scenario through `cypher-shell`.
 *
 * Falls back to pre-recorded baseline metrics when `cypher-shell` is not on
 * the PATH or when any query fails. Each duration is the wall-clock time of
 * the whole `cypher-shell` process, so it includes client start-up.
 *
 * @param scenario - Scenario whose queries should be executed.
 * @returns One metric per query, or the stored baseline metrics.
 * @throws When a fallback is needed but no baseline file exists.
 */
async function runNeo4jBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
  console.log(`Running Neo4j benchmarks for ${scenario}...`);

  // Check if Neo4j is available
  try {
    await execAsync('which cypher-shell');
  } catch {
    console.warn('Neo4j not available, using baseline metrics');
    return loadBaselineMetrics('neo4j', scenario);
  }

  try {
    // Run equivalent Neo4j queries
    const metrics: BenchmarkMetrics[] = [];

    for (const query of generateNeo4jQuery(scenario)) {
      const start = Date.now();

      // The Cypher text contains double quotes, so it must be passed as a
      // plain argument rather than embedded in a shell-quoted string.
      await execFileAsync(
        'cypher-shell',
        ['-u', NEO4J_USERNAME, '-p', NEO4J_PASSWORD, query.cypher],
        { timeout: NEO4J_QUERY_TIMEOUT_MS, maxBuffer: MAX_OUTPUT_BYTES }
      );

      const duration = Date.now() - start;

      metrics.push({
        system: 'neo4j',
        scenario,
        operation: query.operation,
        duration_ms: duration,
        // Guard against a 0 ms reading, which would otherwise yield Infinity.
        throughput_ops: duration > 0 ? query.count / (duration / 1000) : 0,
        memory_mb: 0, // Would need Neo4j metrics API
        cpu_percent: 0,
        latency_p50: duration,
        latency_p95: 0, // Cannot accurately estimate without percentile data
        latency_p99: 0 // Cannot accurately estimate without percentile data
      });
    }

    return metrics;
  } catch (error) {
    console.error('Error running Neo4j benchmarks:', error);
    return loadBaselineMetrics('neo4j', scenario);
  }
}

/**
 * Generate Neo4j Cypher queries for a scenario.
 *
 * @param scenario - One of `social_network`, `knowledge_graph`, `temporal_events`.
 * @returns The scenario's queries, or an empty array for an unknown scenario.
 */
function generateNeo4jQuery(
  scenario: string
): Array<{ operation: string; cypher: string; count: number }> {
  const queries: Record<string, Array<{ operation: string; cypher: string; count: number }>> = {
    social_network: [
      {
        operation: 'node_creation',
        cypher: 'UNWIND range(1, 1000) AS i CREATE (u:User {id: i, name: "user_" + i})',
        count: 1000
      },
      {
        operation: 'edge_creation',
        cypher:
          'MATCH (u1:User), (u2:User) WHERE u1.id < u2.id AND rand() < 0.01 CREATE (u1)-[:FRIENDS_WITH]->(u2)',
        count: 10000
      },
      {
        operation: '1hop_traversal',
        cypher: 'MATCH (u:User {id: 500})-[:FRIENDS_WITH]-(friend) RETURN count(friend)',
        count: 1
      },
      {
        operation: '2hop_traversal',
        cypher:
          'MATCH (u:User {id: 500})-[:FRIENDS_WITH*..2]-(friend) RETURN count(DISTINCT friend)',
        count: 1
      },
      {
        operation: 'aggregation',
        cypher: 'MATCH (u:User) RETURN avg(u.age) AS avgAge',
        count: 1
      }
    ],
    knowledge_graph: [
      {
        operation: 'multi_hop',
        cypher:
          'MATCH (p:Person)-[:WORKS_AT]->(o:Organization)-[:LOCATED_IN]->(l:Location) RETURN p.name, o.name, l.name LIMIT 100',
        count: 100
      },
      {
        operation: 'path_finding',
        cypher:
          'MATCH path = shortestPath((e1:Entity)-[*]-(e2:Entity)) WHERE id(e1) = 0 AND id(e2) = 1000 RETURN length(path)',
        count: 1
      }
    ],
    temporal_events: [
      {
        operation: 'time_range_query',
        cypher:
          'MATCH (e:Event) WHERE e.timestamp > datetime() - duration({days: 7}) RETURN count(e)',
        count: 1
      },
      {
        operation: 'state_transition',
        cypher: 'MATCH (e1:Event)-[:TRANSITIONS_TO]->(e2:Event) RETURN count(*)',
        count: 1
      }
    ]
  };

  // Own-property check so names such as "toString" do not resolve to
  // members inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(queries, scenario)) {
    return [];
  }
  return queries[scenario] ?? [];
}

/** Matches "Benchmarking <id>" with an optional ": <phase> ..." suffix. */
const CRITERION_HEADER_RE =
  /^\s*Benchmarking (.+?)(?::\s+(?:Warming up|Collecting|Analyzing)\b.*)?\s*$/;

/**
 * Matches "<id>  time:   [<low> <unit> <mid> <unit> <high> <unit>]".
 * Group 1 is the (possibly empty) text before "time:", groups 2 and 3 are the
 * middle value and its unit. Percentage lines from the "change" section do not
 * match because their tokens are not "<number> <unit>" pairs.
 */
const CRITERION_TIME_RE =
  /^(.*?)\btime:\s+\[\s*[0-9.]+\s+\S+\s+([0-9.]+)\s+(\S+)\s+[0-9.]+\s+\S+?\s*\]/;

/** Convert a Criterion time value to milliseconds; undefined for unknown units. */
function criterionTimeToMs(value: number, unit: string): number | undefined {
  switch (unit) {
    case 'ps':
      return value / 1e9;
    case 'ns':
      return value / 1e6;
    case '\u00b5s': // micro sign
    case '\u03bcs': // Greek small letter mu
    case 'us':
      return value / 1e3;
    case 'ms':
      return value;
    case 's':
      return value * 1e3;
    default:
      return undefined;
  }
}

/**
 * Parse Criterion benchmark output into metrics.
 *
 * The operation name is taken from the text preceding `time:` on the result
 * line when present, otherwise from the most recent `Benchmarking <id>` line.
 * The middle value of the `[low mid high]` triple is converted to
 * milliseconds and stored as both `duration_ms` and `latency_p50`.
 *
 * @param output - Raw text produced by the benchmark run.
 * @param system - System label to stamp on every metric.
 * @param scenario - Scenario label to stamp on every metric.
 * @returns One metric per recognised timing line, in output order.
 */
function parseCriterionOutput(
  output: string,
  system: 'ruvector' | 'neo4j',
  scenario: string
): BenchmarkMetrics[] {
  const metrics: BenchmarkMetrics[] = [];
  let currentOperation = '';

  // Also split on bare carriage returns, which overwrite-style progress
  // output uses instead of newlines.
  for (const line of output.split(/\r\n|\n|\r/)) {
    // Match benchmark group names
    const header = CRITERION_HEADER_RE.exec(line);
    if (header) {
      currentOperation = (header[1] ?? '').trim();
      continue;
    }

    // Match timing results
    const timing = CRITERION_TIME_RE.exec(line);
    if (!timing) continue;

    const operation = (timing[1] ?? '').trim() || currentOperation;
    if (!operation) continue;

    const estimateMs = criterionTimeToMs(parseFloat(timing[2] ?? ''), timing[3] ?? '');
    if (estimateMs === undefined || !Number.isFinite(estimateMs)) continue;

    metrics.push({
      system,
      scenario,
      operation,
      duration_ms: estimateMs,
      throughput_ops: estimateMs > 0 ? 1000 / estimateMs : 0,
      memory_mb: 0,
      cpu_percent: 0,
      latency_p50: estimateMs,
      latency_p95: 0, // Would need to parse from criterion percentile output
      latency_p99: 0 // Would need to parse from criterion percentile output
    });
  }

  return metrics;
}

/**
 * Load baseline metrics (pre-recorded results) for a system and scenario.
 *
 * @param system - System name used as the file-name prefix.
 * @param scenario - Scenario name used as the file-name suffix.
 * @returns The metrics stored in `../data/baselines/<system>_<scenario>.json`.
 * @throws When the file is missing or does not contain a JSON array.
 */
function loadBaselineMetrics(system: string, scenario: string): BenchmarkMetrics[] {
  const baselinePath = join(__dirname, '../data/baselines', `${system}_${scenario}.json`);

  if (existsSync(baselinePath)) {
    const parsed: unknown = JSON.parse(readFileSync(baselinePath, 'utf-8'));
    if (!Array.isArray(parsed)) {
      throw new Error(
        `Baseline file ${baselinePath} must contain a JSON array of benchmark metrics`
      );
    }
    return parsed as BenchmarkMetrics[];
  }

  // Error: no baseline data available
  throw new Error(
    `No baseline data available for ${system} ${scenario}. ` +
      `Cannot run comparison without actual measured data. ` +
      `Please run benchmarks on both systems first and save results to ${baselinePath}`
  );
}

/**
 * Find the Neo4j metric that corresponds to a RuVector operation.
 * An exact name match always wins; otherwise the first metric whose name
 * contains the operation's leading `_`-separated token is used.
 */
function findMatchingMetric(
  operation: string,
  candidates: BenchmarkMetrics[]
): BenchmarkMetrics | undefined {
  const exact = candidates.find(m => m.operation === operation);
  if (exact) return exact;

  const prefix = operation.split('_')[0] ?? '';
  // An empty prefix would be "contained" in every name.
  if (!prefix) return undefined;
  return candidates.find(m => m.operation.includes(prefix));
}

/** Divide, returning 0 when the result would not be a finite number. */
function finiteRatio(numerator: number, denominator: number): number {
  const ratio = numerator / denominator;
  return Number.isFinite(ratio) ? ratio : 0;
}

/**
 * Compare RuVector vs Neo4j results.
 *
 * RuVector operations without a Neo4j counterpart are omitted. A pair passes
 * when RuVector is at least 10x faster or uses at least 50% less memory.
 * Ratios that cannot be computed (zero or non-finite denominators) are
 * reported as 0 so the result stays JSON-serialisable.
 *
 * @param ruvectorMetrics - Metrics measured on RuVector.
 * @param neo4jMetrics - Metrics measured on (or loaded for) Neo4j.
 * @returns One comparison per matched RuVector operation, in input order.
 */
function compareResults(
  ruvectorMetrics: BenchmarkMetrics[],
  neo4jMetrics: BenchmarkMetrics[]
): ComparisonResult[] {
  const results: ComparisonResult[] = [];

  // Match operations between systems
  for (const rvMetric of ruvectorMetrics) {
    const neoMetric = findMatchingMetric(rvMetric.operation, neo4jMetrics);
    if (!neoMetric) continue;

    const speedup = finiteRatio(neoMetric.duration_ms, rvMetric.duration_ms);
    const memoryImprovement = finiteRatio(
      neoMetric.memory_mb - rvMetric.memory_mb,
      neoMetric.memory_mb
    );

    // Pass if RuVector is 10x faster OR uses 50% less memory
    const verdict = speedup >= 10 || memoryImprovement >= 0.5 ? 'pass' : 'fail';

    results.push({
      scenario: rvMetric.scenario,
      operation: rvMetric.operation,
      ruvector: rvMetric,
      neo4j: neoMetric,
      speedup,
      memory_improvement: memoryImprovement,
      verdict
    });
  }

  return results;
}

/**
 * Run the comparison benchmark for one scenario, print a summary table and
 * save the results to `../results/graph/<scenario>_comparison.json`.
 *
 * @param scenario - Scenario to benchmark on both systems.
 * @returns The per-operation comparison results.
 * @throws When the RuVector benchmark fails or no Neo4j data is available.
 */
export async function runComparison(scenario: string): Promise<ComparisonResult[]> {
  console.log(`\n=== Running Comparison: ${scenario} ===\n`);

  // Run the two benchmarks one after the other: running them concurrently
  // makes them compete for CPU and distorts both sets of timings.
  const ruvectorMetrics = await runRuVectorBenchmarks(scenario);
  const neo4jMetrics = await runNeo4jBenchmarks(scenario);

  // Compare results
  const comparison = compareResults(ruvectorMetrics, neo4jMetrics);

  // Print summary
  console.log('\n=== Comparison Results ===\n');
  console.table(
    comparison.map(r => ({
      Operation: r.operation,
      'RuVector (ms)': r.ruvector.duration_ms.toFixed(2),
      'Neo4j (ms)': r.neo4j.duration_ms.toFixed(2),
      'Speedup': `${r.speedup.toFixed(2)}x`,
      'Verdict': r.verdict === 'pass' ? '✅ PASS' : '❌ FAIL'
    }))
  );

  // Save results
  const outputPath = join(__dirname, '../results/graph', `${scenario}_comparison.json`);
  // The results directory may not exist on a fresh checkout.
  mkdirSync(dirname(outputPath), { recursive: true });
  writeFileSync(outputPath, JSON.stringify(comparison, null, 2));
  console.log(`\nResults saved to: ${outputPath}`);

  return comparison;
}

/**
 * Run all comparisons, one scenario at a time.
 *
 * @throws The first scenario failure; later scenarios are not attempted.
 */
export async function runAllComparisons(): Promise<void> {
  const scenarios = ['social_network', 'knowledge_graph', 'temporal_events'];

  for (const scenario of scenarios) {
    await runComparison(scenario);
  }

  console.log('\n=== All Comparisons Complete ===');
}

// Run if called directly
if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
  const scenario = process.argv[2] || 'all';
  const run: Promise<unknown> =
    scenario === 'all' ? runAllComparisons() : runComparison(scenario);

  run.catch((error: unknown) => {
    console.error(error);
    // Signal failure to the calling shell / CI instead of exiting with 0.
    process.exitCode = 1;
  });
}