Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
328
vendor/ruvector/benchmarks/graph/src/comparison-runner.ts
vendored
Normal file
328
vendor/ruvector/benchmarks/graph/src/comparison-runner.ts
vendored
Normal file
@@ -0,0 +1,328 @@
|
||||
/**
|
||||
* Comparison runner for RuVector vs Neo4j benchmarks
|
||||
* Executes benchmarks on both systems and compares results
|
||||
*/
|
||||
|
||||
import { exec } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
export interface BenchmarkMetrics {
|
||||
system: 'ruvector' | 'neo4j';
|
||||
scenario: string;
|
||||
operation: string;
|
||||
duration_ms: number;
|
||||
throughput_ops: number;
|
||||
memory_mb: number;
|
||||
cpu_percent: number;
|
||||
latency_p50: number;
|
||||
latency_p95: number;
|
||||
latency_p99: number;
|
||||
}
|
||||
|
||||
export interface ComparisonResult {
|
||||
scenario: string;
|
||||
operation: string;
|
||||
ruvector: BenchmarkMetrics;
|
||||
neo4j: BenchmarkMetrics;
|
||||
speedup: number;
|
||||
memory_improvement: number;
|
||||
verdict: 'pass' | 'fail';
|
||||
}
|
||||
|
||||
/**
|
||||
* Run RuVector benchmarks
|
||||
*/
|
||||
async function runRuVectorBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
|
||||
console.log(`Running RuVector benchmarks for ${scenario}...`);
|
||||
|
||||
try {
|
||||
// Run Rust benchmarks
|
||||
const { stdout, stderr } = await execAsync(
|
||||
`cargo bench --bench graph_bench -- --save-baseline ${scenario}`,
|
||||
{ cwd: '/home/user/ruvector/crates/ruvector-graph' }
|
||||
);
|
||||
|
||||
console.log('RuVector benchmark output:', stdout);
|
||||
|
||||
// Parse criterion output
|
||||
const metrics = parseCriterionOutput(stdout, 'ruvector', scenario);
|
||||
|
||||
return metrics;
|
||||
} catch (error) {
|
||||
console.error('Error running RuVector benchmarks:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run Neo4j benchmarks
|
||||
*/
|
||||
async function runNeo4jBenchmarks(scenario: string): Promise<BenchmarkMetrics[]> {
|
||||
console.log(`Running Neo4j benchmarks for ${scenario}...`);
|
||||
|
||||
// Check if Neo4j is available
|
||||
try {
|
||||
await execAsync('which cypher-shell');
|
||||
} catch {
|
||||
console.warn('Neo4j not available, using baseline metrics');
|
||||
return loadBaselineMetrics('neo4j', scenario);
|
||||
}
|
||||
|
||||
try {
|
||||
// Run equivalent Neo4j queries
|
||||
const queries = generateNeo4jQuery(scenario);
|
||||
const metrics: BenchmarkMetrics[] = [];
|
||||
|
||||
for (const query of queries) {
|
||||
const start = Date.now();
|
||||
|
||||
await execAsync(
|
||||
`cypher-shell -u neo4j -p password "${query.cypher}"`,
|
||||
{ timeout: 300000 }
|
||||
);
|
||||
|
||||
const duration = Date.now() - start;
|
||||
|
||||
metrics.push({
|
||||
system: 'neo4j',
|
||||
scenario,
|
||||
operation: query.operation,
|
||||
duration_ms: duration,
|
||||
throughput_ops: query.count / (duration / 1000),
|
||||
memory_mb: 0, // Would need Neo4j metrics API
|
||||
cpu_percent: 0,
|
||||
latency_p50: duration,
|
||||
latency_p95: 0, // Cannot accurately estimate without percentile data
|
||||
latency_p99: 0 // Cannot accurately estimate without percentile data
|
||||
});
|
||||
}
|
||||
|
||||
return metrics;
|
||||
} catch (error) {
|
||||
console.error('Error running Neo4j benchmarks:', error);
|
||||
return loadBaselineMetrics('neo4j', scenario);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate Neo4j Cypher queries for scenario
|
||||
*/
|
||||
function generateNeo4jQuery(scenario: string): Array<{ operation: string; cypher: string; count: number }> {
|
||||
const queries: Record<string, Array<{ operation: string; cypher: string; count: number }>> = {
|
||||
social_network: [
|
||||
{
|
||||
operation: 'node_creation',
|
||||
cypher: 'UNWIND range(1, 1000) AS i CREATE (u:User {id: i, name: "user_" + i})',
|
||||
count: 1000
|
||||
},
|
||||
{
|
||||
operation: 'edge_creation',
|
||||
cypher: 'MATCH (u1:User), (u2:User) WHERE u1.id < u2.id AND rand() < 0.01 CREATE (u1)-[:FRIENDS_WITH]->(u2)',
|
||||
count: 10000
|
||||
},
|
||||
{
|
||||
operation: '1hop_traversal',
|
||||
cypher: 'MATCH (u:User {id: 500})-[:FRIENDS_WITH]-(friend) RETURN count(friend)',
|
||||
count: 1
|
||||
},
|
||||
{
|
||||
operation: '2hop_traversal',
|
||||
cypher: 'MATCH (u:User {id: 500})-[:FRIENDS_WITH*..2]-(friend) RETURN count(DISTINCT friend)',
|
||||
count: 1
|
||||
},
|
||||
{
|
||||
operation: 'aggregation',
|
||||
cypher: 'MATCH (u:User) RETURN avg(u.age) AS avgAge',
|
||||
count: 1
|
||||
}
|
||||
],
|
||||
knowledge_graph: [
|
||||
{
|
||||
operation: 'multi_hop',
|
||||
cypher: 'MATCH (p:Person)-[:WORKS_AT]->(o:Organization)-[:LOCATED_IN]->(l:Location) RETURN p.name, o.name, l.name LIMIT 100',
|
||||
count: 100
|
||||
},
|
||||
{
|
||||
operation: 'path_finding',
|
||||
cypher: 'MATCH path = shortestPath((e1:Entity)-[*]-(e2:Entity)) WHERE id(e1) = 0 AND id(e2) = 1000 RETURN length(path)',
|
||||
count: 1
|
||||
}
|
||||
],
|
||||
temporal_events: [
|
||||
{
|
||||
operation: 'time_range_query',
|
||||
cypher: 'MATCH (e:Event) WHERE e.timestamp > datetime() - duration({days: 7}) RETURN count(e)',
|
||||
count: 1
|
||||
},
|
||||
{
|
||||
operation: 'state_transition',
|
||||
cypher: 'MATCH (e1:Event)-[:TRANSITIONS_TO]->(e2:Event) RETURN count(*)',
|
||||
count: 1
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
return queries[scenario] || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Criterion benchmark output
|
||||
*/
|
||||
function parseCriterionOutput(output: string, system: 'ruvector' | 'neo4j', scenario: string): BenchmarkMetrics[] {
|
||||
const metrics: BenchmarkMetrics[] = [];
|
||||
|
||||
// Parse criterion output format
|
||||
const lines = output.split('\n');
|
||||
let currentOperation = '';
|
||||
|
||||
for (const line of lines) {
|
||||
// Match benchmark group names
|
||||
if (line.includes('Benchmarking')) {
|
||||
const match = line.match(/Benchmarking (.+)/);
|
||||
if (match) {
|
||||
currentOperation = match[1];
|
||||
}
|
||||
}
|
||||
|
||||
// Match timing results
|
||||
if (line.includes('time:') && currentOperation) {
|
||||
const timeMatch = line.match(/time:\s+\[(.+?)\s+(.+?)\s+(.+?)\]/);
|
||||
if (timeMatch) {
|
||||
const p50 = parseFloat(timeMatch[2]);
|
||||
|
||||
metrics.push({
|
||||
system,
|
||||
scenario,
|
||||
operation: currentOperation,
|
||||
duration_ms: p50,
|
||||
throughput_ops: 1000 / p50,
|
||||
memory_mb: 0,
|
||||
cpu_percent: 0,
|
||||
latency_p50: p50,
|
||||
latency_p95: 0, // Would need to parse from criterion percentile output
|
||||
latency_p99: 0 // Would need to parse from criterion percentile output
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return metrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load baseline metrics (pre-recorded Neo4j results)
|
||||
*/
|
||||
function loadBaselineMetrics(system: string, scenario: string): BenchmarkMetrics[] {
|
||||
const baselinePath = join(__dirname, '../data/baselines', `${system}_${scenario}.json`);
|
||||
|
||||
if (existsSync(baselinePath)) {
|
||||
const data = readFileSync(baselinePath, 'utf-8');
|
||||
return JSON.parse(data);
|
||||
}
|
||||
|
||||
// Error: no baseline data available
|
||||
throw new Error(
|
||||
`No baseline data available for ${system} ${scenario}. ` +
|
||||
`Cannot run comparison without actual measured data. ` +
|
||||
`Please run benchmarks on both systems first and save results to ${baselinePath}`
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare RuVector vs Neo4j results
|
||||
*/
|
||||
function compareResults(
|
||||
ruvectorMetrics: BenchmarkMetrics[],
|
||||
neo4jMetrics: BenchmarkMetrics[]
|
||||
): ComparisonResult[] {
|
||||
const results: ComparisonResult[] = [];
|
||||
|
||||
// Match operations between systems
|
||||
for (const rvMetric of ruvectorMetrics) {
|
||||
const neoMetric = neo4jMetrics.find(m =>
|
||||
m.operation === rvMetric.operation ||
|
||||
m.operation.includes(rvMetric.operation.split('_')[0])
|
||||
);
|
||||
|
||||
if (!neoMetric) continue;
|
||||
|
||||
const speedup = neoMetric.duration_ms / rvMetric.duration_ms;
|
||||
const memoryImprovement = (neoMetric.memory_mb - rvMetric.memory_mb) / neoMetric.memory_mb;
|
||||
|
||||
// Pass if RuVector is 10x faster OR uses 50% less memory
|
||||
const verdict = speedup >= 10 || memoryImprovement >= 0.5 ? 'pass' : 'fail';
|
||||
|
||||
results.push({
|
||||
scenario: rvMetric.scenario,
|
||||
operation: rvMetric.operation,
|
||||
ruvector: rvMetric,
|
||||
neo4j: neoMetric,
|
||||
speedup,
|
||||
memory_improvement: memoryImprovement,
|
||||
verdict
|
||||
});
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run comparison benchmark
|
||||
*/
|
||||
export async function runComparison(scenario: string): Promise<ComparisonResult[]> {
|
||||
console.log(`\n=== Running Comparison: ${scenario} ===\n`);
|
||||
|
||||
// Run both benchmarks in parallel
|
||||
const [ruvectorMetrics, neo4jMetrics] = await Promise.all([
|
||||
runRuVectorBenchmarks(scenario),
|
||||
runNeo4jBenchmarks(scenario)
|
||||
]);
|
||||
|
||||
// Compare results
|
||||
const comparison = compareResults(ruvectorMetrics, neo4jMetrics);
|
||||
|
||||
// Print summary
|
||||
console.log('\n=== Comparison Results ===\n');
|
||||
console.table(comparison.map(r => ({
|
||||
Operation: r.operation,
|
||||
'RuVector (ms)': r.ruvector.duration_ms.toFixed(2),
|
||||
'Neo4j (ms)': r.neo4j.duration_ms.toFixed(2),
|
||||
'Speedup': `${r.speedup.toFixed(2)}x`,
|
||||
'Verdict': r.verdict === 'pass' ? '✅ PASS' : '❌ FAIL'
|
||||
})));
|
||||
|
||||
// Save results
|
||||
const outputPath = join(__dirname, '../results/graph', `${scenario}_comparison.json`);
|
||||
writeFileSync(outputPath, JSON.stringify(comparison, null, 2));
|
||||
console.log(`\nResults saved to: ${outputPath}`);
|
||||
|
||||
return comparison;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run all comparisons
|
||||
*/
|
||||
export async function runAllComparisons(): Promise<void> {
|
||||
const scenarios = ['social_network', 'knowledge_graph', 'temporal_events'];
|
||||
|
||||
for (const scenario of scenarios) {
|
||||
await runComparison(scenario);
|
||||
}
|
||||
|
||||
console.log('\n=== All Comparisons Complete ===');
|
||||
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
const scenario = process.argv[2] || 'all';
|
||||
|
||||
if (scenario === 'all') {
|
||||
runAllComparisons().catch(console.error);
|
||||
} else {
|
||||
runComparison(scenario).catch(console.error);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user