git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
17 KiB
17 KiB
Benchmarking Plan
Overview
Comprehensive benchmarking strategy for ruvector-postgres covering micro-benchmarks, integration tests, comparison with competitors, and production workload simulation.
Benchmark Categories
1. Micro-Benchmarks
Test individual operations in isolation.
// benches/distance_bench.rs
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
/// Micro-benchmark: Euclidean distance across common embedding sizes,
/// comparing the scalar path against the SIMD implementations.
fn bench_euclidean_distance(c: &mut Criterion) {
    let mut group = c.benchmark_group("euclidean_distance");
    // Dimensions matching popular embedding models (e.g. 768, 1536).
    for &dim in &[128, 256, 512, 768, 1024, 1536] {
        // Fresh random operands per dimension; generated at runtime so the
        // compiler cannot constant-fold the distance computation.
        let lhs: Vec<f32> = (0..dim).map(|_| rand::random()).collect();
        let rhs: Vec<f32> = (0..dim).map(|_| rand::random()).collect();

        group.bench_with_input(BenchmarkId::new("scalar", dim), &dim, |b, _| {
            b.iter(|| euclidean_scalar(&lhs, &rhs))
        });
        group.bench_with_input(BenchmarkId::new("simd_auto", dim), &dim, |b, _| {
            b.iter(|| euclidean_simd(&lhs, &rhs))
        });

        #[cfg(target_arch = "x86_64")]
        {
            group.bench_with_input(BenchmarkId::new("avx2", dim), &dim, |b, _| {
                b.iter(|| unsafe { euclidean_avx2(&lhs, &rhs) })
            });
            // AVX-512 is only benchmarked when the host CPU reports support.
            if is_x86_feature_detected!("avx512f") {
                group.bench_with_input(BenchmarkId::new("avx512", dim), &dim, |b, _| {
                    b.iter(|| unsafe { euclidean_avx512(&lhs, &rhs) })
                });
            }
        }
    }
    group.finish();
}
/// Placeholder: same dimension sweep as `bench_euclidean_distance`, for cosine distance.
fn bench_cosine_distance(c: &mut Criterion) {
// Similar structure for cosine
}
/// Placeholder: same dimension sweep as `bench_euclidean_distance`, for dot product.
fn bench_dot_product(c: &mut Criterion) {
// Similar structure for dot product
}
// Register all distance micro-benchmarks with criterion's harness.
criterion_group!(
distance_benches,
bench_euclidean_distance,
bench_cosine_distance,
bench_dot_product
);
criterion_main!(distance_benches);
Expected Results: Distance Functions
| Operation | Dimension | Scalar (ns) | AVX2 (ns) | AVX-512 (ns) | Speedup |
|---|---|---|---|---|---|
| Euclidean | 128 | 180 | 45 | 28 | 6.4x |
| Euclidean | 768 | 980 | 210 | 125 | 7.8x |
| Euclidean | 1536 | 1950 | 420 | 245 | 8.0x |
| Cosine | 128 | 240 | 62 | 38 | 6.3x |
| Cosine | 768 | 1280 | 285 | 168 | 7.6x |
| Dot Product | 768 | 450 | 95 | 58 | 7.8x |
2. Index Benchmarks
// benches/index_bench.rs
// Benchmark: end-to-end HNSW index construction (insert-by-insert) for
// several corpus sizes and dimensionalities.
fn bench_hnsw_build(c: &mut Criterion) {
let sizes = [10_000, 100_000, 1_000_000];
let dims = [128, 768];
let mut group = c.benchmark_group("hnsw_build");
// Index builds are slow: take few samples over a long measurement window.
group.sample_size(10);
group.measurement_time(Duration::from_secs(30));
for size in sizes {
for dim in dims {
// Corpus is generated once per (size, dim) pair, outside the timed region.
let vectors = generate_random_vectors(size, dim);
group.bench_with_input(
BenchmarkId::new(format!("{}d", dim), size),
&(&vectors, dim),
|bench, (vecs, _)| {
// Each criterion iteration builds a fresh index from scratch, so the
// timing includes allocation plus every graph insertion.
bench.iter(|| {
let mut index = HnswIndex::new(HnswConfig {
m: 16,
ef_construction: 200,
..Default::default()
});
for (i, v) in vecs.iter().enumerate() {
index.insert(i as u64, v);
}
})
}
);
}
}
group.finish();
}
/// Benchmark: HNSW query latency on a 1M x 768d index, sweeping ef (search
/// beam width) and k (result count).
///
/// Note: each criterion iteration runs the whole 1000-query batch, so the
/// reported time is per-batch, not per-query.
fn bench_hnsw_search(c: &mut Criterion) {
    use std::hint::black_box;

    // Build once up front; only the searches are timed.
    let index = build_hnsw_index(1_000_000, 768);
    let queries = generate_random_vectors(1000, 768);
    let ef_values = [10, 50, 100, 200, 500];
    let k_values = [1, 10, 100];
    let mut group = c.benchmark_group("hnsw_search");
    for ef in ef_values {
        for k in k_values {
            group.bench_with_input(
                BenchmarkId::new(format!("ef{}_k{}", ef, k), "1M"),
                &(&index, &queries, ef, k),
                |bench, (idx, qs, ef, k)| {
                    bench.iter(|| {
                        for q in qs.iter() {
                            // black_box keeps the otherwise-discarded result
                            // observable so the search cannot be optimized away
                            // (the closure returns (), so criterion cannot do
                            // this for us).
                            black_box(idx.search(black_box(q), *k, *ef));
                        }
                    })
                },
            );
        }
    }
    group.finish();
}
/// Benchmark: IVFFlat query latency on a 1M x 768d index (1000 lists),
/// sweeping the number of probed lists — the recall/latency tradeoff knob.
///
/// Note: each criterion iteration runs the whole 1000-query batch.
fn bench_ivfflat_search(c: &mut Criterion) {
    use std::hint::black_box;

    let index = build_ivfflat_index(1_000_000, 768, 1000); // 1000 lists
    let queries = generate_random_vectors(1000, 768);
    let probe_values = [1, 5, 10, 20, 50];
    let mut group = c.benchmark_group("ivfflat_search");
    for probes in probe_values {
        group.bench_with_input(
            BenchmarkId::new(format!("probes{}", probes), "1M"),
            &probes,
            |bench, probes| {
                bench.iter(|| {
                    for q in queries.iter() {
                        // Keep results observable: they were previously dropped
                        // inside a ()-returning closure, letting the compiler
                        // dead-code-eliminate a pure search.
                        black_box(index.search(black_box(q), 10, *probes));
                    }
                })
            },
        );
    }
    group.finish();
}
Expected Results: Index Operations
| Index | Size | Build Time | Memory | Search (p50) | Search (p99) | Recall@10 |
|---|---|---|---|---|---|---|
| HNSW | 100K | 45s | 450MB | 0.8ms | 2.1ms | 0.98 |
| HNSW | 1M | 8min | 4.5GB | 1.2ms | 4.5ms | 0.97 |
| HNSW | 10M | 95min | 45GB | 2.1ms | 8.2ms | 0.96 |
| IVFFlat | 100K | 12s | 320MB | 1.5ms | 4.2ms | 0.92 |
| IVFFlat | 1M | 2min | 3.2GB | 3.2ms | 9.5ms | 0.91 |
| IVFFlat | 10M | 25min | 32GB | 8.5ms | 25ms | 0.89 |
3. Quantization Benchmarks
// benches/quantization_bench.rs
/// Times how long each supported quantization scheme takes to encode a
/// 100K x 768d corpus. criterion black-boxes the value returned from the
/// iter closure, so the construction cannot be optimized away.
fn bench_quantization_build(c: &mut Criterion) {
    let vectors = generate_random_vectors(100_000, 768);
    let mut group = c.benchmark_group("quantization_build");

    // 8-bit scalar quantization.
    group.bench_function("scalar_q8", |b| b.iter(|| ScalarQuantized::from_f32(&vectors)));
    // 1-bit (sign) quantization.
    group.bench_function("binary", |b| b.iter(|| BinaryQuantized::from_f32(&vectors)));
    // Product quantization: 96 subspaces x 256 centroids.
    group.bench_function("product_q", |b| b.iter(|| ProductQuantized::from_f32(&vectors, 96, 256)));

    group.finish();
}
// Benchmark: brute-force 1-NN scan over 1M x 768d vectors, comparing full
// f32 precision against each quantized representation. The min result is the
// closure's return value, so criterion black-boxes it for us.
fn bench_quantized_search(c: &mut Criterion) {
let vectors = generate_random_vectors(1_000_000, 768);
let query = generate_random_vectors(1, 768).pop().unwrap();
// Quantized copies are built once, outside the timed region.
let sq8 = ScalarQuantized::from_f32(&vectors);
let binary = BinaryQuantized::from_f32(&vectors);
// NOTE(review): `pq` is built but never searched below — presumably a
// product-quantization case is still to be added.
let pq = ProductQuantized::from_f32(&vectors, 96, 256);
let mut group = c.benchmark_group("quantized_search_1M");
// Baseline: exact f32 distances. partial_cmp().unwrap() is safe here only
// because random inputs cannot produce NaN distances.
group.bench_function("full_precision", |bench| {
bench.iter(|| {
vectors.iter()
.enumerate()
.map(|(i, v)| (i, euclidean_distance(&query, v)))
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
})
});
// 8-bit scalar quantization: distances computed against stored codes.
group.bench_function("scalar_q8", |bench| {
bench.iter(|| {
(0..vectors.len())
.map(|i| (i, sq8.distance(&query, i)))
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
})
});
// Binary codes: query quantized once (untimed), then compared via Hamming
// distance; integer distances give a total order, so plain cmp suffices.
group.bench_function("binary_hamming", |bench| {
let query_bits = binary.quantize_query(&query);
bench.iter(|| {
(0..vectors.len())
.map(|i| (i, binary.hamming_distance(&query_bits, i)))
.min_by(|a, b| a.1.cmp(&b.1))
})
});
group.finish();
}
Expected Results: Quantization
| Method | Memory (1M 768d) | Search Time | Recall Loss |
|---|---|---|---|
| Full Precision | 3GB | 850ms | 0% |
| Scalar Q8 | 750MB | 420ms | 1-2% |
| Binary | 94MB | 95ms | 5-10% |
| Product Q | 200MB | 180ms | 2-4% |
4. PostgreSQL Integration Benchmarks
-- Test setup script
CREATE EXTENSION ruvector;

-- Create test table
CREATE TABLE bench_vectors (
    id SERIAL PRIMARY KEY,
    embedding vector(768),
    category TEXT,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Insert 1M random 768-dimensional vectors.
-- Fix: the previous `array_agg(random()) ... GROUP BY i` aggregates a single
-- value per group, producing a 1-element array that fails the vector(768)
-- cast. Build each row's vector from a 768-row series instead; the `WHERE
-- i = i` correlation forces the subquery (and its volatile random() calls)
-- to be re-evaluated per outer row rather than once as an InitPlan.
INSERT INTO bench_vectors (embedding, category)
SELECT
    (SELECT array_agg(random())
     FROM generate_series(1, 768) d
     WHERE i = i)::vector(768),
    'category_' || (i % 100)::text
FROM generate_series(1, 1000000) i;

-- Create indexes
CREATE INDEX ON bench_vectors USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 200);
CREATE INDEX ON bench_vectors USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1000);
-- Benchmark queries (replace each '[...]' with a real 768-d vector literal).
\timing on
-- Simple k-NN: top-10 by cosine distance; intended to hit the HNSW index.
EXPLAIN ANALYZE
SELECT id, embedding <=> '[...]'::vector AS distance
FROM bench_vectors
ORDER BY distance
LIMIT 10;
-- k-NN with filter: exercises post-filtering vs filtered index scans.
EXPLAIN ANALYZE
SELECT id, embedding <=> '[...]'::vector AS distance
FROM bench_vectors
WHERE category = 'category_42'
ORDER BY distance
LIMIT 10;
-- Batch search: several query vectors in one statement via CROSS JOIN
-- against an inline VALUES-style subquery of queries.
EXPLAIN ANALYZE
SELECT b.id, q.query_id,
b.embedding <=> q.embedding AS distance
FROM bench_vectors b
CROSS JOIN (
SELECT 1 AS query_id, '[...]'::vector AS embedding
UNION ALL
SELECT 2, '[...]'::vector
-- ... more queries
) q
ORDER BY q.query_id, distance
LIMIT 100;
5. Competitor Comparison
# benchmark_comparison.py
import time
import numpy as np
from typing import List, Tuple
# Test data
SIZES = [10_000, 100_000, 1_000_000]
DIMS = [128, 768, 1536]
K = 10
QUERIES = 1000
def run_pgvector_benchmark(conn, size, dim):
    """Benchmark pgvector: index build time and mean k-NN query latency.

    Args:
        conn: an open DB-API-style handle supporting .execute — TODO confirm
            whether this is a connection or cursor in the final harness.
        size: number of vectors to load (consumed by the bulk-insert step).
        dim: vector dimensionality.

    Returns:
        dict with 'build_time' (seconds) and 'search_time_ms' (mean ms/query).
    """
    # Setup: table plus an HNSW index with pgvector defaults.
    conn.execute(f"""
        CREATE TABLE pgvector_test (
            id SERIAL PRIMARY KEY,
            embedding vector({dim})
        );
        CREATE INDEX ON pgvector_test USING hnsw (embedding vector_cosine_ops);
    """)

    # Insert: time the bulk load, including index maintenance.
    start = time.time()
    # ... bulk insert
    build_time = time.time() - start

    # Search: average latency over QUERIES repeated k-NN probes.
    query = np.random.randn(dim).astype(np.float32)
    # Fix: a bare Python list binds as a PostgreSQL ARRAY, which the <=>
    # operator does not accept. Pass the textual vector literal and cast.
    query_literal = str(query.tolist())
    start = time.time()
    for _ in range(QUERIES):
        conn.execute(
            f"""
            SELECT id FROM pgvector_test
            ORDER BY embedding <=> %s::vector
            LIMIT {K}
            """,
            (query_literal,),
        )
    search_time = (time.time() - start) / QUERIES * 1000
    return {
        'build_time': build_time,
        'search_time_ms': search_time,
    }
def run_ruvector_benchmark(conn, size, dim):
    """Benchmark ruvector-postgres (same protocol as run_pgvector_benchmark)."""
    # Similar setup with ruvector
    pass

def run_pinecone_benchmark(index, size, dim):
    """Benchmark Pinecone (cloud); latencies include network round-trips."""
    pass

def run_qdrant_benchmark(client, size, dim):
    """Benchmark Qdrant through its client API."""
    pass

def run_milvus_benchmark(collection, size, dim):
    """Benchmark Milvus against a pre-created collection."""
    pass
# Run all benchmarks over the full size x dimension grid.
# NOTE(review): Pinecone is defined above but absent from this grid —
# presumably reported separately because of cloud/network latency; confirm.
results = {}
for size in SIZES:
    for dim in DIMS:
        results[(size, dim)] = {
            'pgvector': run_pgvector_benchmark(...),  # '...' = placeholder args
            'ruvector': run_ruvector_benchmark(...),
            'qdrant': run_qdrant_benchmark(...),
            'milvus': run_milvus_benchmark(...),
        }
# Generate comparison report
# Generate comparison report
Expected Comparison Results
| System | 1M Build | 1M Search (p50) | 1M Search (p99) | Memory | Recall@10 |
|---|---|---|---|---|---|
| ruvector-postgres | 5min | 0.9ms | 3.2ms | 4.2GB | 0.97 |
| pgvector | 12min | 2.1ms | 8.5ms | 4.8GB | 0.95 |
| Qdrant | 7min | 1.2ms | 4.1ms | 4.5GB | 0.96 |
| Milvus | 8min | 1.5ms | 5.2ms | 5.1GB | 0.96 |
| Pinecone (P1) | 3min* | 5ms* | 15ms* | N/A | 0.98 |
*Cloud latency includes network overhead
6. Stress Testing
#!/bin/bash
# stress_test.sh — sustained mixed-workload soak test.
#
# Fix: the monitoring loop used to sit *after* the foreground pgbench run, so
# it only started once the test had finished — and then spun forever. It now
# runs in the background for the duration of the test and is killed on exit.

# Configuration
DURATION=3600 # 1 hour
CONCURRENCY=100
QPS_TARGET=10000

# Start PostgreSQL with ruvector
pg_ctl start -D "$PGDATA"

# Collect engine + activity stats every 10s while the workload runs.
(
    while true; do
        psql -c "SELECT * FROM ruvector_stats();" >> stats.log
        psql -c "SELECT * FROM pg_stat_activity WHERE state = 'active';" >> activity.log
        sleep 10
    done
) &
MONITOR_PID=$!
trap 'kill "$MONITOR_PID" 2>/dev/null' EXIT

# Run pgbench-style workload, rate-limited to the target QPS.
pgbench -c $CONCURRENCY -j 10 -T $DURATION \
    -f stress_queries.sql \
    -P 10 \
    --rate=$QPS_TARGET \
    testdb
stress_queries.sql
-- Mixed workload for pgbench (\if/\elif require pgbench 11+).
-- Draw a number in [1,100] and pick a query class by cumulative weight.
\set query_type random(1, 100)
\if :query_type <= 60
-- 60% simple k-NN
SELECT id FROM vectors
ORDER BY embedding <=> :'random_vector'::vector
LIMIT 10;
\elif :query_type <= 80
-- 20% filtered k-NN
SELECT id FROM vectors
WHERE category = :'random_category'
ORDER BY embedding <=> :'random_vector'::vector
LIMIT 10;
\elif :query_type <= 90
-- 10% batch search
SELECT v.id, q.id as query_id
FROM vectors v, query_batch q
ORDER BY v.embedding <=> q.embedding
LIMIT 100;
\else
-- 10% insert
INSERT INTO vectors (embedding, category)
VALUES (:'random_vector'::vector, :'random_category');
\endif
7. Memory Benchmarks
// benches/memory_bench.rs
/// Memory-footprint report: raw f32 vectors vs HNSW overhead vs quantized
/// encodings, printed for several corpus sizes.
///
/// This is a printed report rather than a timed benchmark; the `&mut
/// Criterion` parameter is kept so it can be registered like the other benches.
fn bench_memory_footprint(c: &mut Criterion) {
    let _ = c; // intentionally unused: this "bench" only prints a report
    let sizes = [100_000, 1_000_000, 10_000_000];
    println!("\n=== Memory Footprint Analysis ===\n");
    for size in sizes {
        println!("Size: {} vectors", size);

        // Full-precision corpus: size * 768 dims * 4 bytes per f32.
        let vectors: Vec<Vec<f32>> = generate_random_vectors(size, 768);
        let raw_size = size * 768 * 4;
        println!("  Raw vectors: {} MB", raw_size / 1_000_000);

        // HNSW index. Fix: `insert` mutates the graph (see the build bench,
        // which binds its index `mut`), so the binding here must be `mut` —
        // the original immutable `let hnsw` would not compile.
        let mut hnsw = HnswIndex::new(HnswConfig::default());
        for (i, v) in vectors.iter().enumerate() {
            hnsw.insert(i as u64, v);
        }
        println!("  HNSW overhead: {} MB", hnsw.memory_usage() / 1_000_000);

        // Quantized encodings of the same corpus.
        let sq8 = ScalarQuantized::from_f32(&vectors);
        println!("  SQ8 size: {} MB", sq8.memory_usage() / 1_000_000);
        let binary = BinaryQuantized::from_f32(&vectors);
        println!("  Binary size: {} MB", binary.memory_usage() / 1_000_000);
        println!();
    }
}
8. Recall vs Latency Analysis
# recall_latency_analysis.py
import matplotlib.pyplot as plt
import numpy as np
def measure_recall_latency_tradeoff(index, queries, ground_truth, ef_values):
    """Measure recall vs latency for different ef values.

    Bug fix: the per-query search output was previously assigned to a variable
    named `results`, shadowing the accumulator list — so the summary dicts were
    appended to the *last search's result* instead of the returned list.

    Args:
        index: ANN index exposing search(query, k=..., ef=...) -> iterable of ids.
        queries: iterable of query vectors.
        ground_truth: ground_truth[i] is the true top-10 id list for queries[i].
        ef_values: ef (beam width) settings to sweep.

    Returns:
        One dict per ef value with 'ef', 'avg_latency' / 'p99_latency' (ms),
        and 'avg_recall' (recall@10).
    """
    import time  # stdlib; not imported at module level in this snippet

    results = []
    for ef in ef_values:
        latencies = []
        recalls = []
        for i, query in enumerate(queries):
            start = time.time()
            neighbors = index.search(query, k=10, ef=ef)
            latency = (time.time() - start) * 1000
            recall = len(set(neighbors) & set(ground_truth[i])) / 10
            latencies.append(latency)
            recalls.append(recall)
        results.append({
            'ef': ef,
            'avg_latency': np.mean(latencies),
            'p99_latency': np.percentile(latencies, 99),
            'avg_recall': np.mean(recalls),
        })
    return results
# Plot the recall/latency sweep.
# NOTE(review): assumes `results` holds the list returned by
# measure_recall_latency_tradeoff above, and that `time` is imported where
# the measurement actually runs — confirm when assembling the full script.
plt.figure(figsize=(10, 6))
plt.plot([r['avg_latency'] for r in results],
[r['avg_recall'] for r in results], 'b-o')
plt.xlabel('Latency (ms)')
plt.ylabel('Recall@10')
plt.title('Recall vs Latency Tradeoff')
plt.savefig('recall_latency.png')
Benchmark Automation
CI/CD Integration
# .github/workflows/benchmark.yml
# Runs the micro-benchmarks and PostgreSQL benchmarks on every push/PR to
# main, compares against a stored baseline, and archives the raw results.
name: Benchmarks
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          # Refresh package lists and install non-interactively: a bare
          # `apt-get install` can fail against stale CI package indexes and
          # will wait on a confirmation prompt without -y.
          sudo apt-get update
          sudo apt-get install -y postgresql-16
          cargo install cargo-criterion
      - name: Run micro-benchmarks
        run: |
          cargo criterion --output-format json > bench_results.json
      - name: Run PostgreSQL benchmarks
        run: |
          ./scripts/run_pg_benchmarks.sh
      - name: Compare with baseline
        run: |
          # Fail the job if any metric regresses >10% versus the baseline.
          python scripts/compare_benchmarks.py \
            --baseline baseline.json \
            --current bench_results.json \
            --threshold 10
      - name: Upload results
        # upload-artifact v3 is deprecated and disabled on GitHub-hosted
        # runners; v4 is the supported release.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: bench_results.json
Benchmark Dashboard
-- Create benchmark results table: one row per (run, benchmark, metric).
CREATE TABLE benchmark_results (
id SERIAL PRIMARY KEY,
run_date TIMESTAMP DEFAULT NOW(),
git_commit TEXT,
benchmark_name TEXT,
metric_name TEXT,
value FLOAT,
unit TEXT,
metadata JSONB -- free-form run context (machine, config, flags)
);
-- Query for trend analysis: daily min/avg/max of the p50 search latency
-- over the last 30 days, grouped per benchmark.
SELECT
date_trunc('day', run_date) AS day,
benchmark_name,
AVG(value) AS avg_value,
MIN(value) AS min_value,
MAX(value) AS max_value
FROM benchmark_results
WHERE metric_name = 'search_latency_p50'
AND run_date > NOW() - INTERVAL '30 days'
GROUP BY 1, 2
ORDER BY 1, 2;
Reporting Format
Performance Report Template
# RuVector-Postgres Performance Report
**Date:** 2024-XX-XX
**Version:** 0.X.0
**Commit:** abc123
## Summary
- Overall performance: **X% faster** than pgvector
- Memory efficiency: **X% less** than competitors
- Recall@10: **0.97** (target: 0.95)
## Detailed Results
### Index Build Performance
| Size | HNSW Time | IVFFlat Time | Memory |
|------|-----------|--------------|--------|
| 100K | Xs | Xs | XMB |
| 1M | Xm | Xm | XGB |
### Search Latency (1M vectors, 768d)
| Metric | HNSW | IVFFlat | Target |
|--------|------|---------|--------|
| p50 | Xms | Xms | <2ms |
| p99 | Xms | Xms | <10ms |
| QPS | X | X | >5000 |
### Comparison with Competitors
[Charts and tables]
## Recommendations
1. For latency-sensitive workloads: Use HNSW with ef_search=64
2. For memory-constrained: Use IVFFlat with SQ8 quantization
3. For maximum throughput: Enable parallel search with 4 workers
Running Benchmarks
# Entry points for the benchmark suite (run from the repository root).

# Run all micro-benchmarks
cargo bench --features bench
# Run specific benchmark
cargo bench -- distance
# Run PostgreSQL benchmarks
./scripts/run_pg_benchmarks.sh
# Generate comparison report
python scripts/generate_report.py
# Quick smoke test
cargo bench -- --quick