git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
172 lines
5.5 KiB
SQL
172 lines
5.5 KiB
SQL
-- RuVector Optimized Benchmark Runner
-- Measures the performance of the optimized operations.
-- Intended to be run with psql: it relies on the \timing and \echo meta-commands.
|
|
|
|
\timing on

-- ============================================================================
-- Test 1: HNSW Vector Search (Target: ~24ms for 1000 vectors)
-- ============================================================================
\echo '=== Test 1: HNSW Vector Search ==='

-- The query vector is produced inside an uncorrelated scalar subquery so that
-- it is evaluated once per statement and every row is compared against the
-- same vector. Calling ruvector_random(128) directly in the distance
-- expression would, if the function is volatile, generate a fresh vector for
-- each row and prevent an index-ordered scan.
-- NOTE(review): assumes ruvector_random is declared VOLATILE and that
-- benchmark_vectors.embedding carries an HNSW index -- confirm against the
-- setup script.

-- Warm up: run the same query shape once before the measured run.
SELECT
    id,
    embedding <-> (SELECT ruvector_random(128)) AS distance
FROM benchmark_vectors
ORDER BY distance
LIMIT 10;

-- Benchmark: find the 10 nearest neighbors of a single random 128-dim vector.
EXPLAIN ANALYZE
SELECT
    id,
    embedding <-> (SELECT ruvector_random(128)) AS distance
FROM benchmark_vectors
ORDER BY distance
LIMIT 10;
|
|
|
|
-- ============================================================================
-- Test 2: Hamming Distance with bit_count (Target: ~7.6ms)
-- ============================================================================
\echo '=== Test 2: Hamming Distance ==='

-- Self-join of benchmark_quantized: each row is paired only with rows that
-- have a larger id, so a row is never compared with itself. The first 1000
-- pairs produced are scored with bench_hamming_distance.
EXPLAIN ANALYZE
SELECT
    lhs.id AS id_a,
    rhs.id AS id_b,
    bench_hamming_distance(lhs.binary_quantized, rhs.binary_quantized) AS hamming_dist
FROM benchmark_quantized AS lhs
INNER JOIN benchmark_quantized AS rhs
    ON lhs.id < rhs.id
LIMIT 1000;
|
|
|
|
-- ============================================================================
-- Test 3: Full-Text Search with GIN (Target: ~3.5ms)
-- ============================================================================
\echo '=== Test 3: Full-Text Search ==='

-- The tsquery is built once in the FROM clause and reused for both the match
-- filter (@@) and the ts_rank score. Returns the 20 highest-ranked documents.
-- NOTE(review): the section title implies a GIN index on content_tsvector;
-- that index is not created in this file -- confirm in the setup script.
EXPLAIN ANALYZE
SELECT
    id,
    content,
    ts_rank(content_tsvector, search_query) AS rank
FROM benchmark_documents
CROSS JOIN plainto_tsquery('english', 'vector database search') AS search_query
WHERE content_tsvector @@ search_query
ORDER BY rank DESC
LIMIT 20;
|
|
|
|
-- ============================================================================
-- Test 4: GraphSAGE Aggregation (Target: ~2.6ms)
-- ============================================================================
\echo '=== Test 4: GraphSAGE Neighbor Aggregation ==='

EXPLAIN ANALYZE
-- Per source node: collect the feature vectors of all edge targets and reduce
-- them with ruvector_mean.
WITH neighbor_means AS (
    SELECT
        edge.source_id,
        ruvector_mean(ARRAY_AGG(nbr.features)) AS mean_neighbor
    FROM benchmark_edges AS edge
    INNER JOIN benchmark_nodes AS nbr
        ON edge.target_id = nbr.id
    GROUP BY edge.source_id
)
-- Concatenate each node's own features with its neighbor mean. Nodes with no
-- row in neighbor_means fall back to their own features via COALESCE.
SELECT
    node.id,
    ruvector_concat(node.features, COALESCE(agg.mean_neighbor, node.features)) AS aggregated
FROM benchmark_nodes AS node
LEFT JOIN neighbor_means AS agg
    ON node.id = agg.source_id
LIMIT 50;
|
|
|
|
-- ============================================================================
-- Test 5: Sparse Vector Dot Product (Target: ~27ms)
-- ============================================================================
\echo '=== Test 5: Sparse Dot Product ==='

-- Self-join of benchmark_documents: each document is paired only with
-- documents that have a larger id, and the first 500 pairs produced are scored
-- with bench_sparse_dot.
EXPLAIN ANALYZE
SELECT
    lhs.id AS id_a,
    rhs.id AS id_b,
    bench_sparse_dot(lhs.sparse_embedding, rhs.sparse_embedding) AS similarity
FROM benchmark_documents AS lhs
INNER JOIN benchmark_documents AS rhs
    ON lhs.id < rhs.id
LIMIT 500;
|
|
|
|
-- ============================================================================
-- Test 6: Graph Edge Lookup (Target: ~5ms)
-- ============================================================================
\echo '=== Test 6: Graph Edge Lookup ==='

-- Fetch every edge whose source is one of 10 randomly sampled nodes, together
-- with the feature vectors of both endpoints. The sample changes on each run
-- because it is drawn with ORDER BY random().
EXPLAIN ANALYZE
SELECT
    edge.*,
    src.features AS source_features,
    dst.features AS target_features
FROM benchmark_edges AS edge
INNER JOIN benchmark_nodes AS src
    ON edge.source_id = src.id
INNER JOIN benchmark_nodes AS dst
    ON edge.target_id = dst.id
WHERE edge.source_id IN (
    SELECT id
    FROM benchmark_nodes
    ORDER BY random()
    LIMIT 10
);
|
|
|
|
-- ============================================================================
-- Test 7: Scalar Quantization Compression (Target: ~75ms)
-- ============================================================================
\echo '=== Test 7: Scalar Quantization ==='

-- Compare the stored size of the quantized value with the size of the original
-- vector for 100 rows. compression_ratio is compressed size as a percentage of
-- original size, rounded to two decimals.
-- NOTE(review): the factor 4 presumably means 4 bytes per vector component --
-- confirm against the ruvector storage format.
EXPLAIN ANALYZE
SELECT
    id,
    octet_length(scalar_quantized) AS compressed_size,
    ruvector_dim(original) * 4 AS original_size,
    ROUND(
        100.0 * octet_length(scalar_quantized) / (ruvector_dim(original) * 4),
        2
    ) AS compression_ratio
FROM benchmark_quantized
LIMIT 100;
|
|
|
|
-- ============================================================================
-- Test 8: Binary Quantization + Hamming (Target: ~85ms)
-- ============================================================================
\echo '=== Test 8: Binary Quantization Search ==='

EXPLAIN ANALYZE
-- Single-row CTE holding the binary-quantized form of a random 128-dim vector.
WITH query_code AS (
    SELECT ruvector_binary_quantize(ruvector_random(128)) AS bits
)
-- Score every stored binary code against the query code and keep the 20 rows
-- with the smallest Hamming distance.
SELECT
    cand.id,
    bench_hamming_distance(cand.binary_quantized, query_code.bits) AS hamming_dist
FROM benchmark_quantized AS cand
CROSS JOIN query_code
ORDER BY hamming_dist
LIMIT 20;
|
|
|
|
-- ============================================================================
-- Summary
-- ============================================================================
\echo '=== Benchmark Summary ==='

-- One row per benchmark table: row count, main relation size and total size of
-- its indexes. Result column names come from the first branch; the aliases on
-- the remaining branches are for readability only.
SELECT
    'benchmark_vectors' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_vectors')) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_vectors')) AS index_size
FROM benchmark_vectors

UNION ALL

SELECT
    'benchmark_documents' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_documents')) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_documents')) AS index_size
FROM benchmark_documents

UNION ALL

SELECT
    'benchmark_nodes' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_nodes')) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_nodes')) AS index_size
FROM benchmark_nodes

UNION ALL

SELECT
    'benchmark_edges' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_edges')) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_edges')) AS index_size
FROM benchmark_edges

UNION ALL

SELECT
    'benchmark_quantized' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_quantized')) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_quantized')) AS index_size
FROM benchmark_quantized;

\timing off
|