Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
171
vendor/ruvector/npm/packages/postgres-cli/benchmarks/run_benchmarks_optimized.sql
vendored
Normal file
171
vendor/ruvector/npm/packages/postgres-cli/benchmarks/run_benchmarks_optimized.sql
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
-- RuVector Optimized Benchmark Runner
|
||||
-- Tests performance of optimized operations
|
||||
|
||||
\timing on

-- ============================================================================
-- Test 1: HNSW Vector Search (Target: ~24ms for 1000 vectors)
-- ============================================================================
\echo '=== Test 1: HNSW Vector Search ==='

-- Draw ONE random 128-dim query vector and keep it in a psql variable.
-- Calling ruvector_random() inline in the SELECT list is presumably volatile
-- and would be re-evaluated for every row: each row would then be compared
-- against a different vector (not a real k-NN query), and an ORDER BY over a
-- non-constant expression cannot be served by an index-ordered scan.
SELECT ruvector_random(128) AS query_vec \gset

-- NOTE(review): the setup script builds idx_vectors_hnsw with
-- ruvector_cosine_ops, so the cosine operator is used below. This assumes
-- ruvector follows the pgvector operator convention (<=> cosine, <-> L2);
-- confirm against the extension's operator list.

-- Warm up: touch the table/index pages once before measuring.
SELECT
    id,
    embedding <=> :'query_vec'::ruvector AS distance
FROM benchmark_vectors
ORDER BY distance
LIMIT 10;

-- Benchmark: find the 10 nearest neighbors of the same query vector.
EXPLAIN ANALYZE
SELECT
    id,
    embedding <=> :'query_vec'::ruvector AS distance
FROM benchmark_vectors
ORDER BY distance
LIMIT 10;
|
||||
|
||||
-- ============================================================================
-- Test 2: Hamming Distance with bit_count (Target: ~7.6ms)
-- ============================================================================
\echo '=== Test 2: Hamming Distance ==='

-- Pair each quantized row with every higher-id row (so each unordered pair is
-- produced once) and evaluate bench_hamming_distance on the binary codes.
-- The LIMIT stops the plan after 1000 pairs; no ordering is requested.
EXPLAIN ANALYZE
SELECT
    lhs.id AS id_a,
    rhs.id AS id_b,
    bench_hamming_distance(lhs.binary_quantized, rhs.binary_quantized) AS hamming_dist
FROM benchmark_quantized AS lhs
INNER JOIN benchmark_quantized AS rhs
    ON lhs.id < rhs.id
LIMIT 1000;
|
||||
|
||||
-- ============================================================================
-- Test 3: Full-Text Search with GIN (Target: ~3.5ms)
-- ============================================================================
\echo '=== Test 3: Full-Text Search ==='

-- Build the tsquery once as a one-row relation, keep the documents whose
-- stored tsvector matches it, and return the 20 highest ts_rank scores.
EXPLAIN ANALYZE
SELECT
    doc.id,
    doc.content,
    ts_rank(doc.content_tsvector, q.query) AS rank
FROM benchmark_documents AS doc
CROSS JOIN plainto_tsquery('english', 'vector database search') AS q (query)
WHERE doc.content_tsvector @@ q.query
ORDER BY rank DESC
LIMIT 20;
|
||||
|
||||
-- ============================================================================
-- Test 4: GraphSAGE Aggregation (Target: ~2.6ms)
-- ============================================================================
\echo '=== Test 4: GraphSAGE Neighbor Aggregation ==='

EXPLAIN ANALYZE
WITH neighbor_features AS (
    -- One row per source node: ruvector_mean over the feature vectors of the
    -- nodes its outgoing edges point to.
    SELECT
        edge.source_id,
        ruvector_mean(ARRAY_AGG(nbr.features)) AS mean_neighbor
    FROM benchmark_edges AS edge
    INNER JOIN benchmark_nodes AS nbr
        ON nbr.id = edge.target_id
    GROUP BY edge.source_id
)
-- Concatenate each node's own features with its neighbor aggregate; nodes
-- without outgoing edges fall back to their own features for the second half.
SELECT
    node.id,
    ruvector_concat(node.features, COALESCE(agg.mean_neighbor, node.features)) AS aggregated
FROM benchmark_nodes AS node
LEFT JOIN neighbor_features AS agg
    ON agg.source_id = node.id
LIMIT 50;
|
||||
|
||||
-- ============================================================================
-- Test 5: Sparse Vector Dot Product (Target: ~27ms)
-- ============================================================================
\echo '=== Test 5: Sparse Dot Product ==='

-- Score each document against every higher-id document with bench_sparse_dot,
-- stopping after 500 pairs.
-- NOTE(review): the setup script defines bench_sparse_dot as
-- ruvector_sparse_distance(a, b, 'cosine'), so the "similarity" column may be
-- a cosine distance rather than a dot product -- confirm the intended metric.
EXPLAIN ANALYZE
SELECT
    lhs.id AS id_a,
    rhs.id AS id_b,
    bench_sparse_dot(lhs.sparse_embedding, rhs.sparse_embedding) AS similarity
FROM benchmark_documents AS lhs
INNER JOIN benchmark_documents AS rhs
    ON lhs.id < rhs.id
LIMIT 500;
|
||||
|
||||
-- ============================================================================
-- Test 6: Graph Edge Lookup (Target: ~5ms)
-- ============================================================================
\echo '=== Test 6: Graph Edge Lookup ==='

-- Fetch every edge leaving 10 randomly chosen nodes, together with the
-- feature vectors of both endpoints. The random sample is re-drawn on each
-- run, so row counts vary between executions.
EXPLAIN ANALYZE
SELECT
    edge.*,
    src.features AS source_features,
    dst.features AS target_features
FROM benchmark_edges AS edge
INNER JOIN benchmark_nodes AS src
    ON src.id = edge.source_id
INNER JOIN benchmark_nodes AS dst
    ON dst.id = edge.target_id
WHERE edge.source_id IN (
    SELECT id
    FROM benchmark_nodes
    ORDER BY random()
    LIMIT 10
);
|
||||
|
||||
-- ============================================================================
-- Test 7: Scalar Quantization Compression (Target: ~75ms)
-- ============================================================================
\echo '=== Test 7: Scalar Quantization ==='

-- Compare the stored scalar-quantized payload size with the size of the
-- original vector for the first 100 rows returned.
--   compressed_size   : bytes in scalar_quantized
--   original_size     : ruvector_dim(original) * 4
--                       (assumes 4 bytes per dimension, i.e. float32 -- TODO confirm)
--   compression_ratio : compressed size as a percentage of original size,
--                       rounded to 2 decimals
-- NULLIF guards the division so a zero-dimension vector yields NULL instead
-- of aborting the benchmark with a division-by-zero error.
EXPLAIN ANALYZE
SELECT
    id,
    octet_length(scalar_quantized) AS compressed_size,
    ruvector_dim(original) * 4 AS original_size,
    ROUND(
        100.0 * octet_length(scalar_quantized)
            / NULLIF(ruvector_dim(original) * 4, 0),
        2
    ) AS compression_ratio
FROM benchmark_quantized
LIMIT 100;
|
||||
|
||||
-- ============================================================================
-- Test 8: Binary Quantization + Hamming (Target: ~85ms)
-- ============================================================================
\echo '=== Test 8: Binary Quantization Search ==='

EXPLAIN ANALYZE
WITH query_binary AS (
    -- Single-row CTE holding the binary-quantized form of a random 128-dim
    -- query vector.
    SELECT ruvector_binary_quantize(ruvector_random(128)) AS q
)
-- Rank every stored binary code by bench_hamming_distance to the query code
-- and keep the 20 closest.
SELECT
    cand.id,
    bench_hamming_distance(cand.binary_quantized, probe.q) AS hamming_dist
FROM benchmark_quantized AS cand
CROSS JOIN query_binary AS probe
ORDER BY hamming_dist
LIMIT 20;
|
||||
|
||||
-- ============================================================================
-- Summary
-- ============================================================================
\echo '=== Benchmark Summary ==='

-- One row per benchmark table: row count, main-relation size, and total size
-- of its indexes. Column names are taken from the first branch of the UNION.
SELECT
    'benchmark_vectors' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_vectors'::regclass)) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_vectors'::regclass)) AS index_size
FROM benchmark_vectors

UNION ALL

SELECT
    'benchmark_documents' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_documents'::regclass)) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_documents'::regclass)) AS index_size
FROM benchmark_documents

UNION ALL

SELECT
    'benchmark_nodes' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_nodes'::regclass)) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_nodes'::regclass)) AS index_size
FROM benchmark_nodes

UNION ALL

SELECT
    'benchmark_edges' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_edges'::regclass)) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_edges'::regclass)) AS index_size
FROM benchmark_edges

UNION ALL

SELECT
    'benchmark_quantized' AS table_name,
    COUNT(*) AS row_count,
    pg_size_pretty(pg_relation_size('benchmark_quantized'::regclass)) AS table_size,
    pg_size_pretty(pg_indexes_size('benchmark_quantized'::regclass)) AS index_size
FROM benchmark_quantized;

\timing off
|
||||
145
vendor/ruvector/npm/packages/postgres-cli/benchmarks/ruvector_benchmark_optimized.sql
vendored
Normal file
145
vendor/ruvector/npm/packages/postgres-cli/benchmarks/ruvector_benchmark_optimized.sql
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
-- RuVector Optimized Benchmark Setup
|
||||
-- Performance-optimized schema with indexes and parallel-safe functions
|
||||
|
||||
-- Make sure the ruvector extension (types, operators, functions) is installed.
CREATE EXTENSION IF NOT EXISTS ruvector;

-- ============================================================================
-- Optimized Vector Table with HNSW Index
-- ============================================================================
-- Rebuilt from scratch on every run so results start from a known state.
DROP TABLE IF EXISTS benchmark_vectors CASCADE;

CREATE TABLE benchmark_vectors (
    id         SERIAL PRIMARY KEY,
    embedding  ruvector,
    category   TEXT,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed 1000 random 128-dim vectors. The integer cast rounds, so category
-- suffixes range over 0..10.
INSERT INTO benchmark_vectors (embedding, category)
SELECT
    ruvector_random(128),
    'category_' || CAST(random() * 10 AS int)
FROM generate_series(1, 1000) AS seq;

-- HNSW index for fast similarity search using the cosine operator class.
--   m = 16                : connections per layer
--   ef_construction = 100 : build-time accuracy
CREATE INDEX IF NOT EXISTS idx_vectors_hnsw
    ON benchmark_vectors
    USING hnsw (embedding ruvector_cosine_ops)
    WITH (m = 16, ef_construction = 100);
|
||||
|
||||
-- ============================================================================
-- Optimized Full-Text Search with GIN Index
-- ============================================================================
DROP TABLE IF EXISTS benchmark_documents CASCADE;

-- content_tsvector is a stored generated column, so it is derived from
-- content at write time instead of being recomputed inside each query.
CREATE TABLE benchmark_documents (
    id               SERIAL PRIMARY KEY,
    content          TEXT,
    content_tsvector TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', content)) STORED,
    sparse_embedding TEXT,
    created_at       TIMESTAMPTZ DEFAULT NOW()
);

-- Seed 500 documents. Each gets an 8-slot dense array with random values in
-- the even positions and zeros in the odd ones, converted through
-- ruvector_sparse_from_dense and stored in the TEXT column.
INSERT INTO benchmark_documents (content, sparse_embedding)
SELECT
    'Document ' || i || ' contains words like vector database similarity search embedding neural network',
    ruvector_sparse_from_dense(
        CAST(ARRAY[random(), 0, random(), 0, random(), 0, random(), 0] AS float4[])
    )
FROM generate_series(1, 500) AS i;

-- GIN index backing the @@ full-text match on the generated tsvector.
CREATE INDEX IF NOT EXISTS idx_documents_fts
    ON benchmark_documents
    USING gin (content_tsvector);
|
||||
|
||||
-- ============================================================================
-- Optimized Graph Tables with B-tree Indexes
-- ============================================================================
-- Edges reference nodes, so the edge table is dropped first.
DROP TABLE IF EXISTS benchmark_edges CASCADE;
DROP TABLE IF EXISTS benchmark_nodes CASCADE;

CREATE TABLE benchmark_nodes (
    id        SERIAL PRIMARY KEY,
    features  ruvector,
    node_type TEXT
);

CREATE TABLE benchmark_edges (
    id        SERIAL PRIMARY KEY,
    source_id INT REFERENCES benchmark_nodes(id),
    target_id INT REFERENCES benchmark_nodes(id),
    edge_type TEXT,
    weight    FLOAT DEFAULT 1.0
);

-- Seed 200 nodes with random 64-dim feature vectors.
INSERT INTO benchmark_nodes (features, node_type)
SELECT
    ruvector_random(64),
    'type_' || CAST(random() * 5 AS int)
FROM generate_series(1, 200) AS seq;

-- Seed 1000 random edges. random() * 199 + 1 rounds to an integer in 1..200,
-- which matches the ids the freshly created SERIAL assigns to the 200 nodes.
INSERT INTO benchmark_edges (source_id, target_id, edge_type, weight)
SELECT
    CAST(random() * 199 + 1 AS int),
    CAST(random() * 199 + 1 AS int),
    'edge_' || CAST(random() * 3 AS int),
    random()
FROM generate_series(1, 1000) AS seq;

-- B-tree indexes for fast edge lookups by source, by target, and by pair.
CREATE INDEX IF NOT EXISTS idx_edges_source
    ON benchmark_edges (source_id);
CREATE INDEX IF NOT EXISTS idx_edges_target
    ON benchmark_edges (target_id);
CREATE INDEX IF NOT EXISTS idx_edges_source_target
    ON benchmark_edges (source_id, target_id);
|
||||
|
||||
-- ============================================================================
-- Optimized Quantization Tables
-- ============================================================================
DROP TABLE IF EXISTS benchmark_quantized CASCADE;

-- Holds each source vector alongside its binary and scalar quantized forms.
CREATE TABLE benchmark_quantized (
    id               SERIAL PRIMARY KEY,
    original         ruvector,
    binary_quantized BIT VARYING,
    scalar_quantized BYTEA
);

-- Quantize 500 vectors taken from benchmark_vectors. There is no ORDER BY,
-- so which 500 rows are picked is left to the planner.
-- NOTE(review): the second argument to ruvector_scalar_quantize (8) is
-- presumably the bit width -- confirm against the extension docs.
INSERT INTO benchmark_quantized (original, binary_quantized, scalar_quantized)
SELECT
    src.embedding,
    ruvector_binary_quantize(src.embedding),
    ruvector_scalar_quantize(src.embedding, 8)
FROM benchmark_vectors AS src
LIMIT 500;
|
||||
|
||||
-- ============================================================================
|
||||
-- Parallel-Safe Helper Functions
|
||||
-- ============================================================================
|
||||
|
||||
-- bench_cosine_distance(a, b): thin SQL wrapper that forwards both vectors to
-- ruvector_distance with the 'cosine' metric and returns the float8 result.
-- Declared IMMUTABLE and PARALLEL SAFE so the planner may use it in parallel
-- plans.
CREATE OR REPLACE FUNCTION bench_cosine_distance(a ruvector, b ruvector)
RETURNS float8
LANGUAGE sql
IMMUTABLE
PARALLEL SAFE
AS $body$
    SELECT ruvector_distance(a, b, 'cosine')
$body$;
|
||||
|
||||
-- bench_hamming_distance(a, b): number of differing bits between two bit
-- strings, computed as bit_count of their XOR (#) and cast to int.
-- The # operator requires both operands to have the same length, and
-- bit_count on bit strings needs PostgreSQL 14 or newer.
-- Declared IMMUTABLE and PARALLEL SAFE so the planner may use it in parallel
-- plans.
CREATE OR REPLACE FUNCTION bench_hamming_distance(a BIT VARYING, b BIT VARYING)
RETURNS int
LANGUAGE sql
IMMUTABLE
PARALLEL SAFE
AS $body$
    SELECT CAST(bit_count(a # b) AS int)
$body$;
|
||||
|
||||
-- bench_sparse_dot(a, b): thin SQL wrapper that forwards two text-encoded
-- sparse vectors to ruvector_sparse_distance with the 'cosine' metric and
-- returns the float8 result.
-- NOTE(review): the name says "dot" but the metric passed is 'cosine' --
-- confirm which one the benchmark is meant to measure.
-- Declared IMMUTABLE and PARALLEL SAFE so the planner may use it in parallel
-- plans.
CREATE OR REPLACE FUNCTION bench_sparse_dot(a TEXT, b TEXT)
RETURNS float8
LANGUAGE sql
IMMUTABLE
PARALLEL SAFE
AS $body$
    SELECT ruvector_sparse_distance(a, b, 'cosine')
$body$;
|
||||
|
||||
-- ============================================================================
-- Statistics Update
-- ============================================================================
-- Refresh planner statistics for every freshly loaded table so the benchmark
-- queries are planned against accurate row estimates.
ANALYZE benchmark_vectors;
ANALYZE benchmark_documents;
ANALYZE benchmark_nodes;
ANALYZE benchmark_edges;
ANALYZE benchmark_quantized;

-- Emit a one-row marker so callers can tell the script ran to completion.
SELECT 'Optimized benchmark setup complete' AS status;
|
||||
Reference in New Issue
Block a user