Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
326
vendor/ruvector/crates/ruvector-postgres/docker/init-integration.sql
vendored
Normal file
326
vendor/ruvector/crates/ruvector-postgres/docker/init-integration.sql
vendored
Normal file
@@ -0,0 +1,326 @@
|
||||
-- RuVector-Postgres Integration Test Initialization
|
||||
-- Sets up comprehensive test environment with multiple schemas and test data
|
||||
|
||||
-- Enable required extensions
|
||||
CREATE EXTENSION IF NOT EXISTS ruvector;
|
||||
|
||||
-- Log initialization
|
||||
DO $$
|
||||
BEGIN
|
||||
RAISE NOTICE '========================================';
|
||||
RAISE NOTICE 'RuVector Integration Test Initialization';
|
||||
RAISE NOTICE '========================================';
|
||||
END $$;
|
||||
|
||||
-- ============================================================================
|
||||
-- Test Schemas
|
||||
-- ============================================================================
|
||||
|
||||
-- pgvector compatibility tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_pgvector;
|
||||
COMMENT ON SCHEMA test_pgvector IS 'pgvector SQL compatibility tests';
|
||||
|
||||
-- Integrity system tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_integrity;
|
||||
COMMENT ON SCHEMA test_integrity IS 'Integrity and mincut tests';
|
||||
|
||||
-- Hybrid search tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_hybrid;
|
||||
COMMENT ON SCHEMA test_hybrid IS 'Hybrid BM25+vector search tests';
|
||||
|
||||
-- Multi-tenancy tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_tenancy;
|
||||
COMMENT ON SCHEMA test_tenancy IS 'Multi-tenant isolation tests';
|
||||
|
||||
-- Self-healing tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_healing;
|
||||
COMMENT ON SCHEMA test_healing IS 'Self-healing and recovery tests';
|
||||
|
||||
-- Performance tests
|
||||
CREATE SCHEMA IF NOT EXISTS test_perf;
|
||||
COMMENT ON SCHEMA test_perf IS 'Performance benchmarks';
|
||||
|
||||
-- ============================================================================
|
||||
-- Test Tables
|
||||
-- ============================================================================
|
||||
|
||||
-- pgvector compatibility test table
|
||||
CREATE TABLE test_pgvector.vectors (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(128),
|
||||
metadata JSONB,
|
||||
category TEXT,
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Table for HNSW index testing
|
||||
CREATE TABLE test_pgvector.hnsw_vectors (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(128),
|
||||
label TEXT
|
||||
);
|
||||
|
||||
-- Table for IVFFlat index testing
|
||||
CREATE TABLE test_pgvector.ivfflat_vectors (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(128),
|
||||
label TEXT
|
||||
);
|
||||
|
||||
-- Integrity test tables
|
||||
CREATE TABLE test_integrity.graph_nodes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(64),
|
||||
layer INTEGER DEFAULT 0,
|
||||
connections INTEGER[]
|
||||
);
|
||||
|
||||
CREATE TABLE test_integrity.metrics (
|
||||
id SERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP DEFAULT NOW(),
|
||||
mincut_value INTEGER,
|
||||
load_factor FLOAT,
|
||||
error_rate FLOAT,
|
||||
state TEXT
|
||||
);
|
||||
|
||||
-- Hybrid search test tables
|
||||
CREATE TABLE test_hybrid.documents (
|
||||
id SERIAL PRIMARY KEY,
|
||||
title TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
embedding vector(384),
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE TABLE test_hybrid.search_results (
|
||||
id SERIAL PRIMARY KEY,
|
||||
query_id INTEGER,
|
||||
doc_id INTEGER,
|
||||
vector_score FLOAT,
|
||||
text_score FLOAT,
|
||||
fused_score FLOAT,
|
||||
rank INTEGER
|
||||
);
|
||||
|
||||
-- Multi-tenancy test tables
|
||||
CREATE TABLE test_tenancy.tenant_config (
|
||||
tenant_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
name TEXT NOT NULL,
|
||||
max_vectors BIGINT DEFAULT 100000,
|
||||
max_storage_bytes BIGINT DEFAULT 1073741824,
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE TABLE test_tenancy.tenant_vectors (
|
||||
id SERIAL,
|
||||
tenant_id UUID NOT NULL,
|
||||
embedding vector(128),
|
||||
metadata JSONB,
|
||||
created_at TIMESTAMP DEFAULT NOW(),
|
||||
PRIMARY KEY (tenant_id, id)
|
||||
);
|
||||
|
||||
CREATE TABLE test_tenancy.tenant_usage (
|
||||
tenant_id UUID PRIMARY KEY,
|
||||
vector_count BIGINT DEFAULT 0,
|
||||
storage_bytes BIGINT DEFAULT 0,
|
||||
query_count BIGINT DEFAULT 0,
|
||||
last_updated TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Self-healing test tables
|
||||
CREATE TABLE test_healing.health_metrics (
|
||||
id SERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP DEFAULT NOW(),
|
||||
metric_name TEXT NOT NULL,
|
||||
metric_value FLOAT NOT NULL,
|
||||
threshold FLOAT,
|
||||
status TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE test_healing.remediation_log (
|
||||
id SERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP DEFAULT NOW(),
|
||||
problem_type TEXT NOT NULL,
|
||||
action_taken TEXT NOT NULL,
|
||||
success BOOLEAN,
|
||||
recovery_time_ms INTEGER,
|
||||
notes TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE test_healing.learning_records (
|
||||
id SERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP DEFAULT NOW(),
|
||||
problem_context JSONB,
|
||||
action TEXT,
|
||||
outcome JSONB,
|
||||
confidence FLOAT DEFAULT 0.5
|
||||
);
|
||||
|
||||
-- Performance test tables
|
||||
CREATE TABLE test_perf.benchmark_vectors (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(128),
|
||||
metadata JSONB,
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE TABLE test_perf.benchmark_results (
|
||||
id SERIAL PRIMARY KEY,
|
||||
benchmark_name TEXT NOT NULL,
|
||||
timestamp TIMESTAMP DEFAULT NOW(),
|
||||
iterations INTEGER,
|
||||
total_time_ms FLOAT,
|
||||
avg_time_ms FLOAT,
|
||||
p50_time_ms FLOAT,
|
||||
p95_time_ms FLOAT,
|
||||
p99_time_ms FLOAT,
|
||||
throughput FLOAT,
|
||||
notes TEXT
|
||||
);
|
||||
|
||||
-- ============================================================================
|
||||
-- Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- HNSW indexes for different test scenarios
|
||||
CREATE INDEX test_pgvector_vectors_hnsw ON test_pgvector.vectors
|
||||
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
||||
|
||||
CREATE INDEX test_pgvector_hnsw_idx ON test_pgvector.hnsw_vectors
|
||||
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- IVFFlat index
|
||||
CREATE INDEX test_pgvector_ivfflat_idx ON test_pgvector.ivfflat_vectors
|
||||
USING ivfflat (embedding vector_l2_ops) WITH (lists = 100);
|
||||
|
||||
-- Performance benchmark index
|
||||
CREATE INDEX test_perf_benchmark_hnsw ON test_perf.benchmark_vectors
|
||||
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- Hybrid search indexes
|
||||
CREATE INDEX test_hybrid_docs_embedding ON test_hybrid.documents
|
||||
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- GIN index for text search
|
||||
CREATE INDEX test_hybrid_docs_content ON test_hybrid.documents
|
||||
USING gin (to_tsvector('english', content));
|
||||
|
||||
-- Multi-tenancy indexes
|
||||
CREATE INDEX test_tenancy_vectors_tenant ON test_tenancy.tenant_vectors (tenant_id);
|
||||
CREATE INDEX test_tenancy_vectors_hnsw ON test_tenancy.tenant_vectors
|
||||
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- ============================================================================
|
||||
-- Test Data
|
||||
-- ============================================================================
|
||||
|
||||
-- Insert pgvector compatibility test data
|
||||
INSERT INTO test_pgvector.vectors (embedding, metadata, category)
|
||||
SELECT
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
||||
jsonb_build_object('idx', i, 'batch', 'init'),
|
||||
CASE WHEN i % 3 = 0 THEN 'A' WHEN i % 3 = 1 THEN 'B' ELSE 'C' END
|
||||
FROM generate_series(1, 1000) i;
|
||||
|
||||
-- Insert HNSW test data
|
||||
INSERT INTO test_pgvector.hnsw_vectors (embedding, label)
|
||||
SELECT
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
||||
'hnsw_' || i
|
||||
FROM generate_series(1, 500) i;
|
||||
|
||||
-- Insert IVFFlat test data
|
||||
INSERT INTO test_pgvector.ivfflat_vectors (embedding, label)
|
||||
SELECT
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
||||
'ivf_' || i
|
||||
FROM generate_series(1, 500) i;
|
||||
|
||||
-- Insert hybrid search test data
|
||||
INSERT INTO test_hybrid.documents (title, content, embedding)
|
||||
VALUES
|
||||
('Machine Learning Basics', 'Introduction to supervised and unsupervised learning algorithms.',
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
||||
('Deep Learning', 'Neural networks and deep learning architectures for complex pattern recognition.',
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
||||
('Natural Language Processing', 'Text processing and understanding using transformer models.',
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
||||
('Computer Vision', 'Image recognition and object detection with convolutional networks.',
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
||||
('Reinforcement Learning', 'Agent-based learning through reward optimization.',
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector);
|
||||
|
||||
-- Insert multi-tenancy test data
|
||||
INSERT INTO test_tenancy.tenant_config (tenant_id, name, max_vectors, max_storage_bytes)
|
||||
VALUES
|
||||
('00000000-0000-0000-0000-000000000001', 'Tenant A', 100000, 1073741824),
|
||||
('00000000-0000-0000-0000-000000000002', 'Tenant B', 50000, 536870912),
|
||||
('00000000-0000-0000-0000-000000000003', 'Tenant C', 200000, 2147483648);
|
||||
|
||||
-- Insert vectors for each tenant
|
||||
INSERT INTO test_tenancy.tenant_vectors (tenant_id, embedding, metadata)
|
||||
SELECT
|
||||
'00000000-0000-0000-0000-00000000000' || ((i % 3) + 1)::text,
|
||||
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
||||
jsonb_build_object('idx', i)
|
||||
FROM generate_series(1, 300) i;
|
||||
|
||||
-- Update usage tracking
|
||||
INSERT INTO test_tenancy.tenant_usage (tenant_id, vector_count, storage_bytes)
|
||||
SELECT
|
||||
tenant_id,
|
||||
COUNT(*),
|
||||
COUNT(*) * 512 -- Approximate bytes per vector
|
||||
FROM test_tenancy.tenant_vectors
|
||||
GROUP BY tenant_id;
|
||||
|
||||
-- ============================================================================
|
||||
-- Row-Level Security Setup
|
||||
-- ============================================================================
|
||||
|
||||
-- Enable RLS on tenant tables
|
||||
ALTER TABLE test_tenancy.tenant_vectors ENABLE ROW LEVEL SECURITY;
|
||||
|
||||
-- Create tenant isolation policy
|
||||
CREATE POLICY tenant_isolation ON test_tenancy.tenant_vectors
|
||||
USING (tenant_id = COALESCE(
|
||||
NULLIF(current_setting('app.tenant_id', true), '')::uuid,
|
||||
tenant_id
|
||||
));
|
||||
|
||||
-- ============================================================================
|
||||
-- Statistics and Verification
|
||||
-- ============================================================================
|
||||
|
||||
-- Analyze all test tables
|
||||
ANALYZE test_pgvector.vectors;
|
||||
ANALYZE test_pgvector.hnsw_vectors;
|
||||
ANALYZE test_pgvector.ivfflat_vectors;
|
||||
ANALYZE test_hybrid.documents;
|
||||
ANALYZE test_tenancy.tenant_vectors;
|
||||
ANALYZE test_perf.benchmark_vectors;
|
||||
|
||||
-- Verify setup
|
||||
DO $$
|
||||
DECLARE
|
||||
vec_count INTEGER;
|
||||
idx_count INTEGER;
|
||||
schema_count INTEGER;
|
||||
BEGIN
|
||||
SELECT COUNT(*) INTO vec_count FROM test_pgvector.vectors;
|
||||
SELECT COUNT(*) INTO idx_count FROM pg_indexes WHERE schemaname LIKE 'test_%';
|
||||
SELECT COUNT(*) INTO schema_count FROM information_schema.schemata WHERE schema_name LIKE 'test_%';
|
||||
|
||||
RAISE NOTICE '========================================';
|
||||
RAISE NOTICE 'Integration Test Setup Complete';
|
||||
RAISE NOTICE '========================================';
|
||||
RAISE NOTICE 'Test schemas created: %', schema_count;
|
||||
RAISE NOTICE 'Test vectors inserted: %', vec_count;
|
||||
RAISE NOTICE 'Test indexes created: %', idx_count;
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'Extension version: %', ruvector_version();
|
||||
RAISE NOTICE 'SIMD info: %', ruvector_simd_info();
|
||||
RAISE NOTICE '========================================';
|
||||
END $$;
|
||||
Reference in New Issue
Block a user