327 lines
11 KiB
SQL
327 lines
11 KiB
SQL
-- RuVector-Postgres Integration Test Initialization
|
|
-- Sets up comprehensive test environment with multiple schemas and test data
|
|
|
|
-- Enable required extensions
|
|
CREATE EXTENSION IF NOT EXISTS ruvector;
|
|
|
|
-- Log initialization
|
|
DO $$
|
|
BEGIN
|
|
RAISE NOTICE '========================================';
|
|
RAISE NOTICE 'RuVector Integration Test Initialization';
|
|
RAISE NOTICE '========================================';
|
|
END $$;
|
|
|
|
-- ============================================================================
|
|
-- Test Schemas
|
|
-- ============================================================================
|
|
|
|
-- pgvector compatibility tests
|
|
CREATE SCHEMA IF NOT EXISTS test_pgvector;
|
|
COMMENT ON SCHEMA test_pgvector IS 'pgvector SQL compatibility tests';
|
|
|
|
-- Integrity system tests
|
|
CREATE SCHEMA IF NOT EXISTS test_integrity;
|
|
COMMENT ON SCHEMA test_integrity IS 'Integrity and mincut tests';
|
|
|
|
-- Hybrid search tests
|
|
CREATE SCHEMA IF NOT EXISTS test_hybrid;
|
|
COMMENT ON SCHEMA test_hybrid IS 'Hybrid BM25+vector search tests';
|
|
|
|
-- Multi-tenancy tests
|
|
CREATE SCHEMA IF NOT EXISTS test_tenancy;
|
|
COMMENT ON SCHEMA test_tenancy IS 'Multi-tenant isolation tests';
|
|
|
|
-- Self-healing tests
|
|
CREATE SCHEMA IF NOT EXISTS test_healing;
|
|
COMMENT ON SCHEMA test_healing IS 'Self-healing and recovery tests';
|
|
|
|
-- Performance tests
|
|
CREATE SCHEMA IF NOT EXISTS test_perf;
|
|
COMMENT ON SCHEMA test_perf IS 'Performance benchmarks';
|
|
|
|
-- ============================================================================
|
|
-- Test Tables
|
|
-- ============================================================================
|
|
|
|
-- pgvector compatibility test table
|
|
CREATE TABLE test_pgvector.vectors (
|
|
id SERIAL PRIMARY KEY,
|
|
embedding vector(128),
|
|
metadata JSONB,
|
|
category TEXT,
|
|
created_at TIMESTAMP DEFAULT NOW()
|
|
);
|
|
|
|
-- Table for HNSW index testing
|
|
CREATE TABLE test_pgvector.hnsw_vectors (
|
|
id SERIAL PRIMARY KEY,
|
|
embedding vector(128),
|
|
label TEXT
|
|
);
|
|
|
|
-- Table for IVFFlat index testing
|
|
CREATE TABLE test_pgvector.ivfflat_vectors (
|
|
id SERIAL PRIMARY KEY,
|
|
embedding vector(128),
|
|
label TEXT
|
|
);
|
|
|
|
-- Integrity test tables
|
|
CREATE TABLE test_integrity.graph_nodes (
|
|
id SERIAL PRIMARY KEY,
|
|
embedding vector(64),
|
|
layer INTEGER DEFAULT 0,
|
|
connections INTEGER[]
|
|
);
|
|
|
|
CREATE TABLE test_integrity.metrics (
|
|
id SERIAL PRIMARY KEY,
|
|
timestamp TIMESTAMP DEFAULT NOW(),
|
|
mincut_value INTEGER,
|
|
load_factor FLOAT,
|
|
error_rate FLOAT,
|
|
state TEXT
|
|
);
|
|
|
|
-- Hybrid search test tables
|
|
CREATE TABLE test_hybrid.documents (
|
|
id SERIAL PRIMARY KEY,
|
|
title TEXT NOT NULL,
|
|
content TEXT NOT NULL,
|
|
embedding vector(384),
|
|
created_at TIMESTAMP DEFAULT NOW()
|
|
);
|
|
|
|
CREATE TABLE test_hybrid.search_results (
|
|
id SERIAL PRIMARY KEY,
|
|
query_id INTEGER,
|
|
doc_id INTEGER,
|
|
vector_score FLOAT,
|
|
text_score FLOAT,
|
|
fused_score FLOAT,
|
|
rank INTEGER
|
|
);
|
|
|
|
-- Multi-tenancy test tables
|
|
CREATE TABLE test_tenancy.tenant_config (
|
|
tenant_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
name TEXT NOT NULL,
|
|
max_vectors BIGINT DEFAULT 100000,
|
|
max_storage_bytes BIGINT DEFAULT 1073741824,
|
|
created_at TIMESTAMP DEFAULT NOW()
|
|
);
|
|
|
|
CREATE TABLE test_tenancy.tenant_vectors (
|
|
id SERIAL,
|
|
tenant_id UUID NOT NULL,
|
|
embedding vector(128),
|
|
metadata JSONB,
|
|
created_at TIMESTAMP DEFAULT NOW(),
|
|
PRIMARY KEY (tenant_id, id)
|
|
);
|
|
|
|
CREATE TABLE test_tenancy.tenant_usage (
|
|
tenant_id UUID PRIMARY KEY,
|
|
vector_count BIGINT DEFAULT 0,
|
|
storage_bytes BIGINT DEFAULT 0,
|
|
query_count BIGINT DEFAULT 0,
|
|
last_updated TIMESTAMP DEFAULT NOW()
|
|
);
|
|
|
|
-- Self-healing test tables
|
|
CREATE TABLE test_healing.health_metrics (
|
|
id SERIAL PRIMARY KEY,
|
|
timestamp TIMESTAMP DEFAULT NOW(),
|
|
metric_name TEXT NOT NULL,
|
|
metric_value FLOAT NOT NULL,
|
|
threshold FLOAT,
|
|
status TEXT
|
|
);
|
|
|
|
CREATE TABLE test_healing.remediation_log (
|
|
id SERIAL PRIMARY KEY,
|
|
timestamp TIMESTAMP DEFAULT NOW(),
|
|
problem_type TEXT NOT NULL,
|
|
action_taken TEXT NOT NULL,
|
|
success BOOLEAN,
|
|
recovery_time_ms INTEGER,
|
|
notes TEXT
|
|
);
|
|
|
|
CREATE TABLE test_healing.learning_records (
|
|
id SERIAL PRIMARY KEY,
|
|
timestamp TIMESTAMP DEFAULT NOW(),
|
|
problem_context JSONB,
|
|
action TEXT,
|
|
outcome JSONB,
|
|
confidence FLOAT DEFAULT 0.5
|
|
);
|
|
|
|
-- Performance test tables
|
|
CREATE TABLE test_perf.benchmark_vectors (
|
|
id SERIAL PRIMARY KEY,
|
|
embedding vector(128),
|
|
metadata JSONB,
|
|
created_at TIMESTAMP DEFAULT NOW()
|
|
);
|
|
|
|
CREATE TABLE test_perf.benchmark_results (
|
|
id SERIAL PRIMARY KEY,
|
|
benchmark_name TEXT NOT NULL,
|
|
timestamp TIMESTAMP DEFAULT NOW(),
|
|
iterations INTEGER,
|
|
total_time_ms FLOAT,
|
|
avg_time_ms FLOAT,
|
|
p50_time_ms FLOAT,
|
|
p95_time_ms FLOAT,
|
|
p99_time_ms FLOAT,
|
|
throughput FLOAT,
|
|
notes TEXT
|
|
);
|
|
|
|
-- ============================================================================
|
|
-- Indexes
|
|
-- ============================================================================
|
|
|
|
-- HNSW indexes for different test scenarios
|
|
CREATE INDEX test_pgvector_vectors_hnsw ON test_pgvector.vectors
|
|
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
|
|
|
CREATE INDEX test_pgvector_hnsw_idx ON test_pgvector.hnsw_vectors
|
|
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
|
|
|
-- IVFFlat index
|
|
CREATE INDEX test_pgvector_ivfflat_idx ON test_pgvector.ivfflat_vectors
|
|
USING ivfflat (embedding vector_l2_ops) WITH (lists = 100);
|
|
|
|
-- Performance benchmark index
|
|
CREATE INDEX test_perf_benchmark_hnsw ON test_perf.benchmark_vectors
|
|
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
|
|
|
-- Hybrid search indexes
|
|
CREATE INDEX test_hybrid_docs_embedding ON test_hybrid.documents
|
|
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
|
|
|
-- GIN index for text search
|
|
CREATE INDEX test_hybrid_docs_content ON test_hybrid.documents
|
|
USING gin (to_tsvector('english', content));
|
|
|
|
-- Multi-tenancy indexes
|
|
CREATE INDEX test_tenancy_vectors_tenant ON test_tenancy.tenant_vectors (tenant_id);
|
|
CREATE INDEX test_tenancy_vectors_hnsw ON test_tenancy.tenant_vectors
|
|
USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
|
|
|
|
-- ============================================================================
|
|
-- Test Data
|
|
-- ============================================================================
|
|
|
|
-- Insert pgvector compatibility test data
|
|
INSERT INTO test_pgvector.vectors (embedding, metadata, category)
|
|
SELECT
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
|
jsonb_build_object('idx', i, 'batch', 'init'),
|
|
CASE WHEN i % 3 = 0 THEN 'A' WHEN i % 3 = 1 THEN 'B' ELSE 'C' END
|
|
FROM generate_series(1, 1000) i;
|
|
|
|
-- Insert HNSW test data
|
|
INSERT INTO test_pgvector.hnsw_vectors (embedding, label)
|
|
SELECT
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
|
'hnsw_' || i
|
|
FROM generate_series(1, 500) i;
|
|
|
|
-- Insert IVFFlat test data
|
|
INSERT INTO test_pgvector.ivfflat_vectors (embedding, label)
|
|
SELECT
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
|
'ivf_' || i
|
|
FROM generate_series(1, 500) i;
|
|
|
|
-- Insert hybrid search test data
|
|
INSERT INTO test_hybrid.documents (title, content, embedding)
|
|
VALUES
|
|
('Machine Learning Basics', 'Introduction to supervised and unsupervised learning algorithms.',
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
|
('Deep Learning', 'Neural networks and deep learning architectures for complex pattern recognition.',
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
|
('Natural Language Processing', 'Text processing and understanding using transformer models.',
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
|
('Computer Vision', 'Image recognition and object detection with convolutional networks.',
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
|
|
('Reinforcement Learning', 'Agent-based learning through reward optimization.',
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector);
|
|
|
|
-- Insert multi-tenancy test data
|
|
INSERT INTO test_tenancy.tenant_config (tenant_id, name, max_vectors, max_storage_bytes)
|
|
VALUES
|
|
('00000000-0000-0000-0000-000000000001', 'Tenant A', 100000, 1073741824),
|
|
('00000000-0000-0000-0000-000000000002', 'Tenant B', 50000, 536870912),
|
|
('00000000-0000-0000-0000-000000000003', 'Tenant C', 200000, 2147483648);
|
|
|
|
-- Insert vectors for each tenant
|
|
INSERT INTO test_tenancy.tenant_vectors (tenant_id, embedding, metadata)
|
|
SELECT
|
|
'00000000-0000-0000-0000-00000000000' || ((i % 3) + 1)::text,
|
|
(SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector,
|
|
jsonb_build_object('idx', i)
|
|
FROM generate_series(1, 300) i;
|
|
|
|
-- Update usage tracking
|
|
INSERT INTO test_tenancy.tenant_usage (tenant_id, vector_count, storage_bytes)
|
|
SELECT
|
|
tenant_id,
|
|
COUNT(*),
|
|
COUNT(*) * 512 -- Approximate bytes per vector
|
|
FROM test_tenancy.tenant_vectors
|
|
GROUP BY tenant_id;
|
|
|
|
-- ============================================================================
|
|
-- Row-Level Security Setup
|
|
-- ============================================================================
|
|
|
|
-- Enable RLS on tenant tables
|
|
ALTER TABLE test_tenancy.tenant_vectors ENABLE ROW LEVEL SECURITY;
|
|
|
|
-- Create tenant isolation policy
|
|
CREATE POLICY tenant_isolation ON test_tenancy.tenant_vectors
|
|
USING (tenant_id = COALESCE(
|
|
NULLIF(current_setting('app.tenant_id', true), '')::uuid,
|
|
tenant_id
|
|
));
|
|
|
|
-- ============================================================================
|
|
-- Statistics and Verification
|
|
-- ============================================================================
|
|
|
|
-- Analyze all test tables
|
|
ANALYZE test_pgvector.vectors;
|
|
ANALYZE test_pgvector.hnsw_vectors;
|
|
ANALYZE test_pgvector.ivfflat_vectors;
|
|
ANALYZE test_hybrid.documents;
|
|
ANALYZE test_tenancy.tenant_vectors;
|
|
ANALYZE test_perf.benchmark_vectors;
|
|
|
|
-- Verify setup
|
|
DO $$
|
|
DECLARE
|
|
vec_count INTEGER;
|
|
idx_count INTEGER;
|
|
schema_count INTEGER;
|
|
BEGIN
|
|
SELECT COUNT(*) INTO vec_count FROM test_pgvector.vectors;
|
|
SELECT COUNT(*) INTO idx_count FROM pg_indexes WHERE schemaname LIKE 'test_%';
|
|
SELECT COUNT(*) INTO schema_count FROM information_schema.schemata WHERE schema_name LIKE 'test_%';
|
|
|
|
RAISE NOTICE '========================================';
|
|
RAISE NOTICE 'Integration Test Setup Complete';
|
|
RAISE NOTICE '========================================';
|
|
RAISE NOTICE 'Test schemas created: %', schema_count;
|
|
RAISE NOTICE 'Test vectors inserted: %', vec_count;
|
|
RAISE NOTICE 'Test indexes created: %', idx_count;
|
|
RAISE NOTICE '';
|
|
RAISE NOTICE 'Extension version: %', ruvector_version();
|
|
RAISE NOTICE 'SIMD info: %', ruvector_simd_info();
|
|
RAISE NOTICE '========================================';
|
|
END $$;
|