Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,229 @@
# =============================================================================
# RuVector PostgreSQL Extension Configuration
# =============================================================================
#
# This file contains configuration options for the RuVector extension.
# Copy this file to your PostgreSQL data directory and include it in
# postgresql.conf with: include = 'ruvector.conf'
#
# Or set individual parameters with: ALTER SYSTEM SET ruvector.param = value;
#
# =============================================================================
# SIMD Configuration
# =============================================================================
# SIMD instruction set to use for distance calculations
# Options:
# - 'auto' : Auto-detect best available (recommended)
# - 'avx512' : Force AVX-512 (16 floats per operation)
# - 'avx2' : Force AVX2 (8 floats per operation)
# - 'neon' : Force ARM NEON (4 floats per operation)
# - 'scalar' : Disable SIMD (portable, slowest)
# Default: 'auto'
#ruvector.simd_mode = 'auto'
# Enable SIMD prefetching for better cache utilization
# This can improve performance for large vector operations
# Default: on
#ruvector.simd_prefetch = on
# =============================================================================
# Memory Configuration
# =============================================================================
# Maximum memory allocation for vector operations (in MB)
# Set to 0 to apply no RuVector-specific limit (PostgreSQL's work_mem governs instead)
# Default: 0 (use work_mem)
#ruvector.max_memory_mb = 0
# Enable memory pooling for frequently accessed vectors
# Reduces allocation overhead for repeated operations
# Default: on
#ruvector.memory_pool_enabled = on
# Memory pool size (in MB)
# Only used when memory_pool_enabled = on
# Default: 64
#ruvector.memory_pool_size_mb = 64
# Enable zero-copy operations where possible
# Reduces memory copies but may hold references longer
# Default: on
#ruvector.zero_copy = on
# =============================================================================
# Distance Calculation Configuration
# =============================================================================
# Default distance metric for operators
# Options: 'l2' (Euclidean), 'cosine', 'ip' (inner product)
# Default: 'l2'
#ruvector.default_distance_metric = 'l2'
# Enable parallel distance computation for batch operations
# Uses multiple CPU cores for large vector comparisons
# Default: on
#ruvector.parallel_distance = on
# Minimum number of vectors to enable parallel processing
# Below this threshold, sequential processing is used
# Default: 1000
#ruvector.parallel_threshold = 1000
# Number of worker threads for parallel operations
# Set to 0 to use PostgreSQL's max_parallel_workers
# Default: 0
#ruvector.parallel_workers = 0
# =============================================================================
# Index Configuration (HNSW)
# =============================================================================
# Default ef_construction for HNSW index building
# Higher values = better quality, slower build
# Range: 4-1000, Default: 64
#ruvector.hnsw_ef_construction = 64
# Default M parameter for HNSW index
# Number of bi-directional links per node
# Higher values = better quality, more memory
# Range: 2-100, Default: 16
#ruvector.hnsw_m = 16
# Default ef_search for HNSW queries
# Higher values = better recall, slower queries
# Range: 1-1000, Default: 40
#ruvector.hnsw_ef_search = 40
# =============================================================================
# Index Configuration (IVF-Flat)
# =============================================================================
# Default number of lists (clusters) for IVF-Flat index
# More lists = faster search, longer build
# Recommendation: sqrt(num_vectors) to 4*sqrt(num_vectors)
# Default: 100
#ruvector.ivfflat_lists = 100
# Default number of probes for IVF-Flat queries
# More probes = better recall, slower queries
# Range: 1 to the number of lists, Default: 10
#ruvector.ivfflat_probes = 10
# =============================================================================
# Quantization Configuration
# =============================================================================
# Enable product quantization for memory compression
# Reduces memory usage by 4-32x with some accuracy loss
# Default: off
#ruvector.quantization_enabled = off
# Number of subquantizers for product quantization
# More subquantizers = better accuracy, more memory
# Must divide vector dimensions evenly
# Default: 8
#ruvector.pq_m = 8
# Bits per subquantizer (determines codebook size)
# Options: 4, 8, 16 (2^bits centroids per codebook: 16, 256, 65536)
# Default: 8
#ruvector.pq_bits = 8
# Enable scalar quantization (int8) for faster operations
# Reduces memory by 4x with minimal accuracy loss
# Default: off
#ruvector.scalar_quantization = off
# =============================================================================
# Temporal Functions Configuration
# =============================================================================
# Default alpha for exponential moving average
# Range: 0.0-1.0, Default: 0.1
#ruvector.temporal_ema_alpha = 0.1
# Enable temporal compression (delta encoding)
# Default: off
#ruvector.temporal_compression = off
# =============================================================================
# Attention Functions Configuration
# =============================================================================
# Default scaling mode for attention scores
# Options: 'sqrt_dim', 'none', 'learned'
# Default: 'sqrt_dim'
#ruvector.attention_scale_mode = 'sqrt_dim'
# Maximum number of attention heads
# Default: 16
#ruvector.attention_max_heads = 16
# =============================================================================
# Graph Functions Configuration
# =============================================================================
# Default damping factor for PageRank calculations
# Range: 0.0-1.0, Default: 0.85
#ruvector.graph_damping = 0.85
# Default similarity threshold for graph connectivity
# Range: 0.0-1.0, Default: 0.5
#ruvector.graph_similarity_threshold = 0.5
# =============================================================================
# Logging Configuration
# =============================================================================
# Log level for RuVector messages
# Options: 'debug', 'info', 'warning', 'error'
# Default: 'info'
#ruvector.log_level = 'info'
# Log SIMD instruction usage (for debugging/optimization)
# Default: off
#ruvector.log_simd_ops = off
# Log distance calculation statistics
# Default: off
#ruvector.log_distance_stats = off
# Log memory allocation patterns
# Default: off
#ruvector.log_memory_stats = off
# =============================================================================
# Performance Tuning Presets
# =============================================================================
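#
# Each preset below is a suggested combination of the settings documented above.
# To apply one, copy its lines and remove the leading '# '.
#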
# Preset: High Throughput (many small queries)
# -------------------------------------------------
# ruvector.parallel_distance = off
# ruvector.memory_pool_enabled = on
# ruvector.zero_copy = on
# ruvector.hnsw_ef_search = 20
#
# Preset: High Accuracy (fewer queries, best recall)
# -------------------------------------------------
# ruvector.parallel_distance = on
# ruvector.hnsw_ef_search = 100
# ruvector.ivfflat_probes = 50
# ruvector.quantization_enabled = off
#
# Preset: Low Memory (large datasets)
# -------------------------------------------------
# ruvector.quantization_enabled = on
# ruvector.pq_m = 16
# ruvector.pq_bits = 8
# ruvector.scalar_quantization = on
# ruvector.memory_pool_size_mb = 32
#
# Preset: Real-time (minimal latency)
# -------------------------------------------------
# ruvector.parallel_distance = off
# ruvector.memory_pool_enabled = on
# ruvector.hnsw_ef_search = 10
# ruvector.ivfflat_probes = 1
#