Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/crates/ruvector-postgres/install/config/ruvector.conf.template
+++ b/vendor/ruvector/crates/ruvector-postgres/install/config/ruvector.conf.template
@@ -0,0 +1,229 @@
+# =============================================================================
+# RuVector PostgreSQL Extension Configuration
+# =============================================================================
+#
+# This file contains configuration options for the RuVector extension.
+# Copy this file to your PostgreSQL data directory and include it in
+# postgresql.conf with: include = 'ruvector.conf'
+#
+# Or set individual parameters with: ALTER SYSTEM SET ruvector.param = value;
+#
+
+# =============================================================================
+# SIMD Configuration
+# =============================================================================
+
+# SIMD instruction set to use for distance calculations
+# Options:
+#   - 'auto'    : Auto-detect best available (recommended)
+#   - 'avx512'  : Force AVX-512 (16 floats per operation)
+#   - 'avx2'    : Force AVX2 (8 floats per operation)
+#   - 'neon'    : Force ARM NEON (4 floats per operation)
+#   - 'scalar'  : Disable SIMD (portable, slowest)
+# Default: 'auto'
+#ruvector.simd_mode = 'auto'
+
+# Enable SIMD prefetching for better cache utilization
+# This can improve performance for large vector operations
+# Default: on
+#ruvector.simd_prefetch = on
+
+# =============================================================================
+# Memory Configuration
+# =============================================================================
+
+# Maximum memory allocation for vector operations (in MB)
+# Set to 0 to apply no RuVector-specific limit (PostgreSQL's work_mem applies instead)
+# Default: 0 (use work_mem)
+#ruvector.max_memory_mb = 0
+
+# Enable memory pooling for frequently accessed vectors
+# Reduces allocation overhead for repeated operations
+# Default: on
+#ruvector.memory_pool_enabled = on
+
+# Memory pool size (in MB)
+# Only used when memory_pool_enabled = on
+# Default: 64
+#ruvector.memory_pool_size_mb = 64
+
+# Enable zero-copy operations where possible
+# Reduces memory copies but may hold references longer
+# Default: on
+#ruvector.zero_copy = on
+
+# =============================================================================
+# Distance Calculation Configuration
+# =============================================================================
+
+# Default distance metric for operators
+# Options: 'l2' (Euclidean), 'cosine', 'ip' (inner product)
+# Default: 'l2'
+#ruvector.default_distance_metric = 'l2'
+
+# Enable parallel distance computation for batch operations
+# Uses multiple CPU cores for large vector comparisons
+# Default: on
+#ruvector.parallel_distance = on
+
+# Minimum number of vectors to enable parallel processing
+# Below this threshold, sequential processing is used
+# Default: 1000
+#ruvector.parallel_threshold = 1000
+
+# Number of worker threads for parallel operations
+# Set to 0 to use PostgreSQL's max_parallel_workers
+# Default: 0
+#ruvector.parallel_workers = 0
+
+# =============================================================================
+# Index Configuration (HNSW)
+# =============================================================================
+
+# Default ef_construction for HNSW index building
+# Higher values = better quality, slower build
+# Range: 4-1000, Default: 64
+#ruvector.hnsw_ef_construction = 64
+
+# Default M parameter for HNSW index
+# Number of bi-directional links per node
+# Higher values = better quality, more memory
+# Range: 2-100, Default: 16
+#ruvector.hnsw_m = 16
+
+# Default ef_search for HNSW queries
+# Higher values = better recall, slower queries
+# Range: 1-1000, Default: 40
+#ruvector.hnsw_ef_search = 40
+
+# =============================================================================
+# Index Configuration (IVF-Flat)
+# =============================================================================
+
+# Default number of lists (clusters) for IVF-Flat index
+# More lists = faster search, longer build
+# Recommendation: sqrt(num_vectors) to 4*sqrt(num_vectors)
+# Default: 100
+#ruvector.ivfflat_lists = 100
+
+# Default number of probes for IVF-Flat queries
+# More probes = better recall, slower queries
+# Range: 1-lists, Default: 10
+#ruvector.ivfflat_probes = 10
+
+# =============================================================================
+# Quantization Configuration
+# =============================================================================
+
+# Enable product quantization for memory compression
+# Reduces memory usage by 4-32x with some accuracy loss
+# Default: off
+#ruvector.quantization_enabled = off
+
+# Number of subquantizers for product quantization
+# More subquantizers = better accuracy, more memory
+# Must divide vector dimensions evenly
+# Default: 8
+#ruvector.pq_m = 8
+
+# Bits per subquantizer (determines codebook size)
+# Options: 4, 8, 16 (16, 256, 65536 centroids per subquantizer, i.e. 2^bits)
+# Default: 8
+#ruvector.pq_bits = 8
+
+# Enable scalar quantization (int8) for faster operations
+# Reduces memory by 4x with minimal accuracy loss
+# Default: off
+#ruvector.scalar_quantization = off
+
+# =============================================================================
+# Temporal Functions Configuration
+# =============================================================================
+
+# Default alpha for exponential moving average
+# Range: 0.0-1.0, Default: 0.1
+#ruvector.temporal_ema_alpha = 0.1
+
+# Enable temporal compression (delta encoding)
+# Default: off
+#ruvector.temporal_compression = off
+
+# =============================================================================
+# Attention Functions Configuration
+# =============================================================================
+
+# Default scaling mode for attention scores
+# Options: 'sqrt_dim', 'none', 'learned'
+# Default: 'sqrt_dim'
+#ruvector.attention_scale_mode = 'sqrt_dim'
+
+# Maximum number of attention heads
+# Default: 16
+#ruvector.attention_max_heads = 16
+
+# =============================================================================
+# Graph Functions Configuration
+# =============================================================================
+
+# Default damping factor for PageRank calculations
+# Range: 0.0-1.0, Default: 0.85
+#ruvector.graph_damping = 0.85
+
+# Default similarity threshold for graph connectivity
+# Range: 0.0-1.0, Default: 0.5
+#ruvector.graph_similarity_threshold = 0.5
+
+# =============================================================================
+# Logging Configuration
+# =============================================================================
+
+# Log level for RuVector messages
+# Options: 'debug', 'info', 'warning', 'error'
+# Default: 'info'
+#ruvector.log_level = 'info'
+
+# Log SIMD instruction usage (for debugging/optimization)
+# Default: off
+#ruvector.log_simd_ops = off
+
+# Log distance calculation statistics
+# Default: off
+#ruvector.log_distance_stats = off
+
+# Log memory allocation patterns
+# Default: off
+#ruvector.log_memory_stats = off
+
+# =============================================================================
+# Performance Tuning Presets
+# =============================================================================
+#
+# Preset: High Throughput (many small queries)
+# -------------------------------------------------
+# ruvector.parallel_distance = off
+# ruvector.memory_pool_enabled = on
+# ruvector.zero_copy = on
+# ruvector.hnsw_ef_search = 20
+#
+# Preset: High Accuracy (fewer queries, best recall)
+# -------------------------------------------------
+# ruvector.parallel_distance = on
+# ruvector.hnsw_ef_search = 100
+# ruvector.ivfflat_probes = 50
+# ruvector.quantization_enabled = off
+#
+# Preset: Low Memory (large datasets)
+# -------------------------------------------------
+# ruvector.quantization_enabled = on
+# ruvector.pq_m = 16
+# ruvector.pq_bits = 8
+# ruvector.scalar_quantization = on
+# ruvector.memory_pool_size_mb = 32
+#
+# Preset: Real-time (minimal latency)
+# -------------------------------------------------
+# ruvector.parallel_distance = off
+# ruvector.memory_pool_enabled = on
+# ruvector.hnsw_ef_search = 10
+# ruvector.ivfflat_probes = 1
+#