Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
268
vendor/ruvector/crates/ruvector-postgres/src/lib.rs
vendored
Normal file
268
vendor/ruvector/crates/ruvector-postgres/src/lib.rs
vendored
Normal file
@@ -0,0 +1,268 @@
|
||||
//! # RuVector-Postgres
|
||||
//!
|
||||
//! High-performance PostgreSQL extension for vector similarity search.
|
||||
//! A drop-in replacement for pgvector with SIMD optimizations.
|
||||
|
||||
// Allow development-stage lints for work-in-progress code
|
||||
#![allow(unexpected_cfgs)] // pgrx macros (pg12/pg13) and optional features (tokio)
|
||||
#![allow(dead_code)] // Stub implementations and future features
|
||||
#![allow(unused_variables)] // WIP function signatures
|
||||
#![allow(unused_mut)]
|
||||
// Variables prepared for future mutation
|
||||
// Allow clippy lints common in pgrx extensions and WIP code
|
||||
#![allow(clippy::all)] // Allow all clippy warnings for development
|
||||
#![allow(for_loops_over_fallibles)] // pgrx derive macro generates this pattern
|
||||
|
||||
use pgrx::prelude::*;
|
||||
use pgrx::{GucContext, GucFlags, GucRegistry, GucSetting};
|
||||
|
||||
// Initialize the extension
|
||||
::pgrx::pg_module_magic!();
|
||||
|
||||
// Module declarations
|
||||
pub mod attention;
|
||||
pub mod dag;
|
||||
pub mod distance;
|
||||
pub mod gnn;
|
||||
pub mod graph;
|
||||
pub mod healing;
|
||||
pub mod hybrid;
|
||||
pub mod hyperbolic;
|
||||
pub mod index;
|
||||
pub mod integrity;
|
||||
pub mod learning;
|
||||
pub mod operators;
|
||||
pub mod quantization;
|
||||
pub mod routing;
|
||||
pub mod sparse;
|
||||
pub mod tenancy;
|
||||
pub mod types;
|
||||
pub mod workers;
|
||||
|
||||
// Optional: Local embedding generation (requires 'embeddings' feature)
|
||||
#[cfg(feature = "embeddings")]
|
||||
pub mod embeddings;
|
||||
|
||||
// Optional: Mincut-gated transformer (requires 'gated-transformer' feature)
|
||||
#[cfg(feature = "gated-transformer")]
|
||||
pub mod gated_transformer;
|
||||
|
||||
// v0.3 feature-gated modules
|
||||
#[cfg(feature = "solver")]
|
||||
pub mod solver;
|
||||
|
||||
#[cfg(feature = "math-distances")]
|
||||
pub mod math;
|
||||
|
||||
#[cfg(feature = "tda")]
|
||||
pub mod tda;
|
||||
|
||||
#[cfg(feature = "sona-learning")]
|
||||
pub mod sona;
|
||||
|
||||
#[cfg(feature = "domain-expansion")]
|
||||
pub mod domain_expansion;
|
||||
|
||||
// Re-exports for convenience
|
||||
pub use distance::{cosine_distance, euclidean_distance, inner_product_distance, DistanceMetric};
|
||||
pub use types::RuVector;
|
||||
|
||||
/// Extension version
|
||||
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
/// Maximum supported vector dimensions
|
||||
pub const MAX_DIMENSIONS: usize = 16_000;
|
||||
|
||||
/// Default HNSW parameters
|
||||
pub const DEFAULT_HNSW_M: usize = 16;
|
||||
pub const DEFAULT_HNSW_EF_CONSTRUCTION: usize = 64;
|
||||
pub const DEFAULT_HNSW_EF_SEARCH: usize = 40;
|
||||
|
||||
/// Default IVFFlat parameters
|
||||
pub const DEFAULT_IVFFLAT_LISTS: usize = 100;
|
||||
pub const DEFAULT_IVFFLAT_PROBES: usize = 1;
|
||||
|
||||
// GUC variables
|
||||
static EF_SEARCH: GucSetting<i32> = GucSetting::<i32>::new(DEFAULT_HNSW_EF_SEARCH as i32);
|
||||
static PROBES: GucSetting<i32> = GucSetting::<i32>::new(DEFAULT_IVFFLAT_PROBES as i32);
|
||||
|
||||
// Hybrid search GUC variables
|
||||
static HYBRID_ALPHA: GucSetting<f64> = GucSetting::<f64>::new(0.5);
|
||||
static HYBRID_RRF_K: GucSetting<i32> = GucSetting::<i32>::new(60);
|
||||
static HYBRID_PREFETCH_K: GucSetting<i32> = GucSetting::<i32>::new(100);
|
||||
|
||||
// ============================================================================
|
||||
// Extension Initialization
|
||||
// ============================================================================
|
||||
|
||||
/// Called when the extension is loaded
|
||||
#[pg_guard]
|
||||
pub extern "C" fn _PG_init() {
|
||||
// Initialize SIMD dispatch
|
||||
distance::init_simd_dispatch();
|
||||
|
||||
// Register GUCs
|
||||
GucRegistry::define_int_guc(
|
||||
"ruvector.ef_search",
|
||||
"HNSW ef_search parameter for query time",
|
||||
"Higher values improve recall at the cost of speed",
|
||||
&EF_SEARCH,
|
||||
1,
|
||||
1000,
|
||||
GucContext::Userset,
|
||||
GucFlags::default(),
|
||||
);
|
||||
|
||||
GucRegistry::define_int_guc(
|
||||
"ruvector.probes",
|
||||
"IVFFlat number of lists to probe",
|
||||
"Higher values improve recall at the cost of speed",
|
||||
&PROBES,
|
||||
1,
|
||||
10000,
|
||||
GucContext::Userset,
|
||||
GucFlags::default(),
|
||||
);
|
||||
|
||||
// Hybrid search GUCs
|
||||
GucRegistry::define_float_guc(
|
||||
"ruvector.hybrid_alpha",
|
||||
"Default alpha for hybrid linear fusion (0=keyword only, 1=vector only)",
|
||||
"Controls the blend between vector and keyword search",
|
||||
&HYBRID_ALPHA,
|
||||
0.0,
|
||||
1.0,
|
||||
GucContext::Userset,
|
||||
GucFlags::default(),
|
||||
);
|
||||
|
||||
GucRegistry::define_int_guc(
|
||||
"ruvector.hybrid_rrf_k",
|
||||
"RRF constant for hybrid search (default 60)",
|
||||
"Lower values give more weight to top-ranked results",
|
||||
&HYBRID_RRF_K,
|
||||
1,
|
||||
1000,
|
||||
GucContext::Userset,
|
||||
GucFlags::default(),
|
||||
);
|
||||
|
||||
GucRegistry::define_int_guc(
|
||||
"ruvector.hybrid_prefetch_k",
|
||||
"Number of results to prefetch from each branch",
|
||||
"Higher values improve recall but increase latency",
|
||||
&HYBRID_PREFETCH_K,
|
||||
1,
|
||||
10000,
|
||||
GucContext::Userset,
|
||||
GucFlags::default(),
|
||||
);
|
||||
|
||||
// Initialize tenant GUCs for multi-tenancy
|
||||
tenancy::init_tenant_gucs();
|
||||
|
||||
// Note: DAG neural learning is initialized on first use
|
||||
// No explicit initialization required - state is lazy-loaded
|
||||
|
||||
// Log initialization
|
||||
pgrx::log!(
|
||||
"RuVector {} initialized with {} SIMD support",
|
||||
VERSION,
|
||||
distance::simd_info()
|
||||
);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SQL Functions
|
||||
// ============================================================================
|
||||
|
||||
/// Returns the extension version
|
||||
#[pg_extern]
|
||||
fn ruvector_version() -> &'static str {
|
||||
VERSION
|
||||
}
|
||||
|
||||
/// Returns SIMD capability information
|
||||
#[pg_extern]
|
||||
fn ruvector_simd_info() -> String {
|
||||
distance::simd_info_detailed()
|
||||
}
|
||||
|
||||
/// Returns memory statistics for the extension
|
||||
#[pg_extern]
|
||||
fn ruvector_memory_stats() -> pgrx::JsonB {
|
||||
let stats = serde_json::json!({
|
||||
"index_memory_mb": index::get_total_index_memory_mb(),
|
||||
"vector_cache_mb": types::get_vector_cache_memory_mb(),
|
||||
"quantization_tables_mb": quantization::get_table_memory_mb(),
|
||||
"total_extension_mb": index::get_total_index_memory_mb() +
|
||||
types::get_vector_cache_memory_mb() +
|
||||
quantization::get_table_memory_mb(),
|
||||
});
|
||||
pgrx::JsonB(stats)
|
||||
}
|
||||
|
||||
/// Perform index maintenance
|
||||
#[pg_extern]
|
||||
fn ruvector_index_maintenance(index_name: &str) -> String {
|
||||
match index::perform_maintenance(index_name) {
|
||||
Ok(stats) => format!("Maintenance completed: {:?}", stats),
|
||||
Err(e) => format!("Maintenance failed: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Quantization Functions (Array-based)
|
||||
// ============================================================================
|
||||
|
||||
/// Binary quantize a vector (array-based)
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn binary_quantize_arr(v: Vec<f32>) -> Vec<u8> {
|
||||
quantization::binary::quantize(&v)
|
||||
}
|
||||
|
||||
/// Scalar quantize a vector (SQ8) (array-based)
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn scalar_quantize_arr(v: Vec<f32>) -> pgrx::JsonB {
|
||||
let (quantized, scale, offset) = quantization::scalar::quantize(&v);
|
||||
pgrx::JsonB(serde_json::json!({
|
||||
"data": quantized,
|
||||
"scale": scale,
|
||||
"offset": offset,
|
||||
}))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tests
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(feature = "pg_test")]
|
||||
#[pg_schema]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[pg_test]
|
||||
fn test_version() {
|
||||
assert!(!ruvector_version().is_empty());
|
||||
}
|
||||
|
||||
#[pg_test]
|
||||
fn test_simd_info() {
|
||||
let info = ruvector_simd_info();
|
||||
assert!(
|
||||
info.contains("avx512")
|
||||
|| info.contains("avx2")
|
||||
|| info.contains("neon")
|
||||
|| info.contains("scalar")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Bootstrap the extension (called by pgrx)
|
||||
#[cfg(test)]
|
||||
pub mod pg_test {
|
||||
pub fn setup(_options: Vec<&str>) {}
|
||||
pub fn postgresql_conf_options() -> Vec<&'static str> {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user