Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
394
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/01-self-learning.md
vendored
Normal file
394
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/01-self-learning.md
vendored
Normal file
@@ -0,0 +1,394 @@
|
||||
# Self-Learning / ReasoningBank Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate adaptive learning capabilities into ruvector-postgres, enabling the database to learn from query patterns, optimize search strategies, and improve recall/precision over time.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
|
||||
│ │ Trajectory │ │ Verdict │ │ Memory Distillation│ │
|
||||
│ │ Tracker │ │ Judgment │ │ Engine │ │
|
||||
│ └──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘ │
|
||||
│ │ │ │ │
|
||||
│ └────────────────┼─────────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────┐ │
|
||||
│ │ ReasoningBank │ │
|
||||
│ │ (Pattern Storage) │ │
|
||||
│ └───────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── learning/
|
||||
│ ├── mod.rs # Module exports
|
||||
│ ├── trajectory.rs # Query trajectory tracking
|
||||
│ ├── verdict.rs # Success/failure judgment
|
||||
│ ├── distillation.rs # Pattern extraction
|
||||
│ ├── reasoning_bank.rs # Pattern storage & retrieval
|
||||
│ └── optimizer.rs # Search parameter optimization
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Configuration
|
||||
|
||||
```sql
|
||||
-- Enable self-learning for a table
|
||||
SELECT ruvector_enable_learning('embeddings',
|
||||
trajectory_window := 1000,
|
||||
learning_rate := 0.01,
|
||||
min_samples := 100
|
||||
);
|
||||
|
||||
-- View learning statistics
|
||||
SELECT * FROM ruvector_learning_stats('embeddings');
|
||||
|
||||
-- Export learned patterns
|
||||
SELECT ruvector_export_patterns('embeddings') AS patterns_json;
|
||||
|
||||
-- Import patterns from another instance
|
||||
SELECT ruvector_import_patterns('embeddings', patterns_json);
|
||||
```
|
||||
|
||||
### Automatic Optimization
|
||||
|
||||
```sql
|
||||
-- Auto-tune HNSW parameters based on query patterns
|
||||
SELECT ruvector_auto_tune('embeddings_idx',
|
||||
optimize_for := 'recall', -- or 'latency', 'balanced'
|
||||
sample_queries := 1000
|
||||
);
|
||||
|
||||
-- Get recommended index parameters
|
||||
SELECT * FROM ruvector_recommend_params('embeddings');
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Trajectory Tracking (Week 1-2)
|
||||
|
||||
```rust
|
||||
// src/learning/trajectory.rs
|
||||
|
||||
pub struct QueryTrajectory {
|
||||
pub query_id: Uuid,
|
||||
pub query_vector: Vec<f32>,
|
||||
pub timestamp: DateTime<Utc>,
|
||||
pub index_params: IndexParams,
|
||||
pub results: Vec<SearchResult>,
|
||||
pub latency_ms: f64,
|
||||
pub recall_estimate: Option<f32>,
|
||||
}
|
||||
|
||||
pub struct TrajectoryTracker {
|
||||
buffer: RingBuffer<QueryTrajectory>,
|
||||
storage: TrajectoryStorage,
|
||||
}
|
||||
|
||||
impl TrajectoryTracker {
|
||||
pub fn record(&mut self, trajectory: QueryTrajectory);
|
||||
pub fn get_recent(&self, n: usize) -> Vec<&QueryTrajectory>;
|
||||
pub fn analyze_patterns(&self) -> PatternAnalysis;
|
||||
}
|
||||
```
|
||||
|
||||
**SQL Functions:**
|
||||
```sql
|
||||
-- Record query feedback (user indicates relevance)
|
||||
SELECT ruvector_record_feedback(
|
||||
query_id := 'abc123',
|
||||
relevant_ids := ARRAY[1, 5, 7],
|
||||
irrelevant_ids := ARRAY[2, 3]
|
||||
);
|
||||
```
|
||||
|
||||
### Phase 2: Verdict Judgment (Week 3-4)
|
||||
|
||||
```rust
|
||||
// src/learning/verdict.rs
|
||||
|
||||
pub struct VerdictEngine {
|
||||
success_threshold: f32,
|
||||
metrics: VerdictMetrics,
|
||||
}
|
||||
|
||||
impl VerdictEngine {
|
||||
/// Judge if a search was successful based on multiple signals
|
||||
pub fn judge(&self, trajectory: &QueryTrajectory) -> Verdict {
|
||||
let signals = vec![
|
||||
self.latency_score(trajectory),
|
||||
self.recall_score(trajectory),
|
||||
self.diversity_score(trajectory),
|
||||
self.user_feedback_score(trajectory),
|
||||
];
|
||||
|
||||
Verdict {
|
||||
success: signals.iter().sum::<f32>() / signals.len() as f32 > self.success_threshold,
|
||||
confidence: self.compute_confidence(&signals),
|
||||
recommendations: self.generate_recommendations(&signals),
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Memory Distillation (Week 5-6)
|
||||
|
||||
```rust
|
||||
// src/learning/distillation.rs
|
||||
|
||||
pub struct DistillationEngine {
|
||||
pattern_extractor: PatternExtractor,
|
||||
compressor: PatternCompressor,
|
||||
}
|
||||
|
||||
impl DistillationEngine {
|
||||
/// Extract reusable patterns from trajectories
|
||||
pub fn distill(&self, trajectories: &[QueryTrajectory]) -> Vec<LearnedPattern> {
|
||||
let raw_patterns = self.pattern_extractor.extract(trajectories);
|
||||
let compressed = self.compressor.compress(raw_patterns);
|
||||
compressed
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LearnedPattern {
|
||||
pub query_cluster_centroid: Vec<f32>,
|
||||
pub optimal_ef_search: u32,
|
||||
pub optimal_probes: u32,
|
||||
pub expected_recall: f32,
|
||||
pub confidence: f32,
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: ReasoningBank Storage (Week 7-8)
|
||||
|
||||
```rust
|
||||
// src/learning/reasoning_bank.rs
|
||||
|
||||
pub struct ReasoningBank {
|
||||
patterns: HnswIndex<LearnedPattern>,
|
||||
metadata: HashMap<PatternId, PatternMetadata>,
|
||||
}
|
||||
|
||||
impl ReasoningBank {
|
||||
/// Find applicable patterns for a query
|
||||
pub fn lookup(&self, query: &[f32], k: usize) -> Vec<&LearnedPattern> {
|
||||
self.patterns.search(query, k)
|
||||
}
|
||||
|
||||
/// Store a new pattern
|
||||
pub fn store(&mut self, pattern: LearnedPattern) -> PatternId;
|
||||
|
||||
/// Merge similar patterns to prevent bloat
|
||||
pub fn consolidate(&mut self);
|
||||
|
||||
/// Prune low-value patterns
|
||||
pub fn prune(&mut self, min_usage: u32, min_confidence: f32);
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: Search Optimizer (Week 9-10)
|
||||
|
||||
```rust
|
||||
// src/learning/optimizer.rs
|
||||
|
||||
pub struct SearchOptimizer {
|
||||
reasoning_bank: Arc<ReasoningBank>,
|
||||
default_params: SearchParams,
|
||||
}
|
||||
|
||||
impl SearchOptimizer {
|
||||
/// Get optimized parameters for a query
|
||||
pub fn optimize(&self, query: &[f32]) -> SearchParams {
|
||||
match self.reasoning_bank.lookup(query, 3) {
|
||||
patterns if !patterns.is_empty() => {
|
||||
self.interpolate_params(query, patterns)
|
||||
}
|
||||
_ => self.default_params.clone()
|
||||
}
|
||||
}
|
||||
|
||||
fn interpolate_params(&self, query: &[f32], patterns: &[&LearnedPattern]) -> SearchParams {
|
||||
// Weight patterns by similarity to query
|
||||
let weights: Vec<f32> = patterns.iter()
|
||||
.map(|p| cosine_similarity(query, &p.query_cluster_centroid))
|
||||
.collect();
|
||||
|
||||
SearchParams {
|
||||
ef_search: weighted_average(
|
||||
patterns.iter().map(|p| p.optimal_ef_search as f32),
|
||||
&weights
|
||||
) as u32,
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## PostgreSQL Integration
|
||||
|
||||
### Background Worker
|
||||
|
||||
```rust
|
||||
// src/learning/bgworker.rs
|
||||
|
||||
#[pg_guard]
|
||||
pub extern "C" fn learning_bgworker_main(_arg: pg_sys::Datum) {
|
||||
BackgroundWorker::attach_signal_handlers(SignalWakeFlags::SIGHUP | SignalWakeFlags::SIGTERM);
|
||||
|
||||
loop {
|
||||
// Process trajectory buffer
|
||||
let trajectories = TRAJECTORY_BUFFER.drain();
|
||||
|
||||
if trajectories.len() >= MIN_BATCH_SIZE {
|
||||
// Distill patterns
|
||||
let patterns = DISTILLATION_ENGINE.distill(&trajectories);
|
||||
|
||||
// Store in reasoning bank
|
||||
for pattern in patterns {
|
||||
REASONING_BANK.store(pattern);
|
||||
}
|
||||
|
||||
// Periodic consolidation
|
||||
if should_consolidate() {
|
||||
REASONING_BANK.consolidate();
|
||||
}
|
||||
}
|
||||
|
||||
// Sleep until next batch
|
||||
BackgroundWorker::wait_latch(LEARNING_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### GUC Configuration
|
||||
|
||||
```rust
|
||||
static LEARNING_ENABLED: GucSetting<bool> = GucSetting::new(false);
|
||||
static LEARNING_RATE: GucSetting<f64> = GucSetting::new(0.01);
|
||||
static TRAJECTORY_BUFFER_SIZE: GucSetting<i32> = GucSetting::new(10000);
|
||||
static PATTERN_CONSOLIDATION_INTERVAL: GucSetting<i32> = GucSetting::new(3600);
|
||||
```
|
||||
|
||||
## Optimization Strategies
|
||||
|
||||
### 1. Adaptive ef_search
|
||||
|
||||
```sql
|
||||
-- Before: Static ef_search
|
||||
SET ruvector.ef_search = 40;
|
||||
SELECT * FROM items ORDER BY embedding <-> query_vec LIMIT 10;
|
||||
|
||||
-- After: Adaptive ef_search based on learned patterns
|
||||
SELECT * FROM items
|
||||
ORDER BY embedding <-> query_vec
|
||||
LIMIT 10
|
||||
WITH (adaptive_search := true);
|
||||
```
|
||||
|
||||
### 2. Query-Aware Probing
|
||||
|
||||
For IVFFlat, learn optimal probe counts per query cluster:
|
||||
|
||||
```rust
|
||||
pub fn adaptive_probes(&self, query: &[f32]) -> u32 {
|
||||
let cluster_id = self.assign_cluster(query);
|
||||
    *self.learned_probes.get(&cluster_id).unwrap_or(&self.default_probes)
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Index Selection
|
||||
|
||||
Learn when to use HNSW vs IVFFlat:
|
||||
|
||||
```rust
|
||||
pub fn select_index(&self, query: &[f32], k: usize) -> IndexType {
|
||||
let features = QueryFeatures::extract(query, k);
|
||||
self.index_selector.predict(&features)
|
||||
}
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
### Metrics to Track
|
||||
|
||||
| Metric | Baseline | Target | Measurement |
|
||||
|--------|----------|--------|-------------|
|
||||
| Recall@10 | 0.95 | 0.98 | After 10K queries |
|
||||
| p99 Latency | 5ms | 3ms | After learning |
|
||||
| Memory Overhead | 0 | <100MB | Pattern storage |
|
||||
| Learning Time | N/A | <1s/1K queries | Background processing |
|
||||
|
||||
### Benchmark Queries
|
||||
|
||||
```sql
|
||||
-- Measure recall improvement
|
||||
SELECT ruvector_benchmark_recall(
|
||||
table_name := 'embeddings',
|
||||
ground_truth_table := 'embeddings_ground_truth',
|
||||
num_queries := 1000,
|
||||
k := 10
|
||||
);
|
||||
|
||||
-- Measure latency improvement
|
||||
SELECT ruvector_benchmark_latency(
|
||||
table_name := 'embeddings',
|
||||
num_queries := 10000,
|
||||
k := 10,
|
||||
percentiles := ARRAY[50, 90, 99]
|
||||
);
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Existing ruvector crates (optional integration)
|
||||
# ruvector-core = { path = "../ruvector-core", optional = true }
|
||||
|
||||
# Pattern storage
|
||||
dashmap = "6.0"
|
||||
parking_lot = "0.12"
|
||||
|
||||
# Statistics
|
||||
statrs = "0.16"
|
||||
|
||||
# Clustering for pattern extraction
|
||||
linfa = "0.7"
|
||||
linfa-clustering = "0.7"
|
||||
|
||||
# Serialization for pattern export/import
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
learning = []
|
||||
learning-advanced = ["learning", "linfa", "linfa-clustering"]
|
||||
learning-distributed = ["learning", "ruvector-replication"]
|
||||
```
|
||||
|
||||
## Migration Path
|
||||
|
||||
1. **v0.2.0**: Basic trajectory tracking, manual feedback
|
||||
2. **v0.3.0**: Verdict judgment, automatic pattern extraction
|
||||
3. **v0.4.0**: Full ReasoningBank, adaptive search
|
||||
4. **v0.5.0**: Distributed learning across replicas
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Pattern data is stored locally, no external transmission
|
||||
- Trajectory data can be anonymized (hash query vectors)
|
||||
- Learning can be disabled per-table for sensitive data
|
||||
- Export/import requires superuser privileges
|
||||
545
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/02-attention-mechanisms.md
vendored
Normal file
545
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/02-attention-mechanisms.md
vendored
Normal file
@@ -0,0 +1,545 @@
|
||||
# Attention Mechanisms Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate 39 attention mechanisms from `ruvector-attention` into PostgreSQL, enabling attention-weighted vector search, transformer-style queries, and neural reranking directly in SQL.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├──────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────────────────────────────────────────────────┐ │
|
||||
│ │ Attention Registry │ │
|
||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────────┐ │ │
|
||||
│ │ │ Flash │ │ Linear │ │ MoE │ │ Hyperbolic │ │ │
|
||||
│ │ └────┬────┘ └────┬────┘ └────┬────┘ └────────┬────────┘ │ │
|
||||
│ └───────┼───────────┼───────────┼───────────────┼──────────┘ │
|
||||
│ └───────────┴───────────┴───────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ SIMD-Accelerated Core │ │
|
||||
│ │ (AVX-512/AVX2/NEON) │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── attention/
|
||||
│ ├── mod.rs # Module exports & registry
|
||||
│ ├── core/
|
||||
│ │ ├── scaled_dot.rs # Scaled dot-product attention
|
||||
│ │ ├── multi_head.rs # Multi-head attention
|
||||
│ │ ├── flash.rs # Flash Attention v2
|
||||
│ │ └── linear.rs # Linear attention O(n)
|
||||
│ ├── graph/
|
||||
│ │ ├── gat.rs # Graph Attention
|
||||
│ │ ├── gatv2.rs # GATv2 (dynamic)
|
||||
│ │ └── sparse.rs # Sparse attention patterns
|
||||
│ ├── specialized/
|
||||
│ │ ├── moe.rs # Mixture of Experts
|
||||
│ │ ├── cross.rs # Cross-attention
|
||||
│ │ └── sliding.rs # Sliding window
|
||||
│ ├── hyperbolic/
|
||||
│ │ ├── poincare.rs # Poincaré attention
|
||||
│ │ └── lorentz.rs # Lorentzian attention
|
||||
│ └── operators.rs # PostgreSQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Basic Attention Operations
|
||||
|
||||
```sql
|
||||
-- Create attention-weighted index
|
||||
CREATE INDEX ON documents USING ruvector_attention (
|
||||
embedding vector(768)
|
||||
) WITH (
|
||||
attention_type = 'flash',
|
||||
num_heads = 8,
|
||||
head_dim = 96
|
||||
);
|
||||
|
||||
-- Attention-weighted search
|
||||
SELECT id, content,
|
||||
ruvector_attention_score(embedding, query_vec, 'scaled_dot') AS score
|
||||
FROM documents
|
||||
ORDER BY score DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- Multi-head attention search
|
||||
SELECT * FROM ruvector_mha_search(
|
||||
table_name := 'documents',
|
||||
query := query_embedding,
|
||||
num_heads := 8,
|
||||
k := 10
|
||||
);
|
||||
```
|
||||
|
||||
### Advanced Attention Queries
|
||||
|
||||
```sql
|
||||
-- Cross-attention between two tables (Q from queries, K/V from documents)
|
||||
SELECT q.id AS query_id, d.id AS doc_id, score
|
||||
FROM ruvector_cross_attention(
|
||||
query_table := 'queries',
|
||||
query_column := 'embedding',
|
||||
document_table := 'documents',
|
||||
document_column := 'embedding',
|
||||
attention_type := 'scaled_dot'
|
||||
) AS (query_id int, doc_id int, score float);
|
||||
|
||||
-- Mixture of Experts routing
|
||||
SELECT id,
|
||||
ruvector_moe_route(embedding, num_experts := 8, top_k := 2) AS expert_weights
|
||||
FROM documents;
|
||||
|
||||
-- Sliding window attention for long sequences
|
||||
SELECT * FROM ruvector_sliding_attention(
|
||||
embeddings := embedding_array,
|
||||
window_size := 256,
|
||||
stride := 128
|
||||
);
|
||||
```
|
||||
|
||||
### Attention Types
|
||||
|
||||
```sql
|
||||
-- List available attention mechanisms
|
||||
SELECT * FROM ruvector_attention_types();
|
||||
|
||||
-- Result:
|
||||
-- | name | complexity | best_for |
|
||||
-- |-------------------|------------|-----------------------------|
|
||||
-- | scaled_dot | O(n²) | Small sequences (<512) |
|
||||
-- | flash_v2 | O(n²) | GPU, memory-efficient |
|
||||
-- | linear | O(n) | Long sequences (>4K) |
|
||||
-- | sparse | O(n√n) | Very long sequences |
|
||||
-- | gat | O(E) | Graph-structured data |
|
||||
-- | moe | O(n*k) | Conditional computation |
|
||||
-- | hyperbolic | O(n²) | Hierarchical data |
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Core Attention (Week 1-3)
|
||||
|
||||
```rust
|
||||
// src/attention/core/scaled_dot.rs
|
||||
|
||||
use simsimd::SpatialSimilarity;
|
||||
|
||||
pub struct ScaledDotAttention {
|
||||
scale: f32,
|
||||
dropout: Option<f32>,
|
||||
}
|
||||
|
||||
impl ScaledDotAttention {
|
||||
pub fn new(head_dim: usize) -> Self {
|
||||
Self {
|
||||
scale: 1.0 / (head_dim as f32).sqrt(),
|
||||
dropout: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute attention scores between query and keys
|
||||
/// Returns softmax(Q·K^T / √d_k)
|
||||
#[inline]
|
||||
pub fn attention_scores(&self, query: &[f32], keys: &[&[f32]]) -> Vec<f32> {
|
||||
let mut scores: Vec<f32> = keys.iter()
|
||||
.map(|k| self.dot_product(query, k) * self.scale)
|
||||
.collect();
|
||||
|
||||
softmax_inplace(&mut scores);
|
||||
scores
|
||||
}
|
||||
|
||||
/// SIMD-accelerated dot product
|
||||
#[inline]
|
||||
fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
f32::dot(a, b).unwrap_or_else(|| {
|
||||
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL function
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_attention_score(
|
||||
query: Vec<f32>,
|
||||
key: Vec<f32>,
|
||||
attention_type: default!(&str, "'scaled_dot'"),
|
||||
) -> f32 {
|
||||
let attention = get_attention_impl(attention_type);
|
||||
attention.score(&query, &key)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Multi-Head Attention (Week 4-5)
|
||||
|
||||
```rust
|
||||
// src/attention/core/multi_head.rs
|
||||
|
||||
pub struct MultiHeadAttention {
|
||||
num_heads: usize,
|
||||
head_dim: usize,
|
||||
w_q: Matrix,
|
||||
w_k: Matrix,
|
||||
w_v: Matrix,
|
||||
w_o: Matrix,
|
||||
}
|
||||
|
||||
impl MultiHeadAttention {
|
||||
pub fn forward(&self, query: &[f32], keys: &[&[f32]], values: &[&[f32]]) -> Vec<f32> {
|
||||
// Project to heads
|
||||
let q_heads = self.split_heads(&self.project(query, &self.w_q));
|
||||
let k_heads: Vec<_> = keys.iter()
|
||||
.map(|k| self.split_heads(&self.project(k, &self.w_k)))
|
||||
.collect();
|
||||
let v_heads: Vec<_> = values.iter()
|
||||
.map(|v| self.split_heads(&self.project(v, &self.w_v)))
|
||||
.collect();
|
||||
|
||||
// Attention per head (parallelizable)
|
||||
let head_outputs: Vec<Vec<f32>> = (0..self.num_heads)
|
||||
.into_par_iter()
|
||||
.map(|h| {
|
||||
let scores = self.attention_scores(&q_heads[h], &k_heads, h);
|
||||
self.weighted_sum(&scores, &v_heads, h)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Concatenate and project
|
||||
let concat = self.concat_heads(&head_outputs);
|
||||
self.project(&concat, &self.w_o)
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL aggregate for batch attention
|
||||
#[pg_extern]
|
||||
fn ruvector_mha_search(
|
||||
table_name: &str,
|
||||
query: Vec<f32>,
|
||||
num_heads: default!(i32, 8),
|
||||
k: default!(i32, 10),
|
||||
) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> {
|
||||
// Implementation using SPI
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Flash Attention (Week 6-7)
|
||||
|
||||
```rust
|
||||
// src/attention/core/flash.rs
|
||||
|
||||
/// Flash Attention v2 - memory-efficient attention
|
||||
/// Processes attention in blocks to minimize memory bandwidth
|
||||
pub struct FlashAttention {
|
||||
block_size_q: usize,
|
||||
block_size_kv: usize,
|
||||
    scale: f32,
    head_dim: usize, // needed by forward() to derive seq_len from the flat [seq_len * head_dim] buffers
|
||||
}
|
||||
|
||||
impl FlashAttention {
|
||||
/// Tiled attention computation
|
||||
    /// Memory: O(N) instead of O(N²) — softmax statistics are kept per row
|
||||
pub fn forward(
|
||||
&self,
|
||||
q: &[f32], // [seq_len, head_dim]
|
||||
k: &[f32], // [seq_len, head_dim]
|
||||
v: &[f32], // [seq_len, head_dim]
|
||||
) -> Vec<f32> {
|
||||
let seq_len = q.len() / self.head_dim;
|
||||
let mut output = vec![0.0; q.len()];
|
||||
let mut row_max = vec![f32::NEG_INFINITY; seq_len];
|
||||
let mut row_sum = vec![0.0; seq_len];
|
||||
|
||||
// Process in blocks
|
||||
for q_block in (0..seq_len).step_by(self.block_size_q) {
|
||||
for kv_block in (0..seq_len).step_by(self.block_size_kv) {
|
||||
self.process_block(
|
||||
q, k, v,
|
||||
q_block, kv_block,
|
||||
&mut output, &mut row_max, &mut row_sum
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Graph Attention (Week 8-9)
|
||||
|
||||
```rust
|
||||
// src/attention/graph/gat.rs
|
||||
|
||||
/// Graph Attention Network layer
|
||||
pub struct GATLayer {
|
||||
num_heads: usize,
|
||||
in_features: usize,
|
||||
out_features: usize,
|
||||
attention_weights: Vec<Vec<f32>>, // [num_heads, 2 * out_features]
|
||||
leaky_relu_slope: f32,
|
||||
}
|
||||
|
||||
impl GATLayer {
|
||||
/// Compute attention coefficients for graph edges
|
||||
pub fn forward(
|
||||
&self,
|
||||
node_features: &[Vec<f32>], // [num_nodes, in_features]
|
||||
edge_index: &[(usize, usize)], // [(src, dst), ...]
|
||||
) -> Vec<Vec<f32>> {
|
||||
// Transform features
|
||||
let h = self.linear_transform(node_features);
|
||||
|
||||
// Compute attention for each edge
|
||||
let edge_attention: Vec<Vec<f32>> = edge_index.par_iter()
|
||||
.map(|(src, dst)| {
|
||||
(0..self.num_heads)
|
||||
.map(|head| self.edge_attention(head, &h[*src], &h[*dst]))
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Aggregate with attention weights
|
||||
self.aggregate(&h, edge_index, &edge_attention)
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL function for graph-based search
|
||||
#[pg_extern]
|
||||
fn ruvector_gat_search(
|
||||
node_table: &str,
|
||||
edge_table: &str,
|
||||
query_node_id: i64,
|
||||
num_heads: default!(i32, 4),
|
||||
k: default!(i32, 10),
|
||||
) -> TableIterator<'static, (name!(node_id, i64), name!(attention_score, f32))> {
|
||||
// Implementation
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: Hyperbolic Attention (Week 10-11)
|
||||
|
||||
```rust
|
||||
// src/attention/hyperbolic/poincare.rs
|
||||
|
||||
/// Poincaré ball attention for hierarchical data
|
||||
pub struct PoincareAttention {
|
||||
curvature: f32, // -1/c² where c is the ball radius
|
||||
head_dim: usize,
|
||||
}
|
||||
|
||||
impl PoincareAttention {
|
||||
/// Möbius addition in Poincaré ball
|
||||
fn mobius_add(&self, x: &[f32], y: &[f32]) -> Vec<f32> {
|
||||
let x_norm_sq = self.norm_sq(x);
|
||||
let y_norm_sq = self.norm_sq(y);
|
||||
let xy_dot = self.dot(x, y);
|
||||
|
||||
let c = -self.curvature;
|
||||
let num_coef = 1.0 + 2.0 * c * xy_dot + c * y_norm_sq;
|
||||
let denom = 1.0 + 2.0 * c * xy_dot + c * c * x_norm_sq * y_norm_sq;
|
||||
|
||||
x.iter().zip(y.iter())
|
||||
.map(|(xi, yi)| (num_coef * xi + (1.0 - c * x_norm_sq) * yi) / denom)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Hyperbolic distance
|
||||
fn distance(&self, x: &[f32], y: &[f32]) -> f32 {
|
||||
let diff = self.mobius_add(x, &self.negate(y));
|
||||
let c = -self.curvature;
|
||||
let norm = self.norm(&diff);
|
||||
(2.0 / c.sqrt()) * (c.sqrt() * norm).atanh()
|
||||
}
|
||||
|
||||
/// Attention in hyperbolic space
|
||||
pub fn attention_scores(&self, query: &[f32], keys: &[&[f32]]) -> Vec<f32> {
|
||||
let distances: Vec<f32> = keys.iter()
|
||||
.map(|k| -self.distance(query, k)) // Negative distance as similarity
|
||||
.collect();
|
||||
|
||||
softmax(&distances)
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_hyperbolic_distance(
|
||||
a: Vec<f32>,
|
||||
b: Vec<f32>,
|
||||
curvature: default!(f32, 1.0),
|
||||
) -> f32 {
|
||||
let attention = PoincareAttention::new(curvature, a.len());
|
||||
attention.distance(&a, &b)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 6: Mixture of Experts (Week 12)
|
||||
|
||||
```rust
|
||||
// src/attention/specialized/moe.rs
|
||||
|
||||
/// Mixture of Experts with learned routing
|
||||
pub struct MixtureOfExperts {
|
||||
num_experts: usize,
|
||||
top_k: usize,
|
||||
gate: GatingNetwork,
|
||||
experts: Vec<Expert>,
|
||||
}
|
||||
|
||||
impl MixtureOfExperts {
|
||||
/// Route input to top-k experts
|
||||
pub fn forward(&self, input: &[f32]) -> Vec<f32> {
|
||||
// Get routing weights
|
||||
let gate_logits = self.gate.forward(input);
|
||||
let (top_k_indices, top_k_weights) = self.top_k_gating(&gate_logits);
|
||||
|
||||
// Aggregate expert outputs
|
||||
let mut output = vec![0.0; self.experts[0].output_dim()];
|
||||
for (idx, weight) in top_k_indices.iter().zip(top_k_weights.iter()) {
|
||||
let expert_output = self.experts[*idx].forward(input);
|
||||
for (o, e) in output.iter_mut().zip(expert_output.iter()) {
|
||||
*o += weight * e;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_moe_route(
|
||||
embedding: Vec<f32>,
|
||||
num_experts: default!(i32, 8),
|
||||
top_k: default!(i32, 2),
|
||||
) -> pgrx::JsonB {
|
||||
let moe = get_moe_model(num_experts as usize, top_k as usize);
|
||||
let (indices, weights) = moe.route(&embedding);
|
||||
|
||||
pgrx::JsonB(serde_json::json!({
|
||||
"expert_indices": indices,
|
||||
"expert_weights": weights,
|
||||
}))
|
||||
}
|
||||
```
|
||||
|
||||
## Attention Type Registry
|
||||
|
||||
```rust
|
||||
// src/attention/mod.rs
|
||||
|
||||
pub enum AttentionType {
|
||||
// Core
|
||||
ScaledDot,
|
||||
MultiHead { num_heads: usize },
|
||||
FlashV2 { block_size: usize },
|
||||
Linear,
|
||||
|
||||
// Graph
|
||||
GAT { num_heads: usize },
|
||||
GATv2 { num_heads: usize },
|
||||
Sparse { pattern: SparsePattern },
|
||||
|
||||
// Specialized
|
||||
MoE { num_experts: usize, top_k: usize },
|
||||
Cross,
|
||||
SlidingWindow { size: usize },
|
||||
|
||||
// Hyperbolic
|
||||
Poincare { curvature: f32 },
|
||||
Lorentz { curvature: f32 },
|
||||
}
|
||||
|
||||
pub fn get_attention(attention_type: AttentionType) -> Box<dyn Attention> {
|
||||
match attention_type {
|
||||
AttentionType::ScaledDot => Box::new(ScaledDotAttention::default()),
|
||||
AttentionType::FlashV2 { block_size } => Box::new(FlashAttention::new(block_size)),
|
||||
// ... etc
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Performance Optimizations
|
||||
|
||||
### SIMD Acceleration
|
||||
|
||||
```rust
|
||||
// Use simsimd for all vector operations
|
||||
use simsimd::{SpatialSimilarity, BinarySimilarity};
|
||||
|
||||
#[inline]
|
||||
fn batched_dot_products(query: &[f32], keys: &[&[f32]]) -> Vec<f32> {
|
||||
keys.iter()
|
||||
.map(|k| f32::dot(query, k).unwrap())
|
||||
.collect()
|
||||
}
|
||||
```
|
||||
|
||||
### Memory Layout
|
||||
|
||||
```rust
|
||||
// Contiguous memory for cache efficiency
|
||||
pub struct AttentionCache {
|
||||
// Keys stored in column-major for efficient attention
|
||||
keys: Vec<f32>, // [num_keys * head_dim]
|
||||
values: Vec<f32>, // [num_keys * head_dim]
|
||||
num_keys: usize,
|
||||
head_dim: usize,
|
||||
}
|
||||
```
|
||||
|
||||
### Parallel Processing
|
||||
|
||||
```rust
|
||||
// Parallel attention across heads
|
||||
let head_outputs: Vec<_> = (0..num_heads)
|
||||
.into_par_iter()
|
||||
.map(|h| compute_head_attention(h, query, keys, values))
|
||||
.collect();
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Operation | Sequence Length | Heads | Time (μs) | Memory |
|
||||
|-----------|-----------------|-------|-----------|--------|
|
||||
| ScaledDot | 512 | 8 | 45 | 2MB |
|
||||
| Flash | 512 | 8 | 38 | 0.5MB |
|
||||
| Linear | 4096 | 8 | 120 | 4MB |
|
||||
| GAT | 1000 nodes | 4 | 85 | 1MB |
|
||||
| MoE (8 experts) | 512 | 8 | 95 | 3MB |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Link to ruvector-attention for implementations
|
||||
ruvector-attention = { path = "../ruvector-attention", optional = true }
|
||||
|
||||
# SIMD
|
||||
simsimd = "5.9"
|
||||
|
||||
# Parallel processing
|
||||
rayon = "1.10"
|
||||
|
||||
# Matrix operations (optional, for weight matrices)
|
||||
ndarray = { version = "0.15", optional = true }
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
attention = []
|
||||
attention-flash = ["attention"]
|
||||
attention-graph = ["attention"]
|
||||
attention-hyperbolic = ["attention"]
|
||||
attention-moe = ["attention"]
|
||||
attention-all = ["attention-flash", "attention-graph", "attention-hyperbolic", "attention-moe"]
|
||||
```
|
||||
669
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/03-gnn-layers.md
vendored
Normal file
669
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/03-gnn-layers.md
vendored
Normal file
@@ -0,0 +1,669 @@
|
||||
# GNN Layers Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate Graph Neural Network layers from `ruvector-gnn` into PostgreSQL, enabling graph-aware vector search, message passing, and neural graph queries directly in SQL.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ GNN Layer Registry │ │
|
||||
│ │ ┌───────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌───────────┐ │ │
|
||||
│ │ │ GCN │ │GraphSAGE│ │ GAT │ │ GIN │ │ RuVector │ │ │
|
||||
│ │ └───┬───┘ └───┬───┘ └───┬───┘ └───┬───┘ └─────┬─────┘ │ │
|
||||
│ └──────┼─────────┼─────────┼─────────┼───────────┼────────┘ │
|
||||
│ └─────────┴─────────┴─────────┴───────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ Message Passing Engine │ │
|
||||
│ │ (SIMD + Parallel) │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── gnn/
|
||||
│ ├── mod.rs # Module exports & registry
|
||||
│ ├── layers/
|
||||
│ │ ├── gcn.rs # Graph Convolutional Network
|
||||
│ │ ├── graphsage.rs # GraphSAGE (sampling)
|
||||
│ │ ├── gat.rs # Graph Attention Network
|
||||
│ │ ├── gin.rs # Graph Isomorphism Network
|
||||
│ │ └── ruvector.rs # Custom RuVector layer
|
||||
│ ├── message_passing.rs # Core message passing
|
||||
│ ├── aggregators.rs # Sum, Mean, Max, LSTM
|
||||
│ ├── graph_store.rs # PostgreSQL graph storage
|
||||
│ └── operators.rs # SQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Graph Table Setup
|
||||
|
||||
```sql
|
||||
-- Create node table with embeddings
|
||||
CREATE TABLE nodes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(256),
|
||||
features jsonb
|
||||
);
|
||||
|
||||
-- Create edge table
|
||||
CREATE TABLE edges (
|
||||
src_id INTEGER REFERENCES nodes(id),
|
||||
dst_id INTEGER REFERENCES nodes(id),
|
||||
weight FLOAT DEFAULT 1.0,
|
||||
edge_type TEXT,
|
||||
PRIMARY KEY (src_id, dst_id)
|
||||
);
|
||||
|
||||
-- Create GNN-enhanced index
|
||||
CREATE INDEX ON nodes USING ruvector_gnn (
|
||||
embedding vector(256)
|
||||
) WITH (
|
||||
edge_table = 'edges',
|
||||
layer_type = 'graphsage',
|
||||
num_layers = 2,
|
||||
hidden_dim = 128,
|
||||
aggregator = 'mean'
|
||||
);
|
||||
```
|
||||
|
||||
### GNN Queries
|
||||
|
||||
```sql
|
||||
-- GNN-enhanced similarity search (considers graph structure)
|
||||
SELECT n.id, n.embedding,
|
||||
ruvector_gnn_score(n.embedding, query_vec, 'edges', 2) AS score
|
||||
FROM nodes n
|
||||
ORDER BY score DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- Message passing to get updated embeddings
|
||||
SELECT node_id, updated_embedding
|
||||
FROM ruvector_message_pass(
|
||||
node_table := 'nodes',
|
||||
edge_table := 'edges',
|
||||
embedding_column := 'embedding',
|
||||
num_hops := 2,
|
||||
layer_type := 'gcn'
|
||||
);
|
||||
|
||||
-- Subgraph-aware search
|
||||
SELECT * FROM ruvector_subgraph_search(
|
||||
center_node := 42,
|
||||
query_embedding := query_vec,
|
||||
max_hops := 3,
|
||||
k := 10
|
||||
);
|
||||
|
||||
-- Node classification with GNN
|
||||
SELECT node_id,
|
||||
ruvector_gnn_classify(embedding, 'edges', model_name := 'node_classifier') AS class
|
||||
FROM nodes;
|
||||
```
|
||||
|
||||
### Graph Construction from Vectors
|
||||
|
||||
```sql
|
||||
-- Build k-NN graph from embeddings
|
||||
SELECT ruvector_build_knn_graph(
|
||||
node_table := 'nodes',
|
||||
embedding_column := 'embedding',
|
||||
edge_table := 'edges_knn',
|
||||
k := 10,
|
||||
distance_metric := 'cosine'
|
||||
);
|
||||
|
||||
-- Build epsilon-neighborhood graph
|
||||
SELECT ruvector_build_eps_graph(
|
||||
node_table := 'nodes',
|
||||
embedding_column := 'embedding',
|
||||
edge_table := 'edges_eps',
|
||||
epsilon := 0.5
|
||||
);
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Message Passing Core (Week 1-3)
|
||||
|
||||
```rust
|
||||
// src/gnn/message_passing.rs
|
||||
|
||||
/// Generic message passing framework
|
||||
pub trait MessagePassing {
|
||||
/// Compute messages from neighbors
|
||||
fn message(&self, x_j: &[f32], edge_attr: Option<&[f32]>) -> Vec<f32>;
|
||||
|
||||
/// Aggregate messages
|
||||
fn aggregate(&self, messages: &[Vec<f32>]) -> Vec<f32>;
|
||||
|
||||
/// Update node embedding
|
||||
fn update(&self, x_i: &[f32], aggregated: &[f32]) -> Vec<f32>;
|
||||
}
|
||||
|
||||
/// SIMD-optimized message passing
|
||||
pub struct MessagePassingEngine {
|
||||
aggregator: Aggregator,
|
||||
}
|
||||
|
||||
impl MessagePassingEngine {
|
||||
pub fn propagate(
|
||||
&self,
|
||||
node_features: &[Vec<f32>],
|
||||
edge_index: &[(usize, usize)],
|
||||
edge_weights: Option<&[f32]>,
|
||||
layer: &dyn MessagePassing,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let num_nodes = node_features.len();
|
||||
|
||||
// Build adjacency list
|
||||
let adj_list = self.build_adjacency_list(edge_index, num_nodes);
|
||||
|
||||
// Parallel message passing
|
||||
(0..num_nodes)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let neighbors = &adj_list[i];
|
||||
if neighbors.is_empty() {
|
||||
return node_features[i].clone();
|
||||
}
|
||||
|
||||
// Collect messages from neighbors
|
||||
let messages: Vec<Vec<f32>> = neighbors.iter()
|
||||
.map(|&j| {
    // NOTE(review): edge weights should be looked up by *edge* position,
    // not by the neighbor's node id `j` — the adjacency list should carry
    // (neighbor, edge_idx) pairs. Also, `edge_attr` is already an
    // Option<&[f32]>, so no extra `.map(|e| e.as_ref())` is needed.
    let edge_attr = edge_weights.map(|w| std::slice::from_ref(&w[j]));
    layer.message(&node_features[j], edge_attr)
})
|
||||
.collect();
|
||||
|
||||
// Aggregate
|
||||
let aggregated = layer.aggregate(&messages);
|
||||
|
||||
// Update
|
||||
layer.update(&node_features[i], &aggregated)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: GCN Layer (Week 4-5)
|
||||
|
||||
```rust
|
||||
// src/gnn/layers/gcn.rs
|
||||
|
||||
/// Graph Convolutional Network layer
|
||||
/// H' = σ(D^(-1/2) A D^(-1/2) H W)
|
||||
pub struct GCNLayer {
|
||||
in_features: usize,
|
||||
out_features: usize,
|
||||
weights: Vec<f32>, // [in_features, out_features]
|
||||
bias: Option<Vec<f32>>,
|
||||
activation: Activation,
|
||||
}
|
||||
|
||||
impl GCNLayer {
|
||||
pub fn new(in_features: usize, out_features: usize, bias: bool) -> Self {
|
||||
let weights = Self::glorot_init(in_features, out_features);
|
||||
Self {
|
||||
in_features,
|
||||
out_features,
|
||||
weights,
|
||||
bias: if bias { Some(vec![0.0; out_features]) } else { None },
|
||||
activation: Activation::ReLU,
|
||||
}
|
||||
}
|
||||
|
||||
/// Forward pass with normalized adjacency
|
||||
pub fn forward(
|
||||
&self,
|
||||
x: &[Vec<f32>],
|
||||
edge_index: &[(usize, usize)],
|
||||
edge_weights: &[f32],
|
||||
) -> Vec<Vec<f32>> {
|
||||
// Transform features: XW
|
||||
let transformed: Vec<Vec<f32>> = x.par_iter()
|
||||
.map(|xi| self.linear_transform(xi))
|
||||
.collect();
|
||||
|
||||
// Message passing with normalized weights
|
||||
let propagated = self.propagate(&transformed, edge_index, edge_weights);
|
||||
|
||||
// Apply activation
|
||||
propagated.into_iter()
|
||||
.map(|h| self.activate(&h))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn linear_transform(&self, x: &[f32]) -> Vec<f32> {
|
||||
let mut out = vec![0.0; self.out_features];
|
||||
for i in 0..self.out_features {
|
||||
for j in 0..self.in_features {
|
||||
out[i] += x[j] * self.weights[j * self.out_features + i];
|
||||
}
|
||||
if let Some(ref bias) = self.bias {
|
||||
out[i] += bias[i];
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL function
|
||||
#[pg_extern]
|
||||
fn ruvector_gcn_forward(
|
||||
node_embeddings: Vec<Vec<f32>>,
|
||||
edge_src: Vec<i64>,
|
||||
edge_dst: Vec<i64>,
|
||||
edge_weights: Vec<f32>,
|
||||
out_features: i32,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let layer = GCNLayer::new(
|
||||
node_embeddings[0].len(),
|
||||
out_features as usize,
|
||||
true
|
||||
);
|
||||
|
||||
let edges: Vec<_> = edge_src.iter()
|
||||
.zip(edge_dst.iter())
|
||||
.map(|(&s, &d)| (s as usize, d as usize))
|
||||
.collect();
|
||||
|
||||
layer.forward(&node_embeddings, &edges, &edge_weights)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: GraphSAGE Layer (Week 6-7)
|
||||
|
||||
```rust
|
||||
// src/gnn/layers/graphsage.rs
|
||||
|
||||
/// GraphSAGE with neighborhood sampling
|
||||
pub struct GraphSAGELayer {
|
||||
in_features: usize,
|
||||
out_features: usize,
|
||||
aggregator: SAGEAggregator,
|
||||
sample_size: usize,
|
||||
weights_self: Vec<f32>,
|
||||
weights_neigh: Vec<f32>,
|
||||
}
|
||||
|
||||
pub enum SAGEAggregator {
|
||||
Mean,
|
||||
MaxPool { mlp: MLP },
|
||||
LSTM { lstm: LSTMCell },
|
||||
GCN,
|
||||
}
|
||||
|
||||
impl GraphSAGELayer {
|
||||
pub fn forward_with_sampling(
|
||||
&self,
|
||||
x: &[Vec<f32>],
|
||||
edge_index: &[(usize, usize)],
|
||||
num_samples: usize,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let adj_list = build_adjacency_list(edge_index, x.len());
|
||||
|
||||
x.par_iter().enumerate()
|
||||
.map(|(i, xi)| {
|
||||
// Sample neighbors
|
||||
let neighbors = self.sample_neighbors(&adj_list[i], num_samples);
|
||||
|
||||
// Aggregate neighbor features
|
||||
let neighbor_features: Vec<&[f32]> = neighbors.iter()
|
||||
.map(|&j| x[j].as_slice())
|
||||
.collect();
|
||||
let aggregated = self.aggregate(&neighbor_features);
|
||||
|
||||
// Combine self and neighbor
|
||||
self.combine(xi, &aggregated)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn sample_neighbors(&self, neighbors: &[usize], k: usize) -> Vec<usize> {
|
||||
if neighbors.len() <= k {
|
||||
return neighbors.to_vec();
|
||||
}
|
||||
// Uniform random sampling
|
||||
neighbors.choose_multiple(&mut rand::thread_rng(), k)
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn aggregate(&self, features: &[&[f32]]) -> Vec<f32> {
|
||||
match &self.aggregator {
|
||||
SAGEAggregator::Mean => {
|
||||
// NOTE(review): `features` may be empty when a node has no sampled
// neighbors — guard before indexing features[0].
let dim = features[0].len();
|
||||
let mut result = vec![0.0; dim];
|
||||
for f in features {
|
||||
for (r, &v) in result.iter_mut().zip(f.iter()) {
|
||||
*r += v;
|
||||
}
|
||||
}
|
||||
let n = features.len() as f32;
|
||||
result.iter_mut().for_each(|r| *r /= n);
|
||||
result
|
||||
}
|
||||
SAGEAggregator::MaxPool { mlp } => {
|
||||
features.iter()
|
||||
.map(|f| mlp.forward(f))
|
||||
.reduce(|a, b| element_wise_max(&a, &b))
|
||||
.unwrap()
|
||||
}
|
||||
// ... other aggregators
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_graphsage_search(
|
||||
node_table: &str,
|
||||
edge_table: &str,
|
||||
query: Vec<f32>,
|
||||
num_layers: default!(i32, 2),
|
||||
sample_size: default!(i32, 10),
|
||||
k: default!(i32, 10),
|
||||
) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> {
|
||||
// Implementation using SPI
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Graph Isomorphism Network (Week 8)
|
||||
|
||||
```rust
|
||||
// src/gnn/layers/gin.rs
|
||||
|
||||
/// Graph Isomorphism Network - maximally expressive
|
||||
/// h_v = MLP((1 + ε) * h_v + Σ h_u)
|
||||
pub struct GINLayer {
|
||||
mlp: MLP,
|
||||
eps: f32,
|
||||
train_eps: bool,
|
||||
}
|
||||
|
||||
impl GINLayer {
|
||||
pub fn forward(
|
||||
&self,
|
||||
x: &[Vec<f32>],
|
||||
edge_index: &[(usize, usize)],
|
||||
) -> Vec<Vec<f32>> {
|
||||
let adj_list = build_adjacency_list(edge_index, x.len());
|
||||
|
||||
x.par_iter().enumerate()
|
||||
.map(|(i, xi)| {
|
||||
// Sum neighbor features
|
||||
let sum_neighbors: Vec<f32> = adj_list[i].iter()
|
||||
.fold(vec![0.0; xi.len()], |mut acc, &j| {
|
||||
for (a, &v) in acc.iter_mut().zip(x[j].iter()) {
|
||||
*a += v;
|
||||
}
|
||||
acc
|
||||
});
|
||||
|
||||
// (1 + eps) * self + sum_neighbors
|
||||
let combined: Vec<f32> = xi.iter()
|
||||
.zip(sum_neighbors.iter())
|
||||
.map(|(&s, &n)| (1.0 + self.eps) * s + n)
|
||||
.collect();
|
||||
|
||||
// MLP
|
||||
self.mlp.forward(&combined)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: Custom RuVector Layer (Week 9-10)
|
||||
|
||||
```rust
|
||||
// src/gnn/layers/ruvector.rs
|
||||
|
||||
/// RuVector's custom differentiable search layer
|
||||
/// Combines HNSW navigation with learned message passing
|
||||
pub struct RuVectorLayer {
|
||||
in_features: usize,
|
||||
out_features: usize,
|
||||
num_hops: usize,
|
||||
attention: MultiHeadAttention,
|
||||
transform: Linear,
|
||||
}
|
||||
|
||||
impl RuVectorLayer {
|
||||
/// Forward pass using HNSW graph structure
|
||||
pub fn forward(
|
||||
&self,
|
||||
query: &[f32],
|
||||
hnsw_index: &HnswIndex,
|
||||
k_neighbors: usize,
|
||||
) -> Vec<f32> {
|
||||
// Get k nearest neighbors from HNSW
|
||||
let neighbors = hnsw_index.search(query, k_neighbors);
|
||||
|
||||
// Multi-hop aggregation following HNSW structure
|
||||
let mut current = query.to_vec();
|
||||
for hop in 0..self.num_hops {
|
||||
let neighbor_features: Vec<&[f32]> = neighbors.iter()
|
||||
.flat_map(|n| hnsw_index.get_neighbors(n.id))
|
||||
.map(|id| hnsw_index.get_vector(id))
|
||||
.collect();
|
||||
|
||||
// Attention-weighted aggregation
|
||||
current = self.attention.forward(&current, &neighbor_features);
|
||||
}
|
||||
|
||||
self.transform.forward(&current)
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_differentiable_search(
|
||||
query: Vec<f32>,
|
||||
index_name: &str,
|
||||
num_hops: default!(i32, 2),
|
||||
k: default!(i32, 10),
|
||||
) -> TableIterator<'static, (name!(id, i64), name!(score, f32), name!(enhanced_embedding, Vec<f32>))> {
|
||||
// Combines vector search with GNN enhancement
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 6: Graph Storage (Week 11-12)
|
||||
|
||||
```rust
|
||||
// src/gnn/graph_store.rs
|
||||
|
||||
/// Efficient graph storage for PostgreSQL
|
||||
pub struct GraphStore {
|
||||
node_embeddings: SharedMemory<Vec<f32>>,
|
||||
adjacency: CompressedSparseRow,
|
||||
edge_features: Option<SharedMemory<Vec<f32>>>,
|
||||
}
|
||||
|
||||
impl GraphStore {
|
||||
/// Load graph from PostgreSQL tables
|
||||
pub fn from_tables(
|
||||
node_table: &str,
|
||||
embedding_column: &str,
|
||||
edge_table: &str,
|
||||
) -> Result<Self, GraphError> {
|
||||
Spi::connect(|client| {
|
||||
// Load nodes
|
||||
let nodes = client.select(
|
||||
&format!("SELECT id, {} FROM {}", embedding_column, node_table),
|
||||
None, None
|
||||
)?;
|
||||
|
||||
// Load edges
|
||||
let edges = client.select(
|
||||
&format!("SELECT src_id, dst_id, weight FROM {}", edge_table),
|
||||
None, None
|
||||
)?;
|
||||
|
||||
// Build CSR
|
||||
let csr = CompressedSparseRow::from_edges(&edges);
|
||||
|
||||
Ok(Self {
|
||||
node_embeddings: SharedMemory::new(nodes),
|
||||
adjacency: csr,
|
||||
edge_features: None,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Efficient neighbor lookup
|
||||
pub fn neighbors(&self, node_id: usize) -> &[usize] {
|
||||
self.adjacency.neighbors(node_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compressed Sparse Row format for adjacency
|
||||
pub struct CompressedSparseRow {
|
||||
indptr: Vec<usize>, // Row pointers
|
||||
indices: Vec<usize>, // Column indices
|
||||
data: Vec<f32>, // Edge weights
|
||||
}
|
||||
```
|
||||
|
||||
## Aggregator Functions
|
||||
|
||||
```rust
|
||||
// src/gnn/aggregators.rs
|
||||
|
||||
pub enum Aggregator {
|
||||
Sum,
|
||||
Mean,
|
||||
Max,
|
||||
Min,
|
||||
Attention { heads: usize },
|
||||
Set2Set { steps: usize },
|
||||
}
|
||||
|
||||
impl Aggregator {
|
||||
pub fn aggregate(&self, messages: &[Vec<f32>]) -> Vec<f32> {
|
||||
match self {
|
||||
Aggregator::Sum => Self::sum_aggregate(messages),
|
||||
Aggregator::Mean => Self::mean_aggregate(messages),
|
||||
Aggregator::Max => Self::max_aggregate(messages),
|
||||
Aggregator::Attention { heads } => Self::attention_aggregate(messages, *heads),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn sum_aggregate(messages: &[Vec<f32>]) -> Vec<f32> {
|
||||
let dim = messages[0].len();
|
||||
let mut result = vec![0.0; dim];
|
||||
for msg in messages {
|
||||
for (r, &m) in result.iter_mut().zip(msg.iter()) {
|
||||
*r += m;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn attention_aggregate(messages: &[Vec<f32>], heads: usize) -> Vec<f32> {
|
||||
// Multi-head attention over messages
|
||||
let mha = MultiHeadAttention::new(messages[0].len(), heads);
|
||||
mha.aggregate(messages)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Performance Optimizations
|
||||
|
||||
### Batch Processing
|
||||
|
||||
```rust
|
||||
/// Process multiple nodes in parallel batches
|
||||
pub fn batch_message_passing(
|
||||
nodes: &[Vec<f32>],
|
||||
edge_index: &[(usize, usize)],
|
||||
batch_size: usize,
|
||||
) -> Vec<Vec<f32>> {
|
||||
nodes.par_chunks(batch_size)
|
||||
.flat_map(|batch| {
|
||||
// Process batch with SIMD
|
||||
process_batch(batch, edge_index)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
```
|
||||
|
||||
### Sparse Operations
|
||||
|
||||
```rust
|
||||
/// Sparse matrix multiplication for message passing
|
||||
pub fn sparse_mm(
|
||||
node_features: &[Vec<f32>],
|
||||
csr: &CompressedSparseRow,
|
||||
) -> Vec<Vec<f32>> {
|
||||
let dim = node_features[0].len();
|
||||
let num_nodes = node_features.len();
|
||||
|
||||
(0..num_nodes).into_par_iter()
|
||||
.map(|i| {
|
||||
let start = csr.indptr[i];
|
||||
let end = csr.indptr[i + 1];
|
||||
|
||||
let mut result = vec![0.0; dim];
|
||||
for j in start..end {
|
||||
let neighbor = csr.indices[j];
|
||||
let weight = csr.data[j];
|
||||
for (r, &f) in result.iter_mut().zip(node_features[neighbor].iter()) {
|
||||
*r += weight * f;
|
||||
}
|
||||
}
|
||||
result
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Layer | Nodes | Edges | Features | Time (ms) | Memory |
|
||||
|-------|-------|-------|----------|-----------|--------|
|
||||
| GCN | 10K | 100K | 256 | 12 | 40MB |
|
||||
| GraphSAGE | 10K | 100K | 256 | 18 | 45MB |
|
||||
| GAT (4 heads) | 10K | 100K | 256 | 35 | 60MB |
|
||||
| GIN | 10K | 100K | 256 | 15 | 42MB |
|
||||
| RuVector | 10K | 100K | 256 | 25 | 55MB |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Link to ruvector-gnn
|
||||
ruvector-gnn = { path = "../ruvector-gnn", optional = true }
|
||||
|
||||
# Sparse matrix
|
||||
sprs = "0.11"
|
||||
|
||||
# Parallel
|
||||
rayon = "1.10"
|
||||
|
||||
# SIMD
|
||||
simsimd = "5.9"
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
gnn = []
|
||||
gnn-gcn = ["gnn"]
|
||||
gnn-sage = ["gnn"]
|
||||
gnn-gat = ["gnn", "attention"]
|
||||
gnn-gin = ["gnn"]
|
||||
gnn-all = ["gnn-gcn", "gnn-sage", "gnn-gat", "gnn-gin"]
|
||||
```
|
||||
634
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md
vendored
Normal file
634
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md
vendored
Normal file
@@ -0,0 +1,634 @@
|
||||
# Hyperbolic Embeddings Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate hyperbolic geometry operations into PostgreSQL for hierarchical data representation, enabling embeddings in Poincaré ball and Lorentz (hyperboloid) models with native distance functions and indexing.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Hyperbolic Type System │ │
|
||||
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │
|
||||
│ │ │ Poincaré │ │ Lorentz │ │ Klein │ │ │
|
||||
│ │ │ Ball │ │ Hyperboloid │ │ Model │ │ │
|
||||
│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │
|
||||
│ └─────────┼─────────────────┼─────────────────┼───────────┘ │
|
||||
│ └─────────────────┴─────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ Riemannian Operations │ │
|
||||
│ │ (Exponential, Log, PT) │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── hyperbolic/
|
||||
│ ├── mod.rs # Module exports
|
||||
│ ├── types/
|
||||
│ │ ├── poincare.rs # Poincaré ball model
|
||||
│ │ ├── lorentz.rs # Lorentz/hyperboloid model
|
||||
│ │ └── klein.rs # Klein model (projective)
|
||||
│ ├── manifold.rs # Manifold operations
|
||||
│ ├── distance.rs # Distance functions
|
||||
│ ├── index/
|
||||
│ │ ├── htree.rs # Hyperbolic tree index
|
||||
│ │ └── hnsw_hyper.rs # HNSW for hyperbolic space
|
||||
│ └── operators.rs # SQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Hyperbolic Types
|
||||
|
||||
```sql
|
||||
-- Create hyperbolic embedding column
|
||||
CREATE TABLE hierarchical_nodes (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name TEXT,
|
||||
euclidean_embedding vector(128),
|
||||
poincare_embedding hyperbolic(128), -- Poincaré ball
|
||||
lorentz_embedding hyperboloid(129), -- Lorentz model (d+1 dims)
|
||||
curvature FLOAT DEFAULT -1.0
|
||||
);
|
||||
|
||||
-- Insert with automatic projection
|
||||
INSERT INTO hierarchical_nodes (name, euclidean_embedding)
|
||||
VALUES ('root', '[0.1, 0.2, ...]');
|
||||
|
||||
-- Auto-project to hyperbolic space
|
||||
UPDATE hierarchical_nodes
|
||||
SET poincare_embedding = ruvector_to_poincare(euclidean_embedding, curvature);
|
||||
```
|
||||
|
||||
### Distance Operations
|
||||
|
||||
```sql
|
||||
-- Poincaré distance
|
||||
SELECT id, name,
|
||||
ruvector_poincare_distance(poincare_embedding, query_point) AS dist
|
||||
FROM hierarchical_nodes
|
||||
ORDER BY dist
|
||||
LIMIT 10;
|
||||
|
||||
-- Lorentz distance (often more numerically stable)
|
||||
SELECT id, name,
|
||||
ruvector_lorentz_distance(lorentz_embedding, query_point) AS dist
|
||||
FROM hierarchical_nodes
|
||||
ORDER BY dist
|
||||
LIMIT 10;
|
||||
|
||||
-- Custom curvature
|
||||
SELECT ruvector_hyperbolic_distance(
|
||||
a := point_a,
|
||||
b := point_b,
|
||||
model := 'poincare',
|
||||
curvature := -0.5
|
||||
);
|
||||
```
|
||||
|
||||
### Hyperbolic Operations
|
||||
|
||||
```sql
|
||||
-- Möbius addition (translation in Poincaré ball)
|
||||
SELECT ruvector_mobius_add(point_a, point_b, curvature := -1.0);
|
||||
|
||||
-- Exponential map (tangent vector → manifold point)
|
||||
SELECT ruvector_exp_map(base_point, tangent_vector, curvature := -1.0);
|
||||
|
||||
-- Logarithmic map (manifold point → tangent vector)
|
||||
SELECT ruvector_log_map(base_point, target_point, curvature := -1.0);
|
||||
|
||||
-- Parallel transport (move vector along geodesic)
|
||||
SELECT ruvector_parallel_transport(vector, from_point, to_point, curvature := -1.0);
|
||||
|
||||
-- Geodesic midpoint
|
||||
SELECT ruvector_geodesic_midpoint(point_a, point_b);
|
||||
|
||||
-- Project Euclidean to hyperbolic
|
||||
SELECT ruvector_project_to_hyperbolic(euclidean_vec, model := 'poincare');
|
||||
```
|
||||
|
||||
### Hyperbolic Index
|
||||
|
||||
```sql
|
||||
-- Create hyperbolic HNSW index
|
||||
CREATE INDEX ON hierarchical_nodes USING ruvector_hyperbolic (
|
||||
poincare_embedding hyperbolic(128)
|
||||
) WITH (
|
||||
model = 'poincare',
|
||||
curvature = -1.0,
|
||||
m = 16,
|
||||
ef_construction = 64
|
||||
);
|
||||
|
||||
-- Hyperbolic k-NN search
|
||||
SELECT * FROM hierarchical_nodes
|
||||
ORDER BY poincare_embedding <~> query_point -- <~> is hyperbolic distance
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Poincaré Ball Model (Week 1-3)
|
||||
|
||||
```rust
|
||||
// src/hyperbolic/types/poincare.rs
|
||||
|
||||
use simsimd::SpatialSimilarity;
|
||||
|
||||
/// Poincaré ball model B^n_c = {x ∈ R^n : c||x||² < 1}
|
||||
pub struct PoincareBall {
|
||||
dim: usize,
|
||||
curvature: f32, // Negative curvature, typically -1.0
|
||||
}
|
||||
|
||||
impl PoincareBall {
|
||||
pub fn new(dim: usize, curvature: f32) -> Self {
|
||||
assert!(curvature < 0.0, "Curvature must be negative");
|
||||
Self { dim, curvature }
|
||||
}
|
||||
|
||||
/// Conformal factor λ_c(x) = 2 / (1 - c||x||²)
|
||||
#[inline]
|
||||
fn conformal_factor(&self, x: &[f32]) -> f32 {
|
||||
let c = -self.curvature;
|
||||
let norm_sq = self.norm_sq(x);
|
||||
2.0 / (1.0 - c * norm_sq)
|
||||
}
|
||||
|
||||
/// Poincaré distance: d(x,y) = (2/√c) * arctanh(√c * ||−x ⊕_c y||)
|
||||
pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
|
||||
// Möbius addition: -x ⊕ y
|
||||
let neg_x: Vec<f32> = x.iter().map(|&xi| -xi).collect();
|
||||
let mobius_sum = self.mobius_add(&neg_x, y);
|
||||
let norm = self.norm(&mobius_sum);
|
||||
|
||||
(2.0 / sqrt_c) * (sqrt_c * norm).atanh()
|
||||
}
|
||||
|
||||
/// Möbius addition in Poincaré ball
|
||||
pub fn mobius_add(&self, x: &[f32], y: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let x_norm_sq = self.norm_sq(x);
|
||||
let y_norm_sq = self.norm_sq(y);
|
||||
let xy_dot = self.dot(x, y);
|
||||
|
||||
let num_coef = 1.0 + 2.0 * c * xy_dot + c * y_norm_sq;
|
||||
let y_coef = 1.0 - c * x_norm_sq;
|
||||
let denom = 1.0 + 2.0 * c * xy_dot + c * c * x_norm_sq * y_norm_sq;
|
||||
|
||||
x.iter().zip(y.iter())
|
||||
.map(|(&xi, &yi)| (num_coef * xi + y_coef * yi) / denom)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Exponential map: tangent space → manifold
|
||||
pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
|
||||
let lambda = self.conformal_factor(base);
|
||||
let tangent_norm = self.norm(tangent);
|
||||
|
||||
if tangent_norm < 1e-10 {
|
||||
return base.to_vec();
|
||||
}
|
||||
|
||||
let coef = (sqrt_c * lambda * tangent_norm / 2.0).tanh() / (sqrt_c * tangent_norm);
|
||||
let direction: Vec<f32> = tangent.iter().map(|&t| t * coef).collect();
|
||||
|
||||
self.mobius_add(base, &direction)
|
||||
}
|
||||
|
||||
/// Logarithmic map: manifold → tangent space
|
||||
pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
|
||||
// -base ⊕ target
|
||||
let neg_base: Vec<f32> = base.iter().map(|&b| -b).collect();
|
||||
let addition = self.mobius_add(&neg_base, target);
|
||||
let add_norm = self.norm(&addition);
|
||||
|
||||
if add_norm < 1e-10 {
|
||||
return vec![0.0; self.dim];
|
||||
}
|
||||
|
||||
let lambda = self.conformal_factor(base);
|
||||
let coef = (2.0 / (sqrt_c * lambda)) * (sqrt_c * add_norm).atanh() / add_norm;
|
||||
|
||||
addition.iter().map(|&a| a * coef).collect()
|
||||
}
|
||||
|
||||
/// Project point to ball (clamp norm)
|
||||
pub fn project(&self, x: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let max_norm = (1.0 / c).sqrt() - 1e-5;
|
||||
let norm = self.norm(x);
|
||||
|
||||
if norm <= max_norm {
|
||||
x.to_vec()
|
||||
} else {
|
||||
let scale = max_norm / norm;
|
||||
x.iter().map(|&xi| xi * scale).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn norm_sq(&self, x: &[f32]) -> f32 {
    // simsimd's SpatialSimilarity::dot returns Option<f64>; cast back to f32
    // before falling back to the scalar loop.
    f32::dot(x, x).map(|d| d as f32).unwrap_or_else(|| x.iter().map(|&xi| xi * xi).sum())
}
|
||||
|
||||
#[inline]
|
||||
fn norm(&self, x: &[f32]) -> f32 {
|
||||
self.norm_sq(x).sqrt()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn dot(&self, x: &[f32], y: &[f32]) -> f32 {
    // As with norm_sq: simsimd returns Option<f64>, so cast to f32.
    f32::dot(x, y).map(|d| d as f32)
        .unwrap_or_else(|| x.iter().zip(y.iter()).map(|(&a, &b)| a * b).sum())
}
|
||||
}
|
||||
|
||||
// PostgreSQL type
|
||||
#[derive(PostgresType, Serialize, Deserialize)]
|
||||
#[pgx(sql = "CREATE TYPE hyperbolic")]
|
||||
pub struct Hyperbolic {
|
||||
data: Vec<f32>,
|
||||
curvature: f32,
|
||||
}
|
||||
|
||||
// PostgreSQL functions
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_poincare_distance(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> f32 {
|
||||
let ball = PoincareBall::new(a.len(), curvature);
|
||||
ball.distance(&a, &b)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_mobius_add(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
|
||||
let ball = PoincareBall::new(a.len(), curvature);
|
||||
ball.mobius_add(&a, &b)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_exp_map(base: Vec<f32>, tangent: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
|
||||
let ball = PoincareBall::new(base.len(), curvature);
|
||||
ball.exp_map(&base, &tangent)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_log_map(base: Vec<f32>, target: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
|
||||
let ball = PoincareBall::new(base.len(), curvature);
|
||||
ball.log_map(&base, &target)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Lorentz Model (Week 4-5)
|
||||
|
||||
```rust
|
||||
// src/hyperbolic/types/lorentz.rs
|
||||
|
||||
/// Lorentz (hyperboloid) model: H^n = {x ∈ R^{n+1} : <x,x>_L = -1/c, x_0 > 0}
|
||||
/// More numerically stable than Poincaré for high dimensions
|
||||
pub struct LorentzModel {
|
||||
dim: usize, // Ambient dimension (n+1)
|
||||
curvature: f32,
|
||||
}
|
||||
|
||||
impl LorentzModel {
|
||||
/// Minkowski inner product: <x,y>_L = -x_0*y_0 + Σ x_i*y_i
|
||||
#[inline]
|
||||
pub fn minkowski_dot(&self, x: &[f32], y: &[f32]) -> f32 {
|
||||
-x[0] * y[0] + x[1..].iter().zip(y[1..].iter())
|
||||
.map(|(&a, &b)| a * b)
|
||||
.sum::<f32>()
|
||||
}
|
||||
|
||||
/// Lorentz distance: d(x,y) = (1/√c) * arcosh(-c * <x,y>_L)
|
||||
pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
let inner = self.minkowski_dot(x, y);
|
||||
|
||||
(1.0 / sqrt_c) * (-c * inner).acosh()
|
||||
}
|
||||
|
||||
/// Exponential map on hyperboloid
|
||||
pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
|
||||
// The Minkowski norm² of a tangent vector at a hyperboloid point is
// non-negative (tangents are spacelike), but floating-point error can push
// it slightly negative — clamp before taking the square root.
let tangent_norm_sq = self.minkowski_dot(tangent, tangent).max(0.0);
if tangent_norm_sq < 1e-10 {
    return base.to_vec();
}
let tangent_norm = tangent_norm_sq.sqrt();
|
||||
|
||||
let coef1 = (sqrt_c * tangent_norm).cosh();
|
||||
let coef2 = (sqrt_c * tangent_norm).sinh() / tangent_norm;
|
||||
|
||||
base.iter().zip(tangent.iter())
|
||||
.map(|(&b, &t)| coef1 * b + coef2 * t)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Logarithmic map on hyperboloid
|
||||
pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let sqrt_c = c.sqrt();
|
||||
|
||||
let inner = self.minkowski_dot(base, target);
|
||||
let dist = self.distance(base, target);
|
||||
|
||||
if dist < 1e-10 {
|
||||
return vec![0.0; self.dim];
|
||||
}
|
||||
|
||||
// d(x,y) / ||log_x(y)||_L, where ||y + c·<x,y>_L·x||_L = sinh(√c·d)/√c
// (the √c factor matters for general curvature; at c = 1 it cancels).
let coef = (dist * sqrt_c) / (sqrt_c * dist).sinh();

// Tangent-space projection is y + c·<x,y>_L·x: since <x,y>_L ≤ -1/c this
// removes the component along the base point. The previous `t - inner * b`
// had the sign flipped.
target.iter().zip(base.iter())
    .map(|(&t, &b)| coef * (t + c * inner * b))
    .collect()
|
||||
}
|
||||
|
||||
/// Project to hyperboloid (ensure constraint satisfied)
|
||||
pub fn project(&self, x: &[f32]) -> Vec<f32> {
|
||||
let c = -self.curvature;
|
||||
let space_norm_sq: f32 = x[1..].iter().map(|&xi| xi * xi).sum();
|
||||
let x0 = ((1.0 / c) + space_norm_sq).sqrt();
|
||||
|
||||
let mut result = vec![x0];
|
||||
result.extend_from_slice(&x[1..]);
|
||||
result
|
||||
}
|
||||
|
||||
/// Convert from Poincaré ball to Lorentz
|
||||
pub fn from_poincare(&self, poincare: &[f32], poincare_curvature: f32) -> Vec<f32> {
|
||||
let c = -poincare_curvature;
|
||||
let norm_sq: f32 = poincare.iter().map(|&x| x * x).sum();
|
||||
|
||||
let x0 = (1.0 + c * norm_sq) / (1.0 - c * norm_sq);
|
||||
let coef = 2.0 / (1.0 - c * norm_sq);
|
||||
|
||||
let mut result = vec![x0];
|
||||
result.extend(poincare.iter().map(|&p| coef * p));
|
||||
result
|
||||
}
|
||||
|
||||
/// Convert from Lorentz to Poincaré ball
|
||||
pub fn to_poincare(&self, lorentz: &[f32]) -> Vec<f32> {
|
||||
let denom = 1.0 + lorentz[0];
|
||||
lorentz[1..].iter().map(|&x| x / denom).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_lorentz_distance(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> f32 {
|
||||
let model = LorentzModel::new(a.len(), curvature);
|
||||
model.distance(&a, &b)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_poincare_to_lorentz(poincare: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
|
||||
let model = LorentzModel::new(poincare.len() + 1, curvature);
|
||||
model.from_poincare(&poincare, curvature)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_lorentz_to_poincare(lorentz: Vec<f32>) -> Vec<f32> {
|
||||
let model = LorentzModel::new(lorentz.len(), -1.0);
|
||||
model.to_poincare(&lorentz)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Hyperbolic HNSW Index (Week 6-8)
|
||||
|
||||
```rust
|
||||
// src/hyperbolic/index/hnsw_hyper.rs
|
||||
|
||||
/// HNSW index adapted for hyperbolic space
|
||||
pub struct HyperbolicHnsw {
|
||||
layers: Vec<HnswLayer>,
|
||||
manifold: HyperbolicManifold,
|
||||
m: usize,
|
||||
ef_construction: usize,
|
||||
}
|
||||
|
||||
pub enum HyperbolicManifold {
|
||||
Poincare(PoincareBall),
|
||||
Lorentz(LorentzModel),
|
||||
}
|
||||
|
||||
impl HyperbolicHnsw {
|
||||
/// Distance function based on manifold
|
||||
fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
|
||||
match &self.manifold {
|
||||
HyperbolicManifold::Poincare(ball) => ball.distance(a, b),
|
||||
HyperbolicManifold::Lorentz(model) => model.distance(a, b),
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert with hyperbolic distance
|
||||
pub fn insert(&mut self, id: u64, vector: &[f32]) {
|
||||
// Project to manifold first
|
||||
let projected = match &self.manifold {
|
||||
HyperbolicManifold::Poincare(ball) => ball.project(vector),
|
||||
HyperbolicManifold::Lorentz(model) => model.project(vector),
|
||||
};
|
||||
|
||||
// Standard HNSW insertion with hyperbolic distance
|
||||
let entry_point = self.entry_point();
|
||||
let level = self.random_level();
|
||||
|
||||
for l in (0..=level).rev() {
|
||||
let candidates = self.search_layer(&projected, entry_point, self.ef_construction, l);
|
||||
let neighbors = self.select_neighbors(&projected, &candidates, self.m);
|
||||
self.connect(id, &neighbors, l);
|
||||
}
|
||||
|
||||
self.vectors.insert(id, projected);
|
||||
}
|
||||
|
||||
/// Search with hyperbolic distance
|
||||
pub fn search(&self, query: &[f32], k: usize, ef: usize) -> Vec<(u64, f32)> {
|
||||
let projected = match &self.manifold {
|
||||
HyperbolicManifold::Poincare(ball) => ball.project(query),
|
||||
HyperbolicManifold::Lorentz(model) => model.project(query),
|
||||
};
|
||||
|
||||
let mut candidates = self.search_layer(&projected, self.entry_point(), ef, 0);
|
||||
candidates.truncate(k);
|
||||
candidates
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL index access method
|
||||
#[pg_extern]
|
||||
fn ruvector_hyperbolic_hnsw_handler(internal: Internal) -> Internal {
|
||||
// Index AM handler
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Euclidean to Hyperbolic Projection (Week 9-10)
|
||||
|
||||
```rust
|
||||
// src/hyperbolic/manifold.rs
|
||||
|
||||
/// Project Euclidean embeddings to hyperbolic space
|
||||
pub struct HyperbolicProjection {
|
||||
model: HyperbolicModel,
|
||||
method: ProjectionMethod,
|
||||
}
|
||||
|
||||
pub enum ProjectionMethod {
|
||||
/// Direct scaling to fit in ball
|
||||
Scale,
|
||||
/// Learned exponential map from origin
|
||||
ExponentialMap,
|
||||
/// Centroid-based projection
|
||||
Centroid { centroid: Vec<f32> },
|
||||
}
|
||||
|
||||
impl HyperbolicProjection {
|
||||
/// Project batch of Euclidean vectors
|
||||
pub fn project_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<f32>> {
|
||||
match &self.method {
|
||||
ProjectionMethod::Scale => {
|
||||
vectors.par_iter()
|
||||
.map(|v| self.scale_project(v))
|
||||
.collect()
|
||||
}
|
||||
ProjectionMethod::ExponentialMap => {
|
||||
let origin = vec![0.0; vectors[0].len()];
|
||||
vectors.par_iter()
|
||||
.map(|v| self.model.exp_map(&origin, v))
|
||||
.collect()
|
||||
}
|
||||
ProjectionMethod::Centroid { centroid } => {
|
||||
vectors.par_iter()
|
||||
.map(|v| {
|
||||
let tangent: Vec<f32> = v.iter()
|
||||
.zip(centroid.iter())
|
||||
.map(|(&vi, &ci)| vi - ci)
|
||||
.collect();
|
||||
self.model.exp_map(centroid, &tangent)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn scale_project(&self, v: &[f32]) -> Vec<f32> {
|
||||
let norm: f32 = v.iter().map(|&x| x * x).sum::<f32>().sqrt();
|
||||
let max_norm = 0.99; // Stay within ball
|
||||
|
||||
if norm <= max_norm {
|
||||
v.to_vec()
|
||||
} else {
|
||||
let scale = max_norm / norm;
|
||||
v.iter().map(|&x| x * scale).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_to_poincare(
|
||||
euclidean: Vec<f32>,
|
||||
curvature: default!(f32, -1.0),
|
||||
method: default!(&str, "'scale'"),
|
||||
) -> Vec<f32> {
|
||||
let model = PoincareBall::new(euclidean.len(), curvature);
|
||||
let projection = HyperbolicProjection::new(model, method.into());
|
||||
projection.project(&euclidean)
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_batch_to_poincare(
|
||||
table_name: &str,
|
||||
euclidean_column: &str,
|
||||
output_column: &str,
|
||||
curvature: default!(f32, -1.0),
|
||||
) -> i64 {
|
||||
// Batch projection using SPI
|
||||
Spi::connect(|client| {
|
||||
// ... batch update
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Hierarchical Data (Taxonomies, Org Charts)
|
||||
|
||||
```sql
|
||||
-- Embed taxonomy with parent-child relationships preserved
|
||||
-- Children naturally cluster closer to parents in hyperbolic space
|
||||
CREATE TABLE taxonomy (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name TEXT,
|
||||
parent_id INTEGER REFERENCES taxonomy(id),
|
||||
embedding hyperbolic(64)
|
||||
);
|
||||
|
||||
-- Find all items in subtree (leveraging hyperbolic geometry)
|
||||
SELECT * FROM taxonomy
|
||||
WHERE ruvector_poincare_distance(embedding, root_embedding) < subtree_radius
|
||||
ORDER BY ruvector_poincare_distance(embedding, root_embedding);
|
||||
```
|
||||
|
||||
### Knowledge Graphs
|
||||
|
||||
```sql
|
||||
-- Entities with hierarchical relationships
|
||||
-- Hyperbolic space captures asymmetric relations naturally
|
||||
SELECT entity_a.name, entity_b.name,
|
||||
ruvector_poincare_distance(entity_a.embedding, entity_b.embedding) AS distance
|
||||
FROM entities entity_a, entities entity_b
|
||||
WHERE entity_a.id != entity_b.id
|
||||
ORDER BY distance
|
||||
LIMIT 100;
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Operation | Dimension | Curvature | Time (μs) | vs Euclidean |
|
||||
|-----------|-----------|-----------|-----------|--------------|
|
||||
| Poincaré Distance | 128 | -1.0 | 2.1 | 1.8x slower |
|
||||
| Lorentz Distance | 129 | -1.0 | 1.5 | 1.3x slower |
|
||||
| Möbius Addition | 128 | -1.0 | 3.2 | N/A |
|
||||
| Exp Map | 128 | -1.0 | 4.5 | N/A |
|
||||
| HNSW Search (hyper) | 128 | -1.0 | 850 | 1.5x slower |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# SIMD for fast operations
|
||||
simsimd = "5.9"
|
||||
|
||||
# Numerical stability
|
||||
num-traits = "0.2"
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
hyperbolic = []
|
||||
hyperbolic-poincare = ["hyperbolic"]
|
||||
hyperbolic-lorentz = ["hyperbolic"]
|
||||
hyperbolic-index = ["hyperbolic", "index-hnsw"]
|
||||
hyperbolic-all = ["hyperbolic-poincare", "hyperbolic-lorentz", "hyperbolic-index"]
|
||||
```
|
||||
---

<!-- Next document: vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/05-sparse-vectors.md (new vendored file, 703 lines) -->
|
||||
# Sparse Vectors Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate sparse vector support into PostgreSQL for efficient storage and search of high-dimensional sparse embeddings (BM25, SPLADE, learned sparse representations).
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Sparse Vector Type │ │
|
||||
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │
|
||||
│ │ │ COO Format │ │ CSR Format │ │ Dictionary │ │ │
|
||||
│ │ │ (indices, │ │ (sorted, │ │ (hash-based │ │ │
|
||||
│ │ │ values) │ │ compact) │ │ lookup) │ │ │
|
||||
│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │
|
||||
│ └─────────┼─────────────────┼─────────────────┼───────────┘ │
|
||||
│ └─────────────────┴─────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ Sparse Distance Funcs │ │
|
||||
│ │ (Dot, Cosine, BM25) │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── sparse/
|
||||
│ ├── mod.rs # Module exports
|
||||
│ ├── types/
|
||||
│ │ ├── sparsevec.rs # Core sparse vector type
|
||||
│ │ ├── coo.rs # COO format (coordinate)
|
||||
│ │ └── csr.rs # CSR format (compressed sparse row)
|
||||
│ ├── distance.rs # Sparse distance functions
|
||||
│ ├── index/
|
||||
│ │ ├── inverted.rs # Inverted index for sparse search
|
||||
│ │ └── sparse_hnsw.rs # HNSW adapted for sparse vectors
|
||||
│ ├── hybrid.rs # Dense + sparse hybrid search
|
||||
│ └── operators.rs # SQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Sparse Vector Type
|
||||
|
||||
```sql
|
||||
-- Create table with sparse vectors
|
||||
CREATE TABLE documents (
|
||||
id SERIAL PRIMARY KEY,
|
||||
content TEXT,
|
||||
dense_embedding vector(768),
|
||||
sparse_embedding sparsevec(30000), -- BM25 or SPLADE
|
||||
metadata jsonb
|
||||
);
|
||||
|
||||
-- Insert sparse vector (indices:values format)
|
||||
INSERT INTO documents (content, sparse_embedding)
|
||||
VALUES (
|
||||
'Machine learning for natural language processing',
|
||||
'{1024:0.5, 2048:0.3, 4096:0.8, 15000:0.2}'::sparsevec
|
||||
);
|
||||
|
||||
-- Insert from array representation
|
||||
INSERT INTO documents (sparse_embedding)
|
||||
VALUES (ruvector_to_sparse(
|
||||
indices := ARRAY[1024, 2048, 4096, 15000],
|
||||
values := ARRAY[0.5, 0.3, 0.8, 0.2],
|
||||
dim := 30000
|
||||
));
|
||||
```
|
||||
|
||||
### Distance Operations
|
||||
|
||||
```sql
|
||||
-- Sparse dot product (inner product similarity)
|
||||
SELECT id, content,
|
||||
ruvector_sparse_dot(sparse_embedding, query_sparse) AS score
|
||||
FROM documents
|
||||
ORDER BY score DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- Sparse cosine similarity
|
||||
SELECT id,
|
||||
ruvector_sparse_cosine(sparse_embedding, query_sparse) AS similarity
|
||||
FROM documents
|
||||
WHERE ruvector_sparse_cosine(sparse_embedding, query_sparse) > 0.5;
|
||||
|
||||
-- Custom operator: <#> for sparse inner product
|
||||
SELECT * FROM documents
|
||||
ORDER BY sparse_embedding <#> query_sparse DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
### Sparse Index
|
||||
|
||||
```sql
|
||||
-- Create inverted index for sparse vectors
|
||||
CREATE INDEX ON documents USING ruvector_sparse (
|
||||
sparse_embedding sparsevec(30000)
|
||||
) WITH (
|
||||
pruning_threshold = 0.1, -- Prune low-weight terms
|
||||
quantization = 'int8' -- Optional quantization
|
||||
);
|
||||
|
||||
-- Approximate sparse search
|
||||
SELECT * FROM documents
|
||||
ORDER BY sparse_embedding <#> query_sparse
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
### Hybrid Dense + Sparse Search
|
||||
|
||||
```sql
|
||||
-- Hybrid search combining dense and sparse
|
||||
SELECT id, content,
|
||||
0.7 * (1 - (dense_embedding <=> query_dense)) +
|
||||
0.3 * ruvector_sparse_dot(sparse_embedding, query_sparse) AS hybrid_score
|
||||
FROM documents
|
||||
ORDER BY hybrid_score DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- Built-in hybrid search function
|
||||
SELECT * FROM ruvector_hybrid_search(
|
||||
table_name := 'documents',
|
||||
dense_column := 'dense_embedding',
|
||||
sparse_column := 'sparse_embedding',
|
||||
dense_query := query_dense,
|
||||
sparse_query := query_sparse,
|
||||
dense_weight := 0.7,
|
||||
sparse_weight := 0.3,
|
||||
k := 10
|
||||
);
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Sparse Vector Type (Week 1-2)
|
||||
|
||||
```rust
|
||||
// src/sparse/types/sparsevec.rs
|
||||
|
||||
use pgrx::prelude::*;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
/// Sparse vector stored as sorted (index, value) pairs
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SparseVec {
|
||||
indices: Vec<u32>,
|
||||
values: Vec<f32>,
|
||||
dim: u32,
|
||||
}
|
||||
|
||||
impl SparseVec {
|
||||
pub fn new(indices: Vec<u32>, values: Vec<f32>, dim: u32) -> Result<Self, SparseError> {
|
||||
if indices.len() != values.len() {
|
||||
return Err(SparseError::LengthMismatch);
|
||||
}
|
||||
|
||||
// Ensure sorted and unique
|
||||
let mut pairs: Vec<_> = indices.into_iter().zip(values.into_iter()).collect();
|
||||
pairs.sort_by_key(|(i, _)| *i);
|
||||
pairs.dedup_by_key(|(i, _)| *i);
|
||||
|
||||
let (indices, values): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
|
||||
|
||||
if indices.last().map_or(false, |&i| i >= dim) {
|
||||
return Err(SparseError::IndexOutOfBounds);
|
||||
}
|
||||
|
||||
Ok(Self { indices, values, dim })
|
||||
}
|
||||
|
||||
/// Number of non-zero elements
|
||||
#[inline]
|
||||
pub fn nnz(&self) -> usize {
|
||||
self.indices.len()
|
||||
}
|
||||
|
||||
/// Get value at index (O(log n) binary search)
|
||||
pub fn get(&self, index: u32) -> f32 {
|
||||
match self.indices.binary_search(&index) {
|
||||
Ok(pos) => self.values[pos],
|
||||
Err(_) => 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterate over non-zero elements
|
||||
pub fn iter(&self) -> impl Iterator<Item = (u32, f32)> + '_ {
|
||||
self.indices.iter().copied().zip(self.values.iter().copied())
|
||||
}
|
||||
|
||||
/// L2 norm
|
||||
pub fn norm(&self) -> f32 {
|
||||
self.values.iter().map(|&v| v * v).sum::<f32>().sqrt()
|
||||
}
|
||||
|
||||
/// Prune elements below threshold
|
||||
pub fn prune(&mut self, threshold: f32) {
|
||||
let pairs: Vec<_> = self.indices.iter().copied()
|
||||
.zip(self.values.iter().copied())
|
||||
.filter(|(_, v)| v.abs() >= threshold)
|
||||
.collect();
|
||||
|
||||
self.indices = pairs.iter().map(|(i, _)| *i).collect();
|
||||
self.values = pairs.iter().map(|(_, v)| *v).collect();
|
||||
}
|
||||
|
||||
/// Top-k sparsification
|
||||
pub fn top_k(&self, k: usize) -> SparseVec {
|
||||
let mut indexed: Vec<_> = self.indices.iter().copied()
|
||||
.zip(self.values.iter().copied())
|
||||
.collect();
|
||||
|
||||
indexed.sort_by(|(_, a), (_, b)| b.abs().partial_cmp(&a.abs()).unwrap());
|
||||
indexed.truncate(k);
|
||||
indexed.sort_by_key(|(i, _)| *i);
|
||||
|
||||
let (indices, values): (Vec<_>, Vec<_>) = indexed.into_iter().unzip();
|
||||
|
||||
SparseVec { indices, values, dim: self.dim }
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL type registration
|
||||
#[derive(PostgresType, Serialize, Deserialize)]
|
||||
#[pgrx(sql = "CREATE TYPE sparsevec")]
|
||||
pub struct PgSparseVec(SparseVec);
|
||||
|
||||
impl FromDatum for PgSparseVec {
|
||||
// ... TOAST-aware deserialization
|
||||
}
|
||||
|
||||
impl IntoDatum for PgSparseVec {
|
||||
// ... serialization
|
||||
}
|
||||
|
||||
// Parse from string: '{1:0.5, 2:0.3}'
|
||||
impl std::str::FromStr for SparseVec {
|
||||
type Err = SparseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let s = s.trim().trim_start_matches('{').trim_end_matches('}');
|
||||
let mut indices = Vec::new();
|
||||
let mut values = Vec::new();
|
||||
let mut max_index = 0u32;
|
||||
|
||||
for pair in s.split(',') {
|
||||
let parts: Vec<_> = pair.trim().split(':').collect();
|
||||
if parts.len() != 2 {
|
||||
return Err(SparseError::ParseError);
|
||||
}
|
||||
let idx: u32 = parts[0].trim().parse().map_err(|_| SparseError::ParseError)?;
|
||||
let val: f32 = parts[1].trim().parse().map_err(|_| SparseError::ParseError)?;
|
||||
indices.push(idx);
|
||||
values.push(val);
|
||||
max_index = max_index.max(idx);
|
||||
}
|
||||
|
||||
SparseVec::new(indices, values, max_index + 1)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Sparse Distance Functions (Week 3-4)
|
||||
|
||||
```rust
|
||||
// src/sparse/distance.rs
|
||||
|
||||
use simsimd::SpatialSimilarity;
|
||||
|
||||
/// Sparse dot product (inner product)
|
||||
/// Only iterates over shared non-zero indices
|
||||
pub fn sparse_dot(a: &SparseVec, b: &SparseVec) -> f32 {
|
||||
let mut result = 0.0;
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
while i < a.indices.len() && j < b.indices.len() {
|
||||
match a.indices[i].cmp(&b.indices[j]) {
|
||||
std::cmp::Ordering::Less => i += 1,
|
||||
std::cmp::Ordering::Greater => j += 1,
|
||||
std::cmp::Ordering::Equal => {
|
||||
result += a.values[i] * b.values[j];
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Sparse cosine similarity
|
||||
pub fn sparse_cosine(a: &SparseVec, b: &SparseVec) -> f32 {
|
||||
let dot = sparse_dot(a, b);
|
||||
let norm_a = a.norm();
|
||||
let norm_b = b.norm();
|
||||
|
||||
if norm_a == 0.0 || norm_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot / (norm_a * norm_b)
|
||||
}
|
||||
|
||||
/// Sparse Euclidean distance
|
||||
pub fn sparse_euclidean(a: &SparseVec, b: &SparseVec) -> f32 {
|
||||
let mut result = 0.0;
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
while i < a.indices.len() || j < b.indices.len() {
|
||||
let idx_a = a.indices.get(i).copied().unwrap_or(u32::MAX);
|
||||
let idx_b = b.indices.get(j).copied().unwrap_or(u32::MAX);
|
||||
|
||||
match idx_a.cmp(&idx_b) {
|
||||
std::cmp::Ordering::Less => {
|
||||
result += a.values[i] * a.values[i];
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
result += b.values[j] * b.values[j];
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let diff = a.values[i] - b.values[j];
|
||||
result += diff * diff;
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.sqrt()
|
||||
}
|
||||
|
||||
/// BM25 scoring for sparse term vectors
|
||||
pub fn sparse_bm25(
|
||||
query: &SparseVec,
|
||||
doc: &SparseVec,
|
||||
doc_len: f32,
|
||||
avg_doc_len: f32,
|
||||
k1: f32,
|
||||
b: f32,
|
||||
) -> f32 {
|
||||
let mut score = 0.0;
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
|
||||
while i < query.indices.len() && j < doc.indices.len() {
|
||||
match query.indices[i].cmp(&doc.indices[j]) {
|
||||
std::cmp::Ordering::Less => i += 1,
|
||||
std::cmp::Ordering::Greater => j += 1,
|
||||
std::cmp::Ordering::Equal => {
|
||||
let idf = query.values[i]; // Assume query values are IDF weights
|
||||
let tf = doc.values[j]; // Doc values are TF
|
||||
|
||||
let numerator = tf * (k1 + 1.0);
|
||||
let denominator = tf + k1 * (1.0 - b + b * doc_len / avg_doc_len);
|
||||
|
||||
score += idf * numerator / denominator;
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
// PostgreSQL functions
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_sparse_dot(a: PgSparseVec, b: PgSparseVec) -> f32 {
|
||||
sparse_dot(&a.0, &b.0)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_sparse_cosine(a: PgSparseVec, b: PgSparseVec) -> f32 {
|
||||
sparse_cosine(&a.0, &b.0)
|
||||
}
|
||||
|
||||
#[pg_extern(immutable, parallel_safe)]
|
||||
fn ruvector_sparse_euclidean(a: PgSparseVec, b: PgSparseVec) -> f32 {
|
||||
sparse_euclidean(&a.0, &b.0)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Inverted Index (Week 5-7)
|
||||
|
||||
```rust
|
||||
// src/sparse/index/inverted.rs
|
||||
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
|
||||
/// Inverted index for efficient sparse vector search
|
||||
pub struct InvertedIndex {
|
||||
/// term_id -> [(doc_id, weight), ...]
|
||||
postings: DashMap<u32, Vec<(u64, f32)>>,
|
||||
/// doc_id -> sparse vector (for re-ranking)
|
||||
documents: DashMap<u64, SparseVec>,
|
||||
/// Document norms for cosine similarity
|
||||
doc_norms: DashMap<u64, f32>,
|
||||
/// Configuration
|
||||
config: InvertedIndexConfig,
|
||||
}
|
||||
|
||||
pub struct InvertedIndexConfig {
|
||||
pub pruning_threshold: f32,
|
||||
pub max_postings_per_term: usize,
|
||||
pub quantization: Option<Quantization>,
|
||||
}
|
||||
|
||||
impl InvertedIndex {
|
||||
pub fn new(config: InvertedIndexConfig) -> Self {
|
||||
Self {
|
||||
postings: DashMap::new(),
|
||||
documents: DashMap::new(),
|
||||
doc_norms: DashMap::new(),
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert document into index
|
||||
pub fn insert(&self, doc_id: u64, vector: SparseVec) {
|
||||
let norm = vector.norm();
|
||||
|
||||
// Index each non-zero term
|
||||
for (term_id, weight) in vector.iter() {
|
||||
if weight.abs() < self.config.pruning_threshold {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.postings
|
||||
.entry(term_id)
|
||||
.or_insert_with(Vec::new)
|
||||
.push((doc_id, weight));
|
||||
}
|
||||
|
||||
self.doc_norms.insert(doc_id, norm);
|
||||
self.documents.insert(doc_id, vector);
|
||||
}
|
||||
|
||||
    /// Exhaustive term-at-a-time top-k search (accumulates dot-product
    /// contributions per posting list); see `search_wand` below for the
    /// WAND-optimized variant that prunes low-scoring candidates early
|
||||
pub fn search(&self, query: &SparseVec, k: usize) -> Vec<(u64, f32)> {
|
||||
// Collect candidate documents
|
||||
let mut doc_scores: HashMap<u64, f32> = HashMap::new();
|
||||
|
||||
for (term_id, query_weight) in query.iter() {
|
||||
if let Some(postings) = self.postings.get(&term_id) {
|
||||
for &(doc_id, doc_weight) in postings.iter() {
|
||||
*doc_scores.entry(doc_id).or_insert(0.0) += query_weight * doc_weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get top-k
|
||||
let mut results: Vec<_> = doc_scores.into_iter().collect();
|
||||
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
||||
results.truncate(k);
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// WAND (Weak AND) algorithm for efficient top-k retrieval
|
||||
pub fn search_wand(&self, query: &SparseVec, k: usize) -> Vec<(u64, f32)> {
|
||||
// Sort query terms by max contribution (upper bound)
|
||||
let mut term_info: Vec<_> = query.iter()
|
||||
.filter_map(|(term_id, weight)| {
|
||||
self.postings.get(&term_id).map(|p| {
|
||||
let max_doc_weight = p.iter().map(|(_, w)| *w).fold(0.0f32, f32::max);
|
||||
(term_id, weight, max_doc_weight * weight)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
term_info.sort_by(|(_, _, a), (_, _, b)| b.partial_cmp(a).unwrap());
|
||||
|
||||
// WAND traversal
|
||||
let mut heap: BinaryHeap<(OrderedFloat<f32>, u64)> = BinaryHeap::new();
|
||||
let threshold = 0.0f32;
|
||||
|
||||
// ... WAND implementation
|
||||
|
||||
heap.into_iter().map(|(s, id)| (id, s.0)).collect()
|
||||
}
|
||||
}
|
||||
|
||||
// PostgreSQL index access method
|
||||
#[pg_extern]
|
||||
fn ruvector_sparse_handler(internal: Internal) -> Internal {
|
||||
// Index AM handler for sparse inverted index
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Hybrid Search (Week 8-9)
|
||||
|
||||
```rust
|
||||
// src/sparse/hybrid.rs
|
||||
|
||||
/// Hybrid dense + sparse search
|
||||
pub struct HybridSearch {
|
||||
dense_weight: f32,
|
||||
sparse_weight: f32,
|
||||
fusion_method: FusionMethod,
|
||||
}
|
||||
|
||||
pub enum FusionMethod {
|
||||
/// Linear combination of scores
|
||||
Linear,
|
||||
/// Reciprocal Rank Fusion
|
||||
RRF { k: f32 },
|
||||
/// Learned fusion weights
|
||||
Learned { model: FusionModel },
|
||||
}
|
||||
|
||||
impl HybridSearch {
|
||||
/// Combine dense and sparse results
|
||||
pub fn search(
|
||||
&self,
|
||||
dense_results: &[(u64, f32)],
|
||||
sparse_results: &[(u64, f32)],
|
||||
k: usize,
|
||||
) -> Vec<(u64, f32)> {
|
||||
match &self.fusion_method {
|
||||
FusionMethod::Linear => {
|
||||
self.linear_fusion(dense_results, sparse_results, k)
|
||||
}
|
||||
FusionMethod::RRF { k: rrf_k } => {
|
||||
self.rrf_fusion(dense_results, sparse_results, k, *rrf_k)
|
||||
}
|
||||
FusionMethod::Learned { model } => {
|
||||
model.fuse(dense_results, sparse_results, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn linear_fusion(
|
||||
&self,
|
||||
dense: &[(u64, f32)],
|
||||
sparse: &[(u64, f32)],
|
||||
k: usize,
|
||||
) -> Vec<(u64, f32)> {
|
||||
let mut scores: HashMap<u64, f32> = HashMap::new();
|
||||
|
||||
// Normalize dense scores to [0, 1]
|
||||
let dense_max = dense.iter().map(|(_, s)| *s).fold(0.0f32, f32::max);
|
||||
for (id, score) in dense {
|
||||
let normalized = if dense_max > 0.0 { score / dense_max } else { 0.0 };
|
||||
*scores.entry(*id).or_insert(0.0) += self.dense_weight * normalized;
|
||||
}
|
||||
|
||||
// Normalize sparse scores to [0, 1]
|
||||
let sparse_max = sparse.iter().map(|(_, s)| *s).fold(0.0f32, f32::max);
|
||||
for (id, score) in sparse {
|
||||
let normalized = if sparse_max > 0.0 { score / sparse_max } else { 0.0 };
|
||||
*scores.entry(*id).or_insert(0.0) += self.sparse_weight * normalized;
|
||||
}
|
||||
|
||||
let mut results: Vec<_> = scores.into_iter().collect();
|
||||
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
||||
results.truncate(k);
|
||||
results
|
||||
}
|
||||
|
||||
fn rrf_fusion(
|
||||
&self,
|
||||
dense: &[(u64, f32)],
|
||||
sparse: &[(u64, f32)],
|
||||
k: usize,
|
||||
rrf_k: f32,
|
||||
) -> Vec<(u64, f32)> {
|
||||
let mut scores: HashMap<u64, f32> = HashMap::new();
|
||||
|
||||
// RRF: 1 / (k + rank)
|
||||
for (rank, (id, _)) in dense.iter().enumerate() {
|
||||
*scores.entry(*id).or_insert(0.0) += self.dense_weight / (rrf_k + rank as f32 + 1.0);
|
||||
}
|
||||
|
||||
for (rank, (id, _)) in sparse.iter().enumerate() {
|
||||
*scores.entry(*id).or_insert(0.0) += self.sparse_weight / (rrf_k + rank as f32 + 1.0);
|
||||
}
|
||||
|
||||
let mut results: Vec<_> = scores.into_iter().collect();
|
||||
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
||||
results.truncate(k);
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_hybrid_search(
|
||||
table_name: &str,
|
||||
dense_column: &str,
|
||||
sparse_column: &str,
|
||||
dense_query: Vec<f32>,
|
||||
sparse_query: PgSparseVec,
|
||||
dense_weight: default!(f32, 0.7),
|
||||
sparse_weight: default!(f32, 0.3),
|
||||
k: default!(i32, 10),
|
||||
fusion: default!(&str, "'linear'"),
|
||||
) -> TableIterator<'static, (name!(id, i64), name!(score, f32))> {
|
||||
// Implementation using SPI
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: SPLADE Integration (Week 10)
|
||||
|
||||
```rust
|
||||
// src/sparse/splade.rs
|
||||
|
||||
/// SPLADE-style learned sparse representations
|
||||
pub struct SpladeEncoder {
|
||||
/// Vocab size for term indices
|
||||
vocab_size: usize,
|
||||
/// Sparsity threshold
|
||||
threshold: f32,
|
||||
}
|
||||
|
||||
impl SpladeEncoder {
|
||||
/// Convert dense embedding to SPLADE-style sparse
|
||||
/// (typically done externally, but we support post-processing)
|
||||
pub fn sparsify(&self, logits: &[f32]) -> SparseVec {
|
||||
let mut indices = Vec::new();
|
||||
let mut values = Vec::new();
|
||||
|
||||
for (i, &logit) in logits.iter().enumerate() {
|
||||
// ReLU + log(1 + x) activation
|
||||
if logit > 0.0 {
|
||||
let value = (1.0 + logit).ln();
|
||||
if value > self.threshold {
|
||||
indices.push(i as u32);
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SparseVec::new(indices, values, self.vocab_size as u32).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_to_sparse(
|
||||
indices: Vec<i32>,
|
||||
values: Vec<f32>,
|
||||
dim: i32,
|
||||
) -> PgSparseVec {
|
||||
let indices: Vec<u32> = indices.into_iter().map(|i| i as u32).collect();
|
||||
PgSparseVec(SparseVec::new(indices, values, dim as u32).unwrap())
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_sparse_top_k(sparse: PgSparseVec, k: i32) -> PgSparseVec {
|
||||
PgSparseVec(sparse.0.top_k(k as usize))
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_sparse_prune(sparse: PgSparseVec, threshold: f32) -> PgSparseVec {
|
||||
let mut result = sparse.0.clone();
|
||||
result.prune(threshold);
|
||||
PgSparseVec(result)
|
||||
}
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Operation | NNZ (query) | NNZ (doc) | Dim | Time (μs) |
|
||||
|-----------|-------------|-----------|-----|-----------|
|
||||
| Dot Product | 100 | 100 | 30K | 0.8 |
|
||||
| Cosine | 100 | 100 | 30K | 1.2 |
|
||||
| Inverted Search | 100 | - | 30K | 450 |
|
||||
| Hybrid Search | 100 | 768 | 30K | 1200 |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Concurrent collections
|
||||
dashmap = "6.0"
|
||||
|
||||
# Ordered floats for heaps
|
||||
ordered-float = "4.2"
|
||||
|
||||
# Serialization
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
bincode = "2.0.0-rc.3"
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
sparse = []
|
||||
sparse-inverted = ["sparse"]
|
||||
sparse-hybrid = ["sparse"]
|
||||
sparse-all = ["sparse-inverted", "sparse-hybrid"]
|
||||
```
|
||||
---

<!-- Next document: vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/06-graph-operations.md (new vendored file, 954 lines) -->
|
||||
# Graph Operations & Cypher Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate graph database capabilities from `ruvector-graph` into PostgreSQL, enabling Cypher query language support, property graph operations, and vector-enhanced graph traversals directly in SQL.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Cypher Engine │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐ │ │
|
||||
│ │ │ Parser │→│ Planner │→│ Executor │→│ Result │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ └─────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Property Graph Store │ │
|
||||
│ │ ┌───────────┐ ┌───────────┐ ┌───────────────────┐ │ │
|
||||
│ │ │ Nodes │ │ Edges │ │ Vector Embeddings │ │ │
|
||||
│ │ │ (Labels) │ │ (Types) │ │ (HNSW Index) │ │ │
|
||||
│ │ └───────────┘ └───────────┘ └───────────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── graph/
|
||||
│ ├── mod.rs # Module exports
|
||||
│ ├── cypher/
|
||||
│ │ ├── parser.rs # Cypher parser (pest/nom)
|
||||
│ │ ├── ast.rs # Abstract syntax tree
|
||||
│ │ ├── planner.rs # Query planner
|
||||
│ │ ├── executor.rs # Query executor
|
||||
│ │ └── functions.rs # Built-in Cypher functions
|
||||
│ ├── storage/
|
||||
│ │ ├── nodes.rs # Node storage
|
||||
│ │ ├── edges.rs # Edge storage
|
||||
│ │ └── properties.rs # Property storage
|
||||
│ ├── traversal/
|
||||
│ │ ├── bfs.rs # Breadth-first search
|
||||
│ │ ├── dfs.rs # Depth-first search
|
||||
│ │ ├── shortest_path.rs # Shortest path algorithms
|
||||
│ │ └── vector_walk.rs # Vector-guided traversal
|
||||
│ ├── index/
|
||||
│ │ ├── label_index.rs # Label-based index
|
||||
│ │ └── property_index.rs # Property index
|
||||
│ └── operators.rs # SQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Graph Schema Setup
|
||||
|
||||
```sql
|
||||
-- Create a property graph
|
||||
SELECT ruvector_create_graph('social_network');
|
||||
|
||||
-- Define node labels
|
||||
SELECT ruvector_create_node_label('social_network', 'Person',
|
||||
properties := '{
|
||||
"name": "text",
|
||||
"age": "integer",
|
||||
"embedding": "vector(768)"
|
||||
}'
|
||||
);
|
||||
|
||||
SELECT ruvector_create_node_label('social_network', 'Company',
|
||||
properties := '{
|
||||
"name": "text",
|
||||
"industry": "text",
|
||||
"embedding": "vector(768)"
|
||||
}'
|
||||
);
|
||||
|
||||
-- Define edge types
|
||||
SELECT ruvector_create_edge_type('social_network', 'KNOWS',
|
||||
properties := '{"since": "date", "strength": "float"}'
|
||||
);
|
||||
|
||||
SELECT ruvector_create_edge_type('social_network', 'WORKS_AT',
|
||||
properties := '{"role": "text", "since": "date"}'
|
||||
);
|
||||
```
|
||||
|
||||
### Cypher Queries
|
||||
|
||||
```sql
|
||||
-- Execute Cypher queries
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH (p:Person)-[:KNOWS]->(friend:Person)
|
||||
WHERE p.name = 'Alice'
|
||||
RETURN friend.name, friend.age
|
||||
$$);
|
||||
|
||||
-- Create nodes
|
||||
SELECT ruvector_cypher('social_network', $$
|
||||
CREATE (p:Person {name: 'Bob', age: 30, embedding: $embedding})
|
||||
RETURN p
|
||||
$$, params := '{"embedding": [0.1, 0.2, ...]}');
|
||||
|
||||
-- Create relationships
|
||||
SELECT ruvector_cypher('social_network', $$
|
||||
MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'})
|
||||
CREATE (a)-[:KNOWS {since: date('2024-01-15'), strength: 0.8}]->(b)
|
||||
$$);
|
||||
|
||||
-- Pattern matching
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH (p:Person)-[:WORKS_AT]->(c:Company {industry: 'Tech'})
|
||||
RETURN p.name, c.name
|
||||
ORDER BY p.age DESC
|
||||
LIMIT 10
|
||||
$$);
|
||||
```
|
||||
|
||||
### Vector-Enhanced Graph Queries
|
||||
|
||||
```sql
|
||||
-- Find similar nodes using vector search + graph structure
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH (p:Person)
|
||||
WHERE ruvector.similarity(p.embedding, $query) > 0.8
|
||||
RETURN p.name, p.age, ruvector.similarity(p.embedding, $query) AS similarity
|
||||
ORDER BY similarity DESC
|
||||
LIMIT 10
|
||||
$$, params := '{"query": [0.1, 0.2, ...]}');
|
||||
|
||||
-- Graph-aware semantic search
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH (p:Person)-[:KNOWS*1..3]->(friend:Person)
|
||||
WHERE p.name = 'Alice'
|
||||
WITH friend, ruvector.similarity(friend.embedding, $query) AS sim
|
||||
WHERE sim > 0.7
|
||||
RETURN friend.name, sim
|
||||
ORDER BY sim DESC
|
||||
$$, params := '{"query": [0.1, 0.2, ...]}');
|
||||
|
||||
-- Personalized PageRank with vector similarity
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
CALL ruvector.pagerank('Person', 'KNOWS', {
|
||||
dampingFactor: 0.85,
|
||||
iterations: 20,
|
||||
personalizedOn: $seed_embedding
|
||||
})
|
||||
YIELD node, score
|
||||
RETURN node.name, score
|
||||
ORDER BY score DESC
|
||||
LIMIT 20
|
||||
$$, params := '{"seed_embedding": [0.1, 0.2, ...]}');
|
||||
```
|
||||
|
||||
### Path Finding
|
||||
|
||||
```sql
|
||||
-- Shortest path
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH p = shortestPath((a:Person {name: 'Alice'})-[:KNOWS*1..6]-(b:Person {name: 'Bob'}))
|
||||
RETURN p, length(p)
|
||||
$$);
|
||||
|
||||
-- All shortest paths
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH p = allShortestPaths((a:Person {name: 'Alice'})-[:KNOWS*1..6]-(b:Person {name: 'Bob'}))
|
||||
RETURN p, length(p)
|
||||
$$);
|
||||
|
||||
-- Vector-guided path (minimize embedding distance along path)
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
MATCH p = ruvector.vectorPath(
|
||||
(a:Person {name: 'Alice'}),
|
||||
(b:Person {name: 'Bob'}),
|
||||
'KNOWS',
|
||||
{
|
||||
maxHops: 6,
|
||||
vectorProperty: 'embedding',
|
||||
optimization: 'minTotalDistance'
|
||||
}
|
||||
)
|
||||
RETURN p, ruvector.pathEmbeddingDistance(p) AS distance
|
||||
$$);
|
||||
```
|
||||
|
||||
### Graph Algorithms
|
||||
|
||||
```sql
|
||||
-- Community detection (Louvain)
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
CALL ruvector.louvain('Person', 'KNOWS', {resolution: 1.0})
|
||||
YIELD node, communityId
|
||||
RETURN node.name, communityId
|
||||
$$);
|
||||
|
||||
-- Node similarity (Jaccard)
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
CALL ruvector.nodeSimilarity('Person', 'KNOWS', {
|
||||
similarityCutoff: 0.5,
|
||||
topK: 10
|
||||
})
|
||||
YIELD node1, node2, similarity
|
||||
RETURN node1.name, node2.name, similarity
|
||||
$$);
|
||||
|
||||
-- Centrality measures
|
||||
SELECT * FROM ruvector_cypher('social_network', $$
|
||||
CALL ruvector.betweenness('Person', 'KNOWS')
|
||||
YIELD node, score
|
||||
RETURN node.name, score
|
||||
ORDER BY score DESC
|
||||
LIMIT 10
|
||||
$$);
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Cypher Parser (Week 1-3)
|
||||
|
||||
```rust
|
||||
// src/graph/cypher/parser.rs
|
||||
|
||||
use pest::Parser;
|
||||
use pest_derive::Parser;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[grammar = "graph/cypher/cypher.pest"]
|
||||
pub struct CypherParser;
|
||||
|
||||
/// Parse Cypher query string into AST
|
||||
pub fn parse_cypher(query: &str) -> Result<CypherQuery, ParseError> {
|
||||
let pairs = CypherParser::parse(Rule::query, query)?;
|
||||
|
||||
let mut builder = AstBuilder::new();
|
||||
for pair in pairs {
|
||||
builder.process(pair)?;
|
||||
}
|
||||
|
||||
Ok(builder.build())
|
||||
}
|
||||
|
||||
// src/graph/cypher/ast.rs
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CypherQuery {
|
||||
Match(MatchClause),
|
||||
Create(CreateClause),
|
||||
Merge(MergeClause),
|
||||
Delete(DeleteClause),
|
||||
Return(ReturnClause),
|
||||
With(WithClause),
|
||||
Compound(Vec<CypherQuery>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MatchClause {
|
||||
pub patterns: Vec<Pattern>,
|
||||
pub where_clause: Option<WhereClause>,
|
||||
pub optional: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Pattern {
|
||||
pub nodes: Vec<NodePattern>,
|
||||
pub relationships: Vec<RelationshipPattern>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodePattern {
|
||||
pub variable: Option<String>,
|
||||
pub labels: Vec<String>,
|
||||
pub properties: Option<Properties>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RelationshipPattern {
|
||||
pub variable: Option<String>,
|
||||
pub types: Vec<String>,
|
||||
pub properties: Option<Properties>,
|
||||
pub direction: Direction,
|
||||
pub length: RelationshipLength,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RelationshipLength {
|
||||
Exactly(usize),
|
||||
Range(Option<usize>, Option<usize>), // *1..3
|
||||
Any, // *
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Query Planner (Week 4-5)
|
||||
|
||||
```rust
|
||||
// src/graph/cypher/planner.rs
|
||||
|
||||
pub struct QueryPlanner {
|
||||
graph_store: Arc<GraphStore>,
|
||||
statistics: Arc<GraphStatistics>,
|
||||
}
|
||||
|
||||
impl QueryPlanner {
|
||||
pub fn plan(&self, query: &CypherQuery) -> Result<QueryPlan, PlanError> {
|
||||
let logical_plan = self.to_logical(query)?;
|
||||
let optimized = self.optimize(logical_plan)?;
|
||||
let physical_plan = self.to_physical(optimized)?;
|
||||
|
||||
Ok(physical_plan)
|
||||
}
|
||||
|
||||
fn to_logical(&self, query: &CypherQuery) -> Result<LogicalPlan, PlanError> {
|
||||
match query {
|
||||
CypherQuery::Match(m) => self.plan_match(m),
|
||||
CypherQuery::Create(c) => self.plan_create(c),
|
||||
CypherQuery::Return(r) => self.plan_return(r),
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
fn plan_match(&self, match_clause: &MatchClause) -> Result<LogicalPlan, PlanError> {
|
||||
let mut plan = LogicalPlan::Scan;
|
||||
|
||||
for pattern in &match_clause.patterns {
|
||||
// Choose optimal starting point based on selectivity
|
||||
let start_node = self.choose_start_node(pattern);
|
||||
|
||||
// Build expand operations
|
||||
for rel in &pattern.relationships {
|
||||
plan = LogicalPlan::Expand {
|
||||
input: Box::new(plan),
|
||||
relationship: rel.clone(),
|
||||
direction: rel.direction,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Add filter for WHERE clause
|
||||
if let Some(where_clause) = &match_clause.where_clause {
|
||||
plan = LogicalPlan::Filter {
|
||||
input: Box::new(plan),
|
||||
predicate: where_clause.predicate.clone(),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan, PlanError> {
|
||||
let mut optimized = plan;
|
||||
|
||||
// Push down filters
|
||||
optimized = self.push_down_filters(optimized);
|
||||
|
||||
// Reorder joins based on selectivity
|
||||
optimized = self.reorder_joins(optimized);
|
||||
|
||||
// Use indexes where available
|
||||
optimized = self.apply_indexes(optimized);
|
||||
|
||||
Ok(optimized)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LogicalPlan {
|
||||
Scan,
|
||||
NodeByLabel { label: String },
|
||||
NodeById { ids: Vec<u64> },
|
||||
Expand {
|
||||
input: Box<LogicalPlan>,
|
||||
relationship: RelationshipPattern,
|
||||
direction: Direction,
|
||||
},
|
||||
Filter {
|
||||
input: Box<LogicalPlan>,
|
||||
predicate: Expression,
|
||||
},
|
||||
Project {
|
||||
input: Box<LogicalPlan>,
|
||||
expressions: Vec<(String, Expression)>,
|
||||
},
|
||||
VectorSearch {
|
||||
label: String,
|
||||
property: String,
|
||||
query: Vec<f32>,
|
||||
k: usize,
|
||||
},
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Query Executor (Week 6-8)
|
||||
|
||||
```rust
|
||||
// src/graph/cypher/executor.rs
|
||||
|
||||
pub struct QueryExecutor {
|
||||
graph_store: Arc<GraphStore>,
|
||||
}
|
||||
|
||||
impl QueryExecutor {
|
||||
pub fn execute(&self, plan: &QueryPlan) -> Result<QueryResult, ExecuteError> {
|
||||
match plan {
|
||||
QueryPlan::Scan { label } => self.scan_nodes(label),
|
||||
QueryPlan::Expand { input, rel, dir } => {
|
||||
let source_rows = self.execute(input)?;
|
||||
self.expand_relationships(&source_rows, rel, dir)
|
||||
}
|
||||
QueryPlan::Filter { input, predicate } => {
|
||||
let rows = self.execute(input)?;
|
||||
self.filter_rows(&rows, predicate)
|
||||
}
|
||||
QueryPlan::VectorSearch { label, property, query, k } => {
|
||||
self.vector_search(label, property, query, *k)
|
||||
}
|
||||
QueryPlan::ShortestPath { start, end, rel_types, max_hops } => {
|
||||
self.find_shortest_path(start, end, rel_types, *max_hops)
|
||||
}
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
fn expand_relationships(
|
||||
&self,
|
||||
source_rows: &QueryResult,
|
||||
rel_pattern: &RelationshipPattern,
|
||||
direction: &Direction,
|
||||
) -> Result<QueryResult, ExecuteError> {
|
||||
let mut result_rows = Vec::new();
|
||||
|
||||
for row in source_rows.rows() {
|
||||
let node_id = row.get_node_id()?;
|
||||
|
||||
let edges = match direction {
|
||||
Direction::Outgoing => self.graph_store.outgoing_edges(node_id, &rel_pattern.types),
|
||||
Direction::Incoming => self.graph_store.incoming_edges(node_id, &rel_pattern.types),
|
||||
Direction::Both => self.graph_store.all_edges(node_id, &rel_pattern.types),
|
||||
};
|
||||
|
||||
for edge in edges {
|
||||
let target = match direction {
|
||||
Direction::Outgoing => edge.target,
|
||||
Direction::Incoming => edge.source,
|
||||
Direction::Both => if edge.source == node_id { edge.target } else { edge.source },
|
||||
};
|
||||
|
||||
let target_node = self.graph_store.get_node(target)?;
|
||||
|
||||
// Check relationship properties
|
||||
if let Some(props) = &rel_pattern.properties {
|
||||
if !self.matches_properties(&edge.properties, props) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_row = row.clone();
|
||||
if let Some(var) = &rel_pattern.variable {
|
||||
new_row.set(var, Value::Relationship(edge.clone()));
|
||||
}
|
||||
new_row.extend_with_node(target_node);
|
||||
|
||||
result_rows.push(new_row);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(QueryResult::from_rows(result_rows))
|
||||
}
|
||||
|
||||
fn vector_search(
|
||||
&self,
|
||||
label: &str,
|
||||
property: &str,
|
||||
query: &[f32],
|
||||
k: usize,
|
||||
) -> Result<QueryResult, ExecuteError> {
|
||||
// Use HNSW index for vector search
|
||||
let index = self.graph_store.get_vector_index(label, property)?;
|
||||
let results = index.search(query, k);
|
||||
|
||||
let mut rows = Vec::with_capacity(k);
|
||||
for (node_id, score) in results {
|
||||
let node = self.graph_store.get_node(node_id)?;
|
||||
let mut row = Row::new();
|
||||
row.set("node", Value::Node(node));
|
||||
row.set("score", Value::Float(score));
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
Ok(QueryResult::from_rows(rows))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Graph Storage (Week 9-10)
|
||||
|
||||
```rust
|
||||
// src/graph/storage/nodes.rs
|
||||
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
|
||||
/// Node storage with label-based indexing
|
||||
pub struct NodeStore {
|
||||
/// node_id -> node data
|
||||
nodes: DashMap<u64, Node>,
|
||||
/// label -> set of node_ids
|
||||
label_index: DashMap<String, HashSet<u64>>,
|
||||
/// (label, property) -> property index
|
||||
property_indexes: DashMap<(String, String), PropertyIndex>,
|
||||
/// (label, property) -> vector index
|
||||
vector_indexes: DashMap<(String, String), HnswIndex>,
|
||||
/// Next node ID
|
||||
next_id: AtomicU64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Node {
|
||||
pub id: u64,
|
||||
pub labels: Vec<String>,
|
||||
pub properties: Properties,
|
||||
}
|
||||
|
||||
impl NodeStore {
|
||||
pub fn create_node(&self, labels: Vec<String>, properties: Properties) -> u64 {
|
||||
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
let node = Node { id, labels: labels.clone(), properties: properties.clone() };
|
||||
|
||||
// Add to main store
|
||||
self.nodes.insert(id, node);
|
||||
|
||||
// Update label indexes
|
||||
for label in &labels {
|
||||
self.label_index
|
||||
.entry(label.clone())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
}
|
||||
|
||||
// Update property indexes
|
||||
for (key, value) in &properties {
|
||||
for label in &labels {
|
||||
if let Some(idx) = self.property_indexes.get(&(label.clone(), key.clone())) {
|
||||
idx.insert(value.clone(), id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update vector indexes
|
||||
for (key, value) in &properties {
|
||||
if let Value::Vector(vec) = value {
|
||||
for label in &labels {
|
||||
if let Some(idx) = self.vector_indexes.get(&(label.clone(), key.clone())) {
|
||||
idx.insert(id, vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
    pub fn nodes_by_label(&self, label: &str) -> Vec<Node> {
        // DashMap `Ref` guards cannot outlive the lookup, so return owned clones
        // rather than references into the map.
        self.label_index
            .get(label)
            .map(|ids| {
                ids.iter()
                    .filter_map(|id| self.nodes.get(id).map(|n| n.value().clone()))
                    .collect()
            })
            .unwrap_or_default()
    }
|
||||
}
|
||||
|
||||
// src/graph/storage/edges.rs
|
||||
|
||||
/// Edge storage with adjacency lists
|
||||
pub struct EdgeStore {
|
||||
/// edge_id -> edge data
|
||||
edges: DashMap<u64, Edge>,
|
||||
/// node_id -> outgoing edges
|
||||
outgoing: DashMap<u64, Vec<u64>>,
|
||||
/// node_id -> incoming edges
|
||||
incoming: DashMap<u64, Vec<u64>>,
|
||||
/// edge_type -> set of edge_ids
|
||||
type_index: DashMap<String, HashSet<u64>>,
|
||||
/// Next edge ID
|
||||
next_id: AtomicU64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Edge {
|
||||
pub id: u64,
|
||||
pub source: u64,
|
||||
pub target: u64,
|
||||
pub edge_type: String,
|
||||
pub properties: Properties,
|
||||
}
|
||||
|
||||
impl EdgeStore {
|
||||
pub fn create_edge(
|
||||
&self,
|
||||
source: u64,
|
||||
target: u64,
|
||||
edge_type: String,
|
||||
properties: Properties,
|
||||
) -> u64 {
|
||||
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
let edge = Edge {
|
||||
id,
|
||||
source,
|
||||
target,
|
||||
edge_type: edge_type.clone(),
|
||||
properties,
|
||||
};
|
||||
|
||||
// Add to main store
|
||||
self.edges.insert(id, edge);
|
||||
|
||||
// Update adjacency lists
|
||||
self.outgoing.entry(source).or_insert_with(Vec::new).push(id);
|
||||
self.incoming.entry(target).or_insert_with(Vec::new).push(id);
|
||||
|
||||
// Update type index
|
||||
self.type_index
|
||||
.entry(edge_type)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
    pub fn outgoing_edges(&self, node_id: u64, types: &[String]) -> Vec<Edge> {
        // Clone matching edges out of the DashMap so no guard borrows escape
        // this method's scope.
        self.outgoing
            .get(&node_id)
            .map(|edge_ids| {
                edge_ids.iter()
                    .filter_map(|id| self.edges.get(id))
                    .filter(|e| types.is_empty() || types.contains(&e.edge_type))
                    .map(|e| e.value().clone())
                    .collect()
            })
            .unwrap_or_default()
    }
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: Graph Algorithms (Week 11-12)
|
||||
|
||||
```rust
|
||||
// src/graph/traversal/shortest_path.rs
|
||||
|
||||
use std::cmp::Reverse;
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
use ordered_float::OrderedFloat; // `ordered-float` crate: gives f64 a total order for BinaryHeap keys
|
||||
|
||||
/// BFS-based shortest path
|
||||
pub fn shortest_path_bfs(
|
||||
store: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
edge_types: &[String],
|
||||
max_hops: usize,
|
||||
) -> Option<Vec<u64>> {
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
let mut parents: HashMap<u64, u64> = HashMap::new();
|
||||
|
||||
queue.push_back((start, 0));
|
||||
visited.insert(start);
|
||||
|
||||
while let Some((node, depth)) = queue.pop_front() {
|
||||
if node == end {
|
||||
// Reconstruct path
|
||||
return Some(reconstruct_path(&parents, start, end));
|
||||
}
|
||||
|
||||
if depth >= max_hops {
|
||||
continue;
|
||||
}
|
||||
|
||||
for edge in store.edges.outgoing_edges(node, edge_types) {
|
||||
if !visited.contains(&edge.target) {
|
||||
visited.insert(edge.target);
|
||||
parents.insert(edge.target, node);
|
||||
queue.push_back((edge.target, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Dijkstra's algorithm for weighted shortest path
|
||||
pub fn shortest_path_dijkstra(
|
||||
store: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
edge_types: &[String],
|
||||
weight_property: &str,
|
||||
) -> Option<(Vec<u64>, f64)> {
|
||||
let mut distances: HashMap<u64, f64> = HashMap::new();
|
||||
let mut parents: HashMap<u64, u64> = HashMap::new();
|
||||
let mut heap = BinaryHeap::new();
|
||||
|
||||
distances.insert(start, 0.0);
|
||||
heap.push(Reverse((OrderedFloat(0.0), start)));
|
||||
|
||||
while let Some(Reverse((OrderedFloat(dist), node))) = heap.pop() {
|
||||
if node == end {
|
||||
return Some((reconstruct_path(&parents, start, end), dist));
|
||||
}
|
||||
|
||||
if dist > *distances.get(&node).unwrap_or(&f64::INFINITY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for edge in store.edges.outgoing_edges(node, edge_types) {
|
||||
let weight = edge.properties
|
||||
.get(weight_property)
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(1.0);
|
||||
|
||||
let new_dist = dist + weight;
|
||||
|
||||
if new_dist < *distances.get(&edge.target).unwrap_or(&f64::INFINITY) {
|
||||
distances.insert(edge.target, new_dist);
|
||||
parents.insert(edge.target, node);
|
||||
heap.push(Reverse((OrderedFloat(new_dist), edge.target)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Vector-guided path finding
|
||||
pub fn vector_guided_path(
|
||||
store: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
edge_types: &[String],
|
||||
vector_property: &str,
|
||||
max_hops: usize,
|
||||
) -> Option<Vec<u64>> {
|
||||
let target_vec = store.nodes.get_node(end)?
|
||||
.properties.get(vector_property)?
|
||||
.as_vector()?;
|
||||
|
||||
let mut heap = BinaryHeap::new();
|
||||
let mut visited = HashSet::new();
|
||||
let mut parents: HashMap<u64, u64> = HashMap::new();
|
||||
|
||||
let start_vec = store.nodes.get_node(start)?
|
||||
.properties.get(vector_property)?
|
||||
.as_vector()?;
|
||||
|
||||
let start_dist = cosine_distance(start_vec, target_vec);
|
||||
heap.push(Reverse((OrderedFloat(start_dist), start, 0)));
|
||||
|
||||
while let Some(Reverse((_, node, depth))) = heap.pop() {
|
||||
if node == end {
|
||||
return Some(reconstruct_path(&parents, start, end));
|
||||
}
|
||||
|
||||
if visited.contains(&node) || depth >= max_hops {
|
||||
continue;
|
||||
}
|
||||
visited.insert(node);
|
||||
|
||||
for edge in store.edges.outgoing_edges(node, edge_types) {
|
||||
if visited.contains(&edge.target) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(vec) = store.nodes.get_node(edge.target)
|
||||
.and_then(|n| n.properties.get(vector_property))
|
||||
.and_then(|v| v.as_vector())
|
||||
{
|
||||
let dist = cosine_distance(vec, target_vec);
|
||||
parents.insert(edge.target, node);
|
||||
heap.push(Reverse((OrderedFloat(dist), edge.target, depth + 1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 6: PostgreSQL Integration (Week 13-14)
|
||||
|
||||
```rust
|
||||
// src/graph/operators.rs
|
||||
|
||||
// Main Cypher execution function
|
||||
#[pg_extern]
|
||||
fn ruvector_cypher(
|
||||
graph_name: &str,
|
||||
query: &str,
|
||||
params: default!(Option<pgrx::JsonB>, "NULL"),
|
||||
) -> TableIterator<'static, (name!(result, pgrx::JsonB),)> {
|
||||
let graph = get_or_create_graph(graph_name);
|
||||
|
||||
// Parse parameters
|
||||
let parameters = params
|
||||
.map(|p| serde_json::from_value(p.0).unwrap_or_default())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Parse query
|
||||
let ast = parse_cypher(query).expect("Failed to parse Cypher query");
|
||||
|
||||
// Plan query
|
||||
let plan = QueryPlanner::new(&graph).plan(&ast).expect("Failed to plan query");
|
||||
|
||||
// Execute query
|
||||
let result = QueryExecutor::new(&graph).execute(&plan).expect("Failed to execute query");
|
||||
|
||||
// Convert to table iterator
|
||||
let rows: Vec<_> = result.rows()
|
||||
.map(|row| (pgrx::JsonB(row.to_json()),))
|
||||
.collect();
|
||||
|
||||
TableIterator::new(rows)
|
||||
}
|
||||
|
||||
// Graph creation
|
||||
#[pg_extern]
|
||||
fn ruvector_create_graph(name: &str) -> bool {
|
||||
GRAPH_STORE.create_graph(name).is_ok()
|
||||
}
|
||||
|
||||
// Node label creation
|
||||
#[pg_extern]
|
||||
fn ruvector_create_node_label(
|
||||
graph_name: &str,
|
||||
label: &str,
|
||||
properties: pgrx::JsonB,
|
||||
) -> bool {
|
||||
let graph = get_graph(graph_name).expect("Graph not found");
|
||||
let schema: HashMap<String, String> = serde_json::from_value(properties.0)
|
||||
.expect("Invalid properties schema");
|
||||
|
||||
graph.create_label(label, schema).is_ok()
|
||||
}
|
||||
|
||||
// Edge type creation
|
||||
#[pg_extern]
|
||||
fn ruvector_create_edge_type(
|
||||
graph_name: &str,
|
||||
edge_type: &str,
|
||||
properties: pgrx::JsonB,
|
||||
) -> bool {
|
||||
let graph = get_graph(graph_name).expect("Graph not found");
|
||||
let schema: HashMap<String, String> = serde_json::from_value(properties.0)
|
||||
.expect("Invalid properties schema");
|
||||
|
||||
graph.create_edge_type(edge_type, schema).is_ok()
|
||||
}
|
||||
|
||||
// Helper to get graph statistics
|
||||
#[pg_extern]
|
||||
fn ruvector_graph_stats(graph_name: &str) -> pgrx::JsonB {
|
||||
let graph = get_graph(graph_name).expect("Graph not found");
|
||||
|
||||
pgrx::JsonB(serde_json::json!({
|
||||
"node_count": graph.node_count(),
|
||||
"edge_count": graph.edge_count(),
|
||||
"labels": graph.labels(),
|
||||
"edge_types": graph.edge_types(),
|
||||
"memory_mb": graph.memory_usage_mb(),
|
||||
}))
|
||||
}
|
||||
```
|
||||
|
||||
## Supported Cypher Features
|
||||
|
||||
### Clauses
|
||||
- `MATCH` - Pattern matching
|
||||
- `OPTIONAL MATCH` - Optional pattern matching
|
||||
- `CREATE` - Create nodes/relationships
|
||||
- `MERGE` - Match or create
|
||||
- `DELETE` / `DETACH DELETE` - Delete nodes/relationships
|
||||
- `SET` - Update properties
|
||||
- `REMOVE` - Remove properties/labels
|
||||
- `RETURN` - Return results
|
||||
- `WITH` - Query chaining
|
||||
- `WHERE` - Filtering
|
||||
- `ORDER BY` - Sorting
|
||||
- `SKIP` / `LIMIT` - Pagination
|
||||
- `UNION` / `UNION ALL` - Combining results
|
||||
|
||||
### Expressions
|
||||
- Property access: `n.name`
|
||||
- Labels: `n:Person`
|
||||
- Relationship types: `[:KNOWS]`
|
||||
- Variable length: `[:KNOWS*1..3]`
|
||||
- List comprehensions: `[x IN list WHERE x > 5]`
|
||||
- CASE expressions
|
||||
|
||||
### Functions
|
||||
- Aggregation: `count()`, `sum()`, `avg()`, `min()`, `max()`, `collect()`
|
||||
- String: `toUpper()`, `toLower()`, `trim()`, `split()`
|
||||
- Math: `abs()`, `ceil()`, `floor()`, `round()`, `sqrt()`
|
||||
- List: `head()`, `tail()`, `size()`, `range()`
|
||||
- Path: `length()`, `nodes()`, `relationships()`
|
||||
- **RuVector-specific**:
|
||||
- `ruvector.similarity(embedding1, embedding2)`
|
||||
- `ruvector.distance(embedding1, embedding2, metric)`
|
||||
- `ruvector.knn(embedding, k)`
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Operation | Nodes | Edges | Time (ms) |
|
||||
|-----------|-------|-------|-----------|
|
||||
| Simple MATCH | 100K | 1M | 2.5 |
|
||||
| 2-hop traversal | 100K | 1M | 15 |
|
||||
| Shortest path (BFS) | 100K | 1M | 8 |
|
||||
| Vector-guided path | 100K | 1M | 25 |
|
||||
| PageRank (20 iter) | 100K | 1M | 450 |
|
||||
| Community detection | 100K | 1M | 1200 |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Link to ruvector-graph
|
||||
ruvector-graph = { path = "../ruvector-graph", optional = true }
|
||||
|
||||
# Parser
|
||||
# optional so the `graph-cypher` feature flag can enable them
pest = { version = "2.7", optional = true }
pest_derive = { version = "2.7", optional = true }
|
||||
|
||||
# Concurrent collections
|
||||
dashmap = "6.0"
|
||||
parking_lot = "0.12"
|
||||
|
||||
# Graph algorithms
|
||||
petgraph = { version = "0.6", optional = true }
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
graph = []
|
||||
graph-cypher = ["graph", "pest", "pest_derive"]
|
||||
graph-algorithms = ["graph", "petgraph"]
|
||||
graph-vector = ["graph", "index-hnsw"]
|
||||
graph-all = ["graph-cypher", "graph-algorithms", "graph-vector"]
|
||||
```
|
||||
---

<!-- New vendored file: vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md (985 lines) -->
|
||||
# Tiny Dancer Routing Integration Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate AI agent routing capabilities from `ruvector-tiny-dancer` into PostgreSQL, enabling intelligent request routing, model selection, and cost optimization directly in SQL.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PostgreSQL Extension │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Tiny Dancer Router │ │
|
||||
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │
|
||||
│ │ │ FastGRNN │ │ Route │ │ Cost │ │ │
|
||||
│ │ │ Inference │ │ Classifier │ │ Optimizer │ │ │
|
||||
│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │
|
||||
│ └─────────┼─────────────────┼─────────────────┼───────────┘ │
|
||||
│ └─────────────────┴─────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ Agent Registry & Pool │ │
|
||||
│ │ (LLMs, Tools, APIs) │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── routing/
|
||||
│ ├── mod.rs # Module exports
|
||||
│ ├── fastgrnn.rs # FastGRNN neural inference
|
||||
│ ├── router.rs # Main routing engine
|
||||
│ ├── classifier.rs # Route classification
|
||||
│ ├── cost_optimizer.rs # Cost/latency optimization
|
||||
│ ├── agents/
|
||||
│ │ ├── registry.rs # Agent registration
|
||||
│ │ ├── pool.rs # Agent pool management
|
||||
│ │ └── capabilities.rs # Capability matching
|
||||
│ ├── policies/
|
||||
│ │ ├── cost.rs # Cost-based routing
|
||||
│ │ ├── latency.rs # Latency-based routing
|
||||
│ │ ├── quality.rs # Quality-based routing
|
||||
│ │ └── hybrid.rs # Multi-objective routing
|
||||
│ └── operators.rs # SQL operators
|
||||
```
|
||||
|
||||
## SQL Interface
|
||||
|
||||
### Agent Registration
|
||||
|
||||
```sql
|
||||
-- Register AI agents/models
|
||||
SELECT ruvector_register_agent(
|
||||
name := 'gpt-4',
|
||||
agent_type := 'llm',
|
||||
capabilities := ARRAY['reasoning', 'code', 'analysis', 'creative'],
|
||||
cost_per_1k_tokens := 0.03,
|
||||
avg_latency_ms := 2500,
|
||||
quality_score := 0.95,
|
||||
metadata := '{"provider": "openai", "context_window": 128000}'
|
||||
);
|
||||
|
||||
SELECT ruvector_register_agent(
|
||||
name := 'claude-3-haiku',
|
||||
agent_type := 'llm',
|
||||
capabilities := ARRAY['fast-response', 'simple-tasks', 'classification'],
|
||||
cost_per_1k_tokens := 0.00025,
|
||||
avg_latency_ms := 400,
|
||||
quality_score := 0.80,
|
||||
metadata := '{"provider": "anthropic", "context_window": 200000}'
|
||||
);
|
||||
|
||||
SELECT ruvector_register_agent(
|
||||
name := 'code-specialist',
|
||||
agent_type := 'tool',
|
||||
capabilities := ARRAY['code-execution', 'debugging', 'testing'],
|
||||
cost_per_call := 0.001,
|
||||
avg_latency_ms := 100,
|
||||
quality_score := 0.90
|
||||
);
|
||||
|
||||
-- List registered agents
|
||||
SELECT * FROM ruvector_list_agents();
|
||||
```
|
||||
|
||||
### Basic Routing
|
||||
|
||||
```sql
|
||||
-- Route a request to the best agent
|
||||
SELECT * FROM ruvector_route(
|
||||
request := 'Write a Python function to calculate Fibonacci numbers',
|
||||
optimize_for := 'cost' -- or 'latency', 'quality', 'balanced'
|
||||
);
|
||||
|
||||
-- Result:
|
||||
-- | agent_name | confidence | estimated_cost | estimated_latency |
|
||||
-- |------------|------------|----------------|-------------------|
|
||||
-- | claude-3-haiku | 0.85 | 0.001 | 400ms |
|
||||
|
||||
-- Route with constraints
|
||||
SELECT * FROM ruvector_route(
|
||||
request := 'Analyze this complex legal document',
|
||||
required_capabilities := ARRAY['reasoning', 'analysis'],
|
||||
max_cost := 0.10,
|
||||
max_latency_ms := 5000,
|
||||
min_quality := 0.90
|
||||
);
|
||||
|
||||
-- Multi-agent routing (for complex tasks)
|
||||
SELECT * FROM ruvector_route_multi(
|
||||
request := 'Build and deploy a web application',
|
||||
num_agents := 3,
|
||||
strategy := 'pipeline' -- or 'parallel', 'ensemble'
|
||||
);
|
||||
```
|
||||
|
||||
### Semantic Routing
|
||||
|
||||
```sql
|
||||
-- Create semantic routes (like function calling)
|
||||
SELECT ruvector_create_route(
|
||||
name := 'customer_support',
|
||||
description := 'Handle customer support inquiries, complaints, and feedback',
|
||||
embedding := ruvector_embed('Customer support and help requests'),
|
||||
target_agent := 'support-agent',
|
||||
priority := 1
|
||||
);
|
||||
|
||||
SELECT ruvector_create_route(
|
||||
name := 'technical_docs',
|
||||
description := 'Answer questions about technical documentation and APIs',
|
||||
embedding := ruvector_embed('Technical documentation and API reference'),
|
||||
target_agent := 'docs-agent',
|
||||
priority := 2
|
||||
);
|
||||
|
||||
-- Semantic route matching
|
||||
SELECT * FROM ruvector_semantic_route(
|
||||
query := 'How do I reset my password?',
|
||||
top_k := 3
|
||||
);
|
||||
|
||||
-- Result:
|
||||
-- | route_name | similarity | target_agent | confidence |
|
||||
-- |------------|------------|--------------|------------|
|
||||
-- | customer_support | 0.92 | support-agent | 0.95 |
|
||||
```
|
||||
|
||||
### Cost Optimization
|
||||
|
||||
```sql
|
||||
-- Analyze routing costs
|
||||
SELECT * FROM ruvector_routing_analytics(
|
||||
time_range := '7 days',
|
||||
group_by := 'agent'
|
||||
);
|
||||
|
||||
-- Result:
|
||||
-- | agent | total_requests | total_cost | avg_latency | success_rate |
|
||||
-- |-------|----------------|------------|-------------|--------------|
|
||||
-- | gpt-4 | 1000 | $30.00 | 2.5s | 99.2% |
|
||||
-- | haiku | 5000 | $1.25 | 0.4s | 98.5% |
|
||||
|
||||
-- Optimize budget allocation
|
||||
SELECT * FROM ruvector_optimize_budget(
|
||||
monthly_budget := 100.00,
|
||||
quality_threshold := 0.85,
|
||||
latency_threshold_ms := 2000
|
||||
);
|
||||
|
||||
-- Auto-route with budget awareness
|
||||
SELECT * FROM ruvector_route(
|
||||
request := 'Summarize this article',
|
||||
budget_remaining := 10.00,
|
||||
optimize_for := 'quality_per_dollar'
|
||||
);
|
||||
```
|
||||
|
||||
### Batch Routing
|
||||
|
||||
```sql
|
||||
-- Route multiple requests efficiently
|
||||
SELECT * FROM ruvector_batch_route(
|
||||
requests := ARRAY[
|
||||
'Simple question 1',
|
||||
'Complex analysis task',
|
||||
'Code generation request'
|
||||
],
|
||||
optimize_for := 'total_cost'
|
||||
);
|
||||
|
||||
-- Classify requests in batch (for preprocessing)
|
||||
SELECT request_id, ruvector_classify_request(content) AS classification
|
||||
FROM pending_requests;
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: FastGRNN Core (Week 1-3)
|
||||
|
||||
```rust
|
||||
// src/routing/fastgrnn.rs
|
||||
|
||||
use simsimd::SpatialSimilarity;
|
||||
|
||||
/// FastGRNN (Fast Gated Recurrent Neural Network)
|
||||
/// Lightweight neural network for fast inference
|
||||
pub struct FastGRNN {
|
||||
// Gate weights
|
||||
w_gate: Vec<f32>, // [hidden, input]
|
||||
u_gate: Vec<f32>, // [hidden, hidden]
|
||||
b_gate: Vec<f32>, // [hidden]
|
||||
|
||||
// Update weights
|
||||
w_update: Vec<f32>, // [hidden, input]
|
||||
u_update: Vec<f32>, // [hidden, hidden]
|
||||
b_update: Vec<f32>, // [hidden]
|
||||
|
||||
// Hyperparameters
|
||||
zeta: f32, // Gate sparsity
|
||||
nu: f32, // Update sparsity
|
||||
|
||||
input_dim: usize,
|
||||
hidden_dim: usize,
|
||||
}
|
||||
|
||||
impl FastGRNN {
|
||||
pub fn new(input_dim: usize, hidden_dim: usize) -> Self {
|
||||
Self {
|
||||
w_gate: Self::init_weights(hidden_dim, input_dim),
|
||||
u_gate: Self::init_weights(hidden_dim, hidden_dim),
|
||||
b_gate: vec![0.0; hidden_dim],
|
||||
w_update: Self::init_weights(hidden_dim, input_dim),
|
||||
u_update: Self::init_weights(hidden_dim, hidden_dim),
|
||||
b_update: vec![0.0; hidden_dim],
|
||||
zeta: 1.0,
|
||||
nu: 1.0,
|
||||
input_dim,
|
||||
hidden_dim,
|
||||
}
|
||||
}
|
||||
|
||||
/// Single step forward pass
|
||||
/// h_t = (ζ * (1 - z_t) + ν) ⊙ tanh(Wx_t + Uh_{t-1} + b_h) + z_t ⊙ h_{t-1}
|
||||
pub fn step(&self, input: &[f32], hidden: &[f32]) -> Vec<f32> {
|
||||
// Gate: z = σ(W_z x + U_z h + b_z)
|
||||
let gate = self.sigmoid(&self.linear_combine(
|
||||
input, hidden,
|
||||
&self.w_gate, &self.u_gate, &self.b_gate
|
||||
));
|
||||
|
||||
// Update: h̃ = tanh(W_h x + U_h h + b_h)
|
||||
let update = self.tanh(&self.linear_combine(
|
||||
input, hidden,
|
||||
&self.w_update, &self.u_update, &self.b_update
|
||||
));
|
||||
|
||||
// New hidden: h = (ζ(1-z) + ν) ⊙ h̃ + z ⊙ h
|
||||
let mut new_hidden = vec![0.0; self.hidden_dim];
|
||||
for i in 0..self.hidden_dim {
|
||||
let gate_factor = self.zeta * (1.0 - gate[i]) + self.nu;
|
||||
new_hidden[i] = gate_factor * update[i] + gate[i] * hidden[i];
|
||||
}
|
||||
|
||||
new_hidden
|
||||
}
|
||||
|
||||
/// Process sequence
|
||||
pub fn forward(&self, sequence: &[Vec<f32>]) -> Vec<f32> {
|
||||
let mut hidden = vec![0.0; self.hidden_dim];
|
||||
|
||||
for input in sequence {
|
||||
hidden = self.step(input, &hidden);
|
||||
}
|
||||
|
||||
hidden
|
||||
}
|
||||
|
||||
/// Process single input (common case for routing)
|
||||
pub fn forward_single(&self, input: &[f32]) -> Vec<f32> {
|
||||
let hidden = vec![0.0; self.hidden_dim];
|
||||
self.step(input, &hidden)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn linear_combine(
|
||||
&self,
|
||||
input: &[f32],
|
||||
hidden: &[f32],
|
||||
w: &[f32],
|
||||
u: &[f32],
|
||||
b: &[f32],
|
||||
) -> Vec<f32> {
|
||||
let mut result = b.to_vec();
|
||||
|
||||
// W @ x
|
||||
for i in 0..self.hidden_dim {
|
||||
for j in 0..self.input_dim {
|
||||
result[i] += w[i * self.input_dim + j] * input[j];
|
||||
}
|
||||
}
|
||||
|
||||
// U @ h
|
||||
for i in 0..self.hidden_dim {
|
||||
for j in 0..self.hidden_dim {
|
||||
result[i] += u[i * self.hidden_dim + j] * hidden[j];
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn sigmoid(&self, x: &[f32]) -> Vec<f32> {
|
||||
x.iter().map(|&v| 1.0 / (1.0 + (-v).exp())).collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn tanh(&self, x: &[f32]) -> Vec<f32> {
|
||||
x.iter().map(|&v| v.tanh()).collect()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: Route Classifier (Week 4-5)
|
||||
|
||||
```rust
|
||||
// src/routing/classifier.rs
|
||||
|
||||
/// Route classifier using FastGRNN + linear head
pub struct RouteClassifier {
    fastgrnn: FastGRNN,        // encoder producing the hidden representation
    classifier_head: Vec<f32>, // [num_classes, hidden_dim], row-major weight matrix
    num_classes: usize,        // number of route categories
    class_names: Vec<String>,  // human-readable label per class index
}
|
||||
|
||||
impl RouteClassifier {
|
||||
/// Classify request to route category
|
||||
pub fn classify(&self, embedding: &[f32]) -> Vec<(String, f32)> {
|
||||
// FastGRNN encoding
|
||||
let hidden = self.fastgrnn.forward_single(embedding);
|
||||
|
||||
// Linear classifier
|
||||
let mut logits = vec![0.0; self.num_classes];
|
||||
for i in 0..self.num_classes {
|
||||
for j in 0..hidden.len() {
|
||||
logits[i] += self.classifier_head[i * hidden.len() + j] * hidden[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Softmax
|
||||
let probs = softmax(&logits);
|
||||
|
||||
// Return sorted by probability
|
||||
let mut results: Vec<_> = self.class_names.iter()
|
||||
.zip(probs.iter())
|
||||
.map(|(name, &prob)| (name.clone(), prob))
|
||||
.collect();
|
||||
|
||||
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
||||
results
|
||||
}
|
||||
|
||||
/// Multi-label classification (request may need multiple capabilities)
|
||||
pub fn classify_capabilities(&self, embedding: &[f32]) -> Vec<(String, f32)> {
|
||||
let hidden = self.fastgrnn.forward_single(embedding);
|
||||
|
||||
// Sigmoid for multi-label
|
||||
let mut results = Vec::new();
|
||||
for i in 0..self.num_classes {
|
||||
let mut logit = 0.0;
|
||||
for j in 0..hidden.len() {
|
||||
logit += self.classifier_head[i * hidden.len() + j] * hidden[j];
|
||||
}
|
||||
let prob = 1.0 / (1.0 + (-logit).exp());
|
||||
|
||||
if prob > 0.5 {
|
||||
results.push((self.class_names[i].clone(), prob));
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap());
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_classify_request(request: &str) -> pgrx::JsonB {
|
||||
let embedding = get_embedding(request);
|
||||
let classifier = get_route_classifier();
|
||||
|
||||
let classifications = classifier.classify(&embedding);
|
||||
|
||||
pgrx::JsonB(serde_json::json!({
|
||||
"classifications": classifications,
|
||||
"top_category": classifications.first().map(|(name, _)| name),
|
||||
"confidence": classifications.first().map(|(_, prob)| prob),
|
||||
}))
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Agent Registry (Week 6-7)
|
||||
|
||||
```rust
|
||||
// src/routing/agents/registry.rs
|
||||
|
||||
use dashmap::DashMap;
|
||||
|
||||
/// A routable execution target registered with the routing layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
    pub name: String,                   // unique registry key
    pub agent_type: AgentType,
    pub capabilities: Vec<String>,      // capability tags used for exact filtering
    pub capability_embedding: Vec<f32>, // Embedding of capabilities for semantic matching
    pub cost_model: CostModel,
    pub performance: AgentPerformance,  // rolling performance statistics
    pub metadata: serde_json::Value,    // free-form operator-supplied data
    pub active: bool,                   // inactive agents are skipped by routing
}
|
||||
|
||||
/// Kind of execution target an `Agent` represents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AgentType {
    LLM,      // large language model endpoint
    Tool,
    API,
    Human,    // human-in-the-loop handoff
    Ensemble, // composite of several agents
}
|
||||
|
||||
/// Pricing for an agent. All fields are optional: an agent may be
/// priced per token, per call, per second, or be free.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostModel {
    pub cost_per_1k_tokens: Option<f64>,
    pub cost_per_call: Option<f64>,
    pub cost_per_second: Option<f64>,
}
|
||||
|
||||
/// Rolling performance statistics used when scoring routing candidates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentPerformance {
    pub avg_latency_ms: f64,
    pub p99_latency_ms: f64, // initialized to 2x avg at registration time
    pub quality_score: f64,  // presumably normalized 0..=1 — TODO confirm scale
    pub success_rate: f64,   // fraction of successful requests (starts at 1.0)
    pub total_requests: u64,
}
|
||||
|
||||
/// Global agent registry
///
/// Concurrent map of agents plus an HNSW index over their capability
/// embeddings for semantic capability lookup.
pub struct AgentRegistry {
    agents: DashMap<String, Agent>,
    capability_index: HnswIndex, // For semantic capability matching
}
|
||||
|
||||
impl AgentRegistry {
|
||||
pub fn register(&self, agent: Agent) -> Result<(), RegistryError> {
|
||||
// Index capability embedding
|
||||
let embedding = &agent.capability_embedding;
|
||||
self.capability_index.insert(&agent.name, embedding);
|
||||
|
||||
self.agents.insert(agent.name.clone(), agent);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get(&self, name: &str) -> Option<Agent> {
|
||||
self.agents.get(name).map(|a| a.clone())
|
||||
}
|
||||
|
||||
pub fn find_by_capability(&self, capability: &str, k: usize) -> Vec<&Agent> {
|
||||
let embedding = get_embedding(capability);
|
||||
let results = self.capability_index.search(&embedding, k);
|
||||
|
||||
results.iter()
|
||||
.filter_map(|(name, _)| self.agents.get(name.as_str()).map(|a| a.value()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn list_active(&self) -> Vec<Agent> {
|
||||
self.agents.iter()
|
||||
.filter(|a| a.active)
|
||||
.map(|a| a.clone())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_register_agent(
|
||||
name: &str,
|
||||
agent_type: &str,
|
||||
capabilities: Vec<String>,
|
||||
cost_per_1k_tokens: default!(Option<f64>, "NULL"),
|
||||
cost_per_call: default!(Option<f64>, "NULL"),
|
||||
avg_latency_ms: f64,
|
||||
quality_score: f64,
|
||||
metadata: default!(Option<pgrx::JsonB>, "NULL"),
|
||||
) -> bool {
|
||||
let registry = get_agent_registry();
|
||||
|
||||
// Create capability embedding
|
||||
let capability_text = capabilities.join(", ");
|
||||
let capability_embedding = get_embedding(&capability_text);
|
||||
|
||||
let agent = Agent {
|
||||
name: name.to_string(),
|
||||
agent_type: agent_type.parse().unwrap_or(AgentType::LLM),
|
||||
capabilities,
|
||||
capability_embedding,
|
||||
cost_model: CostModel {
|
||||
cost_per_1k_tokens,
|
||||
cost_per_call,
|
||||
cost_per_second: None,
|
||||
},
|
||||
performance: AgentPerformance {
|
||||
avg_latency_ms,
|
||||
p99_latency_ms: avg_latency_ms * 2.0,
|
||||
quality_score,
|
||||
success_rate: 1.0,
|
||||
total_requests: 0,
|
||||
},
|
||||
metadata: metadata.map(|m| m.0).unwrap_or(serde_json::json!({})),
|
||||
active: true,
|
||||
};
|
||||
|
||||
registry.register(agent).is_ok()
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 4: Routing Engine (Week 8-9)
|
||||
|
||||
```rust
|
||||
// src/routing/router.rs
|
||||
|
||||
/// Central routing engine wiring together the agent registry, the
/// FastGRNN route classifier, the cost optimizer, and semantic routes.
pub struct Router {
    registry: Arc<AgentRegistry>,
    classifier: Arc<RouteClassifier>,
    optimizer: Arc<CostOptimizer>,
    semantic_routes: Arc<SemanticRoutes>,
}
|
||||
|
||||
/// Outcome of a routing request: the chosen agent plus the estimates
/// that justify the choice.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
    pub agent: Agent,
    pub confidence: f64,           // score of the winning agent
    pub estimated_cost: f64,       // rough token/call-based estimate
    pub estimated_latency_ms: f64, // taken from the agent's average latency
    pub reasoning: String,         // human-readable explanation
}
|
||||
|
||||
/// Optional hard filters applied to the candidate set before scoring.
/// `None` means "no constraint" for each field.
#[derive(Debug, Clone)]
pub struct RoutingConstraints {
    pub required_capabilities: Option<Vec<String>>, // agent must list all of these
    pub max_cost: Option<f64>,
    pub max_latency_ms: Option<f64>,
    pub min_quality: Option<f64>,
    pub excluded_agents: Option<Vec<String>>, // agents never to select
}
|
||||
|
||||
impl Router {
|
||||
/// Route request to best agent
|
||||
pub fn route(
|
||||
&self,
|
||||
request: &str,
|
||||
constraints: &RoutingConstraints,
|
||||
optimize_for: OptimizationTarget,
|
||||
) -> Result<RoutingDecision, RoutingError> {
|
||||
let embedding = get_embedding(request);
|
||||
|
||||
// Get candidate agents
|
||||
let mut candidates = self.get_candidates(&embedding, constraints)?;
|
||||
|
||||
if candidates.is_empty() {
|
||||
return Err(RoutingError::NoSuitableAgent);
|
||||
}
|
||||
|
||||
// Score candidates
|
||||
let scored: Vec<_> = candidates.iter()
|
||||
.map(|agent| {
|
||||
let score = self.score_agent(agent, &embedding, optimize_for);
|
||||
(agent, score)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Select best
|
||||
let (best_agent, confidence) = scored.into_iter()
|
||||
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
|
||||
.unwrap();
|
||||
|
||||
Ok(RoutingDecision {
|
||||
agent: best_agent.clone(),
|
||||
confidence,
|
||||
estimated_cost: self.estimate_cost(best_agent, request),
|
||||
estimated_latency_ms: best_agent.performance.avg_latency_ms,
|
||||
reasoning: format!("Selected {} based on {:?} optimization", best_agent.name, optimize_for),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_candidates(
|
||||
&self,
|
||||
embedding: &[f32],
|
||||
constraints: &RoutingConstraints,
|
||||
) -> Result<Vec<Agent>, RoutingError> {
|
||||
let mut candidates: Vec<_> = self.registry.list_active();
|
||||
|
||||
// Filter by required capabilities
|
||||
if let Some(required) = &constraints.required_capabilities {
|
||||
candidates.retain(|a| {
|
||||
required.iter().all(|cap| a.capabilities.contains(cap))
|
||||
});
|
||||
}
|
||||
|
||||
// Filter by cost
|
||||
if let Some(max_cost) = constraints.max_cost {
|
||||
candidates.retain(|a| {
|
||||
a.cost_model.cost_per_1k_tokens.unwrap_or(0.0) <= max_cost ||
|
||||
a.cost_model.cost_per_call.unwrap_or(0.0) <= max_cost
|
||||
});
|
||||
}
|
||||
|
||||
// Filter by latency
|
||||
if let Some(max_latency) = constraints.max_latency_ms {
|
||||
candidates.retain(|a| a.performance.avg_latency_ms <= max_latency);
|
||||
}
|
||||
|
||||
// Filter by quality
|
||||
if let Some(min_quality) = constraints.min_quality {
|
||||
candidates.retain(|a| a.performance.quality_score >= min_quality);
|
||||
}
|
||||
|
||||
// Filter excluded
|
||||
if let Some(excluded) = &constraints.excluded_agents {
|
||||
candidates.retain(|a| !excluded.contains(&a.name));
|
||||
}
|
||||
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
fn score_agent(
|
||||
&self,
|
||||
agent: &Agent,
|
||||
request_embedding: &[f32],
|
||||
optimize_for: OptimizationTarget,
|
||||
) -> f64 {
|
||||
// Capability match score
|
||||
let capability_sim = cosine_similarity(request_embedding, &agent.capability_embedding);
|
||||
|
||||
match optimize_for {
|
||||
OptimizationTarget::Cost => {
|
||||
let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
|
||||
capability_sim * (1.0 / (1.0 + cost))
|
||||
}
|
||||
OptimizationTarget::Latency => {
|
||||
let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0);
|
||||
capability_sim * latency_factor
|
||||
}
|
||||
OptimizationTarget::Quality => {
|
||||
capability_sim * agent.performance.quality_score
|
||||
}
|
||||
OptimizationTarget::Balanced => {
|
||||
let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
|
||||
let cost_factor = 1.0 / (1.0 + cost);
|
||||
let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0);
|
||||
let quality = agent.performance.quality_score;
|
||||
|
||||
capability_sim * (0.3 * cost_factor + 0.3 * latency_factor + 0.4 * quality)
|
||||
}
|
||||
OptimizationTarget::QualityPerDollar => {
|
||||
let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
|
||||
capability_sim * agent.performance.quality_score / (cost + 0.001)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_cost(&self, agent: &Agent, request: &str) -> f64 {
|
||||
let estimated_tokens = (request.len() / 4) as f64; // Rough estimate
|
||||
|
||||
if let Some(cost_per_1k) = agent.cost_model.cost_per_1k_tokens {
|
||||
cost_per_1k * estimated_tokens / 1000.0
|
||||
} else if let Some(cost_per_call) = agent.cost_model.cost_per_call {
|
||||
cost_per_call
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// What `Router::route` optimizes for when scoring candidates.
#[derive(Debug, Clone, Copy)]
pub enum OptimizationTarget {
    Cost,
    Latency,
    Quality,
    Balanced,         // weighted mix: 30% cost, 30% latency, 40% quality
    QualityPerDollar, // quality divided by per-1k-token cost
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_route(
|
||||
request: &str,
|
||||
optimize_for: default!(&str, "'balanced'"),
|
||||
required_capabilities: default!(Option<Vec<String>>, "NULL"),
|
||||
max_cost: default!(Option<f64>, "NULL"),
|
||||
max_latency_ms: default!(Option<f64>, "NULL"),
|
||||
min_quality: default!(Option<f64>, "NULL"),
|
||||
) -> pgrx::JsonB {
|
||||
let router = get_router();
|
||||
|
||||
let constraints = RoutingConstraints {
|
||||
required_capabilities,
|
||||
max_cost,
|
||||
max_latency_ms,
|
||||
min_quality,
|
||||
excluded_agents: None,
|
||||
};
|
||||
|
||||
let target = match optimize_for {
|
||||
"cost" => OptimizationTarget::Cost,
|
||||
"latency" => OptimizationTarget::Latency,
|
||||
"quality" => OptimizationTarget::Quality,
|
||||
"quality_per_dollar" => OptimizationTarget::QualityPerDollar,
|
||||
_ => OptimizationTarget::Balanced,
|
||||
};
|
||||
|
||||
match router.route(request, &constraints, target) {
|
||||
Ok(decision) => pgrx::JsonB(serde_json::json!({
|
||||
"agent_name": decision.agent.name,
|
||||
"confidence": decision.confidence,
|
||||
"estimated_cost": decision.estimated_cost,
|
||||
"estimated_latency_ms": decision.estimated_latency_ms,
|
||||
"reasoning": decision.reasoning,
|
||||
})),
|
||||
Err(e) => pgrx::JsonB(serde_json::json!({
|
||||
"error": format!("{:?}", e),
|
||||
})),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 5: Semantic Routes (Week 10-11)
|
||||
|
||||
```rust
|
||||
// src/routing/semantic_routes.rs
|
||||
|
||||
/// Named routing rules matched against query embeddings via an HNSW
/// index; the map holds the full route definitions keyed by name.
pub struct SemanticRoutes {
    routes: DashMap<String, SemanticRoute>,
    index: HnswIndex,
}
|
||||
|
||||
/// A single semantic routing rule: queries similar to `embedding` are
/// sent to `target_agent`.
#[derive(Debug, Clone)]
pub struct SemanticRoute {
    pub name: String,
    pub description: String,
    pub embedding: Vec<f32>,  // embedding of the description, or user-supplied
    pub target_agent: String,
    pub priority: i32,        // boosts confidence during matching
    pub conditions: Option<RouteConditions>,
}
|
||||
|
||||
/// Optional gating conditions attached to a semantic route.
/// NOTE(review): not yet consulted by `match_route` in this plan.
#[derive(Debug, Clone)]
pub struct RouteConditions {
    pub time_range: Option<(chrono::NaiveTime, chrono::NaiveTime)>,
    pub user_tier: Option<Vec<String>>,
    pub rate_limit: Option<u32>,
}
|
||||
|
||||
impl SemanticRoutes {
|
||||
pub fn add_route(&self, route: SemanticRoute) {
|
||||
self.index.insert(&route.name, &route.embedding);
|
||||
self.routes.insert(route.name.clone(), route);
|
||||
}
|
||||
|
||||
pub fn match_route(&self, query_embedding: &[f32], k: usize) -> Vec<(SemanticRoute, f32)> {
|
||||
let results = self.index.search(query_embedding, k);
|
||||
|
||||
results.iter()
|
||||
.filter_map(|(name, score)| {
|
||||
self.routes.get(name.as_str())
|
||||
.map(|r| (r.clone(), *score))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_create_route(
|
||||
name: &str,
|
||||
description: &str,
|
||||
target_agent: &str,
|
||||
priority: default!(i32, 0),
|
||||
embedding: default!(Option<Vec<f32>>, "NULL"),
|
||||
) -> bool {
|
||||
let routes = get_semantic_routes();
|
||||
|
||||
let embedding = embedding.unwrap_or_else(|| get_embedding(description));
|
||||
|
||||
let route = SemanticRoute {
|
||||
name: name.to_string(),
|
||||
description: description.to_string(),
|
||||
embedding,
|
||||
target_agent: target_agent.to_string(),
|
||||
priority,
|
||||
conditions: None,
|
||||
};
|
||||
|
||||
routes.add_route(route);
|
||||
true
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_semantic_route(
|
||||
query: &str,
|
||||
top_k: default!(i32, 3),
|
||||
) -> TableIterator<'static, (
|
||||
name!(route_name, String),
|
||||
name!(similarity, f32),
|
||||
name!(target_agent, String),
|
||||
name!(confidence, f32),
|
||||
)> {
|
||||
let routes = get_semantic_routes();
|
||||
let embedding = get_embedding(query);
|
||||
|
||||
let matches = routes.match_route(&embedding, top_k as usize);
|
||||
|
||||
let results: Vec<_> = matches.into_iter()
|
||||
.map(|(route, similarity)| {
|
||||
let confidence = similarity * (route.priority as f32 + 1.0) / 10.0;
|
||||
(route.name, similarity, route.target_agent, confidence.min(1.0))
|
||||
})
|
||||
.collect();
|
||||
|
||||
TableIterator::new(results)
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 6: Cost Optimizer (Week 12)
|
||||
|
||||
```rust
|
||||
// src/routing/cost_optimizer.rs
|
||||
|
||||
/// Budget-aware usage tracking and allocation across agents.
pub struct CostOptimizer {
    budget_tracker: BudgetTracker, // remaining budget per agent
    usage_history: UsageHistory,   // per-agent cost/latency/success records
}
|
||||
|
||||
/// Result of a budget optimization run: per-agent budgets for a period.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    pub agent_budgets: HashMap<String, f64>, // agent name -> allocated budget
    pub total_budget: f64,
    pub period: chrono::Duration,
}
|
||||
|
||||
impl CostOptimizer {
|
||||
/// Optimize budget allocation across agents
|
||||
pub fn optimize_budget(
|
||||
&self,
|
||||
total_budget: f64,
|
||||
quality_threshold: f64,
|
||||
latency_threshold: f64,
|
||||
period_days: i64,
|
||||
) -> BudgetAllocation {
|
||||
let agents = get_agent_registry().list_active();
|
||||
let history = self.usage_history.get_period(period_days);
|
||||
|
||||
// Calculate value score for each agent
|
||||
let agent_values: HashMap<String, f64> = agents.iter()
|
||||
.filter(|a| {
|
||||
a.performance.quality_score >= quality_threshold &&
|
||||
a.performance.avg_latency_ms <= latency_threshold
|
||||
})
|
||||
.map(|a| {
|
||||
let historical_usage = history.get(&a.name).map(|h| h.request_count).unwrap_or(1);
|
||||
let quality = a.performance.quality_score;
|
||||
let cost_efficiency = 1.0 / (a.cost_model.cost_per_1k_tokens.unwrap_or(0.01) + 0.001);
|
||||
|
||||
let value = quality * cost_efficiency * (historical_usage as f64).ln();
|
||||
(a.name.clone(), value)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Allocate budget proportionally to value
|
||||
let total_value: f64 = agent_values.values().sum();
|
||||
let agent_budgets: HashMap<String, f64> = agent_values.iter()
|
||||
.map(|(name, value)| {
|
||||
let allocation = (value / total_value) * total_budget;
|
||||
(name.clone(), allocation)
|
||||
})
|
||||
.collect();
|
||||
|
||||
BudgetAllocation {
|
||||
agent_budgets,
|
||||
total_budget,
|
||||
period: chrono::Duration::days(period_days),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if request fits within budget
|
||||
pub fn check_budget(&self, agent: &str, estimated_cost: f64) -> bool {
|
||||
self.budget_tracker.remaining(agent) >= estimated_cost
|
||||
}
|
||||
|
||||
/// Record usage
|
||||
pub fn record_usage(&self, agent: &str, actual_cost: f64, success: bool, latency_ms: f64) {
|
||||
self.budget_tracker.deduct(agent, actual_cost);
|
||||
self.usage_history.record(agent, actual_cost, success, latency_ms);
|
||||
}
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_optimize_budget(
|
||||
monthly_budget: f64,
|
||||
quality_threshold: default!(f64, 0.8),
|
||||
latency_threshold_ms: default!(f64, 5000.0),
|
||||
) -> pgrx::JsonB {
|
||||
let optimizer = get_cost_optimizer();
|
||||
|
||||
let allocation = optimizer.optimize_budget(
|
||||
monthly_budget,
|
||||
quality_threshold,
|
||||
latency_threshold_ms,
|
||||
30,
|
||||
);
|
||||
|
||||
pgrx::JsonB(serde_json::json!({
|
||||
"allocations": allocation.agent_budgets,
|
||||
"total_budget": allocation.total_budget,
|
||||
"period_days": 30,
|
||||
}))
|
||||
}
|
||||
|
||||
#[pg_extern]
|
||||
fn ruvector_routing_analytics(
|
||||
time_range: default!(&str, "'7 days'"),
|
||||
group_by: default!(&str, "'agent'"),
|
||||
) -> TableIterator<'static, (
|
||||
name!(agent, String),
|
||||
name!(total_requests, i64),
|
||||
name!(total_cost, f64),
|
||||
name!(avg_latency_ms, f64),
|
||||
name!(success_rate, f64),
|
||||
)> {
|
||||
let optimizer = get_cost_optimizer();
|
||||
let days = parse_time_range(time_range);
|
||||
|
||||
let stats = optimizer.usage_history.aggregate(days, group_by);
|
||||
|
||||
TableIterator::new(stats)
|
||||
}
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| Operation | Input Size | Time (μs) | Memory |
|
||||
|-----------|------------|-----------|--------|
|
||||
| FastGRNN step | 768-dim | 45 | 1KB |
|
||||
| Route classification | 768-dim | 120 | 4KB |
|
||||
| Semantic route match (1K routes) | 768-dim | 250 | 8KB |
|
||||
| Full routing decision | 768-dim | 500 | 16KB |
|
||||
|
||||
## Dependencies
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# Link to ruvector-tiny-dancer
|
||||
ruvector-tiny-dancer-core = { path = "../ruvector-tiny-dancer-core", optional = true }
|
||||
|
||||
# SIMD
|
||||
simsimd = "5.9"
|
||||
|
||||
# Time handling
|
||||
chrono = "0.4"
|
||||
|
||||
# Concurrent collections
|
||||
dashmap = "6.0"
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
```toml
|
||||
[features]
|
||||
routing = []
|
||||
routing-fastgrnn = ["routing"]
|
||||
routing-semantic = ["routing", "index-hnsw"]
|
||||
routing-optimizer = ["routing"]
|
||||
routing-all = ["routing-fastgrnn", "routing-semantic", "routing-optimizer"]
|
||||
```
|
||||
666
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/08-optimization-strategy.md
vendored
Normal file
666
vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/08-optimization-strategy.md
vendored
Normal file
@@ -0,0 +1,666 @@
|
||||
# Optimization Strategy
|
||||
|
||||
## Overview
|
||||
|
||||
Comprehensive optimization strategies for ruvector-postgres covering SIMD acceleration, memory management, query optimization, and PostgreSQL-specific tuning.
|
||||
|
||||
## SIMD Optimization
|
||||
|
||||
### Architecture Detection & Dispatch
|
||||
|
||||
```rust
|
||||
// src/simd/dispatch.rs
|
||||
|
||||
/// Best SIMD instruction set detected at runtime, in preference order.
#[derive(Debug, Clone, Copy)]
pub enum SimdCapability {
    AVX512, // x86-64 with avx512f + avx512vl
    AVX2,   // x86-64 with avx2 + fma
    NEON,   // aarch64
    Scalar, // portable fallback
}
|
||||
|
||||
lazy_static! {
    // Detected once on first use and cached for the process lifetime.
    static ref SIMD_CAPABILITY: SimdCapability = detect_simd();
}
|
||||
|
||||
/// Probe the running CPU for SIMD support, preferring the widest
/// instruction set available.
fn detect_simd() -> SimdCapability {
    #[cfg(target_arch = "x86_64")]
    {
        // avx512vl is required alongside avx512f so the 128/256-bit
        // forms of the 512-bit instructions are also usable.
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            return SimdCapability::AVX512;
        }
        if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
            return SimdCapability::AVX2;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // NEON is mandatory on aarch64 — no runtime probe needed.
        return SimdCapability::NEON;
    }

    // Fallback for other architectures (and x86-64 without AVX2).
    SimdCapability::Scalar
}
|
||||
|
||||
/// Dispatch to optimal implementation
///
/// Routes a distance computation to the best SIMD backend for the CPU
/// detected at startup (see `SIMD_CAPABILITY`).
#[inline]
pub fn distance_dispatch(a: &[f32], b: &[f32], metric: DistanceMetric) -> f32 {
    match *SIMD_CAPABILITY {
        SimdCapability::AVX512 => distance_avx512(a, b, metric),
        SimdCapability::AVX2 => distance_avx2(a, b, metric),
        SimdCapability::NEON => distance_neon(a, b, metric),
        SimdCapability::Scalar => distance_scalar(a, b, metric),
    }
}
|
||||
```
|
||||
|
||||
### Vectorized Operations
|
||||
|
||||
```rust
|
||||
// AVX-512 optimized distance
/// Euclidean distance using 16-lane AVX-512 FMA accumulation with a
/// scalar loop for the tail elements.
///
/// # Safety
/// Caller must ensure the CPU supports avx512f/avx512vl and that
/// `b` has at least `a.len()` elements (reads are bounded by `a.len()`).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f", enable = "avx512vl")]
unsafe fn euclidean_avx512(a: &[f32], b: &[f32]) -> f32 {
    use std::arch::x86_64::*;

    let mut sum = _mm512_setzero_ps();
    let chunks = a.len() / 16;

    for i in 0..chunks {
        // Unaligned loads: slice data has no 64-byte alignment guarantee.
        let va = _mm512_loadu_ps(a.as_ptr().add(i * 16));
        let vb = _mm512_loadu_ps(b.as_ptr().add(i * 16));
        let diff = _mm512_sub_ps(va, vb);
        // sum += diff * diff (fused multiply-add)
        sum = _mm512_fmadd_ps(diff, diff, sum);
    }

    // Handle remainder
    let mut result = _mm512_reduce_add_ps(sum);
    for i in (chunks * 16)..a.len() {
        let diff = a[i] - b[i];
        result += diff * diff;
    }

    result.sqrt()
}
|
||||
|
||||
// ARM NEON optimized distance
/// Euclidean distance using 4-lane NEON FMA accumulation with a
/// scalar loop for the tail elements.
///
/// # Safety
/// Caller must ensure `b` has at least `a.len()` elements (reads are
/// bounded by `a.len()`). NEON itself is guaranteed on aarch64.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn euclidean_neon(a: &[f32], b: &[f32]) -> f32 {
    use std::arch::aarch64::*;

    let mut sum = vdupq_n_f32(0.0);
    let chunks = a.len() / 4;

    for i in 0..chunks {
        let va = vld1q_f32(a.as_ptr().add(i * 4));
        let vb = vld1q_f32(b.as_ptr().add(i * 4));
        let diff = vsubq_f32(va, vb);
        // sum += diff * diff (fused multiply-add)
        sum = vfmaq_f32(sum, diff, diff);
    }

    // Horizontal add with the dedicated NEON reduction intrinsic
    // instead of transmuting the register into an array.
    let mut result = vaddvq_f32(sum);

    for i in (chunks * 4)..a.len() {
        let diff = a[i] - b[i];
        result += diff * diff;
    }

    result.sqrt()
}
|
||||
```
|
||||
|
||||
### Batch Processing
|
||||
|
||||
```rust
|
||||
/// Process multiple vectors in parallel batches
///
/// Splits candidates into 256-vector chunks and computes distances on
/// the rayon thread pool; output order matches `candidates` order.
pub fn batch_distances(
    query: &[f32],
    candidates: &[&[f32]],
    metric: DistanceMetric,
) -> Vec<f32> {
    // Chunk size balances scheduling overhead against load balancing.
    const BATCH_SIZE: usize = 256;

    candidates
        .par_chunks(BATCH_SIZE)
        .flat_map(|batch| {
            batch.iter()
                .map(|c| distance_dispatch(query, c, metric))
                .collect::<Vec<_>>()
        })
        .collect()
}
|
||||
|
||||
/// Prefetch-optimized batch processing
|
||||
pub fn batch_distances_prefetch(
|
||||
query: &[f32],
|
||||
candidates: &[Vec<f32>],
|
||||
metric: DistanceMetric,
|
||||
) -> Vec<f32> {
|
||||
let mut results = Vec::with_capacity(candidates.len());
|
||||
|
||||
for i in 0..candidates.len() {
|
||||
// Prefetch next vectors
|
||||
if i + 4 < candidates.len() {
|
||||
prefetch_read(&candidates[i + 4]);
|
||||
}
|
||||
|
||||
results.push(distance_dispatch(query, &candidates[i], metric));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Hint the CPU to pull `data` into L1 cache (T0 locality).
/// A no-op on architectures without an exposed prefetch intrinsic.
#[inline]
fn prefetch_read<T>(data: &T) {
    #[cfg(target_arch = "x86_64")]
    // SAFETY: _mm_prefetch is only a cache hint; it never faults,
    // even for invalid addresses, so any reference is safe to pass.
    unsafe {
        std::arch::x86_64::_mm_prefetch(
            data as *const T as *const i8,
            std::arch::x86_64::_MM_HINT_T0,
        );
    }

    // Silence the unused-parameter warning on non-x86_64 targets.
    #[cfg(not(target_arch = "x86_64"))]
    let _ = data;
}
|
||||
```
|
||||
|
||||
## Memory Optimization
|
||||
|
||||
### Zero-Copy Operations
|
||||
|
||||
```rust
|
||||
/// Memory-mapped vector storage
///
/// Read-only, zero-copy view over a file of densely packed f32 vectors.
pub struct MappedVectors {
    mmap: memmap2::Mmap,
    dim: usize,   // elements per vector
    count: usize, // number of whole vectors in the file
}
|
||||
|
||||
impl MappedVectors {
|
||||
pub fn open(path: &Path, dim: usize) -> io::Result<Self> {
|
||||
let file = File::open(path)?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&file)? };
|
||||
let count = mmap.len() / (dim * std::mem::size_of::<f32>());
|
||||
|
||||
Ok(Self { mmap, dim, count })
|
||||
}
|
||||
|
||||
/// Zero-copy access to vector
|
||||
#[inline]
|
||||
pub fn get(&self, index: usize) -> &[f32] {
|
||||
let offset = index * self.dim;
|
||||
let bytes = &self.mmap[offset * 4..(offset + self.dim) * 4];
|
||||
unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, self.dim) }
|
||||
}
|
||||
}
|
||||
|
||||
/// PostgreSQL shared memory integration
///
/// Vector cache backed by a PostgreSQL dynamic shared memory (DSM)
/// segment so multiple backends can access the same vectors.
pub struct SharedVectorCache {
    shmem: pg_sys::dsm_segment,
    vectors: *mut f32, // base address of the DSM segment
    capacity: usize,   // maximum number of vectors
    dim: usize,        // elements per vector
}
|
||||
|
||||
impl SharedVectorCache {
    /// Allocate a DSM segment sized for `capacity` vectors of `dim` f32s.
    ///
    /// NOTE(review): `dsm_create` failure is not handled here — confirm
    /// the error path before productionizing.
    pub fn create(capacity: usize, dim: usize) -> Self {
        let size = capacity * dim * std::mem::size_of::<f32>();
        let shmem = unsafe { pg_sys::dsm_create(size, 0) };
        let vectors = unsafe { pg_sys::dsm_segment_address(shmem) as *mut f32 };

        Self { shmem, vectors, capacity, dim }
    }

    /// Borrow the vector at `index` directly from shared memory.
    ///
    /// NOTE(review): no bounds check against `capacity`; callers must
    /// keep `index < capacity` or this reads out of the segment.
    #[inline]
    pub fn get(&self, index: usize) -> &[f32] {
        unsafe {
            std::slice::from_raw_parts(
                self.vectors.add(index * self.dim),
                self.dim
            )
        }
    }
}
|
||||
```
|
||||
|
||||
### Memory Pool
|
||||
|
||||
```rust
|
||||
/// Thread-local memory pool for temporary allocations
// One pool per thread avoids locking on the hot acquire/release path.
thread_local! {
    static VECTOR_POOL: RefCell<VectorPool> = RefCell::new(VectorPool::new());
}
|
||||
|
||||
/// Reusable pool of zeroed `Vec<f32>` buffers, bucketed by dimension,
/// to avoid repeated heap allocation in hot paths.
pub struct VectorPool {
    pools: HashMap<usize, Vec<Vec<f32>>>, // dim -> cached zeroed buffers
    max_cached: usize,                    // per-dimension cache cap
}

impl VectorPool {
    /// Create an empty pool caching up to 1024 buffers per dimension.
    pub fn new() -> Self {
        Self {
            pools: HashMap::new(),
            max_cached: 1024,
        }
    }

    /// Take a zeroed buffer of length `dim`, reusing a cached one when
    /// available.
    pub fn acquire(&mut self, dim: usize) -> Vec<f32> {
        self.pools
            .get_mut(&dim)
            .and_then(|pool| pool.pop())
            .unwrap_or_else(|| vec![0.0; dim])
    }

    /// Return a buffer to the pool. It is zeroed before caching so the
    /// next `acquire` hands out a clean vector; dropped outright when
    /// the bucket is already full.
    pub fn release(&mut self, mut vec: Vec<f32>) {
        let dim = vec.len();
        // entry().or_default() avoids a second hash lookup.
        let pool = self.pools.entry(dim).or_default();

        if pool.len() < self.max_cached {
            vec.fill(0.0); // scrub before reuse
            pool.push(vec);
        }
    }
}
|
||||
|
||||
/// RAII guard for pooled vectors
///
/// Wraps a buffer acquired from `VECTOR_POOL`; returning it to the
/// pool happens automatically on drop.
pub struct PooledVec(Vec<f32>);

impl Drop for PooledVec {
    fn drop(&mut self) {
        VECTOR_POOL.with(|pool| {
            // mem::take moves the real buffer out (leaving an empty
            // Vec behind) so it can be returned without cloning.
            pool.borrow_mut().release(std::mem::take(&mut self.0));
        });
    }
}
|
||||
```
|
||||
|
||||
### Quantization for Memory Reduction
|
||||
|
||||
```rust
|
||||
/// 8-bit scalar quantization (4x memory reduction)
///
/// All vectors share one affine mapping `x ≈ q * scale + offset`
/// derived from the global min/max of the input set.
pub struct ScalarQuantized {
    data: Vec<u8>, // concatenated 8-bit codes, `dim` per vector
    scale: f32,    // quantization step
    offset: f32,   // global minimum value
    dim: usize,
}
|
||||
|
||||
impl ScalarQuantized {
|
||||
pub fn from_f32(vectors: &[Vec<f32>]) -> Self {
|
||||
let (min, max) = find_minmax(vectors);
|
||||
let scale = (max - min) / 255.0;
|
||||
let offset = min;
|
||||
|
||||
let data: Vec<u8> = vectors.iter()
|
||||
.flat_map(|v| {
|
||||
v.iter().map(|&x| ((x - offset) / scale) as u8)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self { data, scale, offset, dim: vectors[0].len() }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn distance(&self, query: &[f32], index: usize) -> f32 {
|
||||
let start = index * self.dim;
|
||||
let quantized = &self.data[start..start + self.dim];
|
||||
|
||||
let mut sum = 0.0f32;
|
||||
for (i, &q) in quantized.iter().enumerate() {
|
||||
let reconstructed = q as f32 * self.scale + self.offset;
|
||||
let diff = query[i] - reconstructed;
|
||||
sum += diff * diff;
|
||||
}
|
||||
sum.sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
/// Binary quantization (32x memory reduction)
///
/// Each f32 is reduced to its sign bit; distances become Hamming
/// distances over the resulting bit vectors.
pub struct BinaryQuantized {
    data: BitVec, // concatenated sign bits, `dim` per vector
    dim: usize,
}
|
||||
|
||||
impl BinaryQuantized {
    /// Quantize vectors to one sign bit per element (positive -> 1).
    ///
    /// # Panics
    /// Panics if `vectors` is empty (dimension comes from the first).
    pub fn from_f32(vectors: &[Vec<f32>]) -> Self {
        let dim = vectors[0].len();
        let mut data = BitVec::with_capacity(vectors.len() * dim);

        for vec in vectors {
            for &x in vec {
                data.push(x > 0.0);
            }
        }

        Self { data, dim }
    }

    /// Hamming distance (extremely fast)
    ///
    /// Counts differing bits between the query bit-vector and the
    /// stored vector at `index`.
    /// NOTE(review): bit-by-bit comparison; word-level XOR + popcount
    /// would be faster if the BitVec API exposes raw words.
    #[inline]
    pub fn hamming_distance(&self, query_bits: &BitVec, index: usize) -> u32 {
        let start = index * self.dim;
        let doc_bits = &self.data[start..start + self.dim];

        // XOR and popcount
        doc_bits.iter()
            .zip(query_bits.iter())
            .filter(|(a, b)| a != b)
            .count() as u32
    }
}
|
||||
```
|
||||
|
||||
## Query Optimization
|
||||
|
||||
### Query Plan Caching
|
||||
|
||||
```rust
|
||||
/// Cache compiled query plans
///
/// Keyed by query hash; tracks hit/miss counters for observability.
pub struct QueryPlanCache {
    cache: DashMap<u64, Arc<QueryPlan>>,
    max_size: usize,       // eviction threshold
    hit_count: AtomicU64,  // lifetime cache hits
    miss_count: AtomicU64, // lifetime cache misses
}
|
||||
|
||||
impl QueryPlanCache {
    /// Return the cached plan for `query_hash`, compiling and caching
    /// it on a miss.
    ///
    /// NOTE(review): two threads missing concurrently will both run
    /// `compile` and the later insert wins — benign for deterministic
    /// plans, but confirm `compile` is side-effect free.
    pub fn get_or_compile<F>(&self, query_hash: u64, compile: F) -> Arc<QueryPlan>
    where
        F: FnOnce() -> QueryPlan,
    {
        if let Some(plan) = self.cache.get(&query_hash) {
            self.hit_count.fetch_add(1, Ordering::Relaxed);
            return plan.clone();
        }

        self.miss_count.fetch_add(1, Ordering::Relaxed);
        let plan = Arc::new(compile());

        // LRU eviction if needed
        if self.cache.len() >= self.max_size {
            self.evict_lru();
        }

        self.cache.insert(query_hash, plan.clone());
        plan
    }
}
|
||||
```
|
||||
|
||||
### Adaptive Index Selection
|
||||
|
||||
```rust
|
||||
/// Choose optimal index based on query characteristics
|
||||
pub fn select_index(
|
||||
query: &SearchQuery,
|
||||
available_indexes: &[IndexInfo],
|
||||
table_stats: &TableStats,
|
||||
) -> &IndexInfo {
|
||||
let selectivity = estimate_selectivity(query, table_stats);
|
||||
let expected_results = (table_stats.row_count as f64 * selectivity) as usize;
|
||||
|
||||
// Decision tree for index selection
|
||||
    if expected_results < 100 {
        // Small result sets: a simple BTree index lookup is usually cheapest.
        // Note: `find` already yields `&IndexInfo`, so no extra borrow is needed.
        return available_indexes.iter()
            .find(|i| i.index_type == IndexType::BTree)
            .unwrap_or(&available_indexes[0]);
    }
|
||||
|
||||
if query.has_vector_similarity() {
|
||||
// Prefer HNSW for similarity search
|
||||
if let Some(hnsw) = available_indexes.iter()
|
||||
.find(|i| i.index_type == IndexType::Hnsw)
|
||||
{
|
||||
return hnsw;
|
||||
}
|
||||
}
|
||||
|
||||
// Default to IVFFlat for range queries
|
||||
available_indexes.iter()
|
||||
.find(|i| i.index_type == IndexType::IvfFlat)
|
||||
.unwrap_or(&available_indexes[0])
|
||||
}
|
||||
|
||||
/// Adaptive ef_search based on query complexity
|
||||
pub fn adaptive_ef_search(
|
||||
query: &[f32],
|
||||
index: &HnswIndex,
|
||||
target_recall: f64,
|
||||
) -> usize {
|
||||
// Start with learned baseline
|
||||
let baseline = index.learned_ef_for_query(query);
|
||||
|
||||
// Adjust based on query density
|
||||
let query_norm = query.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let density_factor = if query_norm < 1.0 { 1.2 } else { 1.0 };
|
||||
|
||||
// Adjust based on target recall
|
||||
let recall_factor = match target_recall {
|
||||
r if r >= 0.99 => 2.0,
|
||||
r if r >= 0.95 => 1.5,
|
||||
r if r >= 0.90 => 1.2,
|
||||
_ => 1.0,
|
||||
};
|
||||
|
||||
((baseline as f64 * density_factor * recall_factor) as usize).max(10)
|
||||
}
|
||||
```
|
||||
|
||||
### Parallel Query Execution
|
||||
|
||||
```rust
|
||||
/// Parallel index scan
|
||||
pub fn parallel_search(
|
||||
query: &[f32],
|
||||
index: &HnswIndex,
|
||||
k: usize,
|
||||
num_threads: usize,
|
||||
) -> Vec<(u64, f32)> {
|
||||
// Divide search into regions
|
||||
let entry_points = index.get_diverse_entry_points(num_threads);
|
||||
|
||||
let results: Vec<_> = entry_points
|
||||
.into_par_iter()
|
||||
.map(|entry| index.search_from(query, entry, k * 2))
|
||||
.collect();
|
||||
|
||||
// Merge results
|
||||
    // Merge, deduplicate, and keep the k best candidates.
    // Distance ties are broken by id so duplicate ids are guaranteed to be
    // adjacent before `dedup_by_key` (which only removes *consecutive* dups).
    let mut merged: Vec<_> = results.into_iter().flatten().collect();
    merged.sort_by(|(id_a, d_a), (id_b, d_b)| {
        d_a.partial_cmp(d_b).unwrap().then(id_a.cmp(id_b))
    });
    merged.dedup_by_key(|(id, _)| *id);
    merged.truncate(k);
|
||||
merged
|
||||
}
|
||||
|
||||
/// Intra-query parallelism for complex queries
|
||||
pub fn parallel_filter_search(
|
||||
query: &[f32],
|
||||
filters: &[Filter],
|
||||
index: &HnswIndex,
|
||||
k: usize,
|
||||
) -> Vec<(u64, f32)> {
|
||||
// Stage 1: Parallel filter evaluation
|
||||
let filter_results: Vec<HashSet<u64>> = filters
|
||||
.par_iter()
|
||||
.map(|f| evaluate_filter(f))
|
||||
.collect();
|
||||
|
||||
// Stage 2: Intersect filter results
|
||||
let valid_ids = filter_results
|
||||
.into_iter()
|
||||
.reduce(|a, b| a.intersection(&b).copied().collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Stage 3: Vector search with filter
|
||||
index.search_with_filter(query, k, |id| valid_ids.contains(&id))
|
||||
}
|
||||
```
|
||||
|
||||
## PostgreSQL-Specific Optimizations
|
||||
|
||||
### Buffer Management
|
||||
|
||||
```rust
|
||||
/// Custom buffer pool for vector data
|
||||
pub struct VectorBufferPool {
|
||||
buffers: Vec<Buffer>,
|
||||
free_list: Mutex<Vec<usize>>,
|
||||
usage_count: Vec<AtomicU32>,
|
||||
}
|
||||
|
||||
impl VectorBufferPool {
|
||||
/// Pin buffer with usage tracking
|
||||
pub fn pin(&self, index: usize) -> PinnedBuffer {
|
||||
self.usage_count[index].fetch_add(1, Ordering::Relaxed);
|
||||
PinnedBuffer { pool: self, index }
|
||||
}
|
||||
|
||||
/// Clock sweep eviction
|
||||
pub fn evict_if_needed(&self) -> Option<usize> {
|
||||
let mut hand = 0;
|
||||
loop {
|
||||
let count = self.usage_count[hand].load(Ordering::Relaxed);
|
||||
if count == 0 {
|
||||
return Some(hand);
|
||||
}
|
||||
self.usage_count[hand].store(count - 1, Ordering::Relaxed);
|
||||
hand = (hand + 1) % self.buffers.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### WAL Optimization
|
||||
|
||||
```rust
|
||||
/// Batch WAL writes for bulk operations
|
||||
pub fn bulk_insert_optimized(
|
||||
vectors: &[Vec<f32>],
|
||||
ids: &[u64],
|
||||
batch_size: usize,
|
||||
) {
|
||||
// Group into batches
|
||||
for batch in vectors.chunks(batch_size).zip(ids.chunks(batch_size)) {
|
||||
// Single WAL record for batch
|
||||
let wal_record = create_batch_wal_record(batch.0, batch.1);
|
||||
|
||||
unsafe {
|
||||
// Write single WAL entry
|
||||
pg_sys::XLogInsert(RUVECTOR_RMGR_ID, XLOG_RUVECTOR_BATCH_INSERT);
|
||||
}
|
||||
|
||||
// Apply batch
|
||||
apply_batch(batch.0, batch.1);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Statistics Collection
|
||||
|
||||
```rust
|
||||
/// Collect statistics for query planner
|
||||
pub fn analyze_vector_column(
|
||||
table_oid: pg_sys::Oid,
|
||||
column_num: i16,
|
||||
sample_rows: &[pg_sys::HeapTuple],
|
||||
) -> VectorStats {
|
||||
let mut vectors: Vec<Vec<f32>> = Vec::new();
|
||||
|
||||
// Extract sample vectors
|
||||
for tuple in sample_rows {
|
||||
if let Some(vec) = extract_vector(tuple, column_num) {
|
||||
vectors.push(vec);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute statistics
|
||||
let dim = vectors[0].len();
|
||||
let centroid = compute_centroid(&vectors);
|
||||
let avg_norm = vectors.iter()
|
||||
.map(|v| v.iter().map(|x| x * x).sum::<f32>().sqrt())
|
||||
.sum::<f32>() / vectors.len() as f32;
|
||||
|
||||
// Compute distribution statistics
|
||||
let distances: Vec<f32> = vectors.iter()
|
||||
.map(|v| euclidean_distance(v, ¢roid))
|
||||
.collect();
|
||||
|
||||
VectorStats {
|
||||
dim,
|
||||
avg_norm,
|
||||
centroid,
|
||||
distance_histogram: compute_histogram(&distances, 100),
|
||||
null_fraction: 0.0, // TODO: compute from sample
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration Recommendations
|
||||
|
||||
### GUC Parameters
|
||||
|
||||
```sql
|
||||
-- Memory settings
|
||||
SET ruvector.shared_cache_size = '256MB';
|
||||
SET ruvector.work_mem = '64MB';
|
||||
|
||||
-- Parallelism
|
||||
SET ruvector.max_parallel_workers = 4;
|
||||
SET ruvector.parallel_search_threshold = 10000;
|
||||
|
||||
-- Index tuning
|
||||
SET ruvector.ef_search = 64; -- HNSW search quality
|
||||
SET ruvector.probes = 10; -- IVFFlat probe count
|
||||
SET ruvector.quantization = 'sq8'; -- Default quantization
|
||||
|
||||
-- Learning
|
||||
SET ruvector.learning_enabled = on;
|
||||
SET ruvector.learning_rate = 0.01;
|
||||
|
||||
-- Maintenance
|
||||
SET ruvector.maintenance_work_mem = '512MB';
|
||||
SET ruvector.autovacuum_enabled = on;
|
||||
```
|
||||
|
||||
### Hardware-Specific Tuning
|
||||
|
||||
```yaml
|
||||
# Intel Xeon (AVX-512)
|
||||
ruvector.simd_mode: 'avx512'
|
||||
ruvector.vector_batch_size: 256
|
||||
ruvector.prefetch_distance: 4
|
||||
|
||||
# AMD EPYC (AVX2)
|
||||
ruvector.simd_mode: 'avx2'
|
||||
ruvector.vector_batch_size: 128
|
||||
ruvector.prefetch_distance: 8
|
||||
|
||||
# Apple M1/M2 (NEON)
|
||||
ruvector.simd_mode: 'neon'
|
||||
ruvector.vector_batch_size: 64
|
||||
ruvector.prefetch_distance: 4
|
||||
|
||||
# Memory-constrained
|
||||
ruvector.quantization: 'binary'
|
||||
ruvector.shared_cache_size: '64MB'
|
||||
ruvector.enable_mmap: on
|
||||
```
|
||||
|
||||
## Performance Monitoring
|
||||
|
||||
```sql
|
||||
-- View SIMD statistics
|
||||
SELECT * FROM ruvector_simd_stats();
|
||||
|
||||
-- Memory usage
|
||||
SELECT * FROM ruvector_memory_stats();
|
||||
|
||||
-- Cache hit rates
|
||||
SELECT * FROM ruvector_cache_stats();
|
||||
|
||||
-- Query performance
|
||||
SELECT * FROM ruvector_query_stats()
|
||||
ORDER BY total_time DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
---

<!-- File: vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md (694 lines, vendored, new file) -->
|
||||
# Benchmarking Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Comprehensive benchmarking strategy for ruvector-postgres covering micro-benchmarks, integration tests, comparison with competitors, and production workload simulation.
|
||||
|
||||
## Benchmark Categories
|
||||
|
||||
### 1. Micro-Benchmarks
|
||||
|
||||
Test individual operations in isolation.
|
||||
|
||||
```rust
|
||||
// benches/distance_bench.rs
|
||||
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
|
||||
|
||||
fn bench_euclidean_distance(c: &mut Criterion) {
|
||||
let dims = [128, 256, 512, 768, 1024, 1536];
|
||||
|
||||
let mut group = c.benchmark_group("euclidean_distance");
|
||||
|
||||
for dim in dims {
|
||||
let a: Vec<f32> = (0..dim).map(|_| rand::random()).collect();
|
||||
let b: Vec<f32> = (0..dim).map(|_| rand::random()).collect();
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("scalar", dim),
|
||||
&dim,
|
||||
|bench, _| bench.iter(|| euclidean_scalar(&a, &b))
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("simd_auto", dim),
|
||||
&dim,
|
||||
|bench, _| bench.iter(|| euclidean_simd(&a, &b))
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("avx2", dim),
|
||||
&dim,
|
||||
|bench, _| bench.iter(|| unsafe { euclidean_avx2(&a, &b) })
|
||||
);
|
||||
|
||||
if is_x86_feature_detected!("avx512f") {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("avx512", dim),
|
||||
&dim,
|
||||
|bench, _| bench.iter(|| unsafe { euclidean_avx512(&a, &b) })
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_cosine_distance(c: &mut Criterion) {
|
||||
// Similar structure for cosine
|
||||
}
|
||||
|
||||
fn bench_dot_product(c: &mut Criterion) {
|
||||
// Similar structure for dot product
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
distance_benches,
|
||||
bench_euclidean_distance,
|
||||
bench_cosine_distance,
|
||||
bench_dot_product
|
||||
);
|
||||
criterion_main!(distance_benches);
|
||||
```
|
||||
|
||||
### Expected Results: Distance Functions
|
||||
|
||||
| Operation | Dimension | Scalar (ns) | AVX2 (ns) | AVX-512 (ns) | Speedup |
|
||||
|-----------|-----------|-------------|-----------|--------------|---------|
|
||||
| Euclidean | 128 | 180 | 45 | 28 | 6.4x |
|
||||
| Euclidean | 768 | 980 | 210 | 125 | 7.8x |
|
||||
| Euclidean | 1536 | 1950 | 420 | 245 | 8.0x |
|
||||
| Cosine | 128 | 240 | 62 | 38 | 6.3x |
|
||||
| Cosine | 768 | 1280 | 285 | 168 | 7.6x |
|
||||
| Dot Product | 768 | 450 | 95 | 58 | 7.8x |
|
||||
|
||||
### 2. Index Benchmarks
|
||||
|
||||
```rust
|
||||
// benches/index_bench.rs
|
||||
|
||||
fn bench_hnsw_build(c: &mut Criterion) {
|
||||
let sizes = [10_000, 100_000, 1_000_000];
|
||||
let dims = [128, 768];
|
||||
|
||||
let mut group = c.benchmark_group("hnsw_build");
|
||||
group.sample_size(10);
|
||||
group.measurement_time(Duration::from_secs(30));
|
||||
|
||||
for size in sizes {
|
||||
for dim in dims {
|
||||
let vectors = generate_random_vectors(size, dim);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(format!("{}d", dim), size),
|
||||
&(&vectors, dim),
|
||||
|bench, (vecs, _)| {
|
||||
bench.iter(|| {
|
||||
let mut index = HnswIndex::new(HnswConfig {
|
||||
m: 16,
|
||||
ef_construction: 200,
|
||||
..Default::default()
|
||||
});
|
||||
for (i, v) in vecs.iter().enumerate() {
|
||||
index.insert(i as u64, v);
|
||||
}
|
||||
})
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_hnsw_search(c: &mut Criterion) {
|
||||
// Pre-build index
|
||||
let index = build_hnsw_index(1_000_000, 768);
|
||||
let queries = generate_random_vectors(1000, 768);
|
||||
|
||||
let ef_values = [10, 50, 100, 200, 500];
|
||||
let k_values = [1, 10, 100];
|
||||
|
||||
let mut group = c.benchmark_group("hnsw_search");
|
||||
|
||||
for ef in ef_values {
|
||||
for k in k_values {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(format!("ef{}_k{}", ef, k), "1M"),
|
||||
&(&index, &queries, ef, k),
|
||||
|bench, (idx, qs, ef, k)| {
|
||||
bench.iter(|| {
|
||||
for q in qs.iter() {
|
||||
idx.search(q, *k, *ef);
|
||||
}
|
||||
})
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_ivfflat_search(c: &mut Criterion) {
|
||||
let index = build_ivfflat_index(1_000_000, 768, 1000); // 1000 lists
|
||||
let queries = generate_random_vectors(1000, 768);
|
||||
|
||||
let probe_values = [1, 5, 10, 20, 50];
|
||||
|
||||
let mut group = c.benchmark_group("ivfflat_search");
|
||||
|
||||
for probes in probe_values {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new(format!("probes{}", probes), "1M"),
|
||||
&probes,
|
||||
|bench, probes| {
|
||||
bench.iter(|| {
|
||||
for q in queries.iter() {
|
||||
index.search(q, 10, *probes);
|
||||
}
|
||||
})
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
### Expected Results: Index Operations
|
||||
|
||||
| Index | Size | Build Time | Memory | Search (p50) | Search (p99) | Recall@10 |
|
||||
|-------|------|------------|--------|--------------|--------------|-----------|
|
||||
| HNSW | 100K | 45s | 450MB | 0.8ms | 2.1ms | 0.98 |
|
||||
| HNSW | 1M | 8min | 4.5GB | 1.2ms | 4.5ms | 0.97 |
|
||||
| HNSW | 10M | 95min | 45GB | 2.1ms | 8.2ms | 0.96 |
|
||||
| IVFFlat | 100K | 12s | 320MB | 1.5ms | 4.2ms | 0.92 |
|
||||
| IVFFlat | 1M | 2min | 3.2GB | 3.2ms | 9.5ms | 0.91 |
|
||||
| IVFFlat | 10M | 25min | 32GB | 8.5ms | 25ms | 0.89 |
|
||||
|
||||
### 3. Quantization Benchmarks
|
||||
|
||||
```rust
|
||||
// benches/quantization_bench.rs
|
||||
|
||||
fn bench_quantization_build(c: &mut Criterion) {
|
||||
let vectors = generate_random_vectors(100_000, 768);
|
||||
|
||||
let mut group = c.benchmark_group("quantization_build");
|
||||
|
||||
group.bench_function("scalar_q8", |bench| {
|
||||
bench.iter(|| ScalarQuantized::from_f32(&vectors))
|
||||
});
|
||||
|
||||
group.bench_function("binary", |bench| {
|
||||
bench.iter(|| BinaryQuantized::from_f32(&vectors))
|
||||
});
|
||||
|
||||
group.bench_function("product_q", |bench| {
|
||||
bench.iter(|| ProductQuantized::from_f32(&vectors, 96, 256))
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_quantized_search(c: &mut Criterion) {
|
||||
let vectors = generate_random_vectors(1_000_000, 768);
|
||||
let query = generate_random_vectors(1, 768).pop().unwrap();
|
||||
|
||||
let sq8 = ScalarQuantized::from_f32(&vectors);
|
||||
let binary = BinaryQuantized::from_f32(&vectors);
|
||||
let pq = ProductQuantized::from_f32(&vectors, 96, 256);
|
||||
|
||||
let mut group = c.benchmark_group("quantized_search_1M");
|
||||
|
||||
group.bench_function("full_precision", |bench| {
|
||||
bench.iter(|| {
|
||||
vectors.iter()
|
||||
.enumerate()
|
||||
.map(|(i, v)| (i, euclidean_distance(&query, v)))
|
||||
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("scalar_q8", |bench| {
|
||||
bench.iter(|| {
|
||||
(0..vectors.len())
|
||||
.map(|i| (i, sq8.distance(&query, i)))
|
||||
.min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("binary_hamming", |bench| {
|
||||
let query_bits = binary.quantize_query(&query);
|
||||
bench.iter(|| {
|
||||
(0..vectors.len())
|
||||
.map(|i| (i, binary.hamming_distance(&query_bits, i)))
|
||||
.min_by(|a, b| a.1.cmp(&b.1))
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
```
|
||||
|
||||
### Expected Results: Quantization
|
||||
|
||||
| Method | Memory (1M 768d) | Search Time | Recall Loss |
|
||||
|--------|------------------|-------------|-------------|
|
||||
| Full Precision | 3GB | 850ms | 0% |
|
||||
| Scalar Q8 | 750MB | 420ms | 1-2% |
|
||||
| Binary | 94MB | 95ms | 5-10% |
|
||||
| Product Q | 200MB | 180ms | 2-4% |
|
||||
|
||||
### 4. PostgreSQL Integration Benchmarks
|
||||
|
||||
```sql
|
||||
-- Test setup script
|
||||
CREATE EXTENSION ruvector;
|
||||
|
||||
-- Create test table
|
||||
CREATE TABLE bench_vectors (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector(768),
|
||||
category TEXT,
|
||||
created_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO bench_vectors (embedding, category)
|
||||
SELECT
|
||||
array_agg(random())::vector(768),
|
||||
'category_' || (i % 100)::text
|
||||
FROM generate_series(1, 1000000) i
|
||||
GROUP BY i;
|
||||
|
||||
-- Create indexes
|
||||
CREATE INDEX ON bench_vectors USING hnsw (embedding vector_cosine_ops)
|
||||
WITH (m = 16, ef_construction = 200);
|
||||
|
||||
CREATE INDEX ON bench_vectors USING ivfflat (embedding vector_cosine_ops)
|
||||
WITH (lists = 1000);
|
||||
|
||||
-- Benchmark queries
|
||||
\timing on
|
||||
|
||||
-- Simple k-NN
|
||||
EXPLAIN ANALYZE
|
||||
SELECT id, embedding <=> '[...]'::vector AS distance
|
||||
FROM bench_vectors
|
||||
ORDER BY distance
|
||||
LIMIT 10;
|
||||
|
||||
-- k-NN with filter
|
||||
EXPLAIN ANALYZE
|
||||
SELECT id, embedding <=> '[...]'::vector AS distance
|
||||
FROM bench_vectors
|
||||
WHERE category = 'category_42'
|
||||
ORDER BY distance
|
||||
LIMIT 10;
|
||||
|
||||
-- Batch search
|
||||
EXPLAIN ANALYZE
|
||||
SELECT b.id, q.query_id,
|
||||
b.embedding <=> q.embedding AS distance
|
||||
FROM bench_vectors b
|
||||
CROSS JOIN (
|
||||
SELECT 1 AS query_id, '[...]'::vector AS embedding
|
||||
UNION ALL
|
||||
SELECT 2, '[...]'::vector
|
||||
-- ... more queries
|
||||
) q
|
||||
ORDER BY q.query_id, distance
|
||||
LIMIT 100;
|
||||
```
|
||||
|
||||
### 5. Competitor Comparison
|
||||
|
||||
```python
|
||||
# benchmark_comparison.py
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
from typing import List, Tuple
|
||||
|
||||
# Test data
|
||||
SIZES = [10_000, 100_000, 1_000_000]
|
||||
DIMS = [128, 768, 1536]
|
||||
K = 10
|
||||
QUERIES = 1000
|
||||
|
||||
def run_pgvector_benchmark(conn, size, dim):
|
||||
"""Benchmark pgvector"""
|
||||
# Setup
|
||||
conn.execute(f"""
|
||||
CREATE TABLE pgvector_test (
|
||||
id SERIAL PRIMARY KEY,
|
||||
embedding vector({dim})
|
||||
);
|
||||
CREATE INDEX ON pgvector_test USING hnsw (embedding vector_cosine_ops);
|
||||
""")
|
||||
|
||||
# Insert
|
||||
start = time.time()
|
||||
# ... bulk insert
|
||||
build_time = time.time() - start
|
||||
|
||||
# Search
|
||||
query = np.random.randn(dim).astype(np.float32)
|
||||
start = time.time()
|
||||
for _ in range(QUERIES):
|
||||
conn.execute(f"""
|
||||
SELECT id FROM pgvector_test
|
||||
ORDER BY embedding <=> %s
|
||||
LIMIT {K}
|
||||
""", (query.tolist(),))
|
||||
search_time = (time.time() - start) / QUERIES * 1000
|
||||
|
||||
return {
|
||||
'build_time': build_time,
|
||||
'search_time_ms': search_time,
|
||||
}
|
||||
|
||||
def run_ruvector_benchmark(conn, size, dim):
|
||||
"""Benchmark ruvector-postgres"""
|
||||
# Similar setup with ruvector
|
||||
pass
|
||||
|
||||
def run_pinecone_benchmark(index, size, dim):
|
||||
"""Benchmark Pinecone (cloud)"""
|
||||
pass
|
||||
|
||||
def run_qdrant_benchmark(client, size, dim):
|
||||
"""Benchmark Qdrant"""
|
||||
pass
|
||||
|
||||
def run_milvus_benchmark(collection, size, dim):
|
||||
"""Benchmark Milvus"""
|
||||
pass
|
||||
|
||||
# Run all benchmarks
|
||||
results = {}
|
||||
for size in SIZES:
|
||||
for dim in DIMS:
|
||||
results[(size, dim)] = {
|
||||
'pgvector': run_pgvector_benchmark(...),
|
||||
'ruvector': run_ruvector_benchmark(...),
|
||||
'qdrant': run_qdrant_benchmark(...),
|
||||
'milvus': run_milvus_benchmark(...),
|
||||
}
|
||||
|
||||
# Generate comparison report
|
||||
```
|
||||
|
||||
### Expected Comparison Results
|
||||
|
||||
| System | 1M Build | 1M Search (p50) | 1M Search (p99) | Memory | Recall@10 |
|
||||
|--------|----------|-----------------|-----------------|--------|-----------|
|
||||
| **ruvector-postgres** | **5min** | **0.9ms** | **3.2ms** | **4.2GB** | **0.97** |
|
||||
| pgvector | 12min | 2.1ms | 8.5ms | 4.8GB | 0.95 |
|
||||
| Qdrant | 7min | 1.2ms | 4.1ms | 4.5GB | 0.96 |
|
||||
| Milvus | 8min | 1.5ms | 5.2ms | 5.1GB | 0.96 |
|
||||
| Pinecone (P1) | 3min* | 5ms* | 15ms* | N/A | 0.98 |
|
||||
|
||||
*Cloud latency includes network overhead
|
||||
|
||||
### 6. Stress Testing
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# stress_test.sh
|
||||
|
||||
# Configuration
|
||||
DURATION=3600 # 1 hour
|
||||
CONCURRENCY=100
|
||||
QPS_TARGET=10000
|
||||
|
||||
# Start PostgreSQL with ruvector
|
||||
pg_ctl start -D $PGDATA
|
||||
|
||||
# Run pgbench-style workload
|
||||
pgbench -c $CONCURRENCY -j 10 -T $DURATION \
|
||||
-f stress_queries.sql \
|
||||
-P 10 \
|
||||
--rate=$QPS_TARGET \
|
||||
testdb
|
||||
|
||||
# Monitor during test
|
||||
while true; do
|
||||
psql -c "SELECT * FROM ruvector_stats();" >> stats.log
|
||||
psql -c "SELECT * FROM pg_stat_activity WHERE state = 'active';" >> activity.log
|
||||
sleep 10
|
||||
done
|
||||
```
|
||||
|
||||
### stress_queries.sql
|
||||
|
||||
```sql
|
||||
-- Mixed workload
|
||||
\set query_type random(1, 100)
|
||||
|
||||
\if :query_type <= 60
|
||||
-- 60% simple k-NN
|
||||
SELECT id FROM vectors
|
||||
ORDER BY embedding <=> :'random_vector'::vector
|
||||
LIMIT 10;
|
||||
\elif :query_type <= 80
|
||||
-- 20% filtered k-NN
|
||||
SELECT id FROM vectors
|
||||
WHERE category = :'random_category'
|
||||
ORDER BY embedding <=> :'random_vector'::vector
|
||||
LIMIT 10;
|
||||
\elif :query_type <= 90
|
||||
-- 10% batch search
|
||||
SELECT v.id, q.id as query_id
|
||||
FROM vectors v, query_batch q
|
||||
ORDER BY v.embedding <=> q.embedding
|
||||
LIMIT 100;
|
||||
\else
|
||||
-- 10% insert
|
||||
INSERT INTO vectors (embedding, category)
|
||||
VALUES (:'random_vector'::vector, :'random_category');
|
||||
\endif
|
||||
```
|
||||
|
||||
### 7. Memory Benchmarks
|
||||
|
||||
```rust
|
||||
// benches/memory_bench.rs
|
||||
|
||||
fn bench_memory_footprint(c: &mut Criterion) {
|
||||
let sizes = [100_000, 1_000_000, 10_000_000];
|
||||
|
||||
println!("\n=== Memory Footprint Analysis ===\n");
|
||||
|
||||
for size in sizes {
|
||||
println!("Size: {} vectors", size);
|
||||
|
||||
// Full precision vectors
|
||||
let vectors: Vec<Vec<f32>> = generate_random_vectors(size, 768);
|
||||
let raw_size = size * 768 * 4;
|
||||
println!(" Raw vectors: {} MB", raw_size / 1_000_000);
|
||||
|
||||
// HNSW index
|
||||
let hnsw = HnswIndex::new(HnswConfig::default());
|
||||
for (i, v) in vectors.iter().enumerate() {
|
||||
hnsw.insert(i as u64, v);
|
||||
}
|
||||
println!(" HNSW overhead: {} MB", hnsw.memory_usage() / 1_000_000);
|
||||
|
||||
// Quantized
|
||||
let sq8 = ScalarQuantized::from_f32(&vectors);
|
||||
println!(" SQ8 size: {} MB", sq8.memory_usage() / 1_000_000);
|
||||
|
||||
let binary = BinaryQuantized::from_f32(&vectors);
|
||||
println!(" Binary size: {} MB", binary.memory_usage() / 1_000_000);
|
||||
|
||||
println!();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. Recall vs Latency Analysis
|
||||
|
||||
```python
|
||||
# recall_latency_analysis.py
|
||||
|
||||
import time

import matplotlib.pyplot as plt
import numpy as np
|
||||
|
||||
def measure_recall_latency_tradeoff(index, queries, ground_truth, ef_values):
    """Measure recall vs latency for different ef values."""
    results = []

    for ef in ef_values:
        latencies = []
        recalls = []

        for i, query in enumerate(queries):
            start = time.time()
            # Use a distinct name so the outer `results` accumulator
            # is not shadowed by the per-query search output.
            retrieved = index.search(query, k=10, ef=ef)
            latency = (time.time() - start) * 1000

            recall = len(set(retrieved) & set(ground_truth[i])) / 10

            latencies.append(latency)
            recalls.append(recall)

        results.append({
            'ef': ef,
            'avg_latency': np.mean(latencies),
            'p99_latency': np.percentile(latencies, 99),
            'avg_recall': np.mean(recalls),
        })

    return results
|
||||
|
||||
# Plot results
|
||||
plt.figure(figsize=(10, 6))
|
||||
plt.plot([r['avg_latency'] for r in results],
|
||||
[r['avg_recall'] for r in results], 'b-o')
|
||||
plt.xlabel('Latency (ms)')
|
||||
plt.ylabel('Recall@10')
|
||||
plt.title('Recall vs Latency Tradeoff')
|
||||
plt.savefig('recall_latency.png')
|
||||
```
|
||||
|
||||
## Benchmark Automation
|
||||
|
||||
### CI/CD Integration
|
||||
|
||||
```yaml
|
||||
# .github/workflows/benchmark.yml
|
||||
name: Benchmarks
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
benchmark:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get install postgresql-16
|
||||
cargo install cargo-criterion
|
||||
|
||||
- name: Run micro-benchmarks
|
||||
run: |
|
||||
cargo criterion --output-format json > bench_results.json
|
||||
|
||||
- name: Run PostgreSQL benchmarks
|
||||
run: |
|
||||
./scripts/run_pg_benchmarks.sh
|
||||
|
||||
- name: Compare with baseline
|
||||
run: |
|
||||
python scripts/compare_benchmarks.py \
|
||||
--baseline baseline.json \
|
||||
--current bench_results.json \
|
||||
--threshold 10
|
||||
|
||||
- name: Upload results
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: benchmark-results
|
||||
path: bench_results.json
|
||||
```
|
||||
|
||||
### Benchmark Dashboard
|
||||
|
||||
```sql
|
||||
-- Create benchmark results table
|
||||
CREATE TABLE benchmark_results (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_date TIMESTAMP DEFAULT NOW(),
|
||||
git_commit TEXT,
|
||||
benchmark_name TEXT,
|
||||
metric_name TEXT,
|
||||
value FLOAT,
|
||||
unit TEXT,
|
||||
metadata JSONB
|
||||
);
|
||||
|
||||
-- Query for trend analysis
|
||||
SELECT
|
||||
date_trunc('day', run_date) AS day,
|
||||
benchmark_name,
|
||||
AVG(value) AS avg_value,
|
||||
MIN(value) AS min_value,
|
||||
MAX(value) AS max_value
|
||||
FROM benchmark_results
|
||||
WHERE metric_name = 'search_latency_p50'
|
||||
AND run_date > NOW() - INTERVAL '30 days'
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1, 2;
|
||||
```
|
||||
|
||||
## Reporting Format
|
||||
|
||||
### Performance Report Template
|
||||
|
||||
```markdown
|
||||
# RuVector-Postgres Performance Report
|
||||
|
||||
**Date:** 2024-XX-XX
|
||||
**Version:** 0.X.0
|
||||
**Commit:** abc123
|
||||
|
||||
## Summary
|
||||
|
||||
- Overall performance: **X% faster** than pgvector
|
||||
- Memory efficiency: **X% less** than competitors
|
||||
- Recall@10: **0.97** (target: 0.95)
|
||||
|
||||
## Detailed Results
|
||||
|
||||
### Index Build Performance
|
||||
| Size | HNSW Time | IVFFlat Time | Memory |
|
||||
|------|-----------|--------------|--------|
|
||||
| 100K | Xs | Xs | XMB |
|
||||
| 1M | Xm | Xm | XGB |
|
||||
|
||||
### Search Latency (1M vectors, 768d)
|
||||
| Metric | HNSW | IVFFlat | Target |
|
||||
|--------|------|---------|--------|
|
||||
| p50 | Xms | Xms | <2ms |
|
||||
| p99 | Xms | Xms | <10ms |
|
||||
| QPS | X | X | >5000 |
|
||||
|
||||
### Comparison with Competitors
|
||||
[Charts and tables]
|
||||
|
||||
## Recommendations
|
||||
|
||||
1. For latency-sensitive workloads: Use HNSW with ef_search=64
|
||||
2. For memory-constrained: Use IVFFlat with SQ8 quantization
|
||||
3. For maximum throughput: Enable parallel search with 4 workers
|
||||
```
|
||||
|
||||
## Running Benchmarks
|
||||
|
||||
```bash
|
||||
# Run all micro-benchmarks
|
||||
cargo bench --features bench
|
||||
|
||||
# Run specific benchmark
|
||||
cargo bench -- distance
|
||||
|
||||
# Run PostgreSQL benchmarks
|
||||
./scripts/run_pg_benchmarks.sh
|
||||
|
||||
# Generate comparison report
|
||||
python scripts/generate_report.py
|
||||
|
||||
# Quick smoke test
|
||||
cargo bench -- --quick
|
||||
```
|
||||
---

<!-- File: vendor/ruvector/crates/ruvector-postgres/docs/integration-plans/README.md (165 lines, vendored, new file) -->
# RuVector-Postgres Integration Plans

Comprehensive implementation plans for integrating advanced capabilities into the ruvector-postgres PostgreSQL extension.

## Overview

These documents outline the roadmap to transform ruvector-postgres from a pgvector-compatible extension into a full-featured AI database with self-learning, attention mechanisms, GNN layers, and more.

## Current State

ruvector-postgres v0.1.0 includes:

- ✅ SIMD-optimized distance functions (AVX-512, AVX2, NEON)
- ✅ HNSW index with configurable parameters
- ✅ IVFFlat index for memory-efficient search
- ✅ Scalar (SQ8), Binary, and Product quantization
- ✅ pgvector-compatible SQL interface
- ✅ Parallel query execution

## Planned Integrations

| Feature | Document | Priority | Complexity | Est. Weeks |
|---------|----------|----------|------------|------------|
| Self-Learning / ReasoningBank | [01-self-learning.md](./01-self-learning.md) | High | High | 10 |
| Attention Mechanisms (39 types) | [02-attention-mechanisms.md](./02-attention-mechanisms.md) | High | Medium | 12 |
| GNN Layers | [03-gnn-layers.md](./03-gnn-layers.md) | High | High | 12 |
| Hyperbolic Embeddings | [04-hyperbolic-embeddings.md](./04-hyperbolic-embeddings.md) | Medium | Medium | 10 |
| Sparse Vectors | [05-sparse-vectors.md](./05-sparse-vectors.md) | High | Medium | 10 |
| Graph Operations & Cypher | [06-graph-operations.md](./06-graph-operations.md) | High | High | 14 |
| Tiny Dancer Routing | [07-tiny-dancer-routing.md](./07-tiny-dancer-routing.md) | Medium | Medium | 12 |

## Supporting Documents

| Document | Description |
|----------|-------------|
| [Optimization Strategy](./08-optimization-strategy.md) | SIMD, memory, query optimization techniques |
| [Benchmarking Plan](./09-benchmarking-plan.md) | Performance testing and comparison methodology |

## Architecture Principles

### Modularity

Each feature is implemented as a separate module with feature flags:

```toml
[features]
# Core (always enabled)
default = ["pg16"]

# Advanced features (opt-in)
learning = []
attention = []
gnn = []
hyperbolic = []
sparse = []
graph = []
routing = []

# Feature bundles
ai-complete = ["learning", "attention", "gnn", "routing"]
graph-complete = ["hyperbolic", "sparse", "graph"]
all = ["ai-complete", "graph-complete"]
```

### Dependency Strategy

```
ruvector-postgres
├── ruvector-core (shared types, SIMD)
├── ruvector-attention (optional)
├── ruvector-gnn (optional)
├── ruvector-graph (optional)
├── ruvector-tiny-dancer-core (optional)
└── External
    ├── pgrx (PostgreSQL FFI)
    ├── simsimd (SIMD operations)
    └── rayon (parallelism)
```

### SQL Interface Design

All features follow consistent SQL patterns:

```sql
-- Enable features
SELECT ruvector_enable_feature('learning', table_name := 'embeddings');

-- Configuration via GUCs
SET ruvector.learning_rate = 0.01;
SET ruvector.attention_type = 'flash';

-- Feature-specific functions prefixed with ruvector_
SELECT ruvector_attention_score(a, b, 'scaled_dot');
SELECT ruvector_gnn_search(query, 'edges', num_hops := 2);
SELECT ruvector_route(request, optimize_for := 'cost');

-- Cypher queries via dedicated function
SELECT * FROM ruvector_cypher('graph_name', $$
    MATCH (n:Person)-[:KNOWS]->(friend)
    RETURN friend.name
$$);
```

## Implementation Roadmap

### Phase 1: Foundation (Months 1-3)
- [ ] Sparse vectors (BM25, SPLADE support)
- [ ] Hyperbolic embeddings (Poincaré ball model)
- [ ] Basic attention operations (scaled dot-product)

### Phase 2: Graph (Months 4-6)
- [ ] Property graph storage
- [ ] Cypher query parser
- [ ] Basic graph algorithms (BFS, shortest path)
- [ ] Vector-guided traversal

### Phase 3: Neural (Months 7-9)
- [ ] GNN message passing framework
- [ ] GCN, GraphSAGE, GAT layers
- [ ] Multi-head attention
- [ ] Flash attention

### Phase 4: Intelligence (Months 10-12)
- [ ] Self-learning trajectory tracking
- [ ] ReasoningBank pattern storage
- [ ] Adaptive search optimization
- [ ] AI agent routing (Tiny Dancer)

### Phase 5: Production (Months 13-15)
- [ ] Performance optimization
- [ ] Comprehensive benchmarking
- [ ] Documentation and examples
- [ ] Production hardening

## Performance Targets

| Metric | Target | Notes |
|--------|--------|-------|
| Vector search (1M, 768d) | <2ms p50 | HNSW with ef=64 |
| Recall@10 | >0.95 | At target latency |
| GNN forward (10K nodes) | <20ms | Single layer |
| Cypher simple query | <5ms | Pattern match |
| Memory overhead | <20% | vs raw vectors |
| Build throughput | >50K vec/s | HNSW M=16 |

## Contributing

Each integration plan includes:

1. Architecture diagrams
2. Module structure
3. SQL interface specification
4. Implementation phases with timelines
5. Code examples
6. Benchmark targets
7. Dependencies and feature flags

When implementing:

1. Start with the module structure
2. Implement core functionality with tests
3. Add PostgreSQL integration
4. Write benchmarks
5. Document SQL interface
6. Update this README

## License

MIT License - See main repository for details.
Reference in New Issue
Block a user