Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
211
vendor/ruvector/docs/examples/btsp_usage.rs
vendored
Normal file
211
vendor/ruvector/docs/examples/btsp_usage.rs
vendored
Normal file
@@ -0,0 +1,211 @@
|
||||
//! # BTSP Usage Examples
|
||||
//!
|
||||
//! Demonstrates one-shot learning for vector database applications
|
||||
|
||||
use ruvector_nervous_system::plasticity::btsp::{
|
||||
BTSPAssociativeMemory, BTSPLayer, BTSPSynapse,
|
||||
};
|
||||
|
||||
/// Example 1: Basic one-shot learning
|
||||
fn example_one_shot_learning() {
|
||||
println!("=== Example 1: One-Shot Learning ===\n");
|
||||
|
||||
// Create a layer with 128 inputs, 2-second time constant
|
||||
let mut layer = BTSPLayer::new(128, 2000.0);
|
||||
|
||||
// Learn pattern -> target association instantly
|
||||
let pattern = vec![0.5; 128];
|
||||
let target = 0.9;
|
||||
|
||||
layer.one_shot_associate(&pattern, target);
|
||||
println!("Learned: pattern -> {}", target);
|
||||
|
||||
// Immediate recall (no training iterations needed)
|
||||
let output = layer.forward(&pattern);
|
||||
let error = (output - target).abs();
|
||||
|
||||
println!("Recalled: {} (error: {:.4})", output, error);
|
||||
println!("One-shot learning: {}\n", if error < 0.1 { "✓" } else { "✗" });
|
||||
}
|
||||
|
||||
/// Example 2: Vector embedding storage
|
||||
fn example_embedding_storage() {
|
||||
println!("=== Example 2: Embedding Storage ===\n");
|
||||
|
||||
// Create associative memory for 384-dim embeddings -> 128-dim metadata
|
||||
let mut memory = BTSPAssociativeMemory::new(384, 128);
|
||||
|
||||
// Store embeddings instantly (no batch training)
|
||||
let embedding1 = vec![0.5; 384];
|
||||
let metadata1 = vec![1.0, 0.0, 0.0, 0.5, 0.8];
|
||||
metadata1.extend(vec![0.0; 123]); // Pad to 128
|
||||
|
||||
let embedding2 = vec![0.3; 384];
|
||||
let metadata2 = vec![0.0, 1.0, 0.5, 0.2, 0.9];
|
||||
metadata2.extend(vec![0.0; 123]);
|
||||
|
||||
memory.store_one_shot(&embedding1, &metadata1).unwrap();
|
||||
memory.store_one_shot(&embedding2, &metadata2).unwrap();
|
||||
|
||||
println!("Stored 2 embeddings instantly (no iterations)");
|
||||
|
||||
// Retrieve
|
||||
let retrieved = memory.retrieve(&embedding1).unwrap();
|
||||
println!("Retrieved metadata dim: {}", retrieved.len());
|
||||
println!("First 5 values: {:?}\n", &retrieved[..5]);
|
||||
}
|
||||
|
||||
/// Example 3: Adaptive query routing
|
||||
fn example_adaptive_routing() {
|
||||
println!("=== Example 3: Adaptive Query Routing ===\n");
|
||||
|
||||
let mut layer = BTSPLayer::new(64, 2000.0);
|
||||
|
||||
// Learn query patterns -> optimal routes
|
||||
let queries = vec![
|
||||
(vec![1.0; 64], 0.9), // High priority
|
||||
(vec![0.5; 64], 0.5), // Medium priority
|
||||
(vec![0.1; 64], 0.1), // Low priority
|
||||
];
|
||||
|
||||
for (query, route) in &queries {
|
||||
layer.one_shot_associate(query, *route);
|
||||
println!("Learned route: {:?} -> {}", &query[..3], route);
|
||||
}
|
||||
|
||||
// Test routing
|
||||
let test_query = vec![1.0; 64];
|
||||
let route = layer.forward(&test_query);
|
||||
println!("\nQuery route: {:.2} (should be ~0.9)", route);
|
||||
}
|
||||
|
||||
/// Example 4: Temporal learning with eligibility traces
|
||||
fn example_eligibility_traces() {
|
||||
println!("\n=== Example 4: Eligibility Traces ===\n");
|
||||
|
||||
let mut synapse = BTSPSynapse::new(0.5, 2000.0).unwrap();
|
||||
|
||||
// Simulate 1 second of activity
|
||||
println!("Time\tActive\tPlateau\tTrace\tWeight");
|
||||
for t in 0..100 {
|
||||
let time = t as f32 * 10.0; // 10ms steps
|
||||
let active = t < 50; // Active for first 500ms
|
||||
let plateau = t == 60; // Plateau at 600ms
|
||||
|
||||
synapse.update(active, plateau, 10.0);
|
||||
|
||||
if t % 10 == 0 {
|
||||
println!(
|
||||
"{}ms\t{}\t{}\t{:.3}\t{:.3}",
|
||||
time,
|
||||
if active { "Y" } else { "N" },
|
||||
if plateau { "Y" } else { "N" },
|
||||
synapse.eligibility_trace(),
|
||||
synapse.weight()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Example 5: Batch association storage
|
||||
fn example_batch_storage() {
|
||||
println!("\n=== Example 5: Batch Storage ===\n");
|
||||
|
||||
let mut memory = BTSPAssociativeMemory::new(64, 32);
|
||||
|
||||
// Store multiple associations
|
||||
let pairs = vec![
|
||||
(vec![1.0; 64], vec![0.1; 32]),
|
||||
(vec![0.8; 64], vec![0.2; 32]),
|
||||
(vec![0.6; 64], vec![0.3; 32]),
|
||||
(vec![0.4; 64], vec![0.4; 32]),
|
||||
(vec![0.2; 64], vec![0.5; 32]),
|
||||
];
|
||||
|
||||
let pair_refs: Vec<_> = pairs
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_slice(), v.as_slice()))
|
||||
.collect();
|
||||
|
||||
memory.store_batch(&pair_refs).unwrap();
|
||||
println!("Stored {} associations instantly", pairs.len());
|
||||
|
||||
// Verify storage
|
||||
for (i, (key, expected)) in pairs.iter().enumerate() {
|
||||
let retrieved = memory.retrieve(key).unwrap();
|
||||
let error: f32 = expected
|
||||
.iter()
|
||||
.zip(retrieved.iter())
|
||||
.map(|(e, r)| (e - r).abs())
|
||||
.sum::<f32>()
|
||||
/ expected.len() as f32;
|
||||
|
||||
println!("Pair {}: recall error = {:.4}", i + 1, error);
|
||||
}
|
||||
}
|
||||
|
||||
/// Example 6: Real-world vector database scenario
|
||||
fn example_vector_database() {
|
||||
println!("\n=== Example 6: Vector Database Integration ===\n");
|
||||
|
||||
// Scenario: Store document embeddings with instant indexing
|
||||
|
||||
struct Document {
|
||||
id: String,
|
||||
embedding: Vec<f32>,
|
||||
metadata: Vec<f32>,
|
||||
}
|
||||
|
||||
let documents = vec![
|
||||
Document {
|
||||
id: "doc1".into(),
|
||||
embedding: vec![0.8; 768],
|
||||
metadata: vec![1.0, 0.0, 0.5, 0.8],
|
||||
},
|
||||
Document {
|
||||
id: "doc2".into(),
|
||||
embedding: vec![0.6; 768],
|
||||
metadata: vec![0.0, 1.0, 0.3, 0.6],
|
||||
},
|
||||
Document {
|
||||
id: "doc3".into(),
|
||||
embedding: vec![0.4; 768],
|
||||
metadata: vec![0.5, 0.5, 0.7, 0.4],
|
||||
},
|
||||
];
|
||||
|
||||
// Create BTSP memory for 768-dim embeddings (common size)
|
||||
let mut db_memory = BTSPAssociativeMemory::new(768, 4);
|
||||
|
||||
println!("Indexing documents with one-shot learning:");
|
||||
for doc in &documents {
|
||||
db_memory
|
||||
.store_one_shot(&doc.embedding, &doc.metadata)
|
||||
.unwrap();
|
||||
println!(" ✓ Indexed {} instantly", doc.id);
|
||||
}
|
||||
|
||||
// Query
|
||||
println!("\nQuerying:");
|
||||
let query = vec![0.8; 768];
|
||||
let result = db_memory.retrieve(&query).unwrap();
|
||||
println!(
|
||||
" Query result: {:?} (closest to doc1)",
|
||||
&result[..4]
|
||||
);
|
||||
|
||||
println!("\n✓ Vector database with instant, no-iteration indexing");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
example_one_shot_learning();
|
||||
example_embedding_storage();
|
||||
example_adaptive_routing();
|
||||
example_eligibility_traces();
|
||||
example_batch_storage();
|
||||
example_vector_database();
|
||||
|
||||
println!("\n═══════════════════════════════════════════");
|
||||
println!("All BTSP examples completed successfully!");
|
||||
println!("═══════════════════════════════════════════\n");
|
||||
}
|
||||
202
vendor/ruvector/docs/examples/monitoring_example.md
vendored
Normal file
202
vendor/ruvector/docs/examples/monitoring_example.md
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
# Real-Time Monitoring Example
|
||||
|
||||
This example demonstrates the event-driven monitoring system for the dynamic minimum cut algorithm.
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```rust
|
||||
use ruvector_mincut::monitoring::{MinCutMonitor, MonitorConfig, EventType};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
// Create a monitor with default configuration
|
||||
let monitor = MinCutMonitor::new(MonitorConfig::default());
|
||||
|
||||
// Register a callback for all events
|
||||
let counter = Arc::new(AtomicU64::new(0));
|
||||
let counter_clone = counter.clone();
|
||||
|
||||
monitor.on_event("event_counter", move |event| {
|
||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||
println!("Event: {:?}, New cut: {}", event.event_type, event.new_value);
|
||||
}).unwrap();
|
||||
|
||||
// Simulate cut changes
|
||||
monitor.notify(0.0, 10.0, None);
|
||||
monitor.notify(10.0, 5.0, None);
|
||||
|
||||
// Check metrics
|
||||
let metrics = monitor.metrics();
|
||||
println!("Total events: {}", metrics.total_events);
|
||||
println!("Average cut: {}", metrics.avg_cut);
|
||||
```
|
||||
|
||||
## Event Type Filtering
|
||||
|
||||
```rust
|
||||
use std::sync::atomic::AtomicU64;
|
||||
|
||||
let monitor = MinCutMonitor::new(MonitorConfig::default());
|
||||
let decrease_counter = Arc::new(AtomicU64::new(0));
|
||||
let counter_clone = decrease_counter.clone();
|
||||
|
||||
// Only track when cut decreases
|
||||
monitor.on_event_type(EventType::CutDecreased, "decrease_tracker", move |event| {
|
||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||
println!("Cut decreased from {} to {}", event.old_value, event.new_value);
|
||||
}).unwrap();
|
||||
|
||||
monitor.notify(10.0, 5.0, None); // Triggers callback
|
||||
monitor.notify(5.0, 15.0, None); // Does not trigger
|
||||
```
|
||||
|
||||
## Threshold Monitoring
|
||||
|
||||
```rust
|
||||
use ruvector_mincut::monitoring::{Threshold, MonitorBuilder};
|
||||
|
||||
let alert_counter = Arc::new(AtomicU64::new(0));
|
||||
let counter_clone = alert_counter.clone();
|
||||
|
||||
// Build monitor with thresholds
|
||||
let monitor = MonitorBuilder::new()
|
||||
.threshold_below(10.0, "critical") // Alert when cut goes below 10
|
||||
.threshold_above(100.0, "warning") // Alert when cut goes above 100
|
||||
.on_event_type(EventType::ThresholdCrossedBelow, "alert", move |event| {
|
||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||
println!("CRITICAL: Cut crossed below threshold!");
|
||||
})
|
||||
.build();
|
||||
|
||||
// Cross the threshold
|
||||
monitor.notify(50.0, 5.0, None); // Triggers alert
|
||||
|
||||
// Check threshold status
|
||||
let status = monitor.threshold_status();
|
||||
for (name, active) in status {
|
||||
println!("Threshold '{}': {}", name, if active { "ACTIVE" } else { "inactive" });
|
||||
}
|
||||
```
|
||||
|
||||
## Connectivity Monitoring
|
||||
|
||||
```rust
|
||||
let disconnected_counter = Arc::new(AtomicU64::new(0));
|
||||
let connected_counter = Arc::new(AtomicU64::new(0));
|
||||
|
||||
let disc_clone = disconnected_counter.clone();
|
||||
let conn_clone = connected_counter.clone();
|
||||
|
||||
let monitor = MinCutMonitor::new(MonitorConfig::default());
|
||||
|
||||
monitor.on_event_type(EventType::Disconnected, "disc", move |_| {
|
||||
disc_clone.fetch_add(1, Ordering::SeqCst);
|
||||
println!("WARNING: Graph became disconnected!");
|
||||
}).unwrap();
|
||||
|
||||
monitor.on_event_type(EventType::Connected, "conn", move |_| {
|
||||
conn_clone.fetch_add(1, Ordering::SeqCst);
|
||||
println!("Graph reconnected");
|
||||
}).unwrap();
|
||||
|
||||
// Simulate disconnection
|
||||
monitor.notify(10.0, 0.0, None); // Graph disconnected
|
||||
|
||||
// Simulate reconnection
|
||||
monitor.notify(0.0, 5.0, None); // Graph connected
|
||||
```
|
||||
|
||||
## Custom Configuration
|
||||
|
||||
```rust
|
||||
use std::time::Duration;
|
||||
|
||||
let config = MonitorConfig {
|
||||
max_callbacks: 50, // Allow up to 50 callbacks
|
||||
sample_interval: Duration::from_millis(100), // Sample history every 100ms
|
||||
max_history_size: 500, // Keep last 500 samples
|
||||
collect_metrics: true, // Enable metrics collection
|
||||
};
|
||||
|
||||
let monitor = MinCutMonitor::new(config);
|
||||
```
|
||||
|
||||
## Metrics Collection
|
||||
|
||||
```rust
|
||||
let monitor = MinCutMonitor::new(MonitorConfig::default());
|
||||
|
||||
// Simulate various events
|
||||
for i in 0..100 {
|
||||
let value = (i as f64 * 10.0) % 100.0;
|
||||
monitor.notify(value, value + 5.0, Some((i, i + 1)));
|
||||
}
|
||||
|
||||
// Get metrics
|
||||
let metrics = monitor.metrics();
|
||||
println!("Total events: {}", metrics.total_events);
|
||||
println!("Average cut: {:.2}", metrics.avg_cut);
|
||||
println!("Min observed: {:.2}", metrics.min_observed);
|
||||
println!("Max observed: {:.2}", metrics.max_observed);
|
||||
println!("Events by type:");
|
||||
for (event_type, count) in &metrics.events_by_type {
|
||||
println!(" {}: {}", event_type, count);
|
||||
}
|
||||
println!("History samples: {}", metrics.cut_history.len());
|
||||
```
|
||||
|
||||
## Thread-Safe Concurrent Monitoring
|
||||
|
||||
```rust
|
||||
use std::thread;
|
||||
|
||||
let monitor = Arc::new(MinCutMonitor::new(MonitorConfig::default()));
|
||||
let counter = Arc::new(AtomicU64::new(0));
|
||||
|
||||
// Register several callbacks on the shared monitor (registration here happens on the current thread)
|
||||
for i in 0..10 {
|
||||
let monitor_clone = monitor.clone();
|
||||
let counter_clone = counter.clone();
|
||||
|
||||
monitor_clone.on_event(&format!("callback_{}", i), move |_| {
|
||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||
}).unwrap();
|
||||
}
|
||||
|
||||
// Trigger events from multiple threads
|
||||
let handles: Vec<_> = (0..5).map(|i| {
|
||||
let monitor_clone = monitor.clone();
|
||||
thread::spawn(move || {
|
||||
monitor_clone.notify(i as f64, (i + 1) as f64, None);
|
||||
})
|
||||
}).collect();
|
||||
|
||||
for handle in handles {
|
||||
handle.join().unwrap();
|
||||
}
|
||||
|
||||
println!("Total callback invocations: {}", counter.load(Ordering::SeqCst));
|
||||
```
|
||||
|
||||
## Key Features
|
||||
|
||||
### Event-Driven Architecture
|
||||
- **Resilient callbacks**: Callbacks run synchronously on the notifying thread, and errors are caught
|
||||
- **Event filtering**: Register callbacks for specific event types
|
||||
- **Panic safety**: Callbacks that panic are caught and logged
|
||||
|
||||
### Threshold Monitoring
|
||||
- **Hysteresis**: Prevents alert storms by only triggering on state transitions
|
||||
- **Bi-directional**: Support for "below" and "above" threshold alerts
|
||||
- **Dynamic management**: Add/remove thresholds at runtime
|
||||
|
||||
### Metrics Collection
|
||||
- **Running statistics**: Average, min, max cut values
|
||||
- **Event counting**: Track events by type
|
||||
- **Sampled history**: Time-series data with configurable sampling
|
||||
- **Resource bounded**: Automatic history size management
|
||||
|
||||
### Thread Safety
|
||||
- **Concurrent reads**: RwLock allows multiple simultaneous readers
|
||||
- **Safe updates**: Write locks protect critical sections
|
||||
- **Arc-friendly**: Safe to share across threads
|
||||
335
vendor/ruvector/docs/examples/sparsevec_examples.sql
vendored
Normal file
335
vendor/ruvector/docs/examples/sparsevec_examples.sql
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
-- ============================================================================
-- SparseVec PostgreSQL Type - Usage Examples
-- ============================================================================

-- Basic Usage
-- ============================================================================

-- Literal format is {idx:val,idx:val,...}/dimensions
SELECT '{0:1.5,3:2.5,7:3.5}/10'::sparsevec;

-- An empty sparse vector (no non-zero entries)
SELECT '{}/100'::sparsevec;

-- A "dense" sparse vector: every position has a non-zero value
SELECT '{0:1.0,1:2.0,2:3.0,3:4.0,4:5.0}/5'::sparsevec;

-- Introspection
-- ============================================================================

-- Total dimensionality of the vector
SELECT sparsevec_dims('{0:1.5,3:2.5,7:3.5}/10'::sparsevec);
-- Returns: 10

-- Number of stored (non-zero) elements
SELECT sparsevec_nnz('{0:1.5,3:2.5,7:3.5}/10'::sparsevec);
-- Returns: 3

-- Fraction of positions that are non-zero
SELECT sparsevec_sparsity('{0:1.5,3:2.5,7:3.5}/10'::sparsevec);
-- Returns: 0.3 (30% non-zero)

-- Euclidean (L2) norm
SELECT sparsevec_norm('{0:3.0,1:4.0}/5'::sparsevec);
-- Returns: 5.0

-- Value at a stored index
SELECT sparsevec_get('{0:1.5,3:2.5,7:3.5}/10'::sparsevec, 3);
-- Returns: 2.5

-- Value at an index with no stored entry
SELECT sparsevec_get('{0:1.5,3:2.5,7:3.5}/10'::sparsevec, 5);
-- Returns: 0.0 (not present)

-- Parse a literal and inspect its structure
SELECT sparsevec_parse('{0:1.5,3:2.5,7:3.5}/10');
-- Returns JSON with full details
-- Distance Calculations
-- ============================================================================

-- L2 (Euclidean) distance between two sparse vectors
SELECT sparsevec_l2_distance(
    '{0:1.0,2:2.0,4:3.0}/5'::sparsevec,
    '{1:1.0,2:1.0,3:2.0}/5'::sparsevec
);

-- Inner product distance (negative dot product)
SELECT sparsevec_ip_distance(
    '{0:1.0,2:2.0}/5'::sparsevec,
    '{2:1.0,4:3.0}/5'::sparsevec
);
-- Returns: -2.0 (only index 2 overlaps: -(2*1))

-- Cosine distance; parallel vectors give ~0
SELECT sparsevec_cosine_distance(
    '{0:1.0,2:2.0}/5'::sparsevec,
    '{0:2.0,2:4.0}/5'::sparsevec
);
-- Returns: ~0.0 (same direction)

-- Mixed sparse-dense distances (sparsevec vs. ruvector)
SELECT sparsevec_vector_l2_distance(
    '{0:1.0,3:2.0}/5'::sparsevec,
    '[1.0,0.0,0.0,2.0,0.0]'::ruvector
);

SELECT sparsevec_vector_cosine_distance(
    '{0:1.0,3:2.0}/5'::sparsevec,
    '[1.0,0.0,0.0,2.0,0.0]'::ruvector
);

-- Vector Operations
-- ============================================================================

-- Scale to unit L2 norm
SELECT sparsevec_normalize('{0:3.0,1:4.0}/5'::sparsevec);
-- Returns: {0:0.6,1:0.8}/5

-- Element-wise sum; overlapping indices are added together
SELECT sparsevec_add(
    '{0:1.0,2:2.0}/5'::sparsevec,
    '{1:3.0,2:1.0}/5'::sparsevec
);
-- Returns: {0:1.0,1:3.0,2:3.0}/5

-- Scalar multiplication
SELECT sparsevec_mul_scalar('{0:1.0,2:2.0}/5'::sparsevec, 2.5);
-- Returns: {0:2.5,2:5.0}/5

-- Conversions
-- ============================================================================

-- Sparse to dense vector
SELECT sparsevec_to_vector('{0:1.0,3:2.0}/5'::sparsevec);
-- Returns: [1.0, 0.0, 0.0, 2.0, 0.0]

-- Dense to sparse; entries at or below the threshold are dropped
SELECT vector_to_sparsevec('[0.001,0.5,0.002,1.0,0.003]'::ruvector, 0.01);
-- Returns: {1:0.5,3:1.0}/5 (filters values ≤ 0.01)

-- Sparse to a plain float array
SELECT sparsevec_to_array('{0:1.0,3:2.0}/5'::sparsevec);

-- Array to sparse, with the same thresholding behavior
SELECT array_to_sparsevec(ARRAY[0.001, 0.5, 0.002, 1.0, 0.003]::float4[], 0.01);
-- Table Creation and Queries
-- ============================================================================

-- Table for text embeddings (e.g. TF-IDF over a 10K-term vocabulary)
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    content TEXT,
    embedding sparsevec(10000) -- 10K vocabulary
);

-- Seed documents with sparse embeddings
INSERT INTO documents (title, content, embedding) VALUES
    ('Document 1', 'machine learning artificial intelligence',
     '{45:0.8,123:0.6,789:0.9,1024:0.7}/10000'),
    ('Document 2', 'deep learning neural networks',
     '{45:0.3,234:0.9,789:0.4,2048:0.8}/10000'),
    ('Document 3', 'natural language processing',
     '{123:0.7,456:0.9,3072:0.6}/10000');

-- Documents most similar to document 1 (cosine distance, smaller = closer)
SELECT
    d.id,
    d.title,
    sparsevec_cosine_distance(d.embedding, query.embedding) AS distance
FROM
    documents d,
    (SELECT embedding FROM documents WHERE id = 1) AS query
WHERE
    d.id != 1
ORDER BY
    distance ASC
LIMIT 5;

-- Nearest neighbors of an ad-hoc query vector (L2 distance)
SELECT
    d.id,
    d.title,
    sparsevec_l2_distance(d.embedding,
        '{45:0.8,123:0.6,789:0.9}/10000'::sparsevec) AS distance
FROM
    documents d
ORDER BY
    distance ASC
LIMIT 10;

-- Recommender System Example
-- ============================================================================

-- Sparse user-item interaction matrix (one row per user)
CREATE TABLE user_profiles (
    user_id INT PRIMARY KEY,
    username TEXT NOT NULL,
    preferences sparsevec(100000) -- 100K items
);

-- Seed user profiles with sparse preference vectors (item_id: rating)
INSERT INTO user_profiles (user_id, username, preferences) VALUES
    (1, 'alice', '{123:5.0,456:4.5,789:3.5,1024:4.0}/100000'),
    (2, 'bob', '{123:4.0,234:5.0,789:4.5,2048:3.5}/100000'),
    (3, 'carol', '{456:5.0,890:4.0,2048:4.5,3072:5.0}/100000');

-- Collaborative filtering: find users most similar to user 1.
-- NOTE: the "similarity" column is actually a cosine *distance*
-- (lower = more similar), which is why it is sorted ascending.
SELECT
    u2.user_id,
    u2.username,
    sparsevec_cosine_distance(u1.preferences, u2.preferences) AS similarity
FROM
    user_profiles u1,
    user_profiles u2
WHERE
    u1.user_id = 1
    AND u2.user_id != 1
ORDER BY
    similarity ASC
LIMIT 10;

-- Candidate generation: the five users closest to user 1
-- (their preferences could then be aggregated into item suggestions)
WITH similar_users AS (
    SELECT
        u2.user_id,
        u2.preferences,
        sparsevec_cosine_distance(u1.preferences, u2.preferences) AS similarity
    FROM
        user_profiles u1,
        user_profiles u2
    WHERE
        u1.user_id = 1
        AND u2.user_id != 1
    ORDER BY
        similarity ASC
    LIMIT 5
)
SELECT
    user_id,
    similarity
FROM
    similar_users;
-- Graph Embeddings Example
-- ============================================================================

-- Store graph node embeddings
CREATE TABLE graph_nodes (
    node_id BIGINT PRIMARY KEY,
    node_type TEXT,
    sparse_embedding sparsevec(50000)
);

-- Seed nodes with embeddings
INSERT INTO graph_nodes (node_id, node_type, sparse_embedding) VALUES
    (1, 'person', '{100:0.9,500:0.7,1000:0.8}/50000'),
    (2, 'product', '{200:0.8,600:0.9,1500:0.7}/50000'),
    (3, 'company', '{100:0.5,300:0.8,2000:0.9}/50000');

-- Nearest neighbors of node 1 in embedding space
SELECT
    node_id,
    node_type,
    sparsevec_l2_distance(sparse_embedding,
        '{100:0.9,500:0.7,1000:0.8}/50000'::sparsevec) AS distance
FROM
    graph_nodes
WHERE
    node_id != 1
ORDER BY
    distance ASC
LIMIT 20;

-- Statistics and Analytics
-- ============================================================================

-- Sparsity distribution across the documents table
SELECT
    percentile_cont(0.5) WITHIN GROUP (ORDER BY sparsevec_sparsity(embedding)) AS median_sparsity,
    AVG(sparsevec_sparsity(embedding)) AS avg_sparsity,
    MIN(sparsevec_nnz(embedding)) AS min_nnz,
    MAX(sparsevec_nnz(embedding)) AS max_nnz
FROM
    documents;

-- Documents ranked by sparsity ratio
SELECT
    id,
    title,
    sparsevec_nnz(embedding) AS non_zeros,
    sparsevec_sparsity(embedding) AS sparsity_ratio
FROM
    documents
ORDER BY
    sparsity_ratio DESC
LIMIT 10;

-- Performance Comparison
-- ============================================================================

-- Compare storage efficiency of dense vs. sparse representations.
-- FIX: in PostgreSQL the cast operator '::' binds tighter than '||',
-- so the original expressions cast only the final string literal
-- (']' / '}/10000') and fail at runtime. The whole concatenated
-- literal must be parenthesized before the cast.
SELECT
    'Dense' AS type,
    pg_column_size(('[' || array_to_string(array_agg(i::text), ',') || ']')::ruvector) AS bytes
FROM generate_series(1, 10000) AS i
UNION ALL
SELECT
    'Sparse (1% non-zero)' AS type,
    pg_column_size(('{' || array_to_string(
        array_agg(i || ':1.0'), ',') || '}/10000')::sparsevec) AS bytes
FROM generate_series(1, 100) AS i;

-- Advanced Queries
-- ============================================================================

-- Batch distance calculation against a single query vector (via CTE)
WITH query_vector AS (
    SELECT '{0:1.0,100:2.0,500:3.0}/10000'::sparsevec AS vec
)
SELECT
    d.id,
    d.title,
    sparsevec_cosine_distance(d.embedding, q.vec) AS distance
FROM
    documents d,
    query_vector q
ORDER BY
    distance ASC;

-- Keep only documents within a distance threshold
SELECT
    d.id,
    d.title
FROM
    documents d
WHERE
    sparsevec_cosine_distance(d.embedding,
        '{45:0.8,123:0.6}/10000'::sparsevec) < 0.5
ORDER BY
    id;

-- Aggregate statistics over vector norms
SELECT
    AVG(sparsevec_norm(embedding)) AS avg_norm,
    STDDEV(sparsevec_norm(embedding)) AS stddev_norm
FROM
    documents;

-- Index Creation (Future Enhancement)
-- ============================================================================

-- These would be available once index support is added:
-- CREATE INDEX idx_doc_embedding ON documents
--     USING hnsw (embedding sparsevec_cosine_ops);

-- CREATE INDEX idx_user_prefs ON user_profiles
--     USING ivfflat (preferences sparsevec_l2_ops);

-- Cleanup
-- ============================================================================

-- DROP TABLE IF EXISTS documents;
-- DROP TABLE IF EXISTS user_profiles;
-- DROP TABLE IF EXISTS graph_nodes;
Reference in New Issue
Block a user