Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,322 @@
-- =============================================================================
-- RuVector Self-Learning Module Usage Examples
-- =============================================================================
-- This file demonstrates how to use the self-learning and ReasoningBank
-- features for adaptive query optimization.
-- -----------------------------------------------------------------------------
-- 1. Basic Setup: Enable Learning
-- -----------------------------------------------------------------------------

-- Turn learning on for a table, accepting the default configuration.
SELECT ruvector_enable_learning('my_vectors');

-- Turn learning on and pass an explicit JSONB configuration object.
SELECT ruvector_enable_learning(
    'my_vectors',
    CAST('{"max_trajectories": 2000, "num_clusters": 15}' AS jsonb)
);
-- -----------------------------------------------------------------------------
-- 2. Recording Query Trajectories
-- -----------------------------------------------------------------------------
-- Search functions normally record trajectories automatically. A manual call
-- such as the one below is useful for testing or for custom workflows.

-- Manually record one query trajectory.
SELECT ruvector_record_trajectory(
    'my_vectors',                            -- table name
    ARRAY[0.1, 0.2, 0.3, 0.4],               -- query vector
    CAST(ARRAY[1, 2, 3, 4, 5] AS bigint[]),  -- result IDs
    1500,                                    -- latency, in microseconds
    50,                                      -- ef_search that was used
    10                                       -- probes that were used
);
-- -----------------------------------------------------------------------------
-- 3. Providing Relevance Feedback
-- -----------------------------------------------------------------------------
-- Once the results of a query have been inspected, report which of them
-- turned out to be relevant and which did not.

SELECT ruvector_record_feedback(
    'my_vectors',                      -- table name
    ARRAY[0.1, 0.2, 0.3, 0.4],         -- query vector
    CAST(ARRAY[1, 2, 5] AS bigint[]),  -- relevant IDs
    CAST(ARRAY[3, 4] AS bigint[])      -- irrelevant IDs
);
-- -----------------------------------------------------------------------------
-- 4. Extracting and Managing Patterns
-- -----------------------------------------------------------------------------

-- Run k-means clustering over the recorded trajectories to extract patterns.
SELECT ruvector_extract_patterns(
    'my_vectors',  -- table name
    10             -- number of clusters
);

-- Report the current learning statistics for the table.
SELECT ruvector_learning_stats('my_vectors');

-- Example output:
-- {
--   "trajectories": {
--     "total": 150,
--     "with_feedback": 45,
--     "avg_latency_us": 1234.5,
--     "avg_precision": 0.85,
--     "avg_recall": 0.78
--   },
--   "patterns": {
--     "total": 10,
--     "total_samples": 150,
--     "avg_confidence": 0.87,
--     "total_usage": 523
--   }
-- }
-- -----------------------------------------------------------------------------
-- 5. Auto-Tuning Search Parameters
-- -----------------------------------------------------------------------------

-- Default optimization target: balanced performance.
SELECT ruvector_auto_tune('my_vectors');

-- Optimization target: speed.
SELECT ruvector_auto_tune('my_vectors', 'speed');

-- Optimization target: accuracy.
SELECT ruvector_auto_tune('my_vectors', 'accuracy');

-- Balanced target, additionally supplying a set of sample query vectors.
SELECT ruvector_auto_tune(
    'my_vectors',
    'balanced',
    ARRAY[
        ARRAY[0.1, 0.2, 0.3],
        ARRAY[0.4, 0.5, 0.6],
        ARRAY[0.7, 0.8, 0.9]
    ]
);
-- -----------------------------------------------------------------------------
-- 6. Getting Optimized Search Parameters
-- -----------------------------------------------------------------------------

-- Ask for the optimized search parameters for one specific query vector.
SELECT ruvector_get_search_params(
    'my_vectors',
    ARRAY[0.1, 0.2, 0.3, 0.4]
);

-- Example output:
-- {
--   "ef_search": 52,
--   "probes": 12,
--   "confidence": 0.89
-- }

-- The returned values can then be applied to a search:
--   SET ruvector.ef_search = 52;
--   SET ruvector.probes = 12;
--   SELECT * FROM my_vectors ORDER BY embedding <-> '[0.1, 0.2, 0.3, 0.4]' LIMIT 10;
-- -----------------------------------------------------------------------------
-- 7. Pattern Consolidation and Pruning
-- -----------------------------------------------------------------------------

-- Merge similar patterns to reduce memory usage.
-- Patterns whose similarity is >= 0.95 are merged.
SELECT ruvector_consolidate_patterns(
    'my_vectors',
    0.95
);

-- Drop low-quality patterns: those with usage < 5 or confidence < 0.5.
SELECT ruvector_prune_patterns(
    'my_vectors',
    5,    -- min_usage
    0.5   -- min_confidence
);
-- -----------------------------------------------------------------------------
-- 8. Complete Workflow Example
-- -----------------------------------------------------------------------------

-- Table that stores each document together with its 384-dimensional embedding.
CREATE TABLE documents (
    id        BIGSERIAL PRIMARY KEY,
    title     TEXT,
    embedding vector(384)
);

-- Populate the table with 1000 sample rows carrying random embeddings.
INSERT INTO documents (title, embedding)
SELECT
    'Document ' || series.doc_number,
    ruvector_random(384)
FROM generate_series(1, 1000) AS series (doc_number);

-- HNSW index over the embeddings, using the cosine operator class.
CREATE INDEX ON documents
    USING hnsw (embedding vector_cosine_ops);

-- Switch on learning so search parameters can adapt to the workload.
SELECT ruvector_enable_learning('documents');
-- Simulate user traffic: run 50 random searches, record a trajectory for each,
-- and attach relevance feedback to every fifth one.
DO $$
DECLARE
    probe_vec   vector(384);  -- random query vector for the current iteration
    hit_ids     bigint[];     -- IDs of the 10 nearest documents
    started_us  bigint;       -- wall-clock time before the search, microseconds
    finished_us bigint;       -- wall-clock time after the search, microseconds
BEGIN
    FOR iteration IN 1..50 LOOP
        -- Fresh random query for this iteration.
        probe_vec := ruvector_random(384);

        -- Time the nearest-neighbour search (cosine distance, top 10).
        started_us := EXTRACT(EPOCH FROM clock_timestamp()) * 1000000;
        hit_ids := (
            SELECT array_agg(nearest.id)
            FROM (
                SELECT id
                FROM documents
                ORDER BY embedding <=> probe_vec
                LIMIT 10
            ) AS nearest
        );
        finished_us := EXTRACT(EPOCH FROM clock_timestamp()) * 1000000;

        -- Store what was searched, what came back, and how long it took.
        PERFORM ruvector_record_trajectory(
            'documents',
            CAST(probe_vec AS float4[]),
            hit_ids,
            CAST(finished_us - started_us AS bigint),
            50,  -- current ef_search
            10   -- current probes
        );

        -- Every fifth query also gets simulated relevance feedback.
        IF MOD(iteration, 5) = 0 THEN
            PERFORM ruvector_record_feedback(
                'documents',
                CAST(probe_vec AS float4[]),
                hit_ids[1:3],   -- first 3 results treated as relevant
                hit_ids[8:10]   -- last 3 results treated as not relevant
            );
        END IF;
    END LOOP;
END $$;
-- Turn the collected trajectories into 10 pattern clusters.
SELECT ruvector_extract_patterns('documents', 10);

-- Inspect the resulting learning statistics.
SELECT ruvector_learning_stats('documents');

-- Auto-tune using the balanced optimization target.
SELECT ruvector_auto_tune('documents', 'balanced');

-- Look up the optimized parameters for a brand-new random query and split the
-- returned JSON into individual typed columns.
WITH fresh_query AS (
    SELECT ruvector_random(384) AS vec
),
tuned AS (
    SELECT
        ruvector_get_search_params(
            'documents',
            (SELECT CAST(fresh_query.vec AS float4[]) FROM fresh_query)
        ) AS search_params
)
SELECT
    CAST(tuned.search_params -> 'ef_search' AS int)    AS ef_search,
    CAST(tuned.search_params -> 'probes' AS int)       AS probes,
    CAST(tuned.search_params -> 'confidence' AS float) AS confidence
FROM tuned;
-- -----------------------------------------------------------------------------
-- 9. Monitoring and Maintenance
-- -----------------------------------------------------------------------------

-- Routine consolidation of similar patterns (suitable for a cron job).
SELECT ruvector_consolidate_patterns(
    'documents',
    0.92
);

-- Monthly pruning of low-quality patterns.
SELECT ruvector_prune_patterns(
    'documents',
    10,
    0.6
);

-- Wipe all learning data for the table, if that is ever needed.
SELECT ruvector_clear_learning('documents');
-- -----------------------------------------------------------------------------
-- 10. Advanced: Integration with Application Code
-- -----------------------------------------------------------------------------
-- Example: Python application using learned parameters
/*
import psycopg2
from psycopg2 import sql


def search_with_learning(conn, table, query_vector, limit=10):
    """Search using learned optimal parameters"""
    # Get optimized parameters
    with conn.cursor() as cur:
        cur.execute("""
            SELECT ruvector_get_search_params(%s, %s::float4[])
        """, (table, query_vector))
        params = cur.fetchone()[0]

    # Apply parameters and search
    with conn.cursor() as cur:
        # Bind the learned values as integers instead of formatting them
        # into the SQL text.
        cur.execute("SET ruvector.ef_search = %s", (int(params['ef_search']),))
        cur.execute("SET ruvector.probes = %s", (int(params['probes']),))
        # A table name cannot be a bind parameter; compose it as a quoted
        # identifier rather than interpolating it with an f-string.
        cur.execute(
            sql.SQL("""
                SELECT id, title, embedding <=> %s::vector AS distance
                FROM {}
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """).format(sql.Identifier(table)),
            (query_vector, query_vector, limit),
        )
        results = cur.fetchall()

    return results, params


# Use it
conn = psycopg2.connect("dbname=mydb")
results, params = search_with_learning(
    conn,
    'documents',
    [0.1, 0.2, 0.3, ...],
    limit=10
)
print(f"Search completed with ef_search={params['ef_search']}, "
      f"confidence={params['confidence']:.2f}")
*/
-- -----------------------------------------------------------------------------
-- 11. Best Practices
-- -----------------------------------------------------------------------------
-- 1. Collect enough trajectories before extracting patterns (50+ recommended)
-- 2. Provide relevance feedback when possible for better learning
-- 3. Consolidate patterns regularly to manage memory
-- 4. Prune low-quality patterns periodically
-- 5. Monitor learning statistics to track improvement
-- 6. Start with balanced optimization, adjust based on needs
-- 7. Re-extract patterns when query patterns change significantly
-- Example monitoring query:
-- Fetches the learning statistics for 'documents' once (in the derived table)
-- and builds both output columns from that single value:
--   stats          - the statistics, pretty-printed
--   recommendation - a short hint about what to do next
-- Missing or NULL counts are treated as 0, so a table with no statistics is
-- reported as still collecting data rather than falling through to the
-- 'System is learning well' branch.
SELECT
    jsonb_pretty(t.stats) AS stats,
    CASE
        WHEN COALESCE((t.stats -> 'trajectories' ->> 'total')::int, 0) < 50
            THEN 'Collecting data - need more trajectories'
        WHEN COALESCE((t.stats -> 'patterns' ->> 'total')::int, 0) = 0
            THEN 'Ready to extract patterns'
        WHEN (t.stats -> 'patterns' ->> 'avg_confidence')::float < 0.7
            THEN 'Low confidence - collect more feedback'
        ELSE 'System is learning well'
    END AS recommendation
FROM (
    SELECT ruvector_learning_stats('documents') AS stats
) AS t;