Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
322
vendor/ruvector/crates/ruvector-postgres/docs/examples/self-learning-usage.sql
vendored
Normal file
322
vendor/ruvector/crates/ruvector-postgres/docs/examples/self-learning-usage.sql
vendored
Normal file
@@ -0,0 +1,322 @@
|
||||
-- =============================================================================
|
||||
-- RuVector Self-Learning Module Usage Examples
|
||||
-- =============================================================================
|
||||
-- This file demonstrates how to use the self-learning and ReasoningBank
|
||||
-- features for adaptive query optimization.
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 1. Basic Setup: Enable Learning
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Turn learning on for a table, accepting the default configuration.
SELECT ruvector_enable_learning('my_vectors');

-- Turn learning on and pass a custom configuration as a jsonb document.
SELECT ruvector_enable_learning(
    'my_vectors',
    CAST('{"max_trajectories": 2000, "num_clusters": 15}' AS jsonb)
);
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 2. Recording Query Trajectories
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Trajectories are typically recorded automatically by search functions,
|
||||
-- but you can also record them manually for testing or custom workflows.
|
||||
|
||||
-- Manually record one query trajectory.
-- Arguments, in order:
--   1. table name
--   2. query vector
--   3. IDs of the returned results
--   4. latency in microseconds
--   5. ef_search used for the query
--   6. probes used for the query
SELECT ruvector_record_trajectory(
    'my_vectors',
    ARRAY[0.1, 0.2, 0.3, 0.4],
    CAST(ARRAY[1, 2, 3, 4, 5] AS bigint[]),
    1500,
    50,
    10
);
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 3. Providing Relevance Feedback
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Once the results of a query have been reviewed, report which of them were
-- actually relevant and which were not.
-- Arguments, in order:
--   1. table name
--   2. query vector
--   3. IDs judged relevant
--   4. IDs judged irrelevant
SELECT ruvector_record_feedback(
    'my_vectors',
    ARRAY[0.1, 0.2, 0.3, 0.4],
    CAST(ARRAY[1, 2, 5] AS bigint[]),
    CAST(ARRAY[3, 4] AS bigint[])
);
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 4. Extracting and Managing Patterns
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Cluster the recorded trajectories (k-means) into patterns.
-- Arguments: table name, number of clusters.
SELECT ruvector_extract_patterns('my_vectors', 10);

-- Fetch the current learning statistics for the table.
SELECT ruvector_learning_stats('my_vectors');
|
||||
|
||||
-- Example output:
|
||||
-- {
|
||||
-- "trajectories": {
|
||||
-- "total": 150,
|
||||
-- "with_feedback": 45,
|
||||
-- "avg_latency_us": 1234.5,
|
||||
-- "avg_precision": 0.85,
|
||||
-- "avg_recall": 0.78
|
||||
-- },
|
||||
-- "patterns": {
|
||||
-- "total": 10,
|
||||
-- "total_samples": 150,
|
||||
-- "avg_confidence": 0.87,
|
||||
-- "total_usage": 523
|
||||
-- }
|
||||
-- }
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 5. Auto-Tuning Search Parameters
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Auto-tune with the default target (balanced performance).
SELECT ruvector_auto_tune('my_vectors');

-- Auto-tune with speed as the optimization target.
SELECT ruvector_auto_tune('my_vectors', 'speed');

-- Auto-tune with accuracy as the optimization target.
SELECT ruvector_auto_tune('my_vectors', 'accuracy');

-- Auto-tune and additionally supply sample queries, one vector per row of
-- the two-dimensional array.
SELECT ruvector_auto_tune(
    'my_vectors',
    'balanced',
    ARRAY[
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
        [0.7, 0.8, 0.9]
    ]
);
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 6. Getting Optimized Search Parameters
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Ask for the search parameters optimized for one specific query vector.
-- Arguments: table name, query vector.
SELECT ruvector_get_search_params('my_vectors', ARRAY[0.1, 0.2, 0.3, 0.4]);
|
||||
|
||||
-- Example output:
|
||||
-- {
|
||||
-- "ef_search": 52,
|
||||
-- "probes": 12,
|
||||
-- "confidence": 0.89
|
||||
-- }
|
||||
|
||||
-- Use these parameters in your search:
|
||||
-- SET ruvector.ef_search = 52;
|
||||
-- SET ruvector.probes = 12;
|
||||
-- SELECT * FROM my_vectors ORDER BY embedding <-> '[0.1, 0.2, 0.3, 0.4]' LIMIT 10;
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 7. Pattern Consolidation and Pruning
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Merge near-duplicate patterns to reduce memory usage: patterns whose
-- similarity is >= 0.95 are consolidated.
SELECT ruvector_consolidate_patterns('my_vectors', 0.95);

-- Drop low-quality patterns: those with usage < 5 or confidence < 0.5.
-- Arguments: table name, min_usage, min_confidence.
SELECT ruvector_prune_patterns('my_vectors', 5, 0.5);
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 8. Complete Workflow Example
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Table holding the documents and their 384-dimensional embeddings.
CREATE TABLE documents (
    id        BIGSERIAL PRIMARY KEY,
    title     TEXT,
    embedding vector(384)
);

-- Populate the table with 1000 sample rows, each with a random embedding.
INSERT INTO documents (title, embedding)
SELECT
    'Document ' || series.i,
    ruvector_random(384)
FROM generate_series(1, 1000) AS series (i);

-- HNSW index over the embeddings, using the cosine operator class.
CREATE INDEX ON documents
    USING hnsw (embedding vector_cosine_ops);

-- Switch on learning so search parameters can be optimized adaptively.
SELECT ruvector_enable_learning('documents');
|
||||
|
||||
-- Simulate 50 user queries against `documents`, recording a trajectory for
-- each one and relevance feedback for every fifth one.
DO $$
DECLARE
    probe_vec    vector(384);  -- random query vector for this iteration
    hit_ids      bigint[];     -- IDs of the 10 nearest documents
    started_us   bigint;       -- wall clock before the search, in microseconds
    finished_us  bigint;       -- wall clock after the search, in microseconds
BEGIN
    FOR iteration IN 1..50 LOOP
        -- Draw a random query vector.
        probe_vec := ruvector_random(384);

        -- Run the search, bracketed by two wall-clock readings.
        started_us := EXTRACT(EPOCH FROM clock_timestamp()) * 1000000;

        SELECT array_agg(nearest.id)
        INTO hit_ids
        FROM (
            SELECT id
            FROM documents
            ORDER BY embedding <=> probe_vec
            LIMIT 10
        ) AS nearest;

        finished_us := EXTRACT(EPOCH FROM clock_timestamp()) * 1000000;

        -- Record the trajectory; the elapsed time is already a bigint.
        PERFORM ruvector_record_trajectory(
            'documents',
            CAST(probe_vec AS float4[]),
            hit_ids,
            finished_us - started_us,
            50,  -- current ef_search
            10   -- current probes
        );

        -- Feedback is only supplied on every fifth iteration.
        CONTINUE WHEN iteration % 5 <> 0;

        PERFORM ruvector_record_feedback(
            'documents',
            CAST(probe_vec AS float4[]),
            hit_ids[1:3],   -- treat the first 3 results as relevant
            hit_ids[8:10]   -- treat the last 3 results as not relevant
        );
    END LOOP;
END $$;
|
||||
|
||||
-- Build patterns (10 clusters) from the trajectories collected so far.
SELECT ruvector_extract_patterns('documents', 10);

-- Inspect the learning statistics.
SELECT ruvector_learning_stats('documents');

-- Auto-tune with the balanced optimization target.
SELECT ruvector_auto_tune('documents', 'balanced');
|
||||
|
||||
-- Look up the optimized parameters for a fresh (random) query vector and
-- split the returned document into typed columns.
WITH query AS (
    -- One random 384-dimensional query vector.
    SELECT ruvector_random(384) AS vec
),
params AS (
    -- Parameters suggested for that vector.
    SELECT
        ruvector_get_search_params('documents', CAST(query.vec AS float4[])) AS p
    FROM query
)
SELECT
    CAST(params.p -> 'ef_search' AS int)    AS ef_search,
    CAST(params.p -> 'probes' AS int)       AS probes,
    CAST(params.p -> 'confidence' AS float) AS confidence
FROM params;
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 9. Monitoring and Maintenance
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Routine consolidation of patterns (suitable for a cron job); 0.92 is the
-- similarity threshold.
SELECT ruvector_consolidate_patterns('documents', 0.92);

-- Monthly pruning of low-quality patterns (min_usage 10, min_confidence 0.6).
SELECT ruvector_prune_patterns('documents', 10, 0.6);

-- Wipe all learning data for the table; run only when that is intended.
SELECT ruvector_clear_learning('documents');
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 10. Advanced: Integration with Application Code
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- Example: Python application using learned parameters.
-- The table name is composed as a quoted identifier and every value is bound
-- as a parameter, so nothing caller-supplied is interpolated into SQL text.

/*
import psycopg2
from psycopg2 import sql


def search_with_learning(conn, table, query_vector, limit=10):
    """Search `table` using the parameters learned for `query_vector`.

    Returns a (results, params) tuple: the rows of the similarity search and
    the value returned by ruvector_get_search_params.
    """

    # Get optimized parameters
    with conn.cursor() as cur:
        cur.execute(
            "SELECT ruvector_get_search_params(%s, %s::float4[])",
            (table, query_vector),
        )
        params = cur.fetchone()[0]

    # A table name is an identifier, not a value, so it cannot be bound with
    # %s. Compose it with sql.Identifier instead of formatting it into the
    # query string. Splitting on '.' keeps schema-qualified names such as
    # 'public.documents' working. Note that quoted identifiers are
    # case-sensitive.
    search_query = sql.SQL('''
        SELECT id, title, embedding <=> %s::vector AS distance
        FROM {table}
        ORDER BY embedding <=> %s::vector
        LIMIT %s
    ''').format(table=sql.Identifier(*table.split('.')))

    # Apply parameters and search
    with conn.cursor() as cur:
        # int() guarantees that only integers reach the SET statements.
        cur.execute("SET ruvector.ef_search = %s", (int(params['ef_search']),))
        cur.execute("SET ruvector.probes = %s", (int(params['probes']),))
        cur.execute(search_query, (query_vector, query_vector, limit))
        results = cur.fetchall()

    return results, params


# Use it
conn = psycopg2.connect("dbname=mydb")
results, params = search_with_learning(
    conn,
    'documents',
    [0.1, 0.2, 0.3, ...],
    limit=10
)

print(f"Search completed with ef_search={params['ef_search']}, "
      f"confidence={params['confidence']:.2f}")
*/
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- 11. Best Practices
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
-- 1. Collect enough trajectories before extracting patterns (50+ recommended)
|
||||
-- 2. Provide relevance feedback when possible for better learning
|
||||
-- 3. Consolidate patterns regularly to manage memory
|
||||
-- 4. Prune low-quality patterns periodically
|
||||
-- 5. Monitor learning statistics to track improvement
|
||||
-- 6. Start with balanced optimization, adjust based on needs
|
||||
-- 7. Re-extract patterns when query patterns change significantly
|
||||
|
||||
-- Example monitoring query: show the current learning statistics next to a
-- recommendation derived from them.
WITH current_stats AS (
    -- Single call site for the statistics, so the pretty-printed column and
    -- the recommendation below are both computed from this one value.
    SELECT ruvector_learning_stats('documents') AS stats
)
SELECT
    jsonb_pretty(cs.stats) AS stats,
    CASE
        -- Too few trajectories recorded to extract meaningful patterns.
        WHEN (cs.stats->'trajectories'->>'total')::int < 50
            THEN 'Collecting data - need more trajectories'
        -- Enough trajectories, but no patterns extracted yet.
        WHEN (cs.stats->'patterns'->>'total')::int = 0
            THEN 'Ready to extract patterns'
        -- Patterns exist but their average confidence is below 0.7.
        WHEN (cs.stats->'patterns'->>'avg_confidence')::float < 0.7
            THEN 'Low confidence - collect more feedback'
        ELSE 'System is learning well'
    END AS recommendation
FROM current_stats AS cs;
|
||||
Reference in New Issue
Block a user