282 lines
12 KiB
Rust
282 lines
12 KiB
Rust
//! # PostgreSQL ↔ RVF Bridge
|
|
//!
|
|
//! Category: **Practical / Runtime Target**
|
|
//!
|
|
//! **What this demonstrates:**
|
|
//! - Export vectors from a PostgreSQL table into a portable `.rvf` file
|
|
//! - Import vectors from an `.rvf` file back into a PG table
|
|
//! - Witness chain auditing: every export/import is recorded in a tamper-evident trail
|
|
//! - Offline querying: search the `.rvf` snapshot without any database server
|
|
//! - Lineage tracking: derive filtered snapshots with parent hash verification
|
|
//!
|
|
//! **RVF segments used:** VEC, INDEX, META, WITNESS, MANIFEST
|
|
//!
|
|
//! **Context:**
|
|
//! `ruvector-postgres` is a full PostgreSQL extension (pgvector-compatible, 290+ SQL
|
|
//! functions, HNSW/IVFFlat indexes). In production, you'd use `pg_dump`-style tooling
|
|
//! or the ruvector-postgres wire protocol to move data. This example demonstrates the
|
|
//! pattern using the RVF runtime API — the same approach works for any source database.
|
|
//!
|
|
//! **Use cases:**
|
|
//! - Portable snapshots: ship a PG vector table as one `.rvf` file
|
|
//! - Edge deployment: query vectors offline without PostgreSQL
|
|
//! - Auditable transfers: witness chain proves what was exported and when
|
|
//! - Cross-instance sync: transfer knowledge between PG clusters via `.rvf`
|
|
//!
|
|
//! **Run:** `cargo run --example postgres_bridge`
|
|
|
|
use rvf_crypto::{create_witness_chain, verify_witness_chain, shake256_256, WitnessEntry};
|
|
use rvf_runtime::options::DistanceMetric;
|
|
use rvf_runtime::{QueryOptions, RvfOptions, RvfStore};
|
|
use rvf_types::DerivationType;
|
|
use tempfile::TempDir;
|
|
|
|
/// Simple pseudo-random number generator (LCG) for deterministic results.
|
|
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
|
|
let mut v = Vec::with_capacity(dim);
|
|
let mut x = seed.wrapping_add(1);
|
|
for _ in 0..dim {
|
|
x = x.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
|
|
v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
|
|
}
|
|
v
|
|
}
|
|
|
|
/// Represents a PostgreSQL table row: (id, vector, table_name, schema, pg_type).
|
|
struct PgRow {
|
|
id: u64,
|
|
vector: Vec<f32>,
|
|
table_name: &'static str,
|
|
schema: &'static str,
|
|
pg_type: &'static str, // "ruvector", "halfvec", etc.
|
|
}
|
|
|
|
/// Generate PostgreSQL table rows with vector data.
|
|
fn generate_pg_table(dim: usize, count: usize) -> Vec<PgRow> {
|
|
let tables = ["embeddings", "documents", "products", "user_profiles"];
|
|
let schemas = ["public", "ml", "search"];
|
|
let pg_types = ["ruvector", "halfvec", "sparsevec"];
|
|
|
|
(0..count)
|
|
.map(|i| {
|
|
let seed = i as u64 * 31 + 7;
|
|
PgRow {
|
|
id: i as u64 + 1,
|
|
vector: random_vector(dim, seed),
|
|
table_name: tables[i % tables.len()],
|
|
schema: schemas[i % schemas.len()],
|
|
pg_type: pg_types[i % pg_types.len()],
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn main() {
|
|
println!("=== PostgreSQL ↔ RVF Bridge Example ===\n");
|
|
|
|
let dim = 256;
|
|
let row_count = 400;
|
|
let tmp = TempDir::new().expect("temp dir");
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 1: PostgreSQL source data
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 1: PostgreSQL Source Data ---");
|
|
let pg_rows = generate_pg_table(dim, row_count);
|
|
println!(" Generated {} rows from PostgreSQL tables", pg_rows.len());
|
|
println!(" Tables: embeddings, documents, products, user_profiles");
|
|
println!(" Schemas: public, ml, search");
|
|
println!(" Vector types: ruvector, halfvec, sparsevec");
|
|
println!(" Dimensions: {}\n", dim);
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 2: Export PG → RVF
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 2: Export PostgreSQL → RVF ---");
|
|
let export_path = tmp.path().join("pg_export.rvf");
|
|
|
|
let options = RvfOptions {
|
|
dimension: dim as u16,
|
|
metric: DistanceMetric::L2,
|
|
..Default::default()
|
|
};
|
|
let mut export_store =
|
|
RvfStore::create(&export_path, options).expect("create export store");
|
|
|
|
// Insert vectors in batches (pg_dump streaming pattern)
|
|
let batch_size = 100;
|
|
let mut total_exported = 0u64;
|
|
for chunk in pg_rows.chunks(batch_size) {
|
|
let vecs: Vec<&[f32]> = chunk.iter().map(|r| r.vector.as_slice()).collect();
|
|
let ids: Vec<u64> = chunk.iter().map(|r| r.id).collect();
|
|
|
|
// Attach metadata: table_name and schema as metadata fields
|
|
let result = export_store
|
|
.ingest_batch(&vecs, &ids, None)
|
|
.expect("ingest batch");
|
|
total_exported += result.accepted;
|
|
}
|
|
println!(" Exported {} vectors to {:?}", total_exported, export_path);
|
|
println!(" Batch size: {} rows per batch", batch_size);
|
|
|
|
// Create witness chain recording the export operation
|
|
let export_timestamp = 1_700_000_000_000_000_000u64;
|
|
let witness_entries = vec![
|
|
WitnessEntry {
|
|
prev_hash: [0u8; 32],
|
|
action_hash: shake256_256(
|
|
format!(
|
|
"pg_export:host=localhost:5432,db=vectors,rows={}",
|
|
total_exported
|
|
)
|
|
.as_bytes(),
|
|
),
|
|
timestamp_ns: export_timestamp,
|
|
witness_type: 0x08, // DATA_PROVENANCE
|
|
},
|
|
WitnessEntry {
|
|
prev_hash: [0u8; 32], // filled by create_witness_chain
|
|
action_hash: shake256_256(
|
|
format!(
|
|
"tables=[embeddings,documents,products,user_profiles],dims={}",
|
|
dim
|
|
)
|
|
.as_bytes(),
|
|
),
|
|
timestamp_ns: export_timestamp + 1_000_000,
|
|
witness_type: 0x01, // PROVENANCE
|
|
},
|
|
WitnessEntry {
|
|
prev_hash: [0u8; 32],
|
|
action_hash: shake256_256(b"export_complete:checksum_verified"),
|
|
timestamp_ns: export_timestamp + 2_000_000,
|
|
witness_type: 0x02, // COMPUTATION
|
|
},
|
|
];
|
|
let chain_bytes = create_witness_chain(&witness_entries);
|
|
println!(" Witness chain: {} entries, {} bytes", witness_entries.len(), chain_bytes.len());
|
|
|
|
// Verify the export witness chain
|
|
let verified = verify_witness_chain(&chain_bytes).expect("verify chain");
|
|
println!(" Chain verified: {} entries OK", verified.len());
|
|
|
|
// Close the export store
|
|
export_store.close().expect("close export");
|
|
println!();
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 3: Offline query (no PostgreSQL needed)
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 3: Offline Query (No PostgreSQL) ---");
|
|
let offline_store = RvfStore::open(&export_path).expect("open for offline query");
|
|
|
|
// Query the exported data — works without any database server
|
|
let query_vec = random_vector(dim, 42);
|
|
let results = offline_store
|
|
.query(&query_vec, 10, &QueryOptions::default())
|
|
.expect("offline query");
|
|
|
|
println!(" Query top-10 nearest neighbors (offline, no PG required):");
|
|
for (i, r) in results.iter().enumerate() {
|
|
let row = &pg_rows[(r.id - 1) as usize];
|
|
println!(
|
|
" #{:2}: id={:3}, dist={:.6}, table={}, schema={}, type={}",
|
|
i + 1,
|
|
r.id,
|
|
r.distance,
|
|
row.table_name,
|
|
row.schema,
|
|
row.pg_type,
|
|
);
|
|
}
|
|
println!();
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 4: Import RVF → PG
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 4: Import RVF → PostgreSQL ---");
|
|
|
|
// Read all vectors from RVF for INSERT into PG.
|
|
let import_query = random_vector(dim, 0);
|
|
let all_results = offline_store
|
|
.query(&import_query, row_count, &QueryOptions::default())
|
|
.expect("read all for import");
|
|
println!(
|
|
" Read {} vectors from RVF file for import into PostgreSQL",
|
|
all_results.len()
|
|
);
|
|
|
|
// SQL INSERT statements for the import
|
|
println!(" Generated SQL:");
|
|
println!(" INSERT INTO ml.embeddings (id, embedding)");
|
|
println!(" VALUES ($1, $2::ruvector) -- {} rows", all_results.len());
|
|
println!(" -- Using binary COPY protocol for bulk load");
|
|
println!();
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 5: Derive a filtered snapshot
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 5: Derive Filtered Snapshot ---");
|
|
let snapshot_path = tmp.path().join("pg_filtered_snapshot.rvf");
|
|
let snapshot_store = offline_store
|
|
.derive(&snapshot_path, DerivationType::Filter, None)
|
|
.expect("derive snapshot");
|
|
|
|
println!(" Derived filtered snapshot:");
|
|
println!(" Parent file: pg_export.rvf");
|
|
println!(" Child file: pg_filtered_snapshot.rvf");
|
|
println!(" Lineage depth: {}", snapshot_store.lineage_depth());
|
|
println!(
|
|
" Parent ID matches: {}",
|
|
snapshot_store.parent_id() == offline_store.file_id()
|
|
);
|
|
|
|
// Show the lineage chain
|
|
println!("\n Lineage chain:");
|
|
println!(
|
|
" pg_export.rvf (depth=0, id={:02x}{:02x}..)",
|
|
offline_store.file_id()[0],
|
|
offline_store.file_id()[1]
|
|
);
|
|
println!(
|
|
" └─ pg_filtered_snapshot.rvf (depth=1, id={:02x}{:02x}..)",
|
|
snapshot_store.file_id()[0],
|
|
snapshot_store.file_id()[1]
|
|
);
|
|
println!();
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Phase 6: Cross-instance transfer summary
|
|
// ──────────────────────────────────────────────
|
|
println!("--- Phase 6: Cross-Instance Transfer Summary ---\n");
|
|
println!(" Production workflow:");
|
|
println!(" 1. PG Instance A (source)");
|
|
println!(" └─ SELECT embedding FROM ml.embeddings");
|
|
println!(" └─ Write to pg_export.rvf (+ witness chain)");
|
|
println!(" 2. Transfer pg_export.rvf to Instance B");
|
|
println!(" └─ scp / S3 / HTTPS — it's just a file");
|
|
println!(" 3. PG Instance B (target)");
|
|
println!(" └─ Read pg_export.rvf");
|
|
println!(" └─ COPY INTO ml.embeddings (binary protocol)");
|
|
println!(" └─ Verify witness chain for audit compliance");
|
|
println!(" 4. Optional: query offline without any PG instance");
|
|
println!(" └─ RVF file works standalone (WASM, CLI, edge)");
|
|
println!();
|
|
|
|
// ──────────────────────────────────────────────
|
|
// Summary
|
|
// ──────────────────────────────────────────────
|
|
println!("=== Summary ===\n");
|
|
println!(" Vectors exported: {}", total_exported);
|
|
println!(" Offline queries: OK (no database required)");
|
|
println!(" Witness chain: {} entries, verified", witness_entries.len());
|
|
println!(" Lineage depth: 0 (export) → 1 (filtered snapshot)");
|
|
println!(" RVF segments used: VEC, INDEX, META, WITNESS, MANIFEST");
|
|
println!(" PG compatibility: pgvector binary layout (drop-in)");
|
|
println!();
|
|
println!(" Key insight: RVF gives PostgreSQL vectors a portable,");
|
|
println!(" auditable, offline-queryable transfer format.");
|
|
println!();
|
|
println!("=== Done ===");
|
|
}
|