// Source: wifi-densepose/vendor/ruvector/examples/rvf-kernel-optimized/src/verified_ingest.rs
//! Verified vector ingest pipeline using ruvector-verified ultra-optimizations.
//!
//! Every vector batch passes through:
//! 1. Gated proof routing (Reflex/Standard/Deep tier selection)
//! 2. FastTermArena dedup (4-wide linear probe, 95%+ hit rate)
//! 3. Dimension proof generation (prove_dim_eq with FxHash cache)
//! 4. ConversionCache (open-addressing equality cache)
//! 5. Thread-local pool resource acquisition
//! 6. ProofAttestation creation (82-byte witness, type 0x0E)
use anyhow::{anyhow, Result};
use ruvector_verified::{
cache::ConversionCache,
fast_arena::FastTermArena,
gated::{self, ProofKind},
pools,
proof_store::create_attestation,
vector_types, ProofAttestation, ProofEnvironment,
};
use rvf_runtime::RvfStore;
use tracing::{debug, info};
/// Statistics from a verified ingest run.
///
/// Every field is a plain counter or ratio, so the type derives `Copy`
/// (cheap to pass around), `Default` (all-zero snapshot), and `PartialEq`
/// (convenient in tests and comparisons).
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct IngestStats {
    /// Total vectors verified and ingested.
    pub vectors_verified: u64,
    /// Total proof terms generated.
    pub proofs_generated: u64,
    /// Arena dedup cache hit rate (0.0-1.0).
    pub arena_hit_rate: f64,
    /// Conversion cache hit rate (0.0-1.0).
    pub conversion_cache_hit_rate: f64,
    /// Proof routing tier distribution [reflex, standard, deep].
    pub tier_distribution: [u64; 3],
    /// Number of attestations created.
    pub attestations_created: u64,
    /// Total ingest wall time in microseconds.
    pub total_time_us: u64,
}
/// Verified ingest pipeline combining all ruvector-verified optimizations.
pub struct VerifiedIngestPipeline {
    // Proof environment that owns generated proof terms and exposes
    // `stats().proofs_constructed` (read by `stats()`).
    env: ProofEnvironment,
    // Dedup arena for proof terms (4-wide linear probe per module docs);
    // interning returns a term id plus a was-cached flag.
    arena: FastTermArena,
    // Equality cache mapping (term id, dim) -> previously generated proof id.
    cache: ConversionCache,
    // Expected dimension of every ingested vector.
    dim: u32,
    // Proof-routing tier counters, indexed [reflex, standard, deep]
    // to match `IngestStats::tier_distribution`.
    tier_counts: [u64; 3],
    // One attestation pushed per successfully ingested batch.
    attestations: Vec<ProofAttestation>,
}
impl VerifiedIngestPipeline {
    /// Create a new pipeline for vectors of the given dimension.
    pub fn new(dim: u32) -> Self {
        Self {
            env: ProofEnvironment::new(),
            // Capacities chosen for high dedup hit rates on repeated
            // dimension proofs — TODO confirm against workload sizing.
            arena: FastTermArena::with_capacity(4096),
            cache: ConversionCache::with_capacity(1024),
            dim,
            tier_counts: [0; 3],
            attestations: Vec::new(),
        }
    }

    /// Verify a batch of vectors and ingest into the RVF store.
    ///
    /// Returns the number of vectors successfully ingested.
    ///
    /// # Errors
    ///
    /// Fails when `vectors` and `ids` differ in length, when dimension
    /// verification fails, or when the store ingest itself fails.
    pub fn verify_and_ingest(
        &mut self,
        store: &mut RvfStore,
        vectors: &[Vec<f32>],
        ids: &[u64],
    ) -> Result<u64> {
        // Validate pairing up front instead of relying on the store to
        // notice a vector/id mismatch mid-ingest.
        if vectors.len() != ids.len() {
            return Err(anyhow!(
                "vector/id length mismatch: {} vectors vs {} ids",
                vectors.len(),
                ids.len()
            ));
        }
        // Acquire thread-local pooled resources (auto-returned on drop).
        let _pooled = pools::acquire();
        // Route the dimension-equality proof to the cheapest tier.
        let decision = gated::route_proof(
            ProofKind::DimensionEquality {
                expected: self.dim,
                actual: self.dim,
            },
            &self.env,
        );
        match decision.tier {
            ruvector_verified::gated::ProofTier::Reflex => self.tier_counts[0] += 1,
            ruvector_verified::gated::ProofTier::Standard { .. } => self.tier_counts[1] += 1,
            ruvector_verified::gated::ProofTier::Deep => self.tier_counts[2] += 1,
        }
        // Check the arena dedup cache for an existing dimension-proof term.
        // (Renamed from `_term_id`: the binding IS used below, so the
        // "intentionally unused" underscore prefix was misleading.)
        let dim_hash = ruvector_verified::fast_arena::fx_hash_pair(self.dim, self.dim);
        let (term_id, was_cached) = self.arena.intern(dim_hash);
        if was_cached {
            debug!("arena cache hit for dim proof");
        }
        // Consult the conversion cache before generating a fresh proof.
        let proof_id = if let Some(pid) = self.cache.get(term_id, self.dim) {
            debug!(pid, "conversion cache hit");
            pid
        } else {
            // Generate dimension equality proof (~500ns per module docs).
            let pid = vector_types::prove_dim_eq(&mut self.env, self.dim, self.dim)?;
            self.cache.insert(term_id, self.dim, pid);
            pid
        };
        // Verify every vector in the batch has the expected dimension.
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let _verified = vector_types::verify_batch_dimensions(&mut self.env, self.dim, &refs)?;
        debug!(count = vectors.len(), proof_id, "batch verified");
        // Ingest into the RVF store.
        store
            .ingest_batch(&refs, ids, None)
            .map_err(|e| anyhow!("ingest: {e:?}"))?;
        // Record a proof attestation for this batch.
        let attestation = create_attestation(&self.env, proof_id);
        self.attestations.push(attestation);
        Ok(vectors.len() as u64)
    }

    /// Get current statistics.
    ///
    /// `total_time_us` is left at 0 and is expected to be filled in by the
    /// caller, which owns the wall-clock measurement.
    pub fn stats(&self) -> IngestStats {
        let arena_stats = self.arena.stats();
        let cache_stats = self.cache.stats();
        // Note: a previous version fetched `pools::pool_stats()` here and
        // discarded every component; the dead call has been removed.
        IngestStats {
            vectors_verified: self.env.stats().proofs_constructed,
            proofs_generated: self.env.stats().proofs_constructed,
            arena_hit_rate: arena_stats.cache_hit_rate(),
            conversion_cache_hit_rate: cache_stats.hit_rate(),
            tier_distribution: self.tier_counts,
            attestations_created: self.attestations.len() as u64,
            total_time_us: 0, // filled by caller
        }
    }

    /// Get all attestations created during ingest.
    pub fn attestations(&self) -> &[ProofAttestation] {
        &self.attestations
    }

    /// Get the proof environment for inspection.
    pub fn env(&self) -> &ProofEnvironment {
        &self.env
    }

    /// Reset the pipeline for a new ingest cycle: clears the proof
    /// environment, both caches, tier counters, and stored attestations.
    pub fn reset(&mut self) {
        self.env.reset();
        self.arena.reset();
        self.cache.clear();
        self.tier_counts = [0; 3];
        self.attestations.clear();
    }
}
/// Run a complete verified ingest cycle: generate vectors, verify, ingest.
///
/// Generates `vec_count` random vectors of dimension `dim` (seeded, so runs
/// are reproducible), pushes them through the verified pipeline in batches,
/// and reports aggregate statistics plus the resulting store file size.
///
/// Returns `(IngestStats, store_file_size_bytes)`; the file size is 0 if
/// `store_path` metadata cannot be read.
///
/// # Errors
///
/// Propagates any verification or store-ingest failure from the pipeline.
pub fn run_verified_ingest(
    store: &mut RvfStore,
    store_path: &std::path::Path,
    dim: u32,
    vec_count: usize,
    seed: u64,
) -> Result<(IngestStats, u64)> {
    use rand::prelude::*;
    let start = std::time::Instant::now();
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    let mut pipeline = VerifiedIngestPipeline::new(dim);
    // Generate vectors in batches of up to 1000. Clamp to at least 1:
    // `step_by(0)` panics, which the previous `1000.min(vec_count)` hit
    // whenever `vec_count == 0`. With `vec_count == 0` the range below is
    // empty, so no batch is processed.
    let batch_size = 1000.min(vec_count).max(1);
    let mut total_ingested = 0u64;
    for batch_start in (0..vec_count).step_by(batch_size) {
        let batch_end = (batch_start + batch_size).min(vec_count);
        let count = batch_end - batch_start;
        let vectors: Vec<Vec<f32>> = (0..count)
            .map(|_| (0..dim as usize).map(|_| rng.gen::<f32>()).collect())
            .collect();
        let ids: Vec<u64> = (batch_start as u64..batch_end as u64).collect();
        let ingested = pipeline.verify_and_ingest(store, &vectors, &ids)?;
        total_ingested += ingested;
    }
    let elapsed = start.elapsed();
    let mut stats = pipeline.stats();
    stats.total_time_us = elapsed.as_micros() as u64;
    stats.vectors_verified = total_ingested;
    info!(
        vectors = total_ingested,
        proofs = stats.proofs_generated,
        arena_hit = format!("{:.1}%", stats.arena_hit_rate * 100.0),
        cache_hit = format!("{:.1}%", stats.conversion_cache_hit_rate * 100.0),
        tiers = format!(
            "R:{}/S:{}/D:{}",
            stats.tier_distribution[0], stats.tier_distribution[1], stats.tier_distribution[2]
        ),
        attestations = stats.attestations_created,
        time_us = stats.total_time_us,
        "verified ingest complete"
    );
    // Best-effort file size; missing/unreadable metadata reports 0.
    let store_size = std::fs::metadata(store_path).map(|m| m.len()).unwrap_or(0);
    Ok((stats, store_size))
}