Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
//! Linux kernel + eBPF embedding into an RVF container.
use anyhow::{anyhow, Result};
use rvf_kernel::KernelBuilder;
use rvf_runtime::RvfStore;
use rvf_types::ebpf::EbpfProgramType;
use tracing::info;
/// Result of embedding a kernel and eBPF programs into the RVF store.
///
/// Returned by [`embed_optimized_kernel`] so callers can report what was
/// actually written into the container.
pub struct KernelEmbedResult {
    /// Size of the kernel image in bytes.
    pub kernel_size: usize,
    /// Number of eBPF programs embedded (0 when eBPF embedding is disabled).
    pub ebpf_programs: usize,
    /// SHA3-256 hash of the kernel image.
    pub kernel_hash: [u8; 32],
    /// Kernel cmdline used.
    pub cmdline: String,
}
/// Embed an optimized Linux kernel and precompiled eBPF programs into the store.
///
/// Uses `from_builtin_minimal()` for a 4KB kernel stub that works without
/// Docker or a cross-compiler. In production, replace with a real kernel
/// built via `KernelBuilder::build_docker()`.
///
/// # Errors
/// Fails if the kernel cannot be built or if embedding the kernel or any
/// eBPF program into the store fails.
pub fn embed_optimized_kernel(
    store: &mut RvfStore,
    cmdline: &str,
    enable_ebpf: bool,
    max_dim: u16,
) -> Result<KernelEmbedResult> {
    // Stage 1: Build minimal kernel (4KB stub, always works)
    let image =
        KernelBuilder::from_builtin_minimal().map_err(|e| anyhow!("kernel build: {e:?}"))?;
    let image_len = image.bzimage.len();
    let image_hash = image.image_hash;
    info!(size = image_len, "built minimal kernel image");

    // Stage 2: Embed kernel with optimized cmdline
    // arch=0 (x86_64), kernel_type=0 (MicroLinux), flags include COMPRESSED + VIRTIO
    let kernel_flags = 0x01 | 0x02 | 0x04; // COMPRESSED | VIRTIO_NET | VIRTIO_BLK
    store
        .embed_kernel(0, 0, kernel_flags, &image.bzimage, 8080, Some(cmdline))
        .map_err(|e| anyhow!("embed kernel: {e:?}"))?;
    info!("embedded kernel into RVF store");

    // Stage 3: Embed precompiled eBPF programs (skipped entirely when disabled)
    let mut embedded = 0;
    if enable_ebpf {
        // (program type, segment type, attach type) triples for XDP / socket / TC.
        let programs = [
            (EbpfProgramType::XdpDistance, 1u8, 1u8),
            (EbpfProgramType::SocketFilter, 3u8, 3u8),
            (EbpfProgramType::TcFilter, 2u8, 2u8),
        ];
        for &(kind, seg, attach) in programs.iter() {
            let compiled = rvf_ebpf::EbpfCompiler::from_precompiled(kind)
                .map_err(|e| anyhow!("ebpf compile: {e:?}"))?;
            store
                .embed_ebpf(
                    seg,
                    attach,
                    max_dim,
                    &compiled.elf_bytes,
                    compiled.btf_bytes.as_deref(),
                )
                .map_err(|e| anyhow!("embed ebpf: {e:?}"))?;
            embedded += 1;
        }
        info!(count = embedded, "embedded eBPF programs");
    }

    Ok(KernelEmbedResult {
        kernel_size: image_len,
        ebpf_programs: embedded,
        kernel_hash: image_hash,
        cmdline: cmdline.to_string(),
    })
}

View File

@@ -0,0 +1,50 @@
//! Hyper-optimized RVF example with Linux kernel embedding and formal verification.
//!
//! Demonstrates `ruvector-verified` as the optimization layer for a kernel-embedded
//! RVF container. Every vector operation passes through verified proofs using:
//! - `FastTermArena` — O(1) bump allocation with 4-wide dedup cache
//! - `ConversionCache` — open-addressing conversion equality cache
//! - Gated proof routing — 3-tier Reflex/Standard/Deep with auto-escalation
//! - Thread-local pools — zero-contention resource reuse
//! - `ProofAttestation` — 82-byte formal proof witness (type 0x0E)
// Kernel + eBPF embedding stage (Stage 1 of the demo pipeline).
pub mod kernel_embed;
// Formally-verified vector ingest stage (Stage 2 of the demo pipeline).
pub mod verified_ingest;
/// Default vector dimension (384 = 48x8 AVX2 / 96x4 NEON aligned).
pub const DEFAULT_DIM: u32 = 384;
/// Default vector count for benchmarks.
pub const DEFAULT_VEC_COUNT: usize = 10_000;
/// Optimized kernel cmdline for vector workload microVMs.
///
/// - `nokaslr nosmp`: deterministic single-core execution
/// - `transparent_hugepage=always`: 2MB pages for vector arrays
/// - `isolcpus=1 nohz_full=1 rcu_nocbs=1`: CPU isolation, no timer ticks
/// - `mitigations=off`: full speed in trusted microVM
pub const KERNEL_CMDLINE: &str = "console=ttyS0 quiet nokaslr nosmp \
transparent_hugepage=always isolcpus=1 nohz_full=1 rcu_nocbs=1 mitigations=off";
/// Configuration for the verified RVF pipeline.
///
/// Construct via [`Default`] for demo-friendly settings, or fill fields
/// explicitly for benchmarks.
pub struct VerifiedRvfConfig {
    /// Vector dimensionality.
    pub dim: u32,
    /// Number of vectors to ingest.
    pub vec_count: usize,
    /// Embed precompiled eBPF programs (XDP, socket, TC).
    pub enable_ebpf: bool,
    /// Max reduction steps for Deep-tier proofs.
    pub proof_fuel: usize,
}
impl Default for VerifiedRvfConfig {
fn default() -> Self {
Self {
dim: DEFAULT_DIM,
vec_count: 1_000,
enable_ebpf: true,
proof_fuel: 10_000,
}
}
}

View File

@@ -0,0 +1,97 @@
//! CLI demo: build kernel -> embed -> verified ingest -> query -> report.
use anyhow::Result;
use rvf_runtime::{QueryOptions, RvfOptions, RvfStore};
use tracing::info;
/// CLI demo entry point: build kernel -> embed -> verified ingest -> query -> report.
///
/// Runs against a temporary on-disk store that is removed when the tempdir
/// is dropped. All output goes through `tracing` at INFO level.
fn main() -> Result<()> {
    // INFO-level logging without target prefixes, for clean demo output.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .with_target(false)
        .init();
    let config = rvf_kernel_optimized::VerifiedRvfConfig::default();
    info!("RVF Kernel-Optimized Example");
    info!(
        " dim={}, vectors={}, ebpf={}",
        config.dim, config.vec_count, config.enable_ebpf
    );
    info!(" cmdline: {}", rvf_kernel_optimized::KERNEL_CMDLINE);
    // Create temp store (tempdir cleans the file up on exit).
    let dir = tempfile::tempdir()?;
    let store_path = dir.path().join("optimized.rvf");
    let options = RvfOptions {
        dimension: config.dim as u16,
        ..RvfOptions::default()
    };
    let mut store = RvfStore::create(&store_path, options)
        .map_err(|e| anyhow::anyhow!("create store: {e:?}"))?;
    // Stage 1: Embed kernel + eBPF
    info!("--- Stage 1: Kernel + eBPF Embedding ---");
    let kernel_result = rvf_kernel_optimized::kernel_embed::embed_optimized_kernel(
        &mut store,
        rvf_kernel_optimized::KERNEL_CMDLINE,
        config.enable_ebpf,
        config.dim as u16,
    )?;
    info!(
        " kernel: {} bytes, eBPF: {} programs",
        kernel_result.kernel_size, kernel_result.ebpf_programs
    );
    // Stage 2: Verified ingest (deterministic seed so runs are reproducible).
    info!("--- Stage 2: Verified Vector Ingest ---");
    let (stats, store_size) = rvf_kernel_optimized::verified_ingest::run_verified_ingest(
        &mut store,
        &store_path,
        config.dim,
        config.vec_count,
        42, // deterministic seed
    )?;
    info!(" vectors: {}", stats.vectors_verified);
    info!(" proofs: {}", stats.proofs_generated);
    info!(" arena hit rate: {:.1}%", stats.arena_hit_rate * 100.0);
    info!(
        " cache hit rate: {:.1}%",
        stats.conversion_cache_hit_rate * 100.0
    );
    info!(
        " tiers: reflex={}, standard={}, deep={}",
        stats.tier_distribution[0], stats.tier_distribution[1], stats.tier_distribution[2]
    );
    info!(" attestations: {}", stats.attestations_created);
    info!(" time: {} us", stats.total_time_us);
    // Stage 3: Query with a synthetic ramp vector (0.000, 0.001, 0.002, ...).
    info!("--- Stage 3: Query ---");
    let query_vec: Vec<f32> = (0..config.dim as usize)
        .map(|i| (i as f32) * 0.001)
        .collect();
    let results = store
        .query(&query_vec, 5, &QueryOptions::default())
        .map_err(|e| anyhow::anyhow!("query: {e:?}"))?;
    for (i, r) in results.iter().enumerate() {
        info!(" #{}: id={}, distance={:.4}", i + 1, r.id, r.distance);
    }
    // Summary: print the first 4 bytes of the kernel hash as a fingerprint.
    info!("--- Summary ---");
    info!(" store size: {} bytes", store_size);
    info!(
        " kernel hash: {:02x}{:02x}{:02x}{:02x}...",
        kernel_result.kernel_hash[0],
        kernel_result.kernel_hash[1],
        kernel_result.kernel_hash[2],
        kernel_result.kernel_hash[3]
    );
    // Close flushes and releases the store before the tempdir is removed.
    store.close().map_err(|e| anyhow::anyhow!("close: {e:?}"))?;
    info!("done");
    Ok(())
}

View File

@@ -0,0 +1,222 @@
//! Verified vector ingest pipeline using ruvector-verified ultra-optimizations.
//!
//! Every vector batch passes through:
//! 1. Gated proof routing (Reflex/Standard/Deep tier selection)
//! 2. FastTermArena dedup (4-wide linear probe, 95%+ hit rate)
//! 3. Dimension proof generation (prove_dim_eq with FxHash cache)
//! 4. ConversionCache (open-addressing equality cache)
//! 5. Thread-local pool resource acquisition
//! 6. ProofAttestation creation (82-byte witness, type 0x0E)
use anyhow::{anyhow, Result};
use ruvector_verified::{
cache::ConversionCache,
fast_arena::FastTermArena,
gated::{self, ProofKind},
pools,
proof_store::create_attestation,
vector_types, ProofAttestation, ProofEnvironment,
};
use rvf_runtime::RvfStore;
use tracing::{debug, info};
/// Statistics from a verified ingest run.
///
/// Produced by [`VerifiedIngestPipeline::stats`]; `total_time_us` is filled
/// in afterwards by the caller that measured the wall clock.
#[derive(Debug, Clone)]
pub struct IngestStats {
    /// Total vectors verified and ingested.
    pub vectors_verified: u64,
    /// Total proof terms generated.
    pub proofs_generated: u64,
    /// Arena dedup cache hit rate (0.0-1.0).
    pub arena_hit_rate: f64,
    /// Conversion cache hit rate (0.0-1.0).
    pub conversion_cache_hit_rate: f64,
    /// Proof routing tier distribution [reflex, standard, deep].
    pub tier_distribution: [u64; 3],
    /// Number of attestations created.
    pub attestations_created: u64,
    /// Total ingest wall time in microseconds.
    pub total_time_us: u64,
}
/// Verified ingest pipeline combining all ruvector-verified optimizations.
pub struct VerifiedIngestPipeline {
    /// Proof environment that owns all generated proof terms.
    env: ProofEnvironment,
    /// Bump-allocating term arena with dedup cache.
    arena: FastTermArena,
    /// Open-addressing conversion equality cache.
    cache: ConversionCache,
    /// Expected dimensionality of every ingested vector.
    dim: u32,
    /// Count of proofs routed per tier: [reflex, standard, deep].
    tier_counts: [u64; 3],
    /// One attestation per successfully ingested batch.
    attestations: Vec<ProofAttestation>,
}
impl VerifiedIngestPipeline {
    /// Create a new pipeline for vectors of the given dimension.
    ///
    /// Arena (4096) and cache (1024) capacities are fixed starting sizes for
    /// the demo workload.
    pub fn new(dim: u32) -> Self {
        Self {
            env: ProofEnvironment::new(),
            arena: FastTermArena::with_capacity(4096),
            cache: ConversionCache::with_capacity(1024),
            dim,
            tier_counts: [0; 3],
            attestations: Vec::new(),
        }
    }
    /// Verify a batch of vectors and ingest into the RVF store.
    ///
    /// Per batch: route the dimension-equality proof to the cheapest tier,
    /// consult the arena dedup cache and conversion cache, generate the proof
    /// only on a cache miss, verify every vector's dimension, ingest, and
    /// record a proof attestation.
    ///
    /// Returns the number of vectors successfully ingested.
    ///
    /// # Errors
    /// Fails if proof generation, batch verification, or store ingestion fails.
    pub fn verify_and_ingest(
        &mut self,
        store: &mut RvfStore,
        vectors: &[Vec<f32>],
        ids: &[u64],
    ) -> Result<u64> {
        // Acquire thread-local pooled resources (auto-returned on drop)
        let _pooled = pools::acquire();
        // Route proof to cheapest tier
        let decision = gated::route_proof(
            ProofKind::DimensionEquality {
                expected: self.dim,
                actual: self.dim,
            },
            &self.env,
        );
        match decision.tier {
            ruvector_verified::gated::ProofTier::Reflex => self.tier_counts[0] += 1,
            ruvector_verified::gated::ProofTier::Standard { .. } => self.tier_counts[1] += 1,
            ruvector_verified::gated::ProofTier::Deep => self.tier_counts[2] += 1,
        }
        // Check arena dedup cache for dimension proof.
        // `term_id` was previously named `_term_id` despite being used below;
        // underscore-prefixed bindings signal "unused" (clippy::used_underscore_binding).
        let dim_hash = ruvector_verified::fast_arena::fx_hash_pair(self.dim, self.dim);
        let (term_id, was_cached) = self.arena.intern(dim_hash);
        if was_cached {
            debug!("arena cache hit for dim proof");
        }
        // Check conversion cache; only generate a fresh proof on a miss.
        let cached_proof = self.cache.get(term_id, self.dim);
        let proof_id = if let Some(pid) = cached_proof {
            debug!(pid, "conversion cache hit");
            pid
        } else {
            // Generate dimension equality proof (~500ns)
            let pid = vector_types::prove_dim_eq(&mut self.env, self.dim, self.dim)?;
            self.cache.insert(term_id, self.dim, pid);
            pid
        };
        // Verify all vectors in the batch have correct dimensions
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let _verified = vector_types::verify_batch_dimensions(&mut self.env, self.dim, &refs)?;
        debug!(count = vectors.len(), proof_id, "batch verified");
        // Ingest into RVF store
        store
            .ingest_batch(&refs, ids, None)
            .map_err(|e| anyhow!("ingest: {e:?}"))?;
        // Create proof attestation for this batch
        let attestation = create_attestation(&self.env, proof_id);
        self.attestations.push(attestation);
        Ok(vectors.len() as u64)
    }
    /// Get current statistics.
    ///
    /// `total_time_us` is left at 0 for the caller to fill in with its own
    /// wall-clock measurement.
    pub fn stats(&self) -> IngestStats {
        let arena_stats = self.arena.stats();
        let cache_stats = self.cache.stats();
        // NOTE(review): pool stats are fetched but unused here — presumably
        // kept for the side effect or future reporting; confirm whether the
        // call can be dropped.
        let (_pool_hits, _pool_misses, _) = pools::pool_stats();
        IngestStats {
            vectors_verified: self.env.stats().proofs_constructed,
            proofs_generated: self.env.stats().proofs_constructed,
            arena_hit_rate: arena_stats.cache_hit_rate(),
            conversion_cache_hit_rate: cache_stats.hit_rate(),
            tier_distribution: self.tier_counts,
            attestations_created: self.attestations.len() as u64,
            total_time_us: 0, // filled by caller
        }
    }
    /// Get all attestations created during ingest.
    pub fn attestations(&self) -> &[ProofAttestation] {
        &self.attestations
    }
    /// Get the proof environment for inspection.
    pub fn env(&self) -> &ProofEnvironment {
        &self.env
    }
    /// Reset the pipeline for a new ingest cycle, clearing all proofs,
    /// caches, tier counters, and attestations.
    pub fn reset(&mut self) {
        self.env.reset();
        self.arena.reset();
        self.cache.clear();
        self.tier_counts = [0; 3];
        self.attestations.clear();
    }
}
/// Run a complete verified ingest cycle: generate vectors, verify, ingest.
///
/// Returns (IngestStats, store_file_size_bytes).
/// Run a complete verified ingest cycle: generate vectors, verify, ingest.
///
/// Vectors are generated deterministically from `seed` and processed in
/// batches of at most 1000. A `vec_count` of 0 is a no-op (no panic).
///
/// Returns (IngestStats, store_file_size_bytes).
///
/// # Errors
/// Propagates any proof-generation, verification, or ingestion failure.
pub fn run_verified_ingest(
    store: &mut RvfStore,
    store_path: &std::path::Path,
    dim: u32,
    vec_count: usize,
    seed: u64,
) -> Result<(IngestStats, u64)> {
    use rand::prelude::*;
    let start = std::time::Instant::now();
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    let mut pipeline = VerifiedIngestPipeline::new(dim);
    // Generate vectors in batches of up to 1000. Clamp to at least 1 because
    // `Iterator::step_by` panics on a step of 0, which the previous
    // `1000.min(vec_count)` produced when `vec_count == 0`; with a clamp the
    // range `(0..0)` is simply empty.
    let batch_size = vec_count.clamp(1, 1000);
    let mut total_ingested = 0u64;
    for batch_start in (0..vec_count).step_by(batch_size) {
        // Final batch may be shorter than batch_size.
        let batch_end = (batch_start + batch_size).min(vec_count);
        let count = batch_end - batch_start;
        let vectors: Vec<Vec<f32>> = (0..count)
            .map(|_| (0..dim as usize).map(|_| rng.gen::<f32>()).collect())
            .collect();
        // Sequential IDs matching the batch's position in the overall run.
        let ids: Vec<u64> = (batch_start as u64..batch_end as u64).collect();
        let ingested = pipeline.verify_and_ingest(store, &vectors, &ids)?;
        total_ingested += ingested;
    }
    let elapsed = start.elapsed();
    let mut stats = pipeline.stats();
    stats.total_time_us = elapsed.as_micros() as u64;
    // Report the actual ingest count rather than the proof-construction count.
    stats.vectors_verified = total_ingested;
    info!(
        vectors = total_ingested,
        proofs = stats.proofs_generated,
        arena_hit = format!("{:.1}%", stats.arena_hit_rate * 100.0),
        cache_hit = format!("{:.1}%", stats.conversion_cache_hit_rate * 100.0),
        tiers = format!(
            "R:{}/S:{}/D:{}",
            stats.tier_distribution[0], stats.tier_distribution[1], stats.tier_distribution[2]
        ),
        attestations = stats.attestations_created,
        time_us = stats.total_time_us,
        "verified ingest complete"
    );
    // Get store file size (0 if the metadata read fails).
    let store_size = std::fs::metadata(store_path).map(|m| m.len()).unwrap_or(0);
    Ok((stats, store_size))
}