Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
//! Linux kernel + eBPF embedding into an RVF container.
use anyhow::{anyhow, Result};
use rvf_kernel::KernelBuilder;
use rvf_runtime::RvfStore;
use rvf_types::ebpf::EbpfProgramType;
use tracing::info;
/// Result of embedding a kernel and eBPF programs into the RVF store.
///
/// Returned by [`embed_optimized_kernel`] so callers can report what was
/// actually written into the container.
pub struct KernelEmbedResult {
    /// Size of the kernel image in bytes.
    pub kernel_size: usize,
    /// Number of eBPF programs embedded (0 when eBPF embedding is disabled).
    pub ebpf_programs: usize,
    /// SHA3-256 hash of the kernel image.
    pub kernel_hash: [u8; 32],
    /// Kernel cmdline used.
    pub cmdline: String,
}
/// Embed an optimized Linux kernel and precompiled eBPF programs into the store.
///
/// Uses `from_builtin_minimal()` for a 4KB kernel stub that works without
/// Docker or a cross-compiler. In production, replace with a real kernel
/// built via `KernelBuilder::build_docker()`.
///
/// # Errors
/// Fails if the kernel cannot be built or if embedding the kernel or any
/// eBPF program into the store fails.
pub fn embed_optimized_kernel(
    store: &mut RvfStore,
    cmdline: &str,
    enable_ebpf: bool,
    max_dim: u16,
) -> Result<KernelEmbedResult> {
    // Stage 1: Build minimal kernel (4KB stub, always works)
    let image =
        KernelBuilder::from_builtin_minimal().map_err(|e| anyhow!("kernel build: {e:?}"))?;
    let image_len = image.bzimage.len();
    let image_hash = image.image_hash;
    info!(size = image_len, "built minimal kernel image");

    // Stage 2: Embed kernel with optimized cmdline
    // arch=0 (x86_64), kernel_type=0 (MicroLinux), flags include COMPRESSED + VIRTIO
    let kernel_flags = 0x01 | 0x02 | 0x04; // COMPRESSED | VIRTIO_NET | VIRTIO_BLK
    store
        .embed_kernel(0, 0, kernel_flags, &image.bzimage, 8080, Some(cmdline))
        .map_err(|e| anyhow!("embed kernel: {e:?}"))?;
    info!("embedded kernel into RVF store");

    // Stage 3: Embed precompiled eBPF programs (skipped entirely when disabled)
    let mut embedded = 0;
    if enable_ebpf {
        // (program type, segment type, attach type) triples for XDP / socket / TC.
        let programs = [
            (EbpfProgramType::XdpDistance, 1u8, 1u8),
            (EbpfProgramType::SocketFilter, 3u8, 3u8),
            (EbpfProgramType::TcFilter, 2u8, 2u8),
        ];
        for &(kind, seg, attach) in programs.iter() {
            let compiled = rvf_ebpf::EbpfCompiler::from_precompiled(kind)
                .map_err(|e| anyhow!("ebpf compile: {e:?}"))?;
            store
                .embed_ebpf(
                    seg,
                    attach,
                    max_dim,
                    &compiled.elf_bytes,
                    compiled.btf_bytes.as_deref(),
                )
                .map_err(|e| anyhow!("embed ebpf: {e:?}"))?;
            embedded += 1;
        }
        info!(count = embedded, "embedded eBPF programs");
    }

    Ok(KernelEmbedResult {
        kernel_size: image_len,
        ebpf_programs: embedded,
        kernel_hash: image_hash,
        cmdline: cmdline.to_string(),
    })
}

View File

@@ -0,0 +1,50 @@
//! Hyper-optimized RVF example with Linux kernel embedding and formal verification.
//!
//! Demonstrates `ruvector-verified` as the optimization layer for a kernel-embedded
//! RVF container. Every vector operation passes through verified proofs using:
//! - `FastTermArena` — O(1) bump allocation with 4-wide dedup cache
//! - `ConversionCache` — open-addressing conversion equality cache
//! - Gated proof routing — 3-tier Reflex/Standard/Deep with auto-escalation
//! - Thread-local pools — zero-contention resource reuse
//! - `ProofAttestation` — 82-byte formal proof witness (type 0x0E)
// Kernel + eBPF embedding stage (Stage 1 of the demo pipeline).
pub mod kernel_embed;
// Formally-verified vector ingest stage (Stage 2 of the demo pipeline).
pub mod verified_ingest;
/// Default vector dimension (384 = 48x8 AVX2 / 96x4 NEON aligned).
pub const DEFAULT_DIM: u32 = 384;
/// Default vector count for benchmarks.
pub const DEFAULT_VEC_COUNT: usize = 10_000;
/// Optimized kernel cmdline for vector workload microVMs.
///
/// - `nokaslr nosmp`: deterministic single-core execution
/// - `transparent_hugepage=always`: 2MB pages for vector arrays
/// - `isolcpus=1 nohz_full=1 rcu_nocbs=1`: CPU isolation, no timer ticks
/// - `mitigations=off`: full speed in trusted microVM
pub const KERNEL_CMDLINE: &str = "console=ttyS0 quiet nokaslr nosmp \
transparent_hugepage=always isolcpus=1 nohz_full=1 rcu_nocbs=1 mitigations=off";
/// Configuration for the verified RVF pipeline.
///
/// Construct via [`Default`] for demo-friendly settings, or fill fields
/// explicitly for benchmarks.
pub struct VerifiedRvfConfig {
    /// Vector dimensionality.
    pub dim: u32,
    /// Number of vectors to ingest.
    pub vec_count: usize,
    /// Embed precompiled eBPF programs (XDP, socket, TC).
    pub enable_ebpf: bool,
    /// Max reduction steps for Deep-tier proofs.
    pub proof_fuel: usize,
}
impl Default for VerifiedRvfConfig {
fn default() -> Self {
Self {
dim: DEFAULT_DIM,
vec_count: 1_000,
enable_ebpf: true,
proof_fuel: 10_000,
}
}
}

View File

@@ -0,0 +1,97 @@
//! CLI demo: build kernel -> embed -> verified ingest -> query -> report.
use anyhow::Result;
use rvf_runtime::{QueryOptions, RvfOptions, RvfStore};
use tracing::info;
/// CLI demo entry point: build kernel -> embed -> verified ingest -> query -> report.
///
/// Runs against a temporary on-disk store that is removed when the tempdir
/// is dropped. All output goes through `tracing` at INFO level.
fn main() -> Result<()> {
    // INFO-level logging without target prefixes, for clean demo output.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .with_target(false)
        .init();
    let config = rvf_kernel_optimized::VerifiedRvfConfig::default();
    info!("RVF Kernel-Optimized Example");
    info!(
        " dim={}, vectors={}, ebpf={}",
        config.dim, config.vec_count, config.enable_ebpf
    );
    info!(" cmdline: {}", rvf_kernel_optimized::KERNEL_CMDLINE);
    // Create temp store (tempdir cleans the file up on exit).
    let dir = tempfile::tempdir()?;
    let store_path = dir.path().join("optimized.rvf");
    let options = RvfOptions {
        dimension: config.dim as u16,
        ..RvfOptions::default()
    };
    let mut store = RvfStore::create(&store_path, options)
        .map_err(|e| anyhow::anyhow!("create store: {e:?}"))?;
    // Stage 1: Embed kernel + eBPF
    info!("--- Stage 1: Kernel + eBPF Embedding ---");
    let kernel_result = rvf_kernel_optimized::kernel_embed::embed_optimized_kernel(
        &mut store,
        rvf_kernel_optimized::KERNEL_CMDLINE,
        config.enable_ebpf,
        config.dim as u16,
    )?;
    info!(
        " kernel: {} bytes, eBPF: {} programs",
        kernel_result.kernel_size, kernel_result.ebpf_programs
    );
    // Stage 2: Verified ingest (deterministic seed so runs are reproducible).
    info!("--- Stage 2: Verified Vector Ingest ---");
    let (stats, store_size) = rvf_kernel_optimized::verified_ingest::run_verified_ingest(
        &mut store,
        &store_path,
        config.dim,
        config.vec_count,
        42, // deterministic seed
    )?;
    info!(" vectors: {}", stats.vectors_verified);
    info!(" proofs: {}", stats.proofs_generated);
    info!(" arena hit rate: {:.1}%", stats.arena_hit_rate * 100.0);
    info!(
        " cache hit rate: {:.1}%",
        stats.conversion_cache_hit_rate * 100.0
    );
    info!(
        " tiers: reflex={}, standard={}, deep={}",
        stats.tier_distribution[0], stats.tier_distribution[1], stats.tier_distribution[2]
    );
    info!(" attestations: {}", stats.attestations_created);
    info!(" time: {} us", stats.total_time_us);
    // Stage 3: Query with a synthetic ramp vector (0.000, 0.001, 0.002, ...).
    info!("--- Stage 3: Query ---");
    let query_vec: Vec<f32> = (0..config.dim as usize)
        .map(|i| (i as f32) * 0.001)
        .collect();
    let results = store
        .query(&query_vec, 5, &QueryOptions::default())
        .map_err(|e| anyhow::anyhow!("query: {e:?}"))?;
    for (i, r) in results.iter().enumerate() {
        info!(" #{}: id={}, distance={:.4}", i + 1, r.id, r.distance);
    }
    // Summary: print the first 4 bytes of the kernel hash as a fingerprint.
    info!("--- Summary ---");
    info!(" store size: {} bytes", store_size);
    info!(
        " kernel hash: {:02x}{:02x}{:02x}{:02x}...",
        kernel_result.kernel_hash[0],
        kernel_result.kernel_hash[1],
        kernel_result.kernel_hash[2],
        kernel_result.kernel_hash[3]
    );
    // Close flushes and releases the store before the tempdir is removed.
    store.close().map_err(|e| anyhow::anyhow!("close: {e:?}"))?;
    info!("done");
    Ok(())
}

View File

@@ -0,0 +1,222 @@
//! Verified vector ingest pipeline using ruvector-verified ultra-optimizations.
//!
//! Every vector batch passes through:
//! 1. Gated proof routing (Reflex/Standard/Deep tier selection)
//! 2. FastTermArena dedup (4-wide linear probe, 95%+ hit rate)
//! 3. Dimension proof generation (prove_dim_eq with FxHash cache)
//! 4. ConversionCache (open-addressing equality cache)
//! 5. Thread-local pool resource acquisition
//! 6. ProofAttestation creation (82-byte witness, type 0x0E)
use anyhow::{anyhow, Result};
use ruvector_verified::{
cache::ConversionCache,
fast_arena::FastTermArena,
gated::{self, ProofKind},
pools,
proof_store::create_attestation,
vector_types, ProofAttestation, ProofEnvironment,
};
use rvf_runtime::RvfStore;
use tracing::{debug, info};
/// Statistics from a verified ingest run.
///
/// Produced by [`VerifiedIngestPipeline::stats`]; `total_time_us` is filled
/// in afterwards by the caller that measured the wall clock.
#[derive(Debug, Clone)]
pub struct IngestStats {
    /// Total vectors verified and ingested.
    pub vectors_verified: u64,
    /// Total proof terms generated.
    pub proofs_generated: u64,
    /// Arena dedup cache hit rate (0.0-1.0).
    pub arena_hit_rate: f64,
    /// Conversion cache hit rate (0.0-1.0).
    pub conversion_cache_hit_rate: f64,
    /// Proof routing tier distribution [reflex, standard, deep].
    pub tier_distribution: [u64; 3],
    /// Number of attestations created.
    pub attestations_created: u64,
    /// Total ingest wall time in microseconds.
    pub total_time_us: u64,
}
/// Verified ingest pipeline combining all ruvector-verified optimizations.
pub struct VerifiedIngestPipeline {
    /// Proof environment that owns all generated proof terms.
    env: ProofEnvironment,
    /// Bump-allocating term arena with dedup cache.
    arena: FastTermArena,
    /// Open-addressing conversion equality cache.
    cache: ConversionCache,
    /// Expected dimensionality of every ingested vector.
    dim: u32,
    /// Count of proofs routed per tier: [reflex, standard, deep].
    tier_counts: [u64; 3],
    /// One attestation per successfully ingested batch.
    attestations: Vec<ProofAttestation>,
}
impl VerifiedIngestPipeline {
    /// Create a new pipeline for vectors of the given dimension.
    ///
    /// Arena (4096) and cache (1024) capacities are fixed starting sizes for
    /// the demo workload.
    pub fn new(dim: u32) -> Self {
        Self {
            env: ProofEnvironment::new(),
            arena: FastTermArena::with_capacity(4096),
            cache: ConversionCache::with_capacity(1024),
            dim,
            tier_counts: [0; 3],
            attestations: Vec::new(),
        }
    }
    /// Verify a batch of vectors and ingest into the RVF store.
    ///
    /// Per batch: route the dimension-equality proof to the cheapest tier,
    /// consult the arena dedup cache and conversion cache, generate the proof
    /// only on a cache miss, verify every vector's dimension, ingest, and
    /// record a proof attestation.
    ///
    /// Returns the number of vectors successfully ingested.
    ///
    /// # Errors
    /// Fails if proof generation, batch verification, or store ingestion fails.
    pub fn verify_and_ingest(
        &mut self,
        store: &mut RvfStore,
        vectors: &[Vec<f32>],
        ids: &[u64],
    ) -> Result<u64> {
        // Acquire thread-local pooled resources (auto-returned on drop)
        let _pooled = pools::acquire();
        // Route proof to cheapest tier
        let decision = gated::route_proof(
            ProofKind::DimensionEquality {
                expected: self.dim,
                actual: self.dim,
            },
            &self.env,
        );
        match decision.tier {
            ruvector_verified::gated::ProofTier::Reflex => self.tier_counts[0] += 1,
            ruvector_verified::gated::ProofTier::Standard { .. } => self.tier_counts[1] += 1,
            ruvector_verified::gated::ProofTier::Deep => self.tier_counts[2] += 1,
        }
        // Check arena dedup cache for dimension proof.
        // `term_id` was previously named `_term_id` despite being used below;
        // underscore-prefixed bindings signal "unused" (clippy::used_underscore_binding).
        let dim_hash = ruvector_verified::fast_arena::fx_hash_pair(self.dim, self.dim);
        let (term_id, was_cached) = self.arena.intern(dim_hash);
        if was_cached {
            debug!("arena cache hit for dim proof");
        }
        // Check conversion cache; only generate a fresh proof on a miss.
        let cached_proof = self.cache.get(term_id, self.dim);
        let proof_id = if let Some(pid) = cached_proof {
            debug!(pid, "conversion cache hit");
            pid
        } else {
            // Generate dimension equality proof (~500ns)
            let pid = vector_types::prove_dim_eq(&mut self.env, self.dim, self.dim)?;
            self.cache.insert(term_id, self.dim, pid);
            pid
        };
        // Verify all vectors in the batch have correct dimensions
        let refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let _verified = vector_types::verify_batch_dimensions(&mut self.env, self.dim, &refs)?;
        debug!(count = vectors.len(), proof_id, "batch verified");
        // Ingest into RVF store
        store
            .ingest_batch(&refs, ids, None)
            .map_err(|e| anyhow!("ingest: {e:?}"))?;
        // Create proof attestation for this batch
        let attestation = create_attestation(&self.env, proof_id);
        self.attestations.push(attestation);
        Ok(vectors.len() as u64)
    }
    /// Get current statistics.
    ///
    /// `total_time_us` is left at 0 for the caller to fill in with its own
    /// wall-clock measurement.
    pub fn stats(&self) -> IngestStats {
        let arena_stats = self.arena.stats();
        let cache_stats = self.cache.stats();
        // NOTE(review): pool stats are fetched but unused here — presumably
        // kept for the side effect or future reporting; confirm whether the
        // call can be dropped.
        let (_pool_hits, _pool_misses, _) = pools::pool_stats();
        IngestStats {
            vectors_verified: self.env.stats().proofs_constructed,
            proofs_generated: self.env.stats().proofs_constructed,
            arena_hit_rate: arena_stats.cache_hit_rate(),
            conversion_cache_hit_rate: cache_stats.hit_rate(),
            tier_distribution: self.tier_counts,
            attestations_created: self.attestations.len() as u64,
            total_time_us: 0, // filled by caller
        }
    }
    /// Get all attestations created during ingest.
    pub fn attestations(&self) -> &[ProofAttestation] {
        &self.attestations
    }
    /// Get the proof environment for inspection.
    pub fn env(&self) -> &ProofEnvironment {
        &self.env
    }
    /// Reset the pipeline for a new ingest cycle, clearing all proofs,
    /// caches, tier counters, and attestations.
    pub fn reset(&mut self) {
        self.env.reset();
        self.arena.reset();
        self.cache.clear();
        self.tier_counts = [0; 3];
        self.attestations.clear();
    }
}
/// Run a complete verified ingest cycle: generate vectors, verify, ingest.
///
/// Returns (IngestStats, store_file_size_bytes).
/// Run a complete verified ingest cycle: generate vectors, verify, ingest.
///
/// Vectors are generated deterministically from `seed` and processed in
/// batches of at most 1000. A `vec_count` of 0 is a no-op (no panic).
///
/// Returns (IngestStats, store_file_size_bytes).
///
/// # Errors
/// Propagates any proof-generation, verification, or ingestion failure.
pub fn run_verified_ingest(
    store: &mut RvfStore,
    store_path: &std::path::Path,
    dim: u32,
    vec_count: usize,
    seed: u64,
) -> Result<(IngestStats, u64)> {
    use rand::prelude::*;
    let start = std::time::Instant::now();
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    let mut pipeline = VerifiedIngestPipeline::new(dim);
    // Generate vectors in batches of up to 1000. Clamp to at least 1 because
    // `Iterator::step_by` panics on a step of 0, which the previous
    // `1000.min(vec_count)` produced when `vec_count == 0`; with a clamp the
    // range `(0..0)` is simply empty.
    let batch_size = vec_count.clamp(1, 1000);
    let mut total_ingested = 0u64;
    for batch_start in (0..vec_count).step_by(batch_size) {
        // Final batch may be shorter than batch_size.
        let batch_end = (batch_start + batch_size).min(vec_count);
        let count = batch_end - batch_start;
        let vectors: Vec<Vec<f32>> = (0..count)
            .map(|_| (0..dim as usize).map(|_| rng.gen::<f32>()).collect())
            .collect();
        // Sequential IDs matching the batch's position in the overall run.
        let ids: Vec<u64> = (batch_start as u64..batch_end as u64).collect();
        let ingested = pipeline.verify_and_ingest(store, &vectors, &ids)?;
        total_ingested += ingested;
    }
    let elapsed = start.elapsed();
    let mut stats = pipeline.stats();
    stats.total_time_us = elapsed.as_micros() as u64;
    // Report the actual ingest count rather than the proof-construction count.
    stats.vectors_verified = total_ingested;
    info!(
        vectors = total_ingested,
        proofs = stats.proofs_generated,
        arena_hit = format!("{:.1}%", stats.arena_hit_rate * 100.0),
        cache_hit = format!("{:.1}%", stats.conversion_cache_hit_rate * 100.0),
        tiers = format!(
            "R:{}/S:{}/D:{}",
            stats.tier_distribution[0], stats.tier_distribution[1], stats.tier_distribution[2]
        ),
        attestations = stats.attestations_created,
        time_us = stats.total_time_us,
        "verified ingest complete"
    );
    // Get store file size (0 if the metadata read fails).
    let store_size = std::fs::metadata(store_path).map(|m| m.len()).unwrap_or(0);
    Ok((stats, store_size))
}