Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,72 @@
//! `rvf compact` -- Compact store to reclaim dead space.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf compact`.
// NOTE: the `///` field docs below are rendered by clap as the `--help`
// text, so they are user-visible strings; edit with care.
#[derive(Args)]
pub struct CompactArgs {
    /// Path to the RVF store
    path: String,
    /// Strip unknown segment types (segments not recognized by this version)
    #[arg(long)]
    strip_unknown: bool,
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Execute `rvf compact`: rewrite the store in place to reclaim dead space,
/// then report before/after statistics either as JSON or as key/value lines.
pub fn run(args: CompactArgs) -> Result<(), Box<dyn std::error::Error>> {
    if args.strip_unknown {
        // Warn loudly: stripping unknown segments can drop forward-compatible data.
        eprintln!(
            "Warning: --strip-unknown will remove segment types not recognized by this version."
        );
        eprintln!(" This may discard data written by newer tools.");
    }
    let mut store = RvfStore::open(Path::new(&args.path)).map_err(map_rvf_err)?;
    let before = store.status();
    let result = store.compact().map_err(map_rvf_err)?;
    let after = store.status();
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "segments_compacted": result.segments_compacted,
            "bytes_reclaimed": result.bytes_reclaimed,
            "epoch": result.epoch,
            "vectors_before": before.total_vectors,
            "vectors_after": after.total_vectors,
            "file_size_before": before.file_size,
            "file_size_after": after.file_size,
            "strip_unknown": args.strip_unknown,
        }));
        return Ok(());
    }
    println!("Compaction complete:");
    // Collect the report rows first, then print them in order.
    let mut rows: Vec<(&str, String)> = vec![
        ("Segments compacted:", result.segments_compacted.to_string()),
        ("Bytes reclaimed:", result.bytes_reclaimed.to_string()),
        ("Epoch:", result.epoch.to_string()),
        ("Vectors before:", before.total_vectors.to_string()),
        ("Vectors after:", after.total_vectors.to_string()),
        ("File size before:", format!("{} bytes", before.file_size)),
        ("File size after:", format!("{} bytes", after.file_size)),
    ];
    if args.strip_unknown {
        rows.push(("Strip unknown:", "yes".to_string()));
    }
    for (label, value) in &rows {
        crate::output::print_kv(label, value);
    }
    Ok(())
}

View File

@@ -0,0 +1,71 @@
//! `rvf create` -- Create a new empty RVF store.
use clap::Args;
use std::path::Path;
use rvf_runtime::options::DistanceMetric;
use rvf_runtime::{RvfOptions, RvfStore};
use super::map_rvf_err;
// CLI arguments for `rvf create`.
// NOTE: the `///` field docs below are rendered by clap as the `--help`
// text, so they are user-visible strings; edit with care.
#[derive(Args)]
pub struct CreateArgs {
    /// Path for the new RVF store file
    path: String,
    /// Vector dimensionality
    #[arg(short, long)]
    dimension: u32, // validated against 1..=u16::MAX in run()
    /// Distance metric: l2, ip, cosine
    #[arg(short, long, default_value = "l2")]
    metric: String,
    /// Hardware profile: 0-3
    #[arg(short, long, default_value = "0")]
    profile: u8, // range 0-3 per help text; not range-checked here -- TODO confirm downstream validation
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Execute `rvf create`: validate the requested options, create an empty
/// RVF store on disk, and report what was created.
///
/// Errors if the dimension is 0 or exceeds the on-disk u16 limit, if the
/// metric name is unrecognized, or if store creation/close fails.
pub fn run(args: CreateArgs) -> Result<(), Box<dyn std::error::Error>> {
    // The on-disk format stores the dimension as u16, so reject anything
    // outside 1..=u16::MAX before the narrowing cast below.
    if args.dimension == 0 || args.dimension > u16::MAX as u32 {
        return Err(format!(
            "Dimension must be between 1 and {} (got {})",
            u16::MAX,
            args.dimension
        )
        .into());
    }
    // Match metric names case-insensitively. Previously only "L2" had an
    // uppercase alias while "ip"/"cosine" were lowercase-only; lowercasing
    // first accepts every previously-valid spelling plus case variants.
    let metric = match args.metric.to_lowercase().as_str() {
        "l2" => DistanceMetric::L2,
        "ip" | "inner_product" => DistanceMetric::InnerProduct,
        "cosine" => DistanceMetric::Cosine,
        // Report the metric exactly as the user typed it.
        _ => return Err(format!("Unknown metric: {}", args.metric).into()),
    };
    let opts = RvfOptions {
        dimension: args.dimension as u16, // safe: range-checked above
        metric,
        profile: args.profile,
        ..Default::default()
    };
    let store = RvfStore::create(Path::new(&args.path), opts).map_err(map_rvf_err)?;
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "created",
            "path": args.path,
            "dimension": args.dimension,
            "metric": args.metric,
            "profile": args.profile,
        }));
    } else {
        println!("Created RVF store: {}", args.path);
        crate::output::print_kv("Dimension:", &args.dimension.to_string());
        crate::output::print_kv("Metric:", &args.metric);
        crate::output::print_kv("Profile:", &args.profile.to_string());
    }
    Ok(())
}

View File

@@ -0,0 +1,61 @@
//! `rvf delete` -- Delete vectors by ID or filter.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf delete`.
// Exactly one of --ids / --filter is expected; the check lives in run().
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct DeleteArgs {
    /// Path to the RVF store
    path: String,
    /// Comma-separated vector IDs to delete (e.g. "1,2,3")
    #[arg(long)]
    ids: Option<String>,
    /// Filter expression as JSON (e.g. '{"gt":{"field":0,"value":{"u64":10}}}')
    #[arg(long)]
    filter: Option<String>,
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Execute `rvf delete`: remove vectors either by explicit ID list or by a
/// JSON filter expression, then report how many were deleted.
///
/// Errors if neither or both of --ids/--filter are given, if an ID fails
/// to parse, or if the store operation fails.
pub fn run(args: DeleteArgs) -> Result<(), Box<dyn std::error::Error>> {
    // Require exactly one selection mechanism. Previously, passing both
    // --ids and --filter silently ignored the filter; now that is an error,
    // matching the mutual-exclusion behaviour of `rvf filter`.
    match (&args.ids, &args.filter) {
        (None, None) => return Err("must specify --ids or --filter".into()),
        (Some(_), Some(_)) => {
            return Err("cannot specify both --ids and --filter".into());
        }
        _ => {}
    }
    let mut store = RvfStore::open(Path::new(&args.path)).map_err(map_rvf_err)?;
    let result = if let Some(ids_str) = &args.ids {
        // Parse the comma-separated ID list, failing fast on the first bad entry.
        let ids: Vec<u64> = ids_str
            .split(',')
            .map(|s| {
                s.trim()
                    .parse::<u64>()
                    .map_err(|e| format!("Invalid ID '{s}': {e}"))
            })
            .collect::<Result<Vec<_>, _>>()?;
        store.delete(&ids).map_err(map_rvf_err)?
    } else {
        // Safe: the (None, None) case was rejected above, and ids is None here.
        let filter_str = args.filter.as_ref().unwrap();
        let filter_expr = super::query::parse_filter_json(filter_str)?;
        store.delete_by_filter(&filter_expr).map_err(map_rvf_err)?
    };
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "deleted": result.deleted,
            "epoch": result.epoch,
        }));
    } else {
        println!("Delete complete:");
        crate::output::print_kv("Deleted:", &result.deleted.to_string());
        crate::output::print_kv("Epoch:", &result.epoch.to_string());
    }
    Ok(())
}

View File

@@ -0,0 +1,70 @@
//! `rvf derive` -- Derive a child store from a parent.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use rvf_types::DerivationType;
use super::map_rvf_err;
// CLI arguments for `rvf derive`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct DeriveArgs {
    /// Path to the parent RVF store
    parent: String,
    /// Path for the new child RVF store
    child: String,
    /// Derivation type: clone, filter, merge, quantize, reindex, transform, snapshot
    #[arg(short = 't', long, default_value = "clone")]
    derivation_type: String, // parsed case-insensitively by parse_derivation_type()
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Map a user-supplied derivation-type name (case-insensitive) to the
/// corresponding `DerivationType`, or error on an unknown name.
fn parse_derivation_type(s: &str) -> Result<DerivationType, Box<dyn std::error::Error>> {
    let normalized = s.to_lowercase();
    let dt = match normalized.as_str() {
        "clone" => DerivationType::Clone,
        "filter" => DerivationType::Filter,
        "merge" => DerivationType::Merge,
        "quantize" => DerivationType::Quantize,
        "reindex" => DerivationType::Reindex,
        "transform" => DerivationType::Transform,
        "snapshot" => DerivationType::Snapshot,
        other => return Err(format!("Unknown derivation type: {other}").into()),
    };
    Ok(dt)
}
/// Execute `rvf derive`: open the parent read-only, derive a child store at
/// the requested path, and report the child's lineage identity.
pub fn run(args: DeriveArgs) -> Result<(), Box<dyn std::error::Error>> {
    let derivation = parse_derivation_type(&args.derivation_type)?;
    let parent_store = RvfStore::open_readonly(Path::new(&args.parent)).map_err(map_rvf_err)?;
    let child_store = parent_store
        .derive(Path::new(&args.child), derivation, None)
        .map_err(map_rvf_err)?;
    // Copy the identity out before closing; it describes the new file's lineage.
    // The read-only parent handle is dropped rather than explicitly closed
    // (matching existing behaviour) -- presumably Drop cleans it up; confirm.
    let identity = *child_store.file_identity();
    child_store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "derived",
            "parent": args.parent,
            "child": args.child,
            "derivation_type": args.derivation_type,
            "child_file_id": crate::output::hex(&identity.file_id),
            "parent_file_id": crate::output::hex(&identity.parent_id),
            "lineage_depth": identity.lineage_depth,
        }));
        return Ok(());
    }
    println!("Derived child store: {}", args.child);
    crate::output::print_kv("Parent:", &args.parent);
    crate::output::print_kv("Type:", &args.derivation_type);
    crate::output::print_kv(
        "Child file ID:",
        &crate::output::hex(&identity.file_id),
    );
    crate::output::print_kv("Lineage depth:", &identity.lineage_depth.to_string());
    Ok(())
}

View File

@@ -0,0 +1,68 @@
//! `rvf embed-ebpf` -- Compile and embed an eBPF program into an RVF file.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf embed-ebpf`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct EmbedEbpfArgs {
    /// Path to the RVF store
    pub file: String,
    /// Path to the eBPF program (compiled .o or raw bytecode)
    #[arg(long)]
    pub program: String, // read verbatim; no ELF parsing is done here
    /// eBPF program type: xdp, socket_filter, tc_classifier
    #[arg(long, default_value = "xdp")]
    pub program_type: String, // mapped to a numeric code by parse_program_type()
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// Map an eBPF program-type name (case-insensitive, dash or underscore
/// separators) to its numeric segment code:
/// socket filter = 1, XDP = 2, TC classifier = 3.
fn parse_program_type(s: &str) -> Result<u8, Box<dyn std::error::Error>> {
    let key = s.to_lowercase();
    match key.as_str() {
        "socket_filter" | "socket-filter" => Ok(1),
        "xdp" => Ok(2),
        "tc_classifier" | "tc-classifier" | "tc" => Ok(3),
        other => Err(format!("Unknown eBPF program type: {other}").into()),
    }
}
/// Execute `rvf embed-ebpf`: read the program bytes from disk, append an
/// eBPF segment to the store, and report the new segment ID.
pub fn run(args: EmbedEbpfArgs) -> Result<(), Box<dyn std::error::Error>> {
    let program_type = parse_program_type(&args.program_type)?;
    let bytecode = std::fs::read(&args.program)
        .map_err(|e| format!("Failed to read eBPF program '{}': {}", args.program, e))?;
    let mut store = RvfStore::open(Path::new(&args.file)).map_err(map_rvf_err)?;
    // attach_type and max_dimension are left at 0 (default / auto), and no
    // BTF blob is supplied.
    let attach_type = 0;
    let max_dimension = 0;
    let seg_id = store
        .embed_ebpf(program_type, attach_type, max_dimension, &bytecode, None)
        .map_err(map_rvf_err)?;
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "embedded",
            "segment_id": seg_id,
            "program_type": args.program_type,
            "bytecode_size": bytecode.len(),
        }));
        return Ok(());
    }
    println!("eBPF program embedded successfully:");
    crate::output::print_kv("Segment ID:", &seg_id.to_string());
    crate::output::print_kv("Program type:", &args.program_type);
    crate::output::print_kv("Bytecode size:", &format!("{} bytes", bytecode.len()));
    Ok(())
}

View File

@@ -0,0 +1,77 @@
//! `rvf embed-kernel` -- Embed a kernel image into an RVF file.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf embed-kernel`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct EmbedKernelArgs {
    /// Path to the RVF store
    pub file: String,
    /// Target architecture: x86_64, aarch64
    #[arg(long, default_value = "x86_64")]
    pub arch: String, // mapped to a numeric code by parse_arch()
    /// Use prebuilt kernel image instead of building
    #[arg(long)]
    // NOTE(review): run() never reads this flag -- --prebuilt is currently
    // a no-op and the user still needs --image-path; confirm intent.
    pub prebuilt: bool,
    /// Path to kernel image file (bzImage or similar)
    #[arg(long)]
    pub image_path: Option<String>,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// Map an architecture name (case-insensitive, common aliases accepted)
/// to its numeric segment code: x86_64 = 1, aarch64 = 2, riscv64 = 3.
fn parse_arch(s: &str) -> Result<u8, Box<dyn std::error::Error>> {
    let key = s.to_lowercase();
    if matches!(key.as_str(), "x86_64" | "x86-64" | "amd64") {
        Ok(1)
    } else if matches!(key.as_str(), "aarch64" | "arm64") {
        Ok(2)
    } else if key == "riscv64" {
        Ok(3)
    } else {
        Err(format!("Unknown architecture: {key}").into())
    }
}
/// Execute `rvf embed-kernel`: read a kernel image from disk, append a
/// KERNEL segment to the store, and report the new segment ID.
///
/// Errors if the architecture is unknown, no usable image path is given,
/// the image cannot be read, or the store operation fails.
pub fn run(args: EmbedKernelArgs) -> Result<(), Box<dyn std::error::Error>> {
    let arch = parse_arch(&args.arch)?;
    // `--prebuilt` was previously accepted but never read, so the command
    // fell through to a generic "no image path" error that itself suggested
    // --prebuilt. Fail explicitly instead of silently ignoring the flag.
    let image_path = match (&args.image_path, args.prebuilt) {
        (Some(p), _) => p.as_str(),
        (None, true) => {
            return Err(
                "--prebuilt is not yet supported; supply a kernel image with --image-path <path>"
                    .into(),
            )
        }
        (None, false) => {
            return Err("No kernel image path provided. Use --image-path <path> or --prebuilt".into())
        }
    };
    let kernel_image = std::fs::read(image_path)
        .map_err(|e| format!("Failed to read kernel image '{}': {}", image_path, e))?;
    let mut store = RvfStore::open(Path::new(&args.file)).map_err(map_rvf_err)?;
    let seg_id = store
        .embed_kernel(
            arch,
            0,    // kernel_type: unikernel
            0x01, // kernel_flags: KERNEL_FLAG_SIGNED placeholder
            &kernel_image,
            8080, // default API port baked into the segment -- TODO confirm semantics
            None,
        )
        .map_err(map_rvf_err)?;
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "embedded",
            "segment_id": seg_id,
            "arch": args.arch,
            "image_size": kernel_image.len(),
        }));
    } else {
        println!("Kernel embedded successfully:");
        crate::output::print_kv("Segment ID:", &seg_id.to_string());
        crate::output::print_kv("Architecture:", &args.arch);
        crate::output::print_kv("Image size:", &format!("{} bytes", kernel_image.len()));
    }
    Ok(())
}

View File

@@ -0,0 +1,162 @@
//! `rvf filter` -- Create a MEMBERSHIP_SEG with include/exclude filter.
use clap::Args;
use std::io::{BufWriter, Seek, SeekFrom, Write};
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf filter`.
// Exactly one of --include-ids / --exclude-ids must be given (checked in run()).
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct FilterArgs {
    /// Path to the RVF store
    pub file: String,
    /// Comma-separated list of vector IDs to include
    #[arg(long, value_delimiter = ',')]
    pub include_ids: Option<Vec<u64>>,
    /// Comma-separated list of vector IDs to exclude
    #[arg(long, value_delimiter = ',')]
    pub exclude_ids: Option<Vec<u64>>,
    /// Output path (if different from input, creates a derived file)
    #[arg(short, long)]
    pub output: Option<String>,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// MEMBERSHIP_SEG magic: "RVMB"
const MEMBERSHIP_MAGIC: u32 = 0x5256_4D42;
/// Execute `rvf filter`: build an include/exclude bitmap over vector IDs
/// and append it to the target file as a raw MEMBERSHIP_SEG (0x22).
///
/// If --output differs from the input file, a Filter-derived child store is
/// created first and the segment is written into the child.
pub fn run(args: FilterArgs) -> Result<(), Box<dyn std::error::Error>> {
    // mode 0 = include listed IDs, mode 1 = exclude listed IDs.
    let (filter_mode, ids) = match (&args.include_ids, &args.exclude_ids) {
        (Some(inc), None) => (0u8, inc.clone()), // include mode
        (None, Some(exc)) => (1u8, exc.clone()), // exclude mode
        (Some(_), Some(_)) => {
            return Err("Cannot specify both --include-ids and --exclude-ids".into());
        }
        (None, None) => {
            return Err("Must specify either --include-ids or --exclude-ids".into());
        }
    };
    let target_path = args.output.as_deref().unwrap_or(&args.file);
    // If output is different, derive first
    if target_path != args.file {
        let parent = RvfStore::open_readonly(Path::new(&args.file)).map_err(map_rvf_err)?;
        let child = parent
            .derive(
                Path::new(target_path),
                rvf_types::DerivationType::Filter,
                None,
            )
            .map_err(map_rvf_err)?;
        child.close().map_err(map_rvf_err)?;
    }
    // NOTE(review): `store` is held open while the segment is appended
    // through a separate file handle below, and store.close() runs after
    // the raw write -- confirm close() does not rewrite/truncate the tail.
    let store = RvfStore::open(Path::new(target_path)).map_err(map_rvf_err)?;
    // Build a simple bitmap filter: one bit per ID up to the largest ID.
    let max_id = ids.iter().copied().max().unwrap_or(0);
    let bitmap_bytes = (max_id / 8 + 1) as usize;
    let mut bitmap = vec![0u8; bitmap_bytes];
    for &id in &ids {
        let byte_idx = (id / 8) as usize;
        let bit_idx = (id % 8) as u8;
        // byte_idx is always < bitmap.len() by construction; the guard is defensive.
        if byte_idx < bitmap.len() {
            bitmap[byte_idx] |= 1 << bit_idx;
        }
    }
    // Build the 96-byte MembershipHeader (little-endian fields throughout).
    let mut header = [0u8; 96];
    header[0..4].copy_from_slice(&MEMBERSHIP_MAGIC.to_le_bytes());
    header[4..6].copy_from_slice(&1u16.to_le_bytes()); // version
    header[6] = 0; // filter_type: bitmap
    header[7] = filter_mode;
    // vector_count: use max_id+1 as approximation
    header[8..16].copy_from_slice(&(max_id + 1).to_le_bytes());
    // member_count
    header[16..24].copy_from_slice(&(ids.len() as u64).to_le_bytes());
    // filter_offset: will be 96 (right after header)
    header[24..32].copy_from_slice(&96u64.to_le_bytes());
    // filter_size
    header[32..36].copy_from_slice(&(bitmap.len() as u32).to_le_bytes());
    // generation_id
    header[36..40].copy_from_slice(&1u32.to_le_bytes());
    // filter_hash: simple hash of bitmap data (integrity check, not crypto)
    let filter_hash = simple_hash(&bitmap);
    header[40..72].copy_from_slice(&filter_hash);
    // bloom_offset, bloom_size, reserved: all zero (already zeroed)
    // Write the MEMBERSHIP_SEG (0x22) as a raw segment
    let membership_seg_type = 0x22u8;
    let payload = [header.as_slice(), bitmap.as_slice()].concat();
    // Write raw segment to end of file
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open(target_path)?;
    let mut writer = BufWriter::new(&file);
    writer.seek(SeekFrom::End(0))?;
    // Write segment header (64 bytes).
    // NOTE(review): seg_id is hard-coded to 1 -- may collide with an
    // existing segment ID in the directory; confirm how readers resolve it.
    let seg_header = build_segment_header(1, membership_seg_type, payload.len() as u64);
    writer.write_all(&seg_header)?;
    writer.write_all(&payload)?;
    writer.flush()?;
    file.sync_all()?;
    drop(writer);
    drop(file);
    store.close().map_err(map_rvf_err)?;
    let mode_str = if filter_mode == 0 {
        "include"
    } else {
        "exclude"
    };
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "filtered",
            "mode": mode_str,
            "ids_count": ids.len(),
            "target": target_path,
        }));
    } else {
        println!("Membership filter created:");
        crate::output::print_kv("Mode:", mode_str);
        crate::output::print_kv("IDs:", &ids.len().to_string());
        crate::output::print_kv("Target:", target_path);
    }
    Ok(())
}
/// Cheap, order-sensitive 32-byte digest over `data`.
/// Used for the MembershipHeader filter_hash field as a corruption check;
/// it is NOT cryptographic.
fn simple_hash(data: &[u8]) -> [u8; 32] {
    let mut digest = [0u8; 32];
    for (i, &byte) in data.iter().enumerate() {
        let slot = i % 32;
        digest[slot] = digest[slot].wrapping_add(byte);
        // Spill a rotated copy into a second slot so byte order matters.
        let spill = (i + 13) % 32;
        digest[spill] = digest[spill].wrapping_add(digest[slot].rotate_left(3));
    }
    digest
}
/// Assemble a minimal 64-byte RVF segment header.
///
/// Layout (little-endian): magic "RVFS" u32 | version u8 | seg_type u8 |
/// flags u16 (zero) | seg_id u64 @0x08 | payload_length u64 @0x10 |
/// remaining bytes zero.
fn build_segment_header(seg_id: u64, seg_type: u8, payload_len: u64) -> Vec<u8> {
    const MAGIC: u32 = 0x5256_4653; // "RVFS"
    let mut header = vec![0u8; 64];
    header[..4].copy_from_slice(&MAGIC.to_le_bytes());
    header[4] = 1; // format version
    header[5] = seg_type;
    // bytes 6..8: flags, left zero
    header[8..16].copy_from_slice(&seg_id.to_le_bytes());
    header[16..24].copy_from_slice(&payload_len.to_le_bytes());
    header
}

View File

@@ -0,0 +1,85 @@
//! `rvf freeze` -- Snapshot-freeze the current state of an RVF store.
use clap::Args;
use std::io::{BufWriter, Seek, SeekFrom, Write};
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf freeze`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct FreezeArgs {
    /// Path to the RVF store
    pub file: String,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// REFCOUNT_SEG magic: "RVRC"
const REFCOUNT_MAGIC: u32 = 0x5256_5243;
/// Execute `rvf freeze`: record a snapshot epoch by appending a raw
/// REFCOUNT_SEG (0x21) whose header carries `current_epoch + 1`.
pub fn run(args: FreezeArgs) -> Result<(), Box<dyn std::error::Error>> {
    // NOTE(review): the store stays open while the segment is appended via
    // a separate handle below; confirm close() does not disturb the tail.
    let store = RvfStore::open(Path::new(&args.file)).map_err(map_rvf_err)?;
    let status = store.status();
    let snapshot_epoch = status.current_epoch + 1;
    // Build a 32-byte RefcountHeader with snapshot_epoch set (little-endian).
    let mut header = [0u8; 32];
    header[0..4].copy_from_slice(&REFCOUNT_MAGIC.to_le_bytes());
    header[4..6].copy_from_slice(&1u16.to_le_bytes()); // version
    header[6] = 1; // refcount_width: 1 byte per entry
    // cluster_count: 0 (no clusters tracked yet)
    // max_refcount: 0
    // array_offset: 0 (no array)
    // snapshot_epoch (4 bytes at offset 0x18)
    header[0x18..0x1C].copy_from_slice(&snapshot_epoch.to_le_bytes());
    // Write a REFCOUNT_SEG (0x21) with the frozen epoch
    let seg_type = 0x21u8; // Refcount
    let payload = header;
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open(&args.file)?;
    let mut writer = BufWriter::new(&file);
    writer.seek(SeekFrom::End(0))?;
    // NOTE(review): seg_id is hard-coded to 1 -- may collide with an
    // existing segment ID; confirm how readers resolve duplicates.
    let seg_header = build_segment_header(1, seg_type, payload.len() as u64);
    writer.write_all(&seg_header)?;
    writer.write_all(&payload)?;
    writer.flush()?;
    file.sync_all()?;
    drop(writer);
    drop(file);
    // Emit a witness event for the snapshot
    // (witness writing would go through the store's witness path when available)
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "frozen",
            "snapshot_epoch": snapshot_epoch,
        }));
    } else {
        println!("Store frozen:");
        crate::output::print_kv("Snapshot epoch:", &snapshot_epoch.to_string());
        println!(" All further writes will create a new derived generation.");
    }
    Ok(())
}
/// Assemble a minimal 64-byte RVF segment header (little-endian fields):
/// magic "RVFS" | version 1 | seg_type | seg_id @0x08 | payload_length @0x10.
/// All other bytes (flags, reserved tail) stay zero.
/// NOTE(review): duplicated in filter.rs -- consider hoisting into a shared helper.
fn build_segment_header(seg_id: u64, seg_type: u8, payload_len: u64) -> Vec<u8> {
    let mut header = vec![0u8; 64];
    let magic: u32 = 0x5256_4653; // "RVFS"
    header[..4].copy_from_slice(&magic.to_le_bytes());
    header[4] = 1; // format version
    header[5] = seg_type;
    header[0x08..0x10].copy_from_slice(&seg_id.to_le_bytes());
    header[0x10..0x18].copy_from_slice(&payload_len.to_le_bytes());
    header
}

View File

@@ -0,0 +1,85 @@
//! `rvf ingest` -- Ingest vectors from a JSON file.
use clap::Args;
use serde::Deserialize;
use std::fs;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf ingest`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct IngestArgs {
    /// Path to the RVF store
    path: String,
    /// Path to the JSON input file (array of {id, vector} objects)
    #[arg(short, long)]
    input: String,
    /// Batch size for ingestion
    #[arg(short, long, default_value = "1000")]
    batch_size: usize, // clamped to >= 1 in run()
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
// One entry of the JSON ingest file: {"id": <u64>, "vector": [f32, ...]}.
#[derive(Deserialize)]
struct VectorRecord {
    // Caller-assigned vector ID.
    id: u64,
    // Dense embedding; presumably must match the store's dimension -- the
    // store's ingest path is expected to reject mismatches (TODO confirm).
    vector: Vec<f32>,
}
/// Execute `rvf ingest`: load {id, vector} records from a JSON array and
/// ingest them into the store in batches, reporting accepted/rejected totals.
///
/// Errors if the input file cannot be read or parsed, or if a batch fails.
pub fn run(args: IngestArgs) -> Result<(), Box<dyn std::error::Error>> {
    let json_str = fs::read_to_string(&args.input)?;
    let records: Vec<VectorRecord> = serde_json::from_str(&json_str)?;
    // Empty input: report zeros without even opening the store.
    if records.is_empty() {
        if args.json {
            crate::output::print_json(&serde_json::json!({
                "accepted": 0,
                "rejected": 0,
                "epoch": 0,
            }));
        } else {
            println!("No records to ingest.");
        }
        return Ok(());
    }
    let mut store = RvfStore::open(Path::new(&args.path)).map_err(map_rvf_err)?;
    // Guard against --batch-size 0, which would make chunks() panic.
    let batch_size = args.batch_size.max(1);
    let mut total_accepted = 0u64;
    let mut total_rejected = 0u64;
    let mut last_epoch = 0u32;
    for chunk in records.chunks(batch_size) {
        // Borrow each vector's data directly; the previous implementation
        // cloned every vector per batch, copying all the embedding data.
        let vec_refs: Vec<&[f32]> = chunk.iter().map(|r| r.vector.as_slice()).collect();
        let ids: Vec<u64> = chunk.iter().map(|r| r.id).collect();
        let result = store
            .ingest_batch(&vec_refs, &ids, None)
            .map_err(map_rvf_err)?;
        total_accepted += result.accepted;
        total_rejected += result.rejected;
        last_epoch = result.epoch;
    }
    store.close().map_err(map_rvf_err)?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "accepted": total_accepted,
            "rejected": total_rejected,
            "epoch": last_epoch,
        }));
    } else {
        println!("Ingestion complete:");
        crate::output::print_kv("Accepted:", &total_accepted.to_string());
        crate::output::print_kv("Rejected:", &total_rejected.to_string());
        crate::output::print_kv("Epoch:", &last_epoch.to_string());
    }
    Ok(())
}

View File

@@ -0,0 +1,109 @@
//! `rvf inspect` -- Inspect segments and lineage.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use rvf_types::SegmentType;
use super::map_rvf_err;
// CLI arguments for `rvf inspect`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct InspectArgs {
    /// Path to the RVF store
    path: String,
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Map a raw segment-type byte to a human-readable name.
/// Bytes that match no known `SegmentType` map to "Unknown".
fn segment_type_name(seg_type: u8) -> &'static str {
    // Table of every known segment type and its display name.
    let known = [
        (SegmentType::Vec, "Vec"),
        (SegmentType::Index, "Index"),
        (SegmentType::Overlay, "Overlay"),
        (SegmentType::Journal, "Journal"),
        (SegmentType::Manifest, "Manifest"),
        (SegmentType::Quant, "Quant"),
        (SegmentType::Meta, "Meta"),
        (SegmentType::Hot, "Hot"),
        (SegmentType::Sketch, "Sketch"),
        (SegmentType::Witness, "Witness"),
        (SegmentType::Profile, "Profile"),
        (SegmentType::Crypto, "Crypto"),
        (SegmentType::MetaIdx, "MetaIdx"),
        (SegmentType::Kernel, "Kernel"),
        (SegmentType::Ebpf, "Ebpf"),
        (SegmentType::CowMap, "CowMap"),
        (SegmentType::Refcount, "Refcount"),
        (SegmentType::Membership, "Membership"),
        (SegmentType::Delta, "Delta"),
    ];
    for (ty, name) in known {
        if seg_type == ty as u8 {
            return name;
        }
    }
    "Unknown"
}
/// Execute `rvf inspect`: dump store stats, the segment directory, and the
/// file's lineage identity, as JSON or human-readable text.
pub fn run(args: InspectArgs) -> Result<(), Box<dyn std::error::Error>> {
    // Read-only open: inspect never mutates the store.
    let store = RvfStore::open_readonly(Path::new(&args.path)).map_err(map_rvf_err)?;
    let seg_dir = store.segment_dir();
    let dimension = store.dimension();
    let identity = store.file_identity();
    let status = store.status();
    if args.json {
        // Each directory entry is a (seg_id, offset, payload_len, seg_type) tuple.
        let segments: Vec<serde_json::Value> = seg_dir
            .iter()
            .map(|&(seg_id, offset, payload_len, seg_type)| {
                serde_json::json!({
                    "seg_id": seg_id,
                    "offset": offset,
                    "payload_length": payload_len,
                    "seg_type": seg_type,
                    "seg_type_name": segment_type_name(seg_type),
                })
            })
            .collect();
        crate::output::print_json(&serde_json::json!({
            "path": args.path,
            "dimension": dimension,
            "epoch": status.current_epoch,
            "total_vectors": status.total_vectors,
            "total_segments": status.total_segments,
            "file_size": status.file_size,
            "segments": segments,
            "lineage": {
                "file_id": crate::output::hex(&identity.file_id),
                "parent_id": crate::output::hex(&identity.parent_id),
                "parent_hash": crate::output::hex(&identity.parent_hash),
                "lineage_depth": identity.lineage_depth,
                "is_root": identity.is_root(),
            },
        }));
    } else {
        println!("RVF Store: {}", args.path);
        crate::output::print_kv("Dimension:", &dimension.to_string());
        crate::output::print_kv("Epoch:", &status.current_epoch.to_string());
        crate::output::print_kv("Vectors:", &status.total_vectors.to_string());
        crate::output::print_kv("File size:", &format!("{} bytes", status.file_size));
        println!();
        println!("Segments ({}):", seg_dir.len());
        for &(seg_id, offset, payload_len, seg_type) in seg_dir {
            println!(
                " seg_id={:<4} type={:<10} offset={:<10} payload={} bytes",
                seg_id,
                segment_type_name(seg_type),
                offset,
                payload_len,
            );
        }
        println!();
        println!("Lineage:");
        crate::output::print_kv("File ID:", &crate::output::hex(&identity.file_id));
        crate::output::print_kv("Parent ID:", &crate::output::hex(&identity.parent_id));
        crate::output::print_kv("Lineage depth:", &identity.lineage_depth.to_string());
        crate::output::print_kv("Is root:", &identity.is_root().to_string());
    }
    Ok(())
}

View File

@@ -0,0 +1,111 @@
//! `rvf launch` -- Boot RVF in QEMU microVM.
use clap::Args;
// CLI arguments for `rvf launch` (QEMU microVM boot).
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct LaunchArgs {
    /// Path to the RVF store
    pub file: String,
    /// API port to forward from the microVM
    #[arg(short, long, default_value = "8080")]
    pub port: u16,
    /// Memory allocation in MB
    #[arg(short, long, default_value = "128")]
    pub memory_mb: u32,
    /// Number of virtual CPUs
    #[arg(long, default_value = "1")]
    pub vcpus: u32,
    /// SSH port to forward (optional)
    #[arg(long)]
    pub ssh_port: Option<u16>,
    /// Disable KVM acceleration (use TCG instead)
    #[arg(long)]
    pub no_kvm: bool,
    /// Override QEMU binary path
    #[arg(long)]
    pub qemu_binary: Option<String>,
    /// Override kernel image path (skip extraction from RVF)
    #[arg(long)]
    pub kernel: Option<String>,
    /// Override initramfs path
    #[arg(long)]
    pub initramfs: Option<String>,
    /// Extra arguments to pass to QEMU
    #[arg(long, num_args = 1..)]
    pub qemu_args: Vec<String>,
}
// Real implementation, compiled only with the `launch` feature (pulls in
// rvf_launch and ctrlc).
#[cfg(feature = "launch")]
pub fn run(args: LaunchArgs) -> Result<(), Box<dyn std::error::Error>> {
    use std::path::PathBuf;
    use std::time::Duration;
    // Translate CLI flags into the launcher's config struct.
    let config = rvf_launch::LaunchConfig {
        rvf_path: PathBuf::from(&args.file),
        memory_mb: args.memory_mb,
        vcpus: args.vcpus,
        api_port: args.port,
        ssh_port: args.ssh_port,
        enable_kvm: !args.no_kvm, // --no-kvm falls back to TCG emulation
        qemu_binary: args.qemu_binary.map(PathBuf::from),
        extra_args: args.qemu_args,
        kernel_path: args.kernel.map(PathBuf::from),
        initramfs_path: args.initramfs.map(PathBuf::from),
    };
    // Status output goes to stderr so stdout stays clean for piping.
    eprintln!("Launching microVM from {}...", args.file);
    eprintln!(" Memory: {} MiB", config.memory_mb);
    eprintln!(" vCPUs: {}", config.vcpus);
    eprintln!(" API port: {}", config.api_port);
    if let Some(ssh) = config.ssh_port {
        eprintln!(" SSH port: {}", ssh);
    }
    eprintln!(
        " KVM: {}",
        if config.enable_kvm {
            "enabled (if available)"
        } else {
            "disabled"
        }
    );
    let mut vm = rvf_launch::Launcher::launch(&config)?;
    eprintln!("MicroVM started (PID {})", vm.pid());
    eprintln!("Waiting for VM to become ready (timeout: 30s)...");
    // A ready-timeout is a warning, not a hard error: the VM may still boot.
    match vm.wait_ready(Duration::from_secs(30)) {
        Ok(()) => {
            eprintln!("VM ready.");
            eprintln!(" API: http://127.0.0.1:{}", args.port);
        }
        Err(e) => {
            eprintln!("Warning: VM did not become ready: {e}");
            eprintln!("The VM may still be booting. Check the console output.");
        }
    }
    eprintln!("Press Ctrl+C to stop the VM.");
    // Wait for Ctrl+C: the handler signals over a channel, and recv() blocks
    // this thread until the signal arrives.
    let (tx, rx) = std::sync::mpsc::channel();
    ctrlc::set_handler(move || {
        let _ = tx.send(());
    })
    .map_err(|e| format!("failed to set Ctrl+C handler: {e}"))?;
    rx.recv()
        .map_err(|e| format!("signal channel error: {e}"))?;
    eprintln!("\nShutting down VM...");
    vm.shutdown()?;
    eprintln!("VM stopped.");
    Ok(())
}
// Stub compiled when the `launch` feature is off: always errors with a hint
// about how to rebuild with QEMU support.
#[cfg(not(feature = "launch"))]
pub fn run(_args: LaunchArgs) -> Result<(), Box<dyn std::error::Error>> {
    let msg = "QEMU launcher requires the 'launch' feature. \
        Rebuild with: cargo build -p rvf-cli --features launch";
    Err(msg.into())
}

View File

@@ -0,0 +1,25 @@
pub mod compact;
pub mod create;
pub mod delete;
pub mod derive;
pub mod embed_ebpf;
pub mod embed_kernel;
pub mod filter;
pub mod freeze;
pub mod ingest;
pub mod inspect;
pub mod launch;
pub mod query;
pub mod rebuild_refcounts;
pub mod serve;
pub mod status;
pub mod verify_attestation;
pub mod verify_witness;
/// Convert an RvfError into a boxed std::error::Error.
///
/// RvfError implements Display but not std::error::Error (it is no_std),
/// so we wrap its rendered message in a std::io::Error for CLI error
/// propagation.
pub fn map_rvf_err(e: rvf_types::RvfError) -> Box<dyn std::error::Error> {
    let wrapped = std::io::Error::other(e.to_string());
    Box::new(wrapped)
}

View File

@@ -0,0 +1,208 @@
//! `rvf query` -- Query nearest neighbors.
use clap::Args;
use std::path::Path;
use rvf_runtime::filter::FilterExpr;
use rvf_runtime::{QueryOptions, RvfStore};
use super::map_rvf_err;
// CLI arguments for `rvf query`.
// NOTE: the `///` field docs are rendered by clap as `--help` text.
#[derive(Args)]
pub struct QueryArgs {
    /// Path to the RVF store
    path: String,
    /// Query vector as comma-separated floats (e.g. "1.0,0.0,0.5")
    #[arg(short, long)]
    vector: String,
    /// Number of nearest neighbors to return
    #[arg(short, long, default_value = "10")]
    k: usize,
    /// Optional filter as JSON (e.g. '{"eq":{"field":0,"value":{"u64":10}}}')
    #[arg(short, long)]
    filter: Option<String>, // parsed by parse_filter_json()
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
pub fn run(args: QueryArgs) -> Result<(), Box<dyn std::error::Error>> {
let vector: Vec<f32> = args
.vector
.split(',')
.map(|s| {
s.trim()
.parse::<f32>()
.map_err(|e| format!("Invalid vector component '{s}': {e}"))
})
.collect::<Result<Vec<_>, _>>()?;
let filter = match &args.filter {
Some(f) => Some(parse_filter_json(f)?),
None => None,
};
let query_opts = QueryOptions {
filter,
..Default::default()
};
let store = RvfStore::open_readonly(Path::new(&args.path)).map_err(map_rvf_err)?;
let results = store
.query(&vector, args.k, &query_opts)
.map_err(map_rvf_err)?;
if args.json {
let json_results: Vec<serde_json::Value> = results
.iter()
.map(|r| {
serde_json::json!({
"id": r.id,
"distance": r.distance,
})
})
.collect();
crate::output::print_json(&serde_json::json!({
"results": json_results,
"count": results.len(),
}));
} else {
println!("Query results ({} neighbors):", results.len());
for (i, r) in results.iter().enumerate() {
println!(" [{i}] id={} distance={:.6}", r.id, r.distance);
}
}
Ok(())
}
/// Parse a JSON string into a FilterExpr.
///
/// Supported format:
/// {"eq": {"field": 0, "value": {"u64": 42}}}
/// {"ne": {"field": 0, "value": {"string": "cat_a"}}}
/// {"gt": {"field": 1, "value": {"f64": 3.14}}}
/// {"lt": {"field": 1, "value": {"i64": -5}}}
/// {"ge": {"field": 1, "value": {"u64": 100}}}
/// {"le": {"field": 1, "value": {"u64": 100}}}
/// {"and": [<expr>, <expr>, ...]}
/// {"or": [<expr>, <expr>, ...]}
/// {"not": <expr>}
pub fn parse_filter_json(json_str: &str) -> Result<FilterExpr, Box<dyn std::error::Error>> {
    // Deserialize to a generic Value first, then walk it recursively.
    let parsed: serde_json::Value = serde_json::from_str(json_str)?;
    parse_filter_value(&parsed)
}
fn parse_filter_value(v: &serde_json::Value) -> Result<FilterExpr, Box<dyn std::error::Error>> {
let obj = v.as_object().ok_or("filter must be a JSON object")?;
if let Some(inner) = obj.get("eq") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Eq(field, val));
}
if let Some(inner) = obj.get("ne") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Ne(field, val));
}
if let Some(inner) = obj.get("lt") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Lt(field, val));
}
if let Some(inner) = obj.get("le") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Le(field, val));
}
if let Some(inner) = obj.get("gt") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Gt(field, val));
}
if let Some(inner) = obj.get("ge") {
let (field, val) = parse_field_value(inner)?;
return Ok(FilterExpr::Ge(field, val));
}
if let Some(inner) = obj.get("and") {
let arr = inner.as_array().ok_or("'and' value must be an array")?;
let exprs: Result<Vec<_>, _> = arr.iter().map(parse_filter_value).collect();
return Ok(FilterExpr::And(exprs?));
}
if let Some(inner) = obj.get("or") {
let arr = inner.as_array().ok_or("'or' value must be an array")?;
let exprs: Result<Vec<_>, _> = arr.iter().map(parse_filter_value).collect();
return Ok(FilterExpr::Or(exprs?));
}
if let Some(inner) = obj.get("not") {
let expr = parse_filter_value(inner)?;
return Ok(FilterExpr::Not(Box::new(expr)));
}
Err("unrecognized filter operator; expected: eq, ne, lt, le, gt, ge, and, or, not".into())
}
/// Extract the (field, value) pair of a binary comparison object,
/// e.g. {"field": 0, "value": {"u64": 42}}.
fn parse_field_value(
    v: &serde_json::Value,
) -> Result<(u16, rvf_runtime::filter::FilterValue), Box<dyn std::error::Error>> {
    let obj = v
        .as_object()
        .ok_or("comparison must be a JSON object with 'field' and 'value'")?;
    let field_num = match obj.get("field").and_then(serde_json::Value::as_u64) {
        Some(f) => f,
        None => return Err("missing or invalid 'field' (must be u16)".into()),
    };
    let value = match obj.get("value") {
        Some(val) => parse_filter_val(val)?,
        None => return Err("missing 'value' in comparison".into()),
    };
    // Narrowing cast mirrors the original behaviour (values > u16::MAX wrap).
    Ok((field_num as u16, value))
}
/// Convert a JSON value into a typed FilterValue.
///
/// Two accepted shapes: the explicit tagged form ({"u64": 42},
/// {"string": "x"}, ...) or a bare JSON scalar whose variant is inferred
/// (u64 before i64 before f64, then string, then bool).
fn parse_filter_val(
    v: &serde_json::Value,
) -> Result<rvf_runtime::filter::FilterValue, Box<dyn std::error::Error>> {
    use rvf_runtime::filter::FilterValue;
    // Explicit tagged form takes precedence; tags are checked in a fixed order.
    if let Some(obj) = v.as_object() {
        if let Some(val) = obj.get("u64") {
            let n = val.as_u64().ok_or("u64 value must be a number")?;
            return Ok(FilterValue::U64(n));
        }
        if let Some(val) = obj.get("i64") {
            let n = val.as_i64().ok_or("i64 value must be a number")?;
            return Ok(FilterValue::I64(n));
        }
        if let Some(val) = obj.get("f64") {
            let n = val.as_f64().ok_or("f64 value must be a number")?;
            return Ok(FilterValue::F64(n));
        }
        if let Some(val) = obj.get("string") {
            let s = val.as_str().ok_or("string value must be a string")?;
            return Ok(FilterValue::String(s.to_string()));
        }
        if let Some(val) = obj.get("bool") {
            let b = val.as_bool().ok_or("bool value must be a boolean")?;
            return Ok(FilterValue::Bool(b));
        }
    }
    // Fallback: infer the variant from the bare JSON scalar.
    if let Some(n) = v.as_u64() {
        Ok(FilterValue::U64(n))
    } else if let Some(n) = v.as_i64() {
        Ok(FilterValue::I64(n))
    } else if let Some(n) = v.as_f64() {
        Ok(FilterValue::F64(n))
    } else if let Some(s) = v.as_str() {
        Ok(FilterValue::String(s.to_string()))
    } else if let Some(b) = v.as_bool() {
        Ok(FilterValue::Bool(b))
    } else {
        Err("cannot parse filter value; expected {\"u64\": N}, {\"string\": \"...\"}, etc.".into())
    }
}

View File

@@ -0,0 +1,161 @@
//! `rvf rebuild-refcounts` -- Recompute REFCOUNT_SEG from COW map chain.
use clap::Args;
use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write};
use std::path::Path;
use rvf_runtime::RvfStore;
use rvf_types::{SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
use super::map_rvf_err;
// CLI arguments for `rvf rebuild-refcounts`. Field `///` doc comments double
// as clap help text, so they are left exactly as-is.
#[derive(Args)]
pub struct RebuildRefcountsArgs {
    /// Path to the RVF store
    pub file: String,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
// NOTE(review): these constants mirror on-disk format values presumably also
// declared in rvf_types -- confirm they stay in sync with the spec.
/// COW_MAP_SEG magic: "RVCM"
const COW_MAP_MAGIC: u32 = 0x5256_434D;
/// REFCOUNT_SEG magic: "RVRC"
const REFCOUNT_MAGIC: u32 = 0x5256_5243;
/// COW_MAP_SEG type
const COW_MAP_TYPE: u8 = 0x20;
/// Run `rvf rebuild-refcounts`: scan the file for COW map segments and append
/// a fresh REFCOUNT_SEG with every cluster reset to refcount 1 (base reference).
///
/// The store is opened read-only first to validate the file, then the raw
/// bytes are scanned directly. When several COW maps are present the counts
/// from the last one in the file win (assumed to be the newest link of the
/// chain -- TODO confirm against the COW map spec).
///
/// Fixes over the previous revision:
/// * the payload-size guard was `>= 64` while the counts are read at payload
///   offsets 0x48..0x50, allowing an out-of-bounds read (panic) for payloads
///   of 64..79 bytes at the end of the file; the guard is now `>= 0x50`;
/// * `payload_start + payload_len` and the cursor advance are now computed
///   with checked arithmetic so a corrupted length field cannot overflow
///   (panic in debug, silent wraparound in release).
pub fn run(args: RebuildRefcountsArgs) -> Result<(), Box<dyn std::error::Error>> {
    let store = RvfStore::open_readonly(Path::new(&args.file)).map_err(map_rvf_err)?;
    // Read the raw file to scan for COW map segments.
    let file = std::fs::File::open(&args.file)?;
    let mut reader = BufReader::new(file);
    reader.seek(SeekFrom::Start(0))?;
    let mut raw_bytes = Vec::new();
    reader.read_to_end(&mut raw_bytes)?;
    let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
    let mut cluster_count = 0u32;
    let mut local_cluster_count = 0u32;
    // Scan for COW_MAP_SEG entries.
    let mut i = 0usize;
    while i + SEGMENT_HEADER_SIZE <= raw_bytes.len() {
        if raw_bytes[i..i + 4] == magic_bytes && raw_bytes[i + 5] == COW_MAP_TYPE {
            // Payload length: little-endian u64 at segment-header offset 0x10.
            let payload_len = u64::from_le_bytes([
                raw_bytes[i + 0x10],
                raw_bytes[i + 0x11],
                raw_bytes[i + 0x12],
                raw_bytes[i + 0x13],
                raw_bytes[i + 0x14],
                raw_bytes[i + 0x15],
                raw_bytes[i + 0x16],
                raw_bytes[i + 0x17],
            ]) as usize;
            let payload_start = i + SEGMENT_HEADER_SIZE;
            // Checked add: a corrupted payload_len must not overflow the cursor.
            if let Some(payload_end) = payload_start.checked_add(payload_len) {
                // The counts live at payload offsets 0x48..0x50, so the payload
                // must be at least 0x50 bytes to read them safely.
                if payload_end <= raw_bytes.len() && payload_len >= 0x50 {
                    // Read CowMapHeader fields.
                    let cow_magic = u32::from_le_bytes([
                        raw_bytes[payload_start],
                        raw_bytes[payload_start + 1],
                        raw_bytes[payload_start + 2],
                        raw_bytes[payload_start + 3],
                    ]);
                    if cow_magic == COW_MAP_MAGIC {
                        // NOTE(review): offsets 0x48 / 0x4C are assumed to be
                        // CowMapHeader.cluster_count / .local_cluster_count --
                        // confirm against the CowMapHeader layout in rvf_types.
                        cluster_count = u32::from_le_bytes([
                            raw_bytes[payload_start + 0x48],
                            raw_bytes[payload_start + 0x49],
                            raw_bytes[payload_start + 0x4A],
                            raw_bytes[payload_start + 0x4B],
                        ]);
                        local_cluster_count = u32::from_le_bytes([
                            raw_bytes[payload_start + 0x4C],
                            raw_bytes[payload_start + 0x4D],
                            raw_bytes[payload_start + 0x4E],
                            raw_bytes[payload_start + 0x4F],
                        ]);
                    }
                }
            }
            // Advance past the whole segment; fall back to a byte-wise scan
            // when the declared length would overflow the cursor.
            match SEGMENT_HEADER_SIZE
                .checked_add(payload_len)
                .and_then(|adv| i.checked_add(adv))
            {
                Some(next) => i = next,
                None => i += 1,
            }
        } else {
            i += 1;
        }
    }
    // The read-only handle is no longer needed once scanning is done; release
    // it before re-opening the file for writing below.
    drop(store);
    if cluster_count == 0 {
        if args.json {
            crate::output::print_json(&serde_json::json!({
                "status": "no_cow_map",
                "message": "No COW map found; nothing to rebuild",
            }));
        } else {
            println!("No COW map found in file. Nothing to rebuild.");
        }
        return Ok(());
    }
    // Build refcount array: 1 byte per cluster, all set to 1 (base reference).
    let refcount_array = vec![1u8; cluster_count as usize];
    // Build 32-byte RefcountHeader.
    let mut header = [0u8; 32];
    header[0..4].copy_from_slice(&REFCOUNT_MAGIC.to_le_bytes());
    header[4..6].copy_from_slice(&1u16.to_le_bytes()); // version
    header[6] = 1; // refcount_width: 1 byte
    header[8..12].copy_from_slice(&cluster_count.to_le_bytes());
    header[12..16].copy_from_slice(&1u32.to_le_bytes()); // max_refcount
    header[16..24].copy_from_slice(&32u64.to_le_bytes()); // array_offset (after header)
    // snapshot_epoch: 0 (mutable)
    // reserved: 0
    let payload = [header.as_slice(), refcount_array.as_slice()].concat();
    // Write REFCOUNT_SEG (type 0x21) to the end of the file.
    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open(&args.file)?;
    let mut writer = BufWriter::new(&file);
    writer.seek(SeekFrom::End(0))?;
    let seg_header = build_segment_header(1, 0x21, payload.len() as u64);
    writer.write_all(&seg_header)?;
    writer.write_all(&payload)?;
    writer.flush()?;
    // Durability: make sure the appended segment reaches disk.
    file.sync_all()?;
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": "rebuilt",
            "cluster_count": cluster_count,
            "local_clusters": local_cluster_count,
        }));
    } else {
        println!("Refcounts rebuilt:");
        crate::output::print_kv("Cluster count:", &cluster_count.to_string());
        crate::output::print_kv("Local clusters:", &local_cluster_count.to_string());
    }
    Ok(())
}
/// Build a 64-byte RVF segment header.
///
/// Layout: bytes 0x00..0x04 file magic (0x5256_4653, little-endian),
/// 0x04 format version (1), 0x05 segment type, 0x08..0x10 segment id (LE),
/// 0x10..0x18 payload length (LE); the remainder is zero padding.
fn build_segment_header(seg_id: u64, seg_type: u8, payload_len: u64) -> Vec<u8> {
    const HEADER_LEN: usize = 64;
    const FILE_MAGIC: u32 = 0x5256_4653;
    let mut header = Vec::with_capacity(HEADER_LEN);
    header.extend_from_slice(&FILE_MAGIC.to_le_bytes()); // 0x00..0x04
    header.push(1); // 0x04: version
    header.push(seg_type); // 0x05: segment type
    header.extend_from_slice(&[0u8; 2]); // 0x06..0x08: reserved
    header.extend_from_slice(&seg_id.to_le_bytes()); // 0x08..0x10
    header.extend_from_slice(&payload_len.to_le_bytes()); // 0x10..0x18
    header.resize(HEADER_LEN, 0); // zero-pad to the fixed header size
    header
}

View File

@@ -0,0 +1,39 @@
//! `rvf serve` -- Start HTTP/TCP server for an RVF store.
use clap::Args;
// CLI arguments for `rvf serve`. Field `///` doc comments double as clap
// help text, so they are left exactly as-is.
#[derive(Args)]
pub struct ServeArgs {
    /// Path to the RVF store
    pub path: String,
    /// HTTP server port
    #[arg(short, long, default_value = "8080")]
    pub port: u16,
    /// TCP streaming port (defaults to HTTP port + 1000)
    #[arg(long)]
    pub tcp_port: Option<u16>,
}
/// Run `rvf serve`: start the HTTP/TCP server over the given store.
///
/// Only available when the crate is built with the `serve` feature; without
/// it, a hint is printed and the command succeeds as a no-op.
///
/// Fix: the default TCP port was computed as `args.port + 1000`, which for
/// HTTP ports above 64535 overflows u16 (panic in debug builds, silent
/// wraparound in release). Overflow is now rejected with an explicit error.
pub fn run(args: ServeArgs) -> Result<(), Box<dyn std::error::Error>> {
    #[cfg(feature = "serve")]
    {
        // Derive the default TCP port with checked arithmetic; callers can
        // always pass --tcp-port explicitly.
        let tcp_port = match args.tcp_port {
            Some(p) => p,
            None => args.port.checked_add(1000).ok_or(
                "HTTP port too high to derive default TCP port (port + 1000 overflows); pass --tcp-port explicitly",
            )?,
        };
        let rt = tokio::runtime::Runtime::new()?;
        rt.block_on(async {
            let config = rvf_server::ServerConfig {
                http_port: args.port,
                tcp_port,
                data_path: std::path::PathBuf::from(&args.path),
                dimension: 0, // auto-detect from file
            };
            rvf_server::run(config).await
        })
    }
    #[cfg(not(feature = "serve"))]
    {
        // Silence the unused-variable warning in the feature-off build.
        let _ = args;
        eprintln!(
            "The 'serve' feature is not enabled. Rebuild with: cargo build -p rvf-cli --features serve"
        );
        Ok(())
    }
}

View File

@@ -0,0 +1,46 @@
//! `rvf status` -- Show store status.
use clap::Args;
use std::path::Path;
use rvf_runtime::RvfStore;
use super::map_rvf_err;
// CLI arguments for `rvf status`. Field `///` doc comments double as clap
// help text, so they are left exactly as-is.
#[derive(Args)]
pub struct StatusArgs {
    /// Path to the RVF store
    path: String,
    /// Output as JSON
    #[arg(long)]
    json: bool,
}
/// Run `rvf status`: open the store read-only and report summary statistics
/// (vector/segment counts, file size, epoch, profile id, dead-space ratio,
/// read-only flag) as text or JSON.
///
/// Fix: the JSON output reported `read_only` but the text output omitted it;
/// both paths now report the same fields.
pub fn run(args: StatusArgs) -> Result<(), Box<dyn std::error::Error>> {
    // Read-only open: no mutation, the handle is released on drop.
    let store = RvfStore::open_readonly(Path::new(&args.path)).map_err(map_rvf_err)?;
    let status = store.status();
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "total_vectors": status.total_vectors,
            "total_segments": status.total_segments,
            "file_size": status.file_size,
            "epoch": status.current_epoch,
            "profile_id": status.profile_id,
            "dead_space_ratio": status.dead_space_ratio,
            "read_only": status.read_only,
        }));
    } else {
        println!("RVF Store: {}", args.path);
        crate::output::print_kv("Vectors:", &status.total_vectors.to_string());
        crate::output::print_kv("Segments:", &status.total_segments.to_string());
        crate::output::print_kv("File size:", &format!("{} bytes", status.file_size));
        crate::output::print_kv("Epoch:", &status.current_epoch.to_string());
        crate::output::print_kv("Profile:", &status.profile_id.to_string());
        crate::output::print_kv(
            "Dead space:",
            &format!("{:.1}%", status.dead_space_ratio * 100.0),
        );
        // Keep the human-readable output consistent with the JSON output.
        crate::output::print_kv("Read-only:", &status.read_only.to_string());
    }
    Ok(())
}

View File

@@ -0,0 +1,267 @@
//! `rvf verify-attestation` -- Verify KernelBinding and attestation.
//!
//! Validates the KERNEL_SEG header magic, computes the SHAKE-256-256
//! hash of the kernel image and compares it against the hash stored
//! in the header, inspects the KernelBinding, and scans for any
//! WITNESS_SEG payloads that contain attestation witness chains.
use clap::Args;
use std::io::{BufReader, Read};
use std::path::Path;
use rvf_crypto::{shake256_256, verify_attestation_witness_payload};
use rvf_runtime::RvfStore;
use rvf_types::kernel::KERNEL_MAGIC;
use rvf_types::{SegmentType, SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
use super::map_rvf_err;
// CLI arguments for `rvf verify-attestation`. Field `///` doc comments double
// as clap help text, so they are left exactly as-is.
#[derive(Args)]
pub struct VerifyAttestationArgs {
    /// Path to the RVF store
    pub file: String,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// Scan raw file bytes for WITNESS_SEG payloads that look like attestation
/// witness payloads (first 4 bytes decode to a plausible chain_entry_count).
///
/// This is a heuristic pre-filter; full cryptographic verification happens
/// later via `verify_attestation_witness_payload`.
///
/// Fix: the segment length arithmetic (`payload_start + payload_len` and the
/// cursor advance) was unchecked, so a corrupted payload_len near usize::MAX
/// could overflow (panic in debug builds, silent wraparound in release).
/// Both computations now use checked arithmetic, degrading to a byte-wise
/// rescan when the declared length is implausible.
fn find_attestation_witness_payloads(raw: &[u8]) -> Vec<Vec<u8>> {
    let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
    let mut results = Vec::new();
    let mut i = 0usize;
    while i + SEGMENT_HEADER_SIZE <= raw.len() {
        if raw[i..i + 4] != magic_bytes {
            i += 1;
            continue;
        }
        // Segment type lives at header offset 0x05, payload length (LE u64)
        // at offset 0x10.
        let seg_type = raw[i + 5];
        let payload_len = u64::from_le_bytes([
            raw[i + 0x10],
            raw[i + 0x11],
            raw[i + 0x12],
            raw[i + 0x13],
            raw[i + 0x14],
            raw[i + 0x15],
            raw[i + 0x16],
            raw[i + 0x17],
        ]) as usize;
        let payload_start = i + SEGMENT_HEADER_SIZE;
        // Checked add guards against overflow from a corrupted length field.
        if let Some(payload_end) = payload_start.checked_add(payload_len) {
            if seg_type == SegmentType::Witness as u8
                && payload_end <= raw.len()
                && payload_len >= 4
            {
                let payload = &raw[payload_start..payload_end];
                // Attestation witness payloads start with a u32 count + offset
                // table. A plain witness chain (raw entries) would have bytes
                // that decode to a much larger count value, so this heuristic
                // is reasonable. We attempt full verification later anyway.
                let count =
                    u32::from_le_bytes([payload[0], payload[1], payload[2], payload[3]]) as usize;
                // A plausible attestation payload: count fits in the payload
                // with offset table (8 bytes/entry) + chain entries (73
                // bytes/entry -- assumed fixed entry size, confirm in spec).
                let min_size = 4 + count * 8 + count * 73;
                if count > 0 && count < 10_000 && payload_len >= min_size {
                    results.push(payload.to_vec());
                }
            }
        }
        // Advance past the whole segment; fall back to a byte-wise scan when
        // the declared length would overflow the cursor.
        match SEGMENT_HEADER_SIZE
            .checked_add(payload_len)
            .and_then(|adv| i.checked_add(adv))
        {
            Some(next) => i = next,
            None => i += 1,
        }
    }
    results
}
/// Run `rvf verify-attestation`.
///
/// Steps: extract the kernel segment (if any), validate the kernel header
/// magic, recompute the SHAKE-256-256 image hash and compare it against the
/// stored hash, inspect the KernelBinding, verify all attestation witness
/// payloads found in the file, and report an overall pass/fail (as text or
/// JSON depending on `--json`).
pub fn run(args: VerifyAttestationArgs) -> Result<(), Box<dyn std::error::Error>> {
    let store = RvfStore::open_readonly(Path::new(&args.file)).map_err(map_rvf_err)?;
    let kernel_data = store.extract_kernel().map_err(map_rvf_err)?;
    // Also scan for attestation witness payloads in the file.
    let raw_bytes = {
        let file = std::fs::File::open(&args.file)?;
        let mut reader = BufReader::new(file);
        let mut buf = Vec::new();
        reader.read_to_end(&mut buf)?;
        buf
    };
    let att_payloads = find_attestation_witness_payloads(&raw_bytes);
    match kernel_data {
        // No kernel embedded: not an error -- report and still surface any
        // attestation witnesses found during the raw scan.
        None => {
            if args.json {
                crate::output::print_json(&serde_json::json!({
                    "status": "no_kernel",
                    "message": "No KERNEL_SEG found in file",
                    "attestation_witnesses": att_payloads.len(),
                }));
            } else {
                println!("No KERNEL_SEG found in file.");
                if !att_payloads.is_empty() {
                    println!();
                    println!(
                        "  Found {} attestation witness payload(s) -- see verify-witness.",
                        att_payloads.len()
                    );
                }
            }
        }
        Some((header_bytes, image_bytes)) => {
            // -- 1. Verify kernel header magic -----------------------------------
            let magic = u32::from_le_bytes([
                header_bytes[0],
                header_bytes[1],
                header_bytes[2],
                header_bytes[3],
            ]);
            let magic_valid = magic == KERNEL_MAGIC;
            // -- 2. Verify image hash --------------------------------------------
            // The header stores the SHAKE-256-256 hash of the image at offset
            // 0x30..0x50 (32 bytes).
            // NOTE(review): indexing assumes header_bytes is at least 0x50 bytes
            // long -- confirm extract_kernel() guarantees this, else this panics.
            let stored_image_hash = &header_bytes[0x30..0x50];
            let computed_image_hash = shake256_256(&image_bytes);
            let image_hash_valid = stored_image_hash == computed_image_hash.as_slice();
            let stored_hash_hex = crate::output::hex(stored_image_hash);
            let computed_hash_hex = crate::output::hex(&computed_image_hash);
            // -- 3. Check KernelBinding (128 bytes after 128-byte header) --------
            // NOTE(review): the binding is read from the first 128 bytes of the
            // *image* slice (the 128-byte header has already been split off by
            // extract_kernel, presumably) -- confirm the on-disk layout.
            let has_binding = image_bytes.len() >= 128;
            let mut binding_valid = false;
            let mut manifest_hash_hex = String::new();
            let mut policy_hash_hex = String::new();
            if has_binding {
                let binding_bytes = &image_bytes[..128];
                manifest_hash_hex = crate::output::hex(&binding_bytes[0..32]);
                policy_hash_hex = crate::output::hex(&binding_bytes[32..64]);
                // A zero binding version is treated as "no binding present".
                let binding_version = u16::from_le_bytes([binding_bytes[64], binding_bytes[65]]);
                binding_valid = binding_version > 0;
            }
            // -- 4. Verify arch --------------------------------------------------
            // Architecture byte at header offset 0x06; informational only.
            let arch = header_bytes[0x06];
            let arch_name = match arch {
                1 => "x86_64",
                2 => "aarch64",
                3 => "riscv64",
                _ => "unknown",
            };
            // -- 5. Verify attestation witness payloads --------------------------
            let mut att_verified: usize = 0;
            let mut att_entries_total: usize = 0;
            let mut att_errors: Vec<String> = Vec::new();
            for (idx, payload) in att_payloads.iter().enumerate() {
                match verify_attestation_witness_payload(payload) {
                    Ok(entries) => {
                        att_verified += 1;
                        att_entries_total += entries.len();
                    }
                    Err(e) => {
                        att_errors.push(format!("Attestation witness #{}: {}", idx, e));
                    }
                }
            }
            // -- 6. Overall status -----------------------------------------------
            // A missing binding is NOT a failure (legacy/unsigned files pass);
            // the verdict depends on magic, image hash, and witness errors only.
            let overall_valid = magic_valid && image_hash_valid && att_errors.is_empty();
            if args.json {
                crate::output::print_json(&serde_json::json!({
                    "status": if overall_valid { "valid" } else { "invalid" },
                    "magic_valid": magic_valid,
                    "arch": arch_name,
                    "image_hash_valid": image_hash_valid,
                    "stored_image_hash": stored_hash_hex,
                    "computed_image_hash": computed_hash_hex,
                    "has_kernel_binding": binding_valid,
                    "manifest_root_hash": if binding_valid { &manifest_hash_hex } else { "" },
                    "policy_hash": if binding_valid { &policy_hash_hex } else { "" },
                    "image_size": image_bytes.len(),
                    "attestation_witnesses": att_payloads.len(),
                    "attestation_verified": att_verified,
                    "attestation_entries": att_entries_total,
                    "attestation_errors": att_errors,
                }));
            } else {
                println!("Attestation verification:");
                crate::output::print_kv("Magic valid:", &magic_valid.to_string());
                crate::output::print_kv("Architecture:", arch_name);
                crate::output::print_kv("Image size:", &format!("{} bytes", image_bytes.len()));
                println!();
                // Image hash verification output.
                crate::output::print_kv("Stored image hash:", &stored_hash_hex);
                crate::output::print_kv("Computed image hash:", &computed_hash_hex);
                if image_hash_valid {
                    println!("  Image hash: MATCH");
                } else {
                    println!("  Image hash: MISMATCH -- image may be tampered!");
                }
                if binding_valid {
                    println!();
                    println!("  KernelBinding present:");
                    crate::output::print_kv("Manifest hash:", &manifest_hash_hex);
                    crate::output::print_kv("Policy hash:", &policy_hash_hex);
                } else {
                    println!();
                    println!("  No KernelBinding found (legacy format or unsigned stub).");
                }
                if !att_payloads.is_empty() {
                    println!();
                    crate::output::print_kv(
                        "Attestation witnesses:",
                        &format!(
                            "{} payload(s), {} verified, {} entries",
                            att_payloads.len(),
                            att_verified,
                            att_entries_total
                        ),
                    );
                    if !att_errors.is_empty() {
                        println!("  WARNING: attestation witness errors:");
                        for err in &att_errors {
                            println!("    - {}", err);
                        }
                    }
                }
                println!();
                if overall_valid {
                    println!("  Attestation verification PASSED.");
                } else {
                    // Collect every failure reason so the user sees all of
                    // them at once, not just the first.
                    let mut reasons = Vec::new();
                    if !magic_valid {
                        reasons.push("invalid magic");
                    }
                    if !image_hash_valid {
                        reasons.push("image hash mismatch");
                    }
                    if !att_errors.is_empty() {
                        reasons.push("attestation witness error(s)");
                    }
                    println!("  Attestation verification FAILED: {}", reasons.join(", "));
                }
            }
        }
    }
    Ok(())
}

View File

@@ -0,0 +1,258 @@
//! `rvf verify-witness` -- Verify all witness events in chain.
//!
//! Scans the RVF file for WITNESS_SEG segments, extracts the payload
//! bytes, and runs `rvf_crypto::verify_witness_chain()` to validate
//! the full SHAKE-256 hash chain. Reports entry count, chain
//! validity, first/last timestamps, and any chain breaks.
use clap::Args;
use std::io::{BufReader, Read};
use rvf_crypto::witness::{verify_witness_chain, WitnessEntry};
use rvf_types::{SegmentType, SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
// CLI arguments for `rvf verify-witness`. Field `///` doc comments double as
// clap help text, so they are left exactly as-is.
#[derive(Args)]
pub struct VerifyWitnessArgs {
    /// Path to the RVF store
    pub file: String,
    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
/// Result of verifying one witness segment's chain.
struct ChainResult {
    /// Number of entries decoded from this segment.
    /// When the chain is broken this is only an estimate
    /// (payload length divided by the 73-byte entry size).
    entry_count: usize,
    /// Whether the hash chain is intact.
    chain_valid: bool,
    /// Decoded entries (empty when chain_valid == false).
    entries: Vec<WitnessEntry>,
    /// Human-readable error, if any (set only when chain_valid == false).
    error: Option<String>,
}
/// Extract all WITNESS_SEG payloads from the raw file bytes.
///
/// Returns a vec of `(segment_offset, payload_bytes)`, in file order.
///
/// Fix: the segment length arithmetic (`payload_start + payload_len` and the
/// cursor advance `SEGMENT_HEADER_SIZE + payload_len`) was unchecked, so a
/// corrupted payload_len near usize::MAX could overflow (panic in debug
/// builds, silent wraparound in release). Both now use checked arithmetic,
/// degrading to a byte-wise rescan when the declared length is implausible.
fn extract_witness_payloads(raw: &[u8]) -> Vec<(usize, Vec<u8>)> {
    let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
    let mut results = Vec::new();
    let mut i = 0usize;
    while i + SEGMENT_HEADER_SIZE <= raw.len() {
        if raw[i..i + 4] != magic_bytes {
            i += 1;
            continue;
        }
        // Segment type at header offset 0x05, payload length (LE u64) at 0x10.
        let seg_type = raw[i + 5];
        let payload_len = u64::from_le_bytes([
            raw[i + 0x10],
            raw[i + 0x11],
            raw[i + 0x12],
            raw[i + 0x13],
            raw[i + 0x14],
            raw[i + 0x15],
            raw[i + 0x16],
            raw[i + 0x17],
        ]) as usize;
        let payload_start = i + SEGMENT_HEADER_SIZE;
        // Checked add guards against overflow from a corrupted length field.
        if let Some(payload_end) = payload_start.checked_add(payload_len) {
            if seg_type == SegmentType::Witness as u8 && payload_end <= raw.len() {
                results.push((i, raw[payload_start..payload_end].to_vec()));
            }
        }
        // Advance past this segment; fall back to a byte-wise scan when the
        // declared length would overflow the cursor.
        match SEGMENT_HEADER_SIZE
            .checked_add(payload_len)
            .and_then(|adv| i.checked_add(adv))
        {
            Some(next) => i = next,
            None => i += 1,
        }
    }
    results
}
/// Verify a single witness payload through the crypto chain.
///
/// An empty payload is treated as a trivially valid, zero-entry chain.
/// On verification failure the entry count is only an estimate derived
/// from the fixed 73-byte entry size.
fn verify_payload(payload: &[u8]) -> ChainResult {
    if payload.is_empty() {
        return ChainResult {
            entry_count: 0,
            chain_valid: true,
            entries: Vec::new(),
            error: None,
        };
    }
    match verify_witness_chain(payload) {
        Ok(entries) => ChainResult {
            entry_count: entries.len(),
            chain_valid: true,
            entries,
            error: None,
        },
        Err(e) => ChainResult {
            // Estimate how many entries were in the payload (73 bytes each).
            entry_count: payload.len() / 73,
            chain_valid: false,
            entries: Vec::new(),
            error: Some(e.to_string()),
        },
    }
}
/// Format a nanosecond timestamp as a human-readable seconds-since-epoch
/// string (e.g. "12.000000345s (unix epoch)"); zero denotes genesis.
fn format_timestamp_ns(ns: u64) -> String {
    match ns {
        0 => "0 (genesis)".to_string(),
        _ => {
            // Split into whole seconds and the nanosecond remainder,
            // zero-padded to nine digits.
            let secs = ns / 1_000_000_000;
            let frac = ns % 1_000_000_000;
            format!("{secs}.{frac:09}s (unix epoch)")
        }
    }
}
/// Map witness_type byte to a name; unknown bytes map to "UNKNOWN".
fn witness_type_name(wt: u8) -> &'static str {
    // Known witness types occupy the contiguous range 0x01..=0x05.
    const NAMES: [&str; 5] = [
        "PROVENANCE",           // 0x01
        "COMPUTATION",          // 0x02
        "PLATFORM_ATTESTATION", // 0x03
        "KEY_BINDING",          // 0x04
        "DATA_PROVENANCE",      // 0x05
    ];
    match wt {
        1..=5 => NAMES[usize::from(wt) - 1],
        _ => "UNKNOWN",
    }
}
/// Run `rvf verify-witness`.
///
/// Reads the whole file, extracts every WITNESS_SEG payload, verifies each
/// hash chain with `verify_witness_chain`, and reports per-segment results,
/// aggregate entry counts, first/last timestamps, a witness-type histogram,
/// and any chain breaks (as text or JSON depending on `--json`).
pub fn run(args: VerifyWitnessArgs) -> Result<(), Box<dyn std::error::Error>> {
    // Read the entire file into memory for segment scanning.
    let file = std::fs::File::open(&args.file)?;
    let mut reader = BufReader::new(file);
    let mut raw_bytes = Vec::new();
    reader.read_to_end(&mut raw_bytes)?;
    let payloads = extract_witness_payloads(&raw_bytes);
    // No witness segments at all: report and exit successfully.
    if payloads.is_empty() {
        if args.json {
            crate::output::print_json(&serde_json::json!({
                "status": "no_witnesses",
                "witness_segments": 0,
                "total_entries": 0,
            }));
        } else {
            println!("No witness segments found in file.");
        }
        return Ok(());
    }
    // Verify each witness segment's chain.
    let mut total_entries: usize = 0;
    let mut total_valid_chains: usize = 0;
    let mut all_entries: Vec<WitnessEntry> = Vec::new();
    let mut chain_results: Vec<serde_json::Value> = Vec::new();
    let mut chain_breaks: Vec<String> = Vec::new();
    for (idx, (seg_offset, payload)) in payloads.iter().enumerate() {
        let result = verify_payload(payload);
        total_entries += result.entry_count;
        if result.chain_valid {
            total_valid_chains += 1;
            all_entries.extend(result.entries.iter().cloned());
        } else {
            chain_breaks.push(format!(
                "Segment #{} at offset 0x{:X}: {}",
                idx,
                seg_offset,
                result.error.as_deref().unwrap_or("unknown error"),
            ));
        }
        // Per-segment detail is only materialized for JSON output.
        if args.json {
            let first_ts = result.entries.first().map(|e| e.timestamp_ns).unwrap_or(0);
            let last_ts = result.entries.last().map(|e| e.timestamp_ns).unwrap_or(0);
            chain_results.push(serde_json::json!({
                "segment_index": idx,
                "segment_offset": format!("0x{:X}", seg_offset),
                "entry_count": result.entry_count,
                "chain_valid": result.chain_valid,
                "first_timestamp_ns": first_ts,
                "last_timestamp_ns": last_ts,
                "error": result.error,
            }));
        }
    }
    // NOTE(review): first/last are taken from the concatenation of valid
    // chains in file order -- this assumes segments appear chronologically;
    // confirm against the writer's append behavior.
    let first_ts = all_entries.first().map(|e| e.timestamp_ns).unwrap_or(0);
    let last_ts = all_entries.last().map(|e| e.timestamp_ns).unwrap_or(0);
    let all_valid = total_valid_chains == payloads.len();
    if args.json {
        crate::output::print_json(&serde_json::json!({
            "status": if all_valid { "valid" } else { "invalid" },
            "witness_segments": payloads.len(),
            "valid_chains": total_valid_chains,
            "total_entries": total_entries,
            "first_timestamp_ns": first_ts,
            "last_timestamp_ns": last_ts,
            "chain_breaks": chain_breaks,
            "segments": chain_results,
        }));
    } else {
        println!("Witness chain verification (cryptographic):");
        println!();
        crate::output::print_kv("Witness segments:", &payloads.len().to_string());
        crate::output::print_kv(
            "Valid chains:",
            &format!("{}/{}", total_valid_chains, payloads.len()),
        );
        crate::output::print_kv("Total entries:", &total_entries.to_string());
        if !all_entries.is_empty() {
            println!();
            crate::output::print_kv("First timestamp:", &format_timestamp_ns(first_ts));
            crate::output::print_kv("Last timestamp:", &format_timestamp_ns(last_ts));
            // Show witness type distribution.
            let mut type_counts = std::collections::HashMap::new();
            for entry in &all_entries {
                *type_counts.entry(entry.witness_type).or_insert(0u64) += 1;
            }
            println!();
            println!("  Entry types:");
            // Sort by type byte for stable, readable output.
            let mut types: Vec<_> = type_counts.iter().collect();
            types.sort_by_key(|(k, _)| **k);
            for (wt, count) in types {
                println!(
                    "    0x{:02X} ({:20}): {}",
                    wt,
                    witness_type_name(*wt),
                    count
                );
            }
        }
        println!();
        if all_valid {
            println!("  All witness hash chains verified successfully.");
        } else {
            println!(
                "  WARNING: {} chain(s) failed verification:",
                chain_breaks.len()
            );
            for brk in &chain_breaks {
                println!("    - {}", brk);
            }
        }
    }
    Ok(())
}

View File

@@ -0,0 +1,77 @@
use clap::{Parser, Subcommand};
use std::process;
mod cmd;
mod output;
// Top-level CLI definition: `rvf <subcommand> [args...]`. The strings in the
// `#[command(...)]` attribute are user-visible (`rvf --help`).
#[derive(Parser)]
#[command(name = "rvf", version, about = "RuVector Format CLI")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}
// All `rvf` subcommands. The `///` doc comment on each variant doubles as
// that subcommand's help text in `rvf --help`, so wording changes here are
// user-visible; they are left exactly as-is.
#[derive(Subcommand)]
enum Commands {
    /// Create a new empty RVF store
    Create(cmd::create::CreateArgs),
    /// Ingest vectors from a JSON file
    Ingest(cmd::ingest::IngestArgs),
    /// Query nearest neighbors
    Query(cmd::query::QueryArgs),
    /// Delete vectors by ID or filter
    Delete(cmd::delete::DeleteArgs),
    /// Show store status
    Status(cmd::status::StatusArgs),
    /// Inspect segments and lineage
    Inspect(cmd::inspect::InspectArgs),
    /// Compact to reclaim dead space
    Compact(cmd::compact::CompactArgs),
    /// Derive a child store from a parent
    Derive(cmd::derive::DeriveArgs),
    /// Start HTTP server (requires 'serve' feature)
    Serve(cmd::serve::ServeArgs),
    /// Boot RVF in QEMU microVM
    Launch(cmd::launch::LaunchArgs),
    /// Embed a kernel image into an RVF file
    EmbedKernel(cmd::embed_kernel::EmbedKernelArgs),
    /// Embed an eBPF program into an RVF file
    EmbedEbpf(cmd::embed_ebpf::EmbedEbpfArgs),
    /// Create a membership filter for shared HNSW
    Filter(cmd::filter::FilterArgs),
    /// Snapshot-freeze the current state
    Freeze(cmd::freeze::FreezeArgs),
    /// Verify all witness events in chain
    VerifyWitness(cmd::verify_witness::VerifyWitnessArgs),
    /// Verify KernelBinding and attestation
    VerifyAttestation(cmd::verify_attestation::VerifyAttestationArgs),
    /// Rebuild REFCOUNT_SEG from COW map chain
    RebuildRefcounts(cmd::rebuild_refcounts::RebuildRefcountsArgs),
}
fn main() {
let cli = Cli::parse();
let result = match cli.command {
Commands::Create(args) => cmd::create::run(args),
Commands::Ingest(args) => cmd::ingest::run(args),
Commands::Query(args) => cmd::query::run(args),
Commands::Delete(args) => cmd::delete::run(args),
Commands::Status(args) => cmd::status::run(args),
Commands::Inspect(args) => cmd::inspect::run(args),
Commands::Compact(args) => cmd::compact::run(args),
Commands::Derive(args) => cmd::derive::run(args),
Commands::Serve(args) => cmd::serve::run(args),
Commands::Launch(args) => cmd::launch::run(args),
Commands::EmbedKernel(args) => cmd::embed_kernel::run(args),
Commands::EmbedEbpf(args) => cmd::embed_ebpf::run(args),
Commands::Filter(args) => cmd::filter::run(args),
Commands::Freeze(args) => cmd::freeze::run(args),
Commands::VerifyWitness(args) => cmd::verify_witness::run(args),
Commands::VerifyAttestation(args) => cmd::verify_attestation::run(args),
Commands::RebuildRefcounts(args) => cmd::rebuild_refcounts::run(args),
};
if let Err(e) = result {
eprintln!("error: {e}");
process::exit(1);
}
}

View File

@@ -0,0 +1,21 @@
//! Shared output formatting helpers.
use serde::Serialize;
/// Print a value as pretty-printed JSON on stdout.
///
/// A serialization failure degrades to printing an empty line rather than
/// aborting the command.
pub fn print_json<T: Serialize>(value: &T) {
    let rendered = serde_json::to_string_pretty(value).unwrap_or_default();
    println!("{rendered}");
}
/// Print a key-value pair with aligned formatting: the key is left-justified
/// in a 20-column field so values line up across consecutive calls.
pub fn print_kv(key: &str, value: &str) {
    println!("  {key:<20} {value}");
}
/// Format a byte slice as a lowercase hex string (two digits per byte).
///
/// Perf: the previous per-byte `format!(...).collect()` allocated a fresh
/// String for every byte; this version preallocates once and writes into it.
pub fn hex(bytes: &[u8]) -> String {
    use std::fmt::Write;
    let mut out = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        // write! into a String is infallible; ignore the Ok result.
        let _ = write!(out, "{b:02x}");
    }
    out
}