Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,255 @@
//! Progressive Boot Sequence — read Level 0 from EOF, then Level 1.
//!
//! Phase 1: Read last 4 KB -> hotset pointers -> system is queryable.
//! Phase 2: Read Level 1 at l1_manifest_offset -> full directory.
use rvf_types::{
CentroidPtr, EntrypointPtr, ErrorCode, HotCachePtr, Level0Root, PrefetchMapPtr, QuantDictPtr,
RvfError, TopLayerPtr, ROOT_MANIFEST_SIZE,
};
use crate::directory::SegmentDirectory;
use crate::level0;
use crate::level1::{self, Level1Manifest};
/// Collected hotset offsets extracted from the Level 0 root.
///
/// Each field mirrors the identically named pointer field on `Level0Root`;
/// see [`extract_hotset_offsets`], which performs the 1:1 copy.
#[derive(Clone, Debug)]
pub struct HotsetPointers {
    pub entrypoint: EntrypointPtr,
    pub toplayer: TopLayerPtr,
    pub centroid: CentroidPtr,
    pub quantdict: QuantDictPtr,
    pub hot_cache: HotCachePtr,
    pub prefetch_map: PrefetchMapPtr,
}
impl Default for HotsetPointers {
fn default() -> Self {
// Extract from a zeroed Level0Root
let root = Level0Root::zeroed();
Self {
entrypoint: root.entrypoint,
toplayer: root.toplayer,
centroid: root.centroid,
quantdict: root.quantdict,
hot_cache: root.hot_cache,
prefetch_map: root.prefetch_map,
}
}
}
/// Full boot state, progressively populated.
///
/// `level0` is available after `boot_phase1`. `level1` and `segment_dir`
/// stay `None` until later phases fill them in (`boot_phase2` produces the
/// Level 1 manifest; the segment directory is decoded from its records).
#[derive(Clone, Debug)]
pub struct BootState {
    pub level0: Level0Root,
    pub level1: Option<Level1Manifest>,
    pub segment_dir: Option<SegmentDirectory>,
}
/// Boot phase 1: read the last 4096 bytes from `file_data` and parse Level 0.
///
/// After this call the system has hotset pointers and can answer approximate
/// queries. Fails with `TruncatedSegment` when `file_data` is shorter than
/// one root manifest.
pub fn boot_phase1(file_data: &[u8]) -> Result<Level0Root, RvfError> {
    // `checked_sub` doubles as the minimum-length check.
    let Some(start) = file_data.len().checked_sub(ROOT_MANIFEST_SIZE) else {
        return Err(RvfError::Code(ErrorCode::TruncatedSegment));
    };
    let tail: &[u8; ROOT_MANIFEST_SIZE] = file_data[start..]
        .try_into()
        .map_err(|_| RvfError::Code(ErrorCode::TruncatedSegment))?;
    level0::read_level0(tail)
}
/// Boot phase 2: using the Level 0 root, read and parse Level 1 (TLV records).
///
/// After this call the system has the full segment directory. A zero-length
/// manifest yields an empty default `Level1Manifest`; an out-of-bounds or
/// overflowing range yields `TruncatedSegment`.
pub fn boot_phase2(file_data: &[u8], root: &Level0Root) -> Result<Level1Manifest, RvfError> {
    let offset = root.l1_manifest_offset as usize;
    let length = root.l1_manifest_length as usize;
    if length == 0 {
        return Ok(Level1Manifest::default());
    }
    // Reject both arithmetic overflow and a range past EOF with one error.
    let end = offset
        .checked_add(length)
        .filter(|&e| e <= file_data.len())
        .ok_or(RvfError::Code(ErrorCode::TruncatedSegment))?;
    let records = level1::read_tlv_records(&file_data[offset..end])?;
    Ok(Level1Manifest { records })
}
/// Extract the six hotset pointers from a Level 0 root.
///
/// Pure field copies — `root` is neither consumed nor mutated.
pub fn extract_hotset_offsets(root: &Level0Root) -> HotsetPointers {
    HotsetPointers {
        entrypoint: root.entrypoint,
        toplayer: root.toplayer,
        centroid: root.centroid,
        quantdict: root.quantdict,
        hot_cache: root.hot_cache,
        prefetch_map: root.prefetch_map,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::directory::{self, SegmentDirEntry};
    use crate::level0;
    use crate::level1::{ManifestTag, TlvRecord};

    /// Build a complete in-memory file: 16 KB of dummy segment bytes,
    /// then the Level 1 TLV block, then the 4 KB Level 0 root at EOF.
    fn make_test_file() -> Vec<u8> {
        // Build a segment directory with a few entries
        let dir = SegmentDirectory {
            entries: vec![
                SegmentDirEntry {
                    segment_id: 1,
                    seg_type: 0x01, // VEC
                    tier: 0,
                    file_offset: 0,
                    payload_length: 4096,
                    ..SegmentDirEntry::default()
                },
                SegmentDirEntry {
                    segment_id: 2,
                    seg_type: 0x02, // INDEX
                    tier: 1,
                    file_offset: 4096,
                    payload_length: 8192,
                    ..SegmentDirEntry::default()
                },
            ],
        };
        // Build Level 1 TLV records
        let dir_bytes = directory::write_directory(&dir);
        let tlv_records = vec![TlvRecord {
            tag: ManifestTag::SegmentDir,
            length: dir_bytes.len() as u32,
            value: dir_bytes,
        }];
        let l1_bytes = crate::level1::write_tlv_records(&tlv_records);
        // Start with dummy segments data
        let mut file_data = vec![0u8; 16384];
        let l1_offset = file_data.len();
        file_data.extend_from_slice(&l1_bytes);
        // Build Level 0 pointing to L1
        let mut root = Level0Root::zeroed();
        root.version = 1;
        root.l1_manifest_offset = l1_offset as u64;
        root.l1_manifest_length = l1_bytes.len() as u64;
        root.total_vector_count = 10_000;
        root.dimension = 384;
        root.base_dtype = 1;
        root.profile_id = 2;
        root.epoch = 1;
        // Distinct seg_offsets (0x100..0x600) let tests verify each pointer
        // round-trips independently.
        root.entrypoint = EntrypointPtr {
            seg_offset: 0x100,
            block_offset: 0,
            count: 3,
        };
        root.toplayer = TopLayerPtr {
            seg_offset: 0x200,
            block_offset: 64,
            node_count: 500,
        };
        root.centroid = CentroidPtr {
            seg_offset: 0x300,
            block_offset: 0,
            count: 128,
        };
        root.quantdict = QuantDictPtr {
            seg_offset: 0x400,
            block_offset: 0,
            size: 4096,
        };
        root.hot_cache = HotCachePtr {
            seg_offset: 0x500,
            block_offset: 0,
            vector_count: 1000,
        };
        root.prefetch_map = PrefetchMapPtr {
            offset: 0x600,
            entries: 200,
            _pad: 0,
        };
        let l0_bytes = level0::write_level0(&root);
        file_data.extend_from_slice(&l0_bytes);
        file_data
    }

    #[test]
    fn boot_phase1_extracts_hotset() {
        let file_data = make_test_file();
        let l0 = boot_phase1(&file_data).unwrap();
        assert_eq!(l0.dimension, 384);
        assert_eq!(l0.total_vector_count, 10_000);
        assert_eq!(l0.epoch, 1);
        assert_eq!(l0.entrypoint.count, 3);
        assert_eq!(l0.toplayer.node_count, 500);
        assert_eq!(l0.centroid.count, 128);
    }

    #[test]
    fn boot_phase2_loads_directory() {
        let file_data = make_test_file();
        let l0 = boot_phase1(&file_data).unwrap();
        let l1 = boot_phase2(&file_data, &l0).unwrap();
        assert!(!l1.records.is_empty());
        let dir_rec = l1.find(ManifestTag::SegmentDir).unwrap();
        let dir = directory::read_directory(&dir_rec.value).unwrap();
        assert_eq!(dir.entries.len(), 2);
        assert_eq!(dir.entries[0].segment_id, 1);
        assert_eq!(dir.entries[1].segment_id, 2);
    }

    #[test]
    fn extract_hotset_offsets_works() {
        let file_data = make_test_file();
        let l0 = boot_phase1(&file_data).unwrap();
        let hotset = extract_hotset_offsets(&l0);
        assert_eq!(hotset.entrypoint.seg_offset, 0x100);
        assert_eq!(hotset.toplayer.seg_offset, 0x200);
        assert_eq!(hotset.centroid.seg_offset, 0x300);
        assert_eq!(hotset.quantdict.seg_offset, 0x400);
        assert_eq!(hotset.hot_cache.seg_offset, 0x500);
        assert_eq!(hotset.prefetch_map.offset, 0x600);
    }

    // 100 bytes is below ROOT_MANIFEST_SIZE, so phase 1 must refuse it.
    #[test]
    fn boot_phase1_rejects_short_data() {
        let result = boot_phase1(&[0u8; 100]);
        assert!(result.is_err());
    }

    // Exercises the full phase-1 + phase-2 pipeline into a BootState.
    #[test]
    fn full_boot_state() {
        let file_data = make_test_file();
        let l0 = boot_phase1(&file_data).unwrap();
        let l1 = boot_phase2(&file_data, &l0).unwrap();
        let dir_rec = l1.find(ManifestTag::SegmentDir).unwrap();
        let dir = directory::read_directory(&dir_rec.value).unwrap();
        let state = BootState {
            level0: l0,
            level1: Some(l1),
            segment_dir: Some(dir),
        };
        assert_eq!(state.level0.epoch, 1);
        assert_eq!(state.segment_dir.as_ref().unwrap().len(), 2);
    }
}

View File

@@ -0,0 +1,146 @@
//! Overlay Chain — manifest rollback pointers for point-in-time recovery.
//!
//! Each `OVERLAY_CHAIN` TLV record stores the epoch, a pointer to the
//! previous MANIFEST_SEG, and a checkpoint hash for bisection debugging.
use alloc::vec::Vec;
use rvf_types::RvfError;
/// Fixed size of the serialized overlay chain record.
///
/// 4 (epoch) + 4 (padding) + 8 (prev offset) + 8 (prev id) + 16 (hash) = 40.
pub const OVERLAY_CHAIN_SIZE: usize = 40;
/// An overlay chain entry linking to the previous manifest.
///
/// Serialized form is exactly [`OVERLAY_CHAIN_SIZE`] bytes; see
/// `read_overlay_chain` / `write_overlay_chain`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct OverlayChain {
    /// Current epoch number.
    pub epoch: u32,
    /// Byte offset of the previous MANIFEST_SEG in the file.
    pub prev_manifest_offset: u64,
    /// Segment ID of the previous MANIFEST_SEG.
    pub prev_manifest_id: u64,
    /// Hash of the complete state at this epoch (first 128 bits).
    pub checkpoint_hash: [u8; 16],
}
/// Deserialize an overlay chain record.
///
/// Layout (40 bytes, little-endian — matches [`OVERLAY_CHAIN_SIZE`]):
/// ```text
/// 0x00  u32      epoch
/// 0x04  u32      padding (written as zero; not validated on read)
/// 0x08  u64      prev_manifest_offset
/// 0x10  u64      prev_manifest_id
/// 0x18  [u8;16]  checkpoint_hash
/// ```
///
/// Returns `RvfError::SizeMismatch` when `data` is shorter than the record;
/// trailing extra bytes are ignored.
pub fn read_overlay_chain(data: &[u8]) -> Result<OverlayChain, RvfError> {
    if data.len() < OVERLAY_CHAIN_SIZE {
        return Err(RvfError::SizeMismatch {
            expected: OVERLAY_CHAIN_SIZE,
            got: data.len(),
        });
    }
    // Length was checked above, so these fixed-width conversions cannot fail.
    let epoch = u32::from_le_bytes(data[0x00..0x04].try_into().unwrap());
    let prev_manifest_offset = u64::from_le_bytes(data[0x08..0x10].try_into().unwrap());
    let prev_manifest_id = u64::from_le_bytes(data[0x10..0x18].try_into().unwrap());
    let checkpoint_hash: [u8; 16] = data[0x18..0x28].try_into().unwrap();
    Ok(OverlayChain {
        epoch,
        prev_manifest_offset,
        prev_manifest_id,
        checkpoint_hash,
    })
}
/// Serialize an overlay chain record to its fixed 40-byte little-endian form.
pub fn write_overlay_chain(chain: &OverlayChain) -> Vec<u8> {
    let mut buf = Vec::with_capacity(OVERLAY_CHAIN_SIZE);
    buf.extend_from_slice(&chain.epoch.to_le_bytes());
    buf.extend_from_slice(&[0u8; 4]); // bytes 0x04..0x08: padding, always zero
    buf.extend_from_slice(&chain.prev_manifest_offset.to_le_bytes());
    buf.extend_from_slice(&chain.prev_manifest_id.to_le_bytes());
    buf.extend_from_slice(&chain.checkpoint_hash);
    debug_assert_eq!(buf.len(), OVERLAY_CHAIN_SIZE);
    buf
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn round_trip() {
        let chain = OverlayChain {
            epoch: 42,
            prev_manifest_offset: 0x1_0000,
            prev_manifest_id: 7,
            checkpoint_hash: [
                0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
                0x0B, 0x0C,
            ],
        };
        let bytes = write_overlay_chain(&chain);
        assert_eq!(bytes.len(), OVERLAY_CHAIN_SIZE);
        let decoded = read_overlay_chain(&bytes).unwrap();
        assert_eq!(decoded, chain);
    }

    // 10 bytes < OVERLAY_CHAIN_SIZE, so the read must fail.
    #[test]
    fn truncated_data() {
        let result = read_overlay_chain(&[0u8; 10]);
        assert!(result.is_err());
    }

    // An all-default chain must round-trip to all-zero fields.
    #[test]
    fn default_chain() {
        let chain = OverlayChain::default();
        let bytes = write_overlay_chain(&chain);
        let decoded = read_overlay_chain(&bytes).unwrap();
        assert_eq!(decoded.epoch, 0);
        assert_eq!(decoded.prev_manifest_offset, 0);
        assert_eq!(decoded.prev_manifest_id, 0);
        assert_eq!(decoded.checkpoint_hash, [0u8; 16]);
    }

    // Simulates three consecutive epochs, each linking back to the previous
    // manifest, and round-trips every link.
    #[test]
    fn chain_sequence() {
        let chain1 = OverlayChain {
            epoch: 1,
            prev_manifest_offset: 0,
            prev_manifest_id: 0,
            checkpoint_hash: [0x01; 16],
        };
        let chain2 = OverlayChain {
            epoch: 2,
            prev_manifest_offset: 0x1000,
            prev_manifest_id: 1,
            checkpoint_hash: [0x02; 16],
        };
        let chain3 = OverlayChain {
            epoch: 3,
            prev_manifest_offset: 0x2000,
            prev_manifest_id: 2,
            checkpoint_hash: [0x03; 16],
        };
        assert_eq!(chain3.prev_manifest_offset, 0x2000);
        assert_eq!(chain3.prev_manifest_id, 2);
        assert_eq!(chain2.prev_manifest_offset, 0x1000);
        assert_eq!(chain2.prev_manifest_id, 1);
        assert_eq!(chain1.prev_manifest_offset, 0);
        for chain in [chain1, chain2, chain3] {
            let bytes = write_overlay_chain(&chain);
            let decoded = read_overlay_chain(&bytes).unwrap();
            assert_eq!(decoded, chain);
        }
    }
}

View File

@@ -0,0 +1,266 @@
//! Segment Directory — the array of segment location entries
//! stored inside the `SEGMENT_DIR` TLV record of Level 1.
use alloc::vec::Vec;
use rvf_types::{RvfError, SegmentType};
/// Size of each directory entry in bytes (cache-line aligned).
pub const DIR_ENTRY_SIZE: usize = 64;
/// A single entry in the segment directory.
///
/// Binary layout (64 bytes):
/// ```text
/// 0x00  u64      segment_id
/// 0x08  u8       seg_type
/// 0x09  u8       tier (0=hot, 1=warm, 2=cold)
/// 0x0A  u16      flags
/// 0x0C  u32      reserved (must be zero)
/// 0x10  u64      file_offset
/// 0x18  u64      payload_length (decompressed)
/// 0x20  u64      compressed_length (0 if uncompressed)
/// 0x28  u16      shard_id (0 for main file)
/// 0x2A  u16      compression
/// 0x2C  u32      block_count
/// 0x30  [u8;16]  content_hash (first 128 bits)
/// ```
///
/// Note the reserved u32 at 0x0C and bytes 0x40..0x40 are not represented
/// as struct fields; `write_entry` emits the reserved word as zero.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct SegmentDirEntry {
    pub segment_id: u64,
    pub seg_type: u8,
    pub tier: u8,
    pub flags: u16,
    pub file_offset: u64,
    pub payload_length: u64,
    pub compressed_length: u64,
    pub shard_id: u16,
    pub compression: u16,
    pub block_count: u32,
    pub content_hash: [u8; 16],
}
/// The complete segment directory.
///
/// Serialized as a flat array of 64-byte entries (no header, no count).
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct SegmentDirectory {
    pub entries: Vec<SegmentDirEntry>,
}
// ---------- helpers ----------
// Fixed-width little-endian accessors. Each panics (slice range out of
// bounds) when `off` is too close to the end of `buf`; callers guarantee
// bounds before calling.
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    u16::from_le_bytes(buf[off..off + 2].try_into().unwrap())
}
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    u32::from_le_bytes(buf[off..off + 4].try_into().unwrap())
}
fn read_u64_le(buf: &[u8], off: usize) -> u64 {
    u64::from_le_bytes(buf[off..off + 8].try_into().unwrap())
}
fn write_u16_le(buf: &mut [u8], off: usize, v: u16) {
    buf[off..off + 2].copy_from_slice(&v.to_le_bytes());
}
fn write_u32_le(buf: &mut [u8], off: usize, v: u32) {
    buf[off..off + 4].copy_from_slice(&v.to_le_bytes());
}
fn write_u64_le(buf: &mut [u8], off: usize, v: u64) {
    buf[off..off + 8].copy_from_slice(&v.to_le_bytes());
}
fn read_entry(buf: &[u8], base: usize) -> SegmentDirEntry {
let mut content_hash = [0u8; 16];
content_hash.copy_from_slice(&buf[base + 0x30..base + 0x40]);
SegmentDirEntry {
segment_id: read_u64_le(buf, base),
seg_type: buf[base + 0x08],
tier: buf[base + 0x09],
flags: read_u16_le(buf, base + 0x0A),
file_offset: read_u64_le(buf, base + 0x10),
payload_length: read_u64_le(buf, base + 0x18),
compressed_length: read_u64_le(buf, base + 0x20),
shard_id: read_u16_le(buf, base + 0x28),
compression: read_u16_le(buf, base + 0x2A),
block_count: read_u32_le(buf, base + 0x2C),
content_hash,
}
}
fn write_entry(buf: &mut [u8], base: usize, e: &SegmentDirEntry) {
write_u64_le(buf, base, e.segment_id);
buf[base + 0x08] = e.seg_type;
buf[base + 0x09] = e.tier;
write_u16_le(buf, base + 0x0A, e.flags);
write_u32_le(buf, base + 0x0C, 0); // reserved
write_u64_le(buf, base + 0x10, e.file_offset);
write_u64_le(buf, base + 0x18, e.payload_length);
write_u64_le(buf, base + 0x20, e.compressed_length);
write_u16_le(buf, base + 0x28, e.shard_id);
write_u16_le(buf, base + 0x2A, e.compression);
write_u32_le(buf, base + 0x2C, e.block_count);
buf[base + 0x30..base + 0x40].copy_from_slice(&e.content_hash);
}
/// Deserialize a segment directory from raw bytes.
///
/// `data` must be an exact multiple of [`DIR_ENTRY_SIZE`]; otherwise a
/// `SizeMismatch` is returned whose `expected` is the next larger valid size.
pub fn read_directory(data: &[u8]) -> Result<SegmentDirectory, RvfError> {
    if data.len() % DIR_ENTRY_SIZE != 0 {
        return Err(RvfError::SizeMismatch {
            expected: (data.len() / DIR_ENTRY_SIZE + 1) * DIR_ENTRY_SIZE,
            got: data.len(),
        });
    }
    // The length check guarantees `chunks_exact` leaves no remainder.
    let entries = data
        .chunks_exact(DIR_ENTRY_SIZE)
        .map(|chunk| read_entry(chunk, 0))
        .collect();
    Ok(SegmentDirectory { entries })
}
/// Serialize a segment directory to raw bytes (64 bytes per entry, no header).
pub fn write_directory(dir: &SegmentDirectory) -> Vec<u8> {
    let mut buf = vec![0u8; dir.entries.len() * DIR_ENTRY_SIZE];
    for (chunk, entry) in buf.chunks_exact_mut(DIR_ENTRY_SIZE).zip(&dir.entries) {
        write_entry(chunk, 0, entry);
    }
    buf
}
impl SegmentDirectory {
    /// Linear scan for the entry with the given segment ID.
    pub fn find_segment(&self, id: u64) -> Option<&SegmentDirEntry> {
        self.entries.iter().find(|entry| entry.segment_id == id)
    }

    /// All entries whose raw `seg_type` byte equals `seg_type as u8`.
    pub fn segments_by_type(&self, seg_type: SegmentType) -> Vec<&SegmentDirEntry> {
        let wanted = seg_type as u8;
        self.entries
            .iter()
            .filter(|entry| entry.seg_type == wanted)
            .collect()
    }

    /// Number of entries in the directory.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// True when the directory holds no entries.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience constructor: fills the interesting fields from `id`
    /// (offset = id * 0x1000, hash[0] = low byte of id).
    fn make_entry(id: u64, seg_type: u8, tier: u8) -> SegmentDirEntry {
        let mut hash = [0u8; 16];
        hash[0] = (id & 0xFF) as u8;
        SegmentDirEntry {
            segment_id: id,
            seg_type,
            tier,
            flags: 0,
            file_offset: id * 0x1000,
            payload_length: 4096,
            compressed_length: 0,
            shard_id: 0,
            compression: 0,
            block_count: 1,
            content_hash: hash,
        }
    }

    #[test]
    fn round_trip_single_entry() {
        let dir = SegmentDirectory {
            entries: vec![make_entry(1, SegmentType::Vec as u8, 0)],
        };
        let bytes = write_directory(&dir);
        assert_eq!(bytes.len(), 64);
        let decoded = read_directory(&bytes).unwrap();
        assert_eq!(decoded.entries.len(), 1);
        assert_eq!(decoded.entries[0], dir.entries[0]);
    }

    #[test]
    fn round_trip_100_entries() {
        // Varied seg_type/tier values exercise every byte-level field.
        let entries: Vec<_> = (0..100)
            .map(|i| make_entry(i, (i % 13 + 1) as u8, (i % 3) as u8))
            .collect();
        let dir = SegmentDirectory {
            entries: entries.clone(),
        };
        let bytes = write_directory(&dir);
        assert_eq!(bytes.len(), 100 * 64);
        let decoded = read_directory(&bytes).unwrap();
        assert_eq!(decoded.entries.len(), 100);
        for (a, b) in decoded.entries.iter().zip(entries.iter()) {
            assert_eq!(a, b);
        }
    }

    #[test]
    fn find_segment_by_id() {
        let dir = SegmentDirectory {
            entries: vec![
                make_entry(10, SegmentType::Vec as u8, 0),
                make_entry(20, SegmentType::Index as u8, 1),
                make_entry(30, SegmentType::Manifest as u8, 0),
            ],
        };
        assert_eq!(
            dir.find_segment(20).unwrap().seg_type,
            SegmentType::Index as u8
        );
        assert!(dir.find_segment(99).is_none());
    }

    #[test]
    fn filter_by_type() {
        let dir = SegmentDirectory {
            entries: vec![
                make_entry(1, SegmentType::Vec as u8, 0),
                make_entry(2, SegmentType::Vec as u8, 1),
                make_entry(3, SegmentType::Index as u8, 0),
                make_entry(4, SegmentType::Vec as u8, 2),
            ],
        };
        let vecs = dir.segments_by_type(SegmentType::Vec);
        assert_eq!(vecs.len(), 3);
        let indexes = dir.segments_by_type(SegmentType::Index);
        assert_eq!(indexes.len(), 1);
        let manifests = dir.segments_by_type(SegmentType::Manifest);
        assert_eq!(manifests.len(), 0);
    }

    #[test]
    fn bad_size_returns_error() {
        let data = vec![0u8; 65]; // not a multiple of 64
        let result = read_directory(&data);
        assert!(result.is_err());
    }

    #[test]
    fn empty_directory() {
        let dir = SegmentDirectory { entries: vec![] };
        let bytes = write_directory(&dir);
        assert!(bytes.is_empty());
        let decoded = read_directory(&bytes).unwrap();
        assert!(decoded.is_empty());
    }
}

View File

@@ -0,0 +1,548 @@
//! Level 0 Root Manifest — fixed 4096 bytes at EOF.
//!
//! Provides read/write/validate functions that operate on raw byte arrays,
//! using the `Level0Root` repr(C) struct from `rvf_types`.
use rvf_types::{
CentroidPtr, EntrypointPtr, ErrorCode, FileIdentity, HotCachePtr, Level0Root, PrefetchMapPtr,
QuantDictPtr, RvfError, TopLayerPtr, ROOT_MANIFEST_MAGIC, ROOT_MANIFEST_SIZE,
};
// ---------- helpers for little-endian read/write ----------
// Same fixed-width accessors as crate::directory; each panics on an
// out-of-range `off` (callers pass offsets inside the 4096-byte root).
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    u16::from_le_bytes(buf[off..off + 2].try_into().unwrap())
}
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    u32::from_le_bytes(buf[off..off + 4].try_into().unwrap())
}
fn read_u64_le(buf: &[u8], off: usize) -> u64 {
    u64::from_le_bytes(buf[off..off + 8].try_into().unwrap())
}
fn write_u16_le(buf: &mut [u8], off: usize, v: u16) {
    buf[off..off + 2].copy_from_slice(&v.to_le_bytes());
}
fn write_u32_le(buf: &mut [u8], off: usize, v: u32) {
    buf[off..off + 4].copy_from_slice(&v.to_le_bytes());
}
fn write_u64_le(buf: &mut [u8], off: usize, v: u64) {
    buf[off..off + 8].copy_from_slice(&v.to_le_bytes());
}
// ---------- Offsets matching the spec ----------
// Byte offsets of each field inside the 4096-byte Level 0 root manifest.
const OFF_MAGIC: usize = 0x000;
const OFF_VERSION: usize = 0x004;
const OFF_FLAGS: usize = 0x006;
const OFF_L1_OFFSET: usize = 0x008;
const OFF_L1_LENGTH: usize = 0x010;
const OFF_TOTAL_VEC: usize = 0x018;
const OFF_DIM: usize = 0x020;
const OFF_DTYPE: usize = 0x022;
const OFF_PROFILE: usize = 0x023;
const OFF_EPOCH: usize = 0x024;
const OFF_CREATED: usize = 0x028;
const OFF_MODIFIED: usize = 0x030;
// Hotset pointers, 16 bytes apart (the prefetch pointer uses 12 + padding).
const OFF_ENTRYPOINT: usize = 0x038;
const OFF_TOPLAYER: usize = 0x048;
const OFF_CENTROID: usize = 0x058;
const OFF_QUANTDICT: usize = 0x068;
const OFF_HOT_CACHE: usize = 0x078;
const OFF_PREFETCH: usize = 0x088;
const OFF_SIG_ALGO: usize = 0x098;
const OFF_SIG_LEN: usize = 0x09A;
const OFF_SIGNATURE: usize = 0x09C;
// FileIdentity offsets within the reserved area (0xF00..0xF44)
const OFF_FILE_ID: usize = 0xF00;
const OFF_PARENT_ID: usize = 0xF10;
const OFF_PARENT_HASH: usize = 0xF20;
const OFF_LINEAGE_DEPTH: usize = 0xF40;
// COW pointer offsets within the reserved area (0xF44..0xF84)
// These follow FileIdentity and are backward-compatible (zeros = no COW).
const OFF_COW_MAP_OFFSET: usize = 0xF44;
const OFF_COW_MAP_GENERATION: usize = 0xF4C;
const OFF_MEMBERSHIP_OFFSET: usize = 0xF50;
const OFF_MEMBERSHIP_GENERATION: usize = 0xF58;
const OFF_SNAPSHOT_EPOCH: usize = 0xF5C;
const OFF_DOUBLE_ROOT_GENERATION: usize = 0xF60;
const OFF_DOUBLE_ROOT_HASH: usize = 0xF64;
// CRC32C over bytes 0x000..0xFFC, stored in the final 4 bytes of the root.
const OFF_CHECKSUM: usize = 0xFFC;
/// Deserialize a Level 0 root manifest from exactly 4096 bytes.
///
/// Validation order: magic first, then CRC32C over bytes `0x000..0xFFC`
/// against the stored checksum at `0xFFC`; fields are decoded only after
/// both checks pass.
///
/// # Errors
/// - `RvfError::BadMagic` when the leading magic word does not match.
/// - `ErrorCode::InvalidChecksum` when the CRC32C does not match.
pub fn read_level0(data: &[u8; ROOT_MANIFEST_SIZE]) -> Result<Level0Root, RvfError> {
    let magic = read_u32_le(data, OFF_MAGIC);
    if magic != ROOT_MANIFEST_MAGIC {
        return Err(RvfError::BadMagic {
            expected: ROOT_MANIFEST_MAGIC,
            got: magic,
        });
    }
    let stored_crc = read_u32_le(data, OFF_CHECKSUM);
    let computed_crc = crc32c::crc32c(&data[..OFF_CHECKSUM]);
    if stored_crc != computed_crc {
        return Err(RvfError::Code(ErrorCode::InvalidChecksum));
    }
    let sig_length = read_u16_le(data, OFF_SIG_LEN);
    let mut root = Level0Root::zeroed();
    root.magic = magic;
    root.version = read_u16_le(data, OFF_VERSION);
    root.flags = read_u16_le(data, OFF_FLAGS);
    root.l1_manifest_offset = read_u64_le(data, OFF_L1_OFFSET);
    root.l1_manifest_length = read_u64_le(data, OFF_L1_LENGTH);
    root.total_vector_count = read_u64_le(data, OFF_TOTAL_VEC);
    root.dimension = read_u16_le(data, OFF_DIM);
    root.base_dtype = data[OFF_DTYPE];
    root.profile_id = data[OFF_PROFILE];
    root.epoch = read_u32_le(data, OFF_EPOCH);
    root.created_ns = read_u64_le(data, OFF_CREATED);
    root.modified_ns = read_u64_le(data, OFF_MODIFIED);
    // Each hotset pointer is laid out as u64 seg_offset + two u32 fields.
    root.entrypoint = EntrypointPtr {
        seg_offset: read_u64_le(data, OFF_ENTRYPOINT),
        block_offset: read_u32_le(data, OFF_ENTRYPOINT + 8),
        count: read_u32_le(data, OFF_ENTRYPOINT + 12),
    };
    root.toplayer = TopLayerPtr {
        seg_offset: read_u64_le(data, OFF_TOPLAYER),
        block_offset: read_u32_le(data, OFF_TOPLAYER + 8),
        node_count: read_u32_le(data, OFF_TOPLAYER + 12),
    };
    root.centroid = CentroidPtr {
        seg_offset: read_u64_le(data, OFF_CENTROID),
        block_offset: read_u32_le(data, OFF_CENTROID + 8),
        count: read_u32_le(data, OFF_CENTROID + 12),
    };
    root.quantdict = QuantDictPtr {
        seg_offset: read_u64_le(data, OFF_QUANTDICT),
        block_offset: read_u32_le(data, OFF_QUANTDICT + 8),
        size: read_u32_le(data, OFF_QUANTDICT + 12),
    };
    root.hot_cache = HotCachePtr {
        seg_offset: read_u64_le(data, OFF_HOT_CACHE),
        block_offset: read_u32_le(data, OFF_HOT_CACHE + 8),
        vector_count: read_u32_le(data, OFF_HOT_CACHE + 12),
    };
    root.prefetch_map = PrefetchMapPtr {
        offset: read_u64_le(data, OFF_PREFETCH),
        entries: read_u32_le(data, OFF_PREFETCH + 8),
        _pad: 0,
    };
    root.sig_algo = read_u16_le(data, OFF_SIG_ALGO);
    root.sig_length = sig_length;
    // Copy at most SIG_BUF_SIZE signature bytes; note that `sig_length` is
    // kept as stored even if it exceeds the buffer (only the copy is clamped).
    let sig_len = sig_length as usize;
    let sig_max = Level0Root::SIG_BUF_SIZE.min(sig_len);
    root.signature_buf[..sig_max].copy_from_slice(&data[OFF_SIGNATURE..OFF_SIGNATURE + sig_max]);
    // Read FileIdentity from the reserved area
    let mut file_id = [0u8; 16];
    file_id.copy_from_slice(&data[OFF_FILE_ID..OFF_FILE_ID + 16]);
    let mut parent_id = [0u8; 16];
    parent_id.copy_from_slice(&data[OFF_PARENT_ID..OFF_PARENT_ID + 16]);
    let mut parent_hash = [0u8; 32];
    parent_hash.copy_from_slice(&data[OFF_PARENT_HASH..OFF_PARENT_HASH + 32]);
    let lineage_depth = read_u32_le(data, OFF_LINEAGE_DEPTH);
    let fi = FileIdentity {
        file_id,
        parent_id,
        parent_hash,
        lineage_depth,
    };
    // NOTE(review): assumes `FileIdentity::to_bytes` yields exactly 68 bytes
    // (16 + 16 + 32 + 4) — confirm against rvf_types.
    let fi_bytes = fi.to_bytes();
    root.reserved[..68].copy_from_slice(&fi_bytes);
    // Read COW pointers from the reserved area (backward-compatible: zeros = no COW).
    // These are stored as raw bytes in reserved[68..132].
    let cow_map_offset = read_u64_le(data, OFF_COW_MAP_OFFSET);
    let cow_map_generation = read_u32_le(data, OFF_COW_MAP_GENERATION);
    let membership_offset = read_u64_le(data, OFF_MEMBERSHIP_OFFSET);
    let membership_generation = read_u32_le(data, OFF_MEMBERSHIP_GENERATION);
    let snapshot_epoch = read_u32_le(data, OFF_SNAPSHOT_EPOCH);
    let double_root_generation = read_u32_le(data, OFF_DOUBLE_ROOT_GENERATION);
    let mut double_root_hash = [0u8; 32];
    double_root_hash.copy_from_slice(&data[OFF_DOUBLE_ROOT_HASH..OFF_DOUBLE_ROOT_HASH + 32]);
    // Pack COW pointers into reserved[68..132], little-endian, in the same
    // order as the on-disk fields above.
    let cow_off = 68;
    root.reserved[cow_off..cow_off + 8].copy_from_slice(&cow_map_offset.to_le_bytes());
    root.reserved[cow_off + 8..cow_off + 12].copy_from_slice(&cow_map_generation.to_le_bytes());
    root.reserved[cow_off + 12..cow_off + 20].copy_from_slice(&membership_offset.to_le_bytes());
    root.reserved[cow_off + 20..cow_off + 24].copy_from_slice(&membership_generation.to_le_bytes());
    root.reserved[cow_off + 24..cow_off + 28].copy_from_slice(&snapshot_epoch.to_le_bytes());
    root.reserved[cow_off + 28..cow_off + 32]
        .copy_from_slice(&double_root_generation.to_le_bytes());
    root.reserved[cow_off + 32..cow_off + 64].copy_from_slice(&double_root_hash);
    root.root_checksum = stored_crc;
    Ok(root)
}
/// Serialize a Level 0 root manifest into exactly 4096 bytes.
///
/// The `root_checksum` field on the input is ignored; the checksum is
/// computed over bytes 0x000..0xFFC and written at offset 0xFFC.
pub fn write_level0(root: &Level0Root) -> [u8; ROOT_MANIFEST_SIZE] {
    let mut buf = [0u8; ROOT_MANIFEST_SIZE];
    write_u32_le(&mut buf, OFF_MAGIC, root.magic);
    write_u16_le(&mut buf, OFF_VERSION, root.version);
    write_u16_le(&mut buf, OFF_FLAGS, root.flags);
    write_u64_le(&mut buf, OFF_L1_OFFSET, root.l1_manifest_offset);
    write_u64_le(&mut buf, OFF_L1_LENGTH, root.l1_manifest_length);
    write_u64_le(&mut buf, OFF_TOTAL_VEC, root.total_vector_count);
    write_u16_le(&mut buf, OFF_DIM, root.dimension);
    buf[OFF_DTYPE] = root.base_dtype;
    buf[OFF_PROFILE] = root.profile_id;
    write_u32_le(&mut buf, OFF_EPOCH, root.epoch);
    write_u64_le(&mut buf, OFF_CREATED, root.created_ns);
    write_u64_le(&mut buf, OFF_MODIFIED, root.modified_ns);
    // Entrypoint (16 bytes)
    write_u64_le(&mut buf, OFF_ENTRYPOINT, root.entrypoint.seg_offset);
    write_u32_le(&mut buf, OFF_ENTRYPOINT + 8, root.entrypoint.block_offset);
    write_u32_le(&mut buf, OFF_ENTRYPOINT + 12, root.entrypoint.count);
    // Top layer (16 bytes)
    write_u64_le(&mut buf, OFF_TOPLAYER, root.toplayer.seg_offset);
    write_u32_le(&mut buf, OFF_TOPLAYER + 8, root.toplayer.block_offset);
    write_u32_le(&mut buf, OFF_TOPLAYER + 12, root.toplayer.node_count);
    // Centroid (16 bytes)
    write_u64_le(&mut buf, OFF_CENTROID, root.centroid.seg_offset);
    write_u32_le(&mut buf, OFF_CENTROID + 8, root.centroid.block_offset);
    write_u32_le(&mut buf, OFF_CENTROID + 12, root.centroid.count);
    // Quant dict (16 bytes)
    write_u64_le(&mut buf, OFF_QUANTDICT, root.quantdict.seg_offset);
    write_u32_le(&mut buf, OFF_QUANTDICT + 8, root.quantdict.block_offset);
    write_u32_le(&mut buf, OFF_QUANTDICT + 12, root.quantdict.size);
    // Hot cache (16 bytes)
    write_u64_le(&mut buf, OFF_HOT_CACHE, root.hot_cache.seg_offset);
    write_u32_le(&mut buf, OFF_HOT_CACHE + 8, root.hot_cache.block_offset);
    write_u32_le(&mut buf, OFF_HOT_CACHE + 12, root.hot_cache.vector_count);
    // Prefetch map (12 bytes: u64 offset + u32 entries)
    write_u64_le(&mut buf, OFF_PREFETCH, root.prefetch_map.offset);
    write_u32_le(&mut buf, OFF_PREFETCH + 8, root.prefetch_map.entries);
    write_u16_le(&mut buf, OFF_SIG_ALGO, root.sig_algo);
    // Unlike the read path, the *stored* sig_length is clamped to the buffer
    // size here, so a too-large in-memory value is normalized on write.
    let sig_len = (root.sig_length as usize).min(Level0Root::SIG_BUF_SIZE);
    write_u16_le(&mut buf, OFF_SIG_LEN, sig_len as u16);
    buf[OFF_SIGNATURE..OFF_SIGNATURE + sig_len].copy_from_slice(&root.signature_buf[..sig_len]);
    // Write FileIdentity from reserved area into the buffer.
    // Guarded so a short `reserved` array simply leaves the identity zeroed.
    if root.reserved.len() >= 68 {
        let fi = FileIdentity::from_bytes(root.reserved[..68].try_into().unwrap());
        buf[OFF_FILE_ID..OFF_FILE_ID + 16].copy_from_slice(&fi.file_id);
        buf[OFF_PARENT_ID..OFF_PARENT_ID + 16].copy_from_slice(&fi.parent_id);
        buf[OFF_PARENT_HASH..OFF_PARENT_HASH + 32].copy_from_slice(&fi.parent_hash);
        write_u32_le(&mut buf, OFF_LINEAGE_DEPTH, fi.lineage_depth);
    }
    // Write COW pointers from reserved[68..132] into the buffer
    // Backward-compatible: zeros mean no COW.
    if root.reserved.len() >= 132 {
        let cow_off = 68;
        buf[OFF_COW_MAP_OFFSET..OFF_COW_MAP_OFFSET + 8]
            .copy_from_slice(&root.reserved[cow_off..cow_off + 8]);
        buf[OFF_COW_MAP_GENERATION..OFF_COW_MAP_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 8..cow_off + 12]);
        buf[OFF_MEMBERSHIP_OFFSET..OFF_MEMBERSHIP_OFFSET + 8]
            .copy_from_slice(&root.reserved[cow_off + 12..cow_off + 20]);
        buf[OFF_MEMBERSHIP_GENERATION..OFF_MEMBERSHIP_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 20..cow_off + 24]);
        buf[OFF_SNAPSHOT_EPOCH..OFF_SNAPSHOT_EPOCH + 4]
            .copy_from_slice(&root.reserved[cow_off + 24..cow_off + 28]);
        buf[OFF_DOUBLE_ROOT_GENERATION..OFF_DOUBLE_ROOT_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 28..cow_off + 32]);
        buf[OFF_DOUBLE_ROOT_HASH..OFF_DOUBLE_ROOT_HASH + 32]
            .copy_from_slice(&root.reserved[cow_off + 32..cow_off + 64]);
    }
    // CRC32C over first 4092 bytes
    let crc = crc32c::crc32c(&buf[..OFF_CHECKSUM]);
    write_u32_le(&mut buf, OFF_CHECKSUM, crc);
    buf
}
/// Fast validation: check magic + CRC32C without full deserialization.
///
/// Returns `true` only when the magic word matches and the stored CRC32C at
/// `0xFFC` equals the one recomputed over bytes `0x000..0xFFC`. The CRC is
/// only computed when the magic matches (short-circuit).
pub fn validate_level0(data: &[u8; ROOT_MANIFEST_SIZE]) -> bool {
    read_u32_le(data, OFF_MAGIC) == ROOT_MANIFEST_MAGIC
        && read_u32_le(data, OFF_CHECKSUM) == crc32c::crc32c(&data[..OFF_CHECKSUM])
}
#[cfg(test)]
mod tests {
use super::*;
fn sample_root() -> Level0Root {
let mut root = Level0Root::zeroed();
root.version = 1;
root.flags = 0x0004; // SIGNED
root.l1_manifest_offset = 0x1_0000;
root.l1_manifest_length = 0x2000;
root.total_vector_count = 10_000_000;
root.dimension = 384;
root.base_dtype = 1; // f16
root.profile_id = 2; // text
root.epoch = 42;
root.created_ns = 1_700_000_000_000_000_000;
root.modified_ns = 1_700_000_001_000_000_000;
root.entrypoint = EntrypointPtr {
seg_offset: 0x1000,
block_offset: 64,
count: 3,
};
root.toplayer = TopLayerPtr {
seg_offset: 0x2000,
block_offset: 128,
node_count: 500,
};
root.centroid = CentroidPtr {
seg_offset: 0x3000,
block_offset: 0,
count: 256,
};
root.quantdict = QuantDictPtr {
seg_offset: 0x4000,
block_offset: 0,
size: 8192,
};
root.hot_cache = HotCachePtr {
seg_offset: 0x5000,
block_offset: 0,
vector_count: 1000,
};
root.prefetch_map = PrefetchMapPtr {
offset: 0x6000,
entries: 200,
_pad: 0,
};
root.sig_algo = 0; // Ed25519
root.sig_length = 4;
root.signature_buf[0] = 0xDE;
root.signature_buf[1] = 0xAD;
root.signature_buf[2] = 0xBE;
root.signature_buf[3] = 0xEF;
root
}
#[test]
fn round_trip() {
let original = sample_root();
let bytes = write_level0(&original);
let decoded = read_level0(&bytes).expect("read_level0 should succeed");
assert_eq!(decoded.magic, original.magic);
assert_eq!(decoded.version, original.version);
assert_eq!(decoded.flags, original.flags);
assert_eq!(decoded.l1_manifest_offset, original.l1_manifest_offset);
assert_eq!(decoded.l1_manifest_length, original.l1_manifest_length);
assert_eq!(decoded.total_vector_count, original.total_vector_count);
assert_eq!(decoded.dimension, original.dimension);
assert_eq!(decoded.base_dtype, original.base_dtype);
assert_eq!(decoded.profile_id, original.profile_id);
assert_eq!(decoded.epoch, original.epoch);
assert_eq!(decoded.created_ns, original.created_ns);
assert_eq!(decoded.modified_ns, original.modified_ns);
assert_eq!(
decoded.entrypoint.seg_offset,
original.entrypoint.seg_offset
);
assert_eq!(
decoded.entrypoint.block_offset,
original.entrypoint.block_offset
);
assert_eq!(decoded.entrypoint.count, original.entrypoint.count);
assert_eq!(decoded.toplayer.seg_offset, original.toplayer.seg_offset);
assert_eq!(decoded.toplayer.node_count, original.toplayer.node_count);
assert_eq!(decoded.centroid.seg_offset, original.centroid.seg_offset);
assert_eq!(decoded.centroid.count, original.centroid.count);
assert_eq!(decoded.quantdict.seg_offset, original.quantdict.seg_offset);
assert_eq!(decoded.quantdict.size, original.quantdict.size);
assert_eq!(decoded.hot_cache.seg_offset, original.hot_cache.seg_offset);
assert_eq!(
decoded.hot_cache.vector_count,
original.hot_cache.vector_count
);
assert_eq!(decoded.prefetch_map.offset, original.prefetch_map.offset);
assert_eq!(decoded.prefetch_map.entries, original.prefetch_map.entries);
assert_eq!(decoded.sig_algo, original.sig_algo);
assert_eq!(decoded.sig_length, original.sig_length);
assert_eq!(decoded.signature_buf[..4], original.signature_buf[..4]);
}
#[test]
fn crc_detects_corruption() {
    // A freshly serialized root must validate cleanly.
    let mut encoded = write_level0(&sample_root());
    assert!(validate_level0(&encoded));

    // Flipping a single byte in the body must break the CRC check.
    encoded[0x050] ^= 0xFF;
    assert!(!validate_level0(&encoded));

    // The full reader must reject the corrupted buffer as well.
    assert!(read_level0(&encoded).is_err());
}
#[test]
fn invalid_magic_rejected() {
    let mut encoded = write_level0(&sample_root());

    // Zero out the 4-byte magic at the start of the buffer.
    for byte in encoded.iter_mut().take(4) {
        *byte = 0x00;
    }

    // Recompute the checksum so the magic test is the only thing that fails.
    let checksum = crc32c::crc32c(&encoded[..OFF_CHECKSUM]);
    write_u32_le(&mut encoded, OFF_CHECKSUM, checksum);

    match read_level0(&encoded).unwrap_err() {
        RvfError::BadMagic { expected, got } => {
            assert_eq!(expected, ROOT_MANIFEST_MAGIC);
            assert_eq!(got, 0);
        }
        other => panic!("expected BadMagic, got {:?}", other),
    }
}
#[test]
fn default_root_round_trips() {
    // Even an all-zero root serializes with a valid magic and checksum,
    // and its zeroed counters come back intact.
    let decoded = read_level0(&write_level0(&Level0Root::zeroed())).unwrap();
    assert_eq!(decoded.magic, ROOT_MANIFEST_MAGIC);
    assert_eq!(decoded.total_vector_count, 0);
    assert_eq!(decoded.dimension, 0);
}
#[test]
fn output_is_exactly_4096_bytes() {
    // The Level 0 root is a fixed-size 4 KB structure by design.
    assert_eq!(write_level0(&Level0Root::zeroed()).len(), 4096);
}
#[test]
fn cow_pointers_round_trip() {
    // The COW bookkeeping has no named fields on Level0Root; it is packed by
    // hand into the `reserved` area. This test writes the pointers, round-trips
    // through write_level0/read_level0, and checks the bytes come back intact.
    //
    // Packed layout inside `reserved`, relative to `cow_off` (= 68):
    //   +0  .. +8   cow_map_offset         (u64 LE)
    //   +8  .. +12  cow_map_generation     (u32 LE)
    //   +12 .. +20  membership_offset      (u64 LE)
    //   +20 .. +24  membership_generation  (u32 LE)
    //   +24 .. +28  snapshot_epoch         (u32 LE)
    //   +28 .. +32  double_root_generation (u32 LE)
    //   +32 .. +64  double_root_hash       ([u8; 32])
    let mut root = sample_root();
    // Set COW pointers in the reserved area (offsets 68..132)
    let cow_off = 68;
    let cow_map_offset: u64 = 0x1234_5678_9ABC_DEF0;
    let cow_map_generation: u32 = 42;
    let membership_offset: u64 = 0xFEDC_BA98_7654_3210;
    let membership_generation: u32 = 7;
    let snapshot_epoch: u32 = 100;
    let double_root_generation: u32 = 3;
    let double_root_hash = [0xEE; 32];
    root.reserved[cow_off..cow_off + 8].copy_from_slice(&cow_map_offset.to_le_bytes());
    root.reserved[cow_off + 8..cow_off + 12].copy_from_slice(&cow_map_generation.to_le_bytes());
    root.reserved[cow_off + 12..cow_off + 20].copy_from_slice(&membership_offset.to_le_bytes());
    root.reserved[cow_off + 20..cow_off + 24]
        .copy_from_slice(&membership_generation.to_le_bytes());
    root.reserved[cow_off + 24..cow_off + 28].copy_from_slice(&snapshot_epoch.to_le_bytes());
    root.reserved[cow_off + 28..cow_off + 32]
        .copy_from_slice(&double_root_generation.to_le_bytes());
    root.reserved[cow_off + 32..cow_off + 64].copy_from_slice(&double_root_hash);
    let bytes = write_level0(&root);
    let decoded = read_level0(&bytes).expect("read_level0 should succeed");
    // Verify COW pointers survived round-trip by decoding each field from the
    // same reserved-area offsets it was packed into above.
    let d_cow_off = 68;
    let d_cow_map_offset = u64::from_le_bytes(
        decoded.reserved[d_cow_off..d_cow_off + 8]
            .try_into()
            .unwrap(),
    );
    let d_cow_map_generation = u32::from_le_bytes(
        decoded.reserved[d_cow_off + 8..d_cow_off + 12]
            .try_into()
            .unwrap(),
    );
    let d_membership_offset = u64::from_le_bytes(
        decoded.reserved[d_cow_off + 12..d_cow_off + 20]
            .try_into()
            .unwrap(),
    );
    let d_membership_generation = u32::from_le_bytes(
        decoded.reserved[d_cow_off + 20..d_cow_off + 24]
            .try_into()
            .unwrap(),
    );
    let d_snapshot_epoch = u32::from_le_bytes(
        decoded.reserved[d_cow_off + 24..d_cow_off + 28]
            .try_into()
            .unwrap(),
    );
    let d_double_root_generation = u32::from_le_bytes(
        decoded.reserved[d_cow_off + 28..d_cow_off + 32]
            .try_into()
            .unwrap(),
    );
    let d_double_root_hash = &decoded.reserved[d_cow_off + 32..d_cow_off + 64];
    assert_eq!(d_cow_map_offset, cow_map_offset);
    assert_eq!(d_cow_map_generation, cow_map_generation);
    assert_eq!(d_membership_offset, membership_offset);
    assert_eq!(d_membership_generation, membership_generation);
    assert_eq!(d_snapshot_epoch, snapshot_epoch);
    assert_eq!(d_double_root_generation, double_root_generation);
    assert_eq!(d_double_root_hash, &double_root_hash[..]);
}
#[test]
fn cow_pointers_default_to_zero() {
    // A zeroed root carries no COW state: the reserved region must decode as
    // all-zero pointers after a full serialize/parse cycle.
    let decoded = read_level0(&write_level0(&Level0Root::zeroed())).unwrap();
    let cow_off = 68;
    let cow_map_offset =
        u64::from_le_bytes(decoded.reserved[cow_off..cow_off + 8].try_into().unwrap());
    let snapshot_epoch = u32::from_le_bytes(
        decoded.reserved[cow_off + 24..cow_off + 28]
            .try_into()
            .unwrap(),
    );
    assert_eq!(cow_map_offset, 0);
    assert_eq!(snapshot_epoch, 0);
}
}

View File

@@ -0,0 +1,282 @@
//! Level 1 Full Manifest — variable-size TLV records.
//!
//! Level 1 is encoded as a sequence of tag-length-value records,
//! each 8-byte aligned, for forward compatibility.
use alloc::vec::Vec;
use rvf_types::{ErrorCode, RvfError};
/// Tag values for Level 1 manifest records.
///
/// Discriminants are contiguous (`0x0001..=0x000D`) and are the on-disk
/// `u16` values written by the TLV serializer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u16)]
pub enum ManifestTag {
    /// Array of segment directory entries.
    SegmentDir = 0x0001,
    /// Temperature tier assignments per block.
    TempTierMap = 0x0002,
    /// Index layer availability bitmap.
    IndexLayers = 0x0003,
    /// Epoch chain with rollback pointers.
    OverlayChain = 0x0004,
    /// Active/tombstoned segment sets.
    CompactionState = 0x0005,
    /// Multi-file shard references.
    ShardRefs = 0x0006,
    /// What this file can do (features, limits).
    CapabilityManifest = 0x0007,
    /// Domain-specific configuration.
    ProfileConfig = 0x0008,
    /// Pointer to latest SKETCH_SEG.
    AccessSketchRef = 0x0009,
    /// Full prefetch hint table.
    PrefetchTable = 0x000A,
    /// Restart point index for varint delta IDs.
    IdRestartPoints = 0x000B,
    /// Proof-of-computation witness chain.
    WitnessChain = 0x000C,
    /// Encryption key references (not keys themselves).
    KeyDirectory = 0x000D,
}

impl ManifestTag {
    /// Decode a raw on-disk tag value.
    ///
    /// Unknown values yield `None` so readers can reject (or skip) records
    /// written by future format revisions.
    pub fn from_u16(v: u16) -> Option<Self> {
        // Every known tag, in discriminant order; matched by discriminant.
        const ALL: [ManifestTag; 13] = [
            ManifestTag::SegmentDir,
            ManifestTag::TempTierMap,
            ManifestTag::IndexLayers,
            ManifestTag::OverlayChain,
            ManifestTag::CompactionState,
            ManifestTag::ShardRefs,
            ManifestTag::CapabilityManifest,
            ManifestTag::ProfileConfig,
            ManifestTag::AccessSketchRef,
            ManifestTag::PrefetchTable,
            ManifestTag::IdRestartPoints,
            ManifestTag::WitnessChain,
            ManifestTag::KeyDirectory,
        ];
        ALL.iter().copied().find(|tag| *tag as u16 == v)
    }
}
/// A single TLV record from the Level 1 manifest.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TlvRecord {
    /// Record type; one of the known [`ManifestTag`] values.
    pub tag: ManifestTag,
    /// Payload length in bytes as read from disk. NOTE(review): on write,
    /// `write_tlv_records` serializes `value.len()` rather than this field,
    /// so the caller should keep the two in sync.
    pub length: u32,
    /// Raw record payload; interpretation depends on `tag`.
    pub value: Vec<u8>,
}
/// Parsed Level 1 manifest: a collection of TLV records.
///
/// Records are kept in on-disk order; duplicate tags are permitted
/// (use [`Level1Manifest::find_all`] to retrieve all of them).
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Level1Manifest {
    /// All decoded records, in the order they appeared in the manifest.
    pub records: Vec<TlvRecord>,
}
impl Level1Manifest {
    /// Return the first record carrying `tag`, if any.
    pub fn find(&self, tag: ManifestTag) -> Option<&TlvRecord> {
        for record in &self.records {
            if record.tag == tag {
                return Some(record);
            }
        }
        None
    }

    /// Return every record carrying `tag`, preserving on-disk order.
    pub fn find_all(&self, tag: ManifestTag) -> Vec<&TlvRecord> {
        let mut matches = Vec::new();
        for record in &self.records {
            if record.tag == tag {
                matches.push(record);
            }
        }
        matches
    }
}
// ---------- helpers ----------
/// Read a little-endian `u16` starting at byte offset `off`.
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    (buf[off] as u16) | ((buf[off + 1] as u16) << 8)
}
/// Read a little-endian `u32` starting at byte offset `off`.
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    // Fold the four bytes most-significant-first so each shift-in places the
    // earlier (less significant) bytes lower in the word.
    let mut word = 0u32;
    for i in (0..4).rev() {
        word = (word << 8) | buf[off + i] as u32;
    }
    word
}
/// Append `v` to `buf` as two little-endian bytes.
fn write_u16_le(buf: &mut Vec<u8>, v: u16) {
    buf.push((v & 0xFF) as u8);
    buf.push((v >> 8) as u8);
}
/// Append `v` to `buf` as four little-endian bytes.
fn write_u32_le(buf: &mut Vec<u8>, v: u32) {
    buf.push(v as u8);
    buf.push((v >> 8) as u8);
    buf.push((v >> 16) as u8);
    buf.push((v >> 24) as u8);
}
/// Round `n` up to the next multiple of 8 (identity when already aligned).
fn align8(n: usize) -> usize {
    (n + 7) / 8 * 8
}
/// TLV record header layout:
/// tag: u16 (2 bytes)
/// length: u32 (4 bytes)
/// pad: u16 (2 bytes, to reach 8-byte alignment)
/// value: [u8; length]
/// [padding to 8-byte boundary]
// This is also the minimum decodable unit: the reader stops when fewer than
// TLV_HEADER_SIZE bytes remain (which covers inter-record alignment padding).
const TLV_HEADER_SIZE: usize = 8; // tag(2) + length(4) + pad(2)
/// Deserialize a sequence of TLV records from raw bytes.
///
/// Records are read back-to-back; each record starts on an 8-byte boundary.
/// Trailing bytes smaller than a record header (< `TLV_HEADER_SIZE`) are
/// ignored, which also covers inter-record alignment padding.
///
/// # Errors
///
/// Returns `RvfError::InvalidEnumValue` for an unknown tag, and
/// `ErrorCode::TruncatedSegment` when a record's declared length runs past
/// the end of `data` — including lengths that would overflow `usize`.
pub fn read_tlv_records(data: &[u8]) -> Result<Vec<TlvRecord>, RvfError> {
    let mut records = Vec::new();
    let mut pos = 0;
    while pos + TLV_HEADER_SIZE <= data.len() {
        let tag_raw = read_u16_le(data, pos);
        let length = read_u32_le(data, pos + 2);
        // pad at pos + 6 is ignored on read
        let tag = ManifestTag::from_u16(tag_raw).ok_or(RvfError::InvalidEnumValue {
            type_name: "ManifestTag",
            value: tag_raw as u64,
        })?;
        let value_start = pos + TLV_HEADER_SIZE;
        // Checked arithmetic: on 32-bit targets a hostile `length` near
        // u32::MAX could overflow `usize` and panic (debug) or wrap into a
        // bogus slice range (release). Malformed input must yield a clean
        // error, never a panic.
        let value_end = value_start
            .checked_add(length as usize)
            .ok_or(RvfError::Code(ErrorCode::TruncatedSegment))?;
        if value_end > data.len() {
            return Err(RvfError::Code(ErrorCode::TruncatedSegment));
        }
        let value = data[value_start..value_end].to_vec();
        records.push(TlvRecord { tag, length, value });
        // Advance to the next 8-byte-aligned position.
        pos = align8(value_end);
    }
    Ok(records)
}
/// Serialize a sequence of TLV records into bytes (8-byte aligned).
pub fn write_tlv_records(records: &[TlvRecord]) -> Vec<u8> {
let mut buf = Vec::new();
for rec in records {
write_u16_le(&mut buf, rec.tag as u16);
write_u32_le(&mut buf, rec.value.len() as u32);
// pad field (2 bytes)
buf.extend_from_slice(&[0u8; 2]);
buf.extend_from_slice(&rec.value);
// Pad to 8-byte boundary
let padded = align8(buf.len());
buf.resize(padded, 0);
}
buf
}
#[cfg(test)]
mod tests {
    use super::*;

    // Known discriminants decode to their variants.
    #[test]
    fn tag_from_u16_known() {
        assert_eq!(ManifestTag::from_u16(0x0001), Some(ManifestTag::SegmentDir));
        assert_eq!(
            ManifestTag::from_u16(0x000D),
            Some(ManifestTag::KeyDirectory)
        );
    }

    // Zero, one-past-the-last tag, and an arbitrary large value are rejected.
    #[test]
    fn tag_from_u16_unknown() {
        assert_eq!(ManifestTag::from_u16(0x0000), None);
        assert_eq!(ManifestTag::from_u16(0x000E), None);
        assert_eq!(ManifestTag::from_u16(0xFFFF), None);
    }

    // A single record whose 5-byte value forces tail padding to 8 bytes.
    #[test]
    fn round_trip_single_record() {
        let records = vec![TlvRecord {
            tag: ManifestTag::SegmentDir,
            length: 5,
            value: vec![1, 2, 3, 4, 5],
        }];
        let bytes = write_tlv_records(&records);
        assert_eq!(bytes.len() % 8, 0, "output must be 8-byte aligned");
        let decoded = read_tlv_records(&bytes).unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].tag, ManifestTag::SegmentDir);
        assert_eq!(decoded[0].value, vec![1, 2, 3, 4, 5]);
    }

    // Mixed record sizes (3, 8, 1 bytes) exercise both the padded and the
    // exactly-aligned paths; order must be preserved.
    #[test]
    fn round_trip_multiple_records() {
        let records = vec![
            TlvRecord {
                tag: ManifestTag::SegmentDir,
                length: 3,
                value: vec![0xAA, 0xBB, 0xCC],
            },
            TlvRecord {
                tag: ManifestTag::OverlayChain,
                length: 8,
                value: vec![1, 2, 3, 4, 5, 6, 7, 8],
            },
            TlvRecord {
                tag: ManifestTag::CapabilityManifest,
                length: 1,
                value: vec![0xFF],
            },
        ];
        let bytes = write_tlv_records(&records);
        assert_eq!(bytes.len() % 8, 0);
        let decoded = read_tlv_records(&bytes).unwrap();
        assert_eq!(decoded.len(), 3);
        assert_eq!(decoded[0].tag, ManifestTag::SegmentDir);
        assert_eq!(decoded[0].value, vec![0xAA, 0xBB, 0xCC]);
        assert_eq!(decoded[1].tag, ManifestTag::OverlayChain);
        assert_eq!(decoded[1].value, vec![1, 2, 3, 4, 5, 6, 7, 8]);
        assert_eq!(decoded[2].tag, ManifestTag::CapabilityManifest);
        assert_eq!(decoded[2].value, vec![0xFF]);
    }

    // An empty record list serializes to zero bytes and parses back empty.
    #[test]
    fn empty_records() {
        let bytes = write_tlv_records(&[]);
        assert!(bytes.is_empty());
        let decoded = read_tlv_records(&bytes).unwrap();
        assert!(decoded.is_empty());
    }

    // A header whose declared length exceeds the available bytes must error.
    #[test]
    fn truncated_value_returns_error() {
        let mut buf = Vec::new();
        write_u16_le(&mut buf, ManifestTag::SegmentDir as u16);
        write_u32_le(&mut buf, 100); // claims 100 bytes
        buf.extend_from_slice(&[0u8; 2]); // pad
        buf.extend_from_slice(&[0u8; 10]); // only 10 bytes
        let result = read_tlv_records(&buf);
        assert!(result.is_err());
    }

    // find() hits present tags and misses absent ones.
    #[test]
    fn level1_manifest_find() {
        let manifest = Level1Manifest {
            records: vec![
                TlvRecord {
                    tag: ManifestTag::SegmentDir,
                    length: 3,
                    value: vec![1, 2, 3],
                },
                TlvRecord {
                    tag: ManifestTag::OverlayChain,
                    length: 2,
                    value: vec![4, 5],
                },
            ],
        };
        assert!(manifest.find(ManifestTag::SegmentDir).is_some());
        assert!(manifest.find(ManifestTag::OverlayChain).is_some());
        assert!(manifest.find(ManifestTag::CompactionState).is_none());
    }
}

View File

@@ -0,0 +1,29 @@
//! Two-level manifest system for the RuVector Format (RVF).
//!
//! The manifest system enables progressive boot:
//! - **Level 0** (fixed 4096 bytes at EOF): hotset pointers for instant query
//! - **Level 1** (variable-size TLV records): full segment directory
//!
//! A reader only needs Level 0 to start answering approximate queries.
//! Level 1 is loaded asynchronously for full-quality results.
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
pub mod boot;
pub mod chain;
pub mod directory;
pub mod level0;
pub mod level1;
pub mod writer;
pub use boot::{boot_phase1, boot_phase2, extract_hotset_offsets, BootState, HotsetPointers};
pub use chain::OverlayChain;
pub use directory::{SegmentDirEntry, SegmentDirectory};
pub use level0::{read_level0, validate_level0, write_level0};
pub use level1::{read_tlv_records, write_tlv_records, Level1Manifest, ManifestTag, TlvRecord};
pub use writer::{build_manifest, build_manifest_at};
#[cfg(feature = "std")]
pub use writer::commit_manifest;

View File

@@ -0,0 +1,197 @@
//! Manifest Writer — builds a complete manifest (Level 1 TLV + Level 0 root).
//!
//! Output: Level 1 TLV payload followed by Level 0 root as last 4096 bytes.
use alloc::vec::Vec;
use rvf_types::{Level0Root, ROOT_MANIFEST_SIZE};
use crate::boot::HotsetPointers;
use crate::chain::{self, OverlayChain};
use crate::directory::{self, SegmentDirectory};
use crate::level0;
use crate::level1::{self, ManifestTag, TlvRecord};
/// Build a complete manifest from a segment directory, hotset pointers, epoch,
/// and an optional overlay chain (previous manifest link).
///
/// Returns a byte buffer containing:
/// - Level 1 TLV records (variable size)
/// - Level 0 root manifest (last 4096 bytes)
///
/// The `l1_manifest_offset` in Level 0 is set to 0 because the caller
/// must adjust it to the actual file position where this data is written.
/// Use [`build_manifest_at`] if you know the file offset ahead of time.
pub fn build_manifest(
    dir: &SegmentDirectory,
    hotset: &HotsetPointers,
    epoch: u32,
    prev_chain: Option<&OverlayChain>,
) -> Vec<u8> {
    // Thin wrapper: offset 0 is a placeholder the caller must patch.
    build_manifest_at(dir, hotset, epoch, prev_chain, 0)
}
/// Like [`build_manifest`], but sets `l1_manifest_offset` to `file_offset`.
///
/// This is for when the caller knows exactly where in the file the
/// manifest payload will be written.
pub fn build_manifest_at(
    dir: &SegmentDirectory,
    hotset: &HotsetPointers,
    epoch: u32,
    prev_chain: Option<&OverlayChain>,
    file_offset: u64,
) -> Vec<u8> {
    // Assemble the Level 1 TLV records: the segment directory first, then
    // the overlay-chain link when one is supplied.
    let mut tlv_records = Vec::new();
    let dir_payload = directory::write_directory(dir);
    tlv_records.push(TlvRecord {
        tag: ManifestTag::SegmentDir,
        length: dir_payload.len() as u32,
        value: dir_payload,
    });
    if let Some(prev) = prev_chain {
        let chain_payload = chain::write_overlay_chain(prev);
        tlv_records.push(TlvRecord {
            tag: ManifestTag::OverlayChain,
            length: chain_payload.len() as u32,
            value: chain_payload,
        });
    }
    let l1_payload = level1::write_tlv_records(&tlv_records);

    // Populate the Level 0 root with the L1 location and the hotset pointers.
    let mut root = Level0Root::zeroed();
    root.version = 1;
    root.l1_manifest_offset = file_offset;
    root.l1_manifest_length = l1_payload.len() as u64;
    root.epoch = epoch;
    root.entrypoint = hotset.entrypoint;
    root.toplayer = hotset.toplayer;
    root.centroid = hotset.centroid;
    root.quantdict = hotset.quantdict;
    root.hot_cache = hotset.hot_cache;
    root.prefetch_map = hotset.prefetch_map;

    // Output layout: L1 TLV payload followed by the 4 KB L0 root at the tail.
    let mut out = Vec::with_capacity(l1_payload.len() + ROOT_MANIFEST_SIZE);
    out.extend_from_slice(&l1_payload);
    out.extend_from_slice(&level0::write_level0(&root));
    out
}
/// Write a manifest to a writer (e.g., file).
///
/// This appends the manifest bytes and flushes.
///
/// # Errors
///
/// Any I/O failure — on the write or the flush — is reported as
/// `ErrorCode::FsyncFailed`.
#[cfg(feature = "std")]
pub fn commit_manifest(
    file: &mut impl std::io::Write,
    manifest_bytes: &[u8],
) -> Result<(), rvf_types::RvfError> {
    // Both failure points collapse onto the same durability error code.
    let as_fsync_err = |_| rvf_types::RvfError::Code(rvf_types::ErrorCode::FsyncFailed);
    file.write_all(manifest_bytes).map_err(as_fsync_err)?;
    file.flush().map_err(as_fsync_err)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::directory::SegmentDirEntry;
    use rvf_types::EntrypointPtr;

    // Two-segment directory: a tier-0 segment at offset 0 and a tier-1
    // segment immediately after it.
    fn sample_dir() -> SegmentDirectory {
        SegmentDirectory {
            entries: vec![
                SegmentDirEntry {
                    segment_id: 1,
                    seg_type: 0x01,
                    tier: 0,
                    file_offset: 0,
                    payload_length: 4096,
                    ..SegmentDirEntry::default()
                },
                SegmentDirEntry {
                    segment_id: 2,
                    seg_type: 0x02,
                    tier: 1,
                    file_offset: 4096,
                    payload_length: 8192,
                    ..SegmentDirEntry::default()
                },
            ],
        }
    }

    // Hotset with a recognizable entrypoint (count = 5); everything else
    // takes the zeroed defaults.
    fn sample_hotset() -> HotsetPointers {
        HotsetPointers {
            entrypoint: EntrypointPtr {
                seg_offset: 0x100,
                block_offset: 0,
                count: 5,
            },
            ..Default::default()
        }
    }

    // The built manifest must end with a valid 4 KB Level 0 root echoing the
    // epoch and hotset data passed in.
    #[test]
    fn build_manifest_ends_with_level0() {
        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 1, None);
        assert!(manifest.len() > ROOT_MANIFEST_SIZE);
        // Last 4096 bytes should be a valid Level 0
        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        assert!(level0::validate_level0(l0_data));
        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.epoch, 1);
        assert_eq!(root.entrypoint.count, 5);
    }

    // Supplying an overlay chain adds an extra TLV record but must not
    // disturb the Level 0 root.
    #[test]
    fn build_manifest_with_chain() {
        let chain = OverlayChain {
            epoch: 1,
            prev_manifest_offset: 0x1000,
            prev_manifest_id: 5,
            checkpoint_hash: [0xAB; 16],
        };
        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 2, Some(&chain));
        assert!(manifest.len() > ROOT_MANIFEST_SIZE);
        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.epoch, 2);
    }

    // build_manifest_at must record the caller-supplied file offset verbatim
    // in the Level 0 root.
    #[test]
    fn build_manifest_at_with_offset() {
        let offset = 0x1_0000u64;
        let manifest = build_manifest_at(&sample_dir(), &sample_hotset(), 3, None, offset);
        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.l1_manifest_offset, offset);
    }

    // Vec<u8> implements io::Write, so committing into it must reproduce the
    // manifest bytes exactly.
    #[cfg(feature = "std")]
    #[test]
    fn commit_manifest_writes_to_vec() {
        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 1, None);
        let mut output = Vec::new();
        commit_manifest(&mut output, &manifest).unwrap();
        assert_eq!(output, manifest);
    }
}