Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
255
vendor/ruvector/crates/rvf/rvf-manifest/src/boot.rs
vendored
Normal file
255
vendor/ruvector/crates/rvf/rvf-manifest/src/boot.rs
vendored
Normal file
@@ -0,0 +1,255 @@
|
||||
//! Progressive Boot Sequence — read Level 0 from EOF, then Level 1.
|
||||
//!
|
||||
//! Phase 1: Read last 4 KB -> hotset pointers -> system is queryable.
|
||||
//! Phase 2: Read Level 1 at l1_manifest_offset -> full directory.
|
||||
|
||||
use rvf_types::{
|
||||
CentroidPtr, EntrypointPtr, ErrorCode, HotCachePtr, Level0Root, PrefetchMapPtr, QuantDictPtr,
|
||||
RvfError, TopLayerPtr, ROOT_MANIFEST_SIZE,
|
||||
};
|
||||
|
||||
use crate::directory::SegmentDirectory;
|
||||
use crate::level0;
|
||||
use crate::level1::{self, Level1Manifest};
|
||||
|
||||
/// Collected hotset offsets extracted from the Level 0 root.
///
/// These are the six pointers phase-1 boot copies out of [`Level0Root`]
/// so the system can serve approximate queries before Level 1 is read.
/// Values are copied verbatim — no validation is performed here.
#[derive(Clone, Debug)]
pub struct HotsetPointers {
    /// Copied from `Level0Root::entrypoint`.
    pub entrypoint: EntrypointPtr,
    /// Copied from `Level0Root::toplayer`.
    pub toplayer: TopLayerPtr,
    /// Copied from `Level0Root::centroid`.
    pub centroid: CentroidPtr,
    /// Copied from `Level0Root::quantdict`.
    pub quantdict: QuantDictPtr,
    /// Copied from `Level0Root::hot_cache`.
    pub hot_cache: HotCachePtr,
    /// Copied from `Level0Root::prefetch_map`.
    pub prefetch_map: PrefetchMapPtr,
}
|
||||
|
||||
impl Default for HotsetPointers {
|
||||
fn default() -> Self {
|
||||
// Extract from a zeroed Level0Root
|
||||
let root = Level0Root::zeroed();
|
||||
Self {
|
||||
entrypoint: root.entrypoint,
|
||||
toplayer: root.toplayer,
|
||||
centroid: root.centroid,
|
||||
quantdict: root.quantdict,
|
||||
hot_cache: root.hot_cache,
|
||||
prefetch_map: root.prefetch_map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Full boot state, progressively populated.
///
/// `level0` is available after phase 1; `level1` and `segment_dir`
/// remain `None` until phase 2 has run and the directory record has
/// been decoded by the caller.
#[derive(Clone, Debug)]
pub struct BootState {
    /// Root manifest parsed from the last 4 KB of the file (phase 1).
    pub level0: Level0Root,
    /// Level 1 TLV manifest read at `l1_manifest_offset` (phase 2), if loaded.
    pub level1: Option<Level1Manifest>,
    /// Segment directory decoded from the Level 1 `SEGMENT_DIR` record, if loaded.
    pub segment_dir: Option<SegmentDirectory>,
}
|
||||
|
||||
/// Boot phase 1: read the last 4096 bytes from `file_data` and parse Level 0.
|
||||
///
|
||||
/// After this call the system has hotset pointers and can answer approximate queries.
|
||||
pub fn boot_phase1(file_data: &[u8]) -> Result<Level0Root, RvfError> {
|
||||
if file_data.len() < ROOT_MANIFEST_SIZE {
|
||||
return Err(RvfError::Code(ErrorCode::TruncatedSegment));
|
||||
}
|
||||
|
||||
let start = file_data.len() - ROOT_MANIFEST_SIZE;
|
||||
let tail: &[u8; ROOT_MANIFEST_SIZE] =
|
||||
file_data[start..start + ROOT_MANIFEST_SIZE]
|
||||
.try_into()
|
||||
.map_err(|_| RvfError::Code(ErrorCode::TruncatedSegment))?;
|
||||
|
||||
level0::read_level0(tail)
|
||||
}
|
||||
|
||||
/// Boot phase 2: using the Level 0 root, read and parse Level 1 (TLV records).
|
||||
///
|
||||
/// After this call the system has the full segment directory.
|
||||
pub fn boot_phase2(file_data: &[u8], root: &Level0Root) -> Result<Level1Manifest, RvfError> {
|
||||
let offset = root.l1_manifest_offset as usize;
|
||||
let length = root.l1_manifest_length as usize;
|
||||
|
||||
if length == 0 {
|
||||
return Ok(Level1Manifest::default());
|
||||
}
|
||||
|
||||
let end = offset
|
||||
.checked_add(length)
|
||||
.ok_or(RvfError::Code(ErrorCode::TruncatedSegment))?;
|
||||
if end > file_data.len() {
|
||||
return Err(RvfError::Code(ErrorCode::TruncatedSegment));
|
||||
}
|
||||
|
||||
let records = level1::read_tlv_records(&file_data[offset..end])?;
|
||||
Ok(Level1Manifest { records })
|
||||
}
|
||||
|
||||
/// Extract the six hotset pointers from a Level 0 root.
///
/// Pure field copy — no validation is performed on the offsets; a zeroed
/// root yields all-zero pointers.
pub fn extract_hotset_offsets(root: &Level0Root) -> HotsetPointers {
    HotsetPointers {
        entrypoint: root.entrypoint,
        toplayer: root.toplayer,
        centroid: root.centroid,
        quantdict: root.quantdict,
        hot_cache: root.hot_cache,
        prefetch_map: root.prefetch_map,
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::directory::{self, SegmentDirEntry};
|
||||
use crate::level0;
|
||||
use crate::level1::{ManifestTag, TlvRecord};
|
||||
|
||||
fn make_test_file() -> Vec<u8> {
|
||||
// Build a segment directory with a few entries
|
||||
let dir = SegmentDirectory {
|
||||
entries: vec![
|
||||
SegmentDirEntry {
|
||||
segment_id: 1,
|
||||
seg_type: 0x01, // VEC
|
||||
tier: 0,
|
||||
file_offset: 0,
|
||||
payload_length: 4096,
|
||||
..SegmentDirEntry::default()
|
||||
},
|
||||
SegmentDirEntry {
|
||||
segment_id: 2,
|
||||
seg_type: 0x02, // INDEX
|
||||
tier: 1,
|
||||
file_offset: 4096,
|
||||
payload_length: 8192,
|
||||
..SegmentDirEntry::default()
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// Build Level 1 TLV records
|
||||
let dir_bytes = directory::write_directory(&dir);
|
||||
let tlv_records = vec![TlvRecord {
|
||||
tag: ManifestTag::SegmentDir,
|
||||
length: dir_bytes.len() as u32,
|
||||
value: dir_bytes,
|
||||
}];
|
||||
let l1_bytes = crate::level1::write_tlv_records(&tlv_records);
|
||||
|
||||
// Start with dummy segments data
|
||||
let mut file_data = vec![0u8; 16384];
|
||||
let l1_offset = file_data.len();
|
||||
file_data.extend_from_slice(&l1_bytes);
|
||||
|
||||
// Build Level 0 pointing to L1
|
||||
let mut root = Level0Root::zeroed();
|
||||
root.version = 1;
|
||||
root.l1_manifest_offset = l1_offset as u64;
|
||||
root.l1_manifest_length = l1_bytes.len() as u64;
|
||||
root.total_vector_count = 10_000;
|
||||
root.dimension = 384;
|
||||
root.base_dtype = 1;
|
||||
root.profile_id = 2;
|
||||
root.epoch = 1;
|
||||
root.entrypoint = EntrypointPtr {
|
||||
seg_offset: 0x100,
|
||||
block_offset: 0,
|
||||
count: 3,
|
||||
};
|
||||
root.toplayer = TopLayerPtr {
|
||||
seg_offset: 0x200,
|
||||
block_offset: 64,
|
||||
node_count: 500,
|
||||
};
|
||||
root.centroid = CentroidPtr {
|
||||
seg_offset: 0x300,
|
||||
block_offset: 0,
|
||||
count: 128,
|
||||
};
|
||||
root.quantdict = QuantDictPtr {
|
||||
seg_offset: 0x400,
|
||||
block_offset: 0,
|
||||
size: 4096,
|
||||
};
|
||||
root.hot_cache = HotCachePtr {
|
||||
seg_offset: 0x500,
|
||||
block_offset: 0,
|
||||
vector_count: 1000,
|
||||
};
|
||||
root.prefetch_map = PrefetchMapPtr {
|
||||
offset: 0x600,
|
||||
entries: 200,
|
||||
_pad: 0,
|
||||
};
|
||||
|
||||
let l0_bytes = level0::write_level0(&root);
|
||||
file_data.extend_from_slice(&l0_bytes);
|
||||
file_data
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn boot_phase1_extracts_hotset() {
|
||||
let file_data = make_test_file();
|
||||
let l0 = boot_phase1(&file_data).unwrap();
|
||||
|
||||
assert_eq!(l0.dimension, 384);
|
||||
assert_eq!(l0.total_vector_count, 10_000);
|
||||
assert_eq!(l0.epoch, 1);
|
||||
assert_eq!(l0.entrypoint.count, 3);
|
||||
assert_eq!(l0.toplayer.node_count, 500);
|
||||
assert_eq!(l0.centroid.count, 128);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn boot_phase2_loads_directory() {
|
||||
let file_data = make_test_file();
|
||||
let l0 = boot_phase1(&file_data).unwrap();
|
||||
let l1 = boot_phase2(&file_data, &l0).unwrap();
|
||||
|
||||
assert!(!l1.records.is_empty());
|
||||
let dir_rec = l1.find(ManifestTag::SegmentDir).unwrap();
|
||||
let dir = directory::read_directory(&dir_rec.value).unwrap();
|
||||
assert_eq!(dir.entries.len(), 2);
|
||||
assert_eq!(dir.entries[0].segment_id, 1);
|
||||
assert_eq!(dir.entries[1].segment_id, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_hotset_offsets_works() {
|
||||
let file_data = make_test_file();
|
||||
let l0 = boot_phase1(&file_data).unwrap();
|
||||
let hotset = extract_hotset_offsets(&l0);
|
||||
|
||||
assert_eq!(hotset.entrypoint.seg_offset, 0x100);
|
||||
assert_eq!(hotset.toplayer.seg_offset, 0x200);
|
||||
assert_eq!(hotset.centroid.seg_offset, 0x300);
|
||||
assert_eq!(hotset.quantdict.seg_offset, 0x400);
|
||||
assert_eq!(hotset.hot_cache.seg_offset, 0x500);
|
||||
assert_eq!(hotset.prefetch_map.offset, 0x600);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn boot_phase1_rejects_short_data() {
|
||||
let result = boot_phase1(&[0u8; 100]);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn full_boot_state() {
|
||||
let file_data = make_test_file();
|
||||
let l0 = boot_phase1(&file_data).unwrap();
|
||||
let l1 = boot_phase2(&file_data, &l0).unwrap();
|
||||
|
||||
let dir_rec = l1.find(ManifestTag::SegmentDir).unwrap();
|
||||
let dir = directory::read_directory(&dir_rec.value).unwrap();
|
||||
|
||||
let state = BootState {
|
||||
level0: l0,
|
||||
level1: Some(l1),
|
||||
segment_dir: Some(dir),
|
||||
};
|
||||
|
||||
assert_eq!(state.level0.epoch, 1);
|
||||
assert_eq!(state.segment_dir.as_ref().unwrap().len(), 2);
|
||||
}
|
||||
}
|
||||
146
vendor/ruvector/crates/rvf/rvf-manifest/src/chain.rs
vendored
Normal file
146
vendor/ruvector/crates/rvf/rvf-manifest/src/chain.rs
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
//! Overlay Chain — manifest rollback pointers for point-in-time recovery.
|
||||
//!
|
||||
//! Each `OVERLAY_CHAIN` TLV record stores the epoch, a pointer to the
|
||||
//! previous MANIFEST_SEG, and a checkpoint hash for bisection debugging.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use rvf_types::RvfError;
|
||||
|
||||
/// Fixed size of the serialized overlay chain record.
|
||||
pub const OVERLAY_CHAIN_SIZE: usize = 40;
|
||||
|
||||
/// An overlay chain entry linking to the previous manifest.
///
/// Serialized as a fixed [`OVERLAY_CHAIN_SIZE`] (40-byte) record; see
/// `read_overlay_chain` for the byte layout. A default (all-zero) entry
/// denotes the start of the chain — no previous manifest.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct OverlayChain {
    /// Current epoch number.
    pub epoch: u32,
    /// Byte offset of the previous MANIFEST_SEG in the file.
    pub prev_manifest_offset: u64,
    /// Segment ID of the previous MANIFEST_SEG.
    pub prev_manifest_id: u64,
    /// Hash of the complete state at this epoch (first 128 bits).
    pub checkpoint_hash: [u8; 16],
}
|
||||
|
||||
/// Deserialize an overlay chain record.
|
||||
///
|
||||
/// Layout (36 bytes):
|
||||
/// ```text
|
||||
/// 0x00 u32 epoch
|
||||
/// 0x04 u32 padding (must be zero)
|
||||
/// 0x08 u64 prev_manifest_offset
|
||||
/// 0x10 u64 prev_manifest_id
|
||||
/// 0x18 [u8;16] checkpoint_hash
|
||||
/// ```
|
||||
pub fn read_overlay_chain(data: &[u8]) -> Result<OverlayChain, RvfError> {
|
||||
if data.len() < OVERLAY_CHAIN_SIZE {
|
||||
return Err(RvfError::SizeMismatch {
|
||||
expected: OVERLAY_CHAIN_SIZE,
|
||||
got: data.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let epoch = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
|
||||
let mut off8 = [0u8; 8];
|
||||
off8.copy_from_slice(&data[0x08..0x10]);
|
||||
let prev_manifest_offset = u64::from_le_bytes(off8);
|
||||
off8.copy_from_slice(&data[0x10..0x18]);
|
||||
let prev_manifest_id = u64::from_le_bytes(off8);
|
||||
let mut checkpoint_hash = [0u8; 16];
|
||||
checkpoint_hash.copy_from_slice(&data[0x18..0x28]);
|
||||
|
||||
Ok(OverlayChain {
|
||||
epoch,
|
||||
prev_manifest_offset,
|
||||
prev_manifest_id,
|
||||
checkpoint_hash,
|
||||
})
|
||||
}
|
||||
|
||||
/// Serialize an overlay chain record to bytes.
|
||||
pub fn write_overlay_chain(chain: &OverlayChain) -> Vec<u8> {
|
||||
let mut buf = vec![0u8; OVERLAY_CHAIN_SIZE];
|
||||
|
||||
buf[0..4].copy_from_slice(&chain.epoch.to_le_bytes());
|
||||
// bytes 4..8 are padding (zero)
|
||||
buf[0x08..0x10].copy_from_slice(&chain.prev_manifest_offset.to_le_bytes());
|
||||
buf[0x10..0x18].copy_from_slice(&chain.prev_manifest_id.to_le_bytes());
|
||||
buf[0x18..0x28].copy_from_slice(&chain.checkpoint_hash);
|
||||
|
||||
buf
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn round_trip() {
|
||||
let chain = OverlayChain {
|
||||
epoch: 42,
|
||||
prev_manifest_offset: 0x1_0000,
|
||||
prev_manifest_id: 7,
|
||||
checkpoint_hash: [
|
||||
0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
|
||||
0x0B, 0x0C,
|
||||
],
|
||||
};
|
||||
|
||||
let bytes = write_overlay_chain(&chain);
|
||||
assert_eq!(bytes.len(), OVERLAY_CHAIN_SIZE);
|
||||
|
||||
let decoded = read_overlay_chain(&bytes).unwrap();
|
||||
assert_eq!(decoded, chain);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncated_data() {
|
||||
let result = read_overlay_chain(&[0u8; 10]);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_chain() {
|
||||
let chain = OverlayChain::default();
|
||||
let bytes = write_overlay_chain(&chain);
|
||||
let decoded = read_overlay_chain(&bytes).unwrap();
|
||||
assert_eq!(decoded.epoch, 0);
|
||||
assert_eq!(decoded.prev_manifest_offset, 0);
|
||||
assert_eq!(decoded.prev_manifest_id, 0);
|
||||
assert_eq!(decoded.checkpoint_hash, [0u8; 16]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chain_sequence() {
|
||||
let chain1 = OverlayChain {
|
||||
epoch: 1,
|
||||
prev_manifest_offset: 0,
|
||||
prev_manifest_id: 0,
|
||||
checkpoint_hash: [0x01; 16],
|
||||
};
|
||||
let chain2 = OverlayChain {
|
||||
epoch: 2,
|
||||
prev_manifest_offset: 0x1000,
|
||||
prev_manifest_id: 1,
|
||||
checkpoint_hash: [0x02; 16],
|
||||
};
|
||||
let chain3 = OverlayChain {
|
||||
epoch: 3,
|
||||
prev_manifest_offset: 0x2000,
|
||||
prev_manifest_id: 2,
|
||||
checkpoint_hash: [0x03; 16],
|
||||
};
|
||||
|
||||
assert_eq!(chain3.prev_manifest_offset, 0x2000);
|
||||
assert_eq!(chain3.prev_manifest_id, 2);
|
||||
assert_eq!(chain2.prev_manifest_offset, 0x1000);
|
||||
assert_eq!(chain2.prev_manifest_id, 1);
|
||||
assert_eq!(chain1.prev_manifest_offset, 0);
|
||||
|
||||
for chain in [chain1, chain2, chain3] {
|
||||
let bytes = write_overlay_chain(&chain);
|
||||
let decoded = read_overlay_chain(&bytes).unwrap();
|
||||
assert_eq!(decoded, chain);
|
||||
}
|
||||
}
|
||||
}
|
||||
266
vendor/ruvector/crates/rvf/rvf-manifest/src/directory.rs
vendored
Normal file
266
vendor/ruvector/crates/rvf/rvf-manifest/src/directory.rs
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
//! Segment Directory — the array of segment location entries
|
||||
//! stored inside the `SEGMENT_DIR` TLV record of Level 1.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use rvf_types::{RvfError, SegmentType};
|
||||
|
||||
/// Size of each directory entry in bytes (cache-line aligned).
|
||||
pub const DIR_ENTRY_SIZE: usize = 64;
|
||||
|
||||
/// A single entry in the segment directory.
|
||||
///
|
||||
/// Binary layout (64 bytes):
|
||||
/// ```text
|
||||
/// 0x00 u64 segment_id
|
||||
/// 0x08 u8 seg_type
|
||||
/// 0x09 u8 tier (0=hot, 1=warm, 2=cold)
|
||||
/// 0x0A u16 flags
|
||||
/// 0x0C u32 reserved (must be zero)
|
||||
/// 0x10 u64 file_offset
|
||||
/// 0x18 u64 payload_length (decompressed)
|
||||
/// 0x20 u64 compressed_length (0 if uncompressed)
|
||||
/// 0x28 u16 shard_id (0 for main file)
|
||||
/// 0x2A u16 compression
|
||||
/// 0x2C u32 block_count
|
||||
/// 0x30 [u8;16] content_hash (first 128 bits)
|
||||
/// ```
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub struct SegmentDirEntry {
|
||||
pub segment_id: u64,
|
||||
pub seg_type: u8,
|
||||
pub tier: u8,
|
||||
pub flags: u16,
|
||||
pub file_offset: u64,
|
||||
pub payload_length: u64,
|
||||
pub compressed_length: u64,
|
||||
pub shard_id: u16,
|
||||
pub compression: u16,
|
||||
pub block_count: u32,
|
||||
pub content_hash: [u8; 16],
|
||||
}
|
||||
|
||||
/// The complete segment directory.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct SegmentDirectory {
|
||||
pub entries: Vec<SegmentDirEntry>,
|
||||
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
/// Read a little-endian u16 at byte offset `off`.
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    let bytes: [u8; 2] = buf[off..off + 2].try_into().unwrap();
    u16::from_le_bytes(bytes)
}

/// Read a little-endian u32 at byte offset `off`.
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    let bytes: [u8; 4] = buf[off..off + 4].try_into().unwrap();
    u32::from_le_bytes(bytes)
}

/// Read a little-endian u64 at byte offset `off`.
fn read_u64_le(buf: &[u8], off: usize) -> u64 {
    let bytes: [u8; 8] = buf[off..off + 8].try_into().unwrap();
    u64::from_le_bytes(bytes)
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u16_le(buf: &mut [u8], off: usize, v: u16) {
    buf[off..off + 2].copy_from_slice(&v.to_le_bytes());
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u32_le(buf: &mut [u8], off: usize, v: u32) {
    buf[off..off + 4].copy_from_slice(&v.to_le_bytes());
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u64_le(buf: &mut [u8], off: usize, v: u64) {
    buf[off..off + 8].copy_from_slice(&v.to_le_bytes());
}
|
||||
|
||||
fn read_entry(buf: &[u8], base: usize) -> SegmentDirEntry {
|
||||
let mut content_hash = [0u8; 16];
|
||||
content_hash.copy_from_slice(&buf[base + 0x30..base + 0x40]);
|
||||
|
||||
SegmentDirEntry {
|
||||
segment_id: read_u64_le(buf, base),
|
||||
seg_type: buf[base + 0x08],
|
||||
tier: buf[base + 0x09],
|
||||
flags: read_u16_le(buf, base + 0x0A),
|
||||
file_offset: read_u64_le(buf, base + 0x10),
|
||||
payload_length: read_u64_le(buf, base + 0x18),
|
||||
compressed_length: read_u64_le(buf, base + 0x20),
|
||||
shard_id: read_u16_le(buf, base + 0x28),
|
||||
compression: read_u16_le(buf, base + 0x2A),
|
||||
block_count: read_u32_le(buf, base + 0x2C),
|
||||
content_hash,
|
||||
}
|
||||
}
|
||||
|
||||
fn write_entry(buf: &mut [u8], base: usize, e: &SegmentDirEntry) {
|
||||
write_u64_le(buf, base, e.segment_id);
|
||||
buf[base + 0x08] = e.seg_type;
|
||||
buf[base + 0x09] = e.tier;
|
||||
write_u16_le(buf, base + 0x0A, e.flags);
|
||||
write_u32_le(buf, base + 0x0C, 0); // reserved
|
||||
write_u64_le(buf, base + 0x10, e.file_offset);
|
||||
write_u64_le(buf, base + 0x18, e.payload_length);
|
||||
write_u64_le(buf, base + 0x20, e.compressed_length);
|
||||
write_u16_le(buf, base + 0x28, e.shard_id);
|
||||
write_u16_le(buf, base + 0x2A, e.compression);
|
||||
write_u32_le(buf, base + 0x2C, e.block_count);
|
||||
buf[base + 0x30..base + 0x40].copy_from_slice(&e.content_hash);
|
||||
}
|
||||
|
||||
/// Deserialize a segment directory from raw bytes.
|
||||
pub fn read_directory(data: &[u8]) -> Result<SegmentDirectory, RvfError> {
|
||||
if !data.len().is_multiple_of(DIR_ENTRY_SIZE) {
|
||||
return Err(RvfError::SizeMismatch {
|
||||
expected: (data.len() / DIR_ENTRY_SIZE + 1) * DIR_ENTRY_SIZE,
|
||||
got: data.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let count = data.len() / DIR_ENTRY_SIZE;
|
||||
let mut entries = Vec::with_capacity(count);
|
||||
for i in 0..count {
|
||||
entries.push(read_entry(data, i * DIR_ENTRY_SIZE));
|
||||
}
|
||||
|
||||
Ok(SegmentDirectory { entries })
|
||||
}
|
||||
|
||||
/// Serialize a segment directory to raw bytes.
|
||||
pub fn write_directory(dir: &SegmentDirectory) -> Vec<u8> {
|
||||
let mut buf = vec![0u8; dir.entries.len() * DIR_ENTRY_SIZE];
|
||||
for (i, entry) in dir.entries.iter().enumerate() {
|
||||
write_entry(&mut buf, i * DIR_ENTRY_SIZE, entry);
|
||||
}
|
||||
buf
|
||||
}
|
||||
|
||||
impl SegmentDirectory {
|
||||
/// Find a segment by its ID.
|
||||
pub fn find_segment(&self, id: u64) -> Option<&SegmentDirEntry> {
|
||||
self.entries.iter().find(|e| e.segment_id == id)
|
||||
}
|
||||
|
||||
/// Return all segments of the given type.
|
||||
pub fn segments_by_type(&self, seg_type: SegmentType) -> Vec<&SegmentDirEntry> {
|
||||
let raw = seg_type as u8;
|
||||
self.entries.iter().filter(|e| e.seg_type == raw).collect()
|
||||
}
|
||||
|
||||
/// Return the number of entries.
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
|
||||
/// Return true if there are no entries.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entries.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn make_entry(id: u64, seg_type: u8, tier: u8) -> SegmentDirEntry {
|
||||
let mut hash = [0u8; 16];
|
||||
hash[0] = (id & 0xFF) as u8;
|
||||
SegmentDirEntry {
|
||||
segment_id: id,
|
||||
seg_type,
|
||||
tier,
|
||||
flags: 0,
|
||||
file_offset: id * 0x1000,
|
||||
payload_length: 4096,
|
||||
compressed_length: 0,
|
||||
shard_id: 0,
|
||||
compression: 0,
|
||||
block_count: 1,
|
||||
content_hash: hash,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn round_trip_single_entry() {
|
||||
let dir = SegmentDirectory {
|
||||
entries: vec![make_entry(1, SegmentType::Vec as u8, 0)],
|
||||
};
|
||||
|
||||
let bytes = write_directory(&dir);
|
||||
assert_eq!(bytes.len(), 64);
|
||||
|
||||
let decoded = read_directory(&bytes).unwrap();
|
||||
assert_eq!(decoded.entries.len(), 1);
|
||||
assert_eq!(decoded.entries[0], dir.entries[0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn round_trip_100_entries() {
|
||||
let entries: Vec<_> = (0..100)
|
||||
.map(|i| make_entry(i, (i % 13 + 1) as u8, (i % 3) as u8))
|
||||
.collect();
|
||||
|
||||
let dir = SegmentDirectory {
|
||||
entries: entries.clone(),
|
||||
};
|
||||
let bytes = write_directory(&dir);
|
||||
assert_eq!(bytes.len(), 100 * 64);
|
||||
|
||||
let decoded = read_directory(&bytes).unwrap();
|
||||
assert_eq!(decoded.entries.len(), 100);
|
||||
for (a, b) in decoded.entries.iter().zip(entries.iter()) {
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn find_segment_by_id() {
|
||||
let dir = SegmentDirectory {
|
||||
entries: vec![
|
||||
make_entry(10, SegmentType::Vec as u8, 0),
|
||||
make_entry(20, SegmentType::Index as u8, 1),
|
||||
make_entry(30, SegmentType::Manifest as u8, 0),
|
||||
],
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
dir.find_segment(20).unwrap().seg_type,
|
||||
SegmentType::Index as u8
|
||||
);
|
||||
assert!(dir.find_segment(99).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_by_type() {
|
||||
let dir = SegmentDirectory {
|
||||
entries: vec![
|
||||
make_entry(1, SegmentType::Vec as u8, 0),
|
||||
make_entry(2, SegmentType::Vec as u8, 1),
|
||||
make_entry(3, SegmentType::Index as u8, 0),
|
||||
make_entry(4, SegmentType::Vec as u8, 2),
|
||||
],
|
||||
};
|
||||
|
||||
let vecs = dir.segments_by_type(SegmentType::Vec);
|
||||
assert_eq!(vecs.len(), 3);
|
||||
let indexes = dir.segments_by_type(SegmentType::Index);
|
||||
assert_eq!(indexes.len(), 1);
|
||||
let manifests = dir.segments_by_type(SegmentType::Manifest);
|
||||
assert_eq!(manifests.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_size_returns_error() {
|
||||
let data = vec![0u8; 65]; // not a multiple of 64
|
||||
let result = read_directory(&data);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_directory() {
|
||||
let dir = SegmentDirectory { entries: vec![] };
|
||||
let bytes = write_directory(&dir);
|
||||
assert!(bytes.is_empty());
|
||||
let decoded = read_directory(&bytes).unwrap();
|
||||
assert!(decoded.is_empty());
|
||||
}
|
||||
}
|
||||
548
vendor/ruvector/crates/rvf/rvf-manifest/src/level0.rs
vendored
Normal file
548
vendor/ruvector/crates/rvf/rvf-manifest/src/level0.rs
vendored
Normal file
@@ -0,0 +1,548 @@
|
||||
//! Level 0 Root Manifest — fixed 4096 bytes at EOF.
|
||||
//!
|
||||
//! Provides read/write/validate functions that operate on raw byte arrays,
|
||||
//! using the `Level0Root` repr(C) struct from `rvf_types`.
|
||||
|
||||
use rvf_types::{
|
||||
CentroidPtr, EntrypointPtr, ErrorCode, FileIdentity, HotCachePtr, Level0Root, PrefetchMapPtr,
|
||||
QuantDictPtr, RvfError, TopLayerPtr, ROOT_MANIFEST_MAGIC, ROOT_MANIFEST_SIZE,
|
||||
};
|
||||
|
||||
// ---------- helpers for little-endian read/write ----------
|
||||
|
||||
/// Read a little-endian u16 at byte offset `off`.
// NOTE(review): these helpers duplicate the ones in directory.rs —
// consider hoisting into a shared private module.
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    u16::from_le_bytes(buf[off..off + 2].try_into().expect("2-byte slice"))
}

/// Read a little-endian u32 at byte offset `off`.
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    u32::from_le_bytes(buf[off..off + 4].try_into().expect("4-byte slice"))
}

/// Read a little-endian u64 at byte offset `off`.
fn read_u64_le(buf: &[u8], off: usize) -> u64 {
    u64::from_le_bytes(buf[off..off + 8].try_into().expect("8-byte slice"))
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u16_le(buf: &mut [u8], off: usize, v: u16) {
    buf[off..off + 2].copy_from_slice(&v.to_le_bytes());
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u32_le(buf: &mut [u8], off: usize, v: u32) {
    buf[off..off + 4].copy_from_slice(&v.to_le_bytes());
}

/// Write `v` as little-endian at byte offset `off`.
fn write_u64_le(buf: &mut [u8], off: usize, v: u64) {
    buf[off..off + 8].copy_from_slice(&v.to_le_bytes());
}
|
||||
|
||||
// ---------- Offsets matching the spec ----------
|
||||
|
||||
const OFF_MAGIC: usize = 0x000;
|
||||
const OFF_VERSION: usize = 0x004;
|
||||
const OFF_FLAGS: usize = 0x006;
|
||||
const OFF_L1_OFFSET: usize = 0x008;
|
||||
const OFF_L1_LENGTH: usize = 0x010;
|
||||
const OFF_TOTAL_VEC: usize = 0x018;
|
||||
const OFF_DIM: usize = 0x020;
|
||||
const OFF_DTYPE: usize = 0x022;
|
||||
const OFF_PROFILE: usize = 0x023;
|
||||
const OFF_EPOCH: usize = 0x024;
|
||||
const OFF_CREATED: usize = 0x028;
|
||||
const OFF_MODIFIED: usize = 0x030;
|
||||
|
||||
const OFF_ENTRYPOINT: usize = 0x038;
|
||||
const OFF_TOPLAYER: usize = 0x048;
|
||||
const OFF_CENTROID: usize = 0x058;
|
||||
const OFF_QUANTDICT: usize = 0x068;
|
||||
const OFF_HOT_CACHE: usize = 0x078;
|
||||
const OFF_PREFETCH: usize = 0x088;
|
||||
|
||||
const OFF_SIG_ALGO: usize = 0x098;
|
||||
const OFF_SIG_LEN: usize = 0x09A;
|
||||
const OFF_SIGNATURE: usize = 0x09C;
|
||||
|
||||
// FileIdentity offsets within the reserved area (0xF00..0xF44)
|
||||
const OFF_FILE_ID: usize = 0xF00;
|
||||
const OFF_PARENT_ID: usize = 0xF10;
|
||||
const OFF_PARENT_HASH: usize = 0xF20;
|
||||
const OFF_LINEAGE_DEPTH: usize = 0xF40;
|
||||
|
||||
// COW pointer offsets within the reserved area (0xF44..0xF84)
|
||||
// These follow FileIdentity and are backward-compatible (zeros = no COW).
|
||||
const OFF_COW_MAP_OFFSET: usize = 0xF44;
|
||||
const OFF_COW_MAP_GENERATION: usize = 0xF4C;
|
||||
const OFF_MEMBERSHIP_OFFSET: usize = 0xF50;
|
||||
const OFF_MEMBERSHIP_GENERATION: usize = 0xF58;
|
||||
const OFF_SNAPSHOT_EPOCH: usize = 0xF5C;
|
||||
const OFF_DOUBLE_ROOT_GENERATION: usize = 0xF60;
|
||||
const OFF_DOUBLE_ROOT_HASH: usize = 0xF64;
|
||||
|
||||
const OFF_CHECKSUM: usize = 0xFFC;
|
||||
|
||||
/// Deserialize a Level 0 root manifest from exactly 4096 bytes.
///
/// Validation order:
/// 1. magic at 0x000 must equal `ROOT_MANIFEST_MAGIC`;
/// 2. CRC32C over bytes 0x000..0xFFC must match the stored checksum at 0xFFC.
///
/// # Errors
///
/// * `RvfError::BadMagic` when the magic word does not match.
/// * `RvfError::Code(ErrorCode::InvalidChecksum)` when the CRC check fails.
pub fn read_level0(data: &[u8; ROOT_MANIFEST_SIZE]) -> Result<Level0Root, RvfError> {
    let magic = read_u32_le(data, OFF_MAGIC);
    if magic != ROOT_MANIFEST_MAGIC {
        return Err(RvfError::BadMagic {
            expected: ROOT_MANIFEST_MAGIC,
            got: magic,
        });
    }

    // The checksum covers everything before the trailing 4-byte CRC field.
    let stored_crc = read_u32_le(data, OFF_CHECKSUM);
    let computed_crc = crc32c::crc32c(&data[..OFF_CHECKSUM]);
    if stored_crc != computed_crc {
        return Err(RvfError::Code(ErrorCode::InvalidChecksum));
    }

    let sig_length = read_u16_le(data, OFF_SIG_LEN);

    // Start from a zeroed struct so any byte range not explicitly read
    // below (padding, unparsed reserved bytes) stays zero.
    let mut root = Level0Root::zeroed();
    root.magic = magic;
    root.version = read_u16_le(data, OFF_VERSION);
    root.flags = read_u16_le(data, OFF_FLAGS);
    root.l1_manifest_offset = read_u64_le(data, OFF_L1_OFFSET);
    root.l1_manifest_length = read_u64_le(data, OFF_L1_LENGTH);
    root.total_vector_count = read_u64_le(data, OFF_TOTAL_VEC);
    root.dimension = read_u16_le(data, OFF_DIM);
    root.base_dtype = data[OFF_DTYPE];
    root.profile_id = data[OFF_PROFILE];
    root.epoch = read_u32_le(data, OFF_EPOCH);
    root.created_ns = read_u64_le(data, OFF_CREATED);
    root.modified_ns = read_u64_le(data, OFF_MODIFIED);

    // Each hotset pointer is a 16-byte record: u64 seg_offset at +0,
    // u32 block_offset at +8, then a u32 count/size field at +12.
    root.entrypoint = EntrypointPtr {
        seg_offset: read_u64_le(data, OFF_ENTRYPOINT),
        block_offset: read_u32_le(data, OFF_ENTRYPOINT + 8),
        count: read_u32_le(data, OFF_ENTRYPOINT + 12),
    };
    root.toplayer = TopLayerPtr {
        seg_offset: read_u64_le(data, OFF_TOPLAYER),
        block_offset: read_u32_le(data, OFF_TOPLAYER + 8),
        node_count: read_u32_le(data, OFF_TOPLAYER + 12),
    };
    root.centroid = CentroidPtr {
        seg_offset: read_u64_le(data, OFF_CENTROID),
        block_offset: read_u32_le(data, OFF_CENTROID + 8),
        count: read_u32_le(data, OFF_CENTROID + 12),
    };
    root.quantdict = QuantDictPtr {
        seg_offset: read_u64_le(data, OFF_QUANTDICT),
        block_offset: read_u32_le(data, OFF_QUANTDICT + 8),
        size: read_u32_le(data, OFF_QUANTDICT + 12),
    };
    root.hot_cache = HotCachePtr {
        seg_offset: read_u64_le(data, OFF_HOT_CACHE),
        block_offset: read_u32_le(data, OFF_HOT_CACHE + 8),
        vector_count: read_u32_le(data, OFF_HOT_CACHE + 12),
    };
    // Prefetch map is u64 offset + u32 entries; the pad word is not stored.
    root.prefetch_map = PrefetchMapPtr {
        offset: read_u64_le(data, OFF_PREFETCH),
        entries: read_u32_le(data, OFF_PREFETCH + 8),
        _pad: 0,
    };

    root.sig_algo = read_u16_le(data, OFF_SIG_ALGO);
    root.sig_length = sig_length;

    // Clamp the signature copy to the in-struct buffer size so a
    // corrupt sig_length cannot overrun signature_buf.
    let sig_len = sig_length as usize;
    let sig_max = Level0Root::SIG_BUF_SIZE.min(sig_len);
    root.signature_buf[..sig_max].copy_from_slice(&data[OFF_SIGNATURE..OFF_SIGNATURE + sig_max]);

    // Read FileIdentity from the reserved area (0xF00..0xF44).
    let mut file_id = [0u8; 16];
    file_id.copy_from_slice(&data[OFF_FILE_ID..OFF_FILE_ID + 16]);
    let mut parent_id = [0u8; 16];
    parent_id.copy_from_slice(&data[OFF_PARENT_ID..OFF_PARENT_ID + 16]);
    let mut parent_hash = [0u8; 32];
    parent_hash.copy_from_slice(&data[OFF_PARENT_HASH..OFF_PARENT_HASH + 32]);
    let lineage_depth = read_u32_le(data, OFF_LINEAGE_DEPTH);

    let fi = FileIdentity {
        file_id,
        parent_id,
        parent_hash,
        lineage_depth,
    };
    // Round-trip through FileIdentity's own serializer so the reserved
    // bytes match whatever layout FileIdentity::to_bytes defines.
    let fi_bytes = fi.to_bytes();
    root.reserved[..68].copy_from_slice(&fi_bytes);

    // Read COW pointers from the reserved area (backward-compatible: zeros = no COW).
    // These are stored as raw bytes in reserved[68..136].
    let cow_map_offset = read_u64_le(data, OFF_COW_MAP_OFFSET);
    let cow_map_generation = read_u32_le(data, OFF_COW_MAP_GENERATION);
    let membership_offset = read_u64_le(data, OFF_MEMBERSHIP_OFFSET);
    let membership_generation = read_u32_le(data, OFF_MEMBERSHIP_GENERATION);
    let snapshot_epoch = read_u32_le(data, OFF_SNAPSHOT_EPOCH);
    let double_root_generation = read_u32_le(data, OFF_DOUBLE_ROOT_GENERATION);
    let mut double_root_hash = [0u8; 32];
    double_root_hash.copy_from_slice(&data[OFF_DOUBLE_ROOT_HASH..OFF_DOUBLE_ROOT_HASH + 32]);

    // Pack COW pointers into reserved[68..136], little-endian, in the
    // same order they appear on disk.
    let cow_off = 68;
    root.reserved[cow_off..cow_off + 8].copy_from_slice(&cow_map_offset.to_le_bytes());
    root.reserved[cow_off + 8..cow_off + 12].copy_from_slice(&cow_map_generation.to_le_bytes());
    root.reserved[cow_off + 12..cow_off + 20].copy_from_slice(&membership_offset.to_le_bytes());
    root.reserved[cow_off + 20..cow_off + 24].copy_from_slice(&membership_generation.to_le_bytes());
    root.reserved[cow_off + 24..cow_off + 28].copy_from_slice(&snapshot_epoch.to_le_bytes());
    root.reserved[cow_off + 28..cow_off + 32]
        .copy_from_slice(&double_root_generation.to_le_bytes());
    root.reserved[cow_off + 32..cow_off + 64].copy_from_slice(&double_root_hash);

    // Preserve the on-disk checksum (already verified above).
    root.root_checksum = stored_crc;

    Ok(root)
}
|
||||
|
||||
/// Serialize a Level 0 root manifest into exactly 4096 bytes.
///
/// The `root_checksum` field on the input is ignored; the checksum is
/// computed over bytes 0x000..0xFFC and written at offset 0xFFC.
///
/// The signature is truncated to `Level0Root::SIG_BUF_SIZE` if `sig_length`
/// claims more than the buffer can hold.
pub fn write_level0(root: &Level0Root) -> [u8; ROOT_MANIFEST_SIZE] {
    let mut buf = [0u8; ROOT_MANIFEST_SIZE];

    // Fixed-position scalar header fields.
    write_u32_le(&mut buf, OFF_MAGIC, root.magic);
    write_u16_le(&mut buf, OFF_VERSION, root.version);
    write_u16_le(&mut buf, OFF_FLAGS, root.flags);
    write_u64_le(&mut buf, OFF_L1_OFFSET, root.l1_manifest_offset);
    write_u64_le(&mut buf, OFF_L1_LENGTH, root.l1_manifest_length);
    write_u64_le(&mut buf, OFF_TOTAL_VEC, root.total_vector_count);
    write_u16_le(&mut buf, OFF_DIM, root.dimension);
    buf[OFF_DTYPE] = root.base_dtype;
    buf[OFF_PROFILE] = root.profile_id;
    write_u32_le(&mut buf, OFF_EPOCH, root.epoch);
    write_u64_le(&mut buf, OFF_CREATED, root.created_ns);
    write_u64_le(&mut buf, OFF_MODIFIED, root.modified_ns);

    // Entrypoint (16 bytes)
    write_u64_le(&mut buf, OFF_ENTRYPOINT, root.entrypoint.seg_offset);
    write_u32_le(&mut buf, OFF_ENTRYPOINT + 8, root.entrypoint.block_offset);
    write_u32_le(&mut buf, OFF_ENTRYPOINT + 12, root.entrypoint.count);

    // Top layer (16 bytes)
    write_u64_le(&mut buf, OFF_TOPLAYER, root.toplayer.seg_offset);
    write_u32_le(&mut buf, OFF_TOPLAYER + 8, root.toplayer.block_offset);
    write_u32_le(&mut buf, OFF_TOPLAYER + 12, root.toplayer.node_count);

    // Centroid (16 bytes)
    write_u64_le(&mut buf, OFF_CENTROID, root.centroid.seg_offset);
    write_u32_le(&mut buf, OFF_CENTROID + 8, root.centroid.block_offset);
    write_u32_le(&mut buf, OFF_CENTROID + 12, root.centroid.count);

    // Quant dict (16 bytes)
    write_u64_le(&mut buf, OFF_QUANTDICT, root.quantdict.seg_offset);
    write_u32_le(&mut buf, OFF_QUANTDICT + 8, root.quantdict.block_offset);
    write_u32_le(&mut buf, OFF_QUANTDICT + 12, root.quantdict.size);

    // Hot cache (16 bytes)
    write_u64_le(&mut buf, OFF_HOT_CACHE, root.hot_cache.seg_offset);
    write_u32_le(&mut buf, OFF_HOT_CACHE + 8, root.hot_cache.block_offset);
    write_u32_le(&mut buf, OFF_HOT_CACHE + 12, root.hot_cache.vector_count);

    // Prefetch map (12 bytes: u64 offset + u32 entries)
    write_u64_le(&mut buf, OFF_PREFETCH, root.prefetch_map.offset);
    write_u32_le(&mut buf, OFF_PREFETCH + 8, root.prefetch_map.entries);

    // Signature block: clamp the declared length to the buffer size so a
    // corrupt sig_length cannot read past signature_buf.
    write_u16_le(&mut buf, OFF_SIG_ALGO, root.sig_algo);
    let sig_len = (root.sig_length as usize).min(Level0Root::SIG_BUF_SIZE);
    write_u16_le(&mut buf, OFF_SIG_LEN, sig_len as u16);
    buf[OFF_SIGNATURE..OFF_SIGNATURE + sig_len].copy_from_slice(&root.signature_buf[..sig_len]);

    // Write FileIdentity from reserved area into the buffer
    if root.reserved.len() >= 68 {
        let fi = FileIdentity::from_bytes(root.reserved[..68].try_into().unwrap());
        buf[OFF_FILE_ID..OFF_FILE_ID + 16].copy_from_slice(&fi.file_id);
        buf[OFF_PARENT_ID..OFF_PARENT_ID + 16].copy_from_slice(&fi.parent_id);
        buf[OFF_PARENT_HASH..OFF_PARENT_HASH + 32].copy_from_slice(&fi.parent_hash);
        write_u32_le(&mut buf, OFF_LINEAGE_DEPTH, fi.lineage_depth);
    }

    // Write COW pointers from reserved[68..132] into the buffer.
    // Backward-compatible: zeros mean no COW.
    if root.reserved.len() >= 132 {
        let cow_off = 68;
        buf[OFF_COW_MAP_OFFSET..OFF_COW_MAP_OFFSET + 8]
            .copy_from_slice(&root.reserved[cow_off..cow_off + 8]);
        buf[OFF_COW_MAP_GENERATION..OFF_COW_MAP_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 8..cow_off + 12]);
        buf[OFF_MEMBERSHIP_OFFSET..OFF_MEMBERSHIP_OFFSET + 8]
            .copy_from_slice(&root.reserved[cow_off + 12..cow_off + 20]);
        buf[OFF_MEMBERSHIP_GENERATION..OFF_MEMBERSHIP_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 20..cow_off + 24]);
        buf[OFF_SNAPSHOT_EPOCH..OFF_SNAPSHOT_EPOCH + 4]
            .copy_from_slice(&root.reserved[cow_off + 24..cow_off + 28]);
        buf[OFF_DOUBLE_ROOT_GENERATION..OFF_DOUBLE_ROOT_GENERATION + 4]
            .copy_from_slice(&root.reserved[cow_off + 28..cow_off + 32]);
        buf[OFF_DOUBLE_ROOT_HASH..OFF_DOUBLE_ROOT_HASH + 32]
            .copy_from_slice(&root.reserved[cow_off + 32..cow_off + 64]);
    }

    // CRC32C over first 4092 bytes; stored in the trailing 4 bytes.
    let crc = crc32c::crc32c(&buf[..OFF_CHECKSUM]);
    write_u32_le(&mut buf, OFF_CHECKSUM, crc);

    buf
}
|
||||
|
||||
/// Fast validation: check magic + CRC32C without full deserialization.
|
||||
pub fn validate_level0(data: &[u8; ROOT_MANIFEST_SIZE]) -> bool {
|
||||
let magic = read_u32_le(data, OFF_MAGIC);
|
||||
if magic != ROOT_MANIFEST_MAGIC {
|
||||
return false;
|
||||
}
|
||||
let stored_crc = read_u32_le(data, OFF_CHECKSUM);
|
||||
let computed_crc = crc32c::crc32c(&data[..OFF_CHECKSUM]);
|
||||
stored_crc == computed_crc
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: a root populated with non-default values in every field group,
    // so round-trip tests catch transposed offsets.
    fn sample_root() -> Level0Root {
        let mut root = Level0Root::zeroed();
        root.version = 1;
        root.flags = 0x0004; // SIGNED
        root.l1_manifest_offset = 0x1_0000;
        root.l1_manifest_length = 0x2000;
        root.total_vector_count = 10_000_000;
        root.dimension = 384;
        root.base_dtype = 1; // f16
        root.profile_id = 2; // text
        root.epoch = 42;
        root.created_ns = 1_700_000_000_000_000_000;
        root.modified_ns = 1_700_000_001_000_000_000;
        root.entrypoint = EntrypointPtr {
            seg_offset: 0x1000,
            block_offset: 64,
            count: 3,
        };
        root.toplayer = TopLayerPtr {
            seg_offset: 0x2000,
            block_offset: 128,
            node_count: 500,
        };
        root.centroid = CentroidPtr {
            seg_offset: 0x3000,
            block_offset: 0,
            count: 256,
        };
        root.quantdict = QuantDictPtr {
            seg_offset: 0x4000,
            block_offset: 0,
            size: 8192,
        };
        root.hot_cache = HotCachePtr {
            seg_offset: 0x5000,
            block_offset: 0,
            vector_count: 1000,
        };
        root.prefetch_map = PrefetchMapPtr {
            offset: 0x6000,
            entries: 200,
            _pad: 0,
        };
        root.sig_algo = 0; // Ed25519
        root.sig_length = 4;
        root.signature_buf[0] = 0xDE;
        root.signature_buf[1] = 0xAD;
        root.signature_buf[2] = 0xBE;
        root.signature_buf[3] = 0xEF;
        root
    }

    // write_level0 -> read_level0 must preserve every serialized field.
    #[test]
    fn round_trip() {
        let original = sample_root();
        let bytes = write_level0(&original);
        let decoded = read_level0(&bytes).expect("read_level0 should succeed");

        assert_eq!(decoded.magic, original.magic);
        assert_eq!(decoded.version, original.version);
        assert_eq!(decoded.flags, original.flags);
        assert_eq!(decoded.l1_manifest_offset, original.l1_manifest_offset);
        assert_eq!(decoded.l1_manifest_length, original.l1_manifest_length);
        assert_eq!(decoded.total_vector_count, original.total_vector_count);
        assert_eq!(decoded.dimension, original.dimension);
        assert_eq!(decoded.base_dtype, original.base_dtype);
        assert_eq!(decoded.profile_id, original.profile_id);
        assert_eq!(decoded.epoch, original.epoch);
        assert_eq!(decoded.created_ns, original.created_ns);
        assert_eq!(decoded.modified_ns, original.modified_ns);

        assert_eq!(
            decoded.entrypoint.seg_offset,
            original.entrypoint.seg_offset
        );
        assert_eq!(
            decoded.entrypoint.block_offset,
            original.entrypoint.block_offset
        );
        assert_eq!(decoded.entrypoint.count, original.entrypoint.count);

        assert_eq!(decoded.toplayer.seg_offset, original.toplayer.seg_offset);
        assert_eq!(decoded.toplayer.node_count, original.toplayer.node_count);

        assert_eq!(decoded.centroid.seg_offset, original.centroid.seg_offset);
        assert_eq!(decoded.centroid.count, original.centroid.count);

        assert_eq!(decoded.quantdict.seg_offset, original.quantdict.seg_offset);
        assert_eq!(decoded.quantdict.size, original.quantdict.size);

        assert_eq!(decoded.hot_cache.seg_offset, original.hot_cache.seg_offset);
        assert_eq!(
            decoded.hot_cache.vector_count,
            original.hot_cache.vector_count
        );

        assert_eq!(decoded.prefetch_map.offset, original.prefetch_map.offset);
        assert_eq!(decoded.prefetch_map.entries, original.prefetch_map.entries);

        assert_eq!(decoded.sig_algo, original.sig_algo);
        assert_eq!(decoded.sig_length, original.sig_length);
        assert_eq!(decoded.signature_buf[..4], original.signature_buf[..4]);
    }

    // A single flipped byte inside the checksummed region must be detected
    // by both validate_level0 and read_level0.
    #[test]
    fn crc_detects_corruption() {
        let root = sample_root();
        let mut bytes = write_level0(&root);
        assert!(validate_level0(&bytes));

        // Corrupt a byte in the middle
        bytes[0x050] ^= 0xFF;
        assert!(!validate_level0(&bytes));

        // read_level0 should also fail
        assert!(read_level0(&bytes).is_err());
    }

    // With a recomputed CRC, only the magic check should trip, yielding
    // the BadMagic error variant (not a checksum error).
    #[test]
    fn invalid_magic_rejected() {
        let mut bytes = write_level0(&sample_root());
        // Overwrite magic
        bytes[0] = 0x00;
        bytes[1] = 0x00;
        bytes[2] = 0x00;
        bytes[3] = 0x00;
        // Fix CRC so only magic check fails
        let crc = crc32c::crc32c(&bytes[..OFF_CHECKSUM]);
        write_u32_le(&mut bytes, OFF_CHECKSUM, crc);

        let err = read_level0(&bytes).unwrap_err();
        match err {
            RvfError::BadMagic { expected, got } => {
                assert_eq!(expected, ROOT_MANIFEST_MAGIC);
                assert_eq!(got, 0);
            }
            other => panic!("expected BadMagic, got {:?}", other),
        }
    }

    // An all-zero root must still serialize with a valid magic/CRC.
    #[test]
    fn default_root_round_trips() {
        let root = Level0Root::zeroed();
        let bytes = write_level0(&root);
        let decoded = read_level0(&bytes).unwrap();
        assert_eq!(decoded.magic, ROOT_MANIFEST_MAGIC);
        assert_eq!(decoded.total_vector_count, 0);
        assert_eq!(decoded.dimension, 0);
    }

    #[test]
    fn output_is_exactly_4096_bytes() {
        let bytes = write_level0(&Level0Root::zeroed());
        assert_eq!(bytes.len(), 4096);
    }

    // COW pointers live in the reserved area (bytes 68..132) and must
    // survive serialization unchanged.
    #[test]
    fn cow_pointers_round_trip() {
        let mut root = sample_root();

        // Set COW pointers in the reserved area (offsets 68..132)
        let cow_off = 68;
        let cow_map_offset: u64 = 0x1234_5678_9ABC_DEF0;
        let cow_map_generation: u32 = 42;
        let membership_offset: u64 = 0xFEDC_BA98_7654_3210;
        let membership_generation: u32 = 7;
        let snapshot_epoch: u32 = 100;
        let double_root_generation: u32 = 3;
        let double_root_hash = [0xEE; 32];

        root.reserved[cow_off..cow_off + 8].copy_from_slice(&cow_map_offset.to_le_bytes());
        root.reserved[cow_off + 8..cow_off + 12].copy_from_slice(&cow_map_generation.to_le_bytes());
        root.reserved[cow_off + 12..cow_off + 20].copy_from_slice(&membership_offset.to_le_bytes());
        root.reserved[cow_off + 20..cow_off + 24]
            .copy_from_slice(&membership_generation.to_le_bytes());
        root.reserved[cow_off + 24..cow_off + 28].copy_from_slice(&snapshot_epoch.to_le_bytes());
        root.reserved[cow_off + 28..cow_off + 32]
            .copy_from_slice(&double_root_generation.to_le_bytes());
        root.reserved[cow_off + 32..cow_off + 64].copy_from_slice(&double_root_hash);

        let bytes = write_level0(&root);
        let decoded = read_level0(&bytes).expect("read_level0 should succeed");

        // Verify COW pointers survived round-trip
        let d_cow_off = 68;
        let d_cow_map_offset = u64::from_le_bytes(
            decoded.reserved[d_cow_off..d_cow_off + 8]
                .try_into()
                .unwrap(),
        );
        let d_cow_map_generation = u32::from_le_bytes(
            decoded.reserved[d_cow_off + 8..d_cow_off + 12]
                .try_into()
                .unwrap(),
        );
        let d_membership_offset = u64::from_le_bytes(
            decoded.reserved[d_cow_off + 12..d_cow_off + 20]
                .try_into()
                .unwrap(),
        );
        let d_membership_generation = u32::from_le_bytes(
            decoded.reserved[d_cow_off + 20..d_cow_off + 24]
                .try_into()
                .unwrap(),
        );
        let d_snapshot_epoch = u32::from_le_bytes(
            decoded.reserved[d_cow_off + 24..d_cow_off + 28]
                .try_into()
                .unwrap(),
        );
        let d_double_root_generation = u32::from_le_bytes(
            decoded.reserved[d_cow_off + 28..d_cow_off + 32]
                .try_into()
                .unwrap(),
        );
        let d_double_root_hash = &decoded.reserved[d_cow_off + 32..d_cow_off + 64];

        assert_eq!(d_cow_map_offset, cow_map_offset);
        assert_eq!(d_cow_map_generation, cow_map_generation);
        assert_eq!(d_membership_offset, membership_offset);
        assert_eq!(d_membership_generation, membership_generation);
        assert_eq!(d_snapshot_epoch, snapshot_epoch);
        assert_eq!(d_double_root_generation, double_root_generation);
        assert_eq!(d_double_root_hash, &double_root_hash[..]);
    }

    #[test]
    fn cow_pointers_default_to_zero() {
        // Verify that a root with no COW pointers still round-trips correctly
        let root = Level0Root::zeroed();
        let bytes = write_level0(&root);
        let decoded = read_level0(&bytes).unwrap();

        let cow_off = 68;
        let cow_map_offset =
            u64::from_le_bytes(decoded.reserved[cow_off..cow_off + 8].try_into().unwrap());
        let snapshot_epoch = u32::from_le_bytes(
            decoded.reserved[cow_off + 24..cow_off + 28]
                .try_into()
                .unwrap(),
        );

        assert_eq!(cow_map_offset, 0);
        assert_eq!(snapshot_epoch, 0);
    }
}
|
||||
282
vendor/ruvector/crates/rvf/rvf-manifest/src/level1.rs
vendored
Normal file
282
vendor/ruvector/crates/rvf/rvf-manifest/src/level1.rs
vendored
Normal file
@@ -0,0 +1,282 @@
|
||||
//! Level 1 Full Manifest — variable-size TLV records.
|
||||
//!
|
||||
//! Level 1 is encoded as a sequence of tag-length-value records,
|
||||
//! each 8-byte aligned, for forward compatibility.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
|
||||
/// Tag values for Level 1 manifest records.
///
/// The `u16` discriminants are the on-wire tag values (see `from_u16` and
/// the TLV writer), so existing values must never be renumbered.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u16)]
pub enum ManifestTag {
    /// Array of segment directory entries.
    SegmentDir = 0x0001,
    /// Temperature tier assignments per block.
    TempTierMap = 0x0002,
    /// Index layer availability bitmap.
    IndexLayers = 0x0003,
    /// Epoch chain with rollback pointers.
    OverlayChain = 0x0004,
    /// Active/tombstoned segment sets.
    CompactionState = 0x0005,
    /// Multi-file shard references.
    ShardRefs = 0x0006,
    /// What this file can do (features, limits).
    CapabilityManifest = 0x0007,
    /// Domain-specific configuration.
    ProfileConfig = 0x0008,
    /// Pointer to latest SKETCH_SEG.
    AccessSketchRef = 0x0009,
    /// Full prefetch hint table.
    PrefetchTable = 0x000A,
    /// Restart point index for varint delta IDs.
    IdRestartPoints = 0x000B,
    /// Proof-of-computation witness chain.
    WitnessChain = 0x000C,
    /// Encryption key references (not keys themselves).
    KeyDirectory = 0x000D,
}
|
||||
|
||||
impl ManifestTag {
|
||||
pub fn from_u16(v: u16) -> Option<Self> {
|
||||
match v {
|
||||
0x0001 => Some(Self::SegmentDir),
|
||||
0x0002 => Some(Self::TempTierMap),
|
||||
0x0003 => Some(Self::IndexLayers),
|
||||
0x0004 => Some(Self::OverlayChain),
|
||||
0x0005 => Some(Self::CompactionState),
|
||||
0x0006 => Some(Self::ShardRefs),
|
||||
0x0007 => Some(Self::CapabilityManifest),
|
||||
0x0008 => Some(Self::ProfileConfig),
|
||||
0x0009 => Some(Self::AccessSketchRef),
|
||||
0x000A => Some(Self::PrefetchTable),
|
||||
0x000B => Some(Self::IdRestartPoints),
|
||||
0x000C => Some(Self::WitnessChain),
|
||||
0x000D => Some(Self::KeyDirectory),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single TLV record from the Level 1 manifest.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TlvRecord {
    // Record type discriminator (on-wire u16).
    pub tag: ManifestTag,
    // Payload length as declared on the wire. Note: the serializer encodes
    // `value.len()` rather than this field.
    pub length: u32,
    // Raw payload bytes (length bytes, padding excluded).
    pub value: Vec<u8>,
}
|
||||
|
||||
/// Parsed Level 1 manifest: a collection of TLV records.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Level1Manifest {
    // Records in the order they appeared in the encoded manifest.
    pub records: Vec<TlvRecord>,
}
|
||||
|
||||
impl Level1Manifest {
|
||||
/// Find the first record with the given tag.
|
||||
pub fn find(&self, tag: ManifestTag) -> Option<&TlvRecord> {
|
||||
self.records.iter().find(|r| r.tag == tag)
|
||||
}
|
||||
|
||||
/// Find all records with the given tag.
|
||||
pub fn find_all(&self, tag: ManifestTag) -> Vec<&TlvRecord> {
|
||||
self.records.iter().filter(|r| r.tag == tag).collect()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
/// Read a little-endian u16 at byte offset `off` (panics if out of range).
fn read_u16_le(buf: &[u8], off: usize) -> u16 {
    let mut bytes = [0u8; 2];
    bytes.copy_from_slice(&buf[off..off + 2]);
    u16::from_le_bytes(bytes)
}
|
||||
|
||||
/// Read a little-endian u32 at byte offset `off` (panics if out of range).
fn read_u32_le(buf: &[u8], off: usize) -> u32 {
    let mut bytes = [0u8; 4];
    bytes.copy_from_slice(&buf[off..off + 4]);
    u32::from_le_bytes(bytes)
}
|
||||
|
||||
/// Append `v` to `buf` as two little-endian bytes.
fn write_u16_le(buf: &mut Vec<u8>, v: u16) {
    let [lo, hi] = v.to_le_bytes();
    buf.push(lo);
    buf.push(hi);
}
|
||||
|
||||
/// Append `v` to `buf` as four little-endian bytes.
fn write_u32_le(buf: &mut Vec<u8>, v: u32) {
    for byte in v.to_le_bytes() {
        buf.push(byte);
    }
}
|
||||
|
||||
/// Round `n` up to the next multiple of 8 (identity when already aligned).
fn align8(n: usize) -> usize {
    (n + 7) / 8 * 8
}
|
||||
|
||||
/// TLV record header layout (8 bytes total, keeping the value 8-byte aligned):
///   tag:    u16 (2 bytes)
///   length: u32 (4 bytes)
///   pad:    u16 (2 bytes, to reach 8-byte alignment)
///   value:  [u8; length]
///   [padding to 8-byte boundary]
const TLV_HEADER_SIZE: usize = 8; // tag(2) + length(4) + pad(2)
|
||||
|
||||
/// Deserialize a sequence of TLV records from raw bytes.
|
||||
pub fn read_tlv_records(data: &[u8]) -> Result<Vec<TlvRecord>, RvfError> {
|
||||
let mut records = Vec::new();
|
||||
let mut pos = 0;
|
||||
|
||||
while pos + TLV_HEADER_SIZE <= data.len() {
|
||||
let tag_raw = read_u16_le(data, pos);
|
||||
let length = read_u32_le(data, pos + 2);
|
||||
// pad at pos+6 is ignored on read
|
||||
|
||||
let tag = ManifestTag::from_u16(tag_raw).ok_or(RvfError::InvalidEnumValue {
|
||||
type_name: "ManifestTag",
|
||||
value: tag_raw as u64,
|
||||
})?;
|
||||
|
||||
let value_start = pos + TLV_HEADER_SIZE;
|
||||
let value_end = value_start + length as usize;
|
||||
|
||||
if value_end > data.len() {
|
||||
return Err(RvfError::Code(ErrorCode::TruncatedSegment));
|
||||
}
|
||||
|
||||
let value = data[value_start..value_end].to_vec();
|
||||
records.push(TlvRecord { tag, length, value });
|
||||
|
||||
// Advance to next 8-byte aligned position
|
||||
pos = align8(value_end);
|
||||
}
|
||||
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
/// Serialize a sequence of TLV records into bytes (8-byte aligned).
|
||||
pub fn write_tlv_records(records: &[TlvRecord]) -> Vec<u8> {
|
||||
let mut buf = Vec::new();
|
||||
|
||||
for rec in records {
|
||||
write_u16_le(&mut buf, rec.tag as u16);
|
||||
write_u32_le(&mut buf, rec.value.len() as u32);
|
||||
// pad field (2 bytes)
|
||||
buf.extend_from_slice(&[0u8; 2]);
|
||||
|
||||
buf.extend_from_slice(&rec.value);
|
||||
|
||||
// Pad to 8-byte boundary
|
||||
let padded = align8(buf.len());
|
||||
buf.resize(padded, 0);
|
||||
}
|
||||
|
||||
buf
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tag_from_u16_known() {
        assert_eq!(ManifestTag::from_u16(0x0001), Some(ManifestTag::SegmentDir));
        assert_eq!(
            ManifestTag::from_u16(0x000D),
            Some(ManifestTag::KeyDirectory)
        );
    }

    // Values just outside the defined range (and far outside) must decode
    // to None rather than a bogus variant.
    #[test]
    fn tag_from_u16_unknown() {
        assert_eq!(ManifestTag::from_u16(0x0000), None);
        assert_eq!(ManifestTag::from_u16(0x000E), None);
        assert_eq!(ManifestTag::from_u16(0xFFFF), None);
    }

    #[test]
    fn round_trip_single_record() {
        let records = vec![TlvRecord {
            tag: ManifestTag::SegmentDir,
            length: 5,
            value: vec![1, 2, 3, 4, 5],
        }];

        let bytes = write_tlv_records(&records);
        assert_eq!(bytes.len() % 8, 0, "output must be 8-byte aligned");

        let decoded = read_tlv_records(&bytes).unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].tag, ManifestTag::SegmentDir);
        assert_eq!(decoded[0].value, vec![1, 2, 3, 4, 5]);
    }

    // Mixed value lengths (3, 8, 1 bytes) exercise the per-record padding.
    #[test]
    fn round_trip_multiple_records() {
        let records = vec![
            TlvRecord {
                tag: ManifestTag::SegmentDir,
                length: 3,
                value: vec![0xAA, 0xBB, 0xCC],
            },
            TlvRecord {
                tag: ManifestTag::OverlayChain,
                length: 8,
                value: vec![1, 2, 3, 4, 5, 6, 7, 8],
            },
            TlvRecord {
                tag: ManifestTag::CapabilityManifest,
                length: 1,
                value: vec![0xFF],
            },
        ];

        let bytes = write_tlv_records(&records);
        assert_eq!(bytes.len() % 8, 0);

        let decoded = read_tlv_records(&bytes).unwrap();
        assert_eq!(decoded.len(), 3);
        assert_eq!(decoded[0].tag, ManifestTag::SegmentDir);
        assert_eq!(decoded[0].value, vec![0xAA, 0xBB, 0xCC]);
        assert_eq!(decoded[1].tag, ManifestTag::OverlayChain);
        assert_eq!(decoded[1].value, vec![1, 2, 3, 4, 5, 6, 7, 8]);
        assert_eq!(decoded[2].tag, ManifestTag::CapabilityManifest);
        assert_eq!(decoded[2].value, vec![0xFF]);
    }

    #[test]
    fn empty_records() {
        let bytes = write_tlv_records(&[]);
        assert!(bytes.is_empty());
        let decoded = read_tlv_records(&bytes).unwrap();
        assert!(decoded.is_empty());
    }

    // A header whose declared length exceeds the remaining bytes must be
    // rejected, not read out of bounds.
    #[test]
    fn truncated_value_returns_error() {
        let mut buf = Vec::new();
        write_u16_le(&mut buf, ManifestTag::SegmentDir as u16);
        write_u32_le(&mut buf, 100); // claims 100 bytes
        buf.extend_from_slice(&[0u8; 2]); // pad
        buf.extend_from_slice(&[0u8; 10]); // only 10 bytes

        let result = read_tlv_records(&buf);
        assert!(result.is_err());
    }

    #[test]
    fn level1_manifest_find() {
        let manifest = Level1Manifest {
            records: vec![
                TlvRecord {
                    tag: ManifestTag::SegmentDir,
                    length: 3,
                    value: vec![1, 2, 3],
                },
                TlvRecord {
                    tag: ManifestTag::OverlayChain,
                    length: 2,
                    value: vec![4, 5],
                },
            ],
        };

        assert!(manifest.find(ManifestTag::SegmentDir).is_some());
        assert!(manifest.find(ManifestTag::OverlayChain).is_some());
        assert!(manifest.find(ManifestTag::CompactionState).is_none());
    }
}
|
||||
29
vendor/ruvector/crates/rvf/rvf-manifest/src/lib.rs
vendored
Normal file
29
vendor/ruvector/crates/rvf/rvf-manifest/src/lib.rs
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
//! Two-level manifest system for the RuVector Format (RVF).
|
||||
//!
|
||||
//! The manifest system enables progressive boot:
|
||||
//! - **Level 0** (fixed 4096 bytes at EOF): hotset pointers for instant query
|
||||
//! - **Level 1** (variable-size TLV records): full segment directory
|
||||
//!
|
||||
//! A reader only needs Level 0 to start answering approximate queries.
|
||||
//! Level 1 is loaded asynchronously for full-quality results.
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
pub mod boot;
|
||||
pub mod chain;
|
||||
pub mod directory;
|
||||
pub mod level0;
|
||||
pub mod level1;
|
||||
pub mod writer;
|
||||
|
||||
pub use boot::{boot_phase1, boot_phase2, extract_hotset_offsets, BootState, HotsetPointers};
|
||||
pub use chain::OverlayChain;
|
||||
pub use directory::{SegmentDirEntry, SegmentDirectory};
|
||||
pub use level0::{read_level0, validate_level0, write_level0};
|
||||
pub use level1::{read_tlv_records, write_tlv_records, Level1Manifest, ManifestTag, TlvRecord};
|
||||
pub use writer::{build_manifest, build_manifest_at};
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub use writer::commit_manifest;
|
||||
197
vendor/ruvector/crates/rvf/rvf-manifest/src/writer.rs
vendored
Normal file
197
vendor/ruvector/crates/rvf/rvf-manifest/src/writer.rs
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
//! Manifest Writer — builds a complete manifest (Level 1 TLV + Level 0 root).
|
||||
//!
|
||||
//! Output: Level 1 TLV payload followed by Level 0 root as last 4096 bytes.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use rvf_types::{Level0Root, ROOT_MANIFEST_SIZE};
|
||||
|
||||
use crate::boot::HotsetPointers;
|
||||
use crate::chain::{self, OverlayChain};
|
||||
use crate::directory::{self, SegmentDirectory};
|
||||
use crate::level0;
|
||||
use crate::level1::{self, ManifestTag, TlvRecord};
|
||||
|
||||
/// Build a complete manifest from a segment directory, hotset pointers, epoch,
/// and an optional overlay chain (previous manifest link).
///
/// Returns a byte buffer containing:
/// - Level 1 TLV records (variable size): a `SegmentDir` record built from
///   `dir`, plus an `OverlayChain` record when `prev_chain` is provided
/// - Level 0 root manifest (last 4096 bytes)
///
/// The `l1_manifest_offset` in Level 0 is set to 0 because the caller
/// must adjust it to the actual file position where this data is written.
/// Use [`build_manifest_at`] if you know the file offset ahead of time.
pub fn build_manifest(
    dir: &SegmentDirectory,
    hotset: &HotsetPointers,
    epoch: u32,
    prev_chain: Option<&OverlayChain>,
) -> Vec<u8> {
    // Thin wrapper: identical to build_manifest_at with a zero file offset.
    build_manifest_at(dir, hotset, epoch, prev_chain, 0)
}
|
||||
|
||||
/// Like [`build_manifest`], but sets `l1_manifest_offset` to `file_offset`.
///
/// This is for when the caller knows exactly where in the file the
/// manifest payload will be written.
pub fn build_manifest_at(
    dir: &SegmentDirectory,
    hotset: &HotsetPointers,
    epoch: u32,
    prev_chain: Option<&OverlayChain>,
    file_offset: u64,
) -> Vec<u8> {
    // Build TLV records
    let mut records = Vec::new();

    // Segment directory record (always present, first record)
    let dir_bytes = directory::write_directory(dir);
    records.push(TlvRecord {
        tag: ManifestTag::SegmentDir,
        length: dir_bytes.len() as u32,
        value: dir_bytes,
    });

    // Overlay chain record (if provided)
    if let Some(chain_ref) = prev_chain {
        let chain_bytes = chain::write_overlay_chain(chain_ref);
        records.push(TlvRecord {
            tag: ManifestTag::OverlayChain,
            length: chain_bytes.len() as u32,
            value: chain_bytes,
        });
    }

    let l1_bytes = level1::write_tlv_records(&records);
    let l1_len = l1_bytes.len() as u64;

    // Build Level 0 root: start from an all-zero root, then fill in the
    // Level 1 location, epoch, and the hotset pointers verbatim.
    let mut root = Level0Root::zeroed();
    root.version = 1;
    root.l1_manifest_offset = file_offset;
    root.l1_manifest_length = l1_len;
    root.epoch = epoch;
    root.entrypoint = hotset.entrypoint;
    root.toplayer = hotset.toplayer;
    root.centroid = hotset.centroid;
    root.quantdict = hotset.quantdict;
    root.hot_cache = hotset.hot_cache;
    root.prefetch_map = hotset.prefetch_map;

    let l0_bytes = level0::write_level0(&root);

    // Output: L1 TLV data + L0 root (last 4096 bytes)
    let mut out = Vec::with_capacity(l1_bytes.len() + ROOT_MANIFEST_SIZE);
    out.extend_from_slice(&l1_bytes);
    out.extend_from_slice(&l0_bytes);
    out
}
|
||||
|
||||
/// Write a manifest to a writer (e.g., file).
///
/// This appends the manifest bytes and flushes.
///
/// # Errors
/// Any underlying I/O failure — on write or on flush — is reported as
/// `ErrorCode::FsyncFailed`; the original `std::io::Error` is discarded.
#[cfg(feature = "std")]
pub fn commit_manifest(
    file: &mut impl std::io::Write,
    manifest_bytes: &[u8],
) -> Result<(), rvf_types::RvfError> {
    file.write_all(manifest_bytes)
        .map_err(|_| rvf_types::RvfError::Code(rvf_types::ErrorCode::FsyncFailed))?;
    file.flush()
        .map_err(|_| rvf_types::RvfError::Code(rvf_types::ErrorCode::FsyncFailed))?;
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::directory::SegmentDirEntry;
    use rvf_types::EntrypointPtr;

    // Helper: two-entry directory spanning adjacent file regions.
    fn sample_dir() -> SegmentDirectory {
        SegmentDirectory {
            entries: vec![
                SegmentDirEntry {
                    segment_id: 1,
                    seg_type: 0x01,
                    tier: 0,
                    file_offset: 0,
                    payload_length: 4096,
                    ..SegmentDirEntry::default()
                },
                SegmentDirEntry {
                    segment_id: 2,
                    seg_type: 0x02,
                    tier: 1,
                    file_offset: 4096,
                    payload_length: 8192,
                    ..SegmentDirEntry::default()
                },
            ],
        }
    }

    // Helper: hotset with a distinctive entrypoint; everything else default.
    fn sample_hotset() -> HotsetPointers {
        HotsetPointers {
            entrypoint: EntrypointPtr {
                seg_offset: 0x100,
                block_offset: 0,
                count: 5,
            },
            ..Default::default()
        }
    }

    // The trailing 4096 bytes must be a checksummed Level 0 root that
    // carries the epoch and hotset pointers passed in.
    #[test]
    fn build_manifest_ends_with_level0() {
        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 1, None);
        assert!(manifest.len() > ROOT_MANIFEST_SIZE);

        // Last 4096 bytes should be a valid Level 0
        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        assert!(level0::validate_level0(l0_data));

        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.epoch, 1);
        assert_eq!(root.entrypoint.count, 5);
    }

    #[test]
    fn build_manifest_with_chain() {
        let chain = OverlayChain {
            epoch: 1,
            prev_manifest_offset: 0x1000,
            prev_manifest_id: 5,
            checkpoint_hash: [0xAB; 16],
        };

        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 2, Some(&chain));
        assert!(manifest.len() > ROOT_MANIFEST_SIZE);

        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.epoch, 2);
    }

    // build_manifest_at must record the caller-supplied offset in Level 0.
    #[test]
    fn build_manifest_at_with_offset() {
        let offset = 0x1_0000u64;
        let manifest = build_manifest_at(&sample_dir(), &sample_hotset(), 3, None, offset);

        let l0_start = manifest.len() - ROOT_MANIFEST_SIZE;
        let l0_data: &[u8; 4096] = manifest[l0_start..].try_into().unwrap();
        let root = level0::read_level0(l0_data).unwrap();
        assert_eq!(root.l1_manifest_offset, offset);
    }

    // commit_manifest should write the bytes through unmodified (Vec<u8>
    // implements std::io::Write).
    #[cfg(feature = "std")]
    #[test]
    fn commit_manifest_writes_to_vec() {
        let manifest = build_manifest(&sample_dir(), &sample_hotset(), 1, None);
        let mut output = Vec::new();
        commit_manifest(&mut output, &manifest).unwrap();
        assert_eq!(output, manifest);
    }
}
|
||||
Reference in New Issue
Block a user