// wifi-densepose/vendor/ruvector/crates/rvf/rvf-runtime/src/cow.rs
//! COW read/write engine for vector-addressed clusters.
//!
//! Cluster addressing: `cluster_id = vector_id / vectors_per_cluster`
//!
//! - **Read**: lookup in map -> LocalOffset (read local) or ParentRef (follow chain)
//! - **Write**: if inherited -> copy parent slab -> local, apply mutation, update map
//! - **Write coalescing**: multiple writes to the same inherited cluster are buffered;
//! on flush, the parent slab is copied once and all mutations applied.
use std::collections::HashMap;
use std::fs::File;
use std::io::{Seek, SeekFrom, Write};
use rvf_types::cow_map::CowMapEntry;
use rvf_types::{ErrorCode, RvfError};
use crate::cow_map::CowMap;
use crate::store::simple_shake256_256;
/// Witness event emitted when a COW slab copy or delta occurs.
///
/// Produced by `CowEngine::flush_writes` when an inherited (parent-referenced)
/// cluster is copied into the local file before mutation.
pub struct WitnessEvent {
    /// Event type: 0x0E = CLUSTER_COW, 0x0F = CLUSTER_DELTA.
    pub event_type: u8,
    /// ID of the cluster affected.
    pub cluster_id: u32,
    /// SHAKE-256-256 hash of the parent cluster data before copy.
    pub parent_cluster_hash: [u8; 32],
    /// SHAKE-256-256 hash of the new local cluster data after copy (i.e. after
    /// all pending mutations were applied).
    pub new_cluster_hash: [u8; 32],
}
/// A pending write buffered for coalescing.
///
/// Held in `CowEngine::write_buffer`, keyed by cluster id, until
/// `flush_writes` applies it to the on-disk cluster.
struct PendingWrite {
    /// Byte offset of the vector within the cluster.
    vector_offset_in_cluster: u32,
    /// Vector data to write (exactly `bytes_per_vector` bytes).
    data: Vec<u8>,
}
/// COW read/write engine for vector-addressed clusters.
pub struct CowEngine {
    /// The COW cluster map.
    cow_map: CowMap,
    /// Cluster size in bytes (power of 2).
    cluster_size: u32,
    /// Vectors per cluster.
    vectors_per_cluster: u32,
    /// Bytes per vector (dimension * sizeof(f32)).
    bytes_per_vector: u32,
    /// L0 cache: cluster_id -> resolved local file offset.
    /// Entries are only ever added for clusters that already have local data;
    /// local offsets are never relocated here, so entries stay valid.
    l0_cache: HashMap<u32, u64>,
    /// Write coalescing buffer: cluster_id -> pending writes, applied in
    /// insertion order on flush.
    write_buffer: HashMap<u32, Vec<PendingWrite>>,
    /// Whether this engine is frozen (snapshot).
    frozen: bool,
    /// Snapshot epoch (0 = mutable).
    snapshot_epoch: u32,
}
impl CowEngine {
    /// Create a new COW engine with an empty (flat) cluster map.
    ///
    /// # Panics
    /// Panics if `vectors_per_cluster` is 0 (would cause division by zero on read/write).
    pub fn new(cluster_size: u32, vectors_per_cluster: u32, bytes_per_vector: u32) -> Self {
        assert!(vectors_per_cluster > 0, "vectors_per_cluster must be > 0");
        Self {
            cow_map: CowMap::new_flat(0),
            cluster_size,
            vectors_per_cluster,
            bytes_per_vector,
            l0_cache: HashMap::new(),
            write_buffer: HashMap::new(),
            frozen: false,
            snapshot_epoch: 0,
        }
    }

    /// Create a COW engine initialized from a parent (all clusters point to parent).
    ///
    /// # Panics
    /// Panics if `vectors_per_cluster` is 0 (would cause division by zero on read/write).
    pub fn from_parent(
        cluster_count: u32,
        cluster_size: u32,
        vectors_per_cluster: u32,
        bytes_per_vector: u32,
    ) -> Self {
        assert!(vectors_per_cluster > 0, "vectors_per_cluster must be > 0");
        Self {
            cow_map: CowMap::new_parent_ref(cluster_count),
            cluster_size,
            vectors_per_cluster,
            bytes_per_vector,
            l0_cache: HashMap::new(),
            write_buffer: HashMap::new(),
            frozen: false,
            snapshot_epoch: 0,
        }
    }

    /// Get a reference to the underlying COW map.
    pub fn cow_map(&self) -> &CowMap {
        &self.cow_map
    }

    /// Read a vector by ID. Returns the vector's bytes.
    ///
    /// Note: reads do NOT see writes still sitting in the coalescing buffer;
    /// call [`flush_writes`](Self::flush_writes) first if read-your-writes is required.
    ///
    /// # Errors
    /// - `ClusterNotFound` if the vector lies outside the cluster bounds.
    /// - `ParentChainBroken` if the cluster is inherited and `parent` is `None`.
    pub fn read_vector(
        &self,
        vector_id: u64,
        file: &File,
        parent: Option<&File>,
    ) -> Result<Vec<u8>, RvfError> {
        let cluster_id = (vector_id / self.vectors_per_cluster as u64) as u32;
        let vector_index_in_cluster = (vector_id % self.vectors_per_cluster as u64) as u32;
        let vector_offset = vector_index_in_cluster * self.bytes_per_vector;
        let cluster_data = self.read_cluster(cluster_id, file, parent)?;
        let start = vector_offset as usize;
        let end = start + self.bytes_per_vector as usize;
        if end > cluster_data.len() {
            return Err(RvfError::Code(ErrorCode::ClusterNotFound));
        }
        Ok(cluster_data[start..end].to_vec())
    }

    /// Read an entire cluster. Returns cluster data.
    ///
    /// Resolution order: L0 offset cache, then the COW map
    /// (local offset / parent reference / unallocated-as-zeros).
    ///
    /// # Errors
    /// `ParentChainBroken` if the cluster is inherited and `parent` is `None`.
    pub fn read_cluster(
        &self,
        cluster_id: u32,
        file: &File,
        parent: Option<&File>,
    ) -> Result<Vec<u8>, RvfError> {
        // Check L0 cache first
        if let Some(&cached_offset) = self.l0_cache.get(&cluster_id) {
            return read_bytes_at(file, cached_offset, self.cluster_size as usize);
        }
        match self.cow_map.lookup(cluster_id) {
            CowMapEntry::LocalOffset(offset) => {
                read_bytes_at(file, offset, self.cluster_size as usize)
            }
            CowMapEntry::ParentRef => {
                // Parent clusters are stored contiguously, addressed by id.
                let parent_file = parent.ok_or(RvfError::Code(ErrorCode::ParentChainBroken))?;
                let parent_offset = cluster_id as u64 * self.cluster_size as u64;
                read_bytes_at(parent_file, parent_offset, self.cluster_size as usize)
            }
            CowMapEntry::Unallocated => {
                // Return a zeroed cluster for unallocated
                Ok(vec![0u8; self.cluster_size as usize])
            }
        }
    }

    /// Write a vector. Handles COW: copies parent slab if inherited.
    ///
    /// Writes are buffered for coalescing. Call [`flush_writes`](Self::flush_writes)
    /// to commit.
    ///
    /// # Errors
    /// - `SnapshotFrozen` if the engine has been frozen.
    /// - `DimensionMismatch` if `data.len() != bytes_per_vector`.
    /// - `ClusterNotFound` if the vector would fall outside the cluster (only
    ///   possible with inconsistent `cluster_size` / `vectors_per_cluster` /
    ///   `bytes_per_vector` configuration).
    pub fn write_vector(&mut self, vector_id: u64, data: &[u8]) -> Result<(), RvfError> {
        if self.frozen {
            return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
        }
        if data.len() != self.bytes_per_vector as usize {
            return Err(RvfError::Code(ErrorCode::DimensionMismatch));
        }
        let cluster_id = (vector_id / self.vectors_per_cluster as u64) as u32;
        let vector_index_in_cluster = (vector_id % self.vectors_per_cluster as u64) as u32;
        let vector_offset = vector_index_in_cluster * self.bytes_per_vector;
        // Validate bounds up front: detecting this only at flush time would
        // abort the flush midway and silently drop every other buffered write.
        if vector_offset as u64 + self.bytes_per_vector as u64 > self.cluster_size as u64 {
            return Err(RvfError::Code(ErrorCode::ClusterNotFound));
        }
        self.write_buffer
            .entry(cluster_id)
            .or_default()
            .push(PendingWrite {
                vector_offset_in_cluster: vector_offset,
                data: data.to_vec(),
            });
        Ok(())
    }

    /// Flush write coalescing buffer. Performs actual slab copies for inherited
    /// clusters and applies all pending mutations.
    ///
    /// Each touched cluster is materialized in memory (local read, parent copy,
    /// or zeroed page), mutated, and written exactly once — the parent slab is
    /// never written unmutated and then overwritten. The COW map and L0 cache
    /// are updated only after the cluster write succeeds. Ends with
    /// `File::sync_all` so the flush is durable.
    ///
    /// Returns one `CLUSTER_COW` (0x0E) witness event per parent cluster copied.
    ///
    /// # Errors
    /// - `SnapshotFrozen` if the engine has been frozen.
    /// - `ParentChainBroken` if an inherited cluster is flushed with `parent == None`.
    /// - `FsyncFailed` on any seek/write/sync failure.
    pub fn flush_writes(
        &mut self,
        file: &mut File,
        parent: Option<&File>,
    ) -> Result<Vec<WitnessEvent>, RvfError> {
        if self.frozen {
            return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
        }
        let pending: Vec<(u32, Vec<PendingWrite>)> = self.write_buffer.drain().collect();
        let mut witness_events = Vec::new();
        for (cluster_id, writes) in pending {
            // Materialize the cluster in memory. `existing_offset` is Some when
            // the cluster already lives in the local file; `parent_hash` is Some
            // when this flush performs a COW copy (and must emit a witness event).
            let (mut cluster_data, existing_offset, parent_hash) =
                match self.cow_map.lookup(cluster_id) {
                    CowMapEntry::LocalOffset(offset) => {
                        // Already local: read existing data and overwrite in place.
                        let data = read_bytes_at(file, offset, self.cluster_size as usize)?;
                        (data, Some(offset), None)
                    }
                    CowMapEntry::ParentRef => {
                        // COW: copy parent slab, hash it before mutation.
                        let parent_file =
                            parent.ok_or(RvfError::Code(ErrorCode::ParentChainBroken))?;
                        let parent_offset = cluster_id as u64 * self.cluster_size as u64;
                        let data =
                            read_bytes_at(parent_file, parent_offset, self.cluster_size as usize)?;
                        let hash = simple_shake256_256(&data);
                        (data, None, Some(hash))
                    }
                    CowMapEntry::Unallocated => {
                        // Fresh zeroed cluster; allocated at end of file below.
                        (vec![0u8; self.cluster_size as usize], None, None)
                    }
                };
            // Apply all pending writes to the in-memory cluster (defense in
            // depth: write_vector already validated the bounds).
            for pw in &writes {
                let start = pw.vector_offset_in_cluster as usize;
                let end = start + pw.data.len();
                if end > cluster_data.len() {
                    return Err(RvfError::Code(ErrorCode::ClusterNotFound));
                }
                cluster_data[start..end].copy_from_slice(&pw.data);
            }
            // Single write: in place for local clusters, appended for new ones.
            let offset = match existing_offset {
                Some(offset) => {
                    file.seek(SeekFrom::Start(offset))
                        .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
                    offset
                }
                None => file
                    .seek(SeekFrom::End(0))
                    .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?,
            };
            file.write_all(&cluster_data)
                .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
            // Publish the mapping only after the data landed.
            self.cow_map
                .update(cluster_id, CowMapEntry::LocalOffset(offset));
            self.l0_cache.insert(cluster_id, offset);
            if let Some(parent_cluster_hash) = parent_hash {
                witness_events.push(WitnessEvent {
                    event_type: 0x0E, // CLUSTER_COW
                    cluster_id,
                    parent_cluster_hash,
                    new_cluster_hash: simple_shake256_256(&cluster_data),
                });
            }
        }
        file.sync_all()
            .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
        Ok(witness_events)
    }

    /// Snapshot-freeze: set epoch, prevent further writes to this generation.
    ///
    /// # Errors
    /// - `SnapshotFrozen` if already frozen.
    /// - `FsyncFailed` if unflushed writes remain (call `flush_writes` first).
    ///   NOTE(review): this reuses `FsyncFailed` as a "dirty buffer" signal; a
    ///   dedicated error code would be clearer, but callers may match on it.
    pub fn freeze(&mut self, epoch: u32) -> Result<(), RvfError> {
        if self.frozen {
            return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
        }
        if !self.write_buffer.is_empty() {
            return Err(RvfError::Code(ErrorCode::FsyncFailed));
        }
        self.frozen = true;
        self.snapshot_epoch = epoch;
        Ok(())
    }

    /// Check if frozen.
    pub fn is_frozen(&self) -> bool {
        self.frozen
    }

    /// Get the snapshot epoch.
    pub fn snapshot_epoch(&self) -> u32 {
        self.snapshot_epoch
    }

    /// Get COW statistics (cluster counts, freeze state, pending write count).
    pub fn stats(&self) -> CowStats {
        CowStats {
            cluster_count: self.cow_map.cluster_count(),
            local_cluster_count: self.cow_map.local_cluster_count(),
            cluster_size: self.cluster_size,
            vectors_per_cluster: self.vectors_per_cluster,
            frozen: self.frozen,
            snapshot_epoch: self.snapshot_epoch,
            pending_writes: self.write_buffer.values().map(|v| v.len()).sum(),
        }
    }
}
/// Statistics about the COW engine state.
///
/// A point-in-time snapshot returned by `CowEngine::stats`.
pub struct CowStats {
    /// Total clusters in the map.
    pub cluster_count: u32,
    /// Clusters with local data (COW-copied or newly written).
    pub local_cluster_count: u32,
    /// Cluster size in bytes.
    pub cluster_size: u32,
    /// Vectors per cluster.
    pub vectors_per_cluster: u32,
    /// Whether the engine is frozen.
    pub frozen: bool,
    /// Snapshot epoch (0 = mutable).
    pub snapshot_epoch: u32,
    /// Number of pending writes in the coalescing buffer (sum over all clusters).
    pub pending_writes: usize,
}
/// Read exactly `len` bytes from `file` starting at `offset`.
///
/// Unix build: positioned read (`pread` via `FileExt::read_exact_at`), which
/// skips the seek syscall and leaves the file cursor untouched on the hot path.
#[cfg(unix)]
fn read_bytes_at(file: &File, offset: u64, len: usize) -> Result<Vec<u8>, RvfError> {
    use std::os::unix::fs::FileExt;
    let mut out = vec![0u8; len];
    match file.read_exact_at(&mut out, offset) {
        Ok(()) => Ok(out),
        Err(_) => Err(RvfError::Code(ErrorCode::ClusterNotFound)),
    }
}
/// Read exactly `len` bytes from `file` starting at `offset` (non-Unix fallback).
///
/// Seeks and reads through a reborrow of `&File` (which implements `Seek` and
/// `Read`), so callers keep their shared handle. No `BufReader` is used: for a
/// single exact-length read its buffer is pure overhead and it would prefetch
/// past `len`, issuing a larger read than requested and moving the OS file
/// cursor further than necessary.
///
/// # Errors
/// `FsyncFailed` if the seek fails; `ClusterNotFound` if fewer than `len`
/// bytes are available at `offset`.
#[cfg(not(unix))]
fn read_bytes_at(file: &File, offset: u64, len: usize) -> Result<Vec<u8>, RvfError> {
    use std::io::Read;
    let mut f = file;
    f.seek(SeekFrom::Start(offset))
        .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
    let mut buf = vec![0u8; len];
    f.read_exact(&mut buf)
        .map_err(|_| RvfError::Code(ErrorCode::ClusterNotFound))?;
    Ok(buf)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Build a parent file of `cluster_count` contiguous clusters, each filled
    /// with its own (truncated) cluster id so reads can be traced to their source.
    fn create_parent_file(cluster_size: u32, cluster_count: u32) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        for id in 0..cluster_count {
            let fill = (id & 0xFF) as u8;
            file.write_all(&vec![fill; cluster_size as usize]).unwrap();
        }
        file.flush().unwrap();
        file
    }

    #[test]
    fn cow_read_from_parent() {
        let (cluster_size, vecs_per_cluster, bytes_per_vec) = (256u32, 4u32, 64u32);
        let parent = create_parent_file(cluster_size, 4);
        let child = NamedTempFile::new().unwrap();
        let engine = CowEngine::from_parent(4, cluster_size, vecs_per_cluster, bytes_per_vec);
        // Cluster 2 is inherited: the read must follow the parent reference.
        let data = engine
            .read_cluster(2, child.as_file(), Some(parent.as_file()))
            .unwrap();
        assert_eq!(data.len(), cluster_size as usize);
        assert!(data.iter().all(|&b| b == 2));
    }

    #[test]
    fn cow_write_triggers_copy() {
        let (cluster_size, vecs_per_cluster, bytes_per_vec) = (128u32, 2u32, 64u32);
        let parent = create_parent_file(cluster_size, 2);
        let child = NamedTempFile::new().unwrap();
        let mut engine =
            CowEngine::from_parent(2, cluster_size, vecs_per_cluster, bytes_per_vec);
        // Mutating vector 0 dirties cluster 0, which is still parent-owned.
        engine
            .write_vector(0, &vec![0xAA; bytes_per_vec as usize])
            .unwrap();
        let mut writable = child.as_file().try_clone().unwrap();
        let events = engine
            .flush_writes(&mut writable, Some(parent.as_file()))
            .unwrap();
        // Exactly one CLUSTER_COW event for the single copied cluster.
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, 0x0E);
        assert_eq!(events[0].cluster_id, 0);
        // The cluster must now be backed by local data.
        assert_eq!(engine.cow_map().local_cluster_count(), 1);
    }

    #[test]
    fn cow_write_coalescing() {
        let (cluster_size, vecs_per_cluster, bytes_per_vec) = (128u32, 2u32, 64u32);
        let parent = create_parent_file(cluster_size, 2);
        let child = NamedTempFile::new().unwrap();
        let mut engine =
            CowEngine::from_parent(2, cluster_size, vecs_per_cluster, bytes_per_vec);
        // Vectors 0 and 1 both live in cluster 0.
        engine
            .write_vector(0, &vec![0xAA; bytes_per_vec as usize])
            .unwrap();
        engine
            .write_vector(1, &vec![0xBB; bytes_per_vec as usize])
            .unwrap();
        let mut writable = child.as_file().try_clone().unwrap();
        let events = engine
            .flush_writes(&mut writable, Some(parent.as_file()))
            .unwrap();
        // Coalescing: two buffered writes, but only one COW copy event.
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].cluster_id, 0);
    }

    #[test]
    fn cow_frozen_rejects_writes() {
        let mut engine = CowEngine::new(128, 2, 64);
        engine.freeze(1).unwrap();
        assert!(engine.is_frozen());
        assert!(engine.write_vector(0, &vec![0u8; 64]).is_err());
    }

    #[test]
    fn cow_read_unallocated_returns_zeros() {
        let engine = CowEngine::new(128, 2, 64);
        let child = NamedTempFile::new().unwrap();
        let data = engine.read_cluster(0, child.as_file(), None).unwrap();
        assert_eq!(data.len(), 128);
        assert!(data.iter().all(|&b| b == 0));
    }

    #[test]
    fn cow_stats() {
        let mut engine = CowEngine::from_parent(4, 256, 4, 64);
        let before = engine.stats();
        assert_eq!(before.cluster_count, 4);
        assert_eq!(before.local_cluster_count, 0);
        assert!(!before.frozen);
        // A buffered (unflushed) write shows up as pending.
        engine.write_vector(0, &vec![0u8; 64]).unwrap();
        assert_eq!(engine.stats().pending_writes, 1);
    }
}