Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
275
vendor/ruvector/crates/rvf/rvf-runtime/src/adversarial.rs
vendored
Normal file
275
vendor/ruvector/crates/rvf/rvf-runtime/src/adversarial.rs
vendored
Normal file
@@ -0,0 +1,275 @@
|
||||
//! Adversarial distribution detection and adaptive n_probe for ADR-033 §2.
|
||||
//!
|
||||
//! Detects degenerate centroid distance distributions that indicate
|
||||
//! adversarial or pathological input, and automatically widens the
|
||||
//! search to compensate.
|
||||
|
||||
/// Coefficient of variation threshold below which centroid distances
/// are considered degenerate (no discriminative power).
///
/// CV = stddev / mean over the top-2k nearest centroid distances; below
/// 5% the distances are effectively uniform and routing is untrustworthy.
pub const DEGENERATE_CV_THRESHOLD: f32 = 0.05;
|
||||
|
||||
/// Detect adversarial or degenerate centroid distance distributions.
|
||||
///
|
||||
/// Returns `true` if the distribution is too uniform to trust centroid
|
||||
/// routing — all top-K distances are within 5% CV of each other.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `distances` - Distances from query to all centroids.
|
||||
/// * `k` - Number of probes (n_probe) to consider.
|
||||
pub fn is_degenerate_distribution(distances: &[f32], k: usize) -> bool {
|
||||
if k == 0 || distances.len() < 2 {
|
||||
return true;
|
||||
}
|
||||
|
||||
let sample_size = (2 * k).min(distances.len());
|
||||
|
||||
// Partial sort to get top-2k smallest distances.
|
||||
let mut sorted = distances.to_vec();
|
||||
sorted.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal));
|
||||
let top = &sorted[..sample_size];
|
||||
|
||||
// Compute coefficient of variation (CV = stddev / mean).
|
||||
let sum: f32 = top.iter().sum();
|
||||
let mean = sum / top.len() as f32;
|
||||
|
||||
if mean < f32::EPSILON {
|
||||
return true; // All distances near zero.
|
||||
}
|
||||
|
||||
let variance = top.iter().map(|d| (d - mean).powi(2)).sum::<f32>() / top.len() as f32;
|
||||
|
||||
// Guard against NaN from floating-point rounding producing negative variance.
|
||||
if !variance.is_finite() || variance < 0.0 {
|
||||
return true; // Treat non-finite variance as degenerate.
|
||||
}
|
||||
|
||||
let cv = variance.sqrt() / mean;
|
||||
|
||||
cv < DEGENERATE_CV_THRESHOLD
|
||||
}
|
||||
|
||||
/// Compute the coefficient of variation for top-K centroid distances.
///
/// Returns the CV value for reporting in `SearchEvidenceSummary`.
/// Degenerate inputs (`k == 0`, fewer than two distances, a near-zero
/// mean, or a non-finite variance) report `0.0` rather than an error.
///
/// # Arguments
/// * `distances` - Distances from query to all centroids.
/// * `k` - Number of probes (n_probe) to consider.
pub fn centroid_distance_cv(distances: &[f32], k: usize) -> f32 {
    if k == 0 || distances.len() < 2 {
        return 0.0;
    }

    let sample_size = (2 * k).min(distances.len());

    // Partial selection (O(n) average) instead of a full sort: the first
    // `sample_size` elements end up being the smallest distances, in
    // arbitrary order — the statistics below are order-independent.
    let mut scratch = distances.to_vec();
    scratch.select_nth_unstable_by(sample_size - 1, |a, b| {
        a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal)
    });
    let top = &scratch[..sample_size];

    let mean = top.iter().sum::<f32>() / top.len() as f32;
    if mean < f32::EPSILON {
        return 0.0; // All distances near zero: CV is undefined, report 0.
    }

    let variance = top.iter().map(|d| (d - mean).powi(2)).sum::<f32>() / top.len() as f32;

    // Guard against NaN/Inf propagating from pathological inputs.
    if !variance.is_finite() || variance < 0.0 {
        return 0.0;
    }

    variance.sqrt() / mean
}
|
||||
|
||||
/// Adaptively widen n_probe when degenerate distributions are detected.
|
||||
///
|
||||
/// When distances are uniform, centroids provide no discriminative power.
|
||||
/// Widen to sqrt(K) or 4x base, whichever is smaller.
|
||||
pub fn adaptive_n_probe(
|
||||
base_n_probe: u32,
|
||||
centroid_distances: &[f32],
|
||||
total_centroids: u32,
|
||||
) -> u32 {
|
||||
if is_degenerate_distribution(centroid_distances, base_n_probe as usize) {
|
||||
let widened = (total_centroids as f64).sqrt().ceil() as u32;
|
||||
base_n_probe.max(widened).min(base_n_probe * 4)
|
||||
} else {
|
||||
base_n_probe
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute effective n_probe with epoch drift compensation.
///
/// When centroid epoch drift is detected, widen n_probe to compensate
/// for stale centroids. Linear widening up to 2x at max_drift.
///
/// # Arguments
/// * `base_n_probe` - Unadjusted probe count.
/// * `epoch_drift` - Observed centroid epoch drift.
/// * `max_drift` - Drift at which widening reaches its 2x maximum; 0 disables
///   drift compensation entirely.
pub fn effective_n_probe_with_drift(base_n_probe: u32, epoch_drift: u32, max_drift: u32) -> u32 {
    if max_drift == 0 {
        return base_n_probe; // Drift compensation disabled.
    }

    let half_drift = max_drift / 2;

    if epoch_drift <= half_drift {
        // Within comfort zone: no adjustment.
        base_n_probe
    } else if epoch_drift <= max_drift {
        // Drift zone: linear widening from 1x at half_drift to 2x at max_drift.
        //
        // The slope denominator is the width of the drift zone
        // (max_drift - half_drift), not max_drift: dividing by max_drift
        // topped out at only 1.5x at the boundary and then jumped
        // discontinuously to 2x just past it. This form reaches exactly 2x
        // at epoch_drift == max_drift, matching the documented behavior.
        let numerator = epoch_drift - half_drift;
        let scale = 1.0 + numerator as f64 / (max_drift - half_drift) as f64;
        (base_n_probe as f64 * scale).ceil() as u32
    } else {
        // Beyond max drift: double n_probe (saturating), schedule recomputation.
        base_n_probe.saturating_mul(2)
    }
}
|
||||
|
||||
/// Combined n_probe adjustment: applies both drift and adversarial widening.
|
||||
///
|
||||
/// The maximum of the two widened values is used.
|
||||
pub fn combined_effective_n_probe(
|
||||
base_n_probe: u32,
|
||||
centroid_distances: &[f32],
|
||||
total_centroids: u32,
|
||||
epoch_drift: u32,
|
||||
max_drift: u32,
|
||||
) -> (u32, bool) {
|
||||
let drift_adjusted = effective_n_probe_with_drift(base_n_probe, epoch_drift, max_drift);
|
||||
let adversarial_adjusted = adaptive_n_probe(base_n_probe, centroid_distances, total_centroids);
|
||||
let degenerate = is_degenerate_distribution(centroid_distances, base_n_probe as usize);
|
||||
|
||||
// Cap at 4x base to prevent drift+adversarial from stacking unboundedly.
|
||||
let combined = drift_adjusted
|
||||
.max(adversarial_adjusted)
|
||||
.min(base_n_probe.saturating_mul(4));
|
||||
|
||||
(combined, degenerate)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn degenerate_uniform_distances() {
        // All distances identical — maximally degenerate.
        let distances = vec![1.0; 100];
        assert!(is_degenerate_distribution(&distances, 10));
    }

    #[test]
    fn non_degenerate_natural_distances() {
        // Well-separated distances — natural embeddings.
        let distances: Vec<f32> = (0..100).map(|i| i as f32 * 0.1).collect();
        assert!(!is_degenerate_distribution(&distances, 10));
    }

    #[test]
    fn degenerate_nearly_uniform() {
        // Distances within 1% of each other — degenerate.
        let distances: Vec<f32> = (0..100).map(|i| 1.0 + (i as f32) * 0.001).collect();
        assert!(is_degenerate_distribution(&distances, 10));
    }

    #[test]
    fn degenerate_empty() {
        // No distances at all: fail safe, report degenerate.
        assert!(is_degenerate_distribution(&[], 10));
    }

    #[test]
    fn degenerate_single() {
        // A single distance has no spread to measure.
        assert!(is_degenerate_distribution(&[1.0], 1));
    }

    #[test]
    fn degenerate_zero_k() {
        // k == 0 means no probes requested; treated as degenerate.
        assert!(is_degenerate_distribution(&[1.0, 2.0], 0));
    }

    #[test]
    fn degenerate_all_zeros() {
        // Near-zero mean triggers the all-near-zero early return.
        let distances = vec![0.0; 100];
        assert!(is_degenerate_distribution(&distances, 10));
    }

    #[test]
    fn cv_computation_natural() {
        let distances: Vec<f32> = (0..100).map(|i| i as f32 * 0.1).collect();
        let cv = centroid_distance_cv(&distances, 10);
        assert!(cv > DEGENERATE_CV_THRESHOLD);
    }

    #[test]
    fn cv_computation_uniform() {
        let distances = vec![1.0; 100];
        let cv = centroid_distance_cv(&distances, 10);
        assert!(cv < DEGENERATE_CV_THRESHOLD);
    }

    #[test]
    fn adaptive_n_probe_no_change() {
        let distances: Vec<f32> = (0..100).map(|i| i as f32).collect();
        let result = adaptive_n_probe(10, &distances, 100);
        assert_eq!(result, 10); // No widening needed.
    }

    #[test]
    fn adaptive_n_probe_widens_on_degenerate() {
        let distances = vec![1.0; 100];
        let result = adaptive_n_probe(10, &distances, 100);
        // sqrt(100) = 10, max(10, 10) = 10, min(10, 40) = 10
        assert!(result >= 10);
    }

    #[test]
    fn adaptive_n_probe_widens_large_k() {
        let distances = vec![1.0; 1000];
        let result = adaptive_n_probe(4, &distances, 1000);
        // sqrt(1000) ≈ 32, max(4, 32) = 32, min(32, 16) = 16
        assert_eq!(result, 16); // Capped at 4x base.
    }

    #[test]
    fn drift_no_adjustment() {
        let result = effective_n_probe_with_drift(10, 0, 64);
        assert_eq!(result, 10);
    }

    #[test]
    fn drift_within_comfort_zone() {
        let result = effective_n_probe_with_drift(10, 20, 64);
        assert_eq!(result, 10); // 20 <= 64/2 = 32
    }

    #[test]
    fn drift_linear_widening() {
        let result = effective_n_probe_with_drift(10, 48, 64);
        // Past the comfort zone but within max drift: the result must lie
        // strictly above base and at or below the 2x doubling cap.
        assert!(result > 10 && result <= 20);
    }

    #[test]
    fn drift_beyond_max() {
        let result = effective_n_probe_with_drift(10, 100, 64);
        assert_eq!(result, 20); // Doubled.
    }

    #[test]
    fn drift_zero_max() {
        let result = effective_n_probe_with_drift(10, 50, 0);
        assert_eq!(result, 10); // No drift adjustment possible.
    }

    #[test]
    fn combined_takes_max() {
        // Natural distances (no adversarial widening) but high drift.
        let distances: Vec<f32> = (0..100).map(|i| i as f32).collect();
        let (result, degenerate) = combined_effective_n_probe(10, &distances, 100, 100, 64);
        assert_eq!(result, 20); // Drift dominates.
        assert!(!degenerate);
    }

    #[test]
    fn combined_adversarial_dominates() {
        // Uniform distances (adversarial) but no drift.
        let distances = vec![1.0; 1000];
        let (result, degenerate) = combined_effective_n_probe(4, &distances, 1000, 0, 64);
        assert!(result >= 4);
        assert!(degenerate);
    }
}
|
||||
566
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_authority.rs
vendored
Normal file
566
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_authority.rs
vendored
Normal file
@@ -0,0 +1,566 @@
|
||||
//! Authority and resource budget enforcement runtime for AGI containers (ADR-036).
|
||||
//!
|
||||
//! - [`AuthorityGuard`]: enforces per-execution authority levels and per-action-class
|
||||
//! overrides, ensuring container actions never exceed their granted privileges.
|
||||
//! - [`BudgetTracker`]: tracks resource consumption against a [`ResourceBudget`] and
|
||||
//! returns `BudgetExhausted` errors when any resource is at its limit.
|
||||
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
/// Number of [`ActionClass`] variants; sizes the fixed per-class override
/// table in [`AuthorityGuard`].
const ACTION_CLASS_COUNT: usize = 10;
|
||||
|
||||
/// Classification of actions that a container execution may perform.
///
/// Each class can be independently granted a different [`AuthorityLevel`]
/// via [`AuthorityGuard::grant_action_class`].
///
/// Discriminants are explicit and contiguous (`0..ACTION_CLASS_COUNT`) so a
/// variant can be used directly as an index into the override table.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum ActionClass {
    ReadMemory = 0,
    WriteMemory = 1,
    ReadFile = 2,
    WriteFile = 3,
    RunTest = 4,
    RunCommand = 5,
    GitPush = 6,
    CreatePR = 7,
    SendMessage = 8,
    ModifyInfra = 9,
}
|
||||
|
||||
impl ActionClass {
    /// All variants in discriminant order.
    ///
    /// The index of each variant in this array equals its `u8` discriminant,
    /// which is what allows [`AuthorityGuard`] to index its override table
    /// with `class as usize`.
    pub const ALL: [ActionClass; ACTION_CLASS_COUNT] = [
        Self::ReadMemory,
        Self::WriteMemory,
        Self::ReadFile,
        Self::WriteFile,
        Self::RunTest,
        Self::RunCommand,
        Self::GitPush,
        Self::CreatePR,
        Self::SendMessage,
        Self::ModifyInfra,
    ];
}
|
||||
|
||||
/// Runtime guard enforcing authority levels for container execution.
///
/// Holds a global maximum authority (from execution mode or explicit) and an
/// optional per-action-class override table as a fixed-size array.
#[derive(Clone, Debug)]
pub struct AuthorityGuard {
    // Global ceiling: no check may succeed above this level.
    max_authority: AuthorityLevel,
    // Execution mode the guard was built for (exposed via `mode()` only).
    mode: ExecutionMode,
    // Indexed by `ActionClass as usize`; `None` inherits the global maximum.
    class_overrides: [Option<AuthorityLevel>; ACTION_CLASS_COUNT],
}
|
||||
|
||||
impl AuthorityGuard {
|
||||
/// Create a guard using the default authority for the given execution mode.
|
||||
pub fn new(mode: ExecutionMode) -> Self {
|
||||
Self {
|
||||
max_authority: AuthorityLevel::default_for_mode(mode),
|
||||
mode,
|
||||
class_overrides: [None; ACTION_CLASS_COUNT],
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a guard with an explicit maximum authority level.
|
||||
pub fn with_max_authority(mode: ExecutionMode, max: AuthorityLevel) -> Self {
|
||||
Self {
|
||||
max_authority: max,
|
||||
mode,
|
||||
class_overrides: [None; ACTION_CLASS_COUNT],
|
||||
}
|
||||
}
|
||||
|
||||
/// The execution mode this guard was created for.
|
||||
pub fn mode(&self) -> ExecutionMode {
|
||||
self.mode
|
||||
}
|
||||
|
||||
/// The global maximum authority level.
|
||||
pub fn max_authority(&self) -> AuthorityLevel {
|
||||
self.max_authority
|
||||
}
|
||||
|
||||
/// Check whether the guard permits the `required` level.
|
||||
pub fn check(&self, required: AuthorityLevel) -> Result<(), ContainerError> {
|
||||
if self.max_authority.permits(required) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ContainerError::InsufficientAuthority {
|
||||
required: required as u8,
|
||||
granted: self.max_authority as u8,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Check authority for a specific action class.
|
||||
///
|
||||
/// Per-class overrides are capped by the global maximum to prevent escalation.
|
||||
pub fn check_action_class(
|
||||
&self,
|
||||
class: ActionClass,
|
||||
required: AuthorityLevel,
|
||||
) -> Result<(), ContainerError> {
|
||||
let effective = match self.class_overrides[class as usize] {
|
||||
Some(o) if (o as u8) <= (self.max_authority as u8) => o,
|
||||
Some(_) => self.max_authority,
|
||||
None => self.max_authority,
|
||||
};
|
||||
if effective.permits(required) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ContainerError::InsufficientAuthority {
|
||||
required: required as u8,
|
||||
granted: effective as u8,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Grant authority for a specific action class (capped by global max at check time).
|
||||
pub fn grant_action_class(&mut self, class: ActionClass, level: AuthorityLevel) {
|
||||
self.class_overrides[class as usize] = Some(level);
|
||||
}
|
||||
}
|
||||
|
||||
/// Percentage utilization for each resource dimension (0.0..=100.0).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BudgetUtilization {
    /// Wall-clock time used, as a percentage of the budget.
    pub time_pct: f32,
    /// Tokens consumed, as a percentage of the budget.
    pub tokens_pct: f32,
    /// Cost spent, as a percentage of the budget.
    pub cost_pct: f32,
    /// Tool calls made, as a percentage of the budget.
    pub tool_calls_pct: f32,
    /// External writes performed, as a percentage of the budget.
    pub external_writes_pct: f32,
}
|
||||
|
||||
/// Point-in-time snapshot of budget state.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BudgetSnapshot {
    /// The (clamped) budget being enforced at snapshot time.
    pub budget: ResourceBudget,
    /// Wall-clock seconds charged so far.
    pub used_time_secs: u32,
    /// Tokens charged so far.
    pub used_tokens: u32,
    /// Cost charged so far, in microdollars.
    pub used_cost_microdollars: u32,
    /// Tool calls charged so far.
    pub used_tool_calls: u16,
    /// External writes charged so far.
    pub used_external_writes: u16,
}
|
||||
|
||||
/// Tracks resource consumption against a [`ResourceBudget`].
///
/// Each `charge_*` method adds to the running total and returns
/// [`ContainerError::BudgetExhausted`] if the total would exceed the budget.
#[derive(Clone, Debug)]
pub struct BudgetTracker {
    // Budget limits, clamped to hard maximums at construction.
    budget: ResourceBudget,
    // Running totals; each only grows, via the corresponding `charge_*` call.
    used_time_secs: u32,
    used_tokens: u32,
    used_cost_microdollars: u32,
    used_tool_calls: u16,
    used_external_writes: u16,
}
|
||||
|
||||
impl BudgetTracker {
    /// Create a tracker with the given budget (clamped to hard maximums).
    pub fn new(budget: ResourceBudget) -> Self {
        Self {
            budget: budget.clamped(),
            used_time_secs: 0,
            used_tokens: 0,
            used_cost_microdollars: 0,
            used_tool_calls: 0,
            used_external_writes: 0,
        }
    }

    /// The clamped budget this tracker enforces.
    pub fn budget(&self) -> &ResourceBudget {
        &self.budget
    }

    /// Charge token usage.
    ///
    /// The charge is rejected (and NOT applied) if it would push the total
    /// over the budget. `saturating_add` keeps the provisional total from
    /// wrapping before the limit comparison.
    pub fn charge_tokens(&mut self, tokens: u32) -> Result<(), ContainerError> {
        let t = self.used_tokens.saturating_add(tokens);
        if t > self.budget.max_tokens {
            return Err(ContainerError::BudgetExhausted("tokens"));
        }
        self.used_tokens = t;
        Ok(())
    }

    /// Charge cost in microdollars. Rejected charges are not applied.
    pub fn charge_cost(&mut self, microdollars: u32) -> Result<(), ContainerError> {
        let t = self.used_cost_microdollars.saturating_add(microdollars);
        if t > self.budget.max_cost_microdollars {
            return Err(ContainerError::BudgetExhausted("cost"));
        }
        self.used_cost_microdollars = t;
        Ok(())
    }

    /// Charge one tool call. Rejected charges are not applied.
    ///
    /// Note: a zero `max_tool_calls` budget rejects every call.
    pub fn charge_tool_call(&mut self) -> Result<(), ContainerError> {
        let t = self.used_tool_calls.saturating_add(1);
        if t > self.budget.max_tool_calls {
            return Err(ContainerError::BudgetExhausted("tool_calls"));
        }
        self.used_tool_calls = t;
        Ok(())
    }

    /// Charge one external write. Rejected charges are not applied.
    ///
    /// Note: a zero `max_external_writes` budget rejects every write.
    pub fn charge_external_write(&mut self) -> Result<(), ContainerError> {
        let t = self.used_external_writes.saturating_add(1);
        if t > self.budget.max_external_writes {
            return Err(ContainerError::BudgetExhausted("external_writes"));
        }
        self.used_external_writes = t;
        Ok(())
    }

    /// Charge wall-clock time in seconds. Rejected charges are not applied.
    pub fn charge_time(&mut self, secs: u32) -> Result<(), ContainerError> {
        let t = self.used_time_secs.saturating_add(secs);
        if t > self.budget.max_time_secs {
            return Err(ContainerError::BudgetExhausted("time"));
        }
        self.used_time_secs = t;
        Ok(())
    }

    /// Remaining tokens before exhaustion.
    pub fn remaining_tokens(&self) -> u32 {
        self.budget.max_tokens.saturating_sub(self.used_tokens)
    }

    /// Remaining cost budget in microdollars.
    pub fn remaining_cost(&self) -> u32 {
        self.budget
            .max_cost_microdollars
            .saturating_sub(self.used_cost_microdollars)
    }

    /// Remaining wall-clock time in seconds.
    pub fn remaining_time(&self) -> u32 {
        self.budget
            .max_time_secs
            .saturating_sub(self.used_time_secs)
    }

    /// Compute utilization percentages for each resource dimension.
    ///
    /// Each value is clamped to 100%; see `pct` for the zero-max convention.
    pub fn utilization(&self) -> BudgetUtilization {
        BudgetUtilization {
            time_pct: pct(self.used_time_secs as f32, self.budget.max_time_secs as f32),
            tokens_pct: pct(self.used_tokens as f32, self.budget.max_tokens as f32),
            cost_pct: pct(
                self.used_cost_microdollars as f32,
                self.budget.max_cost_microdollars as f32,
            ),
            tool_calls_pct: pct(
                self.used_tool_calls as f32,
                self.budget.max_tool_calls as f32,
            ),
            external_writes_pct: pct(
                self.used_external_writes as f32,
                self.budget.max_external_writes as f32,
            ),
        }
    }

    /// Returns `true` if ANY resource dimension with a non-zero budget has reached
    /// its limit. A zero-max dimension is disabled, not exhausted.
    pub fn is_exhausted(&self) -> bool {
        (self.budget.max_time_secs > 0 && self.used_time_secs >= self.budget.max_time_secs)
            || (self.budget.max_tokens > 0 && self.used_tokens >= self.budget.max_tokens)
            || (self.budget.max_cost_microdollars > 0
                && self.used_cost_microdollars >= self.budget.max_cost_microdollars)
            || (self.budget.max_tool_calls > 0
                && self.used_tool_calls >= self.budget.max_tool_calls)
            || (self.budget.max_external_writes > 0
                && self.used_external_writes >= self.budget.max_external_writes)
    }

    /// Capture a point-in-time snapshot of the tracker state.
    pub fn snapshot(&self) -> BudgetSnapshot {
        BudgetSnapshot {
            budget: self.budget,
            used_time_secs: self.used_time_secs,
            used_tokens: self.used_tokens,
            used_cost_microdollars: self.used_cost_microdollars,
            used_tool_calls: self.used_tool_calls,
            used_external_writes: self.used_external_writes,
        }
    }
}
|
||||
|
||||
// Map `used`/`max` to a clamped percentage in 0.0..=100.0.
//
// A zero `max` marks a disabled dimension: any usage at all reports 100%,
// and no usage reports 0%.
fn pct(used: f32, max: f32) -> f32 {
    if max == 0.0 {
        return if used > 0.0 { 100.0 } else { 0.0 };
    }
    (used / max * 100.0).min(100.0)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_authority_per_mode() {
        // Each execution mode maps to a documented default ceiling.
        let r = AuthorityGuard::new(ExecutionMode::Replay);
        assert_eq!(r.max_authority(), AuthorityLevel::ReadOnly);
        assert_eq!(r.mode(), ExecutionMode::Replay);
        assert_eq!(
            AuthorityGuard::new(ExecutionMode::Verify).max_authority(),
            AuthorityLevel::ExecuteTools
        );
        assert_eq!(
            AuthorityGuard::new(ExecutionMode::Live).max_authority(),
            AuthorityLevel::WriteMemory
        );
    }

    #[test]
    fn check_pass_and_fail() {
        let g = AuthorityGuard::new(ExecutionMode::Verify);
        assert!(g.check(AuthorityLevel::ReadOnly).is_ok());
        assert!(g.check(AuthorityLevel::ExecuteTools).is_ok());
        // The error carries both the required and granted discriminants.
        assert_eq!(
            g.check(AuthorityLevel::WriteExternal).unwrap_err(),
            ContainerError::InsufficientAuthority {
                required: 3,
                granted: 2
            }
        );
        let ro = AuthorityGuard::new(ExecutionMode::Replay);
        assert_eq!(
            ro.check(AuthorityLevel::WriteMemory).unwrap_err(),
            ContainerError::InsufficientAuthority {
                required: 1,
                granted: 0
            }
        );
    }

    #[test]
    fn with_max_authority_overrides_default() {
        let g = AuthorityGuard::with_max_authority(
            ExecutionMode::Replay,
            AuthorityLevel::WriteExternal,
        );
        assert_eq!(g.mode(), ExecutionMode::Replay);
        assert!(g.check(AuthorityLevel::WriteExternal).is_ok());
    }

    #[test]
    fn action_class_grant_restrict_and_inherit() {
        let mut g =
            AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::WriteExternal);
        assert!(g
            .check_action_class(ActionClass::GitPush, AuthorityLevel::WriteExternal)
            .is_ok());
        // A grant BELOW the global max restricts that class only.
        g.grant_action_class(ActionClass::GitPush, AuthorityLevel::ReadOnly);
        assert_eq!(
            g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteMemory)
                .unwrap_err(),
            ContainerError::InsufficientAuthority {
                required: 1,
                granted: 0
            }
        );
        // Ungranted classes inherit the global maximum.
        assert!(g
            .check_action_class(ActionClass::ReadMemory, AuthorityLevel::WriteExternal)
            .is_ok());
    }

    #[test]
    fn action_class_override_capped_by_global() {
        // An override ABOVE the global max must not escalate privileges.
        let mut g = AuthorityGuard::new(ExecutionMode::Replay);
        g.grant_action_class(ActionClass::RunCommand, AuthorityLevel::WriteExternal);
        assert!(g
            .check_action_class(ActionClass::RunCommand, AuthorityLevel::WriteMemory)
            .is_err());
    }

    #[test]
    fn action_class_override_within_global() {
        let mut g =
            AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::ExecuteTools);
        g.grant_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory);
        assert!(g
            .check_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory)
            .is_ok());
        assert!(g
            .check_action_class(ActionClass::WriteFile, AuthorityLevel::ExecuteTools)
            .is_err());
    }

    #[test]
    fn action_class_all_variants() {
        // ALL must list every variant, in discriminant order.
        assert_eq!(ActionClass::ALL.len(), ACTION_CLASS_COUNT);
        for (i, c) in ActionClass::ALL.iter().enumerate() {
            assert_eq!(*c as usize, i);
        }
    }

    #[test]
    fn tracker_zero_usage() {
        let t = BudgetTracker::new(ResourceBudget::DEFAULT);
        assert_eq!(t.remaining_tokens(), 200_000);
        assert_eq!(t.remaining_cost(), 1_000_000);
        assert_eq!(t.remaining_time(), 300);
        assert!(!t.is_exhausted());
    }

    #[test]
    fn charge_and_exhaust_each_resource() {
        // Charging exactly to the limit succeeds; one more unit fails.
        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        assert!(t.charge_tokens(200_000).is_ok());
        assert_eq!(
            t.charge_tokens(1),
            Err(ContainerError::BudgetExhausted("tokens"))
        );

        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        assert!(t.charge_cost(1_000_000).is_ok());
        assert_eq!(
            t.charge_cost(1),
            Err(ContainerError::BudgetExhausted("cost"))
        );

        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        for _ in 0..50 {
            t.charge_tool_call().unwrap();
        }
        assert!(t.charge_tool_call().is_err());

        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        assert!(t.charge_time(300).is_ok());
        assert!(t.charge_time(1).is_err());

        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        assert!(t.charge_external_write().is_err()); // zero budget

        let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
        for _ in 0..10 {
            t.charge_external_write().unwrap();
        }
        assert!(t.charge_external_write().is_err());
    }

    #[test]
    fn is_exhausted_semantics() {
        assert!(!BudgetTracker::new(ResourceBudget::DEFAULT).is_exhausted());
        // Exhausting any single dimension flips the flag.
        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        t.charge_tokens(200_000).unwrap();
        assert!(t.is_exhausted());
    }

    #[test]
    fn utilization_calculation() {
        let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
        t.charge_tokens(100_000).unwrap();
        t.charge_time(150).unwrap();
        t.charge_cost(500_000).unwrap();
        let u = t.utilization();
        assert!((u.tokens_pct - 50.0).abs() < 0.01);
        assert!((u.time_pct - 50.0).abs() < 0.01);
        assert!((u.cost_pct - 50.0).abs() < 0.01);

        t.charge_tokens(100_000).unwrap();
        let u2 = t.utilization();
        assert!((u2.tokens_pct - 100.0).abs() < 0.01);

        // Zero-max dimensions with zero usage report 0%.
        let z = BudgetTracker::new(ResourceBudget {
            max_time_secs: 0,
            max_tokens: 0,
            max_cost_microdollars: 0,
            max_tool_calls: 0,
            max_external_writes: 0,
        });
        assert!((z.utilization().time_pct).abs() < 0.01);
    }

    #[test]
    fn snapshot_captures_state() {
        let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
        t.charge_tokens(5_000).unwrap();
        t.charge_time(60).unwrap();
        t.charge_cost(100_000).unwrap();
        t.charge_tool_call().unwrap();
        t.charge_external_write().unwrap();
        let s = t.snapshot();
        assert_eq!(s.budget, ResourceBudget::EXTENDED);
        assert_eq!(s.used_tokens, 5_000);
        assert_eq!(s.used_time_secs, 60);
        assert_eq!(s.used_cost_microdollars, 100_000);
        assert_eq!(s.used_tool_calls, 1);
        assert_eq!(s.used_external_writes, 1);
    }

    #[test]
    fn budget_clamped_on_creation() {
        // Over-limit budgets are clamped to ResourceBudget::MAX by new().
        let t = BudgetTracker::new(ResourceBudget {
            max_time_secs: 999_999,
            max_tokens: 999_999_999,
            max_cost_microdollars: 999_999_999,
            max_tool_calls: 60_000,
            max_external_writes: 60_000,
        });
        let b = t.budget();
        assert_eq!(b.max_time_secs, ResourceBudget::MAX.max_time_secs);
        assert_eq!(b.max_tokens, ResourceBudget::MAX.max_tokens);
        assert_eq!(
            b.max_external_writes,
            ResourceBudget::MAX.max_external_writes
        );
    }

    #[test]
    fn charge_exactly_at_limit() {
        let mut t = BudgetTracker::new(ResourceBudget {
            max_time_secs: 10,
            max_tokens: 100,
            max_cost_microdollars: 500,
            max_tool_calls: 3,
            max_external_writes: 2,
        });
        assert!(t.charge_tokens(100).is_ok());
        assert!(t.charge_time(10).is_ok());
        assert!(t.charge_cost(500).is_ok());
        for _ in 0..3 {
            t.charge_tool_call().unwrap();
        }
        for _ in 0..2 {
            t.charge_external_write().unwrap();
        }
        assert_eq!(t.remaining_tokens(), 0);
        assert_eq!(t.remaining_cost(), 0);
        assert_eq!(t.remaining_time(), 0);
        assert!(t.is_exhausted());
        assert!(t.charge_tokens(1).is_err());
        assert!(t.charge_time(1).is_err());
        assert!(t.charge_cost(1).is_err());
        assert!(t.charge_tool_call().is_err());
        assert!(t.charge_external_write().is_err());
    }

    #[test]
    fn max_budget_allows_high_usage() {
        let mut t = BudgetTracker::new(ResourceBudget::MAX);
        assert!(t.charge_tokens(1_000_000).is_ok());
        assert!(t.charge_time(3600).is_ok());
        assert!(t.charge_cost(10_000_000).is_ok());
        for _ in 0..500 {
            t.charge_tool_call().unwrap();
        }
        for _ in 0..50 {
            t.charge_external_write().unwrap();
        }
        assert!(t.is_exhausted());
    }
}
|
||||
678
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_coherence.rs
vendored
Normal file
678
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_coherence.rs
vendored
Normal file
@@ -0,0 +1,678 @@
|
||||
//! Coherence monitoring and container validation pipeline (ADR-036).
|
||||
//!
|
||||
//! This module provides two main components:
|
||||
//!
|
||||
//! - [`CoherenceMonitor`]: tracks real-time coherence state across events,
|
||||
//! contradictions, and task rollbacks, transitioning through [`CoherenceState`]
|
||||
//! to gate commits, skill promotion, and autonomous execution.
|
||||
//!
|
||||
//! - [`ContainerValidator`]: runs the full validation pipeline over an AGI
|
||||
//! container's header, segments, and coherence thresholds, collecting all
|
||||
//! errors rather than short-circuiting.
|
||||
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceState
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Runtime coherence state derived from threshold checks.
///
/// States are listed in increasing severity; each gates a different class
/// of system action (commits, skill promotion, autonomous execution).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CoherenceState {
    /// All signals within bounds.
    Healthy,
    /// Contradiction rate exceeded -- skill promotion frozen.
    SkillFreeze,
    /// Coherence score below minimum -- commits blocked, repair mode.
    RepairMode,
    /// Rollback ratio exceeded -- execution halted, human review required.
    Halted,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceReport
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Point-in-time snapshot of all coherence metrics.
#[derive(Clone, Debug)]
pub struct CoherenceReport {
    /// State derived from the thresholds at report time.
    pub state: CoherenceState,
    /// Most recently reported coherence score.
    pub coherence_score: f32,
    /// Contradictions per event (0.0 when no events recorded).
    pub contradiction_rate: f32,
    /// Rollbacks per completed task (0.0 when no tasks recorded).
    pub rollback_ratio: f32,
    /// Total events recorded.
    pub total_events: u64,
    /// Total contradictions recorded.
    pub total_contradictions: u64,
    /// Total completed tasks recorded.
    pub total_tasks: u64,
    /// Total rolled-back tasks recorded.
    pub total_rollbacks: u64,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceMonitor
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Tracks real-time coherence state and gates system actions.
pub struct CoherenceMonitor {
    // Validated threshold configuration (see `CoherenceThresholds`).
    thresholds: CoherenceThresholds,
    // Latest coherence score reported via `update_coherence`.
    current_coherence: f32,
    // Monotonic counters; saturating increments in the record_* methods.
    total_events: u64,
    total_contradictions: u64,
    total_tasks: u64,
    total_rollbacks: u64,
    // Derived state, recomputed after each score update or recorded signal.
    state: CoherenceState,
}
|
||||
|
||||
impl CoherenceMonitor {
|
||||
/// Create a new monitor with the given thresholds. Returns an error if the
|
||||
/// thresholds are out of valid ranges.
|
||||
pub fn new(thresholds: CoherenceThresholds) -> Result<Self, ContainerError> {
|
||||
thresholds.validate()?;
|
||||
Ok(Self {
|
||||
thresholds,
|
||||
current_coherence: 1.0,
|
||||
total_events: 0,
|
||||
total_contradictions: 0,
|
||||
total_tasks: 0,
|
||||
total_rollbacks: 0,
|
||||
state: CoherenceState::Healthy,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a monitor with [`CoherenceThresholds::DEFAULT`].
|
||||
pub fn with_defaults() -> Self {
|
||||
Self {
|
||||
thresholds: CoherenceThresholds::DEFAULT,
|
||||
current_coherence: 1.0,
|
||||
total_events: 0,
|
||||
total_contradictions: 0,
|
||||
total_tasks: 0,
|
||||
total_rollbacks: 0,
|
||||
state: CoherenceState::Healthy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the current coherence score and re-evaluate state.
|
||||
pub fn update_coherence(&mut self, score: f32) {
|
||||
self.current_coherence = score;
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Record a generic event (increments event counter).
|
||||
pub fn record_event(&mut self) {
|
||||
self.total_events = self.total_events.saturating_add(1);
|
||||
}
|
||||
|
||||
/// Record a contradiction event and re-evaluate state.
|
||||
pub fn record_contradiction(&mut self) {
|
||||
self.total_contradictions = self.total_contradictions.saturating_add(1);
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Record a task completion. If `rolled_back` is true the rollback counter
|
||||
/// is also incremented. State is re-evaluated afterward.
|
||||
pub fn record_task_completion(&mut self, rolled_back: bool) {
|
||||
self.total_tasks = self.total_tasks.saturating_add(1);
|
||||
if rolled_back {
|
||||
self.total_rollbacks = self.total_rollbacks.saturating_add(1);
|
||||
}
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Current coherence state.
|
||||
pub fn state(&self) -> CoherenceState {
|
||||
self.state
|
||||
}
|
||||
|
||||
/// Whether the system may commit world-model deltas. True when
|
||||
/// [`CoherenceState::Healthy`] or [`CoherenceState::SkillFreeze`].
|
||||
pub fn can_commit(&self) -> bool {
|
||||
matches!(
|
||||
self.state,
|
||||
CoherenceState::Healthy | CoherenceState::SkillFreeze
|
||||
)
|
||||
}
|
||||
|
||||
/// Whether new skills may be promoted. True only when
|
||||
/// [`CoherenceState::Healthy`].
|
||||
pub fn can_promote_skill(&self) -> bool {
|
||||
self.state == CoherenceState::Healthy
|
||||
}
|
||||
|
||||
/// Whether the system requires human review. True when
|
||||
/// [`CoherenceState::Halted`].
|
||||
pub fn requires_human_review(&self) -> bool {
|
||||
self.state == CoherenceState::Halted
|
||||
}
|
||||
|
||||
/// Contradiction rate: contradictions per 100 events.
|
||||
/// Returns `0.0` when there are no events.
|
||||
pub fn contradiction_rate(&self) -> f32 {
|
||||
if self.total_events == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
(self.total_contradictions as f32 / self.total_events as f32) * 100.0
|
||||
}
|
||||
|
||||
/// Rollback ratio: rollbacks / total tasks.
|
||||
/// Returns `0.0` when there are no tasks.
|
||||
pub fn rollback_ratio(&self) -> f32 {
|
||||
if self.total_tasks == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
self.total_rollbacks as f32 / self.total_tasks as f32
|
||||
}
|
||||
|
||||
/// Produce a point-in-time snapshot of all metrics.
|
||||
pub fn report(&self) -> CoherenceReport {
|
||||
CoherenceReport {
|
||||
state: self.state,
|
||||
coherence_score: self.current_coherence,
|
||||
contradiction_rate: self.contradiction_rate(),
|
||||
rollback_ratio: self.rollback_ratio(),
|
||||
total_events: self.total_events,
|
||||
total_contradictions: self.total_contradictions,
|
||||
total_tasks: self.total_tasks,
|
||||
total_rollbacks: self.total_rollbacks,
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Internal
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Determine the worst applicable state from the current metrics.
|
||||
/// Priority (most severe first): Halted > RepairMode > SkillFreeze > Healthy.
|
||||
fn recompute_state(&mut self) {
|
||||
if self.rollback_ratio() > self.thresholds.max_rollback_ratio {
|
||||
self.state = CoherenceState::Halted;
|
||||
} else if self.current_coherence < self.thresholds.min_coherence_score {
|
||||
self.state = CoherenceState::RepairMode;
|
||||
} else if self.contradiction_rate() > self.thresholds.max_contradiction_rate {
|
||||
self.state = CoherenceState::SkillFreeze;
|
||||
} else {
|
||||
self.state = CoherenceState::Healthy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ContainerValidator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Full validation pipeline for an AGI container.
pub struct ContainerValidator {
    /// Execution mode the container is validated against; segment
    /// requirements differ per mode (see `ContainerSegments::validate`).
    mode: ExecutionMode,
}
|
||||
|
||||
impl ContainerValidator {
|
||||
/// Create a validator for the given execution mode.
|
||||
pub fn new(mode: ExecutionMode) -> Self {
|
||||
Self { mode }
|
||||
}
|
||||
|
||||
/// Validate container segments against mode requirements.
|
||||
pub fn validate_segments(&self, segments: &ContainerSegments) -> Result<(), ContainerError> {
|
||||
segments.validate(self.mode)
|
||||
}
|
||||
|
||||
/// Validate the AGI container header.
|
||||
///
|
||||
/// Checks: magic bytes, version (must be 1), and flag consistency --
|
||||
/// replay-capable containers must not claim Live-only features without
|
||||
/// the kernel flag.
|
||||
pub fn validate_header(&self, header: &AgiContainerHeader) -> Result<(), ContainerError> {
|
||||
if !header.is_valid_magic() {
|
||||
return Err(ContainerError::InvalidConfig("bad magic bytes"));
|
||||
}
|
||||
if header.version == 0 || header.version > 1 {
|
||||
return Err(ContainerError::InvalidConfig("unsupported header version"));
|
||||
}
|
||||
// Flag consistency: if REPLAY_CAPABLE is set the container should
|
||||
// also have the witness flag, since replays depend on witness chains.
|
||||
if header.is_replay_capable() && (header.flags & AGI_HAS_WITNESS == 0) {
|
||||
return Err(ContainerError::InvalidConfig(
|
||||
"replay-capable flag requires witness flag",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate coherence threshold ranges.
|
||||
pub fn validate_coherence(
|
||||
&self,
|
||||
thresholds: &CoherenceThresholds,
|
||||
) -> Result<(), ContainerError> {
|
||||
thresholds.validate()
|
||||
}
|
||||
|
||||
/// Run all validations, collecting every error rather than
|
||||
/// short-circuiting on the first failure.
|
||||
pub fn validate_full(
|
||||
&self,
|
||||
header: &AgiContainerHeader,
|
||||
segments: &ContainerSegments,
|
||||
thresholds: &CoherenceThresholds,
|
||||
) -> Vec<ContainerError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
if let Err(e) = self.validate_header(header) {
|
||||
errors.push(e);
|
||||
}
|
||||
if let Err(e) = self.validate_segments(segments) {
|
||||
errors.push(e);
|
||||
}
|
||||
if let Err(e) = self.validate_coherence(thresholds) {
|
||||
errors.push(e);
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // -- Helpers --

    /// Header with valid magic, version 1, and a consistent flag set
    /// (replay-capable together with kernel and witness flags).
    fn valid_header() -> AgiContainerHeader {
        AgiContainerHeader {
            magic: AGI_MAGIC,
            version: 1,
            flags: AGI_HAS_KERNEL | AGI_HAS_WITNESS | AGI_REPLAY_CAPABLE,
            container_id: [0x01; 16],
            build_id: [0x02; 16],
            created_ns: 1_700_000_000_000_000_000,
            model_id_hash: [0; 8],
            policy_hash: [0; 8],
        }
    }

    /// Segment inventory that satisfies Live-mode validation.
    fn valid_segments() -> ContainerSegments {
        ContainerSegments {
            manifest_present: true,
            kernel_present: true,
            world_model_present: true,
            ..Default::default()
        }
    }

    // -----------------------------------------------------------------------
    // CoherenceMonitor — state transitions
    // -----------------------------------------------------------------------

    #[test]
    fn monitor_starts_healthy() {
        let m = CoherenceMonitor::with_defaults();
        assert_eq!(m.state(), CoherenceState::Healthy);
    }

    #[test]
    fn monitor_healthy_to_skill_freeze() {
        let mut m = CoherenceMonitor::with_defaults();
        // Default max_contradiction_rate is 5.0 per 100 events.
        // Record 100 events with 6 contradictions -> rate = 6.0 > 5.0.
        for _ in 0..100 {
            m.record_event();
        }
        for _ in 0..6 {
            m.record_contradiction();
        }
        assert_eq!(m.state(), CoherenceState::SkillFreeze);
    }

    #[test]
    fn monitor_healthy_to_repair_mode() {
        let mut m = CoherenceMonitor::with_defaults();
        // Default min_coherence_score is 0.70.
        m.update_coherence(0.50);
        assert_eq!(m.state(), CoherenceState::RepairMode);
    }

    #[test]
    fn monitor_healthy_to_halted() {
        let mut m = CoherenceMonitor::with_defaults();
        // Default max_rollback_ratio is 0.20.
        // 10 tasks, 3 rolled back -> ratio = 0.30 > 0.20.
        for _ in 0..7 {
            m.record_task_completion(false);
        }
        for _ in 0..3 {
            m.record_task_completion(true);
        }
        assert_eq!(m.state(), CoherenceState::Halted);
    }

    #[test]
    fn halted_takes_priority_over_repair_mode() {
        let mut m = CoherenceMonitor::with_defaults();
        // Both rollback ratio and coherence score are bad.
        m.update_coherence(0.50);
        for _ in 0..5 {
            m.record_task_completion(false);
        }
        for _ in 0..5 {
            m.record_task_completion(true);
        }
        // Halted (rollback) is highest severity and wins.
        assert_eq!(m.state(), CoherenceState::Halted);
    }

    #[test]
    fn repair_mode_takes_priority_over_skill_freeze() {
        let mut m = CoherenceMonitor::with_defaults();
        // Both coherence and contradiction rate are bad.
        m.update_coherence(0.50);
        for _ in 0..100 {
            m.record_event();
        }
        for _ in 0..10 {
            m.record_contradiction();
        }
        // RepairMode wins over SkillFreeze.
        assert_eq!(m.state(), CoherenceState::RepairMode);
    }

    #[test]
    fn recovery_back_to_healthy() {
        let mut m = CoherenceMonitor::with_defaults();
        m.update_coherence(0.50);
        assert_eq!(m.state(), CoherenceState::RepairMode);
        m.update_coherence(0.90);
        assert_eq!(m.state(), CoherenceState::Healthy);
    }

    // -----------------------------------------------------------------------
    // CoherenceMonitor — gate queries
    // -----------------------------------------------------------------------

    #[test]
    fn can_commit_when_healthy() {
        let m = CoherenceMonitor::with_defaults();
        assert!(m.can_commit());
    }

    #[test]
    fn can_commit_when_skill_freeze() {
        let mut m = CoherenceMonitor::with_defaults();
        for _ in 0..100 {
            m.record_event();
        }
        for _ in 0..6 {
            m.record_contradiction();
        }
        assert_eq!(m.state(), CoherenceState::SkillFreeze);
        assert!(m.can_commit());
    }

    #[test]
    fn cannot_commit_when_repair_mode() {
        let mut m = CoherenceMonitor::with_defaults();
        m.update_coherence(0.50);
        assert!(!m.can_commit());
    }

    #[test]
    fn cannot_commit_when_halted() {
        let mut m = CoherenceMonitor::with_defaults();
        m.record_task_completion(true); // 1 task, 1 rollback -> ratio = 1.0
        assert!(!m.can_commit());
    }

    #[test]
    fn can_promote_skill_only_when_healthy() {
        let m = CoherenceMonitor::with_defaults();
        assert!(m.can_promote_skill());

        let mut m2 = CoherenceMonitor::with_defaults();
        for _ in 0..100 {
            m2.record_event();
        }
        for _ in 0..6 {
            m2.record_contradiction();
        }
        assert_eq!(m2.state(), CoherenceState::SkillFreeze);
        assert!(!m2.can_promote_skill());
    }

    #[test]
    fn requires_human_review_only_when_halted() {
        let m = CoherenceMonitor::with_defaults();
        assert!(!m.requires_human_review());

        let mut m2 = CoherenceMonitor::with_defaults();
        m2.record_task_completion(true);
        assert_eq!(m2.state(), CoherenceState::Halted);
        assert!(m2.requires_human_review());
    }

    // -----------------------------------------------------------------------
    // CoherenceMonitor — rate calculations
    // -----------------------------------------------------------------------

    #[test]
    fn contradiction_rate_calculation() {
        let mut m = CoherenceMonitor::with_defaults();
        for _ in 0..200 {
            m.record_event();
        }
        for _ in 0..4 {
            m.record_contradiction();
        }
        // 4 contradictions in (200 events + 4 contradiction events via record_contradiction)
        // record_contradiction does not call record_event, so total_events = 200.
        // Rate = (4 / 200) * 100 = 2.0.
        assert!((m.contradiction_rate() - 2.0).abs() < f32::EPSILON);
    }

    #[test]
    fn rollback_ratio_calculation() {
        let mut m = CoherenceMonitor::with_defaults();
        for _ in 0..8 {
            m.record_task_completion(false);
        }
        for _ in 0..2 {
            m.record_task_completion(true);
        }
        // 2 rollbacks / 10 tasks = 0.20.
        assert!((m.rollback_ratio() - 0.20).abs() < f32::EPSILON);
    }

    // -----------------------------------------------------------------------
    // CoherenceMonitor — edge cases
    // -----------------------------------------------------------------------

    #[test]
    fn contradiction_rate_zero_events() {
        let m = CoherenceMonitor::with_defaults();
        assert!((m.contradiction_rate() - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn rollback_ratio_zero_tasks() {
        let m = CoherenceMonitor::with_defaults();
        assert!((m.rollback_ratio() - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn new_with_invalid_thresholds() {
        let bad = CoherenceThresholds {
            min_coherence_score: 2.0,
            ..CoherenceThresholds::DEFAULT
        };
        assert!(CoherenceMonitor::new(bad).is_err());
    }

    #[test]
    fn new_with_valid_thresholds() {
        let m = CoherenceMonitor::new(CoherenceThresholds::STRICT).unwrap();
        assert_eq!(m.state(), CoherenceState::Healthy);
    }

    #[test]
    fn report_snapshot() {
        let mut m = CoherenceMonitor::with_defaults();
        m.update_coherence(0.85);
        for _ in 0..50 {
            m.record_event();
        }
        m.record_contradiction();
        // 9 successful + 1 rollback = ratio 0.10, within the 0.20 threshold.
        for _ in 0..9 {
            m.record_task_completion(false);
        }
        m.record_task_completion(true);

        let r = m.report();
        assert_eq!(r.state, CoherenceState::Healthy);
        assert!((r.coherence_score - 0.85).abs() < f32::EPSILON);
        assert_eq!(r.total_events, 50);
        assert_eq!(r.total_contradictions, 1);
        assert_eq!(r.total_tasks, 10);
        assert_eq!(r.total_rollbacks, 1);
    }

    // -----------------------------------------------------------------------
    // ContainerValidator — validate_segments
    // -----------------------------------------------------------------------

    #[test]
    fn validator_segments_delegates_ok() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let segs = valid_segments();
        assert!(v.validate_segments(&segs).is_ok());
    }

    #[test]
    fn validator_segments_delegates_error() {
        let v = ContainerValidator::new(ExecutionMode::Replay);
        let segs = ContainerSegments {
            manifest_present: true,
            witness_count: 0,
            ..Default::default()
        };
        assert!(v.validate_segments(&segs).is_err());
    }

    // -----------------------------------------------------------------------
    // ContainerValidator — validate_header
    // -----------------------------------------------------------------------

    #[test]
    fn validator_header_ok() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        assert!(v.validate_header(&valid_header()).is_ok());
    }

    #[test]
    fn validator_header_bad_magic() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let mut h = valid_header();
        h.magic = 0xDEADBEEF;
        assert_eq!(
            v.validate_header(&h),
            Err(ContainerError::InvalidConfig("bad magic bytes"))
        );
    }

    #[test]
    fn validator_header_bad_version_zero() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let mut h = valid_header();
        h.version = 0;
        assert_eq!(
            v.validate_header(&h),
            Err(ContainerError::InvalidConfig("unsupported header version"))
        );
    }

    #[test]
    fn validator_header_bad_version_future() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let mut h = valid_header();
        h.version = 99;
        assert_eq!(
            v.validate_header(&h),
            Err(ContainerError::InvalidConfig("unsupported header version"))
        );
    }

    #[test]
    fn validator_header_replay_without_witness() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let h = AgiContainerHeader {
            magic: AGI_MAGIC,
            version: 1,
            flags: AGI_REPLAY_CAPABLE, // missing AGI_HAS_WITNESS
            container_id: [0; 16],
            build_id: [0; 16],
            created_ns: 0,
            model_id_hash: [0; 8],
            policy_hash: [0; 8],
        };
        assert_eq!(
            v.validate_header(&h),
            Err(ContainerError::InvalidConfig(
                "replay-capable flag requires witness flag"
            ))
        );
    }

    // -----------------------------------------------------------------------
    // ContainerValidator — validate_full
    // -----------------------------------------------------------------------

    #[test]
    fn validator_full_all_ok() {
        let v = ContainerValidator::new(ExecutionMode::Live);
        let errs = v.validate_full(
            &valid_header(),
            &valid_segments(),
            &CoherenceThresholds::DEFAULT,
        );
        assert!(errs.is_empty());
    }

    #[test]
    fn validator_full_collects_multiple_errors() {
        let v = ContainerValidator::new(ExecutionMode::Live);

        // Bad header (wrong magic).
        let mut h = valid_header();
        h.magic = 0xBAD0CAFE;

        // Bad segments (no kernel, no wasm, no world model for Live).
        let segs = ContainerSegments {
            manifest_present: true,
            ..Default::default()
        };

        // Bad thresholds.
        let bad_thresh = CoherenceThresholds {
            min_coherence_score: -1.0,
            ..CoherenceThresholds::DEFAULT
        };

        let errs = v.validate_full(&h, &segs, &bad_thresh);
        // Expect at least 3 errors: header, segments, and thresholds.
        assert!(errs.len() >= 3, "expected >= 3 errors, got {}", errs.len());
    }

    #[test]
    fn validator_full_partial_errors() {
        let v = ContainerValidator::new(ExecutionMode::Replay);

        // Good header, bad segments (replay needs witness), good thresholds.
        let segs = ContainerSegments {
            manifest_present: true,
            witness_count: 0,
            ..Default::default()
        };

        let errs = v.validate_full(&valid_header(), &segs, &CoherenceThresholds::DEFAULT);
        assert_eq!(errs.len(), 1);
    }
}
|
||||
620
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_container.rs
vendored
Normal file
620
vendor/ruvector/crates/rvf/rvf-runtime/src/agi_container.rs
vendored
Normal file
@@ -0,0 +1,620 @@
|
||||
//! AGI Cognitive Container builder and validator (ADR-036).
|
||||
//!
|
||||
//! Assembles a complete intelligence runtime into a single RVF artifact:
|
||||
//! micro Linux kernel, Claude Code + Claude Flow configs, world model,
|
||||
//! evaluation harness, witness chains, tool adapters, and policies.
|
||||
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
use crate::seed_crypto;
|
||||
|
||||
/// Builder for assembling an AGI cognitive container manifest.
///
/// The manifest is a META segment in the RVF file. Other segments
/// (KERNEL_SEG, WASM_SEG, VEC_SEG, etc.) are added through the
/// main RVF write path; this builder only handles the manifest.
#[derive(Clone, Debug)]
pub struct AgiContainerBuilder {
    /// Container UUID.
    pub container_id: [u8; 16],
    /// Build UUID.
    pub build_id: [u8; 16],
    /// Pinned model identifier string.
    pub model_id: Option<Vec<u8>>,
    /// Governance policy (binary).
    pub policy: Option<Vec<u8>>,
    /// Policy hash from ADR-035 GovernancePolicy.
    pub policy_hash: [u8; 8],
    /// Orchestrator config (Claude Code + Claude Flow).
    pub orchestrator_config: Option<Vec<u8>>,
    /// MCP tool adapter registry.
    pub tool_registry: Option<Vec<u8>>,
    /// Agent role prompts.
    pub agent_prompts: Option<Vec<u8>>,
    /// Evaluation task suite.
    pub eval_tasks: Option<Vec<u8>>,
    /// Grading rules.
    pub eval_graders: Option<Vec<u8>>,
    /// Skill library.
    pub skill_library: Option<Vec<u8>>,
    /// Replay script.
    pub replay_script: Option<Vec<u8>>,
    /// Kernel boot config.
    pub kernel_config: Option<Vec<u8>>,
    /// Network config.
    pub network_config: Option<Vec<u8>>,
    /// Coherence gate config.
    pub coherence_config: Option<Vec<u8>>,
    /// Project instructions (CLAUDE.md).
    pub project_instructions: Option<Vec<u8>>,
    /// Dependency snapshot.
    pub dependency_snapshot: Option<Vec<u8>>,
    /// Authority level and resource budget config.
    pub authority_config: Option<Vec<u8>>,
    /// Target domain profile.
    pub domain_profile: Option<Vec<u8>>,
    /// Segment inventory.
    pub segments: ContainerSegments,
    /// Extra flags to OR in (accumulated by the `with_*` setters).
    pub extra_flags: u16,
}
|
||||
|
||||
impl AgiContainerBuilder {
|
||||
/// Create a new builder with container and build IDs.
|
||||
pub fn new(container_id: [u8; 16], build_id: [u8; 16]) -> Self {
|
||||
Self {
|
||||
container_id,
|
||||
build_id,
|
||||
model_id: None,
|
||||
policy: None,
|
||||
policy_hash: [0; 8],
|
||||
orchestrator_config: None,
|
||||
tool_registry: None,
|
||||
agent_prompts: None,
|
||||
eval_tasks: None,
|
||||
eval_graders: None,
|
||||
skill_library: None,
|
||||
replay_script: None,
|
||||
kernel_config: None,
|
||||
network_config: None,
|
||||
coherence_config: None,
|
||||
project_instructions: None,
|
||||
dependency_snapshot: None,
|
||||
authority_config: None,
|
||||
domain_profile: None,
|
||||
segments: ContainerSegments::default(),
|
||||
extra_flags: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pin the model to a specific version.
|
||||
pub fn with_model_id(mut self, model_id: &str) -> Self {
|
||||
self.model_id = Some(model_id.as_bytes().to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the governance policy.
|
||||
pub fn with_policy(mut self, policy: &[u8], hash: [u8; 8]) -> Self {
|
||||
self.policy = Some(policy.to_vec());
|
||||
self.policy_hash = hash;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the Claude Code + Claude Flow orchestrator config.
|
||||
pub fn with_orchestrator(mut self, config: &[u8]) -> Self {
|
||||
self.orchestrator_config = Some(config.to_vec());
|
||||
self.extra_flags |= AGI_HAS_ORCHESTRATOR;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the MCP tool adapter registry.
|
||||
pub fn with_tool_registry(mut self, registry: &[u8]) -> Self {
|
||||
self.tool_registry = Some(registry.to_vec());
|
||||
self.extra_flags |= AGI_HAS_TOOLS;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set agent role prompts.
|
||||
pub fn with_agent_prompts(mut self, prompts: &[u8]) -> Self {
|
||||
self.agent_prompts = Some(prompts.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the evaluation task suite.
|
||||
pub fn with_eval_tasks(mut self, tasks: &[u8]) -> Self {
|
||||
self.eval_tasks = Some(tasks.to_vec());
|
||||
self.extra_flags |= AGI_HAS_EVAL;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the grading rules.
|
||||
pub fn with_eval_graders(mut self, graders: &[u8]) -> Self {
|
||||
self.eval_graders = Some(graders.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the promoted skill library.
|
||||
pub fn with_skill_library(mut self, skills: &[u8]) -> Self {
|
||||
self.skill_library = Some(skills.to_vec());
|
||||
self.extra_flags |= AGI_HAS_SKILLS;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the replay automation script.
|
||||
pub fn with_replay_script(mut self, script: &[u8]) -> Self {
|
||||
self.replay_script = Some(script.to_vec());
|
||||
self.extra_flags |= AGI_REPLAY_CAPABLE;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the kernel boot configuration.
|
||||
pub fn with_kernel_config(mut self, config: &[u8]) -> Self {
|
||||
self.kernel_config = Some(config.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the network configuration.
|
||||
pub fn with_network_config(mut self, config: &[u8]) -> Self {
|
||||
self.network_config = Some(config.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the coherence gate configuration.
|
||||
pub fn with_coherence_config(mut self, config: &[u8]) -> Self {
|
||||
self.coherence_config = Some(config.to_vec());
|
||||
self.extra_flags |= AGI_HAS_COHERENCE_GATES;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the project instructions (CLAUDE.md content).
|
||||
pub fn with_project_instructions(mut self, instructions: &[u8]) -> Self {
|
||||
self.project_instructions = Some(instructions.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the dependency snapshot.
|
||||
pub fn with_dependency_snapshot(mut self, snapshot: &[u8]) -> Self {
|
||||
self.dependency_snapshot = Some(snapshot.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set authority and resource budget configuration.
|
||||
pub fn with_authority_config(mut self, config: &[u8]) -> Self {
|
||||
self.authority_config = Some(config.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the target domain profile.
|
||||
pub fn with_domain_profile(mut self, profile: &[u8]) -> Self {
|
||||
self.domain_profile = Some(profile.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Mark offline capability.
|
||||
pub fn offline_capable(mut self) -> Self {
|
||||
self.extra_flags |= AGI_OFFLINE_CAPABLE;
|
||||
self
|
||||
}
|
||||
|
||||
/// Declare the segment inventory.
|
||||
pub fn with_segments(mut self, segments: ContainerSegments) -> Self {
|
||||
self.segments = segments;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the manifest TLV payload (sections only, no header).
|
||||
fn build_sections(&self) -> Vec<u8> {
|
||||
let mut payload = Vec::new();
|
||||
|
||||
let mut write_section = |tag: u16, data: &[u8]| {
|
||||
payload.extend_from_slice(&tag.to_le_bytes());
|
||||
payload.extend_from_slice(&(data.len() as u32).to_le_bytes());
|
||||
payload.extend_from_slice(data);
|
||||
};
|
||||
|
||||
write_section(AGI_TAG_CONTAINER_ID, &self.container_id);
|
||||
write_section(AGI_TAG_BUILD_ID, &self.build_id);
|
||||
|
||||
if let Some(ref mid) = self.model_id {
|
||||
write_section(AGI_TAG_MODEL_ID, mid);
|
||||
}
|
||||
if let Some(ref p) = self.policy {
|
||||
write_section(AGI_TAG_POLICY, p);
|
||||
}
|
||||
if let Some(ref oc) = self.orchestrator_config {
|
||||
write_section(AGI_TAG_ORCHESTRATOR, oc);
|
||||
}
|
||||
if let Some(ref tr) = self.tool_registry {
|
||||
write_section(AGI_TAG_TOOL_REGISTRY, tr);
|
||||
}
|
||||
if let Some(ref ap) = self.agent_prompts {
|
||||
write_section(AGI_TAG_AGENT_PROMPTS, ap);
|
||||
}
|
||||
if let Some(ref et) = self.eval_tasks {
|
||||
write_section(AGI_TAG_EVAL_TASKS, et);
|
||||
}
|
||||
if let Some(ref eg) = self.eval_graders {
|
||||
write_section(AGI_TAG_EVAL_GRADERS, eg);
|
||||
}
|
||||
if let Some(ref sl) = self.skill_library {
|
||||
write_section(AGI_TAG_SKILL_LIBRARY, sl);
|
||||
}
|
||||
if let Some(ref rs) = self.replay_script {
|
||||
write_section(AGI_TAG_REPLAY_SCRIPT, rs);
|
||||
}
|
||||
if let Some(ref kc) = self.kernel_config {
|
||||
write_section(AGI_TAG_KERNEL_CONFIG, kc);
|
||||
}
|
||||
if let Some(ref nc) = self.network_config {
|
||||
write_section(AGI_TAG_NETWORK_CONFIG, nc);
|
||||
}
|
||||
if let Some(ref cc) = self.coherence_config {
|
||||
write_section(AGI_TAG_COHERENCE_CONFIG, cc);
|
||||
}
|
||||
if let Some(ref pi) = self.project_instructions {
|
||||
write_section(AGI_TAG_PROJECT_INSTRUCTIONS, pi);
|
||||
}
|
||||
if let Some(ref ds) = self.dependency_snapshot {
|
||||
write_section(AGI_TAG_DEPENDENCY_SNAPSHOT, ds);
|
||||
}
|
||||
if let Some(ref ac) = self.authority_config {
|
||||
write_section(AGI_TAG_AUTHORITY_CONFIG, ac);
|
||||
}
|
||||
if let Some(ref dp) = self.domain_profile {
|
||||
write_section(AGI_TAG_DOMAIN_PROFILE, dp);
|
||||
}
|
||||
|
||||
payload
|
||||
}
|
||||
|
||||
/// Build the manifest: header + TLV sections.
|
||||
pub fn build(self) -> Result<(Vec<u8>, AgiContainerHeader), ContainerError> {
|
||||
let sections = self.build_sections();
|
||||
|
||||
let model_id_hash = match &self.model_id {
|
||||
Some(mid) => seed_crypto::seed_content_hash(mid),
|
||||
None => [0u8; 8],
|
||||
};
|
||||
|
||||
let flags = self.segments.to_flags() | self.extra_flags;
|
||||
|
||||
let created_ns = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_nanos() as u64;
|
||||
|
||||
let header = AgiContainerHeader {
|
||||
magic: AGI_MAGIC,
|
||||
version: 1,
|
||||
flags,
|
||||
container_id: self.container_id,
|
||||
build_id: self.build_id,
|
||||
created_ns,
|
||||
model_id_hash,
|
||||
policy_hash: self.policy_hash,
|
||||
};
|
||||
|
||||
let mut payload = Vec::with_capacity(AGI_HEADER_SIZE + sections.len());
|
||||
payload.extend_from_slice(&header.to_bytes());
|
||||
payload.extend_from_slice(§ions);
|
||||
|
||||
// Mark manifest as present for validation.
|
||||
let mut segs = self.segments.clone();
|
||||
segs.manifest_present = true;
|
||||
|
||||
Ok((payload, header))
|
||||
}
|
||||
|
||||
/// Build and sign with HMAC-SHA256.
|
||||
pub fn build_and_sign(
|
||||
mut self,
|
||||
key: &[u8],
|
||||
) -> Result<(Vec<u8>, AgiContainerHeader), ContainerError> {
|
||||
self.segments.crypto_present = true;
|
||||
let (unsigned, mut header) = self.build()?;
|
||||
header.flags |= AGI_SIGNED;
|
||||
|
||||
let sig = seed_crypto::sign_seed(key, &unsigned);
|
||||
let mut signed = unsigned;
|
||||
// Re-write the header with SIGNED flag.
|
||||
let header_bytes = header.to_bytes();
|
||||
signed[..AGI_HEADER_SIZE].copy_from_slice(&header_bytes);
|
||||
signed.extend_from_slice(&sig);
|
||||
|
||||
Ok((signed, header))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsed AGI container manifest with zero-copy section references.
///
/// All section fields borrow directly from the input buffer passed to
/// [`ParsedAgiManifest::parse`]; a `None` field means the corresponding
/// TLV tag was absent from the manifest.
#[derive(Debug)]
pub struct ParsedAgiManifest<'a> {
    /// Parsed header.
    pub header: AgiContainerHeader,
    /// Model identifier bytes (UTF-8 expected; see `model_id_str`).
    pub model_id: Option<&'a [u8]>,
    /// Policy bytes.
    pub policy: Option<&'a [u8]>,
    /// Orchestrator config.
    pub orchestrator_config: Option<&'a [u8]>,
    /// Tool registry.
    pub tool_registry: Option<&'a [u8]>,
    /// Agent prompts.
    pub agent_prompts: Option<&'a [u8]>,
    /// Eval tasks.
    pub eval_tasks: Option<&'a [u8]>,
    /// Eval graders.
    pub eval_graders: Option<&'a [u8]>,
    /// Skill library.
    pub skill_library: Option<&'a [u8]>,
    /// Replay script.
    pub replay_script: Option<&'a [u8]>,
    /// Kernel config.
    pub kernel_config: Option<&'a [u8]>,
    /// Network config.
    pub network_config: Option<&'a [u8]>,
    /// Coherence gate config.
    pub coherence_config: Option<&'a [u8]>,
    /// Project instructions.
    pub project_instructions: Option<&'a [u8]>,
    /// Dependency snapshot.
    pub dependency_snapshot: Option<&'a [u8]>,
    /// Authority configuration.
    pub authority_config: Option<&'a [u8]>,
    /// Domain profile.
    pub domain_profile: Option<&'a [u8]>,
}
|
||||
|
||||
impl<'a> ParsedAgiManifest<'a> {
|
||||
/// Parse a manifest from bytes.
|
||||
pub fn parse(data: &'a [u8]) -> Result<Self, ContainerError> {
|
||||
let header = AgiContainerHeader::from_bytes(data)
|
||||
.map_err(|_| ContainerError::InvalidConfig("invalid header"))?;
|
||||
|
||||
let mut result = Self {
|
||||
header,
|
||||
model_id: None,
|
||||
policy: None,
|
||||
orchestrator_config: None,
|
||||
tool_registry: None,
|
||||
agent_prompts: None,
|
||||
eval_tasks: None,
|
||||
eval_graders: None,
|
||||
skill_library: None,
|
||||
replay_script: None,
|
||||
kernel_config: None,
|
||||
network_config: None,
|
||||
coherence_config: None,
|
||||
project_instructions: None,
|
||||
dependency_snapshot: None,
|
||||
authority_config: None,
|
||||
domain_profile: None,
|
||||
};
|
||||
|
||||
// Parse TLV sections after header.
|
||||
let mut pos = AGI_HEADER_SIZE;
|
||||
while pos + 6 <= data.len() {
|
||||
let tag = u16::from_le_bytes([data[pos], data[pos + 1]]);
|
||||
let length =
|
||||
u32::from_le_bytes([data[pos + 2], data[pos + 3], data[pos + 4], data[pos + 5]])
|
||||
as usize;
|
||||
pos += 6;
|
||||
|
||||
if pos + length > data.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
let value = &data[pos..pos + length];
|
||||
match tag {
|
||||
AGI_TAG_MODEL_ID => result.model_id = Some(value),
|
||||
AGI_TAG_POLICY => result.policy = Some(value),
|
||||
AGI_TAG_ORCHESTRATOR => result.orchestrator_config = Some(value),
|
||||
AGI_TAG_TOOL_REGISTRY => result.tool_registry = Some(value),
|
||||
AGI_TAG_AGENT_PROMPTS => result.agent_prompts = Some(value),
|
||||
AGI_TAG_EVAL_TASKS => result.eval_tasks = Some(value),
|
||||
AGI_TAG_EVAL_GRADERS => result.eval_graders = Some(value),
|
||||
AGI_TAG_SKILL_LIBRARY => result.skill_library = Some(value),
|
||||
AGI_TAG_REPLAY_SCRIPT => result.replay_script = Some(value),
|
||||
AGI_TAG_KERNEL_CONFIG => result.kernel_config = Some(value),
|
||||
AGI_TAG_NETWORK_CONFIG => result.network_config = Some(value),
|
||||
AGI_TAG_COHERENCE_CONFIG => result.coherence_config = Some(value),
|
||||
AGI_TAG_PROJECT_INSTRUCTIONS => result.project_instructions = Some(value),
|
||||
AGI_TAG_DEPENDENCY_SNAPSHOT => result.dependency_snapshot = Some(value),
|
||||
AGI_TAG_AUTHORITY_CONFIG => result.authority_config = Some(value),
|
||||
AGI_TAG_DOMAIN_PROFILE => result.domain_profile = Some(value),
|
||||
_ => {} // forward-compat: ignore unknown tags
|
||||
}
|
||||
|
||||
pos += length;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Get the model ID as a UTF-8 string.
|
||||
pub fn model_id_str(&self) -> Option<&str> {
|
||||
self.model_id.and_then(|b| core::str::from_utf8(b).ok())
|
||||
}
|
||||
|
||||
/// Check if the manifest has all sections needed for autonomous operation.
|
||||
pub fn is_autonomous_capable(&self) -> bool {
|
||||
self.orchestrator_config.is_some()
|
||||
&& self.eval_tasks.is_some()
|
||||
&& self.eval_graders.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: orchestrator config JSON used by several tests.
    const ORCHESTRATOR_CONFIG: &[u8] = br#"{
        "claude_code": {
            "model": "claude-opus-4-6",
            "max_turns": 100,
            "permission_mode": "bypassPermissions"
        },
        "claude_flow": {
            "topology": "hierarchical",
            "max_agents": 15,
            "strategy": "specialized",
            "memory": "hybrid"
        }
    }"#;

    // Fixture: tool registry JSON.
    const TOOL_REGISTRY: &[u8] = br#"[
        {"name": "ruvector_query", "type": "vector_search"},
        {"name": "ruvector_cypher", "type": "graph_query"},
        {"name": "ruvector_commit_delta", "type": "write"},
        {"name": "rvf_snapshot", "type": "snapshot"},
        {"name": "rvf_witness_export", "type": "export"},
        {"name": "eval_run", "type": "evaluation"}
    ]"#;

    // Fixture: coherence gate config JSON.
    const COHERENCE_CONFIG: &[u8] = br#"{
        "min_cut_threshold": 0.7,
        "contradiction_pressure_max": 0.3,
        "quarantine_ttl_hours": 24,
        "skill_promotion_k": 5,
        "rollback_on_violation": true
    }"#;

    // End-to-end: populate every section, build, then parse back and
    // verify the sections survive the round trip byte-for-byte.
    #[test]
    fn build_full_container() {
        let segs = ContainerSegments {
            kernel_present: true,
            kernel_size: 5_000_000,
            wasm_count: 2,
            wasm_total_size: 60_000,
            vec_segment_count: 4,
            index_segment_count: 2,
            witness_count: 100,
            crypto_present: false,
            manifest_present: false,
            orchestrator_present: true,
            world_model_present: true,
            domain_expansion_present: false,
            total_size: 0,
        };

        let builder = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
            .with_model_id("claude-opus-4-6")
            .with_policy(b"autonomous", [0xAA; 8])
            .with_orchestrator(ORCHESTRATOR_CONFIG)
            .with_tool_registry(TOOL_REGISTRY)
            .with_agent_prompts(b"You are a coder agent...")
            .with_eval_tasks(b"[{\"id\":1,\"spec\":\"fix bug\"}]")
            .with_eval_graders(b"[{\"type\":\"test_pass\"}]")
            .with_skill_library(b"[]")
            .with_replay_script(b"#!/bin/sh\nrvf replay $1")
            .with_kernel_config(b"console=ttyS0 root=/dev/vda")
            .with_network_config(b"{\"port\":8080}")
            .with_coherence_config(COHERENCE_CONFIG)
            .with_project_instructions(b"# CLAUDE.md\nFollow DDD...")
            .with_dependency_snapshot(b"sha256:abc123")
            .offline_capable()
            .with_segments(segs);

        let (payload, header) = builder.build().unwrap();

        assert!(header.is_valid_magic());
        assert!(header.has_kernel());
        assert!(header.has_orchestrator());
        assert!(header.is_replay_capable());
        assert!(header.is_offline_capable());

        // Parse it back.
        let parsed = ParsedAgiManifest::parse(&payload).unwrap();
        assert_eq!(parsed.model_id_str(), Some("claude-opus-4-6"));
        assert_eq!(parsed.orchestrator_config.unwrap(), ORCHESTRATOR_CONFIG);
        assert_eq!(parsed.tool_registry.unwrap(), TOOL_REGISTRY);
        assert_eq!(parsed.coherence_config.unwrap(), COHERENCE_CONFIG);
        assert!(parsed.project_instructions.is_some());
        assert!(parsed.is_autonomous_capable());
    }

    // Sign a container and verify the trailing 32-byte signature over
    // the payload prefix.
    #[test]
    fn signed_container_round_trip() {
        let key = b"container-signing-key-for-tests!";
        let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
            .with_model_id("claude-opus-4-6")
            .with_orchestrator(ORCHESTRATOR_CONFIG)
            .with_eval_tasks(b"[]")
            .with_eval_graders(b"[]")
            .with_segments(ContainerSegments {
                kernel_present: true,
                manifest_present: false,
                ..Default::default()
            });

        let (payload, header) = builder.build_and_sign(key).unwrap();
        assert!(header.is_signed());

        // Verify signature.
        let unsigned_len = payload.len() - 32;
        let sig = &payload[unsigned_len..];
        assert!(seed_crypto::verify_seed(key, &payload[..unsigned_len], sig));
    }

    // A header-only container parses but is not autonomous-capable.
    #[test]
    fn minimal_container() {
        let builder =
            AgiContainerBuilder::new([0x30; 16], [0x40; 16]).with_segments(ContainerSegments {
                kernel_present: true,
                manifest_present: true,
                ..Default::default()
            });

        let (payload, header) = builder.build().unwrap();
        assert!(header.has_kernel());

        let parsed = ParsedAgiManifest::parse(&payload).unwrap();
        assert!(parsed.model_id.is_none());
        assert!(!parsed.is_autonomous_capable());
    }

    // Authority config and domain profile TLV sections round-trip.
    #[test]
    fn authority_and_domain_round_trip() {
        let authority = br#"{"max_authority":"ExecuteTools","budget":{"max_time_secs":600,"max_tokens":400000}}"#;
        let domain = b"repo-automation-v1";

        let builder = AgiContainerBuilder::new([0x50; 16], [0x60; 16])
            .with_authority_config(authority)
            .with_domain_profile(domain)
            .with_segments(ContainerSegments {
                kernel_present: true,
                manifest_present: true,
                ..Default::default()
            });

        let (payload, _header) = builder.build().unwrap();
        let parsed = ParsedAgiManifest::parse(&payload).unwrap();
        assert_eq!(parsed.authority_config.unwrap(), authority.as_slice());
        assert_eq!(parsed.domain_profile.unwrap(), domain.as_slice());
    }

    #[test]
    fn segment_validation() {
        // Live mode needs kernel or WASM, plus world model.
        let segs = ContainerSegments {
            manifest_present: true,
            kernel_present: true,
            world_model_present: true,
            ..Default::default()
        };
        assert!(segs.validate(ExecutionMode::Live).is_ok());

        // Replay mode needs witness.
        let segs2 = ContainerSegments {
            manifest_present: true,
            witness_count: 50,
            ..Default::default()
        };
        assert!(segs2.validate(ExecutionMode::Replay).is_ok());
    }
}
|
||||
209
vendor/ruvector/crates/rvf/rvf-runtime/src/compaction.rs
vendored
Normal file
209
vendor/ruvector/crates/rvf/rvf-runtime/src/compaction.rs
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
//! Background compaction for dead space reclamation.
|
||||
//!
|
||||
//! Compaction scheduling policy (from spec 10, section 7):
|
||||
//! - IO budget: max 30% of IOPS (60% in emergency)
|
||||
//! - Priority: queries > ingest > compaction
|
||||
//! - Triggers: dead_space > 20%, segment_count > 32, time > 60s
|
||||
//! - Emergency: dead_space > 70% -> preempt ingest
|
||||
//!
|
||||
//! Segment selection order:
|
||||
//! 1. Tombstoned segments (reclaim dead space)
|
||||
//! 2. Small VEC_SEGs (< 1MB, merge into larger)
|
||||
//! 3. High-overlap INDEX_SEGs
|
||||
//! 4. Cold OVERLAY_SEGs
|
||||
|
||||
/// Compaction trigger thresholds.
///
/// Defaults follow the scheduling policy in spec 10, section 7:
/// dead space > 20%, segment count > 32, 60 s minimum interval, and a
/// 70% emergency dead-space threshold.
#[allow(dead_code)]
pub(crate) struct CompactionThresholds {
    /// Minimum dead space ratio (0.0-1.0) to trigger compaction.
    pub dead_space_ratio: f64,
    /// Maximum segment count before compaction.
    pub max_segment_count: u32,
    /// Minimum seconds since last compaction (ignored by emergency).
    pub min_interval_secs: u64,
    /// Emergency dead space ratio (preempts ingest).
    pub emergency_ratio: f64,
}
|
||||
|
||||
impl Default for CompactionThresholds {
    /// Default thresholds from the compaction scheduling policy
    /// (spec 10, section 7).
    fn default() -> Self {
        Self {
            dead_space_ratio: 0.20,  // trigger: dead_space > 20%
            max_segment_count: 32,   // trigger: segment_count > 32
            min_interval_secs: 60,   // trigger: time since last > 60 s
            emergency_ratio: 0.70,   // emergency: dead_space > 70%
        }
    }
}
|
||||
|
||||
/// Compaction decision produced by `evaluate_triggers`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) enum CompactionDecision {
    /// No compaction needed.
    None,
    /// Normal compaction should run (30% IO budget).
    Normal,
    /// Emergency compaction (high dead space; 60% IO budget, preempts ingest).
    Emergency,
}
|
||||
|
||||
/// Evaluate whether compaction should run.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn evaluate_triggers(
|
||||
dead_space_ratio: f64,
|
||||
segment_count: u32,
|
||||
secs_since_last: u64,
|
||||
thresholds: &CompactionThresholds,
|
||||
) -> CompactionDecision {
|
||||
// Emergency check first.
|
||||
if dead_space_ratio > thresholds.emergency_ratio {
|
||||
return CompactionDecision::Emergency;
|
||||
}
|
||||
|
||||
// Check all normal conditions.
|
||||
if secs_since_last < thresholds.min_interval_secs {
|
||||
return CompactionDecision::None;
|
||||
}
|
||||
|
||||
if dead_space_ratio > thresholds.dead_space_ratio {
|
||||
return CompactionDecision::Normal;
|
||||
}
|
||||
|
||||
if segment_count > thresholds.max_segment_count {
|
||||
return CompactionDecision::Normal;
|
||||
}
|
||||
|
||||
CompactionDecision::None
|
||||
}
|
||||
|
||||
/// Represents a compaction plan: which segments to compact and how.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub(crate) struct CompactionPlan {
    /// Segment IDs to compact (input).
    pub source_segments: Vec<u64>,
    /// Whether this is emergency compaction.
    pub emergency: bool,
    /// IO budget as a fraction of IOPS (0.30 normal, 0.60 emergency).
    pub io_budget: f64,
}
|
||||
|
||||
impl CompactionPlan {
|
||||
/// Create a normal compaction plan.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn normal(segments: Vec<u64>) -> Self {
|
||||
Self {
|
||||
source_segments: segments,
|
||||
emergency: false,
|
||||
io_budget: 0.30,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an emergency compaction plan.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn emergency(segments: Vec<u64>) -> Self {
|
||||
Self {
|
||||
source_segments: segments,
|
||||
emergency: true,
|
||||
io_budget: 0.60,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Select segments for compaction based on the tiered strategy.
///
/// Priority:
/// 1. Tombstoned segments (reclaim dead space)
/// 2. Small VEC_SEGs (payload below 1 MB)
/// 3. Remaining segments in directory order
///    (NOTE(review): the original doc said "by age" — the code simply
///    walks `segment_dir` in order; confirm the directory is age-sorted.)
#[allow(dead_code)]
pub(crate) fn select_segments(
    segment_dir: &[(u64, u64, u8, bool)], // (seg_id, payload_len, seg_type, is_tombstoned)
    max_segments: usize,
) -> Vec<u64> {
    const SMALL_VEC_SEG: u64 = 1024 * 1024;
    const VEC_SEG_TYPE: u8 = 0x01;

    let mut picked: Vec<u64> = Vec::new();

    // Phase 1: tombstoned segments reclaim dead space first.
    for &(seg_id, _, _, tombstoned) in segment_dir {
        if picked.len() >= max_segments {
            break;
        }
        if tombstoned {
            picked.push(seg_id);
        }
    }

    // Phase 2: small VEC_SEGs get merged into larger ones.
    for &(seg_id, payload_len, seg_type, _) in segment_dir {
        if picked.len() >= max_segments {
            break;
        }
        let small_vec = seg_type == VEC_SEG_TYPE && payload_len < SMALL_VEC_SEG;
        if small_vec && !picked.contains(&seg_id) {
            picked.push(seg_id);
        }
    }

    // Phase 3: top up with the remaining segments.
    for &(seg_id, _, _, _) in segment_dir {
        if picked.len() >= max_segments {
            break;
        }
        if !picked.contains(&seg_id) {
            picked.push(seg_id);
        }
    }

    picked
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn no_compaction_when_fresh() {
        // Below every threshold and inside the interval: nothing to do.
        let decision = evaluate_triggers(0.10, 10, 30, &CompactionThresholds::default());
        assert_eq!(decision, CompactionDecision::None);
    }

    #[test]
    fn normal_compaction_on_dead_space() {
        // 25% dead space > 20% threshold, interval elapsed.
        let decision = evaluate_triggers(0.25, 10, 120, &CompactionThresholds::default());
        assert_eq!(decision, CompactionDecision::Normal);
    }

    #[test]
    fn normal_compaction_on_segment_count() {
        // 50 segments > 32 threshold, interval elapsed.
        let decision = evaluate_triggers(0.10, 50, 120, &CompactionThresholds::default());
        assert_eq!(decision, CompactionDecision::Normal);
    }

    #[test]
    fn emergency_compaction_on_high_dead_space() {
        // 75% dead space > 70% emergency threshold; interval is ignored.
        let decision = evaluate_triggers(0.75, 10, 10, &CompactionThresholds::default());
        assert_eq!(decision, CompactionDecision::Emergency);
    }

    #[test]
    fn no_compaction_before_interval() {
        let decision = evaluate_triggers(0.25, 50, 30, &CompactionThresholds::default());
        // Even though dead_space and segment_count exceed thresholds,
        // interval hasn't passed.
        assert_eq!(decision, CompactionDecision::None);
    }

    #[test]
    fn select_tombstoned_first() {
        let segments = vec![
            (1, 500_000, 0x01, false),
            (2, 100_000, 0x01, true), // tombstoned
            (3, 200_000, 0x01, false),
            (4, 50_000, 0x01, true), // tombstoned
        ];
        let selected = select_segments(&segments, 3);
        // Tombstoned segments (2, 4) should come first.
        assert_eq!(selected[0], 2);
        assert_eq!(selected[1], 4);
        assert_eq!(selected.len(), 3);
    }
}
|
||||
308
vendor/ruvector/crates/rvf/rvf-runtime/src/compress.rs
vendored
Normal file
308
vendor/ruvector/crates/rvf/rvf-runtime/src/compress.rs
vendored
Normal file
@@ -0,0 +1,308 @@
|
||||
//! Zero-dependency LZ77 compression for QR seed microkernels.
|
||||
//!
|
||||
//! Simple but effective: 4 KB sliding window, match lengths 3-10,
|
||||
//! literal runs up to 128 bytes. Typical WASM compression ratio: 1.4-2.5x.
|
||||
//!
|
||||
//! Wire format (SCF-1 — Seed Compression Format):
|
||||
//! - Header: 4 bytes (original size as LE u32)
|
||||
//! - Token stream:
|
||||
//! - `0x00..=0x7F` (bit 7 clear): Literal run, count = byte + 1 (1-128)
|
||||
//! - `0x80..=0xFF` (bit 7 set): Back-reference
|
||||
//! - length = ((byte >> 4) & 0x07) + 3 (3-10)
|
||||
//! - offset = ((byte & 0x0F) << 8) | next_byte + 1 (1-4096)
|
||||
|
||||
/// Compression errors returned by `decompress`.
#[derive(Debug, PartialEq)]
pub enum CompressError {
    /// Compressed data too short to contain the 4-byte size header.
    TooShort,
    /// Compressed stream is truncated (a token ran past the end).
    Truncated,
    /// Back-reference offset exceeds the output produced so far.
    InvalidOffset,
    /// Decompressed size doesn't match the size recorded in the header.
    SizeMismatch { expected: usize, got: usize },
}
|
||||
|
||||
impl core::fmt::Display for CompressError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self {
|
||||
CompressError::TooShort => write!(f, "compressed data too short"),
|
||||
CompressError::Truncated => write!(f, "compressed stream truncated"),
|
||||
CompressError::InvalidOffset => write!(f, "invalid back-reference offset"),
|
||||
CompressError::SizeMismatch { expected, got } => {
|
||||
write!(f, "size mismatch: expected {expected}, got {got}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash a 3-byte trigram into a 12-bit index for the LZ77 hash table.
///
/// Combines the bytes with shifted XORs, then masks to 0..4096 to fit
/// the 4096-entry table.
#[inline]
fn trigram_hash(a: u8, b: u8, c: u8) -> usize {
    let mixed = ((a as usize) << 4) ^ ((b as usize) << 2) ^ (c as usize);
    mixed & 0xFFF
}
|
||||
|
||||
/// Flush accumulated literals to the output as literal-run tokens.
///
/// Each token is a control byte `0x00..=0x7F` (encoding count - 1)
/// followed by up to 128 raw bytes; longer runs split into multiple
/// tokens. An empty slice emits nothing.
fn flush_literals(output: &mut Vec<u8>, literals: &[u8]) {
    for run in literals.chunks(128) {
        output.push((run.len() - 1) as u8); // 0x00..=0x7F
        output.extend_from_slice(run);
    }
}
|
||||
|
||||
/// Compress data using LZ77 with a 4 KB sliding window.
///
/// Returns the compressed payload prefixed with a 4-byte original-size header.
/// Matches are 3-10 bytes long with offsets up to 4096; non-matching bytes
/// accumulate in a literal buffer that is flushed before each match token
/// and once at the end.
pub fn compress(input: &[u8]) -> Vec<u8> {
    let mut output = Vec::with_capacity(input.len());

    // Header: original size (LE u32).
    output.extend_from_slice(&(input.len() as u32).to_le_bytes());

    if input.is_empty() {
        return output;
    }

    // Hash table: maps trigram hash → most recent position.
    // NOTE(review): entry 0 is ambiguous between "empty slot" and
    // "position 0"; this is harmless because every candidate match is
    // verified byte-by-byte below before being emitted.
    let mut table = [0u32; 4096];
    let mut literals: Vec<u8> = Vec::new();
    let mut pos = 0;

    while pos < input.len() {
        let mut best_len = 0usize;
        let mut best_offset = 0usize;

        // Only look for a match if at least one full trigram remains.
        if pos + 3 <= input.len() {
            let hash = trigram_hash(input[pos], input[pos + 1], input[pos + 2]);
            let candidate = table[hash] as usize;
            table[hash] = pos as u32;

            // Candidate must lie strictly behind us and within the 4 KB window.
            if candidate < pos && pos - candidate <= 4096 {
                // Match length is capped at 10 by the token format.
                let max_len = core::cmp::min(10, input.len() - pos);
                let mut match_len = 0;
                while match_len < max_len && input[candidate + match_len] == input[pos + match_len]
                {
                    match_len += 1;
                }
                if match_len >= 3 {
                    best_len = match_len;
                    best_offset = pos - candidate;
                }
            }
        }

        if best_len >= 3 {
            // Flush any pending literals first.
            flush_literals(&mut output, &literals);
            literals.clear();

            // Emit match token: 1LLL_OOOO OOOOOOOO
            let len_code = (best_len - 3) as u8; // 0-7
            let offset_val = (best_offset - 1) as u16; // 0-4095
            let offset_hi = ((offset_val >> 8) & 0x0F) as u8;
            let offset_lo = (offset_val & 0xFF) as u8;

            output.push(0x80 | (len_code << 4) | offset_hi);
            output.push(offset_lo);

            // Update hash table for positions within the match, so later
            // data can match against bytes we just skipped over.
            for i in 1..best_len {
                if pos + i + 3 <= input.len() {
                    let h = trigram_hash(input[pos + i], input[pos + i + 1], input[pos + i + 2]);
                    table[h] = (pos + i) as u32;
                }
            }

            pos += best_len;
        } else {
            // No usable match: buffer the byte as a literal.
            literals.push(input[pos]);
            pos += 1;
        }
    }

    // Flush remaining literals.
    flush_literals(&mut output, &literals);

    output
}
|
||||
|
||||
/// Decompress SCF-1 data back to original bytes.
|
||||
pub fn decompress(compressed: &[u8]) -> Result<Vec<u8>, CompressError> {
|
||||
if compressed.len() < 4 {
|
||||
return Err(CompressError::TooShort);
|
||||
}
|
||||
|
||||
let original_size =
|
||||
u32::from_le_bytes([compressed[0], compressed[1], compressed[2], compressed[3]]) as usize;
|
||||
|
||||
let mut output = Vec::with_capacity(original_size);
|
||||
let mut pos = 4;
|
||||
|
||||
while output.len() < original_size && pos < compressed.len() {
|
||||
let control = compressed[pos];
|
||||
pos += 1;
|
||||
|
||||
if control & 0x80 == 0 {
|
||||
// Literal run.
|
||||
let count = (control as usize) + 1;
|
||||
if pos + count > compressed.len() {
|
||||
return Err(CompressError::Truncated);
|
||||
}
|
||||
output.extend_from_slice(&compressed[pos..pos + count]);
|
||||
pos += count;
|
||||
} else {
|
||||
// Back-reference.
|
||||
if pos >= compressed.len() {
|
||||
return Err(CompressError::Truncated);
|
||||
}
|
||||
let length = (((control >> 4) & 0x07) as usize) + 3;
|
||||
let offset_hi = (control & 0x0F) as usize;
|
||||
let offset_lo = compressed[pos] as usize;
|
||||
pos += 1;
|
||||
let offset = (offset_hi << 8 | offset_lo) + 1;
|
||||
|
||||
if offset > output.len() {
|
||||
return Err(CompressError::InvalidOffset);
|
||||
}
|
||||
|
||||
let start = output.len() - offset;
|
||||
for i in 0..length {
|
||||
let byte = output[start + i];
|
||||
output.push(byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if output.len() != original_size {
|
||||
return Err(CompressError::SizeMismatch {
|
||||
expected: original_size,
|
||||
got: output.len(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_round_trip() {
        let compressed = compress(b"");
        assert_eq!(compressed, [0, 0, 0, 0]); // Just the size header.
        let decompressed = decompress(&compressed).unwrap();
        assert!(decompressed.is_empty());
    }

    #[test]
    fn short_literal_round_trip() {
        let input = b"Hello, World!";
        let compressed = compress(input);
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(&decompressed, input);
    }

    #[test]
    fn repeated_data_compresses() {
        // Highly repetitive data should compress well.
        let input: Vec<u8> = (0..1000).map(|i| (i % 7) as u8).collect();
        let compressed = compress(&input);
        assert!(
            compressed.len() < input.len(),
            "compressed {} >= original {}",
            compressed.len(),
            input.len()
        );
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, input);
    }

    #[test]
    fn wasm_like_data_compresses() {
        // Simulate WASM module: lots of zero runs and repeated patterns.
        let mut wasm = Vec::new();
        // Magic + version.
        wasm.extend_from_slice(&[0x00, 0x61, 0x73, 0x6D, 0x01, 0x00, 0x00, 0x00]);
        // Repeated section patterns.
        for _ in 0..100 {
            wasm.extend_from_slice(&[0x01, 0x06, 0x01, 0x60, 0x01, 0x7F, 0x01, 0x7F]);
        }
        // Zero fill.
        wasm.resize(wasm.len() + 500, 0x00);

        let compressed = compress(&wasm);
        assert!(
            compressed.len() < wasm.len(),
            "compressed {} >= original {}",
            compressed.len(),
            wasm.len()
        );
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, wasm);
    }

    #[test]
    fn random_like_data_round_trips() {
        // Incompressible data should still round-trip correctly.
        let input: Vec<u8> = (0..500).map(|i| ((i * 131 + 17) % 256) as u8).collect();
        let compressed = compress(&input);
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, input);
    }

    #[test]
    fn large_data_round_trip() {
        // Larger than the 4 KB sliding window.
        let input: Vec<u8> = (0..8000)
            .map(|i| ((i * 37 + i / 100) % 256) as u8)
            .collect();
        let compressed = compress(&input);
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, input);
    }

    #[test]
    fn all_zeros_compress_well() {
        let input = vec![0u8; 4096];
        let compressed = compress(&input);
        // 4096 zeros with 4KB window and match length 10 should compress very well.
        assert!(compressed.len() < input.len() / 2);
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, input);
    }

    #[test]
    fn decompress_truncated_fails() {
        let compressed = compress(b"test data for truncation");
        // Truncate the compressed data.
        let truncated = &compressed[..compressed.len() / 2];
        assert!(decompress(truncated).is_err());
    }

    #[test]
    fn decompress_too_short_fails() {
        // Shorter than the 4-byte size header.
        assert_eq!(decompress(&[0, 0]), Err(CompressError::TooShort));
    }

    #[test]
    fn compress_error_display() {
        let e = CompressError::SizeMismatch {
            expected: 100,
            got: 50,
        };
        assert!(format!("{e}").contains("100"));
    }

    #[test]
    fn exactly_128_byte_literal_run() {
        // 128 unique bytes forces exactly one max-length literal run.
        let input: Vec<u8> = (0..128).map(|i| (i * 2 + 1) as u8).collect();
        let compressed = compress(&input);
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, input);
    }
}
|
||||
502
vendor/ruvector/crates/rvf/rvf-runtime/src/cow.rs
vendored
Normal file
502
vendor/ruvector/crates/rvf/rvf-runtime/src/cow.rs
vendored
Normal file
@@ -0,0 +1,502 @@
|
||||
//! COW read/write engine for vector-addressed clusters.
|
||||
//!
|
||||
//! Cluster addressing: `cluster_id = vector_id / vectors_per_cluster`
|
||||
//!
|
||||
//! - **Read**: lookup in map -> LocalOffset (read local) or ParentRef (follow chain)
|
||||
//! - **Write**: if inherited -> copy parent slab -> local, apply mutation, update map
|
||||
//! - **Write coalescing**: multiple writes to the same inherited cluster are buffered;
|
||||
//! on flush, the parent slab is copied once and all mutations applied.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{Seek, SeekFrom, Write};
|
||||
|
||||
use rvf_types::cow_map::CowMapEntry;
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
|
||||
use crate::cow_map::CowMap;
|
||||
use crate::store::simple_shake256_256;
|
||||
|
||||
/// Witness event emitted when a COW slab copy or delta occurs.
///
/// Hashes use SHAKE-256-256 (see `simple_shake256_256`) so a replay can
/// verify cluster contents before and after the copy.
pub struct WitnessEvent {
    /// Event type: 0x0E = CLUSTER_COW, 0x0F = CLUSTER_DELTA.
    pub event_type: u8,
    /// ID of the cluster affected.
    pub cluster_id: u32,
    /// SHAKE-256-256 hash of the parent cluster data before copy.
    pub parent_cluster_hash: [u8; 32],
    /// SHAKE-256-256 hash of the new local cluster data after copy.
    pub new_cluster_hash: [u8; 32],
}
|
||||
|
||||
/// A pending write buffered for coalescing.
///
/// Buffered per-cluster so that a single parent-slab copy can absorb
/// multiple vector writes on flush.
struct PendingWrite {
    /// Byte offset of the vector within the cluster.
    vector_offset_in_cluster: u32,
    /// Vector data to write.
    data: Vec<u8>,
}
|
||||
|
||||
/// COW read/write engine for vector-addressed clusters.
///
/// Addressing: `cluster_id = vector_id / vectors_per_cluster`. Reads
/// resolve through the COW map (local offset, parent chain, or zeroed
/// unallocated cluster); writes to inherited clusters are buffered and
/// coalesced.
pub struct CowEngine {
    /// The COW cluster map.
    cow_map: CowMap,
    /// Cluster size in bytes (power of 2).
    cluster_size: u32,
    /// Vectors per cluster (asserted non-zero by the constructors).
    vectors_per_cluster: u32,
    /// Bytes per vector (dimension * sizeof(f32)).
    bytes_per_vector: u32,
    /// L0 cache: cluster_id -> resolved local file offset.
    l0_cache: HashMap<u32, u64>,
    /// Write coalescing buffer: cluster_id -> pending writes.
    write_buffer: HashMap<u32, Vec<PendingWrite>>,
    /// Whether this engine is frozen (snapshot); frozen engines reject writes.
    frozen: bool,
    /// Snapshot epoch (0 = mutable).
    snapshot_epoch: u32,
}
|
||||
|
||||
impl CowEngine {
|
||||
/// Create a new COW engine.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `vectors_per_cluster` is 0 (would cause division by zero on read/write).
|
||||
pub fn new(cluster_size: u32, vectors_per_cluster: u32, bytes_per_vector: u32) -> Self {
|
||||
assert!(vectors_per_cluster > 0, "vectors_per_cluster must be > 0");
|
||||
Self {
|
||||
cow_map: CowMap::new_flat(0),
|
||||
cluster_size,
|
||||
vectors_per_cluster,
|
||||
bytes_per_vector,
|
||||
l0_cache: HashMap::new(),
|
||||
write_buffer: HashMap::new(),
|
||||
frozen: false,
|
||||
snapshot_epoch: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a COW engine initialized from a parent (all clusters point to parent).
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `vectors_per_cluster` is 0 (would cause division by zero on read/write).
|
||||
pub fn from_parent(
|
||||
cluster_count: u32,
|
||||
cluster_size: u32,
|
||||
vectors_per_cluster: u32,
|
||||
bytes_per_vector: u32,
|
||||
) -> Self {
|
||||
assert!(vectors_per_cluster > 0, "vectors_per_cluster must be > 0");
|
||||
Self {
|
||||
cow_map: CowMap::new_parent_ref(cluster_count),
|
||||
cluster_size,
|
||||
vectors_per_cluster,
|
||||
bytes_per_vector,
|
||||
l0_cache: HashMap::new(),
|
||||
write_buffer: HashMap::new(),
|
||||
frozen: false,
|
||||
snapshot_epoch: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a read-only reference to the underlying COW cluster map.
pub fn cow_map(&self) -> &CowMap {
    &self.cow_map
}
|
||||
|
||||
/// Read a vector by ID. Returns byte slice of vector data.
|
||||
pub fn read_vector(
|
||||
&self,
|
||||
vector_id: u64,
|
||||
file: &File,
|
||||
parent: Option<&File>,
|
||||
) -> Result<Vec<u8>, RvfError> {
|
||||
let cluster_id = (vector_id / self.vectors_per_cluster as u64) as u32;
|
||||
let vector_index_in_cluster = (vector_id % self.vectors_per_cluster as u64) as u32;
|
||||
let vector_offset = vector_index_in_cluster * self.bytes_per_vector;
|
||||
|
||||
let cluster_data = self.read_cluster(cluster_id, file, parent)?;
|
||||
|
||||
let start = vector_offset as usize;
|
||||
let end = start + self.bytes_per_vector as usize;
|
||||
if end > cluster_data.len() {
|
||||
return Err(RvfError::Code(ErrorCode::ClusterNotFound));
|
||||
}
|
||||
|
||||
Ok(cluster_data[start..end].to_vec())
|
||||
}
|
||||
|
||||
/// Read an entire cluster. Returns cluster data.
|
||||
pub fn read_cluster(
|
||||
&self,
|
||||
cluster_id: u32,
|
||||
file: &File,
|
||||
parent: Option<&File>,
|
||||
) -> Result<Vec<u8>, RvfError> {
|
||||
// Check L0 cache first
|
||||
if let Some(&cached_offset) = self.l0_cache.get(&cluster_id) {
|
||||
return read_bytes_at(file, cached_offset, self.cluster_size as usize);
|
||||
}
|
||||
|
||||
match self.cow_map.lookup(cluster_id) {
|
||||
CowMapEntry::LocalOffset(offset) => {
|
||||
read_bytes_at(file, offset, self.cluster_size as usize)
|
||||
}
|
||||
CowMapEntry::ParentRef => {
|
||||
let parent_file = parent.ok_or(RvfError::Code(ErrorCode::ParentChainBroken))?;
|
||||
let parent_offset = cluster_id as u64 * self.cluster_size as u64;
|
||||
read_bytes_at(parent_file, parent_offset, self.cluster_size as usize)
|
||||
}
|
||||
CowMapEntry::Unallocated => {
|
||||
// Return a zeroed cluster for unallocated
|
||||
Ok(vec![0u8; self.cluster_size as usize])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a vector. Handles COW: copies parent slab if inherited.
|
||||
///
|
||||
/// Writes are buffered for coalescing. Call `flush_writes` to commit.
|
||||
pub fn write_vector(&mut self, vector_id: u64, data: &[u8]) -> Result<(), RvfError> {
|
||||
if self.frozen {
|
||||
return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
|
||||
}
|
||||
if data.len() != self.bytes_per_vector as usize {
|
||||
return Err(RvfError::Code(ErrorCode::DimensionMismatch));
|
||||
}
|
||||
|
||||
let cluster_id = (vector_id / self.vectors_per_cluster as u64) as u32;
|
||||
let vector_index_in_cluster = (vector_id % self.vectors_per_cluster as u64) as u32;
|
||||
let vector_offset = vector_index_in_cluster * self.bytes_per_vector;
|
||||
|
||||
self.write_buffer
|
||||
.entry(cluster_id)
|
||||
.or_default()
|
||||
.push(PendingWrite {
|
||||
vector_offset_in_cluster: vector_offset,
|
||||
data: data.to_vec(),
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Flush write coalescing buffer. Performs actual slab copies for inherited
|
||||
/// clusters and applies all pending mutations.
|
||||
pub fn flush_writes(
|
||||
&mut self,
|
||||
file: &mut File,
|
||||
parent: Option<&File>,
|
||||
) -> Result<Vec<WitnessEvent>, RvfError> {
|
||||
if self.frozen {
|
||||
return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
|
||||
}
|
||||
|
||||
let pending: Vec<(u32, Vec<PendingWrite>)> = self.write_buffer.drain().collect();
|
||||
|
||||
let mut witness_events = Vec::new();
|
||||
|
||||
for (cluster_id, writes) in pending {
|
||||
let entry = self.cow_map.lookup(cluster_id);
|
||||
|
||||
// Get or create local cluster data
|
||||
let mut cluster_data = match entry {
|
||||
CowMapEntry::LocalOffset(offset) => {
|
||||
// Already local: read existing data
|
||||
read_bytes_at(file, offset, self.cluster_size as usize)?
|
||||
}
|
||||
CowMapEntry::ParentRef => {
|
||||
// COW: copy parent slab to local
|
||||
let parent_file = parent.ok_or(RvfError::Code(ErrorCode::ParentChainBroken))?;
|
||||
let parent_offset = cluster_id as u64 * self.cluster_size as u64;
|
||||
let parent_data =
|
||||
read_bytes_at(parent_file, parent_offset, self.cluster_size as usize)?;
|
||||
let parent_hash = simple_shake256_256(&parent_data);
|
||||
|
||||
// Allocate space at end of file
|
||||
let new_offset = file
|
||||
.seek(SeekFrom::End(0))
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
|
||||
// Write parent data as initial local copy
|
||||
file.write_all(&parent_data)
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
|
||||
// Update map
|
||||
self.cow_map
|
||||
.update(cluster_id, CowMapEntry::LocalOffset(new_offset));
|
||||
self.l0_cache.insert(cluster_id, new_offset);
|
||||
|
||||
// We'll compute new hash after mutations and emit witness then
|
||||
witness_events.push(WitnessEvent {
|
||||
event_type: 0x0E, // CLUSTER_COW
|
||||
cluster_id,
|
||||
parent_cluster_hash: parent_hash,
|
||||
new_cluster_hash: [0u8; 32], // placeholder, updated below
|
||||
});
|
||||
|
||||
parent_data
|
||||
}
|
||||
CowMapEntry::Unallocated => {
|
||||
// Allocate a new zeroed cluster
|
||||
let zeroed = vec![0u8; self.cluster_size as usize];
|
||||
let new_offset = file
|
||||
.seek(SeekFrom::End(0))
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
file.write_all(&zeroed)
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
self.cow_map
|
||||
.update(cluster_id, CowMapEntry::LocalOffset(new_offset));
|
||||
self.l0_cache.insert(cluster_id, new_offset);
|
||||
zeroed
|
||||
}
|
||||
};
|
||||
|
||||
// Apply all pending writes to the cluster data
|
||||
for pw in &writes {
|
||||
let start = pw.vector_offset_in_cluster as usize;
|
||||
let end = start + pw.data.len();
|
||||
if end > cluster_data.len() {
|
||||
return Err(RvfError::Code(ErrorCode::ClusterNotFound));
|
||||
}
|
||||
cluster_data[start..end].copy_from_slice(&pw.data);
|
||||
}
|
||||
|
||||
// Write the mutated cluster back to its local offset
|
||||
if let CowMapEntry::LocalOffset(offset) = self.cow_map.lookup(cluster_id) {
|
||||
file.seek(SeekFrom::Start(offset))
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
file.write_all(&cluster_data)
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
|
||||
// Update witness event hash if we emitted one for this cluster
|
||||
let new_hash = simple_shake256_256(&cluster_data);
|
||||
for event in witness_events.iter_mut().rev() {
|
||||
if event.cluster_id == cluster_id {
|
||||
event.new_cluster_hash = new_hash;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file.sync_all()
|
||||
.map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
|
||||
|
||||
Ok(witness_events)
|
||||
}
|
||||
|
||||
/// Snapshot-freeze: set epoch, prevent further writes to this generation.
|
||||
pub fn freeze(&mut self, epoch: u32) -> Result<(), RvfError> {
|
||||
if self.frozen {
|
||||
return Err(RvfError::Code(ErrorCode::SnapshotFrozen));
|
||||
}
|
||||
if !self.write_buffer.is_empty() {
|
||||
return Err(RvfError::Code(ErrorCode::FsyncFailed));
|
||||
}
|
||||
self.frozen = true;
|
||||
self.snapshot_epoch = epoch;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if frozen.
|
||||
pub fn is_frozen(&self) -> bool {
|
||||
self.frozen
|
||||
}
|
||||
|
||||
/// Get the snapshot epoch.
|
||||
pub fn snapshot_epoch(&self) -> u32 {
|
||||
self.snapshot_epoch
|
||||
}
|
||||
|
||||
/// Get COW statistics.
|
||||
pub fn stats(&self) -> CowStats {
|
||||
CowStats {
|
||||
cluster_count: self.cow_map.cluster_count(),
|
||||
local_cluster_count: self.cow_map.local_cluster_count(),
|
||||
cluster_size: self.cluster_size,
|
||||
vectors_per_cluster: self.vectors_per_cluster,
|
||||
frozen: self.frozen,
|
||||
snapshot_epoch: self.snapshot_epoch,
|
||||
pending_writes: self.write_buffer.values().map(|v| v.len()).sum(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about the COW engine state.
///
/// Produced by `CowEngine::stats`; a point-in-time snapshot, not a live view.
pub struct CowStats {
    /// Total clusters in the map.
    pub cluster_count: u32,
    /// Clusters with local data (COW-copied or newly written).
    pub local_cluster_count: u32,
    /// Cluster size in bytes.
    pub cluster_size: u32,
    /// Vectors per cluster.
    pub vectors_per_cluster: u32,
    /// Whether the engine is frozen.
    pub frozen: bool,
    /// Snapshot epoch (0 = mutable).
    pub snapshot_epoch: u32,
    /// Number of pending writes in the coalescing buffer.
    pub pending_writes: usize,
}
|
||||
|
||||
/// Read `len` bytes from a file at the given offset.
|
||||
///
|
||||
/// Uses `pread` on Unix to avoid seek + BufReader overhead on the hot path.
|
||||
#[cfg(unix)]
|
||||
fn read_bytes_at(file: &File, offset: u64, len: usize) -> Result<Vec<u8>, RvfError> {
|
||||
use std::os::unix::fs::FileExt;
|
||||
let mut buf = vec![0u8; len];
|
||||
file.read_exact_at(&mut buf, offset)
|
||||
.map_err(|_| RvfError::Code(ErrorCode::ClusterNotFound))?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Read `len` bytes from a file at the given offset (non-Unix fallback).
///
/// Seeks the shared file cursor then reads exactly `len` bytes. Unlike the
/// Unix `pread` path, this moves the `File`'s cursor, so it is not safe for
/// concurrent readers sharing the same handle.
///
/// Improvement over the previous version: the per-call `BufReader` was pure
/// overhead for a single exact-size read (it allocates its own internal
/// buffer and copies the data a second time); we now read straight from the
/// `&File`, which implements `Read` and `Seek` directly.
#[cfg(not(unix))]
fn read_bytes_at(mut file: &File, offset: u64, len: usize) -> Result<Vec<u8>, RvfError> {
    use std::io::{Read, Seek};
    file.seek(SeekFrom::Start(offset))
        .map_err(|_| RvfError::Code(ErrorCode::FsyncFailed))?;
    let mut buf = vec![0u8; len];
    file.read_exact(&mut buf)
        .map_err(|_| RvfError::Code(ErrorCode::ClusterNotFound))?;
    Ok(buf)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Build a parent file of `cluster_count` clusters where every byte of
    /// cluster `i` is `i & 0xFF`, so tests can tell which cluster a read hit.
    fn create_parent_file(cluster_size: u32, cluster_count: u32) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        for cluster_id in 0..cluster_count {
            let mut data = vec![0u8; cluster_size as usize];
            // Fill each cluster with its ID byte for identification
            for b in data.iter_mut() {
                *b = (cluster_id & 0xFF) as u8;
            }
            f.write_all(&data).unwrap();
        }
        f.flush().unwrap();
        f
    }

    /// Inherited (ParentRef) clusters are readable through the parent file
    /// without any COW copy taking place.
    #[test]
    fn cow_read_from_parent() {
        let cluster_size = 256u32;
        let vecs_per_cluster = 4u32;
        let bytes_per_vec = 64u32; // 16 floats * 4 bytes

        let parent_file = create_parent_file(cluster_size, 4);
        let child_file = NamedTempFile::new().unwrap();

        let engine = CowEngine::from_parent(4, cluster_size, vecs_per_cluster, bytes_per_vec);

        // Read cluster 2 from parent
        let data = engine
            .read_cluster(2, child_file.as_file(), Some(parent_file.as_file()))
            .unwrap();
        assert_eq!(data.len(), cluster_size as usize);
        assert!(data.iter().all(|&b| b == 2));
    }

    /// First write to an inherited cluster must COW-copy it locally and emit
    /// exactly one CLUSTER_COW (0x0E) witness event.
    #[test]
    fn cow_write_triggers_copy() {
        let cluster_size = 128u32;
        let vecs_per_cluster = 2u32;
        let bytes_per_vec = 64u32;

        let parent_file = create_parent_file(cluster_size, 2);
        let child_file = NamedTempFile::new().unwrap();

        let mut engine = CowEngine::from_parent(2, cluster_size, vecs_per_cluster, bytes_per_vec);

        // Write vector 0 (cluster 0)
        let new_data = vec![0xAA; bytes_per_vec as usize];
        engine.write_vector(0, &new_data).unwrap();

        let events = engine
            .flush_writes(
                &mut child_file.as_file().try_clone().unwrap(),
                Some(parent_file.as_file()),
            )
            .unwrap();

        // Should have one COW event
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, 0x0E);
        assert_eq!(events[0].cluster_id, 0);

        // Now the cluster should be local
        assert_eq!(engine.cow_map().local_cluster_count(), 1);
    }

    /// Multiple buffered writes to the same cluster coalesce into a single
    /// COW copy on flush.
    #[test]
    fn cow_write_coalescing() {
        let cluster_size = 128u32;
        let vecs_per_cluster = 2u32;
        let bytes_per_vec = 64u32;

        let parent_file = create_parent_file(cluster_size, 2);
        let child_file = NamedTempFile::new().unwrap();

        let mut engine = CowEngine::from_parent(2, cluster_size, vecs_per_cluster, bytes_per_vec);

        // Write both vectors in cluster 0
        let data_a = vec![0xAA; bytes_per_vec as usize];
        let data_b = vec![0xBB; bytes_per_vec as usize];
        engine.write_vector(0, &data_a).unwrap();
        engine.write_vector(1, &data_b).unwrap();

        let events = engine
            .flush_writes(
                &mut child_file.as_file().try_clone().unwrap(),
                Some(parent_file.as_file()),
            )
            .unwrap();

        // Only one COW copy event even though two writes
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].cluster_id, 0);
    }

    /// A frozen engine must reject any further write_vector calls.
    #[test]
    fn cow_frozen_rejects_writes() {
        let mut engine = CowEngine::new(128, 2, 64);
        engine.freeze(1).unwrap();
        assert!(engine.is_frozen());

        let result = engine.write_vector(0, &vec![0u8; 64]);
        assert!(result.is_err());
    }

    /// Unallocated clusters read back as all zeros without file access.
    #[test]
    fn cow_read_unallocated_returns_zeros() {
        let engine = CowEngine::new(128, 2, 64);
        let child_file = NamedTempFile::new().unwrap();

        let data = engine.read_cluster(0, child_file.as_file(), None).unwrap();
        assert_eq!(data.len(), 128);
        assert!(data.iter().all(|&b| b == 0));
    }

    /// stats() reflects map counts and the size of the pending write buffer.
    #[test]
    fn cow_stats() {
        let mut engine = CowEngine::from_parent(4, 256, 4, 64);
        let stats = engine.stats();
        assert_eq!(stats.cluster_count, 4);
        assert_eq!(stats.local_cluster_count, 0);
        assert!(!stats.frozen);

        // Buffer a write
        engine.write_vector(0, &vec![0u8; 64]).unwrap();
        let stats = engine.stats();
        assert_eq!(stats.pending_writes, 1);
    }
}
|
||||
225
vendor/ruvector/crates/rvf/rvf-runtime/src/cow_compact.rs
vendored
Normal file
225
vendor/ruvector/crates/rvf/rvf-runtime/src/cow_compact.rs
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
//! COW-aware compaction engine.
|
||||
//!
|
||||
//! Two compaction modes:
|
||||
//! - **Read optimize**: rewrite hot clusters contiguously for sequential I/O.
|
||||
//! - **Space reclaim**: if `hash(local) == hash(parent)`, replace LocalOffset
|
||||
//! with ParentRef to reclaim local storage.
|
||||
//!
|
||||
//! Segment preservation: unknown segments are copied forward unless
|
||||
//! `strip_unknown` is set.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use rvf_types::cow_map::CowMapEntry;
|
||||
use rvf_types::RvfError;
|
||||
|
||||
use crate::cow_map::CowMap;
|
||||
use crate::store::simple_shake256_256;
|
||||
|
||||
/// Result of a COW compaction operation.
///
/// Returned by both `compact_read_optimize` and `compact_space_reclaim`;
/// fields not relevant to a mode are left at zero.
pub struct CompactionResult {
    /// Number of clusters rewritten or reclaimed.
    pub clusters_affected: u32,
    /// Bytes reclaimed (for space_reclaim mode; 0 for read_optimize).
    pub bytes_reclaimed: u64,
    /// Number of clusters that matched parent and were converted to ParentRef.
    pub clusters_deduplicated: u32,
}
|
||||
|
||||
/// Refcount data for shared clusters.
///
/// Built by `CowCompactor::rebuild_refcounts`; clusters with no references
/// (Unallocated) have no entry at all rather than a zero count.
pub struct RefcountData {
    /// Map from cluster_id to reference count.
    pub refcounts: HashMap<u32, u32>,
}
|
||||
|
||||
/// COW-aware compaction engine.
///
/// Stateless aside from configuration; the compaction entry points are
/// associated functions operating on a caller-supplied `CowMap`.
pub struct CowCompactor {
    /// Whether to strip unknown segment types during compaction.
    /// Defaults to `false` (unknown segments are preserved).
    pub strip_unknown: bool,
}
|
||||
|
||||
impl Default for CowCompactor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl CowCompactor {
    /// Create a new compactor with default settings (unknown segments kept).
    pub fn new() -> Self {
        Self {
            strip_unknown: false,
        }
    }

    /// Read-optimize compaction: reorder local clusters for sequential read.
    ///
    /// Scans the COW map, reads all LocalOffset clusters, and rewrites them
    /// contiguously in cluster_id order. Updates the map entries to point to
    /// the new contiguous offsets.
    ///
    /// NOTE(review): this updates only the map — the actual file writes of
    /// `new_data` are left to the caller (offsets are assigned from 0 upward,
    /// as if into a fresh file). Confirm the caller performs the rewrite.
    pub fn compact_read_optimize(
        cow_map: &mut CowMap,
        local_data: &HashMap<u32, Vec<u8>>,
        cluster_size: u32,
    ) -> Result<CompactionResult, RvfError> {
        let mut clusters_affected = 0u32;
        let mut new_data: Vec<(u32, Vec<u8>)> = Vec::new();

        // Collect all local clusters in order
        for cluster_id in 0..cow_map.cluster_count() {
            if let CowMapEntry::LocalOffset(_) = cow_map.lookup(cluster_id) {
                if let Some(data) = local_data.get(&cluster_id) {
                    new_data.push((cluster_id, data.clone()));
                    clusters_affected += 1;
                }
            }
        }

        // Assign new sequential offsets (these would be written to file)
        let mut offset = 0u64;
        for (cluster_id, _data) in &new_data {
            cow_map.update(*cluster_id, CowMapEntry::LocalOffset(offset));
            offset += cluster_size as u64;
        }

        Ok(CompactionResult {
            clusters_affected,
            bytes_reclaimed: 0,
            clusters_deduplicated: 0,
        })
    }

    /// Space-reclaim compaction: if local cluster data matches parent data,
    /// replace LocalOffset with ParentRef to reclaim space.
    ///
    /// Equality is decided by comparing SHAKE256-256 digests of the two
    /// cluster payloads. Clusters missing from either map are skipped.
    pub fn compact_space_reclaim(
        cow_map: &mut CowMap,
        local_data: &HashMap<u32, Vec<u8>>,
        parent_data: &HashMap<u32, Vec<u8>>,
        cluster_size: u32,
    ) -> Result<CompactionResult, RvfError> {
        let mut clusters_deduplicated = 0u32;
        let mut bytes_reclaimed = 0u64;

        for cluster_id in 0..cow_map.cluster_count() {
            if let CowMapEntry::LocalOffset(_) = cow_map.lookup(cluster_id) {
                let local = match local_data.get(&cluster_id) {
                    Some(d) => d,
                    None => continue,
                };
                let parent = match parent_data.get(&cluster_id) {
                    Some(d) => d,
                    None => continue,
                };

                let local_hash = simple_shake256_256(local);
                let parent_hash = simple_shake256_256(parent);

                if local_hash == parent_hash {
                    // Identical to parent: drop the local copy.
                    cow_map.update(cluster_id, CowMapEntry::ParentRef);
                    clusters_deduplicated += 1;
                    bytes_reclaimed += cluster_size as u64;
                }
            }
        }

        Ok(CompactionResult {
            clusters_affected: clusters_deduplicated,
            bytes_reclaimed,
            clusters_deduplicated,
        })
    }

    /// Rebuild reference counts from the COW map.
    ///
    /// Each LocalOffset cluster has refcount 1.
    /// ParentRef clusters increment the parent's refcount.
    ///
    /// NOTE(review): both arms key the count by this map's cluster_id — a
    /// ParentRef bumps the count under the child's own id, not a separate
    /// parent-side namespace. Confirm this matches the intended semantics.
    pub fn rebuild_refcounts(cow_map: &CowMap) -> RefcountData {
        let mut refcounts = HashMap::new();

        for cluster_id in 0..cow_map.cluster_count() {
            match cow_map.lookup(cluster_id) {
                CowMapEntry::LocalOffset(_) => {
                    *refcounts.entry(cluster_id).or_insert(0) += 1;
                }
                CowMapEntry::ParentRef => {
                    // Parent cluster is referenced
                    *refcounts.entry(cluster_id).or_insert(0) += 1;
                }
                CowMapEntry::Unallocated => {}
            }
        }

        RefcountData { refcounts }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Local clusters get packed into sequential offsets, in cluster_id order.
    #[test]
    fn read_optimize_reorders_clusters() {
        let mut map = CowMap::new_flat(4);
        map.update(0, CowMapEntry::LocalOffset(0x1000));
        map.update(2, CowMapEntry::LocalOffset(0x3000));
        // 1 and 3 are unallocated

        let mut local_data = HashMap::new();
        local_data.insert(0, vec![0xAA; 256]);
        local_data.insert(2, vec![0xBB; 256]);

        let result = CowCompactor::compact_read_optimize(&mut map, &local_data, 256).unwrap();

        assert_eq!(result.clusters_affected, 2);

        // Clusters should now have sequential offsets
        assert_eq!(map.lookup(0), CowMapEntry::LocalOffset(0));
        assert_eq!(map.lookup(2), CowMapEntry::LocalOffset(256));
    }

    /// Only local clusters whose bytes match the parent are converted to
    /// ParentRef; differing clusters stay local.
    #[test]
    fn space_reclaim_deduplicates() {
        let mut map = CowMap::new_flat(3);
        let shared_data = vec![0xAA; 128];
        let different_data = vec![0xBB; 128];

        map.update(0, CowMapEntry::LocalOffset(0x100));
        map.update(1, CowMapEntry::LocalOffset(0x200));
        map.update(2, CowMapEntry::ParentRef);

        let mut local_data = HashMap::new();
        local_data.insert(0, shared_data.clone()); // same as parent
        local_data.insert(1, different_data); // different from parent

        let mut parent_data = HashMap::new();
        parent_data.insert(0, shared_data); // matches local
        parent_data.insert(1, vec![0xCC; 128]); // does not match local

        let result =
            CowCompactor::compact_space_reclaim(&mut map, &local_data, &parent_data, 128).unwrap();

        assert_eq!(result.clusters_deduplicated, 1);
        assert_eq!(result.bytes_reclaimed, 128);

        // Cluster 0 should be ParentRef now (deduplicated)
        assert_eq!(map.lookup(0), CowMapEntry::ParentRef);
        // Cluster 1 should remain local (different data)
        assert_eq!(map.lookup(1), CowMapEntry::LocalOffset(0x200));
    }

    /// Local and ParentRef clusters each count 1; unallocated have no entry.
    #[test]
    fn rebuild_refcounts() {
        let mut map = CowMap::new_flat(4);
        map.update(0, CowMapEntry::LocalOffset(0x100));
        map.update(1, CowMapEntry::ParentRef);
        map.update(2, CowMapEntry::LocalOffset(0x200));
        // 3 is unallocated

        let refcounts = CowCompactor::rebuild_refcounts(&map);

        assert_eq!(refcounts.refcounts.get(&0), Some(&1));
        assert_eq!(refcounts.refcounts.get(&1), Some(&1));
        assert_eq!(refcounts.refcounts.get(&2), Some(&1));
        assert_eq!(refcounts.refcounts.get(&3), None);
    }
}
|
||||
226
vendor/ruvector/crates/rvf/rvf-runtime/src/cow_map.rs
vendored
Normal file
226
vendor/ruvector/crates/rvf/rvf-runtime/src/cow_map.rs
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
//! COW cluster map for vector-addressed cluster resolution.
|
||||
//!
|
||||
//! Supports three formats: flat array (default), ART tree, and extent list.
|
||||
//! Currently only flat_array is implemented; ART tree and extent list are
|
||||
//! reserved for future optimization of sparse mappings.
|
||||
|
||||
use rvf_types::cow_map::{CowMapEntry, MapFormat};
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
|
||||
/// Adaptive cluster map for cluster_id -> location resolution.
///
/// Each cluster is either local (written to this file), inherited from the
/// parent (ParentRef), or unallocated.
pub struct CowMap {
    // Wire/layout format tag; only FlatArray is implemented here.
    format: MapFormat,
    // Dense entry table indexed by cluster_id (flat-array representation).
    entries: Vec<CowMapEntry>,
}
|
||||
|
||||
impl CowMap {
    /// Create a new flat-array map with `cluster_count` entries, all Unallocated.
    pub fn new_flat(cluster_count: u32) -> Self {
        Self {
            format: MapFormat::FlatArray,
            entries: vec![CowMapEntry::Unallocated; cluster_count as usize],
        }
    }

    /// Create a new flat-array map with all entries set to ParentRef.
    pub fn new_parent_ref(cluster_count: u32) -> Self {
        Self {
            format: MapFormat::FlatArray,
            entries: vec![CowMapEntry::ParentRef; cluster_count as usize],
        }
    }

    /// Look up a cluster by ID.
    ///
    /// Out-of-range IDs resolve to `Unallocated` rather than an error.
    pub fn lookup(&self, cluster_id: u32) -> CowMapEntry {
        self.entries
            .get(cluster_id as usize)
            .copied()
            .unwrap_or(CowMapEntry::Unallocated)
    }

    /// Update a cluster entry.
    ///
    /// The map grows automatically (gap-filling with `Unallocated`) if
    /// `cluster_id` is beyond the current length.
    pub fn update(&mut self, cluster_id: u32, entry: CowMapEntry) {
        let idx = cluster_id as usize;
        if idx >= self.entries.len() {
            self.entries.resize(idx + 1, CowMapEntry::Unallocated);
        }
        self.entries[idx] = entry;
    }

    /// Serialize the map to bytes.
    ///
    /// Wire format (flat_array):
    /// format(u8) | cluster_count(u32) | entries[cluster_count]
    /// Each entry: tag(u8) | offset(u64)
    /// tag 0x00 = Unallocated, tag 0x01 = ParentRef, tag 0x02 = LocalOffset
    ///
    /// All multi-byte integers are little-endian; Unallocated/ParentRef
    /// carry a zero offset so every entry is a fixed 9 bytes.
    pub fn serialize(&self) -> Vec<u8> {
        let count = self.entries.len() as u32;
        // 1 (format) + 4 (count) + count * 9 (tag + offset)
        let mut buf = Vec::with_capacity(5 + self.entries.len() * 9);
        buf.push(self.format as u8);
        buf.extend_from_slice(&count.to_le_bytes());
        for entry in &self.entries {
            match entry {
                CowMapEntry::Unallocated => {
                    buf.push(0x00);
                    buf.extend_from_slice(&0u64.to_le_bytes());
                }
                CowMapEntry::ParentRef => {
                    buf.push(0x01);
                    buf.extend_from_slice(&0u64.to_le_bytes());
                }
                CowMapEntry::LocalOffset(off) => {
                    buf.push(0x02);
                    buf.extend_from_slice(&off.to_le_bytes());
                }
            }
        }
        buf
    }

    /// Deserialize a CowMap from bytes.
    ///
    /// # Errors
    /// `CowMapCorrupt` when the buffer is too short, the stored format byte
    /// does not match `format`, the declared length overflows, or an entry
    /// carries an unknown tag. Trailing bytes beyond the declared length
    /// are ignored (length check uses `<`, not `!=`).
    pub fn deserialize(data: &[u8], format: MapFormat) -> Result<Self, RvfError> {
        if data.len() < 5 {
            return Err(RvfError::Code(ErrorCode::CowMapCorrupt));
        }
        let stored_format = data[0];
        if stored_format != format as u8 {
            return Err(RvfError::Code(ErrorCode::CowMapCorrupt));
        }
        let count = u32::from_le_bytes([data[1], data[2], data[3], data[4]]) as usize;
        // checked math: an attacker-controlled count must not overflow usize.
        let expected_len = count
            .checked_mul(9)
            .and_then(|v| v.checked_add(5))
            .ok_or(RvfError::Code(ErrorCode::CowMapCorrupt))?;
        if data.len() < expected_len {
            return Err(RvfError::Code(ErrorCode::CowMapCorrupt));
        }
        let mut entries = Vec::with_capacity(count);
        let mut offset = 5;
        for _ in 0..count {
            let tag = data[offset];
            let val = u64::from_le_bytes([
                data[offset + 1],
                data[offset + 2],
                data[offset + 3],
                data[offset + 4],
                data[offset + 5],
                data[offset + 6],
                data[offset + 7],
                data[offset + 8],
            ]);
            let entry = match tag {
                0x00 => CowMapEntry::Unallocated,
                0x01 => CowMapEntry::ParentRef,
                0x02 => CowMapEntry::LocalOffset(val),
                _ => return Err(RvfError::Code(ErrorCode::CowMapCorrupt)),
            };
            entries.push(entry);
            offset += 9;
        }
        Ok(Self { format, entries })
    }

    /// Count of clusters that have local data.
    pub fn local_cluster_count(&self) -> u32 {
        self.entries
            .iter()
            .filter(|e| matches!(e, CowMapEntry::LocalOffset(_)))
            .count() as u32
    }

    /// Total number of clusters in the map.
    pub fn cluster_count(&self) -> u32 {
        self.entries.len() as u32
    }

    /// Get the map format.
    pub fn format(&self) -> MapFormat {
        self.format
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh flat map has the requested size and every entry Unallocated.
    #[test]
    fn new_flat_all_unallocated() {
        let map = CowMap::new_flat(10);
        assert_eq!(map.cluster_count(), 10);
        assert_eq!(map.local_cluster_count(), 0);
        for i in 0..10 {
            assert_eq!(map.lookup(i), CowMapEntry::Unallocated);
        }
    }

    /// new_parent_ref initializes every entry to ParentRef.
    #[test]
    fn new_parent_ref_all_parent() {
        let map = CowMap::new_parent_ref(5);
        assert_eq!(map.cluster_count(), 5);
        for i in 0..5 {
            assert_eq!(map.lookup(i), CowMapEntry::ParentRef);
        }
    }

    /// Updates are visible via lookup and counted by local_cluster_count.
    #[test]
    fn update_and_lookup() {
        let mut map = CowMap::new_flat(4);
        map.update(1, CowMapEntry::LocalOffset(0x1000));
        map.update(3, CowMapEntry::ParentRef);
        assert_eq!(map.lookup(0), CowMapEntry::Unallocated);
        assert_eq!(map.lookup(1), CowMapEntry::LocalOffset(0x1000));
        assert_eq!(map.lookup(2), CowMapEntry::Unallocated);
        assert_eq!(map.lookup(3), CowMapEntry::ParentRef);
        assert_eq!(map.local_cluster_count(), 1);
    }

    /// Updating past the end grows the map (gap-filled with Unallocated).
    #[test]
    fn update_grows_map() {
        let mut map = CowMap::new_flat(2);
        map.update(5, CowMapEntry::LocalOffset(0x2000));
        assert_eq!(map.cluster_count(), 6);
        assert_eq!(map.lookup(5), CowMapEntry::LocalOffset(0x2000));
    }

    /// Lookup beyond the map is not an error — it reads as Unallocated.
    #[test]
    fn out_of_bounds_lookup_returns_unallocated() {
        let map = CowMap::new_flat(2);
        assert_eq!(map.lookup(100), CowMapEntry::Unallocated);
    }

    /// serialize + deserialize preserves every entry kind and offset.
    #[test]
    fn serialize_deserialize_round_trip() {
        let mut map = CowMap::new_flat(4);
        map.update(0, CowMapEntry::LocalOffset(0x100));
        map.update(1, CowMapEntry::ParentRef);
        // 2 stays Unallocated
        map.update(3, CowMapEntry::LocalOffset(0x200));

        let bytes = map.serialize();
        let map2 = CowMap::deserialize(&bytes, MapFormat::FlatArray).unwrap();

        assert_eq!(map2.cluster_count(), 4);
        assert_eq!(map2.lookup(0), CowMapEntry::LocalOffset(0x100));
        assert_eq!(map2.lookup(1), CowMapEntry::ParentRef);
        assert_eq!(map2.lookup(2), CowMapEntry::Unallocated);
        assert_eq!(map2.lookup(3), CowMapEntry::LocalOffset(0x200));
    }

    /// A truncated buffer (below the 5-byte header) is rejected.
    #[test]
    fn deserialize_corrupt_data() {
        let result = CowMap::deserialize(&[0x00, 0x01], MapFormat::FlatArray);
        assert!(result.is_err());
    }

    /// A format-byte mismatch between buffer and caller expectation is rejected.
    #[test]
    fn deserialize_wrong_format() {
        let map = CowMap::new_flat(1);
        let bytes = map.serialize();
        let result = CowMap::deserialize(&bytes, MapFormat::ArtTree);
        assert!(result.is_err());
    }
}
|
||||
122
vendor/ruvector/crates/rvf/rvf-runtime/src/deletion.rs
vendored
Normal file
122
vendor/ruvector/crates/rvf/rvf-runtime/src/deletion.rs
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
//! Logical deletion: soft-delete via JOURNAL_SEG tombstones.
|
||||
//!
|
||||
//! Deletion protocol (two-fsync):
|
||||
//! 1. Append JOURNAL_SEG with tombstone entries
|
||||
//! 2. fsync (orphan-safe: no manifest references it yet)
|
||||
//! 3. Update deletion bitmap in memory
|
||||
//! 4. Append MANIFEST_SEG with updated bitmap
|
||||
//! 5. fsync (deletion now visible to all new readers)
|
||||
//!
|
||||
//! Physical reclamation happens during compaction.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// In-memory deletion bitmap.
///
/// Tracks soft-deleted vector IDs. A production implementation would use a
/// Roaring bitmap for space efficiency; a `HashSet` keeps this correct and
/// easy to audit.
pub(crate) struct DeletionBitmap {
    deleted: HashSet<u64>,
}

impl DeletionBitmap {
    /// Create an empty bitmap with nothing marked deleted.
    pub(crate) fn new() -> Self {
        Self {
            deleted: HashSet::new(),
        }
    }

    /// Load from a list of deleted IDs (e.g., from a manifest).
    pub(crate) fn from_ids(ids: &[u64]) -> Self {
        let deleted = ids.iter().copied().collect();
        Self { deleted }
    }

    /// Mark a vector ID as soft-deleted.
    pub(crate) fn delete(&mut self, id: u64) {
        self.deleted.insert(id);
    }

    /// Mark multiple vector IDs as soft-deleted.
    #[allow(dead_code)]
    pub(crate) fn delete_batch(&mut self, ids: &[u64]) {
        self.deleted.extend(ids.iter().copied());
    }

    /// Check if a vector ID is soft-deleted.
    #[inline]
    pub(crate) fn is_deleted(&self, id: u64) -> bool {
        self.deleted.contains(&id)
    }

    /// Remove vector IDs from the bitmap (after compaction physically removes them).
    #[allow(dead_code)]
    pub(crate) fn clear_ids(&mut self, ids: &[u64]) {
        for id in ids {
            self.deleted.remove(id);
        }
    }

    /// Number of soft-deleted vectors.
    pub(crate) fn count(&self) -> usize {
        self.deleted.len()
    }

    /// Return all deleted IDs as a sorted vector.
    pub(crate) fn to_sorted_ids(&self) -> Vec<u64> {
        let mut ids = self.deleted.iter().copied().collect::<Vec<u64>>();
        ids.sort_unstable();
        ids
    }

    /// Clear all entries.
    pub(crate) fn clear(&mut self) {
        self.deleted.clear();
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises the full soft-delete lifecycle: insert, batch insert,
    // membership queries, and selective clearing.
    #[test]
    fn bitmap_basic_ops() {
        let mut bm = DeletionBitmap::new();
        assert!(!bm.is_deleted(42));
        assert_eq!(bm.count(), 0);

        bm.delete(42);
        assert!(bm.is_deleted(42));
        assert_eq!(bm.count(), 1);

        bm.delete_batch(&[100, 200, 300]);
        assert_eq!(bm.count(), 4);
        assert!(bm.is_deleted(200));

        // Clearing simulates compaction physically removing two vectors.
        bm.clear_ids(&[42, 200]);
        assert_eq!(bm.count(), 2);
        assert!(!bm.is_deleted(42));
        assert!(!bm.is_deleted(200));
        assert!(bm.is_deleted(100));
    }

    // Manifest-style bulk load round-trips every listed ID.
    #[test]
    fn bitmap_from_ids() {
        let bm = DeletionBitmap::from_ids(&[1, 2, 3]);
        assert!(bm.is_deleted(1));
        assert!(bm.is_deleted(2));
        assert!(bm.is_deleted(3));
        assert!(!bm.is_deleted(4));
    }

    // Output ordering is ascending regardless of insertion order.
    #[test]
    fn bitmap_to_sorted() {
        let mut bm = DeletionBitmap::new();
        bm.delete_batch(&[50, 10, 30, 20, 40]);
        assert_eq!(bm.to_sorted_ids(), vec![10, 20, 30, 40, 50]);
    }
}
|
||||
382
vendor/ruvector/crates/rvf/rvf-runtime/src/dos.rs
vendored
Normal file
382
vendor/ruvector/crates/rvf/rvf-runtime/src/dos.rs
vendored
Normal file
@@ -0,0 +1,382 @@
|
||||
//! DoS hardening for ADR-033 §3.3.1.
|
||||
//!
|
||||
//! Provides per-connection budget tokens, negative caching of degenerate
|
||||
//! queries, and optional proof-of-work for public endpoints.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Per-connection token bucket for rate-limiting distance operations.
///
/// Each query consumes tokens from the bucket. When tokens are exhausted,
/// queries are rejected until the bucket refills.
pub struct BudgetTokenBucket {
    /// Maximum tokens (distance ops) per window.
    max_tokens: u64,
    /// Current available tokens.
    tokens: u64,
    /// Window duration for token refill.
    window: Duration,
    /// Start of current window.
    window_start: Instant,
}

impl BudgetTokenBucket {
    /// Create a new token bucket, starting full.
    ///
    /// # Arguments
    /// * `max_tokens` - Maximum distance ops per window.
    /// * `window` - Duration of each refill window.
    pub fn new(max_tokens: u64, window: Duration) -> Self {
        Self {
            max_tokens,
            tokens: max_tokens,
            window,
            window_start: Instant::now(),
        }
    }

    /// Try to consume `cost` tokens. Returns `Ok(remaining)` if sufficient
    /// tokens are available, `Err(deficit)` if not.
    pub fn try_consume(&mut self, cost: u64) -> Result<u64, u64> {
        self.maybe_refill();
        match self.tokens.checked_sub(cost) {
            Some(left) => {
                self.tokens = left;
                Ok(left)
            }
            None => Err(cost - self.tokens),
        }
    }

    /// Check remaining tokens without consuming.
    pub fn remaining(&mut self) -> u64 {
        self.maybe_refill();
        self.tokens
    }

    /// Force a refill (for testing or manual reset).
    pub fn refill(&mut self) {
        self.tokens = self.max_tokens;
        self.window_start = Instant::now();
    }

    /// Whole-window refill: tokens jump back to the maximum once the
    /// current window has fully elapsed (not a continuous drip refill).
    fn maybe_refill(&mut self) {
        if self.window_start.elapsed() >= self.window {
            self.refill();
        }
    }
}
|
||||
|
||||
/// Quantized query signature for negative caching.
///
/// The query vector is quantized to int8 and hashed to produce a
/// compact fingerprint for degenerate query detection.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct QuerySignature {
    hash: u64,
}

impl QuerySignature {
    /// Compute a signature from a query vector.
    ///
    /// Quantizes each component to int8, then hashes with FNV-1a for speed
    /// (non-cryptographic — a collision only costs a cache miss).
    pub fn from_query(query: &[f32]) -> Self {
        const FNV_OFFSET: u64 = 0xcbf29ce484222325;
        const FNV_PRIME: u64 = 0x100000001b3;
        let hash = query.iter().fold(FNV_OFFSET, |acc, &component| {
            // Quantize to the int8 range [-128, 127] before mixing.
            let quantized = (component.clamp(-1.0, 1.0) * 127.0) as i8;
            (acc ^ quantized as u64).wrapping_mul(FNV_PRIME)
        });
        Self { hash }
    }
}
|
||||
|
||||
/// Negative cache entry tracking degenerate query hits.
struct NegativeCacheEntry {
    /// Hits recorded within the current counting window.
    hit_count: u32,
    /// When the current counting window began (reset when the window lapses).
    first_seen: Instant,
    /// Most recent hit; used for least-recently-seen eviction.
    last_seen: Instant,
}
|
||||
|
||||
/// Negative cache for degenerate queries.
|
||||
///
|
||||
/// If a query signature triggers degenerate mode more than N times
|
||||
/// in a window, forces `SafetyNetBudget::DISABLED` for subsequent
|
||||
/// matches, preventing repeated budget burn on the same attack vector.
|
||||
pub struct NegativeCache {
|
||||
entries: HashMap<QuerySignature, NegativeCacheEntry>,
|
||||
/// Number of degenerate hits before a signature is blacklisted.
|
||||
threshold: u32,
|
||||
/// Window duration for counting hits.
|
||||
window: Duration,
|
||||
/// Maximum cache size to prevent memory exhaustion.
|
||||
max_entries: usize,
|
||||
}
|
||||
|
||||
impl NegativeCache {
|
||||
/// Create a new negative cache.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `threshold` - Number of degenerate hits before blacklisting.
|
||||
/// * `window` - Duration window for counting hits.
|
||||
/// * `max_entries` - Maximum cache entries.
|
||||
pub fn new(threshold: u32, window: Duration, max_entries: usize) -> Self {
|
||||
Self {
|
||||
entries: HashMap::new(),
|
||||
threshold,
|
||||
window,
|
||||
max_entries,
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a degenerate query hit. Returns `true` if the query is
|
||||
/// now blacklisted (should force DISABLED safety net).
|
||||
pub fn record_degenerate(&mut self, sig: QuerySignature) -> bool {
|
||||
let now = Instant::now();
|
||||
|
||||
// Evict expired entries periodically.
|
||||
if self.entries.len() >= self.max_entries {
|
||||
self.evict_expired(now);
|
||||
}
|
||||
|
||||
// If still at capacity, evict oldest.
|
||||
if self.entries.len() >= self.max_entries {
|
||||
self.evict_oldest();
|
||||
}
|
||||
|
||||
let entry = self.entries.entry(sig).or_insert(NegativeCacheEntry {
|
||||
hit_count: 0,
|
||||
first_seen: now,
|
||||
last_seen: now,
|
||||
});
|
||||
|
||||
// Reset if outside window.
|
||||
if now.duration_since(entry.first_seen) > self.window {
|
||||
entry.hit_count = 0;
|
||||
entry.first_seen = now;
|
||||
}
|
||||
|
||||
entry.hit_count += 1;
|
||||
entry.last_seen = now;
|
||||
|
||||
entry.hit_count >= self.threshold
|
||||
}
|
||||
|
||||
/// Check if a query signature is blacklisted.
|
||||
pub fn is_blacklisted(&self, sig: &QuerySignature) -> bool {
|
||||
if let Some(entry) = self.entries.get(sig) {
|
||||
entry.hit_count >= self.threshold
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of currently tracked signatures.
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
|
||||
/// Check if the cache is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entries.is_empty()
|
||||
}
|
||||
|
||||
fn evict_expired(&mut self, now: Instant) {
|
||||
self.entries
|
||||
.retain(|_, entry| now.duration_since(entry.first_seen) <= self.window);
|
||||
}
|
||||
|
||||
fn evict_oldest(&mut self) {
|
||||
if let Some(oldest_key) = self
|
||||
.entries
|
||||
.iter()
|
||||
.min_by_key(|(_, e)| e.last_seen)
|
||||
.map(|(k, _)| *k)
|
||||
{
|
||||
self.entries.remove(&oldest_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Proof-of-work challenge for public endpoints.
///
/// The caller must find a nonce such that `hash(challenge || nonce)`
/// has `difficulty` leading zero bits. This is opt-in, not default.
#[derive(Clone, Debug)]
pub struct ProofOfWork {
    /// The challenge bytes (typically random).
    pub challenge: [u8; 16],
    /// Required leading zero bits in the hash. Capped at MAX_DIFFICULTY.
    pub difficulty: u8,
}

impl ProofOfWork {
    /// Maximum allowed difficulty (24 bits = ~16M hashes average).
    /// Higher values risk CPU-bound DoS.
    pub const MAX_DIFFICULTY: u8 = 24;

    /// FNV-1a over `challenge` followed by the little-endian nonce bytes.
    /// Fast and non-cryptographic by design: this is DoS mitigation only.
    fn hash_nonce(&self, nonce: u64) -> u64 {
        let nonce_bytes = nonce.to_le_bytes();
        let mut hash: u64 = 0xcbf29ce484222325;
        for &byte in self.challenge.iter().chain(nonce_bytes.iter()) {
            hash = (hash ^ byte as u64).wrapping_mul(0x100000001b3);
        }
        hash
    }

    /// Verify that a nonce satisfies the proof-of-work requirement.
    ///
    /// Clamps difficulty to MAX_DIFFICULTY to prevent compute DoS.
    pub fn verify(&self, nonce: u64) -> bool {
        let required = self.difficulty.min(Self::MAX_DIFFICULTY);
        self.hash_nonce(nonce).leading_zeros() as u8 >= required
    }

    /// Find a valid nonce (for testing / client-side use).
    /// Returns `None` if no nonce is found within the attempt budget
    /// (4 * 2^difficulty tries, with difficulty clamped).
    pub fn solve(&self) -> Option<u64> {
        let base: u64 = 1u64 << self.difficulty.min(Self::MAX_DIFFICULTY).min(30);
        (0..base.saturating_mul(4)).find(|&nonce| self.verify(nonce))
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn token_bucket_basic() {
        let mut bucket = BudgetTokenBucket::new(100, Duration::from_secs(1));
        assert_eq!(bucket.remaining(), 100);
        assert_eq!(bucket.try_consume(30), Ok(70));
        assert_eq!(bucket.remaining(), 70);
    }

    #[test]
    fn token_bucket_exhaustion() {
        let mut bucket = BudgetTokenBucket::new(10, Duration::from_secs(60));
        assert_eq!(bucket.try_consume(10), Ok(0));
        assert!(bucket.try_consume(1).is_err());
    }

    // Uses a 1 ms window plus a real sleep so the elapsed-window refill fires.
    #[test]
    fn token_bucket_refill() {
        let mut bucket = BudgetTokenBucket::new(100, Duration::from_millis(1));
        bucket.try_consume(100).unwrap();
        assert!(bucket.try_consume(1).is_err());
        std::thread::sleep(Duration::from_millis(2));
        assert_eq!(bucket.remaining(), 100);
    }

    #[test]
    fn token_bucket_manual_refill() {
        let mut bucket = BudgetTokenBucket::new(100, Duration::from_secs(60));
        bucket.try_consume(100).unwrap();
        bucket.refill();
        assert_eq!(bucket.remaining(), 100);
    }

    #[test]
    fn query_signature_deterministic() {
        let query = vec![0.1, 0.2, 0.3, 0.4];
        let sig1 = QuerySignature::from_query(&query);
        let sig2 = QuerySignature::from_query(&query);
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn query_signature_different_vectors() {
        let sig1 = QuerySignature::from_query(&[0.1, 0.2, 0.3]);
        let sig2 = QuerySignature::from_query(&[0.4, 0.5, 0.6]);
        assert_ne!(sig1, sig2);
    }

    #[test]
    fn negative_cache_below_threshold() {
        let mut cache = NegativeCache::new(3, Duration::from_secs(60), 1000);
        let sig = QuerySignature::from_query(&[0.1, 0.2]);
        assert!(!cache.record_degenerate(sig));
        assert!(!cache.record_degenerate(sig));
        assert!(!cache.is_blacklisted(&sig));
    }

    #[test]
    fn negative_cache_reaches_threshold() {
        let mut cache = NegativeCache::new(3, Duration::from_secs(60), 1000);
        let sig = QuerySignature::from_query(&[0.1, 0.2]);
        cache.record_degenerate(sig);
        cache.record_degenerate(sig);
        assert!(cache.record_degenerate(sig)); // 3rd hit = blacklisted.
        assert!(cache.is_blacklisted(&sig));
    }

    // Capacity bound holds even when more distinct signatures arrive
    // than the cache can track.
    #[test]
    fn negative_cache_max_entries() {
        let mut cache = NegativeCache::new(100, Duration::from_secs(60), 5);
        for i in 0..10 {
            let sig = QuerySignature::from_query(&[i as f32]);
            cache.record_degenerate(sig);
        }
        assert!(cache.len() <= 5);
    }

    #[test]
    fn negative_cache_empty() {
        let cache = NegativeCache::new(3, Duration::from_secs(60), 1000);
        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn proof_of_work_low_difficulty() {
        let pow = ProofOfWork {
            challenge: [0xAB; 16],
            difficulty: 1, // Very easy.
        };
        let nonce = pow.solve().expect("should solve easily");
        assert!(pow.verify(nonce));
    }

    #[test]
    fn proof_of_work_wrong_nonce() {
        let pow = ProofOfWork {
            challenge: [0xAB; 16],
            difficulty: 16, // Moderate difficulty.
        };
        // Random nonce is very unlikely to pass.
        assert!(!pow.verify(0xDEADBEEF));
    }

    #[test]
    fn proof_of_work_solve_and_verify() {
        let pow = ProofOfWork {
            challenge: [0x42; 16],
            difficulty: 8,
        };
        let nonce = pow.solve().expect("should solve d=8");
        assert!(pow.verify(nonce));
    }

    #[test]
    fn proof_of_work_max_difficulty_clamped() {
        let pow = ProofOfWork {
            challenge: [0x42; 16],
            difficulty: 255, // Extreme — will be clamped to MAX_DIFFICULTY.
        };
        // verify() clamps internally, so this is equivalent to d=24.
        // solve() uses clamped difficulty too.
        assert_eq!(pow.difficulty.min(ProofOfWork::MAX_DIFFICULTY), 24);
    }
}
|
||||
382
vendor/ruvector/crates/rvf/rvf-runtime/src/ffi.rs
vendored
Normal file
382
vendor/ruvector/crates/rvf/rvf-runtime/src/ffi.rs
vendored
Normal file
@@ -0,0 +1,382 @@
|
||||
//! C FFI for App Clip / mobile integration.
|
||||
//!
|
||||
//! These `extern "C"` functions can be compiled into a static library
|
||||
//! (.a / .xcframework) and called directly from Swift or Kotlin.
|
||||
//!
|
||||
//! Build for iOS:
|
||||
//! cargo build --release --target aarch64-apple-ios --lib
|
||||
//! cargo build --release --target aarch64-apple-ios-sim --lib
|
||||
//!
|
||||
//! Build for Android:
|
||||
//! cargo build --release --target aarch64-linux-android --lib
|
||||
//!
|
||||
//! The App Clip contains ~50 KB of this library. Combined with the QR
|
||||
//! seed payload, the user experience is: Scan → Boot → Intelligence.
|
||||
|
||||
use crate::compress;
|
||||
use crate::qr_seed::ParsedSeed;
|
||||
use crate::seed_crypto;
|
||||
use rvf_types::qr_seed::{SeedHeader, SEED_HEADER_SIZE};
|
||||
|
||||
/// Result codes for FFI functions. Zero is success; negative values
/// identify the failure class so Swift/Kotlin callers can branch
/// without string parsing.
pub const RVQS_OK: i32 = 0;
/// A required pointer argument was null.
pub const RVQS_ERR_NULL_PTR: i32 = -1;
/// The input buffer is smaller than the fixed-size seed header.
pub const RVQS_ERR_TOO_SHORT: i32 = -2;
/// The seed header failed to decode (bad magic or malformed header).
pub const RVQS_ERR_BAD_MAGIC: i32 = -3;
/// The signature is missing or did not verify against the supplied key.
pub const RVQS_ERR_SIGNATURE_INVALID: i32 = -4;
/// The content hash did not match the payload sections.
pub const RVQS_ERR_HASH_MISMATCH: i32 = -5;
/// Microkernel decompression failed.
pub const RVQS_ERR_DECOMPRESS_FAIL: i32 = -6;
/// The caller-supplied output buffer is too small for the result.
pub const RVQS_ERR_BUFFER_TOO_SMALL: i32 = -7;
/// The seed payload could not be parsed.
pub const RVQS_ERR_PARSE_FAIL: i32 = -8;
|
||||
|
||||
/// Opaque header struct for C interop (mirrors SeedHeader layout).
///
/// Field order and types must track `SeedHeader` exactly — a
/// compile-time size assertion elsewhere in this file enforces that the
/// two stay the same width.
#[repr(C)]
pub struct RvqsHeaderC {
    pub seed_magic: u32,
    pub seed_version: u16,
    pub flags: u16,
    pub file_id: [u8; 8],
    pub total_vector_count: u32,
    pub dimension: u16,
    pub base_dtype: u8,
    pub profile_id: u8,
    pub created_ns: u64,
    pub microkernel_offset: u32,
    pub microkernel_size: u32,
    pub download_manifest_offset: u32,
    pub download_manifest_size: u32,
    pub sig_algo: u16,
    pub sig_length: u16,
    pub total_seed_size: u32,
    pub content_hash: [u8; 8],
}
|
||||
|
||||
// Compile-time guarantee that the C mirror has exactly the on-wire header
// size; the build fails if `RvqsHeaderC` drifts from `SeedHeader`'s layout.
const _: () = assert!(core::mem::size_of::<RvqsHeaderC>() == SEED_HEADER_SIZE);

impl From<SeedHeader> for RvqsHeaderC {
    /// Field-by-field copy from the Rust header into the `#[repr(C)]` mirror.
    fn from(h: SeedHeader) -> Self {
        Self {
            seed_magic: h.seed_magic,
            seed_version: h.seed_version,
            flags: h.flags,
            file_id: h.file_id,
            total_vector_count: h.total_vector_count,
            dimension: h.dimension,
            base_dtype: h.base_dtype,
            profile_id: h.profile_id,
            created_ns: h.created_ns,
            microkernel_offset: h.microkernel_offset,
            microkernel_size: h.microkernel_size,
            download_manifest_offset: h.download_manifest_offset,
            download_manifest_size: h.download_manifest_size,
            sig_algo: h.sig_algo,
            sig_length: h.sig_length,
            total_seed_size: h.total_seed_size,
            content_hash: h.content_hash,
        }
    }
}
|
||||
|
||||
/// Parse a QR seed payload and extract the header.
///
/// Returns `RVQS_OK` and fills `out` on success, or a negative
/// `RVQS_ERR_*` code on failure.
///
/// # Safety
/// `data` must point to `data_len` valid bytes. `out` must point to a valid `RvqsHeaderC`.
#[no_mangle]
pub unsafe extern "C" fn rvqs_parse_header(
    data: *const u8,
    data_len: usize,
    out: *mut RvqsHeaderC,
) -> i32 {
    if data.is_null() || out.is_null() {
        return RVQS_ERR_NULL_PTR;
    }
    if data_len < SEED_HEADER_SIZE {
        return RVQS_ERR_TOO_SHORT;
    }

    // SAFETY: caller guarantees `data` is valid for `data_len` bytes.
    let slice = core::slice::from_raw_parts(data, data_len);
    match SeedHeader::from_bytes(slice) {
        Ok(header) => {
            // SAFETY: caller guarantees `out` points to a valid RvqsHeaderC.
            *out = header.into();
            RVQS_OK
        }
        // Any header decode failure is reported as a bad magic.
        Err(_) => RVQS_ERR_BAD_MAGIC,
    }
}
|
||||
|
||||
/// Verify the HMAC-SHA256 signature of a QR seed.
///
/// Returns `RVQS_OK` when the seed's signature verifies against `key`,
/// otherwise a negative `RVQS_ERR_*` code (an unsigned seed is treated
/// as an invalid signature).
///
/// # Safety
/// All pointers must be valid for their respective lengths.
#[no_mangle]
pub unsafe extern "C" fn rvqs_verify_signature(
    data: *const u8,
    data_len: usize,
    key: *const u8,
    key_len: usize,
) -> i32 {
    if data.is_null() || key.is_null() {
        return RVQS_ERR_NULL_PTR;
    }
    if data_len < SEED_HEADER_SIZE {
        return RVQS_ERR_TOO_SHORT;
    }

    // SAFETY: caller guarantees both pointers are valid for their lengths.
    let slice = core::slice::from_raw_parts(data, data_len);
    let key_slice = core::slice::from_raw_parts(key, key_len);

    let parsed = match ParsedSeed::parse(slice) {
        Ok(p) => p,
        Err(_) => return RVQS_ERR_PARSE_FAIL,
    };

    // A seed without an embedded signature cannot be verified.
    let signature = match parsed.signature {
        Some(s) => s,
        None => return RVQS_ERR_SIGNATURE_INVALID,
    };

    let signed_payload = match parsed.signed_payload(slice) {
        Some(p) => p,
        None => return RVQS_ERR_SIGNATURE_INVALID,
    };

    if seed_crypto::verify_seed(key_slice, signed_payload, signature) {
        RVQS_OK
    } else {
        RVQS_ERR_SIGNATURE_INVALID
    }
}
|
||||
|
||||
/// Verify the content hash of a QR seed payload.
///
/// The hash input is the microkernel bytes followed by the manifest
/// bytes; a missing section contributes nothing.
///
/// # Safety
/// `data` must point to `data_len` valid bytes.
#[no_mangle]
pub unsafe extern "C" fn rvqs_verify_content_hash(data: *const u8, data_len: usize) -> i32 {
    if data.is_null() {
        return RVQS_ERR_NULL_PTR;
    }
    if data_len < SEED_HEADER_SIZE {
        return RVQS_ERR_TOO_SHORT;
    }

    // SAFETY: caller guarantees `data` is valid for `data_len` bytes.
    let slice = core::slice::from_raw_parts(data, data_len);
    let parsed = match ParsedSeed::parse(slice) {
        Ok(p) => p,
        Err(_) => return RVQS_ERR_PARSE_FAIL,
    };

    // Absent sections hash as empty byte strings.
    let microkernel = parsed.microkernel.unwrap_or(&[]);
    let manifest = parsed.manifest_bytes.unwrap_or(&[]);

    let mut hash_input = Vec::with_capacity(microkernel.len() + manifest.len());
    hash_input.extend_from_slice(microkernel);
    hash_input.extend_from_slice(manifest);

    if seed_crypto::verify_content_hash(&parsed.header.content_hash, &hash_input) {
        RVQS_OK
    } else {
        RVQS_ERR_HASH_MISMATCH
    }
}
|
||||
|
||||
/// Decompress the microkernel from a QR seed.
///
/// A seed with no microkernel section is reported as success with
/// `*out_len == 0`.
///
/// # Safety
/// `data` must point to `data_len` valid bytes. `out` must point to `out_cap` bytes.
/// `out_len` will receive the actual decompressed size.
#[no_mangle]
pub unsafe extern "C" fn rvqs_decompress_microkernel(
    data: *const u8,
    data_len: usize,
    out: *mut u8,
    out_cap: usize,
    out_len: *mut usize,
) -> i32 {
    if data.is_null() || out.is_null() || out_len.is_null() {
        return RVQS_ERR_NULL_PTR;
    }

    // SAFETY: caller guarantees `data` is valid for `data_len` bytes.
    let slice = core::slice::from_raw_parts(data, data_len);
    let parsed = match ParsedSeed::parse(slice) {
        Ok(p) => p,
        Err(_) => return RVQS_ERR_PARSE_FAIL,
    };

    let compressed = match parsed.microkernel {
        Some(m) => m,
        None => {
            // SAFETY: caller guarantees `out_len` is a valid pointer.
            *out_len = 0;
            return RVQS_OK;
        }
    };

    let decompressed = match compress::decompress(compressed) {
        Ok(d) => d,
        Err(_) => return RVQS_ERR_DECOMPRESS_FAIL,
    };

    // Capacity is checked before writing so a short buffer never overflows.
    if decompressed.len() > out_cap {
        return RVQS_ERR_BUFFER_TOO_SMALL;
    }

    // SAFETY: caller guarantees `out` is valid for `out_cap` bytes, and the
    // copy length was just checked against `out_cap`.
    let out_slice = core::slice::from_raw_parts_mut(out, out_cap);
    out_slice[..decompressed.len()].copy_from_slice(&decompressed);
    *out_len = decompressed.len();

    RVQS_OK
}
|
||||
|
||||
/// Get the download manifest URL from a parsed seed.
///
/// Returns the first host's URL; a manifest with no hosts is reported as
/// success with `*url_len == 0`.
///
/// # Safety
/// All pointers must be valid. `url_buf` must have `url_cap` bytes available.
#[no_mangle]
pub unsafe extern "C" fn rvqs_get_primary_host_url(
    data: *const u8,
    data_len: usize,
    url_buf: *mut u8,
    url_cap: usize,
    url_len: *mut usize,
) -> i32 {
    if data.is_null() || url_buf.is_null() || url_len.is_null() {
        return RVQS_ERR_NULL_PTR;
    }

    // SAFETY: caller guarantees `data` is valid for `data_len` bytes.
    let slice = core::slice::from_raw_parts(data, data_len);
    let parsed = match ParsedSeed::parse(slice) {
        Ok(p) => p,
        Err(_) => return RVQS_ERR_PARSE_FAIL,
    };

    let manifest = match parsed.parse_manifest() {
        Ok(m) => m,
        Err(_) => return RVQS_ERR_PARSE_FAIL,
    };

    let host = match manifest.hosts.first() {
        Some(h) => h,
        None => {
            // SAFETY: caller guarantees `url_len` is a valid pointer.
            *url_len = 0;
            return RVQS_OK;
        }
    };

    // Only `url_length` bytes of the fixed-size URL field are meaningful.
    let url_bytes = &host.url[..host.url_length as usize];
    if url_bytes.len() > url_cap {
        return RVQS_ERR_BUFFER_TOO_SMALL;
    }

    // SAFETY: caller guarantees `url_buf` is valid for `url_cap` bytes, and
    // the copy length was just checked against `url_cap`.
    let out_slice = core::slice::from_raw_parts_mut(url_buf, url_cap);
    out_slice[..url_bytes.len()].copy_from_slice(url_bytes);
    *url_len = url_bytes.len();

    RVQS_OK
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::qr_seed::{make_host_entry, SeedBuilder};
    use rvf_types::qr_seed::*;

    // Shared fixture: a signed seed with a 2000-byte microkernel and one host.
    fn build_signed_seed() -> Vec<u8> {
        let key = b"test-key-for-ffi-unit-testing-ok";
        let mk = crate::compress::compress(&[0xCA; 2000]);
        let host = make_host_entry("https://cdn.test.com/brain.rvf", 0, 1, [0xAA; 16]).unwrap();

        let builder = SeedBuilder::new([0x01; 8], 128, 1000)
            .with_microkernel(mk)
            .add_host(host);
        let (payload, _header) = builder.build_and_sign(key).unwrap();
        payload
    }

    #[test]
    fn ffi_parse_header() {
        let payload = build_signed_seed();
        let mut header = core::mem::MaybeUninit::<RvqsHeaderC>::uninit();
        let rc = unsafe { rvqs_parse_header(payload.as_ptr(), payload.len(), header.as_mut_ptr()) };
        assert_eq!(rc, RVQS_OK);
        let header = unsafe { header.assume_init() };
        assert_eq!(header.seed_magic, SEED_MAGIC);
        assert_eq!(header.dimension, 128);
    }

    #[test]
    fn ffi_verify_signature() {
        let key = b"test-key-for-ffi-unit-testing-ok";
        let payload = build_signed_seed();
        let rc = unsafe {
            rvqs_verify_signature(payload.as_ptr(), payload.len(), key.as_ptr(), key.len())
        };
        assert_eq!(rc, RVQS_OK);
    }

    #[test]
    fn ffi_verify_signature_wrong_key() {
        let payload = build_signed_seed();
        let bad_key = b"wrong-key-should-fail-verificatn";
        let rc = unsafe {
            rvqs_verify_signature(
                payload.as_ptr(),
                payload.len(),
                bad_key.as_ptr(),
                bad_key.len(),
            )
        };
        assert_eq!(rc, RVQS_ERR_SIGNATURE_INVALID);
    }

    #[test]
    fn ffi_verify_content_hash() {
        let payload = build_signed_seed();
        let rc = unsafe { rvqs_verify_content_hash(payload.as_ptr(), payload.len()) };
        assert_eq!(rc, RVQS_OK);
    }

    // Round-trips the fixture microkernel through the FFI decompressor.
    #[test]
    fn ffi_decompress_microkernel() {
        let payload = build_signed_seed();
        let mut out = vec![0u8; 8192];
        let mut out_len: usize = 0;
        let rc = unsafe {
            rvqs_decompress_microkernel(
                payload.as_ptr(),
                payload.len(),
                out.as_mut_ptr(),
                out.len(),
                &mut out_len,
            )
        };
        assert_eq!(rc, RVQS_OK);
        assert_eq!(out_len, 2000);
        assert_eq!(&out[..out_len], &[0xCA; 2000]);
    }

    #[test]
    fn ffi_get_primary_host_url() {
        let payload = build_signed_seed();
        let mut url_buf = vec![0u8; 256];
        let mut url_len: usize = 0;
        let rc = unsafe {
            rvqs_get_primary_host_url(
                payload.as_ptr(),
                payload.len(),
                url_buf.as_mut_ptr(),
                url_buf.len(),
                &mut url_len,
            )
        };
        assert_eq!(rc, RVQS_OK);
        let url = core::str::from_utf8(&url_buf[..url_len]).unwrap();
        assert_eq!(url, "https://cdn.test.com/brain.rvf");
    }

    #[test]
    fn ffi_null_ptr_returns_error() {
        let mut header = core::mem::MaybeUninit::<RvqsHeaderC>::uninit();
        let rc = unsafe { rvqs_parse_header(core::ptr::null(), 0, header.as_mut_ptr()) };
        assert_eq!(rc, RVQS_ERR_NULL_PTR);
    }
}
|
||||
262
vendor/ruvector/crates/rvf/rvf-runtime/src/filter.rs
vendored
Normal file
262
vendor/ruvector/crates/rvf/rvf-runtime/src/filter.rs
vendored
Normal file
@@ -0,0 +1,262 @@
|
||||
//! Filter expression evaluation for metadata-based vector filtering.
|
||||
//!
|
||||
//! Filter expressions are boolean predicate trees evaluated against
|
||||
//! per-vector metadata. The runtime selects a strategy (pre-filter,
|
||||
//! intra-filter, or post-filter) based on estimated selectivity.
|
||||
|
||||
use crate::options::MetadataValue;
|
||||
|
||||
/// A filter expression for metadata-based vector filtering.
///
/// Leaf nodes compare a metadata field (addressed by its `u16` field id)
/// against a literal value. Internal nodes combine sub-expressions with
/// boolean logic.
#[derive(Clone, Debug)]
pub enum FilterExpr {
    /// field == value
    Eq(u16, FilterValue),
    /// field != value
    Ne(u16, FilterValue),
    /// field < value
    Lt(u16, FilterValue),
    /// field <= value
    Le(u16, FilterValue),
    /// field > value
    Gt(u16, FilterValue),
    /// field >= value
    Ge(u16, FilterValue),
    /// field in [values]
    In(u16, Vec<FilterValue>),
    /// field in the half-open interval [low, high)
    Range(u16, FilterValue, FilterValue),
    /// All sub-expressions must match.
    And(Vec<FilterExpr>),
    /// Any sub-expression must match.
    Or(Vec<FilterExpr>),
    /// Negate the sub-expression.
    Not(Box<FilterExpr>),
}
|
||||
|
||||
/// A typed value used in filter comparisons.
#[derive(Clone, Debug, PartialEq)]
pub enum FilterValue {
    U64(u64),
    I64(i64),
    F64(f64),
    String(String),
    Bool(bool),
}

impl FilterValue {
    /// Compare two filter values. Returns None if types are incompatible —
    /// cross-type comparisons never match rather than coercing.
    fn partial_cmp_value(&self, other: &Self) -> Option<std::cmp::Ordering> {
        use FilterValue::*;
        match (self, other) {
            (U64(a), U64(b)) => a.partial_cmp(b),
            (I64(a), I64(b)) => a.partial_cmp(b),
            (F64(a), F64(b)) => a.partial_cmp(b),
            (String(a), String(b)) => a.partial_cmp(b),
            (Bool(a), Bool(b)) => a.partial_cmp(b),
            _ => None,
        }
    }
}
|
||||
|
||||
/// In-memory metadata store for filter evaluation.
|
||||
/// Maps (vector_id, field_id) -> MetadataValue.
|
||||
pub(crate) struct MetadataStore {
|
||||
/// Entries indexed by vector position.
|
||||
entries: Vec<Vec<(u16, FilterValue)>>,
|
||||
/// Mapping from vector_id to position index.
|
||||
id_to_pos: std::collections::HashMap<u64, usize>,
|
||||
}
|
||||
|
||||
impl MetadataStore {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
entries: Vec::new(),
|
||||
id_to_pos: std::collections::HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add metadata for a vector. `fields` are (field_id, value) pairs.
|
||||
pub(crate) fn insert(&mut self, vector_id: u64, fields: Vec<(u16, FilterValue)>) {
|
||||
let pos = self.entries.len();
|
||||
self.id_to_pos.insert(vector_id, pos);
|
||||
self.entries.push(fields);
|
||||
}
|
||||
|
||||
/// Get a field value for a vector.
|
||||
pub(crate) fn get_field(&self, vector_id: u64, field_id: u16) -> Option<&FilterValue> {
|
||||
let pos = self.id_to_pos.get(&vector_id)?;
|
||||
self.entries
|
||||
.get(*pos)?
|
||||
.iter()
|
||||
.find(|(fid, _)| *fid == field_id)
|
||||
.map(|(_, v)| v)
|
||||
}
|
||||
|
||||
/// Remove all metadata for the given vector IDs.
|
||||
pub(crate) fn remove_ids(&mut self, ids: &[u64]) {
|
||||
for id in ids {
|
||||
self.id_to_pos.remove(id);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return vector count tracked by the metadata store.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn len(&self) -> usize {
|
||||
self.id_to_pos.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate a filter expression against a single vector's metadata.
|
||||
pub(crate) fn evaluate(expr: &FilterExpr, vector_id: u64, meta: &MetadataStore) -> bool {
|
||||
match expr {
|
||||
FilterExpr::Eq(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.map(|v| v == val)
|
||||
.unwrap_or(false),
|
||||
FilterExpr::Ne(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.map(|v| v != val)
|
||||
.unwrap_or(true),
|
||||
FilterExpr::Lt(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.and_then(|v| v.partial_cmp_value(val))
|
||||
.map(|ord| ord == std::cmp::Ordering::Less)
|
||||
.unwrap_or(false),
|
||||
FilterExpr::Le(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.and_then(|v| v.partial_cmp_value(val))
|
||||
.map(|ord| ord != std::cmp::Ordering::Greater)
|
||||
.unwrap_or(false),
|
||||
FilterExpr::Gt(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.and_then(|v| v.partial_cmp_value(val))
|
||||
.map(|ord| ord == std::cmp::Ordering::Greater)
|
||||
.unwrap_or(false),
|
||||
FilterExpr::Ge(field_id, val) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.and_then(|v| v.partial_cmp_value(val))
|
||||
.map(|ord| ord != std::cmp::Ordering::Less)
|
||||
.unwrap_or(false),
|
||||
FilterExpr::In(field_id, vals) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.map(|v| vals.contains(v))
|
||||
.unwrap_or(false),
|
||||
FilterExpr::Range(field_id, low, high) => meta
|
||||
.get_field(vector_id, *field_id)
|
||||
.and_then(|v| {
|
||||
let ge_low = v
|
||||
.partial_cmp_value(low)
|
||||
.map(|o| o != std::cmp::Ordering::Less)?;
|
||||
let lt_high = v
|
||||
.partial_cmp_value(high)
|
||||
.map(|o| o == std::cmp::Ordering::Less)?;
|
||||
Some(ge_low && lt_high)
|
||||
})
|
||||
.unwrap_or(false),
|
||||
FilterExpr::And(exprs) => exprs.iter().all(|e| evaluate(e, vector_id, meta)),
|
||||
FilterExpr::Or(exprs) => exprs.iter().any(|e| evaluate(e, vector_id, meta)),
|
||||
FilterExpr::Not(expr) => !evaluate(expr, vector_id, meta),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a MetadataValue (options module) to a FilterValue for evaluation.
///
/// NOTE(review): `Bytes` payloads are dropped and mapped to an empty
/// string, so byte-valued fields can never match a concrete filter value —
/// presumably intentional (bytes are not filterable), but worth confirming
/// with callers.
pub(crate) fn metadata_value_to_filter(mv: &MetadataValue) -> FilterValue {
    match mv {
        MetadataValue::U64(v) => FilterValue::U64(*v),
        MetadataValue::I64(v) => FilterValue::I64(*v),
        MetadataValue::F64(v) => FilterValue::F64(*v),
        MetadataValue::String(v) => FilterValue::String(v.clone()),
        // Bytes carry no filterable representation; collapse to "".
        MetadataValue::Bytes(_) => FilterValue::String(String::new()),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Three vectors; field 0 holds a fruit name, field 1 a numeric score.
    fn make_store() -> MetadataStore {
        let mut store = MetadataStore::new();
        let rows: [(u64, &str, u64); 3] =
            [(0, "apple", 100), (1, "banana", 200), (2, "apple", 300)];
        for (id, name, score) in rows {
            store.insert(
                id,
                vec![
                    (0, FilterValue::String(name.into())),
                    (1, FilterValue::U64(score)),
                ],
            );
        }
        store
    }

    #[test]
    fn filter_eq() {
        let store = make_store();
        let is_apple = FilterExpr::Eq(0, FilterValue::String("apple".into()));
        assert!(evaluate(&is_apple, 0, &store));
        assert!(!evaluate(&is_apple, 1, &store));
        assert!(evaluate(&is_apple, 2, &store));
    }

    #[test]
    fn filter_ne() {
        let store = make_store();
        let not_apple = FilterExpr::Ne(0, FilterValue::String("apple".into()));
        assert!(!evaluate(&not_apple, 0, &store));
        assert!(evaluate(&not_apple, 1, &store));
    }

    #[test]
    fn filter_range() {
        let store = make_store();
        let mid_band = FilterExpr::Range(1, FilterValue::U64(150), FilterValue::U64(250));
        assert!(!evaluate(&mid_band, 0, &store)); // 100 falls below the band
        assert!(evaluate(&mid_band, 1, &store)); // 200 sits in [150, 250)
        assert!(!evaluate(&mid_band, 2, &store)); // 300 is past the half-open end
    }

    #[test]
    fn filter_and_or() {
        let store = make_store();
        let apple_over_150 = FilterExpr::And(vec![
            FilterExpr::Eq(0, FilterValue::String("apple".into())),
            FilterExpr::Gt(1, FilterValue::U64(150)),
        ]);
        assert!(!evaluate(&apple_over_150, 0, &store)); // right fruit, score too low
        assert!(!evaluate(&apple_over_150, 1, &store)); // wrong fruit
        assert!(evaluate(&apple_over_150, 2, &store)); // both clauses hold
    }

    #[test]
    fn filter_not() {
        let store = make_store();
        let not_apple = FilterExpr::Not(Box::new(FilterExpr::Eq(
            0,
            FilterValue::String("apple".into()),
        )));
        assert!(!evaluate(&not_apple, 0, &store));
        assert!(evaluate(&not_apple, 1, &store));
    }

    #[test]
    fn filter_in() {
        let store = make_store();
        let score_set = FilterExpr::In(1, vec![FilterValue::U64(100), FilterValue::U64(300)]);
        assert!(evaluate(&score_set, 0, &store));
        assert!(!evaluate(&score_set, 1, &store));
        assert!(evaluate(&score_set, 2, &store));
    }
}
|
||||
76
vendor/ruvector/crates/rvf/rvf-runtime/src/lib.rs
vendored
Normal file
76
vendor/ruvector/crates/rvf/rvf-runtime/src/lib.rs
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
//! RuVector Format runtime — the main user-facing API.
|
||||
//!
|
||||
//! This crate provides [`RvfStore`], the primary interface for creating,
|
||||
//! opening, querying, and managing RVF vector stores. It ties together
|
||||
//! the segment model, manifest system, HNSW indexing, quantization, and
|
||||
//! compaction into a single cohesive runtime.
|
||||
//!
|
||||
//! # Architecture
|
||||
//!
|
||||
//! - **Append-only writes**: All mutations append new segments; no in-place edits.
|
||||
//! - **Progressive boot**: Readers see results before the full file is loaded.
|
||||
//! - **Single-writer / multi-reader**: Advisory lock file enforces exclusivity.
|
||||
//! - **Background compaction**: Dead space is reclaimed without blocking queries.
|
||||
|
||||
pub mod adversarial;
|
||||
pub mod agi_authority;
|
||||
pub mod agi_coherence;
|
||||
pub mod agi_container;
|
||||
pub mod compaction;
|
||||
pub mod compress;
|
||||
pub mod cow;
|
||||
pub mod cow_compact;
|
||||
pub mod cow_map;
|
||||
pub mod deletion;
|
||||
pub mod dos;
|
||||
pub mod ffi;
|
||||
pub mod filter;
|
||||
pub mod locking;
|
||||
pub mod membership;
|
||||
pub mod options;
|
||||
#[cfg(feature = "qr")]
|
||||
pub mod qr_encode;
|
||||
pub mod qr_seed;
|
||||
pub mod read_path;
|
||||
pub mod safety_net;
|
||||
pub mod seed_crypto;
|
||||
pub mod status;
|
||||
pub mod store;
|
||||
pub mod witness;
|
||||
pub mod write_path;
|
||||
|
||||
pub use adversarial::{
|
||||
adaptive_n_probe, centroid_distance_cv, combined_effective_n_probe,
|
||||
effective_n_probe_with_drift, is_degenerate_distribution, DEGENERATE_CV_THRESHOLD,
|
||||
};
|
||||
pub use agi_container::{AgiContainerBuilder, ParsedAgiManifest};
|
||||
pub use compress::{compress, decompress, CompressError};
|
||||
pub use cow::{CowEngine, CowStats, WitnessEvent};
|
||||
pub use cow_compact::CowCompactor;
|
||||
pub use cow_map::CowMap;
|
||||
pub use dos::{BudgetTokenBucket, NegativeCache, ProofOfWork, QuerySignature};
|
||||
pub use filter::FilterExpr;
|
||||
pub use membership::MembershipFilter;
|
||||
pub use options::{
|
||||
CompactionResult, DeleteResult, IngestResult, MetadataEntry, MetadataValue, QualityEnvelope,
|
||||
QueryOptions, RvfOptions, SearchResult, WitnessConfig,
|
||||
};
|
||||
#[cfg(feature = "qr")]
|
||||
pub use qr_encode::{EcLevel, QrCode, QrEncoder, QrError};
|
||||
pub use qr_seed::{
|
||||
make_host_entry, BootstrapProgress, DownloadManifest, ParsedSeed, SeedBuilder, SeedError,
|
||||
};
|
||||
pub use safety_net::{
|
||||
selective_safety_net_scan, should_activate_safety_net, Candidate, SafetyNetResult,
|
||||
};
|
||||
pub use seed_crypto::{
|
||||
full_content_hash, layer_content_hash, seed_content_hash, sign_seed, verify_layer, verify_seed,
|
||||
SIG_ALGO_HMAC_SHA256,
|
||||
};
|
||||
#[cfg(feature = "ed25519")]
|
||||
pub use seed_crypto::{sign_seed_ed25519, verify_seed_ed25519, SIG_ALGO_ED25519};
|
||||
pub use status::StoreStatus;
|
||||
pub use store::RvfStore;
|
||||
pub use witness::{
|
||||
GovernancePolicy, ParsedWitness, ScorecardBuilder, WitnessBuilder, WitnessError,
|
||||
};
|
||||
407
vendor/ruvector/crates/rvf/rvf-runtime/src/locking.rs
vendored
Normal file
407
vendor/ruvector/crates/rvf/rvf-runtime/src/locking.rs
vendored
Normal file
@@ -0,0 +1,407 @@
|
||||
//! Writer lock management for single-writer / multi-reader concurrency.
|
||||
//!
|
||||
//! Implements the advisory lock file protocol from spec 09:
|
||||
//! - Lock file at `{path}.lock` with PID, hostname, timestamp, UUID
|
||||
//! - Stale lock detection via PID liveness and age threshold
|
||||
//! - Atomic creation via O_CREAT | O_EXCL
|
||||
|
||||
use std::fs;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// The lock file magic: "RVLF" in ASCII (the constant reads big-endian;
/// it is serialized little-endian on disk by `build_lock_content`).
const LOCK_MAGIC: u32 = 0x52564C46;

/// Lock protocol version, stored at offset 0x60 of the lock record.
const LOCK_VERSION: u32 = 1;

/// Lock file total size in bytes. The record is a fixed-layout struct;
/// see `build_lock_content` for the field offsets.
const LOCK_FILE_SIZE: usize = 104;

/// Stale lock age threshold for same-host locks (30 seconds in nanoseconds).
/// Cross-host locks use a longer, hard-coded 300 s threshold in
/// `try_break_stale_lock` because remote PID liveness cannot be checked.
const STALE_AGE_NS: u64 = 30_000_000_000;
|
||||
/// Represents an acquired writer lock.
///
/// Dropping the value (or calling `release`) removes the on-disk lock
/// file, but only after verifying it still contains our `writer_id`.
pub(crate) struct WriterLock {
    // Path of the `{file}.lock` sidecar this lock owns.
    lock_path: PathBuf,
    // Random 16-byte identity written at offset 0x50 of the lock record;
    // used to prove the file is still ours before deleting it.
    writer_id: [u8; 16],
}
|
||||
|
||||
impl WriterLock {
|
||||
/// Attempt to acquire the writer lock for the given RVF file path.
|
||||
///
|
||||
/// Returns `Ok(WriterLock)` on success, or an `io::Error` if the lock
|
||||
/// is held by another active writer.
|
||||
pub(crate) fn acquire(rvf_path: &Path) -> io::Result<Self> {
|
||||
let lock_path = lock_path_for(rvf_path);
|
||||
let pid = std::process::id();
|
||||
let hostname = get_hostname();
|
||||
let timestamp_ns = now_ns();
|
||||
let writer_id = random_uuid();
|
||||
|
||||
// Build lock file content.
|
||||
let content = build_lock_content(pid, &hostname, timestamp_ns, &writer_id);
|
||||
|
||||
// Attempt atomic creation.
|
||||
match atomic_create_file(&lock_path, &content) {
|
||||
Ok(()) => Ok(WriterLock {
|
||||
lock_path,
|
||||
writer_id,
|
||||
}),
|
||||
Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
|
||||
// Check for stale lock.
|
||||
if try_break_stale_lock(&lock_path)? {
|
||||
// Retry after breaking stale lock.
|
||||
atomic_create_file(&lock_path, &content)?;
|
||||
Ok(WriterLock {
|
||||
lock_path,
|
||||
writer_id,
|
||||
})
|
||||
} else {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::WouldBlock,
|
||||
"another writer holds the lock",
|
||||
))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Release the writer lock.
|
||||
///
|
||||
/// Verifies that the lock file still contains our writer_id before
|
||||
/// removing it, preventing deletion of a lock legitimately taken over.
|
||||
pub(crate) fn release(self) -> io::Result<()> {
|
||||
// Verify our writer_id is still in the lock.
|
||||
if let Ok(content) = fs::read(&self.lock_path) {
|
||||
if content.len() >= LOCK_FILE_SIZE {
|
||||
let stored_id = &content[0x50..0x60];
|
||||
if stored_id == self.writer_id {
|
||||
let _ = fs::remove_file(&self.lock_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if the lock is still held by us.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn is_valid(&self) -> bool {
|
||||
if let Ok(content) = fs::read(&self.lock_path) {
|
||||
if content.len() >= LOCK_FILE_SIZE {
|
||||
let stored_id = &content[0x50..0x60];
|
||||
return stored_id == self.writer_id;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for WriterLock {
    /// Best-effort release on drop: delete the lock file only if it still
    /// carries our writer_id, so a lock legitimately broken and re-acquired
    /// by another writer is never deleted out from under them.
    /// All I/O errors are swallowed — panicking inside `drop` could abort
    /// an in-flight unwind.
    fn drop(&mut self) {
        // Best-effort release on drop.
        if let Ok(content) = fs::read(&self.lock_path) {
            if content.len() >= LOCK_FILE_SIZE {
                // Writer ID occupies bytes 0x50..0x60 of the lock record.
                let stored_id = &content[0x50..0x60];
                if stored_id == self.writer_id {
                    let _ = fs::remove_file(&self.lock_path);
                }
            }
        }
    }
}
|
||||
|
||||
/// Compute the sidecar lock-file path for a given RVF file: the original
/// path with ".lock" appended (e.g. `data.rvf` -> `data.rvf.lock`).
pub(crate) fn lock_path_for(rvf_path: &Path) -> PathBuf {
    let mut with_suffix = rvf_path.as_os_str().to_os_string();
    with_suffix.push(".lock");
    with_suffix.into()
}
|
||||
|
||||
/// Try to break a stale lock. Returns `true` if the lock was broken
/// (or was already gone) and acquisition may be retried.
///
/// NOTE(review): the read / validate / delete sequence is not atomic —
/// a concurrent writer could recreate the lock between our read and the
/// remove. The caller's retry through `atomic_create_file` keeps this
/// safe: the retry simply fails with AlreadyExists.
fn try_break_stale_lock(lock_path: &Path) -> io::Result<bool> {
    let content = match fs::read(lock_path) {
        Ok(c) => c,
        // Already gone: nothing to break, acquisition can proceed.
        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(true),
        Err(e) => return Err(e),
    };

    if content.len() < LOCK_FILE_SIZE {
        // Invalid lock file — delete it.
        let _ = fs::remove_file(lock_path);
        return Ok(true);
    }

    // Validate magic (offset 0x00, little-endian).
    let magic = u32::from_le_bytes([content[0], content[1], content[2], content[3]]);
    if magic != LOCK_MAGIC {
        let _ = fs::remove_file(lock_path);
        return Ok(true);
    }

    // Read PID (offset 0x04) and timestamp (offset 0x48).
    let lock_pid = u32::from_le_bytes([content[4], content[5], content[6], content[7]]);
    let lock_timestamp = u64::from_le_bytes([
        content[0x48],
        content[0x49],
        content[0x4A],
        content[0x4B],
        content[0x4C],
        content[0x4D],
        content[0x4E],
        content[0x4F],
    ]);

    let current_time = now_ns();
    // saturating_sub: clock skew must not wrap the age below zero.
    let age = current_time.saturating_sub(lock_timestamp);

    // Read hostname (offset 0x08, 64-byte NUL-terminated field).
    let lock_hostname = read_hostname_from_lock(&content[0x08..0x48]);
    let current_hostname = get_hostname();
    let same_host = lock_hostname == current_hostname;

    // Check if PID is alive (same host only).
    let pid_alive = if same_host {
        is_pid_alive(lock_pid)
    } else {
        // Cannot check remote PID; rely on age only.
        true
    };

    // Stale conditions:
    // - PID is dead AND age > threshold (same host)
    // - Age > extended threshold (cross-host)
    let threshold = if same_host {
        STALE_AGE_NS
    } else {
        // 300 s: cross-host locks get a much longer grace period because
        // liveness cannot be verified remotely.
        300_000_000_000
    };

    if !pid_alive && age > threshold {
        let _ = fs::remove_file(lock_path);
        return Ok(true);
    }

    if !same_host && age > threshold {
        let _ = fs::remove_file(lock_path);
        return Ok(true);
    }

    Ok(false)
}
|
||||
|
||||
/// Serialize a lock record into its fixed 104-byte on-disk layout.
///
/// Layout (all integers little-endian):
///   0x00  u32       magic ("RVLF")
///   0x04  u32       pid
///   0x08  [u8; 64]  hostname, NUL-terminated (payload truncated to 62 bytes)
///   0x48  u64       timestamp_ns
///   0x50  [u8; 16]  writer_id
///   0x60  u32       lock protocol version
///   0x64  u32       CRC32 over bytes 0x00..0x64
fn build_lock_content(
    pid: u32,
    hostname: &str,
    timestamp_ns: u64,
    writer_id: &[u8; 16],
) -> Vec<u8> {
    let mut buf = vec![0u8; LOCK_FILE_SIZE];

    // Magic (0x00).
    buf[0..4].copy_from_slice(&LOCK_MAGIC.to_le_bytes());
    // PID (0x04).
    buf[4..8].copy_from_slice(&pid.to_le_bytes());
    // Hostname (0x08, max 64 bytes, null-terminated).
    let host_bytes = hostname.as_bytes();
    let copy_len = host_bytes.len().min(62); // Reserve byte for null terminator
    buf[0x08..0x08 + copy_len].copy_from_slice(&host_bytes[..copy_len]);
    buf[0x08 + copy_len] = 0; // Explicit null terminator
    // Timestamp (0x48).
    buf[0x48..0x50].copy_from_slice(&timestamp_ns.to_le_bytes());
    // Writer ID (0x50).
    buf[0x50..0x60].copy_from_slice(writer_id);
    // Lock version (0x60).
    buf[0x60..0x64].copy_from_slice(&LOCK_VERSION.to_le_bytes());
    // CRC32 (0x64) — simplified: we use a basic checksum.
    let crc = simple_crc32(&buf[0..0x64]);
    buf[0x64..0x68].copy_from_slice(&crc.to_le_bytes());

    buf
}
|
||||
|
||||
/// Create `path` exclusively (O_CREAT | O_EXCL semantics) and durably
/// write `content`.
///
/// Fails with `ErrorKind::AlreadyExists` when the file is already
/// present — this is what makes lock acquisition atomic.
fn atomic_create_file(path: &Path, content: &[u8]) -> io::Result<()> {
    let mut file = fs::OpenOptions::new()
        .write(true)
        .create_new(true) // fail rather than truncate if the path exists
        .open(path)?;
    file.write_all(content)?;
    // Flush to stable storage so a crash cannot leave a torn lock record.
    file.sync_all()
}
|
||||
|
||||
/// Decode the NUL-terminated hostname field of a lock record.
/// Bytes after the first NUL are ignored; invalid UTF-8 is replaced.
fn read_hostname_from_lock(buf: &[u8]) -> String {
    let name = buf.split(|&b| b == 0).next().unwrap_or(buf);
    String::from_utf8_lossy(name).into_owned()
}
|
||||
|
||||
/// Best-effort hostname lookup: `$HOSTNAME` first, then `/etc/hostname`
/// (trimmed), falling back to the literal "unknown".
fn get_hostname() -> String {
    match std::env::var("HOSTNAME") {
        Ok(name) => name,
        Err(_) => fs::read_to_string("/etc/hostname")
            .unwrap_or_else(|_| "unknown".into())
            .trim()
            .to_string(),
    }
}
|
||||
|
||||
/// Nanoseconds since the Unix epoch, or 0 if the system clock reads
/// earlier than 1970 (duration_since fails).
fn now_ns() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(d) => d.as_nanos() as u64,
        Err(_) => 0,
    }
}
|
||||
|
||||
/// Produce a 16-byte random writer identity.
///
/// Uses /dev/urandom when available, otherwise a timestamp + PID fallback.
///
/// NOTE(review): no RFC-4122 version/variant bits are set, so despite the
/// name this is an opaque 128-bit token, not a standards-conformant UUID.
/// The fallback leaves bytes 12..16 zero and is predictable — fine for
/// advisory-lock identity, not for anything security-sensitive.
fn random_uuid() -> [u8; 16] {
    // Simple random UUID generation using /dev/urandom or time-based fallback.
    let mut buf = [0u8; 16];
    if let Ok(mut f) = fs::File::open("/dev/urandom") {
        // A short read is silently ignored; buf simply stays partially zero.
        let _ = f.read_exact(&mut buf);
    } else {
        // Fallback: use timestamp + PID.
        let ts = now_ns();
        buf[0..8].copy_from_slice(&ts.to_le_bytes());
        buf[8..12].copy_from_slice(&std::process::id().to_le_bytes());
    }
    buf
}
|
||||
|
||||
/// Probe whether `pid` refers to a live process on this host.
fn is_pid_alive(pid: u32) -> bool {
    // On Unix, kill(pid, 0) checks process existence without sending a signal.
    // A return of 0 means the process exists and we have permission to signal it.
    // EPERM (errno = 1) means the process exists but belongs to a different user
    // -- still alive. Any other error (ESRCH = no such process) means dead.
    #[cfg(unix)]
    {
        let ret = libc_kill(pid as i32, 0);
        if ret == 0 {
            return true;
        }
        // Check errno for EPERM -- process exists but we lack permission.
        // errno is read immediately after the failing call on this thread,
        // so no intervening libc call can clobber it.
        // SAFETY: libc_errno returns a valid pointer to this thread's errno.
        let err = unsafe { *libc_errno() };
        err == EPERM
    }
    #[cfg(not(unix))]
    {
        // On non-Unix platforms, we cannot determine PID liveness.
        // Conservatively assume alive to avoid breaking stale locks
        // that might still be held. The age-based fallback in
        // try_break_stale_lock will handle truly stale locks.
        let _ = pid;
        true
    }
}
|
||||
|
||||
// Raw libc bindings declared by hand to avoid pulling in the `libc` crate.

#[cfg(unix)]
extern "C" {
    /// POSIX kill(2); with signal 0 it performs only the existence check.
    fn kill(pid: i32, sig: i32) -> i32;
}

#[cfg(any(target_os = "linux", target_os = "android"))]
extern "C" {
    /// glibc/bionic accessor for the thread-local errno.
    fn __errno_location() -> *mut i32;
}

#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
extern "C" {
    /// Darwin/BSD accessor for the thread-local errno.
    fn __error() -> *mut i32;
}

/// Permission denied errno -- process exists but belongs to another user.
#[cfg(unix)]
const EPERM: i32 = 1;

/// Thin safe wrapper around kill(2).
#[cfg(unix)]
fn libc_kill(pid: i32, sig: i32) -> i32 {
    // SAFETY: kill takes plain integers and only returns a status code;
    // no memory is passed across the FFI boundary.
    unsafe { kill(pid, sig) }
}

/// Get a pointer to the thread-local errno value.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn libc_errno() -> *mut i32 {
    // SAFETY: __errno_location never fails and returns a valid pointer.
    unsafe { __errno_location() }
}

/// Get a pointer to the thread-local errno value (macOS/BSD).
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
fn libc_errno() -> *mut i32 {
    // SAFETY: __error never fails and returns a valid pointer.
    unsafe { __error() }
}
|
||||
|
||||
/// Simple CRC32 (not CRC32C) for lock file checksumming.
///
/// Standard reflected CRC-32: polynomial 0xEDB88320, initial value
/// 0xFFFFFFFF, final complement — bitwise, no lookup table.
fn simple_crc32(data: &[u8]) -> u32 {
    let folded = data.iter().fold(0xFFFF_FFFFu32, |acc, &byte| {
        let mut c = acc ^ u32::from(byte);
        for _ in 0..8 {
            c = if c & 1 == 1 {
                (c >> 1) ^ 0xEDB8_8320
            } else {
                c >> 1
            };
        }
        c
    });
    !folded
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn lock_path_computation() {
        let rvf = Path::new("/tmp/data.rvf");
        assert_eq!(lock_path_for(rvf), PathBuf::from("/tmp/data.rvf.lock"));
    }

    #[test]
    fn acquire_and_release() {
        let tmp = TempDir::new().unwrap();
        let data_path = tmp.path().join("test.rvf");
        fs::write(&data_path, b"").unwrap();

        let first = WriterLock::acquire(&data_path).unwrap();
        assert!(first.is_valid());

        // A second writer must be refused while the lock is held.
        assert!(WriterLock::acquire(&data_path).is_err());

        first.release().unwrap();

        // After release the lock is free for the next writer.
        let second = WriterLock::acquire(&data_path).unwrap();
        assert!(second.is_valid());
    }

    #[test]
    fn stale_lock_detection() {
        let tmp = TempDir::new().unwrap();
        let data_path = tmp.path().join("test2.rvf");
        fs::write(&data_path, b"").unwrap();
        let lock_file = lock_path_for(&data_path);

        // Forge a lock held by an (almost certainly dead) PID, aged 60s.
        let dead_pid = 999999999u32;
        let stale_ts = now_ns().saturating_sub(60_000_000_000);
        let forged_id = [0xABu8; 16];
        let record = build_lock_content(dead_pid, &get_hostname(), stale_ts, &forged_id);
        fs::write(&lock_file, &record).unwrap();

        // Acquisition must break the stale lock and succeed.
        let lock = WriterLock::acquire(&data_path).unwrap();
        assert!(lock.is_valid());
    }

    #[test]
    fn simple_crc32_works() {
        let payload = b"hello";
        let first = simple_crc32(payload);
        assert_ne!(first, 0);
        // Deterministic for identical input.
        assert_eq!(first, simple_crc32(payload));
    }
}
|
||||
324
vendor/ruvector/crates/rvf/rvf-runtime/src/membership.rs
vendored
Normal file
324
vendor/ruvector/crates/rvf/rvf-runtime/src/membership.rs
vendored
Normal file
@@ -0,0 +1,324 @@
|
||||
//! Membership filter for shared HNSW index traversal.
|
||||
//!
|
||||
//! Include mode (default): vector visible iff `filter.contains(id)`.
|
||||
//! Exclude mode: vector visible iff `!filter.contains(id)`.
|
||||
//!
|
||||
//! Empty filter in include mode = empty view (fail-safe).
|
||||
//!
|
||||
//! HNSW traversal integration:
|
||||
//! - Excluded nodes MAY be pushed onto exploration heap (routing waypoints)
|
||||
//! - Excluded nodes MUST NOT be pushed onto result heap
|
||||
//! - Excluded nodes DO NOT decrement `ef_remaining`
|
||||
|
||||
use rvf_types::membership::{FilterMode, MembershipHeader, MEMBERSHIP_MAGIC};
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
|
||||
/// Membership filter backed by a dense bitmap.
///
/// Invariants: `member_count` always equals the number of set bits in
/// `bitmap`, and `bitmap` holds `ceil(vector_count / 64)` words.
pub struct MembershipFilter {
    /// Include or exclude mode (see module docs for the visibility rules).
    mode: FilterMode,
    /// Dense bit vector: one bit per vector ID; word `i` covers IDs
    /// `64*i .. 64*i + 63`, serialized little-endian per word.
    bitmap: Vec<u64>,
    /// Total vector count (capacity of the filter). IDs at or beyond this
    /// are ignored by `add`/`remove` and never report as set.
    vector_count: u64,
    /// Number of set bits (members).
    member_count: u64,
    /// Generation counter for optimistic concurrency.
    generation_id: u32,
}
|
||||
|
||||
impl MembershipFilter {
|
||||
/// Create a new include-mode filter with given capacity. All bits start clear.
|
||||
pub fn new_include(vector_count: u64) -> Self {
|
||||
let words = vector_count.div_ceil(64) as usize;
|
||||
Self {
|
||||
mode: FilterMode::Include,
|
||||
bitmap: vec![0u64; words],
|
||||
vector_count,
|
||||
member_count: 0,
|
||||
generation_id: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new exclude-mode filter with given capacity. All bits start clear.
|
||||
pub fn new_exclude(vector_count: u64) -> Self {
|
||||
let words = vector_count.div_ceil(64) as usize;
|
||||
Self {
|
||||
mode: FilterMode::Exclude,
|
||||
bitmap: vec![0u64; words],
|
||||
vector_count,
|
||||
member_count: 0,
|
||||
generation_id: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a vector ID to the filter.
|
||||
pub fn add(&mut self, vector_id: u64) {
|
||||
if vector_id >= self.vector_count {
|
||||
return;
|
||||
}
|
||||
let word = (vector_id / 64) as usize;
|
||||
let bit = vector_id % 64;
|
||||
if word < self.bitmap.len() {
|
||||
let mask = 1u64 << bit;
|
||||
if self.bitmap[word] & mask == 0 {
|
||||
self.bitmap[word] |= mask;
|
||||
self.member_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a vector ID from the filter.
|
||||
pub fn remove(&mut self, vector_id: u64) {
|
||||
if vector_id >= self.vector_count {
|
||||
return;
|
||||
}
|
||||
let word = (vector_id / 64) as usize;
|
||||
let bit = vector_id % 64;
|
||||
if word < self.bitmap.len() {
|
||||
let mask = 1u64 << bit;
|
||||
if self.bitmap[word] & mask != 0 {
|
||||
self.bitmap[word] &= !mask;
|
||||
self.member_count -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a vector ID is in the filter bitmap.
|
||||
fn bitmap_contains(&self, vector_id: u64) -> bool {
|
||||
if vector_id >= self.vector_count {
|
||||
return false;
|
||||
}
|
||||
let word = (vector_id / 64) as usize;
|
||||
let bit = vector_id % 64;
|
||||
if word < self.bitmap.len() {
|
||||
self.bitmap[word] & (1u64 << bit) != 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a vector ID is visible through this filter.
|
||||
///
|
||||
/// In Include mode: visible iff the bit is set.
|
||||
/// In Exclude mode: visible iff the bit is NOT set.
|
||||
pub fn contains(&self, vector_id: u64) -> bool {
|
||||
match self.mode {
|
||||
FilterMode::Include => self.bitmap_contains(vector_id),
|
||||
FilterMode::Exclude => !self.bitmap_contains(vector_id),
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of set bits (members in the bitmap).
|
||||
pub fn member_count(&self) -> u64 {
|
||||
self.member_count
|
||||
}
|
||||
|
||||
/// Total vector capacity.
|
||||
pub fn vector_count(&self) -> u64 {
|
||||
self.vector_count
|
||||
}
|
||||
|
||||
/// Filter mode.
|
||||
pub fn mode(&self) -> FilterMode {
|
||||
self.mode
|
||||
}
|
||||
|
||||
/// Generation ID.
|
||||
pub fn generation_id(&self) -> u32 {
|
||||
self.generation_id
|
||||
}
|
||||
|
||||
/// Increment generation ID.
|
||||
pub fn bump_generation(&mut self) {
|
||||
self.generation_id += 1;
|
||||
}
|
||||
|
||||
/// Serialize the bitmap to bytes (just the raw bitmap words).
|
||||
pub fn serialize(&self) -> Vec<u8> {
|
||||
let mut buf = Vec::with_capacity(self.bitmap.len() * 8);
|
||||
for &word in &self.bitmap {
|
||||
buf.extend_from_slice(&word.to_le_bytes());
|
||||
}
|
||||
buf
|
||||
}
|
||||
|
||||
/// Deserialize a MembershipFilter from bitmap bytes and a header.
|
||||
pub fn deserialize(data: &[u8], header: &MembershipHeader) -> Result<Self, RvfError> {
|
||||
let mode = FilterMode::try_from(header.filter_mode)
|
||||
.map_err(|_| RvfError::Code(ErrorCode::MembershipInvalid))?;
|
||||
|
||||
let word_count = header.vector_count.div_ceil(64) as usize;
|
||||
let expected_bytes = word_count * 8;
|
||||
if data.len() < expected_bytes {
|
||||
return Err(RvfError::Code(ErrorCode::MembershipInvalid));
|
||||
}
|
||||
|
||||
let mut bitmap = Vec::with_capacity(word_count);
|
||||
for i in 0..word_count {
|
||||
let offset = i * 8;
|
||||
let word = u64::from_le_bytes([
|
||||
data[offset],
|
||||
data[offset + 1],
|
||||
data[offset + 2],
|
||||
data[offset + 3],
|
||||
data[offset + 4],
|
||||
data[offset + 5],
|
||||
data[offset + 6],
|
||||
data[offset + 7],
|
||||
]);
|
||||
bitmap.push(word);
|
||||
}
|
||||
|
||||
// Recount set bits
|
||||
let member_count: u64 = bitmap.iter().map(|w| w.count_ones() as u64).sum();
|
||||
|
||||
Ok(Self {
|
||||
mode,
|
||||
bitmap,
|
||||
vector_count: header.vector_count,
|
||||
member_count,
|
||||
generation_id: header.generation_id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a MembershipHeader for this filter.
|
||||
pub fn to_header(&self) -> MembershipHeader {
|
||||
let bitmap_bytes = self.serialize();
|
||||
let filter_hash = crate::store::simple_shake256_256(&bitmap_bytes);
|
||||
|
||||
MembershipHeader {
|
||||
magic: MEMBERSHIP_MAGIC,
|
||||
version: 1,
|
||||
filter_type: rvf_types::membership::FilterType::Bitmap as u8,
|
||||
filter_mode: self.mode as u8,
|
||||
vector_count: self.vector_count,
|
||||
member_count: self.member_count,
|
||||
filter_offset: 96, // right after header
|
||||
filter_size: bitmap_bytes.len() as u32,
|
||||
generation_id: self.generation_id,
|
||||
filter_hash,
|
||||
bloom_offset: 0,
|
||||
bloom_size: 0,
|
||||
_reserved: 0,
|
||||
_reserved2: [0u8; 8],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn include_mode_empty_is_empty_view() {
        let empty = MembershipFilter::new_include(100);
        assert!((0..100).all(|id| !empty.contains(id)));
    }

    #[test]
    fn include_mode_add_and_check() {
        let mut f = MembershipFilter::new_include(100);
        for id in [10u64, 50, 99] {
            f.add(id);
        }
        for id in [10u64, 50, 99] {
            assert!(f.contains(id));
        }
        assert!(!f.contains(0));
        assert!(!f.contains(11));
        assert_eq!(f.member_count(), 3);
    }

    #[test]
    fn exclude_mode() {
        let mut f = MembershipFilter::new_exclude(100);
        // An empty bitmap in exclude mode makes everything visible.
        assert!(f.contains(0));
        assert!(f.contains(50));

        // Setting a bit hides exactly that vector.
        f.add(50);
        assert!(!f.contains(50));
        assert!(f.contains(0));
        assert!(f.contains(99));
    }

    #[test]
    fn add_remove() {
        let mut f = MembershipFilter::new_include(64);
        f.add(10);
        assert_eq!(f.member_count(), 1);
        assert!(f.contains(10));

        f.remove(10);
        assert_eq!(f.member_count(), 0);
        assert!(!f.contains(10));
    }

    #[test]
    fn add_out_of_bounds_ignored() {
        let mut f = MembershipFilter::new_include(10);
        f.add(100); // past the declared capacity
        assert_eq!(f.member_count(), 0);
    }

    #[test]
    fn double_add_no_double_count() {
        let mut f = MembershipFilter::new_include(64);
        f.add(5);
        f.add(5);
        assert_eq!(f.member_count(), 1);
    }

    #[test]
    fn serialize_deserialize_round_trip() {
        let members = [0u64, 63, 64, 127, 199];
        let mut original = MembershipFilter::new_include(200);
        for &id in &members {
            original.add(id);
        }

        let header = original.to_header();
        let bytes = original.serialize();

        let restored = MembershipFilter::deserialize(&bytes, &header).unwrap();
        assert_eq!(restored.vector_count(), 200);
        assert_eq!(restored.member_count(), 5);
        for &id in &members {
            assert!(restored.contains(id));
        }
        assert!(!restored.contains(1));
        assert!(!restored.contains(100));
    }

    #[test]
    fn generation_bump() {
        let mut f = MembershipFilter::new_include(10);
        assert_eq!(f.generation_id(), 0);
        f.bump_generation();
        assert_eq!(f.generation_id(), 1);
    }

    #[test]
    fn bitmap_word_boundary() {
        // IDs straddling 64-bit word edges must not bleed into neighbors.
        let mut f = MembershipFilter::new_include(130);
        for id in [63u64, 64, 128] {
            f.add(id);
        }
        for id in [63u64, 64, 128] {
            assert!(f.contains(id));
        }
        for id in [62u64, 65, 129] {
            assert!(!f.contains(id));
        }
    }
}
|
||||
207
vendor/ruvector/crates/rvf/rvf-runtime/src/options.rs
vendored
Normal file
207
vendor/ruvector/crates/rvf/rvf-runtime/src/options.rs
vendored
Normal file
@@ -0,0 +1,207 @@
|
||||
//! Configuration types for the RVF runtime.
|
||||
|
||||
use crate::filter::FilterExpr;
|
||||
use rvf_types::quality::{
|
||||
BudgetReport, DegradationReport, QualityPreference, ResponseQuality, SafetyNetBudget,
|
||||
SearchEvidenceSummary,
|
||||
};
|
||||
use rvf_types::security::SecurityPolicy;
|
||||
|
||||
/// Distance metric used for vector similarity search.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum DistanceMetric {
    /// Squared Euclidean distance (L2).
    #[default]
    L2,
    /// Inner (dot) product distance (negated).
    /// NOTE(review): presumably negated so larger dot products sort as
    /// smaller distances — confirm against the search kernel.
    InnerProduct,
    /// Cosine distance (1 - cosine_similarity).
    Cosine,
}
|
||||
|
||||
/// Compression profile for stored vectors.
///
/// NOTE(review): chosen at store creation; whether it can change later
/// (e.g. across compaction) is not visible here — confirm with RvfStore.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CompressionProfile {
    /// No compression — raw fp32 vectors.
    #[default]
    None,
    /// Scalar quantization (int8).
    Scalar,
    /// Product quantization.
    Product,
}
|
||||
|
||||
/// Configuration for automatic witness segment generation.
///
/// The per-field defaults listed below are what the `Default` impl
/// produces: all mutating operations are witnessed, query auditing is off.
#[derive(Clone, Debug)]
pub struct WitnessConfig {
    /// Append a witness entry after each ingest operation. Default: true.
    pub witness_ingest: bool,
    /// Append a witness entry after each delete operation. Default: true.
    pub witness_delete: bool,
    /// Append a witness entry after each compact operation. Default: true.
    pub witness_compact: bool,
    /// Append a witness entry after each query operation. Default: false.
    /// Enable this for audit-trail compliance; it adds I/O to the hot path.
    pub audit_queries: bool,
}
|
||||
|
||||
impl Default for WitnessConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
witness_ingest: true,
|
||||
witness_delete: true,
|
||||
witness_compact: true,
|
||||
audit_queries: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Options for creating a new RVF store.
///
/// Obtain sensible defaults via [`RvfOptions::default`] and override
/// the fields you need (at minimum `dimension`).
#[derive(Clone, Debug)]
pub struct RvfOptions {
    /// Vector dimensionality (required).
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: DistanceMetric,
    /// Hardware profile identifier (0=Generic, 1=Core, 2=Hot, 3=Full).
    pub profile: u8,
    /// Domain profile for the file (determines canonical extension).
    pub domain_profile: rvf_types::DomainProfile,
    /// Compression profile for stored vectors.
    pub compression: CompressionProfile,
    /// Whether segment signing is enabled.
    pub signing: bool,
    /// HNSW M parameter: max edges per node per layer.
    pub m: u16,
    /// HNSW ef_construction: beam width during index build.
    pub ef_construction: u16,
    /// Witness auto-generation configuration.
    pub witness: WitnessConfig,
    /// Security policy for manifest signature verification (ADR-033 §4).
    pub security_policy: SecurityPolicy,
}
|
||||
|
||||
impl Default for RvfOptions {
    /// Conservative defaults: L2 metric, generic profiles, no compression,
    /// no signing, standard HNSW parameters (M=16, ef_construction=200),
    /// and the strict security policy.
    fn default() -> Self {
        Self {
            // NOTE(review): 0 is a placeholder — callers must set a real
            // dimension before creating a store; confirm where this is validated.
            dimension: 0,
            metric: DistanceMetric::L2,
            profile: 0,
            domain_profile: rvf_types::DomainProfile::Generic,
            compression: CompressionProfile::None,
            signing: false,
            m: 16,
            ef_construction: 200,
            witness: WitnessConfig::default(),
            security_policy: SecurityPolicy::Strict,
        }
    }
}
|
||||
|
||||
/// Options controlling a query operation.
#[derive(Clone, Debug)]
pub struct QueryOptions {
    /// HNSW ef_search parameter (beam width during search).
    pub ef_search: u16,
    /// Optional metadata filter expression.
    pub filter: Option<FilterExpr>,
    /// Query timeout in milliseconds (0 = no timeout).
    pub timeout_ms: u32,
    /// Quality vs latency preference (ADR-033).
    pub quality_preference: QualityPreference,
    /// Safety net budget caps. Callers may tighten but not loosen
    /// beyond the mode default (unless PreferQuality, which extends to 4x).
    pub safety_net_budget: SafetyNetBudget,
}
|
||||
|
||||
impl Default for QueryOptions {
    /// Defaults: ef_search=100, no filter, no timeout, automatic quality
    /// preference, and the Layer-A safety net budget.
    fn default() -> Self {
        Self {
            ef_search: 100,
            filter: None,
            // 0 disables the timeout entirely.
            timeout_ms: 0,
            quality_preference: QualityPreference::Auto,
            safety_net_budget: SafetyNetBudget::LAYER_A,
        }
    }
}
|
||||
|
||||
/// A single search result: vector ID and distance.
#[derive(Clone, Debug, PartialEq)]
pub struct SearchResult {
    /// The vector's unique identifier.
    pub id: u64,
    /// Distance from the query vector (lower = more similar).
    pub distance: f32,
    /// Per-candidate retrieval quality (ADR-033).
    pub retrieval_quality: rvf_types::quality::RetrievalQuality,
}
|
||||
|
||||
/// The mandatory outer return type for all query APIs (ADR-033 §2.4).
///
/// This is not optional. This is not a nested field.
/// JSON flattening cannot discard it. gRPC serialization cannot drop it.
/// MCP tool responses must include it.
#[derive(Clone, Debug)]
pub struct QualityEnvelope {
    /// The search results.
    pub results: Vec<SearchResult>,
    /// Top-level quality signal. Consumers MUST inspect this.
    pub quality: ResponseQuality,
    /// Structured evidence for why the quality is what it is.
    pub evidence: SearchEvidenceSummary,
    /// Resource consumption report for this query.
    pub budgets: BudgetReport,
    /// If quality is degraded, the structured reason.
    pub degradation: Option<DegradationReport>,
}
|
||||
|
||||
/// Result of a batch ingest operation.
///
/// `accepted + rejected` accounts for every vector in the batch.
#[derive(Clone, Debug)]
pub struct IngestResult {
    /// Number of vectors successfully ingested.
    pub accepted: u64,
    /// Number of vectors rejected.
    pub rejected: u64,
    /// Manifest epoch after the ingest commit.
    pub epoch: u32,
}
|
||||
|
||||
/// Result of a delete operation.
///
/// Deletes are soft: the vectors remain in storage until compaction.
#[derive(Clone, Debug)]
pub struct DeleteResult {
    /// Number of vectors soft-deleted.
    pub deleted: u64,
    /// Manifest epoch after the delete commit.
    pub epoch: u32,
}
|
||||
|
||||
/// Result of a compaction operation.
#[derive(Clone, Debug)]
pub struct CompactionResult {
    /// Number of segments compacted.
    pub segments_compacted: u32,
    /// Bytes of dead space reclaimed.
    pub bytes_reclaimed: u64,
    /// Manifest epoch after compaction commit.
    pub epoch: u32,
}
|
||||
|
||||
/// A single metadata entry for a vector: a (field, value) pair.
#[derive(Clone, Debug)]
pub struct MetadataEntry {
    /// Metadata field identifier.
    pub field_id: u16,
    /// The metadata value.
    pub value: MetadataValue,
}
|
||||
|
||||
/// Metadata value types matching the spec.
#[derive(Clone, Debug)]
pub enum MetadataValue {
    /// Unsigned 64-bit integer.
    U64(u64),
    /// Signed 64-bit integer.
    I64(i64),
    /// 64-bit IEEE-754 floating point.
    F64(f64),
    /// Owned UTF-8 string.
    String(String),
    /// Arbitrary byte blob.
    Bytes(Vec<u8>),
}
|
||||
1134
vendor/ruvector/crates/rvf/rvf-runtime/src/qr_encode.rs
vendored
Normal file
1134
vendor/ruvector/crates/rvf/rvf-runtime/src/qr_encode.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1094
vendor/ruvector/crates/rvf/rvf-runtime/src/qr_seed.rs
vendored
Normal file
1094
vendor/ruvector/crates/rvf/rvf-runtime/src/qr_seed.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
519
vendor/ruvector/crates/rvf/rvf-runtime/src/read_path.rs
vendored
Normal file
519
vendor/ruvector/crates/rvf/rvf-runtime/src/read_path.rs
vendored
Normal file
@@ -0,0 +1,519 @@
|
||||
//! Progressive read logic for the RVF runtime.
|
||||
//!
|
||||
//! Boot sequence:
|
||||
//! 1. Seek to EOF - 4096, parse Level 0 root manifest
|
||||
//! 2. Extract hotset pointers, mmap hot segments
|
||||
//! 3. Background: parse Level 1 -> full segment directory
|
||||
//! 4. On-demand: load cold segments as queries need them
|
||||
|
||||
use rvf_types::{FileIdentity, SegmentHeader, SegmentType, SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, Read, Seek, SeekFrom};
|
||||
|
||||
/// A parsed segment directory entry (25 bytes on disk: id + offset + length + type).
#[derive(Clone, Debug)]
pub(crate) struct SegDirEntry {
    /// Unique segment identifier.
    pub seg_id: u64,
    /// Byte offset of the segment header within the file.
    pub offset: u64,
    /// Length in bytes of the segment payload (header excluded).
    pub payload_length: u64,
    /// Raw segment type tag (see `SegmentType`).
    pub seg_type: u8,
}
|
||||
|
||||
/// Parsed manifest data from the file.
///
/// Produced by `parse_manifest_payload` / `find_latest_manifest`.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub(crate) struct ParsedManifest {
    /// Epoch of the manifest commit.
    pub epoch: u32,
    /// Vector dimensionality recorded in the manifest.
    pub dimension: u16,
    /// Total vector count recorded in the manifest.
    pub total_vectors: u64,
    /// Hardware profile identifier.
    pub profile_id: u8,
    /// Segment directory entries.
    pub segment_dir: Vec<SegDirEntry>,
    /// Vector IDs soft-deleted as of this manifest.
    pub deleted_ids: Vec<u64>,
    /// Optional "FIDI" identity trailer; `None` for files written
    /// before the trailer existed (backward-compatible).
    pub file_identity: Option<FileIdentity>,
}
|
||||
|
||||
/// In-memory vector storage loaded from VEC_SEGs.
#[allow(dead_code)]
pub(crate) struct VectorData {
    /// Maps vector_id -> (dimension-sized f32 slice stored as Vec<f32>).
    pub vectors: HashMap<u64, Vec<f32>>,
    pub dimension: u16,
}

impl VectorData {
    /// Create an empty store for vectors of the given dimensionality.
    pub(crate) fn new(dimension: u16) -> Self {
        let vectors = HashMap::new();
        Self { vectors, dimension }
    }

    /// Borrow the vector stored under `id`, if present.
    pub(crate) fn get(&self, id: u64) -> Option<&[f32]> {
        match self.vectors.get(&id) {
            Some(v) => Some(v.as_slice()),
            None => None,
        }
    }

    /// Insert (or overwrite) the vector stored under `id`.
    pub(crate) fn insert(&mut self, id: u64, data: Vec<f32>) {
        let _ = self.vectors.insert(id, data);
    }

    /// Remove the vector stored under `id`; a no-op when absent.
    pub(crate) fn remove(&mut self, id: u64) {
        let _ = self.vectors.remove(&id);
    }

    /// Number of vectors currently held.
    pub(crate) fn len(&self) -> usize {
        self.vectors.len()
    }

    /// Iterate over all stored vector IDs (arbitrary order).
    pub(crate) fn ids(&self) -> impl Iterator<Item = &u64> {
        self.vectors.keys()
    }
}
|
||||
|
||||
/// Scan backwards from EOF to find and parse the latest valid manifest.
///
/// Reads a tail chunk and scans byte-by-byte for the magic + manifest-type
/// pattern, since segment headers are NOT necessarily 64-byte aligned from EOF.
///
/// Returns `Ok(None)` when the file is too small or no parseable manifest
/// is found in the tail window; seek/read errors propagate as `Err`.
pub(crate) fn find_latest_manifest<R: Read + Seek>(
    reader: &mut R,
) -> io::Result<Option<ParsedManifest>> {
    let file_size = reader.seek(SeekFrom::End(0))?;
    if file_size < SEGMENT_HEADER_SIZE as u64 {
        return Ok(None);
    }

    // Read up to 64 KB from the tail of the file into memory for scanning.
    // The manifest is typically ~4 KB, so 64 KB gives 16x headroom.
    let scan_size = std::cmp::min(file_size, 65_536) as usize;
    let scan_start = file_size - scan_size as u64;
    reader.seek(SeekFrom::Start(scan_start))?;
    let mut buf = vec![0u8; scan_size];
    reader.read_exact(&mut buf)?;

    let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
    let manifest_type = SegmentType::Manifest as u8;

    // Scan backwards through the buffer looking for magic + manifest type.
    // We need at least SEGMENT_HEADER_SIZE bytes from the candidate position.
    if buf.len() < SEGMENT_HEADER_SIZE {
        return Ok(None);
    }

    // Iterating from the END of the buffer means the newest manifest wins.
    let last_possible = buf.len() - SEGMENT_HEADER_SIZE;
    for i in (0..=last_possible).rev() {
        // Header layout: bytes 0..4 = magic, byte 0x05 = segment type.
        if buf[i..i + 4] == magic_bytes && buf[i + 5] == manifest_type {
            // Found a candidate manifest header at offset `i` within the buffer.
            let hdr_buf = &buf[i..i + SEGMENT_HEADER_SIZE];
            // payload_length is the little-endian u64 at header offset 0x10.
            let payload_length_u64 = u64::from_le_bytes([
                hdr_buf[0x10],
                hdr_buf[0x11],
                hdr_buf[0x12],
                hdr_buf[0x13],
                hdr_buf[0x14],
                hdr_buf[0x15],
                hdr_buf[0x16],
                hdr_buf[0x17],
            ]);

            // Reject implausible payload lengths to prevent OOM.
            if payload_length_u64 > MAX_READ_PAYLOAD {
                continue;
            }
            let payload_length = payload_length_u64 as usize;

            let payload_start = i + SEGMENT_HEADER_SIZE;
            let payload_end = match payload_start.checked_add(payload_length) {
                Some(end) => end,
                None => continue, // overflow: skip this candidate
            };

            if payload_end <= buf.len() {
                // Payload is within our buffer — parse directly.
                if let Some(manifest) = parse_manifest_payload(&buf[payload_start..payload_end]) {
                    return Ok(Some(manifest));
                }
            } else {
                // Payload extends beyond our buffer — read from file.
                let file_offset = scan_start + i as u64 + SEGMENT_HEADER_SIZE as u64;
                reader.seek(SeekFrom::Start(file_offset))?;
                let mut payload = vec![0u8; payload_length];
                // Best-effort: a short read just disqualifies this candidate.
                if reader.read_exact(&mut payload).is_ok() {
                    if let Some(manifest) = parse_manifest_payload(&payload) {
                        return Ok(Some(manifest));
                    }
                }
            }
            // Parse failed: keep scanning backwards for an older manifest.
        }
    }

    Ok(None)
}
|
||||
|
||||
/// Parse a manifest payload into structured data.
///
/// Payload layout (all little-endian):
/// - 22-byte header: epoch(4) + dimension(2) + total_vectors(8) +
///   seg_count(4) + profile_id(1) + pad(3)
/// - `seg_count` 25-byte directory entries: seg_id(8) + offset(8) +
///   payload_length(8) + seg_type(1)
/// - optional deletion list: count(4) + count * id(8)
/// - optional "FIDI" file-identity trailer: marker(4) + 68 bytes
///
/// Returns `None` on truncation or when `seg_count` exceeds what the
/// payload could possibly hold.
fn parse_manifest_payload(payload: &[u8]) -> Option<ParsedManifest> {
    // Minimum header: epoch(4) + dim(2) + total_vectors(8) + seg_count(4) + profile(1) + pad(3) = 22
    if payload.len() < 22 {
        return None;
    }

    let epoch = u32::from_le_bytes([payload[0], payload[1], payload[2], payload[3]]);
    let dimension = u16::from_le_bytes([payload[4], payload[5]]);
    let total_vectors = u64::from_le_bytes([
        payload[6],
        payload[7],
        payload[8],
        payload[9],
        payload[10],
        payload[11],
        payload[12],
        payload[13],
    ]);
    let seg_count = u32::from_le_bytes([payload[14], payload[15], payload[16], payload[17]]);
    let profile_id = payload[18];

    let mut offset = 22; // past header (4+2+8+4+1+3)

    // Validate that seg_count does not exceed what the payload can actually hold.
    // Each directory entry is 25 bytes, so seg_count * 25 + 22 must fit in the payload.
    let max_possible_entries = payload.len().saturating_sub(22) / 25;
    if (seg_count as usize) > max_possible_entries {
        return None;
    }

    // Parse segment directory.
    let mut segment_dir = Vec::with_capacity(seg_count as usize);
    for _ in 0..seg_count {
        if offset + 25 > payload.len() {
            return None;
        }
        let seg_id = u64::from_le_bytes([
            payload[offset],
            payload[offset + 1],
            payload[offset + 2],
            payload[offset + 3],
            payload[offset + 4],
            payload[offset + 5],
            payload[offset + 6],
            payload[offset + 7],
        ]);
        let seg_offset = u64::from_le_bytes([
            payload[offset + 8],
            payload[offset + 9],
            payload[offset + 10],
            payload[offset + 11],
            payload[offset + 12],
            payload[offset + 13],
            payload[offset + 14],
            payload[offset + 15],
        ]);
        let plen = u64::from_le_bytes([
            payload[offset + 16],
            payload[offset + 17],
            payload[offset + 18],
            payload[offset + 19],
            payload[offset + 20],
            payload[offset + 21],
            payload[offset + 22],
            payload[offset + 23],
        ]);
        let stype = payload[offset + 24];
        segment_dir.push(SegDirEntry {
            seg_id,
            offset: seg_offset,
            payload_length: plen,
            seg_type: stype,
        });
        offset += 25;
    }

    // Parse deletion bitmap. Truncation here is tolerated: a short list
    // simply stops early rather than failing the whole manifest.
    let mut deleted_ids = Vec::new();
    if offset + 4 <= payload.len() {
        let del_count = u32::from_le_bytes([
            payload[offset],
            payload[offset + 1],
            payload[offset + 2],
            payload[offset + 3],
        ]);
        offset += 4;
        for _ in 0..del_count {
            if offset + 8 > payload.len() {
                break;
            }
            let did = u64::from_le_bytes([
                payload[offset],
                payload[offset + 1],
                payload[offset + 2],
                payload[offset + 3],
                payload[offset + 4],
                payload[offset + 5],
                payload[offset + 6],
                payload[offset + 7],
            ]);
            deleted_ids.push(did);
            offset += 8;
        }
    }

    // Try to parse FileIdentity trailer (backward-compatible).
    // Look for magic marker 0x46494449 ("FIDI") followed by 68 bytes.
    let file_identity = if offset + 4 + 68 <= payload.len() {
        let marker = u32::from_le_bytes([
            payload[offset],
            payload[offset + 1],
            payload[offset + 2],
            payload[offset + 3],
        ]);
        if marker == 0x4649_4449 {
            offset += 4;
            // The slice is exactly 68 bytes, so this conversion cannot fail;
            // `?` is only defensive.
            let fi_data: &[u8; 68] = payload[offset..offset + 68].try_into().ok()?;
            Some(FileIdentity::from_bytes(fi_data))
        } else {
            None
        }
    } else {
        None
    };

    Some(ParsedManifest {
        epoch,
        dimension,
        total_vectors,
        profile_id,
        segment_dir,
        deleted_ids,
        file_identity,
    })
}
|
||||
|
||||
/// Read a VEC_SEG payload and return (id, vector) pairs.
|
||||
pub(crate) fn read_vec_seg_payload(payload: &[u8]) -> Option<Vec<(u64, Vec<f32>)>> {
|
||||
if payload.len() < 6 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let dimension = u16::from_le_bytes([payload[0], payload[1]]) as usize;
|
||||
let vector_count =
|
||||
u32::from_le_bytes([payload[2], payload[3], payload[4], payload[5]]) as usize;
|
||||
|
||||
let bytes_per_vec = dimension * 4;
|
||||
let expected_size = 6 + vector_count * (8 + bytes_per_vec);
|
||||
if payload.len() < expected_size {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut result = Vec::with_capacity(vector_count);
|
||||
let mut offset = 6;
|
||||
|
||||
for _ in 0..vector_count {
|
||||
let vec_id = u64::from_le_bytes([
|
||||
payload[offset],
|
||||
payload[offset + 1],
|
||||
payload[offset + 2],
|
||||
payload[offset + 3],
|
||||
payload[offset + 4],
|
||||
payload[offset + 5],
|
||||
payload[offset + 6],
|
||||
payload[offset + 7],
|
||||
]);
|
||||
offset += 8;
|
||||
|
||||
let mut vec_data = Vec::with_capacity(dimension);
|
||||
for _ in 0..dimension {
|
||||
let val = f32::from_le_bytes([
|
||||
payload[offset],
|
||||
payload[offset + 1],
|
||||
payload[offset + 2],
|
||||
payload[offset + 3],
|
||||
]);
|
||||
vec_data.push(val);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
result.push((vec_id, vec_data));
|
||||
}
|
||||
|
||||
Some(result)
|
||||
}
|
||||
|
||||
/// Maximum allowed payload size when reading segments (256 MiB).
/// This prevents a malicious payload_length field from causing OOM.
/// The cap also guarantees a `u64` payload length fits in `usize`
/// when allocating the read buffer.
const MAX_READ_PAYLOAD: u64 = 256 * 1024 * 1024;
|
||||
|
||||
/// Read a segment's payload from the file given its offset.
///
/// Validates magic, enforces a maximum payload size, and verifies the
/// content hash before returning the data.
///
/// Errors with `InvalidData` on bad magic, oversized payload, or a
/// content-hash mismatch; seek/read failures propagate as-is.
pub(crate) fn read_segment_payload<R: Read + Seek>(
    reader: &mut R,
    seg_offset: u64,
) -> io::Result<(SegmentHeader, Vec<u8>)> {
    reader.seek(SeekFrom::Start(seg_offset))?;

    let mut hdr_buf = [0u8; SEGMENT_HEADER_SIZE];
    reader.read_exact(&mut hdr_buf)?;

    let magic = u32::from_le_bytes([hdr_buf[0], hdr_buf[1], hdr_buf[2], hdr_buf[3]]);
    if magic != SEGMENT_MAGIC {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "invalid segment magic",
        ));
    }

    // Little-endian u64 at header offset 0x10.
    let payload_length = u64::from_le_bytes([
        hdr_buf[0x10],
        hdr_buf[0x11],
        hdr_buf[0x12],
        hdr_buf[0x13],
        hdr_buf[0x14],
        hdr_buf[0x15],
        hdr_buf[0x16],
        hdr_buf[0x17],
    ]);

    // Enforce maximum payload size to prevent OOM from crafted files.
    if payload_length > MAX_READ_PAYLOAD {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "segment payload too large: {} bytes (max {})",
                payload_length, MAX_READ_PAYLOAD
            ),
        ));
    }

    // Decode the remaining header fields at their fixed offsets.
    let header = SegmentHeader {
        magic,
        version: hdr_buf[0x04],
        seg_type: hdr_buf[0x05],
        flags: u16::from_le_bytes([hdr_buf[0x06], hdr_buf[0x07]]),
        segment_id: u64::from_le_bytes([
            hdr_buf[0x08],
            hdr_buf[0x09],
            hdr_buf[0x0A],
            hdr_buf[0x0B],
            hdr_buf[0x0C],
            hdr_buf[0x0D],
            hdr_buf[0x0E],
            hdr_buf[0x0F],
        ]),
        payload_length,
        timestamp_ns: u64::from_le_bytes([
            hdr_buf[0x18],
            hdr_buf[0x19],
            hdr_buf[0x1A],
            hdr_buf[0x1B],
            hdr_buf[0x1C],
            hdr_buf[0x1D],
            hdr_buf[0x1E],
            hdr_buf[0x1F],
        ]),
        checksum_algo: hdr_buf[0x20],
        compression: hdr_buf[0x21],
        reserved_0: u16::from_le_bytes([hdr_buf[0x22], hdr_buf[0x23]]),
        reserved_1: u32::from_le_bytes([
            hdr_buf[0x24],
            hdr_buf[0x25],
            hdr_buf[0x26],
            hdr_buf[0x27],
        ]),
        content_hash: {
            let mut h = [0u8; 16];
            h.copy_from_slice(&hdr_buf[0x28..0x38]);
            h
        },
        uncompressed_len: u32::from_le_bytes([
            hdr_buf[0x38],
            hdr_buf[0x39],
            hdr_buf[0x3A],
            hdr_buf[0x3B],
        ]),
        alignment_pad: u32::from_le_bytes([
            hdr_buf[0x3C],
            hdr_buf[0x3D],
            hdr_buf[0x3E],
            hdr_buf[0x3F],
        ]),
    };

    // payload_length is guaranteed <= MAX_READ_PAYLOAD (256 MiB) which fits in usize.
    let mut payload = vec![0u8; payload_length as usize];
    reader.read_exact(&mut payload)?;

    // Verify content hash if it is non-zero (zero hash means "not set").
    if header.content_hash != [0u8; 16] {
        let computed = compute_content_hash(&payload);
        if computed != header.content_hash {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "segment content hash mismatch",
            ));
        }
    }

    Ok((header, payload))
}
|
||||
|
||||
/// Compute a 16-byte content hash matching the write path's algorithm.
/// Uses CRC32 with rotations to fill 16 bytes.
fn compute_content_hash(data: &[u8]) -> [u8; 16] {
    let crc = crc32_for_verify(data);
    let mut hash = [0u8; 16];
    // Word i holds the CRC rotated left by 8*i bits, stored little-endian.
    for (i, word) in hash.chunks_exact_mut(4).enumerate() {
        word.copy_from_slice(&crc.rotate_left(i as u32 * 8).to_le_bytes());
    }
    hash
}

/// Simple CRC32 computation (matches write_path::crc32_slice).
///
/// Bit-at-a-time reflected CRC-32 with polynomial 0xEDB88320 and the
/// conventional 0xFFFFFFFF initial value / final inversion.
fn crc32_for_verify(data: &[u8]) -> u32 {
    let folded = data.iter().fold(0xFFFF_FFFFu32, |mut crc, &byte| {
        crc ^= u32::from(byte);
        for _ in 0..8 {
            let lsb_was_set = crc & 1 != 0;
            crc >>= 1;
            if lsb_was_set {
                crc ^= 0xEDB8_8320;
            }
        }
        crc
    });
    folded ^ 0xFFFF_FFFF
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Payloads shorter than the 22-byte manifest header must be rejected.
    #[test]
    fn parse_empty_manifest() {
        assert!(parse_manifest_payload(&[]).is_none());
        assert!(parse_manifest_payload(&[0u8; 10]).is_none());
    }

    /// Hand-build a VEC_SEG payload and check it decodes losslessly.
    #[test]
    fn vec_seg_round_trip() {
        // Build a VEC_SEG payload: dim=2, count=2, vectors.
        let dim: u16 = 2;
        let count: u32 = 2;
        let mut payload = Vec::new();
        payload.extend_from_slice(&dim.to_le_bytes());
        payload.extend_from_slice(&count.to_le_bytes());
        // Vector 0: id=10, [1.0, 2.0]
        payload.extend_from_slice(&10u64.to_le_bytes());
        payload.extend_from_slice(&1.0f32.to_le_bytes());
        payload.extend_from_slice(&2.0f32.to_le_bytes());
        // Vector 1: id=20, [3.0, 4.0]
        payload.extend_from_slice(&20u64.to_le_bytes());
        payload.extend_from_slice(&3.0f32.to_le_bytes());
        payload.extend_from_slice(&4.0f32.to_le_bytes());

        let result = read_vec_seg_payload(&payload).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].0, 10);
        assert_eq!(result[0].1, vec![1.0, 2.0]);
        assert_eq!(result[1].0, 20);
        assert_eq!(result[1].1, vec![3.0, 4.0]);
    }
}
|
||||
424
vendor/ruvector/crates/rvf/rvf-runtime/src/safety_net.rs
vendored
Normal file
424
vendor/ruvector/crates/rvf/rvf-runtime/src/safety_net.rs
vendored
Normal file
@@ -0,0 +1,424 @@
|
||||
//! Selective safety net scan for ADR-033 §3.3.
|
||||
//!
|
||||
//! When the HNSW candidate set is too small (< 2*k), the safety net
|
||||
//! activates a targeted three-phase scan:
|
||||
//!
|
||||
//! 1. **Multi-centroid union**: vectors near best-matching centroids
|
||||
//! 2. **HNSW neighbor expansion**: 1-hop neighbors of existing candidates
|
||||
//! 3. **Recency window**: recently ingested vectors not yet indexed
|
||||
//!
|
||||
//! All phases respect triple budget caps (time, candidates, distance ops).
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use rvf_types::quality::{
|
||||
BudgetReport, BudgetType, DegradationReason, DegradationReport, FallbackPath, SafetyNetBudget,
|
||||
};
|
||||
|
||||
use crate::options::SearchResult;
|
||||
|
||||
/// A candidate with distance and retrieval source.
#[derive(Clone, Debug)]
pub struct Candidate {
    /// Vector identifier.
    pub id: u64,
    /// Distance from the query (squared L2 in this module).
    pub distance: f32,
}
|
||||
|
||||
/// Result of the safety net scan.
///
/// Produced by [`selective_safety_net_scan`].
#[derive(Clone, Debug)]
pub struct SafetyNetResult {
    /// Additional candidates found by the safety net.
    pub candidates: Vec<Candidate>,
    /// Budget consumption report.
    pub budget_report: BudgetReport,
    /// Whether any budget was exhausted.
    pub budget_exhausted: bool,
    /// If degraded, the full report.
    pub degradation: Option<DegradationReport>,
}
|
||||
|
||||
/// Budget tracker enforcing all three caps simultaneously.
struct BudgetTracker {
    // Time cap in microseconds, measured from `start`.
    deadline_us: u64,
    // Instant at which tracking began.
    start: Instant,
    // Cap on candidates scanned.
    max_candidates: u64,
    // Cap on distance computations.
    max_distance_ops: u64,
    // Candidates scanned so far.
    candidates_scanned: u64,
    // Distance computations performed so far.
    distance_ops: u64,
    // Latched once any cap trips; never reset.
    exhausted: bool,
    // Which cap tripped first, if any.
    exhausted_type: Option<BudgetType>,
}
|
||||
|
||||
impl BudgetTracker {
    /// Start tracking against `budget`, with the clock starting now.
    fn new(budget: &SafetyNetBudget) -> Self {
        Self {
            deadline_us: budget.max_scan_time_us,
            start: Instant::now(),
            max_candidates: budget.max_scan_candidates,
            max_distance_ops: budget.max_distance_ops,
            candidates_scanned: 0,
            distance_ops: 0,
            exhausted: false,
            exhausted_type: None,
        }
    }

    /// Check if any budget is exceeded. Returns true if we should stop.
    ///
    /// Once a cap trips, `exhausted` latches and the first tripped cap is
    /// recorded in `exhausted_type`. Checks run in a fixed order —
    /// candidates, distance ops, elapsed time — so when several caps are
    /// hit simultaneously the reported type follows that priority.
    fn is_exceeded(&mut self) -> bool {
        if self.exhausted {
            return true;
        }

        if self.candidates_scanned >= self.max_candidates {
            self.exhausted = true;
            self.exhausted_type = Some(BudgetType::Candidates);
            return true;
        }

        if self.distance_ops >= self.max_distance_ops {
            self.exhausted = true;
            self.exhausted_type = Some(BudgetType::DistanceOps);
            return true;
        }

        let elapsed_us = self.start.elapsed().as_micros() as u64;
        if elapsed_us >= self.deadline_us {
            self.exhausted = true;
            self.exhausted_type = Some(BudgetType::Time);
            return true;
        }

        false
    }

    /// Record a distance operation. Returns true if budget still available.
    ///
    /// Each distance op also counts as one scanned candidate.
    fn record_distance_op(&mut self) -> bool {
        self.distance_ops += 1;
        self.candidates_scanned += 1;
        !self.is_exceeded()
    }

    /// Record multiple distance operations. Returns true if budget still available.
    #[allow(dead_code)]
    fn record_ops(&mut self, count: u64) -> bool {
        self.distance_ops += count;
        self.candidates_scanned += count;
        !self.is_exceeded()
    }

    /// Microseconds elapsed since the tracker was created.
    fn elapsed_us(&self) -> u64 {
        self.start.elapsed().as_micros() as u64
    }
}
|
||||
|
||||
/// Compute squared L2 distance between two vectors.
///
/// The square root is deliberately not taken: squared distance preserves
/// ordering, which is all the candidate ranking needs.
fn l2_distance_sq(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let mut total = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let diff = x - y;
        total += diff * diff;
    }
    total
}
|
||||
|
||||
/// Execute the selective safety net scan.
///
/// The scan proceeds in three phases, each respecting the budget:
/// 1. Multi-centroid union: scan vectors assigned to top-T centroids
/// 2. HNSW neighbor expansion: 1-hop neighbors of existing HNSW candidates
/// 3. Recency window: most recently ingested vectors
///
/// When a budget trips mid-phase, the candidate whose distance was just
/// computed is still kept (the work was already paid for) and the scan
/// stops; the result then carries a `DegradationReport`.
///
/// # Arguments
/// * `query` - The query vector.
/// * `k` - Number of neighbors requested.
/// * `hnsw_candidates` - Candidates already found by HNSW (may be empty).
/// * `all_vectors` - All stored vectors as (id, vector) pairs.
/// * `budget` - Triple budget caps.
/// * `vector_count` - Total number of vectors in the store.
pub fn selective_safety_net_scan(
    query: &[f32],
    k: usize,
    hnsw_candidates: &[SearchResult],
    all_vectors: &[(u64, &[f32])],
    budget: &SafetyNetBudget,
    vector_count: u64,
) -> SafetyNetResult {
    // A disabled budget short-circuits: no scan, no degradation.
    if budget.is_disabled() {
        return SafetyNetResult {
            candidates: Vec::new(),
            budget_report: BudgetReport::default(),
            budget_exhausted: false,
            degradation: None,
        };
    }

    let mut tracker = BudgetTracker::new(budget);
    let mut candidates: Vec<Candidate> = Vec::new();

    // Collect existing candidate IDs for dedup.
    let existing_ids: std::collections::HashSet<u64> =
        hnsw_candidates.iter().map(|c| c.id).collect();

    // Phase 1: Scan from the beginning of all_vectors (simulating centroid union).
    // In a full implementation, vectors would be organized by centroid.
    // Here we scan a targeted subset proportional to sqrt(total).
    let phase1_limit = ((vector_count as f64).sqrt().ceil() as usize).min(all_vectors.len());

    for &(id, vec) in all_vectors.iter().take(phase1_limit) {
        if tracker.is_exceeded() {
            break;
        }
        if existing_ids.contains(&id) {
            continue;
        }
        // Dimension-mismatched vectors are skipped silently.
        if vec.len() != query.len() {
            continue;
        }
        let dist = l2_distance_sq(query, vec);
        if !tracker.record_distance_op() {
            // Budget just tripped: keep the candidate we paid for, then stop.
            candidates.push(Candidate { id, distance: dist });
            break;
        }
        candidates.push(Candidate { id, distance: dist });
    }

    // Phase 2: HNSW neighbor expansion.
    // Scan neighbors of existing candidates (approximate using vector proximity).
    if !tracker.is_exceeded() && !hnsw_candidates.is_empty() {
        let expansion_budget = k.min(hnsw_candidates.len());
        let mut neighbor_ids: Vec<u64> = Vec::new();

        for _existing in hnsw_candidates.iter().take(expansion_budget) {
            if tracker.is_exceeded() {
                break;
            }
            // Find nearby vectors as "neighbors" (simplified for runtime).
            for &(id, vec) in all_vectors.iter() {
                if tracker.is_exceeded() {
                    break;
                }
                // NOTE(review): `neighbor_ids.contains` is a linear scan —
                // O(n) per lookup; acceptable only while expansion stays small.
                if existing_ids.contains(&id) || neighbor_ids.contains(&id) {
                    continue;
                }
                if vec.len() != query.len() {
                    continue;
                }
                let dist = l2_distance_sq(query, vec);
                if !tracker.record_distance_op() {
                    candidates.push(Candidate { id, distance: dist });
                    neighbor_ids.push(id);
                    break;
                }
                candidates.push(Candidate { id, distance: dist });
                neighbor_ids.push(id);
                // Only take a few neighbors per candidate.
                if neighbor_ids.len() >= expansion_budget * 3 {
                    break;
                }
            }
        }
    }

    // Phase 3: Recency window — scan most recently added vectors.
    if !tracker.is_exceeded() {
        // Safe subtraction: `is_exceeded()` being false guarantees
        // candidates_scanned < max_scan_candidates.
        let recency_limit = (budget.max_scan_candidates - tracker.candidates_scanned)
            .min(all_vectors.len() as u64) as usize;

        // Iterate in reverse: the tail of all_vectors is the newest data.
        for &(id, vec) in all_vectors.iter().rev().take(recency_limit) {
            if tracker.is_exceeded() {
                break;
            }
            if existing_ids.contains(&id) {
                continue;
            }
            if vec.len() != query.len() {
                continue;
            }
            let dist = l2_distance_sq(query, vec);
            if !tracker.record_distance_op() {
                candidates.push(Candidate { id, distance: dist });
                break;
            }
            candidates.push(Candidate { id, distance: dist });
        }
    }

    let elapsed = tracker.elapsed_us();
    let budget_report = BudgetReport {
        safety_net_scan_us: elapsed,
        total_us: elapsed,
        distance_ops: tracker.distance_ops,
        distance_ops_budget: budget.max_distance_ops,
        linear_scan_count: tracker.candidates_scanned,
        linear_scan_budget: budget.max_scan_candidates,
        ..BudgetReport::default()
    };

    // Exhaustion is reported as a structured degradation, not an error.
    let degradation = if tracker.exhausted {
        Some(DegradationReport {
            fallback_path: FallbackPath::SafetyNetBudgetExhausted,
            reason: DegradationReason::BudgetExhausted {
                scanned: tracker.candidates_scanned,
                total: vector_count,
                budget_type: tracker.exhausted_type.unwrap_or(BudgetType::DistanceOps),
            },
            guarantee_lost: "recall may be below target; safety net budget exhausted",
        })
    } else {
        None
    };

    SafetyNetResult {
        candidates,
        budget_report,
        budget_exhausted: tracker.exhausted,
        degradation,
    }
}
|
||||
|
||||
/// Determine if the safety net should activate.
///
/// Activates when the HNSW candidate set is smaller than `2 * k`.
pub fn should_activate_safety_net(hnsw_candidate_count: usize, k: usize) -> bool {
    let required = 2 * k;
    hnsw_candidate_count < required
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a deterministic synthetic dataset: `count` vectors of
    /// dimension `dim` with strictly increasing component values, so
    /// vector 0 is always nearest to the zero query.
    fn make_vectors(count: usize, dim: usize) -> Vec<(u64, Vec<f32>)> {
        (0..count)
            .map(|i| {
                let vec: Vec<f32> = (0..dim).map(|d| (i * dim + d) as f32 * 0.01).collect();
                (i as u64, vec)
            })
            .collect()
    }

    #[test]
    fn safety_net_disabled_returns_empty() {
        let query = vec![0.0; 4];
        let vecs = make_vectors(100, 4);
        let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

        // A DISABLED budget must short-circuit: no candidates, no exhaustion.
        let result =
            selective_safety_net_scan(&query, 10, &[], &refs, &SafetyNetBudget::DISABLED, 100);
        assert!(result.candidates.is_empty());
        assert!(!result.budget_exhausted);
    }

    #[test]
    fn safety_net_finds_candidates() {
        let query = vec![0.0; 4];
        let vecs = make_vectors(100, 4);
        let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

        let result =
            selective_safety_net_scan(&query, 10, &[], &refs, &SafetyNetBudget::LAYER_A, 100);
        assert!(!result.candidates.is_empty());
        assert!(result.budget_report.distance_ops > 0);
    }

    #[test]
    fn safety_net_respects_distance_ops_budget() {
        let query = vec![0.0; 4];
        let vecs = make_vectors(50_000, 4);
        let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

        let tight_budget = SafetyNetBudget {
            max_scan_time_us: 1_000_000, // 1 second (won't hit)
            max_scan_candidates: 50,
            max_distance_ops: 50,
        };

        let result = selective_safety_net_scan(&query, 10, &[], &refs, &tight_budget, 50_000);
        // Must not exceed budget.
        assert!(result.budget_report.distance_ops <= 51); // +1 for the op that triggers exhaustion
    }

    #[test]
    fn safety_net_reports_budget_exhaustion() {
        let query = vec![0.0; 4];
        let vecs = make_vectors(10_000, 4);
        let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

        let tiny_budget = SafetyNetBudget {
            max_scan_time_us: 1_000_000,
            max_scan_candidates: 5,
            max_distance_ops: 5,
        };

        // Exhaustion must surface both the flag and a DegradationReport
        // with the safety-net fallback path.
        let result = selective_safety_net_scan(&query, 10, &[], &refs, &tiny_budget, 10_000);
        assert!(result.budget_exhausted);
        assert!(result.degradation.is_some());
        let deg = result.degradation.unwrap();
        assert_eq!(deg.fallback_path, FallbackPath::SafetyNetBudgetExhausted);
    }

    #[test]
    fn safety_net_deduplicates_existing() {
        let query = vec![0.0; 4];
        let vecs = make_vectors(20, 4);
        let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

        // Ids 0 and 1 are already in the HNSW result set and must be skipped.
        let existing = vec![
            SearchResult {
                id: 0,
                distance: 0.1,
                retrieval_quality: rvf_types::quality::RetrievalQuality::Full,
            },
            SearchResult {
                id: 1,
                distance: 0.2,
                retrieval_quality: rvf_types::quality::RetrievalQuality::Full,
            },
        ];

        let result =
            selective_safety_net_scan(&query, 5, &existing, &refs, &SafetyNetBudget::LAYER_A, 20);
        // Should not contain ids 0 or 1.
        for c in &result.candidates {
            assert!(c.id != 0 && c.id != 1);
        }
    }

    #[test]
    fn should_activate_when_insufficient() {
        assert!(should_activate_safety_net(3, 5));
        assert!(should_activate_safety_net(9, 5));
        assert!(!should_activate_safety_net(10, 5));
        assert!(!should_activate_safety_net(100, 5));
    }

    #[test]
    fn l2_distance_basic() {
        // Squared L2 of unit offset on one axis is exactly 1.0.
        let a = [1.0, 0.0, 0.0];
        let b = [0.0, 0.0, 0.0];
        assert!((l2_distance_sq(&a, &b) - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn budget_tracker_time_enforcement() {
        let budget = SafetyNetBudget {
            max_scan_time_us: 0, // Instant timeout
            max_scan_candidates: 1_000_000,
            max_distance_ops: 1_000_000,
        };
        let mut tracker = BudgetTracker::new(&budget);
        // Even with generous other budgets, time should exhaust.
        assert!(tracker.is_exceeded());
    }

    #[test]
    fn budget_tracker_candidate_enforcement() {
        let budget = SafetyNetBudget {
            max_scan_time_us: 1_000_000,
            max_scan_candidates: 3,
            max_distance_ops: 1_000_000,
        };
        let mut tracker = BudgetTracker::new(&budget);
        assert!(tracker.record_distance_op()); // 1 <= 3
        assert!(tracker.record_distance_op()); // 2 <= 3
        // 3rd record hits the cap (3 >= 3), returns false — budget exhausted.
        assert!(!tracker.record_distance_op());
        assert!(tracker.exhausted);
        assert_eq!(tracker.distance_ops, 3);
    }
}
|
||||
235
vendor/ruvector/crates/rvf/rvf-runtime/src/seed_crypto.rs
vendored
Normal file
235
vendor/ruvector/crates/rvf/rvf-runtime/src/seed_crypto.rs
vendored
Normal file
@@ -0,0 +1,235 @@
|
||||
//! Cryptographic operations for QR seed signing and verification.
|
||||
//!
|
||||
//! Uses the built-in SHA-256 and HMAC-SHA256 from rvf-types — zero dependencies.
|
||||
//!
|
||||
//! Signature scheme: HMAC-SHA256 with sig_algo=2.
|
||||
//! Content integrity: SHA-256 truncated to 8 or 16 bytes.
|
||||
|
||||
use rvf_types::sha256::{ct_eq, hmac_sha256, sha256};
|
||||
|
||||
/// Signature algorithm ID for HMAC-SHA256 (built-in, zero-dep).
///
/// Written into seed headers so verifiers know which scheme produced
/// the signature bytes.
pub const SIG_ALGO_HMAC_SHA256: u16 = 2;

/// Signature algorithm ID for Ed25519 (RFC 8032 asymmetric signing).
/// Only available when the `ed25519` feature is enabled.
#[cfg(feature = "ed25519")]
pub const SIG_ALGO_ED25519: u16 = 0;
|
||||
|
||||
/// Compute the 8-byte content hash for SeedHeader.content_hash.
|
||||
///
|
||||
/// SHA-256 of the data payload (microkernel + manifest) truncated to 64 bits.
|
||||
pub fn seed_content_hash(data: &[u8]) -> [u8; 8] {
|
||||
let full = sha256(data);
|
||||
let mut out = [0u8; 8];
|
||||
out.copy_from_slice(&full[..8]);
|
||||
out
|
||||
}
|
||||
|
||||
/// Compute a 16-byte layer content hash.
|
||||
///
|
||||
/// SHA-256 of the layer data truncated to 128 bits.
|
||||
/// Used for LayerEntry.content_hash verification.
|
||||
pub fn layer_content_hash(data: &[u8]) -> [u8; 16] {
|
||||
let full = sha256(data);
|
||||
let mut out = [0u8; 16];
|
||||
out.copy_from_slice(&full[..16]);
|
||||
out
|
||||
}
|
||||
|
||||
/// Compute the full 32-byte content hash.
///
/// Untruncated SHA-256 of `data`; companion to [`seed_content_hash`]
/// and [`layer_content_hash`] for callers that need the whole digest.
pub fn full_content_hash(data: &[u8]) -> [u8; 32] {
    sha256(data)
}
|
||||
|
||||
/// Sign a seed payload using HMAC-SHA256.
///
/// The signature covers the unsigned payload (header + microkernel + manifest).
/// Returns a 32-byte HMAC-SHA256 tag. Verify with [`verify_seed`], which
/// uses a constant-time comparison.
pub fn sign_seed(key: &[u8], payload: &[u8]) -> [u8; 32] {
    hmac_sha256(key, payload)
}
|
||||
|
||||
/// Verify a seed signature using HMAC-SHA256.
|
||||
///
|
||||
/// Uses constant-time comparison to prevent timing side channels.
|
||||
pub fn verify_seed(key: &[u8], payload: &[u8], signature: &[u8]) -> bool {
|
||||
if signature.len() != 32 {
|
||||
return false;
|
||||
}
|
||||
let expected = hmac_sha256(key, payload);
|
||||
let mut sig_arr = [0u8; 32];
|
||||
sig_arr.copy_from_slice(signature);
|
||||
ct_eq(&expected, &sig_arr)
|
||||
}
|
||||
|
||||
/// Verify a layer's content hash matches its data.
///
/// Plain (non-constant-time) comparison is fine here: the hash covers
/// public content integrity, not a secret.
pub fn verify_layer(expected_hash: &[u8; 16], layer_data: &[u8]) -> bool {
    let computed = layer_content_hash(layer_data);
    computed == *expected_hash
}
|
||||
|
||||
/// Verify the seed's 8-byte content hash against payload data.
///
/// Recomputes [`seed_content_hash`] over `data` and compares. As with
/// [`verify_layer`], this is an integrity check, not a secret compare.
pub fn verify_content_hash(expected: &[u8; 8], data: &[u8]) -> bool {
    let computed = seed_content_hash(data);
    computed == *expected
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Ed25519 asymmetric signing (feature-gated)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Sign a seed payload using Ed25519 (RFC 8032).
///
/// Takes a 32-byte secret key and returns a 64-byte signature.
/// This is asymmetric: only the holder of the secret key can sign,
/// but anyone with the corresponding public key can verify.
/// Thin wrapper over `rvf_types::ed25519::ed25519_sign`.
#[cfg(feature = "ed25519")]
pub fn sign_seed_ed25519(secret_key: &[u8; 32], payload: &[u8]) -> [u8; 64] {
    rvf_types::ed25519::ed25519_sign(secret_key, payload)
}
|
||||
|
||||
/// Verify a seed signature using Ed25519 (RFC 8032).
///
/// Takes a 32-byte public key and a 64-byte signature.
/// Returns `true` if the signature is valid for the given payload;
/// any signature that is not exactly 64 bytes is rejected up front.
#[cfg(feature = "ed25519")]
pub fn verify_seed_ed25519(public_key: &[u8; 32], payload: &[u8], signature: &[u8]) -> bool {
    if signature.len() != 64 {
        return false;
    }
    // Copy into a fixed-size array for the typed verify API.
    let mut sig_arr = [0u8; 64];
    sig_arr.copy_from_slice(signature);
    rvf_types::ed25519::ed25519_verify(public_key, payload, &sig_arr)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn content_hash_deterministic() {
        let data = b"test microkernel data";
        let h1 = seed_content_hash(data);
        let h2 = seed_content_hash(data);
        assert_eq!(h1, h2);
        // A real digest prefix is overwhelmingly unlikely to be all zeros.
        assert_ne!(h1, [0u8; 8]);
    }

    #[test]
    fn layer_hash_deterministic() {
        let data = b"layer data block";
        let h1 = layer_content_hash(data);
        let h2 = layer_content_hash(data);
        assert_eq!(h1, h2);
        assert_ne!(h1, [0u8; 16]);
    }

    #[test]
    fn sign_verify_round_trip() {
        let key = b"my-secret-signing-key-1234567890";
        let payload = b"RVQS header + microkernel + manifest bytes";
        let sig = sign_seed(key, payload);
        assert!(verify_seed(key, payload, &sig));
    }

    #[test]
    fn wrong_key_fails() {
        let key = b"correct-key";
        let payload = b"some payload";
        let sig = sign_seed(key, payload);
        assert!(!verify_seed(b"wrong-key!!", payload, &sig));
    }

    #[test]
    fn tampered_payload_fails() {
        let key = b"signing-key";
        let payload = b"original payload";
        let sig = sign_seed(key, payload);
        assert!(!verify_seed(key, b"tampered payload", &sig));
    }

    #[test]
    fn short_signature_fails() {
        let key = b"key";
        assert!(!verify_seed(key, b"data", &[0u8; 16])); // Too short.
    }

    #[test]
    fn verify_layer_correct() {
        let data = vec![0x42u8; 4096];
        let hash = layer_content_hash(&data);
        assert!(verify_layer(&hash, &data));
    }

    #[test]
    fn verify_layer_tampered() {
        let data = vec![0x42u8; 4096];
        let hash = layer_content_hash(&data);
        // Same length, every byte flipped by one — hash must differ.
        let tampered = vec![0x43u8; 4096];
        assert!(!verify_layer(&hash, &tampered));
    }

    #[test]
    fn verify_content_hash_correct() {
        let data = b"microkernel + manifest";
        let hash = seed_content_hash(data);
        assert!(verify_content_hash(&hash, data));
    }

    #[test]
    fn different_data_different_hashes() {
        let h1 = seed_content_hash(b"data1");
        let h2 = seed_content_hash(b"data2");
        assert_ne!(h1, h2);
    }

    // --- Ed25519 tests (feature-gated) ---

    #[cfg(feature = "ed25519")]
    mod ed25519_tests {
        use super::*;

        /// Build a deterministic keypair for tests.
        fn test_keypair() -> ([u8; 32], [u8; 32]) {
            let secret = [0x42u8; 32];
            let kp = rvf_types::ed25519::Ed25519Keypair::from_secret(&secret);
            (kp.secret_key(), kp.public_key())
        }

        #[test]
        fn ed25519_sign_verify_round_trip() {
            let (secret, public) = test_keypair();
            let payload = b"RVQS header + microkernel + manifest bytes";
            let sig = sign_seed_ed25519(&secret, payload);
            assert!(verify_seed_ed25519(&public, payload, &sig));
        }

        #[test]
        fn ed25519_wrong_key_rejects() {
            let (secret, _public) = test_keypair();
            let other_secret = [0x99u8; 32];
            let other_kp = rvf_types::ed25519::Ed25519Keypair::from_secret(&other_secret);
            let payload = b"some payload";
            let sig = sign_seed_ed25519(&secret, payload);
            // Signature made with one key must not verify under another.
            assert!(!verify_seed_ed25519(&other_kp.public_key(), payload, &sig));
        }

        #[test]
        fn ed25519_tampered_payload_rejects() {
            let (secret, public) = test_keypair();
            let payload = b"original payload";
            let sig = sign_seed_ed25519(&secret, payload);
            assert!(!verify_seed_ed25519(&public, b"tampered payload", &sig));
        }

        #[test]
        fn ed25519_short_signature_rejects() {
            let (_secret, public) = test_keypair();
            assert!(!verify_seed_ed25519(&public, b"data", &[0u8; 16]));
        }

        #[test]
        fn ed25519_algo_constant() {
            assert_eq!(SIG_ALGO_ED25519, 0);
        }
    }
}
|
||||
33
vendor/ruvector/crates/rvf/rvf-runtime/src/status.rs
vendored
Normal file
33
vendor/ruvector/crates/rvf/rvf-runtime/src/status.rs
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
//! Store status reporting.
|
||||
|
||||
/// Compaction state as reported in store status.
///
/// Surfaced to callers via [`StoreStatus::compaction_state`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactionState {
    /// No compaction in progress.
    Idle,
    /// Normal compaction running.
    Running,
    /// Emergency compaction (dead_space > 70%).
    Emergency,
}
|
||||
|
||||
/// A snapshot of the store's current state.
///
/// All fields are point-in-time values; the struct is a plain data
/// carrier with no behavior of its own.
#[derive(Clone, Debug)]
pub struct StoreStatus {
    /// Total number of live (non-deleted) vectors.
    pub total_vectors: u64,
    /// Total number of segments in the file.
    pub total_segments: u32,
    /// Total file size in bytes.
    pub file_size: u64,
    /// Current manifest epoch.
    pub current_epoch: u32,
    /// Hardware profile identifier.
    pub profile_id: u8,
    /// Current compaction state.
    pub compaction_state: CompactionState,
    /// Ratio of dead space to total file size (0.0 - 1.0).
    /// See [`CompactionState::Emergency`] for the >0.7 escalation case.
    pub dead_space_ratio: f64,
    /// Whether the store is open in read-only mode.
    pub read_only: bool,
}
|
||||
2766
vendor/ruvector/crates/rvf/rvf-runtime/src/store.rs
vendored
Normal file
2766
vendor/ruvector/crates/rvf/rvf-runtime/src/store.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
952
vendor/ruvector/crates/rvf/rvf-runtime/src/witness.rs
vendored
Normal file
952
vendor/ruvector/crates/rvf/rvf-runtime/src/witness.rs
vendored
Normal file
@@ -0,0 +1,952 @@
|
||||
//! Witness bundle builder, parser, scorecard aggregator, and governance
|
||||
//! enforcement for ADR-035 capability reports.
|
||||
//!
|
||||
//! A witness bundle is the atomic proof unit: one task execution, fully
|
||||
//! captured, signed, and replayable. A scorecard aggregates bundles into
|
||||
//! a capability report.
|
||||
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use rvf_types::witness::*;
|
||||
|
||||
use crate::seed_crypto;
|
||||
|
||||
/// Errors specific to witness operations.
///
/// Header-level failures wrap the underlying [`rvf_types::RvfError`];
/// the remaining variants describe bundle-structure and policy issues.
#[derive(Debug)]
pub enum WitnessError {
    /// Header parse or validation failure.
    InvalidHeader(rvf_types::RvfError),
    /// Section extends beyond payload.
    SectionOverflow { tag: u16, offset: usize },
    /// Signature verification failed.
    SignatureInvalid,
    /// Policy violation detected.
    PolicyViolation(String),
    /// Missing required section.
    MissingSection(&'static str),
    /// Bundle too large.
    TooLarge { size: usize },
}
|
||||
|
||||
// Lets `?` convert header-parse errors into `WitnessError` transparently.
impl From<rvf_types::RvfError> for WitnessError {
    fn from(e: rvf_types::RvfError) -> Self {
        WitnessError::InvalidHeader(e)
    }
}
|
||||
|
||||
// Human-readable one-line rendering for each variant; tags are shown
// as zero-padded hex to match the on-disk TLV notation.
impl core::fmt::Display for WitnessError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            WitnessError::InvalidHeader(e) => write!(f, "invalid header: {e}"),
            WitnessError::SectionOverflow { tag, offset } => {
                write!(f, "section 0x{tag:04X} overflows at offset {offset}")
            }
            WitnessError::SignatureInvalid => write!(f, "signature verification failed"),
            WitnessError::PolicyViolation(msg) => write!(f, "policy violation: {msg}"),
            WitnessError::MissingSection(s) => write!(f, "missing section: {s}"),
            WitnessError::TooLarge { size } => write!(f, "bundle too large: {size} bytes"),
        }
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Governance policy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A governance policy that constrains what actions are allowed.
///
/// Construct via [`GovernancePolicy::restricted`], [`approved`](GovernancePolicy::approved),
/// or [`autonomous`](GovernancePolicy::autonomous); enforce per-call with
/// [`check_tool`](GovernancePolicy::check_tool).
#[derive(Clone, Debug)]
pub struct GovernancePolicy {
    /// The governance mode.
    pub mode: GovernanceMode,
    /// Allowed tool names (empty = all allowed in Autonomous mode).
    pub allowed_tools: Vec<String>,
    /// Denied tool names (checked first, overriding the allow list).
    pub denied_tools: Vec<String>,
    /// Maximum cost in microdollars before requiring confirmation.
    pub max_cost_microdollars: u32,
    /// Maximum tool calls before requiring confirmation.
    pub max_tool_calls: u16,
}
|
||||
|
||||
impl GovernancePolicy {
    /// Create a restricted policy (read-only).
    ///
    /// Only passive inspection tools are allowed; anything that can
    /// mutate state (shell, file writes) is explicitly denied.
    pub fn restricted() -> Self {
        Self {
            mode: GovernanceMode::Restricted,
            allowed_tools: vec![
                "Read".into(),
                "Glob".into(),
                "Grep".into(),
                "WebFetch".into(),
                "WebSearch".into(),
            ],
            denied_tools: vec!["Bash".into(), "Write".into(), "Edit".into()],
            max_cost_microdollars: 10_000, // $0.01
            max_tool_calls: 50,
        }
    }

    /// Create an approved policy (writes with gates).
    pub fn approved() -> Self {
        Self {
            mode: GovernanceMode::Approved,
            allowed_tools: Vec::new(), // all allowed but gated
            denied_tools: Vec::new(),
            max_cost_microdollars: 100_000, // $0.10
            max_tool_calls: 200,
        }
    }

    /// Create an autonomous policy (bounded authority).
    pub fn autonomous() -> Self {
        Self {
            mode: GovernanceMode::Autonomous,
            allowed_tools: Vec::new(),
            denied_tools: Vec::new(),
            max_cost_microdollars: 1_000_000, // $1.00
            max_tool_calls: 500,
        }
    }

    /// Check if a tool call is allowed under this policy.
    ///
    /// Deny list always wins; otherwise the answer depends on mode:
    /// Restricted requires an explicit allow-list entry, Approved
    /// gates everything behind confirmation, Autonomous allows all.
    pub fn check_tool(&self, tool: &str) -> PolicyCheck {
        // Deny list takes priority.
        if self.denied_tools.iter().any(|t| t == tool) {
            return PolicyCheck::Denied;
        }

        match self.mode {
            GovernanceMode::Restricted => {
                if self.allowed_tools.iter().any(|t| t == tool) {
                    PolicyCheck::Allowed
                } else {
                    PolicyCheck::Denied
                }
            }
            GovernanceMode::Approved => PolicyCheck::Confirmed,
            GovernanceMode::Autonomous => PolicyCheck::Allowed,
        }
    }

    /// Compute the SHA-256 policy hash (truncated to 8 bytes).
    ///
    /// NOTE(review): the serialization uses bare `+`/`-` separators, so
    /// tool names that themselves contain `+` or `-` could make two
    /// distinct policies serialize identically. Changing the format
    /// would change existing hashes — flagging rather than fixing.
    pub fn hash(&self) -> [u8; 8] {
        // Hash the mode + tool lists as a deterministic string.
        let mut policy_str = format!("mode={}", self.mode as u8);
        for t in &self.allowed_tools {
            policy_str.push_str(&format!("+{t}"));
        }
        for t in &self.denied_tools {
            policy_str.push_str(&format!("-{t}"));
        }
        policy_str.push_str(&format!(
            "|cost={}|calls={}",
            self.max_cost_microdollars, self.max_tool_calls
        ));
        seed_crypto::seed_content_hash(policy_str.as_bytes())
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Witness builder
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Builder for constructing a witness bundle.
///
/// Accumulates trace entries and resource totals as the task runs,
/// then serializes everything via `build` / `build_and_sign`.
#[derive(Clone, Debug)]
pub struct WitnessBuilder {
    /// Task identifier.
    pub task_id: [u8; 16],
    /// Governance policy used.
    pub policy: GovernancePolicy,
    /// Task outcome (defaults to `Skipped` until set).
    pub outcome: TaskOutcome,
    /// Tool call entries.
    pub trace: Vec<ToolCallEntry>,
    /// Spec / prompt text.
    pub spec: Option<Vec<u8>>,
    /// Plan graph.
    pub plan: Option<Vec<u8>>,
    /// Code diff.
    pub diff: Option<Vec<u8>>,
    /// Test log.
    pub test_log: Option<Vec<u8>>,
    /// Postmortem.
    pub postmortem: Option<Vec<u8>>,
    /// Accumulated cost (private: maintained by `record_tool_call`).
    total_cost_microdollars: u32,
    /// Accumulated latency.
    total_latency_ms: u32,
    /// Accumulated tokens.
    total_tokens: u32,
    /// Accumulated retries.
    retry_count: u16,
    /// Policy violations recorded.
    pub policy_violations: Vec<String>,
    /// Rollback events recorded.
    pub rollback_count: u32,
}
|
||||
|
||||
impl WitnessBuilder {
|
||||
/// Create a new witness builder.
|
||||
pub fn new(task_id: [u8; 16], policy: GovernancePolicy) -> Self {
|
||||
Self {
|
||||
task_id,
|
||||
policy,
|
||||
outcome: TaskOutcome::Skipped,
|
||||
trace: Vec::new(),
|
||||
spec: None,
|
||||
plan: None,
|
||||
diff: None,
|
||||
test_log: None,
|
||||
postmortem: None,
|
||||
total_cost_microdollars: 0,
|
||||
total_latency_ms: 0,
|
||||
total_tokens: 0,
|
||||
retry_count: 0,
|
||||
policy_violations: Vec::new(),
|
||||
rollback_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the spec / prompt.
|
||||
pub fn with_spec(mut self, spec: &[u8]) -> Self {
|
||||
self.spec = Some(spec.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the plan.
|
||||
pub fn with_plan(mut self, plan: &[u8]) -> Self {
|
||||
self.plan = Some(plan.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the diff.
|
||||
pub fn with_diff(mut self, diff: &[u8]) -> Self {
|
||||
self.diff = Some(diff.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the test log.
|
||||
pub fn with_test_log(mut self, log: &[u8]) -> Self {
|
||||
self.test_log = Some(log.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the postmortem.
|
||||
pub fn with_postmortem(mut self, pm: &[u8]) -> Self {
|
||||
self.postmortem = Some(pm.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the outcome.
|
||||
pub fn with_outcome(mut self, outcome: TaskOutcome) -> Self {
|
||||
self.outcome = outcome;
|
||||
self
|
||||
}
|
||||
|
||||
/// Record a tool call. Enforces governance policy.
|
||||
pub fn record_tool_call(&mut self, entry: ToolCallEntry) -> PolicyCheck {
|
||||
let tool_name = core::str::from_utf8(&entry.action).unwrap_or("");
|
||||
let check = self.policy.check_tool(tool_name);
|
||||
|
||||
if check == PolicyCheck::Denied {
|
||||
self.policy_violations
|
||||
.push(format!("denied tool: {tool_name}"));
|
||||
}
|
||||
|
||||
self.total_cost_microdollars = self
|
||||
.total_cost_microdollars
|
||||
.saturating_add(entry.cost_microdollars);
|
||||
self.total_latency_ms = self.total_latency_ms.saturating_add(entry.latency_ms);
|
||||
self.total_tokens = self.total_tokens.saturating_add(entry.tokens);
|
||||
|
||||
let mut recorded = entry;
|
||||
recorded.policy_check = check;
|
||||
self.trace.push(recorded);
|
||||
|
||||
// Check cost budget.
|
||||
if self.total_cost_microdollars > self.policy.max_cost_microdollars {
|
||||
self.policy_violations.push(format!(
|
||||
"cost budget exceeded: {} > {}",
|
||||
self.total_cost_microdollars, self.policy.max_cost_microdollars
|
||||
));
|
||||
}
|
||||
|
||||
// Check tool call budget.
|
||||
if self.trace.len() as u16 > self.policy.max_tool_calls {
|
||||
self.policy_violations.push(format!(
|
||||
"tool call budget exceeded: {} > {}",
|
||||
self.trace.len(),
|
||||
self.policy.max_tool_calls
|
||||
));
|
||||
}
|
||||
|
||||
check
|
||||
}
|
||||
|
||||
/// Record a retry.
|
||||
pub fn record_retry(&mut self) {
|
||||
self.retry_count = self.retry_count.saturating_add(1);
|
||||
}
|
||||
|
||||
/// Record a rollback.
|
||||
pub fn record_rollback(&mut self) {
|
||||
self.rollback_count += 1;
|
||||
}
|
||||
|
||||
/// Build the TLV payload sections.
|
||||
fn build_sections(&self) -> (Vec<u8>, u16, u16) {
|
||||
let mut payload = Vec::new();
|
||||
let mut section_count: u16 = 0;
|
||||
let mut flags: u16 = 0;
|
||||
|
||||
// Helper: write one TLV section.
|
||||
let mut write_section = |tag: u16, flag: u16, data: &[u8]| {
|
||||
payload.extend_from_slice(&tag.to_le_bytes());
|
||||
payload.extend_from_slice(&(data.len() as u32).to_le_bytes());
|
||||
payload.extend_from_slice(data);
|
||||
section_count += 1;
|
||||
flags |= flag;
|
||||
};
|
||||
|
||||
if let Some(ref spec) = self.spec {
|
||||
write_section(WIT_TAG_SPEC, WIT_HAS_SPEC, spec);
|
||||
}
|
||||
if let Some(ref plan) = self.plan {
|
||||
write_section(WIT_TAG_PLAN, WIT_HAS_PLAN, plan);
|
||||
}
|
||||
|
||||
// Trace: serialize all tool call entries.
|
||||
if !self.trace.is_empty() {
|
||||
let mut trace_buf = Vec::new();
|
||||
for entry in &self.trace {
|
||||
trace_buf.extend_from_slice(&entry.to_bytes());
|
||||
}
|
||||
write_section(WIT_TAG_TRACE, WIT_HAS_TRACE, &trace_buf);
|
||||
}
|
||||
|
||||
if let Some(ref diff) = self.diff {
|
||||
write_section(WIT_TAG_DIFF, WIT_HAS_DIFF, diff);
|
||||
}
|
||||
if let Some(ref log) = self.test_log {
|
||||
write_section(WIT_TAG_TEST_LOG, WIT_HAS_TEST_LOG, log);
|
||||
}
|
||||
if let Some(ref pm) = self.postmortem {
|
||||
write_section(WIT_TAG_POSTMORTEM, WIT_HAS_POSTMORTEM, pm);
|
||||
}
|
||||
|
||||
(payload, section_count, flags)
|
||||
}
|
||||
|
||||
/// Build an unsigned witness bundle.
|
||||
pub fn build(self) -> Result<(Vec<u8>, WitnessHeader), WitnessError> {
|
||||
let (sections, section_count, flags) = self.build_sections();
|
||||
|
||||
let total_bundle_size = WITNESS_HEADER_SIZE + sections.len();
|
||||
|
||||
let created_ns = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_nanos() as u64;
|
||||
|
||||
let header = WitnessHeader {
|
||||
magic: WITNESS_MAGIC,
|
||||
version: 1,
|
||||
flags,
|
||||
task_id: self.task_id,
|
||||
policy_hash: self.policy.hash(),
|
||||
created_ns,
|
||||
outcome: self.outcome as u8,
|
||||
governance_mode: self.policy.mode as u8,
|
||||
tool_call_count: self.trace.len() as u16,
|
||||
total_cost_microdollars: self.total_cost_microdollars,
|
||||
total_latency_ms: self.total_latency_ms,
|
||||
total_tokens: self.total_tokens,
|
||||
retry_count: self.retry_count,
|
||||
section_count,
|
||||
total_bundle_size: total_bundle_size as u32,
|
||||
};
|
||||
|
||||
let mut payload = Vec::with_capacity(total_bundle_size);
|
||||
payload.extend_from_slice(&header.to_bytes());
|
||||
payload.extend_from_slice(§ions);
|
||||
debug_assert_eq!(payload.len(), total_bundle_size);
|
||||
|
||||
Ok((payload, header))
|
||||
}
|
||||
|
||||
/// Build and sign with HMAC-SHA256.
|
||||
pub fn build_and_sign(
|
||||
self,
|
||||
signing_key: &[u8],
|
||||
) -> Result<(Vec<u8>, WitnessHeader), WitnessError> {
|
||||
let (sections, section_count, mut flags) = self.build_sections();
|
||||
flags |= WIT_SIGNED;
|
||||
|
||||
let sig_len: usize = 32; // HMAC-SHA256
|
||||
let total_bundle_size = WITNESS_HEADER_SIZE + sections.len() + sig_len;
|
||||
|
||||
let created_ns = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_nanos() as u64;
|
||||
|
||||
let header = WitnessHeader {
|
||||
magic: WITNESS_MAGIC,
|
||||
version: 1,
|
||||
flags,
|
||||
task_id: self.task_id,
|
||||
policy_hash: self.policy.hash(),
|
||||
created_ns,
|
||||
outcome: self.outcome as u8,
|
||||
governance_mode: self.policy.mode as u8,
|
||||
tool_call_count: self.trace.len() as u16,
|
||||
total_cost_microdollars: self.total_cost_microdollars,
|
||||
total_latency_ms: self.total_latency_ms,
|
||||
total_tokens: self.total_tokens,
|
||||
retry_count: self.retry_count,
|
||||
section_count,
|
||||
total_bundle_size: total_bundle_size as u32,
|
||||
};
|
||||
|
||||
// Build unsigned payload.
|
||||
let unsigned_size = WITNESS_HEADER_SIZE + sections.len();
|
||||
let mut unsigned = Vec::with_capacity(unsigned_size);
|
||||
unsigned.extend_from_slice(&header.to_bytes());
|
||||
unsigned.extend_from_slice(§ions);
|
||||
|
||||
// Sign.
|
||||
let sig = seed_crypto::sign_seed(signing_key, &unsigned);
|
||||
|
||||
let mut payload = unsigned;
|
||||
payload.extend_from_slice(&sig);
|
||||
debug_assert_eq!(payload.len(), total_bundle_size);
|
||||
|
||||
Ok((payload, header))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Parsed witness bundle
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A parsed witness bundle with zero-copy references to sections.
///
/// All section fields borrow from the buffer passed to `parse`;
/// absent sections are `None`.
#[derive(Debug)]
pub struct ParsedWitness<'a> {
    /// The parsed header.
    pub header: WitnessHeader,
    /// Spec section bytes.
    pub spec: Option<&'a [u8]>,
    /// Plan section bytes.
    pub plan: Option<&'a [u8]>,
    /// Tool call trace bytes (contains serialized ToolCallEntry array).
    pub trace: Option<&'a [u8]>,
    /// Diff section bytes.
    pub diff: Option<&'a [u8]>,
    /// Test log section bytes.
    pub test_log: Option<&'a [u8]>,
    /// Postmortem section bytes.
    pub postmortem: Option<&'a [u8]>,
    /// Signature bytes (if signed; trailing 32 bytes of the bundle).
    pub signature: Option<&'a [u8]>,
}
|
||||
|
||||
impl<'a> ParsedWitness<'a> {
|
||||
/// Parse a witness bundle from bytes.
|
||||
pub fn parse(data: &'a [u8]) -> Result<Self, WitnessError> {
|
||||
let header = WitnessHeader::from_bytes(data)?;
|
||||
|
||||
if (header.total_bundle_size as usize) > data.len() {
|
||||
return Err(WitnessError::InvalidHeader(
|
||||
rvf_types::RvfError::SizeMismatch {
|
||||
expected: header.total_bundle_size as usize,
|
||||
got: data.len(),
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
let mut spec = None;
|
||||
let mut plan = None;
|
||||
let mut trace = None;
|
||||
let mut diff = None;
|
||||
let mut test_log = None;
|
||||
let mut postmortem = None;
|
||||
|
||||
// Determine where signature starts (if signed).
|
||||
let sig_len = if header.is_signed() { 32usize } else { 0 };
|
||||
let sections_end = header.total_bundle_size as usize - sig_len;
|
||||
|
||||
// Parse TLV sections.
|
||||
let mut pos = WITNESS_HEADER_SIZE;
|
||||
while pos + 6 <= sections_end {
|
||||
let tag = u16::from_le_bytes([data[pos], data[pos + 1]]);
|
||||
let length =
|
||||
u32::from_le_bytes([data[pos + 2], data[pos + 3], data[pos + 4], data[pos + 5]])
|
||||
as usize;
|
||||
pos += 6;
|
||||
|
||||
if pos + length > sections_end {
|
||||
return Err(WitnessError::SectionOverflow { tag, offset: pos });
|
||||
}
|
||||
|
||||
let value = &data[pos..pos + length];
|
||||
match tag {
|
||||
WIT_TAG_SPEC => spec = Some(value),
|
||||
WIT_TAG_PLAN => plan = Some(value),
|
||||
WIT_TAG_TRACE => trace = Some(value),
|
||||
WIT_TAG_DIFF => diff = Some(value),
|
||||
WIT_TAG_TEST_LOG => test_log = Some(value),
|
||||
WIT_TAG_POSTMORTEM => postmortem = Some(value),
|
||||
_ => {} // forward-compat: ignore unknown tags
|
||||
}
|
||||
|
||||
pos += length;
|
||||
}
|
||||
|
||||
let signature = if header.is_signed() && sig_len > 0 {
|
||||
let sig_start = header.total_bundle_size as usize - sig_len;
|
||||
Some(&data[sig_start..header.total_bundle_size as usize])
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(ParsedWitness {
|
||||
header,
|
||||
spec,
|
||||
plan,
|
||||
trace,
|
||||
diff,
|
||||
test_log,
|
||||
postmortem,
|
||||
signature,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse tool call entries from the trace section.
|
||||
pub fn parse_trace(&self) -> Vec<ToolCallEntry> {
|
||||
let data = match self.trace {
|
||||
Some(d) => d,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let mut entries = Vec::new();
|
||||
let mut pos = 0;
|
||||
while pos < data.len() {
|
||||
match ToolCallEntry::from_bytes(&data[pos..]) {
|
||||
Some((entry, consumed)) => {
|
||||
entries.push(entry);
|
||||
pos += consumed;
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
entries
|
||||
}
|
||||
|
||||
/// Get the unsigned payload (everything before the signature).
|
||||
pub fn unsigned_payload<'b>(&self, full_data: &'b [u8]) -> Option<&'b [u8]> {
|
||||
if self.header.is_signed() {
|
||||
let end = self.header.total_bundle_size as usize - 32;
|
||||
Some(&full_data[..end])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Verify the HMAC-SHA256 signature.
|
||||
pub fn verify_signature(&self, key: &[u8], full_data: &[u8]) -> Result<(), WitnessError> {
|
||||
let sig = self
|
||||
.signature
|
||||
.ok_or(WitnessError::MissingSection("signature"))?;
|
||||
let unsigned = self
|
||||
.unsigned_payload(full_data)
|
||||
.ok_or(WitnessError::MissingSection("unsigned payload"))?;
|
||||
if seed_crypto::verify_seed(key, unsigned, sig) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(WitnessError::SignatureInvalid)
|
||||
}
|
||||
}
|
||||
|
||||
/// Full verification: magic + signature.
|
||||
pub fn verify_all(&self, key: &[u8], full_data: &[u8]) -> Result<(), WitnessError> {
|
||||
if !self.header.is_valid_magic() {
|
||||
return Err(WitnessError::InvalidHeader(rvf_types::RvfError::BadMagic {
|
||||
expected: WITNESS_MAGIC,
|
||||
got: self.header.magic,
|
||||
}));
|
||||
}
|
||||
if self.header.is_signed() {
|
||||
self.verify_signature(key, full_data)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check evidence completeness: does this bundle have all required sections?
|
||||
pub fn evidence_complete(&self) -> bool {
|
||||
self.spec.is_some() && self.diff.is_some() && self.test_log.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scorecard aggregator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Aggregates multiple witness bundles into a capability scorecard.
///
/// Feed bundles in via [`ScorecardBuilder::add_witness`], then call
/// [`ScorecardBuilder::finish`] to compute rates and latency percentiles.
pub struct ScorecardBuilder {
    /// Per-bundle total latencies in ms; sorted by `finish` for percentiles.
    latencies: Vec<u32>,
    /// Accumulated cost across all bundles, in microdollars.
    total_cost: u64,
    /// Accumulated token usage across all bundles.
    total_tokens: u64,
    /// Accumulated retry count across all bundles.
    total_retries: u32,
    /// Number of bundles whose outcome was `Solved`.
    solved: u32,
    /// Number of bundles whose outcome was `Failed`.
    failed: u32,
    /// Number of bundles whose outcome was `Skipped`.
    skipped: u32,
    /// Bundles that errored or carried an unrecognized outcome code.
    errors: u32,
    /// Policy-violation total, supplied by the caller per `add_witness`.
    policy_violations: u32,
    /// Rollback total, supplied by the caller per `add_witness`.
    rollback_count: u32,
    /// Solved bundles that also had complete evidence (spec + diff + test log).
    evidence_complete_count: u32,
    /// Denominator for evidence coverage: count of solved bundles.
    solved_count_for_evidence: u32,
}
|
||||
|
||||
impl ScorecardBuilder {
|
||||
/// Create a new scorecard builder.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
latencies: Vec::new(),
|
||||
total_cost: 0,
|
||||
total_tokens: 0,
|
||||
total_retries: 0,
|
||||
solved: 0,
|
||||
failed: 0,
|
||||
skipped: 0,
|
||||
errors: 0,
|
||||
policy_violations: 0,
|
||||
rollback_count: 0,
|
||||
evidence_complete_count: 0,
|
||||
solved_count_for_evidence: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a parsed witness bundle to the scorecard.
|
||||
pub fn add_witness(&mut self, parsed: &ParsedWitness<'_>, violations: u32, rollbacks: u32) {
|
||||
self.latencies.push(parsed.header.total_latency_ms);
|
||||
self.total_cost += parsed.header.total_cost_microdollars as u64;
|
||||
self.total_tokens += parsed.header.total_tokens as u64;
|
||||
self.total_retries += parsed.header.retry_count as u32;
|
||||
self.policy_violations += violations;
|
||||
self.rollback_count += rollbacks;
|
||||
|
||||
match TaskOutcome::try_from(parsed.header.outcome) {
|
||||
Ok(TaskOutcome::Solved) => {
|
||||
self.solved += 1;
|
||||
self.solved_count_for_evidence += 1;
|
||||
if parsed.evidence_complete() {
|
||||
self.evidence_complete_count += 1;
|
||||
}
|
||||
}
|
||||
Ok(TaskOutcome::Failed) => self.failed += 1,
|
||||
Ok(TaskOutcome::Skipped) => self.skipped += 1,
|
||||
Ok(TaskOutcome::Errored) => self.errors += 1,
|
||||
Err(_) => self.errors += 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Finalize and produce the scorecard.
|
||||
pub fn finish(&mut self) -> Scorecard {
|
||||
let total = self.solved + self.failed + self.skipped + self.errors;
|
||||
|
||||
// Sort latencies for percentiles.
|
||||
self.latencies.sort_unstable();
|
||||
let median = percentile(&self.latencies, 50);
|
||||
let p95 = percentile(&self.latencies, 95);
|
||||
|
||||
let cost_per_solve = if self.solved > 0 {
|
||||
(self.total_cost / self.solved as u64) as u32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let solve_rate = if total > 0 {
|
||||
self.solved as f32 / total as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let evidence_coverage = if self.solved_count_for_evidence > 0 {
|
||||
self.evidence_complete_count as f32 / self.solved_count_for_evidence as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Scorecard {
|
||||
total_tasks: total,
|
||||
solved: self.solved,
|
||||
failed: self.failed,
|
||||
skipped: self.skipped,
|
||||
errors: self.errors,
|
||||
policy_violations: self.policy_violations,
|
||||
rollback_count: self.rollback_count,
|
||||
total_cost_microdollars: self.total_cost,
|
||||
median_latency_ms: median,
|
||||
p95_latency_ms: p95,
|
||||
total_tokens: self.total_tokens,
|
||||
total_retries: self.total_retries,
|
||||
evidence_coverage,
|
||||
cost_per_solve_microdollars: cost_per_solve,
|
||||
solve_rate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ScorecardBuilder {
    /// Equivalent to [`ScorecardBuilder::new`]: all counters start at zero.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Compute a percentile from a sorted slice.
///
/// Uses the index `pct * len / 100`, clamped to the last element; an empty
/// slice yields 0.
fn percentile(sorted: &[u32], pct: usize) -> u32 {
    let Some(last) = sorted.len().checked_sub(1) else {
        return 0;
    };
    sorted[(pct * sorted.len() / 100).min(last)]
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: fully permissive policy so tool calls never trip governance
    // checks unless a test constructs a stricter policy itself.
    fn make_policy() -> GovernancePolicy {
        GovernancePolicy::autonomous()
    }

    // Fixture: a tool-call entry with fixed hashes and an Allowed check.
    fn make_entry(tool: &str, latency_ms: u32, cost: u32, tokens: u32) -> ToolCallEntry {
        ToolCallEntry {
            action: tool.as_bytes().to_vec(),
            args_hash: [0x11; 8],
            result_hash: [0x22; 8],
            latency_ms,
            cost_microdollars: cost,
            tokens,
            policy_check: PolicyCheck::Allowed,
        }
    }

    // A witness with no sections serializes to exactly the fixed header.
    #[test]
    fn build_minimal_witness() {
        let builder =
            WitnessBuilder::new([0x01; 16], make_policy()).with_outcome(TaskOutcome::Solved);
        let (payload, header) = builder.build().unwrap();
        assert_eq!(header.magic, WITNESS_MAGIC);
        assert_eq!(payload.len(), WITNESS_HEADER_SIZE);
        assert_eq!(header.outcome, TaskOutcome::Solved as u8);
    }

    // Sections round-trip through build + parse, and spec+diff+test_log
    // qualify as complete evidence.
    #[test]
    fn build_with_sections() {
        let builder = WitnessBuilder::new([0x02; 16], make_policy())
            .with_spec(b"fix authentication bug")
            .with_plan(b"1. read code\n2. fix bug\n3. test")
            .with_diff(b"--- a/src/auth.rs\n+++ b/src/auth.rs")
            .with_test_log(b"test auth::login ... ok")
            .with_outcome(TaskOutcome::Solved);
        let (payload, header) = builder.build().unwrap();
        assert!(payload.len() > WITNESS_HEADER_SIZE);
        assert_eq!(header.section_count, 4); // spec + plan + diff + test_log

        let parsed = ParsedWitness::parse(&payload).unwrap();
        assert_eq!(parsed.spec.unwrap(), b"fix authentication bug");
        assert_eq!(parsed.plan.unwrap(), b"1. read code\n2. fix bug\n3. test");
        assert!(parsed.evidence_complete());
    }

    // Recorded tool calls aggregate into header totals and round-trip
    // through the trace section in order.
    #[test]
    fn build_with_trace() {
        let mut builder =
            WitnessBuilder::new([0x03; 16], make_policy()).with_outcome(TaskOutcome::Solved);

        builder.record_tool_call(make_entry("Read", 50, 100, 500));
        builder.record_tool_call(make_entry("Edit", 100, 200, 1000));
        builder.record_tool_call(make_entry("Bash", 2000, 0, 0));

        let (payload, header) = builder.build().unwrap();
        assert_eq!(header.tool_call_count, 3);
        assert_eq!(header.total_cost_microdollars, 300);
        assert_eq!(header.total_latency_ms, 2150);
        assert_eq!(header.total_tokens, 1500);

        let parsed = ParsedWitness::parse(&payload).unwrap();
        let entries = parsed.parse_trace();
        assert_eq!(entries.len(), 3);
        assert_eq!(entries[0].action, b"Read");
        assert_eq!(entries[1].action, b"Edit");
        assert_eq!(entries[2].action, b"Bash");
    }

    // Signing with a key and verifying with the same key succeeds.
    #[test]
    fn signed_round_trip() {
        let key = b"test-signing-key-for-witness-ok!";
        let builder = WitnessBuilder::new([0x04; 16], make_policy())
            .with_spec(b"test spec")
            .with_outcome(TaskOutcome::Solved);
        let (payload, header) = builder.build_and_sign(key).unwrap();
        assert!(header.is_signed());

        let parsed = ParsedWitness::parse(&payload).unwrap();
        parsed.verify_all(key, &payload).unwrap();
    }

    // Verification with a different (same-length) key must fail.
    #[test]
    fn wrong_key_rejected() {
        let key = b"test-signing-key-for-witness-ok!";
        let builder = WitnessBuilder::new([0x05; 16], make_policy())
            .with_spec(b"test spec")
            .with_outcome(TaskOutcome::Solved);
        let (payload, _) = builder.build_and_sign(key).unwrap();

        let parsed = ParsedWitness::parse(&payload).unwrap();
        assert!(parsed
            .verify_signature(b"wrong-key-should-fail-immediate!", &payload)
            .is_err());
    }

    // Flipping one byte inside a signed section invalidates the signature.
    #[test]
    fn tampered_payload_fails() {
        let key = b"test-signing-key-for-witness-ok!";
        let builder = WitnessBuilder::new([0x06; 16], make_policy())
            .with_spec(b"test spec")
            .with_outcome(TaskOutcome::Solved);
        let (mut payload, _) = builder.build_and_sign(key).unwrap();

        // Tamper with spec section.
        payload[WITNESS_HEADER_SIZE + 10] ^= 0xFF;
        let parsed = ParsedWitness::parse(&payload).unwrap();
        assert!(parsed.verify_signature(key, &payload).is_err());
    }

    // Restricted policy: read-only tools allowed, everything else denied.
    #[test]
    fn governance_restricted_denies_writes() {
        let policy = GovernancePolicy::restricted();
        assert_eq!(policy.check_tool("Read"), PolicyCheck::Allowed);
        assert_eq!(policy.check_tool("Glob"), PolicyCheck::Allowed);
        assert_eq!(policy.check_tool("Bash"), PolicyCheck::Denied);
        assert_eq!(policy.check_tool("Write"), PolicyCheck::Denied);
        assert_eq!(policy.check_tool("Edit"), PolicyCheck::Denied);
        assert_eq!(policy.check_tool("UnknownTool"), PolicyCheck::Denied);
    }

    // Approved policy: every tool requires confirmation.
    #[test]
    fn governance_approved_gates_everything() {
        let policy = GovernancePolicy::approved();
        assert_eq!(policy.check_tool("Read"), PolicyCheck::Confirmed);
        assert_eq!(policy.check_tool("Bash"), PolicyCheck::Confirmed);
        assert_eq!(policy.check_tool("Edit"), PolicyCheck::Confirmed);
    }

    // Autonomous policy: every tool is allowed outright.
    #[test]
    fn governance_autonomous_allows_all() {
        let policy = GovernancePolicy::autonomous();
        assert_eq!(policy.check_tool("Read"), PolicyCheck::Allowed);
        assert_eq!(policy.check_tool("Bash"), PolicyCheck::Allowed);
        assert_eq!(policy.check_tool("Edit"), PolicyCheck::Allowed);
    }

    // Calling a denied tool records a human-readable violation message.
    #[test]
    fn policy_violation_recorded() {
        let policy = GovernancePolicy::restricted();
        let mut builder = WitnessBuilder::new([0x07; 16], policy).with_outcome(TaskOutcome::Failed);

        let check = builder.record_tool_call(make_entry("Bash", 100, 0, 0));
        assert_eq!(check, PolicyCheck::Denied);
        assert_eq!(builder.policy_violations.len(), 1);
        assert!(builder.policy_violations[0].contains("denied tool: Bash"));
    }

    // Crossing the cost cap mid-task records a budget violation.
    #[test]
    fn cost_budget_violation() {
        let mut policy = GovernancePolicy::autonomous();
        policy.max_cost_microdollars = 500;
        let mut builder = WitnessBuilder::new([0x08; 16], policy).with_outcome(TaskOutcome::Solved);

        builder.record_tool_call(make_entry("Read", 50, 300, 100));
        assert!(builder.policy_violations.is_empty());

        builder.record_tool_call(make_entry("Edit", 50, 300, 100));
        assert_eq!(builder.policy_violations.len(), 1);
        assert!(builder.policy_violations[0].contains("cost budget exceeded"));
    }

    // Aggregating three bundles (2 solved, 1 failed; one solved with full
    // evidence) yields the expected rates and counters.
    #[test]
    fn scorecard_basic() {
        let policy = make_policy();
        let key = b"test-signing-key-for-witness-ok!";

        let mut sc = ScorecardBuilder::new();

        // Solved task with evidence.
        let b1 = WitnessBuilder::new([0x01; 16], policy.clone())
            .with_spec(b"fix bug")
            .with_diff(b"diff")
            .with_test_log(b"ok")
            .with_outcome(TaskOutcome::Solved);
        let (p1, _) = b1.build_and_sign(key).unwrap();
        let w1 = ParsedWitness::parse(&p1).unwrap();
        sc.add_witness(&w1, 0, 0);

        // Failed task.
        let b2 = WitnessBuilder::new([0x02; 16], policy.clone())
            .with_spec(b"add feature")
            .with_outcome(TaskOutcome::Failed);
        let (p2, _) = b2.build_and_sign(key).unwrap();
        let w2 = ParsedWitness::parse(&p2).unwrap();
        sc.add_witness(&w2, 1, 0);

        // Solved task without full evidence.
        let b3 = WitnessBuilder::new([0x03; 16], policy.clone())
            .with_spec(b"refactor")
            .with_outcome(TaskOutcome::Solved);
        let (p3, _) = b3.build_and_sign(key).unwrap();
        let w3 = ParsedWitness::parse(&p3).unwrap();
        sc.add_witness(&w3, 0, 1);

        let card = sc.finish();
        assert_eq!(card.total_tasks, 3);
        assert_eq!(card.solved, 2);
        assert_eq!(card.failed, 1);
        assert_eq!(card.policy_violations, 1);
        assert_eq!(card.rollback_count, 1);
        assert!((card.solve_rate - 0.6667).abs() < 0.01);
        assert!((card.evidence_coverage - 0.5).abs() < 0.01); // 1/2 solved with full evidence
    }

    // An empty builder finalizes to an all-zero scorecard.
    #[test]
    fn scorecard_empty() {
        let card = ScorecardBuilder::new().finish();
        assert_eq!(card.total_tasks, 0);
        assert_eq!(card.solve_rate, 0.0);
        assert_eq!(card.median_latency_ms, 0);
        assert_eq!(card.p95_latency_ms, 0);
    }

    // Identical policies hash identically; different policies differ.
    #[test]
    fn policy_hash_deterministic() {
        let p1 = GovernancePolicy::restricted();
        let p2 = GovernancePolicy::restricted();
        assert_eq!(p1.hash(), p2.hash());

        let p3 = GovernancePolicy::autonomous();
        assert_ne!(p1.hash(), p3.hash());
    }

    // Display impl surfaces the embedded detail strings/values.
    #[test]
    fn witness_error_display() {
        let e = WitnessError::PolicyViolation("denied tool: Bash".into());
        assert!(format!("{e}").contains("denied tool: Bash"));

        let e2 = WitnessError::TooLarge { size: 99999 };
        assert!(format!("{e2}").contains("99999"));
    }
}
|
||||
644
vendor/ruvector/crates/rvf/rvf-runtime/src/write_path.rs
vendored
Normal file
644
vendor/ruvector/crates/rvf/rvf-runtime/src/write_path.rs
vendored
Normal file
@@ -0,0 +1,644 @@
|
||||
//! Append-only write logic for the RVF runtime.
|
||||
//!
|
||||
//! All mutations append new segments to the file. The write path:
|
||||
//! 1. Allocate segment_id (monotonic counter)
|
||||
//! 2. Build payload (VEC_SEG, META_SEG, JOURNAL_SEG, etc.)
|
||||
//! 3. Write segment header + payload, fsync
|
||||
//! 4. Build new MANIFEST_SEG, fsync (two-fsync protocol)
|
||||
|
||||
use rvf_types::{SegmentHeader, SegmentType, SEGMENT_HEADER_SIZE};
|
||||
use std::io::{self, Seek, Write};
|
||||
|
||||
/// Segment writer that handles the append-only write protocol.
///
/// Holds only the monotonic segment-ID counter; the destination writer is
/// passed into each `write_*` method rather than stored here.
pub(crate) struct SegmentWriter {
    /// Next segment ID to assign (monotonic counter).
    next_seg_id: u64,
}
|
||||
|
||||
impl SegmentWriter {
|
||||
pub(crate) fn new(starting_id: u64) -> Self {
|
||||
Self {
|
||||
next_seg_id: starting_id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate a new segment ID.
|
||||
///
|
||||
/// Uses checked arithmetic to detect overflow (would require 2^64 segments).
|
||||
pub(crate) fn alloc_seg_id(&mut self) -> u64 {
|
||||
let id = self.next_seg_id;
|
||||
self.next_seg_id = self
|
||||
.next_seg_id
|
||||
.checked_add(1)
|
||||
.expect("segment ID counter overflow");
|
||||
id
|
||||
}
|
||||
|
||||
/// Write a VEC_SEG containing the given f32 vectors.
|
||||
///
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
pub(crate) fn write_vec_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
vectors: &[&[f32]],
|
||||
ids: &[u64],
|
||||
dimension: u16,
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
// Build payload: dimension(u16) + vector_count(u32) + [id(u64) + data(f32 * dim)]
|
||||
let vector_count = vectors.len() as u32;
|
||||
let bytes_per_vec = (dimension as usize) * 4;
|
||||
let payload_size = 2 + 4 + (vectors.len() * (8 + bytes_per_vec));
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.extend_from_slice(&dimension.to_le_bytes());
|
||||
payload.extend_from_slice(&vector_count.to_le_bytes());
|
||||
for (vec_data, &vec_id) in vectors.iter().zip(ids.iter()) {
|
||||
payload.extend_from_slice(&vec_id.to_le_bytes());
|
||||
for &val in *vec_data {
|
||||
payload.extend_from_slice(&val.to_le_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Vec as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Write a JOURNAL_SEG with tombstone entries for deleted vector IDs.
|
||||
pub(crate) fn write_journal_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
deleted_ids: &[u64],
|
||||
epoch: u32,
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
// Journal header (simplified): entry_count(u32) + epoch(u32) + prev_seg_id(u64)
|
||||
// Then entries: each is entry_type(u8) + pad(u8) + len(u16) + vector_id(u64)
|
||||
let entry_count = deleted_ids.len() as u32;
|
||||
let payload_size = 16 + (deleted_ids.len() * 12); // header + entries
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
// Journal header.
|
||||
payload.extend_from_slice(&entry_count.to_le_bytes());
|
||||
payload.extend_from_slice(&epoch.to_le_bytes());
|
||||
payload.extend_from_slice(&0u64.to_le_bytes()); // prev_journal_seg_id
|
||||
|
||||
// Entries: DELETE_VECTOR (type 0x01).
|
||||
for &vid in deleted_ids {
|
||||
payload.push(0x01); // DELETE_VECTOR
|
||||
payload.push(0x00); // reserved
|
||||
payload.extend_from_slice(&8u16.to_le_bytes()); // entry_length
|
||||
payload.extend_from_slice(&vid.to_le_bytes());
|
||||
}
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Journal as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Write a META_SEG for vector metadata.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn write_meta_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
metadata_payload: &[u8],
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let seg_id = self.alloc_seg_id();
|
||||
let offset =
|
||||
self.write_segment(writer, SegmentType::Meta as u8, seg_id, metadata_payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Write a minimal MANIFEST_SEG recording current state.
|
||||
///
|
||||
/// This is a simplified manifest that stores:
|
||||
/// - epoch, dimension, total_vectors, total_segments, profile_id
|
||||
/// - segment directory entries (seg_id, offset, length, type)
|
||||
/// - deletion bitmap (vector IDs as simple packed u64 array)
|
||||
/// - file identity (68 bytes, appended for lineage provenance)
|
||||
#[allow(clippy::too_many_arguments, dead_code)]
|
||||
pub(crate) fn write_manifest_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
epoch: u32,
|
||||
dimension: u16,
|
||||
total_vectors: u64,
|
||||
profile_id: u8,
|
||||
segment_dir: &[(u64, u64, u64, u8)], // (seg_id, offset, payload_len, seg_type)
|
||||
deleted_ids: &[u64],
|
||||
) -> io::Result<(u64, u64)> {
|
||||
self.write_manifest_seg_with_identity(
|
||||
writer,
|
||||
epoch,
|
||||
dimension,
|
||||
total_vectors,
|
||||
profile_id,
|
||||
segment_dir,
|
||||
deleted_ids,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
/// Write a MANIFEST_SEG with optional FileIdentity appended.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn write_manifest_seg_with_identity<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
epoch: u32,
|
||||
dimension: u16,
|
||||
total_vectors: u64,
|
||||
profile_id: u8,
|
||||
segment_dir: &[(u64, u64, u64, u8)],
|
||||
deleted_ids: &[u64],
|
||||
file_identity: Option<&rvf_types::FileIdentity>,
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
// Build manifest payload.
|
||||
let seg_count = segment_dir.len() as u32;
|
||||
let del_count = deleted_ids.len() as u32;
|
||||
let payload_size = 4 + 2 + 8 + 4 + 1 + 3 // header fields
|
||||
+ (segment_dir.len() * (8 + 8 + 8 + 1)) // directory
|
||||
+ 4 + (deleted_ids.len() * 8) // deletion bitmap
|
||||
+ if file_identity.is_some() { 4 + 68 } else { 0 }; // lineage marker + identity
|
||||
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
// Manifest header.
|
||||
payload.extend_from_slice(&epoch.to_le_bytes());
|
||||
payload.extend_from_slice(&dimension.to_le_bytes());
|
||||
payload.extend_from_slice(&total_vectors.to_le_bytes());
|
||||
payload.extend_from_slice(&seg_count.to_le_bytes());
|
||||
payload.push(profile_id);
|
||||
payload.extend_from_slice(&[0u8; 3]); // reserved
|
||||
|
||||
// Segment directory.
|
||||
for &(sid, off, plen, stype) in segment_dir {
|
||||
payload.extend_from_slice(&sid.to_le_bytes());
|
||||
payload.extend_from_slice(&off.to_le_bytes());
|
||||
payload.extend_from_slice(&plen.to_le_bytes());
|
||||
payload.push(stype);
|
||||
}
|
||||
|
||||
// Deletion bitmap (simplified: count + packed IDs).
|
||||
payload.extend_from_slice(&del_count.to_le_bytes());
|
||||
for &did in deleted_ids {
|
||||
payload.extend_from_slice(&did.to_le_bytes());
|
||||
}
|
||||
|
||||
// FileIdentity (optional, backward-compatible trailer).
|
||||
// Magic marker 0x46494449 ("FIDI") followed by 68-byte identity.
|
||||
if let Some(fi) = file_identity {
|
||||
payload.extend_from_slice(&0x4649_4449u32.to_le_bytes()); // "FIDI"
|
||||
payload.extend_from_slice(&fi.to_bytes());
|
||||
}
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Manifest as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Maximum kernel image size (128 MiB) to prevent DoS via oversized segments.
|
||||
#[allow(dead_code)]
|
||||
const MAX_KERNEL_IMAGE_SIZE: usize = 128 * 1024 * 1024;
|
||||
|
||||
/// Write a KERNEL_SEG containing a compressed kernel image.
|
||||
///
|
||||
/// Payload layout: `kernel_header_bytes` (128) + `kernel_image` + optional `cmdline`.
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
///
|
||||
/// Returns an error if the kernel image exceeds 128 MiB.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn write_kernel_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
kernel_header_bytes: &[u8; 128],
|
||||
kernel_image: &[u8],
|
||||
cmdline: Option<&[u8]>,
|
||||
) -> io::Result<(u64, u64)> {
|
||||
if kernel_image.len() > Self::MAX_KERNEL_IMAGE_SIZE {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"kernel image too large: {} bytes (max {})",
|
||||
kernel_image.len(),
|
||||
Self::MAX_KERNEL_IMAGE_SIZE
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
let cmdline_len = cmdline.map_or(0, |c| c.len());
|
||||
let payload_size = 128 + kernel_image.len() + cmdline_len;
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.extend_from_slice(kernel_header_bytes);
|
||||
payload.extend_from_slice(kernel_image);
|
||||
if let Some(cl) = cmdline {
|
||||
payload.extend_from_slice(cl);
|
||||
}
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Kernel as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Maximum eBPF program size (16 MiB) to prevent DoS via oversized segments.
|
||||
#[allow(dead_code)]
|
||||
const MAX_EBPF_PROGRAM_SIZE: usize = 16 * 1024 * 1024;
|
||||
|
||||
/// Write an EBPF_SEG containing eBPF program bytecode and optional BTF data.
|
||||
///
|
||||
/// Payload layout: `ebpf_header_bytes` (64) + `program_bytecode` + optional `btf_data`.
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
///
|
||||
/// Returns an error if the combined bytecode + BTF data exceeds 16 MiB.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn write_ebpf_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
ebpf_header_bytes: &[u8; 64],
|
||||
program_bytecode: &[u8],
|
||||
btf_data: Option<&[u8]>,
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let btf_len = btf_data.map_or(0, |b| b.len());
|
||||
let total_program_size = program_bytecode.len().saturating_add(btf_len);
|
||||
if total_program_size > Self::MAX_EBPF_PROGRAM_SIZE {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"eBPF program too large: {} bytes (max {})",
|
||||
total_program_size,
|
||||
Self::MAX_EBPF_PROGRAM_SIZE
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
let payload_size = 64 + program_bytecode.len() + btf_len;
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.extend_from_slice(ebpf_header_bytes);
|
||||
payload.extend_from_slice(program_bytecode);
|
||||
if let Some(btf) = btf_data {
|
||||
payload.extend_from_slice(btf);
|
||||
}
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Ebpf as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Maximum WASM module size (8 MiB) to prevent DoS via oversized segments.
|
||||
#[allow(dead_code)]
|
||||
const MAX_WASM_MODULE_SIZE: usize = 8 * 1024 * 1024;
|
||||
|
||||
/// Write a WASM_SEG containing WASM bytecode for self-bootstrapping.
|
||||
///
|
||||
/// Payload layout: `wasm_header_bytes` (64) + `wasm_bytecode`.
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
///
|
||||
/// Returns an error if the bytecode exceeds 8 MiB.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn write_wasm_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
wasm_header_bytes: &[u8; 64],
|
||||
wasm_bytecode: &[u8],
|
||||
) -> io::Result<(u64, u64)> {
|
||||
if wasm_bytecode.len() > Self::MAX_WASM_MODULE_SIZE {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"WASM module too large: {} bytes (max {})",
|
||||
wasm_bytecode.len(),
|
||||
Self::MAX_WASM_MODULE_SIZE
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
let payload_size = 64 + wasm_bytecode.len();
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.extend_from_slice(wasm_header_bytes);
|
||||
payload.extend_from_slice(wasm_bytecode);
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Wasm as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Maximum dashboard bundle size (64 MiB) to prevent DoS via oversized segments.
|
||||
#[allow(dead_code)]
|
||||
const MAX_DASHBOARD_BUNDLE_SIZE: usize = 64 * 1024 * 1024;
|
||||
|
||||
/// Write a DASHBOARD_SEG containing a pre-built web dashboard bundle.
|
||||
///
|
||||
/// Payload layout: `dashboard_header_bytes` (64) + `bundle_data`.
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
///
|
||||
/// Returns an error if the bundle exceeds 64 MiB.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn write_dashboard_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
dashboard_header_bytes: &[u8; 64],
|
||||
bundle_data: &[u8],
|
||||
) -> io::Result<(u64, u64)> {
|
||||
if bundle_data.len() > Self::MAX_DASHBOARD_BUNDLE_SIZE {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"dashboard bundle too large: {} bytes (max {})",
|
||||
bundle_data.len(),
|
||||
Self::MAX_DASHBOARD_BUNDLE_SIZE
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
let payload_size = 64 + bundle_data.len();
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.extend_from_slice(dashboard_header_bytes);
|
||||
payload.extend_from_slice(bundle_data);
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Dashboard as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Write a WITNESS_SEG containing a serialized witness entry.
|
||||
///
|
||||
/// Payload layout:
|
||||
/// `witness_type` (u8) + `timestamp_ns` (u64 LE) +
|
||||
/// `action_len` (u32 LE) + `action` (bytes) + `prev_hash` (32 bytes)
|
||||
///
|
||||
/// Returns the segment ID and byte offset where it was written.
|
||||
pub(crate) fn write_witness_seg<W: Write + Seek>(
|
||||
&mut self,
|
||||
writer: &mut W,
|
||||
witness_type: u8,
|
||||
timestamp_ns: u64,
|
||||
action: &[u8],
|
||||
prev_hash: &[u8; 32],
|
||||
) -> io::Result<(u64, u64)> {
|
||||
let seg_id = self.alloc_seg_id();
|
||||
|
||||
let action_len = action.len() as u32;
|
||||
let payload_size = 1 + 8 + 4 + action.len() + 32;
|
||||
let mut payload = Vec::with_capacity(payload_size);
|
||||
|
||||
payload.push(witness_type);
|
||||
payload.extend_from_slice(×tamp_ns.to_le_bytes());
|
||||
payload.extend_from_slice(&action_len.to_le_bytes());
|
||||
payload.extend_from_slice(action);
|
||||
payload.extend_from_slice(prev_hash);
|
||||
|
||||
let offset = self.write_segment(writer, SegmentType::Witness as u8, seg_id, &payload)?;
|
||||
Ok((seg_id, offset))
|
||||
}
|
||||
|
||||
/// Low-level: write a segment header + payload to the writer.
|
||||
/// Returns the byte offset where the segment was written.
|
||||
fn write_segment<W: Write + Seek>(
|
||||
&self,
|
||||
writer: &mut W,
|
||||
seg_type: u8,
|
||||
seg_id: u64,
|
||||
payload: &[u8],
|
||||
) -> io::Result<u64> {
|
||||
let offset = writer.stream_position()?;
|
||||
|
||||
let mut header = SegmentHeader::new(seg_type, seg_id);
|
||||
header.payload_length = payload.len() as u64;
|
||||
|
||||
// Compute a simple content hash (first 16 bytes of CRC-based hash).
|
||||
let hash = content_hash(payload);
|
||||
header.content_hash = hash;
|
||||
|
||||
// Write header as raw bytes.
|
||||
let header_bytes = header_to_bytes(&header);
|
||||
writer.write_all(&header_bytes)?;
|
||||
|
||||
// Write payload.
|
||||
writer.write_all(payload)?;
|
||||
|
||||
Ok(offset)
|
||||
}
|
||||
|
||||
/// Current next segment ID.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn next_id(&self) -> u64 {
|
||||
self.next_seg_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a SegmentHeader to its 64-byte wire representation.
///
/// Field offsets follow the fixed on-disk layout (hex offsets in the slice
/// indices below); all multi-byte integers are little-endian.
fn header_to_bytes(h: &SegmentHeader) -> [u8; SEGMENT_HEADER_SIZE] {
    let mut buf = [0u8; SEGMENT_HEADER_SIZE];
    buf[0x00..0x04].copy_from_slice(&h.magic.to_le_bytes());
    buf[0x04] = h.version;
    buf[0x05] = h.seg_type;
    buf[0x06..0x08].copy_from_slice(&h.flags.to_le_bytes());
    buf[0x08..0x10].copy_from_slice(&h.segment_id.to_le_bytes());
    buf[0x10..0x18].copy_from_slice(&h.payload_length.to_le_bytes());
    buf[0x18..0x20].copy_from_slice(&h.timestamp_ns.to_le_bytes());
    buf[0x20] = h.checksum_algo;
    buf[0x21] = h.compression;
    buf[0x22..0x24].copy_from_slice(&h.reserved_0.to_le_bytes());
    buf[0x24..0x28].copy_from_slice(&h.reserved_1.to_le_bytes());
    buf[0x28..0x38].copy_from_slice(&h.content_hash);
    buf[0x38..0x3C].copy_from_slice(&h.uncompressed_len.to_le_bytes());
    buf[0x3C..0x40].copy_from_slice(&h.alignment_pad.to_le_bytes());
    buf
}
|
||||
|
||||
/// Compute a simple 16-byte content hash (CRC32-based, rotated for distinct bytes).
|
||||
fn content_hash(data: &[u8]) -> [u8; 16] {
|
||||
let mut hash = [0u8; 16];
|
||||
let crc = crc32_slice(data);
|
||||
// Use different rotations of CRC to fill 16 bytes with distinct values.
|
||||
for i in 0..4 {
|
||||
let rotated = crc.rotate_left(i as u32 * 8);
|
||||
hash[i * 4..(i + 1) * 4].copy_from_slice(&rotated.to_le_bytes());
|
||||
}
|
||||
hash
|
||||
}
|
||||
|
||||
/// Simple CRC32 computation.
///
/// Bitwise CRC-32 (reflected, polynomial 0xEDB88320, init 0xFFFFFFFF,
/// final complement) — the standard ISO-HDLC / zlib variant.
fn crc32_slice(data: &[u8]) -> u32 {
    let folded = data.iter().fold(u32::MAX, |acc, &byte| {
        let mut crc = acc ^ u32::from(byte);
        for _ in 0..8 {
            // Branchless step: mask is all-ones iff the low bit is set.
            let mask = (crc & 1).wrapping_neg();
            crc = (crc >> 1) ^ (0xEDB8_8320 & mask);
        }
        crc
    });
    !folded
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rvf_types::SEGMENT_MAGIC;
    use std::io::Cursor;

    // Writes two 3-dimensional vectors and checks the on-disk prologue:
    // magic word, then seg_type at header offset 0x05.
    #[test]
    fn write_vec_seg_round_trip() {
        let mut buf = Cursor::new(Vec::new());
        let mut writer = SegmentWriter::new(1);

        let v1: Vec<f32> = vec![1.0, 2.0, 3.0];
        let v2: Vec<f32> = vec![4.0, 5.0, 6.0];
        let vectors: Vec<&[f32]> = vec![&v1, &v2];
        let ids = vec![10u64, 20u64];

        let (seg_id, offset) = writer.write_vec_seg(&mut buf, &vectors, &ids, 3).unwrap();
        assert_eq!(seg_id, 1);
        assert_eq!(offset, 0);

        // Verify the data was written.
        let data = buf.into_inner();
        assert!(data.len() > SEGMENT_HEADER_SIZE);

        // Check magic.
        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        assert_eq!(magic, SEGMENT_MAGIC);

        // Check seg_type.
        assert_eq!(data[5], SegmentType::Vec as u8);
    }

    // IDs must be handed out sequentially starting from the seed value.
    #[test]
    fn seg_id_monotonic() {
        let mut writer = SegmentWriter::new(10);
        assert_eq!(writer.alloc_seg_id(), 10);
        assert_eq!(writer.alloc_seg_id(), 11);
        assert_eq!(writer.alloc_seg_id(), 12);
    }

    // The wire representation must be exactly SEGMENT_HEADER_SIZE bytes.
    #[test]
    fn header_to_bytes_size() {
        let h = SegmentHeader::new(0x01, 42);
        let bytes = header_to_bytes(&h);
        assert_eq!(bytes.len(), SEGMENT_HEADER_SIZE);
    }

    // Kernel segment: payload begins with the raw 128-byte kernel header,
    // followed by the image (cmdline optional).
    #[test]
    fn write_kernel_seg_round_trip() {
        let mut buf = Cursor::new(Vec::new());
        let mut writer = SegmentWriter::new(1);

        let kernel_header = [0xAAu8; 128];
        let kernel_image = b"fake-kernel-image-data";

        let (seg_id, offset) = writer
            .write_kernel_seg(
                &mut buf,
                &kernel_header,
                kernel_image,
                Some(b"console=ttyS0"),
            )
            .unwrap();
        assert_eq!(seg_id, 1);
        assert_eq!(offset, 0);

        let data = buf.into_inner();
        assert!(data.len() > SEGMENT_HEADER_SIZE);

        // Check magic.
        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        assert_eq!(magic, SEGMENT_MAGIC);

        // Check seg_type == Kernel (0x0E).
        assert_eq!(data[5], SegmentType::Kernel as u8);

        // Verify payload starts with kernel header bytes.
        let payload_start = SEGMENT_HEADER_SIZE;
        assert_eq!(&data[payload_start..payload_start + 128], &[0xAAu8; 128]);
    }

    // Witness segment: walk the payload field by field using the documented
    // layout: type (1) | timestamp (8) | action_len (4) | action | prev_hash (32).
    #[test]
    fn write_witness_seg_round_trip() {
        let mut buf = Cursor::new(Vec::new());
        let mut writer = SegmentWriter::new(1);

        let witness_type = 0x01u8; // Computation
        let timestamp_ns = 1_700_000_000_000_000_000u64;
        let action = b"ingest:count=10,epoch=1";
        let prev_hash = [0u8; 32];

        let (seg_id, offset) = writer
            .write_witness_seg(&mut buf, witness_type, timestamp_ns, action, &prev_hash)
            .unwrap();
        assert_eq!(seg_id, 1);
        assert_eq!(offset, 0);

        let data = buf.into_inner();
        assert!(data.len() > SEGMENT_HEADER_SIZE);

        // Check magic.
        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        assert_eq!(magic, SEGMENT_MAGIC);

        // Check seg_type == Witness (0x0A).
        assert_eq!(data[5], SegmentType::Witness as u8);

        // Verify payload starts with witness_type byte.
        let payload_start = SEGMENT_HEADER_SIZE;
        assert_eq!(data[payload_start], witness_type);

        // Verify timestamp.
        let ts_bytes: [u8; 8] = data[payload_start + 1..payload_start + 9]
            .try_into()
            .unwrap();
        assert_eq!(u64::from_le_bytes(ts_bytes), timestamp_ns);

        // Verify action length.
        let action_len_bytes: [u8; 4] = data[payload_start + 9..payload_start + 13]
            .try_into()
            .unwrap();
        assert_eq!(u32::from_le_bytes(action_len_bytes), action.len() as u32);

        // Verify action bytes.
        let action_start = payload_start + 13;
        let action_end = action_start + action.len();
        assert_eq!(&data[action_start..action_end], action);

        // Verify prev_hash (32 zero bytes).
        let hash_start = action_end;
        let hash_end = hash_start + 32;
        assert_eq!(&data[hash_start..hash_end], &[0u8; 32]);
    }

    // eBPF segment: payload begins with the raw 64-byte eBPF header; also
    // checks that a non-1 seed (10) is used as the first segment ID.
    #[test]
    fn write_ebpf_seg_round_trip() {
        let mut buf = Cursor::new(Vec::new());
        let mut writer = SegmentWriter::new(10);

        let ebpf_header = [0xBBu8; 64];
        let bytecode = b"ebpf-bytecode";

        let (seg_id, offset) = writer
            .write_ebpf_seg(&mut buf, &ebpf_header, bytecode, None)
            .unwrap();
        assert_eq!(seg_id, 10);
        assert_eq!(offset, 0);

        let data = buf.into_inner();
        assert!(data.len() > SEGMENT_HEADER_SIZE);

        // Check seg_type == Ebpf (0x0F).
        assert_eq!(data[5], SegmentType::Ebpf as u8);

        // Verify payload starts with eBPF header bytes.
        let payload_start = SEGMENT_HEADER_SIZE;
        assert_eq!(&data[payload_start..payload_start + 64], &[0xBBu8; 64]);
    }
}
|
||||
Reference in New Issue
Block a user