Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,819 @@
//! SheafEdge: Constraint between nodes with restriction maps
//!
//! An edge in the sheaf graph encodes a constraint between two nodes.
//! The constraint is expressed via two restriction maps:
//!
//! - `rho_source`: Projects the source state to the shared comparison space
//! - `rho_target`: Projects the target state to the shared comparison space
//!
//! The **residual** at an edge is the difference between these projections:
//! ```text
//! r_e = rho_source(x_source) - rho_target(x_target)
//! ```
//!
//! The **weighted residual energy** contributes to global coherence:
//! ```text
//! E_e = weight * ||r_e||^2
//! ```
//!
//! # Performance Optimization
//!
//! Thread-local scratch buffers are used to eliminate per-edge allocations
//! in hot paths. Use `residual_norm_squared_no_alloc` for allocation-free
//! energy computation.
use super::node::NodeId;
use super::restriction::RestrictionMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::HashMap;
use uuid::Uuid;
/// Number of `f32` slots each scratch buffer reserves when it is first created.
const DEFAULT_SCRATCH_CAPACITY: usize = 256;

/// Reusable working memory for edge residual computations.
///
/// Holding the three intermediate vectors in one long-lived value lets
/// repeated energy evaluations reuse the same heap storage instead of
/// building fresh `Vec`s for every edge.
struct EdgeScratch {
    /// Receives the source state after its restriction map is applied
    projected_source: Vec<f32>,
    /// Receives the target state after its restriction map is applied
    projected_target: Vec<f32>,
    /// Receives the element-wise difference (source - target)
    residual: Vec<f32>,
}

impl EdgeScratch {
    /// Build an empty scratch set whose buffers can each hold `capacity`
    /// elements before reallocating.
    fn new(capacity: usize) -> Self {
        let fresh = || Vec::with_capacity(capacity);
        Self {
            projected_source: fresh(),
            projected_target: fresh(),
            residual: fresh(),
        }
    }

    /// Give every buffer a length of exactly `dim`.
    ///
    /// Capacity only ever grows. Newly exposed slots are zero-filled, while
    /// slots that already existed keep whatever the previous computation
    /// left in them.
    #[inline]
    fn prepare(&mut self, dim: usize) {
        let buffers = [
            &mut self.projected_source,
            &mut self.projected_target,
            &mut self.residual,
        ];
        for buf in buffers {
            // `capacity < dim` implies `len < dim`, so the subtraction cannot underflow.
            if buf.capacity() < dim {
                buf.reserve(dim - buf.len());
            }
            buf.resize(dim, 0.0);
        }
    }
}

thread_local! {
    /// Per-thread scratch set used by the allocation-free edge methods
    static SCRATCH: RefCell<EdgeScratch> = RefCell::new(EdgeScratch::new(DEFAULT_SCRATCH_CAPACITY));
}
/// Unique identifier for an edge (an alias for `Uuid`)
pub type EdgeId = Uuid;
/// An edge encoding a constraint between two nodes
///
/// Each endpoint has its own restriction map. The residual and energy
/// methods on this type apply `rho_source` to the source state and
/// `rho_target` to the target state and compare the two results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafEdge {
/// Unique edge identifier
pub id: EdgeId,
/// Source node identifier
pub source: NodeId,
/// Target node identifier
pub target: NodeId,
/// Multiplier applied to the squared residual norm when computing the
/// weighted residual energy (importance of this constraint)
pub weight: f32,
/// Restriction map applied to the source state before comparison
pub rho_source: RestrictionMap,
/// Restriction map applied to the target state before comparison
pub rho_target: RestrictionMap,
/// Edge type/label for filtering
pub edge_type: Option<String>,
/// Namespace for multi-tenant isolation
pub namespace: Option<String>,
/// Arbitrary metadata, keyed by name and stored as JSON values
pub metadata: HashMap<String, serde_json::Value>,
/// Creation timestamp (UTC), set when the edge is constructed
pub created_at: DateTime<Utc>,
/// Last update timestamp (UTC), refreshed by `set_weight` and `set_restrictions`
pub updated_at: DateTime<Utc>,
}
impl SheafEdge {
/// Create a new edge with identity restriction maps
///
/// This means both source and target states must match exactly in the
/// given dimension for the edge to be coherent.
pub fn identity(source: NodeId, target: NodeId, dim: usize) -> Self {
let now = Utc::now();
Self {
id: Uuid::new_v4(),
source,
target,
weight: 1.0,
rho_source: RestrictionMap::identity(dim),
rho_target: RestrictionMap::identity(dim),
edge_type: None,
namespace: None,
metadata: HashMap::new(),
created_at: now,
updated_at: now,
}
}
/// Create a new edge with custom restriction maps
pub fn with_restrictions(
source: NodeId,
target: NodeId,
rho_source: RestrictionMap,
rho_target: RestrictionMap,
) -> Self {
debug_assert_eq!(
rho_source.output_dim(),
rho_target.output_dim(),
"Restriction maps must have same output dimension"
);
let now = Utc::now();
Self {
id: Uuid::new_v4(),
source,
target,
weight: 1.0,
rho_source,
rho_target,
edge_type: None,
namespace: None,
metadata: HashMap::new(),
created_at: now,
updated_at: now,
}
}
/// Calculate the edge residual (local mismatch)
///
/// The residual is the difference between the projected source and target states:
/// ```text
/// r_e = rho_source(x_source) - rho_target(x_target)
/// ```
///
/// # SIMD Optimization
///
/// The subtraction is performed using SIMD-friendly patterns.
#[inline]
pub fn residual(&self, source_state: &[f32], target_state: &[f32]) -> Vec<f32> {
let projected_source = self.rho_source.apply(source_state);
let projected_target = self.rho_target.apply(target_state);
// SIMD-friendly subtraction
projected_source
.iter()
.zip(projected_target.iter())
.map(|(&a, &b)| a - b)
.collect()
}
/// Calculate the residual norm squared
///
/// This is ||r_e||^2 without the weight factor.
///
/// # SIMD Optimization
///
/// Uses 4-lane accumulation for better vectorization.
///
/// # Note
///
/// This method allocates temporary vectors. For hot paths, prefer
/// `residual_norm_squared_no_alloc` which uses thread-local scratch buffers.
#[inline]
pub fn residual_norm_squared(&self, source_state: &[f32], target_state: &[f32]) -> f32 {
let residual = self.residual(source_state, target_state);
// SIMD-friendly: process 4 elements at a time using chunks_exact
let chunks = residual.chunks_exact(4);
let remainder = chunks.remainder();
let mut acc = [0.0f32; 4];
for chunk in chunks {
acc[0] += chunk[0] * chunk[0];
acc[1] += chunk[1] * chunk[1];
acc[2] += chunk[2] * chunk[2];
acc[3] += chunk[3] * chunk[3];
}
let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
for &r in remainder {
sum += r * r;
}
sum
}
/// Calculate the residual norm squared without allocation
///
/// This is ||r_e||^2 without the weight factor, using thread-local
/// scratch buffers to avoid per-call allocations.
///
/// # Performance
///
/// This method is optimized for hot paths where many edges are processed
/// in sequence. It reuses thread-local buffers to eliminate the 2-3 Vec
/// allocations that would otherwise occur per edge.
///
/// # SIMD Optimization
///
/// Uses 4-lane accumulation for better vectorization.
///
/// # Thread Safety
///
/// Uses thread-local storage, so it's safe to call from multiple threads
/// concurrently (each thread has its own scratch buffers).
#[inline]
pub fn residual_norm_squared_no_alloc(
&self,
source_state: &[f32],
target_state: &[f32],
) -> f32 {
let dim = self.comparison_dim();
SCRATCH.with(|scratch| {
let mut scratch = scratch.borrow_mut();
scratch.prepare(dim);
// Apply restriction maps into scratch buffers
self.rho_source
.apply_into(source_state, &mut scratch.projected_source);
self.rho_target
.apply_into(target_state, &mut scratch.projected_target);
// Compute residual in-place: r = projected_source - projected_target
for i in 0..dim {
scratch.residual[i] = scratch.projected_source[i] - scratch.projected_target[i];
}
// SIMD-friendly: compute norm squared with 4-lane accumulation
let chunks = scratch.residual[..dim].chunks_exact(4);
let remainder = chunks.remainder();
let mut acc = [0.0f32; 4];
for chunk in chunks {
acc[0] += chunk[0] * chunk[0];
acc[1] += chunk[1] * chunk[1];
acc[2] += chunk[2] * chunk[2];
acc[3] += chunk[3] * chunk[3];
}
let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
for &r in remainder {
sum += r * r;
}
sum
})
}
/// Calculate weighted residual energy without allocation
///
/// This is the contribution of this edge to the global coherence energy:
/// ```text
/// E_e = weight * ||r_e||^2
/// ```
///
/// Uses thread-local scratch buffers to avoid per-call allocations.
/// Preferred over `weighted_residual_energy` in hot paths.
#[inline]
pub fn weighted_residual_energy_no_alloc(
&self,
source_state: &[f32],
target_state: &[f32],
) -> f32 {
self.weight * self.residual_norm_squared_no_alloc(source_state, target_state)
}
/// Calculate weighted residual energy
///
/// This is the contribution of this edge to the global coherence energy:
/// ```text
/// E_e = weight * ||r_e||^2
/// ```
#[inline]
pub fn weighted_residual_energy(&self, source_state: &[f32], target_state: &[f32]) -> f32 {
self.weight * self.residual_norm_squared(source_state, target_state)
}
/// Calculate residual energy and return both the energy and residual vector
///
/// This is more efficient when you need both values.
#[inline]
pub fn residual_with_energy(
&self,
source_state: &[f32],
target_state: &[f32],
) -> (Vec<f32>, f32) {
let residual = self.residual(source_state, target_state);
// SIMD-friendly: process 4 elements at a time using chunks_exact
let chunks = residual.chunks_exact(4);
let remainder = chunks.remainder();
let mut acc = [0.0f32; 4];
for chunk in chunks {
acc[0] += chunk[0] * chunk[0];
acc[1] += chunk[1] * chunk[1];
acc[2] += chunk[2] * chunk[2];
acc[3] += chunk[3] * chunk[3];
}
let mut norm_sq = acc[0] + acc[1] + acc[2] + acc[3];
for &r in remainder {
norm_sq += r * r;
}
let energy = self.weight * norm_sq;
(residual, energy)
}
/// Get the output dimension of the restriction maps (comparison space dimension)
#[inline]
pub fn comparison_dim(&self) -> usize {
self.rho_source.output_dim()
}
/// Check if this edge is coherent (residual below threshold)
#[inline]
pub fn is_coherent(&self, source_state: &[f32], target_state: &[f32], threshold: f32) -> bool {
self.residual_norm_squared(source_state, target_state) <= threshold * threshold
}
/// Update the weight
pub fn set_weight(&mut self, weight: f32) {
self.weight = weight;
self.updated_at = Utc::now();
}
/// Update the restriction maps
pub fn set_restrictions(&mut self, rho_source: RestrictionMap, rho_target: RestrictionMap) {
debug_assert_eq!(
rho_source.output_dim(),
rho_target.output_dim(),
"Restriction maps must have same output dimension"
);
self.rho_source = rho_source;
self.rho_target = rho_target;
self.updated_at = Utc::now();
}
/// Compute content hash for fingerprinting
pub fn content_hash(&self) -> u64 {
use std::hash::{Hash, Hasher};
let mut hasher = std::collections::hash_map::DefaultHasher::new();
self.id.hash(&mut hasher);
self.source.hash(&mut hasher);
self.target.hash(&mut hasher);
self.weight.to_bits().hash(&mut hasher);
hasher.finish()
}
}
/// Step-by-step constructor for `SheafEdge`
///
/// Only the two endpoints are mandatory up front; both restriction maps must
/// be supplied before calling `build` or `try_build`.
#[derive(Debug)]
pub struct SheafEdgeBuilder {
    id: Option<EdgeId>,
    source: NodeId,
    target: NodeId,
    weight: f32,
    rho_source: Option<RestrictionMap>,
    rho_target: Option<RestrictionMap>,
    edge_type: Option<String>,
    namespace: Option<String>,
    metadata: HashMap<String, serde_json::Value>,
}

impl SheafEdgeBuilder {
    /// Start a builder for an edge from `source` to `target`
    ///
    /// The weight defaults to `1.0`; everything else starts unset or empty.
    pub fn new(source: NodeId, target: NodeId) -> Self {
        Self {
            id: None,
            source,
            target,
            weight: 1.0,
            rho_source: None,
            rho_target: None,
            edge_type: None,
            namespace: None,
            metadata: HashMap::new(),
        }
    }

    /// Use `id` instead of a freshly generated one
    pub fn id(self, id: EdgeId) -> Self {
        Self {
            id: Some(id),
            ..self
        }
    }

    /// Set the edge weight
    pub fn weight(self, weight: f32) -> Self {
        Self { weight, ..self }
    }

    /// Use identity restriction maps of dimension `dim` on both endpoints
    pub fn identity_restrictions(self, dim: usize) -> Self {
        Self {
            rho_source: Some(RestrictionMap::identity(dim)),
            rho_target: Some(RestrictionMap::identity(dim)),
            ..self
        }
    }

    /// Set the restriction map applied to the source state
    pub fn rho_source(self, rho: RestrictionMap) -> Self {
        Self {
            rho_source: Some(rho),
            ..self
        }
    }

    /// Set the restriction map applied to the target state
    pub fn rho_target(self, rho: RestrictionMap) -> Self {
        Self {
            rho_target: Some(rho),
            ..self
        }
    }

    /// Set both restriction maps in one call
    ///
    /// With debug assertions enabled, panics if their output dimensions differ.
    pub fn restrictions(self, source: RestrictionMap, target: RestrictionMap) -> Self {
        debug_assert_eq!(
            source.output_dim(),
            target.output_dim(),
            "Restriction maps must have same output dimension"
        );
        Self {
            rho_source: Some(source),
            rho_target: Some(target),
            ..self
        }
    }

    /// Set the edge type label
    pub fn edge_type(self, edge_type: impl Into<String>) -> Self {
        Self {
            edge_type: Some(edge_type.into()),
            ..self
        }
    }

    /// Set the namespace
    pub fn namespace(self, namespace: impl Into<String>) -> Self {
        Self {
            namespace: Some(namespace.into()),
            ..self
        }
    }

    /// Insert one metadata entry, replacing any earlier value under `key`
    pub fn metadata(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
        self.metadata.insert(key.into(), value.into());
        self
    }

    /// Finish the builder and produce the edge
    ///
    /// Both timestamps are set to the current UTC time, and a random id is
    /// generated if none was supplied.
    ///
    /// # Panics
    ///
    /// Panics if either restriction map is missing. With debug assertions
    /// enabled, also panics if the maps' output dimensions differ.
    pub fn build(self) -> SheafEdge {
        let Self {
            id,
            source,
            target,
            weight,
            rho_source,
            rho_target,
            edge_type,
            namespace,
            metadata,
        } = self;

        let rho_source = rho_source.expect("Source restriction map is required");
        let rho_target = rho_target.expect("Target restriction map is required");
        debug_assert_eq!(
            rho_source.output_dim(),
            rho_target.output_dim(),
            "Restriction maps must have same output dimension"
        );

        let now = Utc::now();
        SheafEdge {
            id: id.unwrap_or_else(Uuid::new_v4),
            source,
            target,
            weight,
            rho_source,
            rho_target,
            edge_type,
            namespace,
            metadata,
            created_at: now,
            updated_at: now,
        }
    }

    /// Finish the builder, reporting problems as an error instead of panicking
    ///
    /// # Errors
    ///
    /// Returns a static message if the source map is missing, if the target
    /// map is missing, or if the two maps have different output dimensions
    /// (checked in that order, in every build profile).
    pub fn try_build(self) -> Result<SheafEdge, &'static str> {
        let (rho_source, rho_target) = match (self.rho_source, self.rho_target) {
            (None, _) => return Err("Source restriction map is required"),
            (_, None) => return Err("Target restriction map is required"),
            (Some(src), Some(tgt)) => (src, tgt),
        };
        if rho_source.output_dim() != rho_target.output_dim() {
            return Err("Restriction maps must have same output dimension");
        }

        let now = Utc::now();
        let edge = SheafEdge {
            id: self.id.unwrap_or_else(Uuid::new_v4),
            source: self.source,
            target: self.target,
            weight: self.weight,
            rho_source,
            rho_target,
            edge_type: self.edge_type,
            namespace: self.namespace,
            metadata: self.metadata,
            created_at: now,
            updated_at: now,
        };
        Ok(edge)
    }
}
// Unit tests for `SheafEdge` and `SheafEdgeBuilder`.
//
// The first group exercises the allocating methods; the `*_no_alloc_*` tests
// check that the scratch-buffer variants return the same values.
#[cfg(test)]
mod tests {
use super::*;
// Two fresh random node ids to use as edge endpoints.
fn make_test_nodes() -> (NodeId, NodeId) {
(Uuid::new_v4(), Uuid::new_v4())
}
// `identity` records both endpoints, defaults the weight to 1.0 and reports
// the requested dimension as the comparison dimension.
#[test]
fn test_identity_edge() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
assert_eq!(edge.source, source);
assert_eq!(edge.target, target);
assert_eq!(edge.weight, 1.0);
assert_eq!(edge.comparison_dim(), 3);
}
// Equal states under identity maps give a zero residual.
#[test]
fn test_identity_residual_matching() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let source_state = vec![1.0, 2.0, 3.0];
let target_state = vec![1.0, 2.0, 3.0];
let residual = edge.residual(&source_state, &target_state);
assert!(residual.iter().all(|&x| x.abs() < 1e-10));
assert!(edge.residual_norm_squared(&source_state, &target_state) < 1e-10);
}
// The residual is source minus target, so a larger target component
// shows up as a negative entry.
#[test]
fn test_identity_residual_mismatch() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let source_state = vec![1.0, 2.0, 3.0];
let target_state = vec![2.0, 2.0, 3.0]; // Differs by 1 in first component
let residual = edge.residual(&source_state, &target_state);
assert_eq!(residual, vec![-1.0, 0.0, 0.0]);
assert!((edge.residual_norm_squared(&source_state, &target_state) - 1.0).abs() < 1e-10);
}
// The weight multiplies the squared residual norm.
#[test]
fn test_weighted_energy() {
let (source, target) = make_test_nodes();
let mut edge = SheafEdge::identity(source, target, 2);
edge.set_weight(2.0);
let source_state = vec![1.0, 0.0];
let target_state = vec![0.0, 0.0]; // Residual is [1, 0], norm^2 = 1
let energy = edge.weighted_residual_energy(&source_state, &target_state);
assert!((energy - 2.0).abs() < 1e-10); // weight * 1 = 2
}
// Endpoints of different dimension can be compared once the source is
// projected down to the shared 2-D space.
#[test]
fn test_projection_restriction() {
let (source, target) = make_test_nodes();
// Source: 4D, project to first 2 dims
// Target: 2D, identity
let rho_source = RestrictionMap::projection(vec![0, 1], 4);
let rho_target = RestrictionMap::identity(2);
let edge = SheafEdge::with_restrictions(source, target, rho_source, rho_target);
let source_state = vec![1.0, 2.0, 100.0, 200.0]; // Extra dims ignored
let target_state = vec![1.0, 2.0];
let residual = edge.residual(&source_state, &target_state);
assert!(residual.iter().all(|&x| x.abs() < 1e-10));
}
// A diagonal source map scales the source before comparison.
#[test]
fn test_diagonal_restriction() {
let (source, target) = make_test_nodes();
// Source scaled by [2, 2], target by [1, 1]
// For coherence: 2*source = 1*target, so source = target/2
let rho_source = RestrictionMap::diagonal(vec![2.0, 2.0]);
let rho_target = RestrictionMap::identity(2);
let edge = SheafEdge::with_restrictions(source, target, rho_source, rho_target);
let source_state = vec![1.0, 1.0];
let target_state = vec![2.0, 2.0]; // 2*[1,1] = [2,2]
assert!(edge.residual_norm_squared(&source_state, &target_state) < 1e-10);
}
// `is_coherent` compares the residual norm against the threshold.
#[test]
fn test_is_coherent() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 2);
let source_state = vec![1.0, 0.0];
let target_state = vec![1.1, 0.0]; // Small difference
// Residual is [-0.1, 0], norm = 0.1
assert!(edge.is_coherent(&source_state, &target_state, 0.2)); // Below threshold
assert!(!edge.is_coherent(&source_state, &target_state, 0.05)); // Above threshold
}
// Values passed to the builder end up on the built edge.
#[test]
fn test_builder() {
let (source, target) = make_test_nodes();
let edge = SheafEdgeBuilder::new(source, target)
.weight(2.5)
.identity_restrictions(4)
.edge_type("citation")
.namespace("test")
.metadata("importance", serde_json::json!(0.9))
.build();
assert_eq!(edge.weight, 2.5);
assert_eq!(edge.edge_type, Some("citation".to_string()));
assert_eq!(edge.namespace, Some("test".to_string()));
assert!(edge.metadata.contains_key("importance"));
}
// The combined call returns both the residual vector and its energy.
#[test]
fn test_residual_with_energy() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let source_state = vec![1.0, 2.0, 3.0];
let target_state = vec![0.0, 0.0, 0.0];
let (residual, energy) = edge.residual_with_energy(&source_state, &target_state);
assert_eq!(residual, vec![1.0, 2.0, 3.0]);
assert!((energy - 14.0).abs() < 1e-10); // 1 + 4 + 9 = 14
}
// Hashing the same unchanged edge twice gives the same value.
#[test]
fn test_content_hash_stability() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let hash1 = edge.content_hash();
let hash2 = edge.content_hash();
assert_eq!(hash1, hash2);
}
// Scratch-buffer variant agrees with the allocating one on matching states.
#[test]
fn test_residual_norm_squared_no_alloc_identity() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let source_state = vec![1.0, 2.0, 3.0];
let target_state = vec![1.0, 2.0, 3.0];
// Should match allocating version
let alloc_result = edge.residual_norm_squared(&source_state, &target_state);
let no_alloc_result = edge.residual_norm_squared_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-10);
assert!(no_alloc_result < 1e-10);
}
// Scratch-buffer variant agrees with the allocating one on a non-zero residual.
#[test]
fn test_residual_norm_squared_no_alloc_mismatch() {
let (source, target) = make_test_nodes();
let edge = SheafEdge::identity(source, target, 3);
let source_state = vec![1.0, 2.0, 3.0];
let target_state = vec![0.0, 0.0, 0.0];
// Residual is [1, 2, 3], norm^2 = 1 + 4 + 9 = 14
let alloc_result = edge.residual_norm_squared(&source_state, &target_state);
let no_alloc_result = edge.residual_norm_squared_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-10);
assert!((no_alloc_result - 14.0).abs() < 1e-10);
}
// Scratch-buffer variant with a projection map on the source side.
#[test]
fn test_residual_norm_squared_no_alloc_with_projection() {
let (source, target) = make_test_nodes();
// Source: 4D, project to first 2 dims
let rho_source = RestrictionMap::projection(vec![0, 1], 4);
let rho_target = RestrictionMap::identity(2);
let edge = SheafEdge::with_restrictions(source, target, rho_source, rho_target);
let source_state = vec![1.0, 2.0, 100.0, 200.0];
let target_state = vec![1.0, 2.0];
let alloc_result = edge.residual_norm_squared(&source_state, &target_state);
let no_alloc_result = edge.residual_norm_squared_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-10);
assert!(no_alloc_result < 1e-10);
}
// Scratch-buffer variant with a diagonal map on the source side.
#[test]
fn test_residual_norm_squared_no_alloc_with_diagonal() {
let (source, target) = make_test_nodes();
let rho_source = RestrictionMap::diagonal(vec![2.0, 2.0]);
let rho_target = RestrictionMap::identity(2);
let edge = SheafEdge::with_restrictions(source, target, rho_source, rho_target);
let source_state = vec![1.0, 1.0];
let target_state = vec![2.0, 2.0];
let alloc_result = edge.residual_norm_squared(&source_state, &target_state);
let no_alloc_result = edge.residual_norm_squared_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-10);
assert!(no_alloc_result < 1e-10);
}
// Weighted energy is the same through both code paths.
#[test]
fn test_weighted_residual_energy_no_alloc() {
let (source, target) = make_test_nodes();
let mut edge = SheafEdge::identity(source, target, 2);
edge.set_weight(2.0);
let source_state = vec![1.0, 0.0];
let target_state = vec![0.0, 0.0];
let alloc_result = edge.weighted_residual_energy(&source_state, &target_state);
let no_alloc_result = edge.weighted_residual_energy_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-10);
assert!((no_alloc_result - 2.0).abs() < 1e-10);
}
// Calls with dimensions 3, 5, then 3 again on the same thread must not
// leak values from the larger call into the later smaller one.
#[test]
fn test_no_alloc_buffer_reuse() {
// Test that scratch buffers are properly reused across multiple calls
let (source, target) = make_test_nodes();
// First call with dim=3
let edge3 = SheafEdge::identity(source, target, 3);
let result3 = edge3.residual_norm_squared_no_alloc(&[1.0, 2.0, 3.0], &[0.0, 0.0, 0.0]);
assert!((result3 - 14.0).abs() < 1e-10);
// Second call with larger dim=5 (buffers should grow)
let edge5 = SheafEdge::identity(source, target, 5);
let result5 = edge5
.residual_norm_squared_no_alloc(&[1.0, 2.0, 3.0, 4.0, 5.0], &[0.0, 0.0, 0.0, 0.0, 0.0]);
assert!((result5 - 55.0).abs() < 1e-10); // 1 + 4 + 9 + 16 + 25 = 55
// Third call back to dim=3 (buffers should shrink length but keep capacity)
let result3_again =
edge3.residual_norm_squared_no_alloc(&[1.0, 2.0, 3.0], &[0.0, 0.0, 0.0]);
assert!((result3_again - 14.0).abs() < 1e-10);
}
// A dimension above the initial scratch capacity forces the buffers to grow.
#[test]
fn test_no_alloc_large_dimension() {
// Test with dimension larger than default capacity (256)
let (source, target) = make_test_nodes();
let dim = 512;
let edge = SheafEdge::identity(source, target, dim);
let source_state: Vec<f32> = (0..dim).map(|i| i as f32).collect();
let target_state: Vec<f32> = vec![0.0; dim];
let alloc_result = edge.residual_norm_squared(&source_state, &target_state);
let no_alloc_result = edge.residual_norm_squared_no_alloc(&source_state, &target_state);
assert!((alloc_result - no_alloc_result).abs() < 1e-4);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,216 @@
//! Knowledge Substrate: Sheaf Graph Data Structures
//!
//! This module implements the mathematical foundation for coherence computation
//! using sheaf theory. The key abstractions are:
//!
//! - **SheafNode**: Vertices carrying fixed-dimensional state vectors (stalks)
//! - **SheafEdge**: Edges encoding constraints via restriction maps
//! - **RestrictionMap**: Linear transforms defining how states constrain each other
//! - **SheafGraph**: The aggregate root managing the complete graph structure
//!
//! # Mathematical Foundation
//!
//! A sheaf on a graph assigns:
//! - A vector space F(v) to each vertex v (the "stalk")
//! - A linear map ρ: F(u) → F(e) for each edge e incident to u (the "restriction")
//!
//! The **residual** at an edge measures local inconsistency:
//! ```text
//! r_e = ρ_source(x_source) - ρ_target(x_target)
//! ```
//!
//! The **coherence energy** is the global inconsistency measure:
//! ```text
//! E(S) = Σ w_e ||r_e||²
//! ```
//!
//! # Domain Agnostic Design
//!
//! The same substrate supports multiple domains:
//!
//! | Domain | Nodes | Edges | Residual Interpretation |
//! |--------|-------|-------|------------------------|
//! | AI Agents | Facts, beliefs | Citations, implication | Contradiction energy |
//! | Finance | Trades, positions | Market dependencies | Regime mismatch |
//! | Medical | Vitals, diagnoses | Physiological causality | Clinical disagreement |
//! | Robotics | Sensors, goals | Physics, kinematics | Motion impossibility |
//!
//! # Performance Features
//!
//! - SIMD-optimized residual calculation
//! - Incremental fingerprint updates
//! - Thread-safe with rayon parallelization
//! - Cache-aligned data structures
pub mod edge;
pub mod graph;
pub mod node;
pub mod restriction;
// Re-exports
pub use edge::{EdgeId, SheafEdge, SheafEdgeBuilder};
pub use graph::{
CoherenceEnergy, CoherenceFingerprint, GraphStats, IncrementalCoherence, Namespace, ScopeId,
SheafGraph, SheafGraphBuilder,
};
pub use node::{NodeId, NodeMetadata, SheafNode, SheafNodeBuilder, StateVector};
pub use restriction::{
CsrMatrix, MatrixStorage, RestrictionMap, RestrictionMapBuilder, RestrictionMapError,
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A subgraph extracted from a SheafGraph for localized computation
///
/// Useful for:
/// - Computing energy in a neighborhood
/// - Isolating incoherent regions
/// - Parallel processing of graph partitions
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafSubgraph {
    /// Nodes in the subgraph
    pub nodes: HashMap<NodeId, SheafNode>,
    /// Edges in the subgraph (only edges between nodes in the subgraph)
    pub edges: HashMap<EdgeId, SheafEdge>,
    /// Optional center node (for neighborhood subgraphs)
    pub center: Option<NodeId>,
    /// Number of hops from center (if applicable)
    pub hops: Option<usize>,
}

impl SheafSubgraph {
    /// Create a new empty subgraph with no center and no hop limit
    pub fn new() -> Self {
        Self {
            nodes: HashMap::new(),
            edges: HashMap::new(),
            center: None,
            hops: None,
        }
    }

    /// Create an empty subgraph that records `center` and `hops`
    ///
    /// No nodes are added here, not even the center itself.
    pub fn centered(center: NodeId, hops: usize) -> Self {
        Self {
            nodes: HashMap::new(),
            edges: HashMap::new(),
            center: Some(center),
            hops: Some(hops),
        }
    }

    /// Add a node, replacing any existing node with the same id
    pub fn add_node(&mut self, node: SheafNode) {
        self.nodes.insert(node.id, node);
    }

    /// Add an edge, replacing any existing edge with the same id
    ///
    /// The endpoints are not checked against `nodes`.
    pub fn add_edge(&mut self, edge: SheafEdge) {
        self.edges.insert(edge.id, edge);
    }

    /// Check if the subgraph contains a node
    pub fn has_node(&self, id: NodeId) -> bool {
        self.nodes.contains_key(&id)
    }

    /// Check if the subgraph contains an edge
    pub fn has_edge(&self, id: EdgeId) -> bool {
        self.edges.contains_key(&id)
    }

    /// Get the number of nodes
    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }

    /// Get the number of edges
    pub fn edge_count(&self) -> usize {
        self.edges.len()
    }

    /// Compute total coherence energy within the subgraph
    ///
    /// Sums `weighted_residual_energy` over every edge whose two endpoints
    /// are both present in `nodes`. Edges with a missing endpoint are
    /// skipped. Returns `0.0` for a subgraph without such edges.
    pub fn compute_energy(&self) -> f32 {
        let mut total = 0.0;
        for edge in self.edges.values() {
            if let (Some(source), Some(target)) =
                (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
            {
                total +=
                    edge.weighted_residual_energy(source.state.as_slice(), target.state.as_slice());
            }
        }
        total
    }

    /// Extract a subgraph from a SheafGraph around a center node
    ///
    /// Performs a breadth-first walk from `center`, following incident edges
    /// in either direction, and collects every node reachable in at most
    /// `hops` steps. Afterwards every edge of `graph` whose two endpoints
    /// were both copied into the subgraph is added as well.
    ///
    /// Ids that the walk reaches but `graph.get_node` cannot return are not
    /// added to `nodes`, and edges touching them are left out, so every
    /// stored edge has both endpoints in `nodes`.
    pub fn from_graph(graph: &SheafGraph, center: NodeId, hops: usize) -> Self {
        let mut subgraph = Self::centered(center, hops);

        // Breadth-first search, one frontier per hop level.
        let mut visited = std::collections::HashSet::new();
        let mut frontier = vec![center];
        let mut depth = 0;
        while depth <= hops && !frontier.is_empty() {
            let mut next_frontier = Vec::new();
            for node_id in frontier {
                // `insert` returns false when the id was already visited.
                if !visited.insert(node_id) {
                    continue;
                }
                if let Some(node) = graph.get_node(node_id) {
                    subgraph.add_node(node);
                }
                // Nodes on the outermost level are collected but not expanded.
                if depth < hops {
                    for edge_id in graph.edges_incident_to(node_id) {
                        if let Some(edge) = graph.get_edge(edge_id) {
                            let neighbor = if edge.source == node_id {
                                edge.target
                            } else {
                                edge.source
                            };
                            if !visited.contains(&neighbor) {
                                next_frontier.push(neighbor);
                            }
                        }
                    }
                }
            }
            frontier = next_frontier;
            depth += 1;
        }

        // Collect the edges induced by the copied nodes.
        for node_id in &visited {
            for edge_id in graph.edges_incident_to(*node_id) {
                // Each edge is seen once per endpoint; skip the lookup when
                // it has already been added.
                if subgraph.has_edge(edge_id) {
                    continue;
                }
                if let Some(edge) = graph.get_edge(edge_id) {
                    // Test membership in the subgraph itself, not in
                    // `visited`, which may hold ids with no node behind them.
                    if subgraph.has_node(edge.source) && subgraph.has_node(edge.target) {
                        subgraph.add_edge(edge);
                    }
                }
            }
        }
        subgraph
    }
}

impl Default for SheafSubgraph {
    /// Same as [`SheafSubgraph::new`]: an empty subgraph
    fn default() -> Self {
        Self::new()
    }
}

View File

@@ -0,0 +1,587 @@
//! SheafNode: Entity with fixed-dimensional state vector
//!
//! A node in the sheaf graph represents an entity carrying a state vector (the "stalk"
//! of the sheaf). Nodes are domain-agnostic and can represent:
//!
//! - Facts, hypotheses, beliefs (AI agents)
//! - Trades, positions, signals (finance)
//! - Vitals, diagnoses, treatments (medical)
//! - Sensor readings, goals, plans (robotics)
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// Unique identifier for a node
pub type NodeId = Uuid;
/// State vector type - fixed-dimensional f32 vector
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateVector {
/// The raw vector data
data: Vec<f32>,
/// Dimensionality (cached for fast access)
dim: usize,
}
impl StateVector {
/// Create a new state vector from a slice
#[inline]
pub fn new(data: impl Into<Vec<f32>>) -> Self {
let data = data.into();
let dim = data.len();
Self { data, dim }
}
/// Create a zero vector of given dimension
#[inline]
pub fn zeros(dim: usize) -> Self {
Self {
data: vec![0.0; dim],
dim,
}
}
/// Create a random unit vector (useful for initialization)
pub fn random_unit(dim: usize) -> Self {
use rand::Rng;
let mut rng = rand::thread_rng();
let mut data: Vec<f32> = (0..dim).map(|_| rng.gen::<f32>() - 0.5).collect();
// Normalize to unit length
let norm: f32 = data.iter().map(|x| x * x).sum::<f32>().sqrt();
if norm > 1e-10 {
for x in &mut data {
*x /= norm;
}
}
Self { data, dim }
}
/// Get the dimension of the vector
#[inline]
pub fn dim(&self) -> usize {
self.dim
}
/// Get the raw data as a slice
#[inline]
pub fn as_slice(&self) -> &[f32] {
&self.data
}
/// Get the raw data as a mutable slice
#[inline]
pub fn as_mut_slice(&mut self) -> &mut [f32] {
&mut self.data
}
/// Compute L2 norm squared (for energy calculations)
///
/// SIMD-optimized: Uses chunks_exact for proper auto-vectorization.
#[inline]
pub fn norm_squared(&self) -> f32 {
// Process 4 elements at a time for auto-vectorization
let chunks = self.data.chunks_exact(4);
let remainder = chunks.remainder();
let mut acc = [0.0f32; 4];
for chunk in chunks {
acc[0] += chunk[0] * chunk[0];
acc[1] += chunk[1] * chunk[1];
acc[2] += chunk[2] * chunk[2];
acc[3] += chunk[3] * chunk[3];
}
let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
for &x in remainder {
sum += x * x;
}
sum
}
/// Compute L2 norm
#[inline]
pub fn norm(&self) -> f32 {
self.norm_squared().sqrt()
}
/// Compute dot product with another vector
///
/// SIMD-optimized: Uses chunks_exact for proper auto-vectorization.
#[inline]
pub fn dot(&self, other: &Self) -> f32 {
debug_assert_eq!(self.dim, other.dim, "Vector dimensions must match");
// Process 4 elements at a time for auto-vectorization
let chunks_a = self.data.chunks_exact(4);
let chunks_b = other.data.chunks_exact(4);
let remainder_a = chunks_a.remainder();
let remainder_b = chunks_b.remainder();
let mut acc = [0.0f32; 4];
for (ca, cb) in chunks_a.zip(chunks_b) {
acc[0] += ca[0] * cb[0];
acc[1] += ca[1] * cb[1];
acc[2] += ca[2] * cb[2];
acc[3] += ca[3] * cb[3];
}
let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
for (&a, &b) in remainder_a.iter().zip(remainder_b.iter()) {
sum += a * b;
}
sum
}
/// Component-wise difference `self - other`, returned as a new vector.
///
/// Used for residual calculation. Both vectors are expected to have the same
/// dimension; this is verified only by a `debug_assert_eq!`. The result keeps
/// the dimension of `self`.
#[inline]
pub fn subtract(&self, other: &Self) -> Self {
    debug_assert_eq!(self.dim, other.dim, "Vector dimensions must match");
    Self {
        dim: self.dim,
        data: self
            .data
            .iter()
            .zip(&other.data)
            .map(|(lhs, rhs)| lhs - rhs)
            .collect(),
    }
}
/// Component-wise sum `self + other`, returned as a new vector.
///
/// Both vectors are expected to have the same dimension; this is verified only
/// by a `debug_assert_eq!`. The result keeps the dimension of `self`.
#[inline]
pub fn add(&self, other: &Self) -> Self {
    debug_assert_eq!(self.dim, other.dim, "Vector dimensions must match");
    Self {
        dim: self.dim,
        data: self
            .data
            .iter()
            .zip(&other.data)
            .map(|(lhs, rhs)| lhs + rhs)
            .collect(),
    }
}
/// Return a new vector in which every component is multiplied by `factor`.
///
/// `self` is left untouched; the result has the same dimension.
#[inline]
pub fn scale(&self, factor: f32) -> Self {
    // Copy the components once, then scale the copy in place.
    let mut scaled = self.data.clone();
    for value in &mut scaled {
        *value *= factor;
    }
    Self {
        data: scaled,
        dim: self.dim,
    }
}
/// Overwrite the components in place with the values of `new_data`
/// (for incremental updates).
///
/// # Panics
///
/// `new_data` must have exactly as many elements as the vector. Debug builds
/// check this against `dim` first; in every build `copy_from_slice` panics if
/// the slice lengths differ.
#[inline]
pub fn update(&mut self, new_data: &[f32]) {
    debug_assert_eq!(new_data.len(), self.dim, "Update must match dimension");
    self.data.as_mut_slice().copy_from_slice(new_data);
}
/// Fingerprint of the vector contents as a 64-bit hash.
///
/// Each component's raw bit pattern (`f32::to_bits`) is fed, in order, into the
/// standard library's `DefaultHasher`. Values are therefore distinguished by
/// bit pattern, not by numeric equality. A stronger hash such as Blake3 would
/// be preferable; this is kept deliberately simple.
///
/// Note: the standard library does not specify the algorithm behind
/// `DefaultHasher`, so these values should not be relied on across Rust releases.
pub fn content_hash(&self) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut digest = DefaultHasher::new();
    self.data
        .iter()
        .for_each(|component| Hash::hash(&component.to_bits(), &mut digest));
    digest.finish()
}
}
impl From<Vec<f32>> for StateVector {
fn from(data: Vec<f32>) -> Self {
Self::new(data)
}
}
impl From<&[f32]> for StateVector {
fn from(data: &[f32]) -> Self {
Self::new(data.to_vec())
}
}
impl AsRef<[f32]> for StateVector {
    /// View the state vector as a plain `f32` slice.
    fn as_ref(&self) -> &[f32] {
        self.as_slice()
    }
}
/// Metadata associated with a node
///
/// Every field is either optional or a collection, and the type derives
/// `Default`, so a value with nothing set can be obtained from
/// `NodeMetadata::default()`. The type is serializable through the derived
/// serde `Serialize`/`Deserialize` implementations.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NodeMetadata {
/// Human-readable label/name
pub label: Option<String>,
/// Node type for filtering (e.g., "fact", "hypothesis", "belief")
///
/// Stored as free-form text; this type does not restrict the set of values.
pub node_type: Option<String>,
/// Namespace/scope for multi-tenant isolation
pub namespace: Option<String>,
/// Tags for categorization
///
/// Kept as a plain list; nothing in this type removes duplicate entries.
pub tags: Vec<String>,
/// Arbitrary key-value properties
///
/// Values are arbitrary JSON (`serde_json::Value`).
pub properties: HashMap<String, serde_json::Value>,
/// Source/provenance information
pub source: Option<String>,
/// Confidence score (0.0-1.0) if applicable
///
/// The field is public, so the documented range is not enforced by this
/// type itself when the field is assigned directly.
pub confidence: Option<f32>,
}
impl NodeMetadata {
    /// Build metadata with nothing set (identical to the `Default` value).
    pub fn new() -> Self {
        Default::default()
    }

    /// Build metadata that carries only `label`; every other field keeps its
    /// default value.
    pub fn with_label(label: impl Into<String>) -> Self {
        Self {
            label: Some(label.into()),
            ..Self::default()
        }
    }

    /// Report whether the node's namespace is set and equals `namespace` exactly.
    ///
    /// Returns `false` when no namespace is set.
    pub fn in_namespace(&self, namespace: &str) -> bool {
        matches!(self.namespace.as_deref(), Some(current) if current == namespace)
    }

    /// Report whether `tag` is among the node's tags (exact string comparison).
    pub fn has_tag(&self, tag: &str) -> bool {
        self.tags
            .iter()
            .map(String::as_str)
            .any(|candidate| candidate == tag)
    }
}
/// A node in the sheaf graph carrying a fixed-dimensional state vector
///
/// All fields are public. The constructors and update methods on this type
/// keep `updated_at` and `version` in step with `state`; code that assigns
/// fields directly bypasses that bookkeeping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafNode {
/// Unique node identifier
pub id: NodeId,
/// Fixed-dimensional state vector (stalk of the sheaf)
pub state: StateVector,
/// Metadata for filtering and governance
pub metadata: NodeMetadata,
/// Timestamp of creation
pub created_at: DateTime<Utc>,
/// Timestamp of last state update
///
/// Equal to `created_at` for a freshly constructed node.
pub updated_at: DateTime<Utc>,
/// Version counter for optimistic concurrency
///
/// Starts at 1 and is incremented by one on every state update.
pub version: u64,
}
impl SheafNode {
    /// Create a node around `state` with a freshly generated v4 UUID as its ID.
    ///
    /// See [`with_id`](Self::with_id) for the initial values of the other fields.
    pub fn new(state: StateVector) -> Self {
        Self::with_id(Uuid::new_v4(), state)
    }

    /// Create a node around `state` using the caller-supplied `id`.
    ///
    /// Metadata starts out as the default (empty) value, both timestamps are set
    /// to the same current UTC instant, and the version counter starts at 1.
    pub fn with_id(id: NodeId, state: StateVector) -> Self {
        let created_at = Utc::now();
        Self {
            id,
            state,
            metadata: NodeMetadata::default(),
            created_at,
            updated_at: created_at,
            version: 1,
        }
    }

    /// Dimension of the node's state vector.
    #[inline]
    pub fn dim(&self) -> usize {
        self.state.dim()
    }

    /// Replace the state vector with `new_state`.
    ///
    /// Afterwards `updated_at` is set to the current UTC time and `version` is
    /// incremented by one.
    ///
    /// The new state is expected to have the same dimension as the current one.
    /// This is verified only by a `debug_assert_eq!`, so release builds perform
    /// no such check.
    pub fn update_state(&mut self, new_state: StateVector) {
        debug_assert_eq!(
            new_state.dim(),
            self.state.dim(),
            "State dimension must not change"
        );
        self.state = new_state;
        self.record_state_change();
    }

    /// Overwrite the existing state vector in place with the values of `data`.
    ///
    /// Delegates the copy to `StateVector::update`, then sets `updated_at` to the
    /// current UTC time and increments `version` by one.
    pub fn update_state_from_slice(&mut self, data: &[f32]) {
        self.state.update(data);
        self.record_state_change();
    }

    /// Fingerprint of the node as a 64-bit hash.
    ///
    /// Combines the node ID, the state vector's content hash and the version
    /// counter, in that order. Metadata and timestamps are not part of the hash.
    pub fn content_hash(&self) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        let mut digest = DefaultHasher::new();
        Hash::hash(&self.id, &mut digest);
        digest.write_u64(self.state.content_hash());
        digest.write_u64(self.version);
        digest.finish()
    }

    /// Report whether the node is stale, i.e. its last state update happened
    /// strictly before `cutoff`.
    pub fn is_stale(&self, cutoff: DateTime<Utc>) -> bool {
        cutoff > self.updated_at
    }

    /// Bookkeeping shared by the state-update methods: refresh `updated_at` and
    /// bump `version`.
    fn record_state_change(&mut self) {
        self.updated_at = Utc::now();
        self.version += 1;
    }
}
/// Builder for constructing SheafNode instances
///
/// Collects an optional ID, the state vector and the metadata, then produces
/// the node through `build` (panics without a state) or `try_build` (returns
/// an error without a state).
#[derive(Debug, Default)]
pub struct SheafNodeBuilder {
// Explicit node ID; when `None`, a random v4 UUID is generated at build time.
id: Option<NodeId>,
// State vector for the node; must be set before the node can be built.
state: Option<StateVector>,
// Metadata accumulated by the setter methods; starts out as the default value.
metadata: NodeMetadata,
}
impl SheafNodeBuilder {
    /// Create a builder with no ID, no state and default (empty) metadata.
    pub fn new() -> Self {
        Self::default()
    }

    /// Use `id` as the node identifier instead of generating one at build time.
    pub fn id(mut self, id: NodeId) -> Self {
        self.id = Some(id);
        self
    }

    /// Set the state vector from any value convertible into a [`StateVector`].
    pub fn state(mut self, state: impl Into<StateVector>) -> Self {
        self.state = Some(state.into());
        self
    }

    /// Set the state vector by copying the values of `data`.
    pub fn state_from_slice(mut self, data: &[f32]) -> Self {
        self.state = Some(StateVector::new(data.to_vec()));
        self
    }

    /// Set the state to `StateVector::zeros(dim)`.
    pub fn zero_state(mut self, dim: usize) -> Self {
        self.state = Some(StateVector::zeros(dim));
        self
    }

    /// Set the state to `StateVector::random_unit(dim)`.
    pub fn random_state(mut self, dim: usize) -> Self {
        self.state = Some(StateVector::random_unit(dim));
        self
    }

    /// Set the human-readable label.
    pub fn label(mut self, label: impl Into<String>) -> Self {
        self.metadata.label = Some(label.into());
        self
    }

    /// Set the node type.
    pub fn node_type(mut self, node_type: impl Into<String>) -> Self {
        self.metadata.node_type = Some(node_type.into());
        self
    }

    /// Set the namespace.
    pub fn namespace(mut self, namespace: impl Into<String>) -> Self {
        self.metadata.namespace = Some(namespace.into());
        self
    }

    /// Append a single tag. Existing tags are kept; duplicates are not removed.
    pub fn tag(mut self, tag: impl Into<String>) -> Self {
        self.metadata.tags.push(tag.into());
        self
    }

    /// Append several tags, preserving their iteration order. Existing tags are
    /// kept; duplicates are not removed.
    pub fn tags(mut self, tags: impl IntoIterator<Item = impl Into<String>>) -> Self {
        self.metadata
            .tags
            .extend(tags.into_iter().map(Into::<String>::into));
        self
    }

    /// Set a property, replacing any value previously stored under the same key.
    pub fn property(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
        self.metadata.properties.insert(key.into(), value.into());
        self
    }

    /// Set the source/provenance information.
    pub fn source(mut self, source: impl Into<String>) -> Self {
        self.metadata.source = Some(source.into());
        self
    }

    /// Set the confidence, clamped into the range `0.0..=1.0`.
    ///
    /// Note: `f32::clamp` returns NaN for a NaN input, so a NaN confidence is
    /// stored unchanged.
    pub fn confidence(mut self, confidence: f32) -> Self {
        self.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
        self
    }

    /// Build the node
    ///
    /// Shares its construction logic with [`try_build`](Self::try_build); use
    /// that method to handle a missing state without panicking.
    ///
    /// # Panics
    ///
    /// Panics with the message `State vector is required` if no state vector
    /// was provided.
    pub fn build(self) -> SheafNode {
        match self.try_build() {
            Ok(node) => node,
            // Re-raise the error text verbatim so the panic message is unchanged.
            Err(reason) => panic!("{}", reason),
        }
    }

    /// Try to build the node, returning an error if state is missing
    ///
    /// On success the node gets the configured ID (or a freshly generated v4
    /// UUID when none was set), the collected metadata, identical `created_at`
    /// and `updated_at` timestamps taken from the current UTC time, and
    /// version 1.
    ///
    /// # Errors
    ///
    /// Returns `Err("State vector is required")` when no state was provided.
    pub fn try_build(self) -> Result<SheafNode, &'static str> {
        let state = self.state.ok_or("State vector is required")?;
        let now = Utc::now();
        Ok(SheafNode {
            id: self.id.unwrap_or_else(Uuid::new_v4),
            state,
            metadata: self.metadata,
            created_at: now,
            updated_at: now,
            version: 1,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` keeps the data and reports its length as the dimension.
    #[test]
    fn test_state_vector_creation() {
        let v = StateVector::new(vec![1.0, 2.0, 3.0]);
        assert_eq!(v.dim(), 3);
        assert_eq!(v.as_slice(), &[1.0, 2.0, 3.0]);
    }

    /// `zeros` yields the requested dimension with every component at 0.0.
    #[test]
    fn test_state_vector_zeros() {
        let v = StateVector::zeros(5);
        assert_eq!(v.dim(), 5);
        assert!(v.as_slice().iter().all(|&x| x == 0.0));
    }

    /// 3-4-5 triangle: norm is 5, squared norm is 25.
    #[test]
    fn test_state_vector_norm() {
        let v = StateVector::new(vec![3.0, 4.0]);
        assert!((v.norm() - 5.0).abs() < 1e-6);
        assert!((v.norm_squared() - 25.0).abs() < 1e-6);
    }

    /// 1*4 + 2*5 + 3*6 = 32.
    #[test]
    fn test_state_vector_dot() {
        let a = StateVector::new(vec![1.0, 2.0, 3.0]);
        let b = StateVector::new(vec![4.0, 5.0, 6.0]);
        assert!((a.dot(&b) - 32.0).abs() < 1e-6);
    }

    /// Subtraction is component-wise.
    #[test]
    fn test_state_vector_subtract() {
        let a = StateVector::new(vec![5.0, 10.0]);
        let b = StateVector::new(vec![2.0, 3.0]);
        let c = a.subtract(&b);
        assert_eq!(c.as_slice(), &[3.0, 7.0]);
    }

    /// Addition is component-wise and keeps the dimension.
    #[test]
    fn test_state_vector_add() {
        let a = StateVector::new(vec![1.0, 2.0]);
        let b = StateVector::new(vec![3.0, 4.5]);
        let c = a.add(&b);
        assert_eq!(c.dim(), 2);
        assert_eq!(c.as_slice(), &[4.0, 6.5]);
    }

    /// Scaling multiplies every component by the factor.
    #[test]
    fn test_state_vector_scale() {
        let v = StateVector::new(vec![1.0, 2.0, 3.0]);
        let scaled = v.scale(2.0);
        assert_eq!(scaled.as_slice(), &[2.0, 4.0, 6.0]);
    }

    /// The builder carries state and metadata through to the built node.
    #[test]
    fn test_node_builder() {
        let node = SheafNodeBuilder::new()
            .state_from_slice(&[1.0, 2.0, 3.0])
            .label("test_node")
            .node_type("fact")
            .namespace("test")
            .tag("important")
            .confidence(0.95)
            .build();
        assert_eq!(node.dim(), 3);
        assert_eq!(node.metadata.label, Some("test_node".to_string()));
        assert_eq!(node.metadata.node_type, Some("fact".to_string()));
        assert_eq!(node.metadata.namespace, Some("test".to_string()));
        assert!(node.metadata.has_tag("important"));
        assert_eq!(node.metadata.confidence, Some(0.95));
    }

    /// `try_build` reports a missing state as an error instead of panicking.
    #[test]
    fn test_try_build_requires_state() {
        let outcome = SheafNodeBuilder::new().label("orphan").try_build();
        assert_eq!(outcome.err(), Some("State vector is required"));
    }

    /// Updating the state bumps the version and refreshes the timestamp.
    #[test]
    fn test_node_update_state() {
        let mut node = SheafNode::new(StateVector::new(vec![1.0, 2.0]));
        let old_version = node.version;
        let old_updated = node.updated_at;
        // Give the wall clock a chance to advance so the timestamps can differ.
        std::thread::sleep(std::time::Duration::from_millis(1));
        node.update_state(StateVector::new(vec![3.0, 4.0]));
        assert_eq!(node.version, old_version + 1);
        assert!(node.updated_at > old_updated);
        assert_eq!(node.state.as_slice(), &[3.0, 4.0]);
    }

    /// Equal inputs hash equally, at both the state and the node level.
    #[test]
    fn test_node_content_hash() {
        let node1 = SheafNodeBuilder::new()
            .id(Uuid::new_v4())
            .state_from_slice(&[1.0, 2.0])
            .build();
        let node2 = SheafNodeBuilder::new()
            .id(node1.id)
            .state_from_slice(&[1.0, 2.0])
            .build();
        // Same state content must produce the same state hash.
        assert_eq!(node1.state.content_hash(), node2.state.content_hash());
        // The node hash covers id, state hash and version; both nodes share the
        // id and state and are freshly built (version 1), so it must match too.
        assert_eq!(node1.version, node2.version);
        assert_eq!(node1.content_hash(), node2.content_hash());
    }

    /// `random_unit` yields the requested dimension and roughly unit length.
    #[test]
    fn test_random_unit_vector() {
        let v = StateVector::random_unit(100);
        assert_eq!(v.dim(), 100);
        // Should be approximately unit length
        assert!((v.norm() - 1.0).abs() < 0.01);
    }
}

View File

@@ -0,0 +1,59 @@
//! Repository trait for sheaf graph persistence.
use super::{SheafGraph, SheafNode};
use crate::error::StorageResult;
use crate::types::{GraphId, NamespaceId, NodeId};
/// Repository trait for sheaf graph persistence.
///
/// This trait defines the interface for storing and retrieving sheaf graphs.
/// Implementations may use various backends (in-memory, PostgreSQL, ruvector, etc.)
///
/// Implementors must be `Send + Sync`. Every method is asynchronous and reports
/// failures through `StorageResult`.
// The `async_fn_in_trait` lint fires for `async fn` in public traits because the
// returned futures cannot be given auto-trait bounds such as `Send`; it is
// silenced here. NOTE(review): confirm that no caller needs these futures to be
// `Send` (e.g. to spawn them on a multi-threaded executor).
#[allow(async_fn_in_trait)]
pub trait SheafGraphRepository: Send + Sync {
/// Find a graph by its ID.
///
/// The `Option` in the return type presumably distinguishes "no graph with
/// this ID" (`Ok(None)`) from a storage failure (`Err`) — confirm with the
/// implementations.
async fn find_by_id(&self, id: GraphId) -> StorageResult<Option<SheafGraph>>;
/// Save a graph (insert or update).
async fn save(&self, graph: &SheafGraph) -> StorageResult<()>;
/// Delete a graph.
///
/// NOTE(review): whether deleting an unknown ID is an error is not specified
/// here — confirm with the implementations.
async fn delete(&self, id: GraphId) -> StorageResult<()>;
/// Find all nodes in a namespace.
async fn find_nodes_by_namespace(&self, namespace: &NamespaceId) -> StorageResult<Vec<SheafNode>>;
/// Find nodes similar to a query state using vector search.
///
/// `state` is the query vector. `k` is presumably the maximum number of
/// results to return — TODO confirm. Each result pairs a node ID with an `f32`
/// score; whether that score is a similarity or a distance is not specified
/// here — verify against the implementation in use.
async fn find_similar_nodes(
&self,
state: &[f32],
k: usize,
) -> StorageResult<Vec<(NodeId, f32)>>;
}
/// In-memory repository implementation (for testing).
///
/// NOTE(review): the code visible alongside this type only provides a
/// constructor and a placeholder synchronous lookup; no `SheafGraphRepository`
/// implementation is visible there — confirm whether one exists elsewhere.
#[derive(Debug, Default)]
pub struct InMemoryGraphRepository {
// Stored graphs keyed by graph ID, guarded by a `parking_lot` read-write lock.
graphs: parking_lot::RwLock<std::collections::HashMap<GraphId, SheafGraph>>,
}
impl InMemoryGraphRepository {
    /// Construct a repository whose graph map starts out as the default
    /// (empty) value.
    pub fn new() -> Self {
        Self {
            graphs: Default::default(),
        }
    }
}
// Note: an actual async implementation would go here if the `tokio` feature is
// enabled. For now, only a synchronous placeholder is provided.
impl InMemoryGraphRepository {
    /// Placeholder for a synchronous lookup by ID; currently always returns `None`.
    ///
    /// `SheafGraph` doesn't implement `Clone` (due to `DashMap`), so a stored
    /// graph cannot simply be cloned out of the map. Until the in-memory storage
    /// is redesigned, this method acquires the read lock and reports "not found"
    /// regardless of `id`.
    pub fn find_by_id_sync(&self, id: GraphId) -> Option<SheafGraph> {
        // The lookup key is intentionally ignored by this stub; binding it to `_`
        // avoids an `unused_variables` warning without changing the signature.
        let _ = id;
        // The guard is held until the end of the function but nothing is read
        // through it. NOTE(review): presumably kept so the `graphs` field counts
        // as used — confirm before removing.
        let _graphs = self.graphs.read();
        // This is a placeholder - a real implementation would need a redesign.
        None
    }
}

File diff suppressed because it is too large Load Diff