Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,264 @@
//! Hyperedge structures and indexing
//!
//! Implements hyperedges (edges connecting more than 2 vertices) and
//! efficient indices for querying them.
use dashmap::DashMap;
use exo_core::{EntityId, HyperedgeId, Relation, RelationType, SubstrateTime};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// A hyperedge connecting multiple entities
///
/// Unlike an ordinary graph edge (exactly two endpoints), a hyperedge may
/// span an arbitrary number of entities at once.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hyperedge {
    /// Unique identifier
    pub id: HyperedgeId,
    /// Entities connected by this hyperedge
    pub entities: Vec<EntityId>,
    /// Relation type and properties
    pub relation: Relation,
    /// Edge weight
    // NOTE(review): initialised to 1.0 by `Hyperedge::new`; no other writer
    // is visible in this file — confirm callers mutate it directly if needed.
    pub weight: f32,
    /// Creation timestamp
    pub created_at: SubstrateTime,
}
impl Hyperedge {
    /// Build a hyperedge over `entities` described by `relation`.
    ///
    /// A fresh id and creation timestamp are generated; the weight
    /// defaults to 1.0.
    pub fn new(entities: Vec<EntityId>, relation: Relation) -> Self {
        let id = HyperedgeId::new();
        let created_at = SubstrateTime::now();
        Self {
            id,
            entities,
            relation,
            weight: 1.0,
            created_at,
        }
    }

    /// Arity of the hyperedge, i.e. how many entities it spans.
    pub fn arity(&self) -> usize {
        self.entities.len()
    }

    /// Returns `true` when `entity` participates in this hyperedge.
    pub fn contains_entity(&self, entity: &EntityId) -> bool {
        self.entities.iter().any(|e| e == entity)
    }
}
/// Index structure for efficient hyperedge queries
///
/// Maintains inverted indices for fast lookups by entity and relation type.
/// All maps are concurrent (`DashMap`), so the index may be shared across
/// threads; the `Arc` wrappers keep the handles cheaply cloneable.
pub struct HyperedgeIndex {
    /// Hyperedge storage
    edges: Arc<DashMap<HyperedgeId, Hyperedge>>,
    /// Inverted index: entity -> hyperedges containing it
    entity_index: Arc<DashMap<EntityId, Vec<HyperedgeId>>>,
    /// Relation type index
    relation_index: Arc<DashMap<RelationType, Vec<HyperedgeId>>>,
}
impl HyperedgeIndex {
    /// Create a new empty hyperedge index
    pub fn new() -> Self {
        Self {
            edges: Arc::new(DashMap::new()),
            entity_index: Arc::new(DashMap::new()),
            relation_index: Arc::new(DashMap::new()),
        }
    }

    /// Insert a hyperedge (from pseudocode: CreateHyperedge)
    ///
    /// Creates a new hyperedge and updates all indices; returns its id.
    // NOTE(review): duplicate entities in `entities` produce duplicate
    // posting-list entries (and inflate `get_by_entity` counts); callers
    // appear to pass distinct ids — confirm, or dedupe upstream.
    pub fn insert(&self, entities: &[EntityId], relation: &Relation) -> HyperedgeId {
        let hyperedge = Hyperedge::new(entities.to_vec(), relation.clone());
        let hyperedge_id = hyperedge.id;
        // Insert into hyperedge storage
        self.edges.insert(hyperedge_id, hyperedge);
        // Update inverted index (entity -> hyperedges)
        for entity in entities {
            self.entity_index
                .entry(*entity)
                .or_default()
                .push(hyperedge_id);
        }
        // Update relation type index
        self.relation_index
            .entry(relation.relation_type.clone())
            .or_default()
            .push(hyperedge_id);
        hyperedge_id
    }

    /// Get a hyperedge by ID (cloned out of the concurrent map).
    pub fn get(&self, id: &HyperedgeId) -> Option<Hyperedge> {
        self.edges.get(id).map(|entry| entry.clone())
    }

    /// Get all hyperedges containing a specific entity
    pub fn get_by_entity(&self, entity: &EntityId) -> Vec<HyperedgeId> {
        self.entity_index
            .get(entity)
            .map(|entry| entry.clone())
            .unwrap_or_default()
    }

    /// Get all hyperedges of a specific relation type
    pub fn get_by_relation(&self, relation_type: &RelationType) -> Vec<HyperedgeId> {
        self.relation_index
            .get(relation_type)
            .map(|entry| entry.clone())
            .unwrap_or_default()
    }

    /// Get the number of hyperedges
    pub fn len(&self) -> usize {
        self.edges.len()
    }

    /// Check if the index is empty
    pub fn is_empty(&self) -> bool {
        self.edges.is_empty()
    }

    /// Get the maximum hyperedge size (arity); 0 when the index is empty.
    pub fn max_size(&self) -> usize {
        self.edges
            .iter()
            .map(|entry| entry.value().arity())
            .max()
            .unwrap_or(0)
    }

    /// Remove a hyperedge, scrubbing it from all secondary indices.
    ///
    /// Fix over the previous version: posting lists that become empty are
    /// now removed from `entity_index` / `relation_index` instead of being
    /// left behind as empty `Vec`s, which accumulated without bound as
    /// edges churned.
    pub fn remove(&self, id: &HyperedgeId) -> Option<Hyperedge> {
        let (_, hyperedge) = self.edges.remove(id)?;
        // Remove from entity index
        for entity in &hyperedge.entities {
            if let Some(mut entry) = self.entity_index.get_mut(entity) {
                entry.retain(|he_id| he_id != id);
            }
            // Drop the key once its posting list is empty. This runs after
            // the `get_mut` guard above has been dropped: removing while a
            // guard on the same shard is alive would deadlock.
            self.entity_index.remove_if(entity, |_, ids| ids.is_empty());
        }
        // Remove from relation index
        let rel_type = &hyperedge.relation.relation_type;
        if let Some(mut entry) = self.relation_index.get_mut(rel_type) {
            entry.retain(|he_id| he_id != id);
        }
        self.relation_index.remove_if(rel_type, |_, ids| ids.is_empty());
        Some(hyperedge)
    }

    /// Get all hyperedges (cloned snapshot; order is unspecified).
    pub fn all(&self) -> Vec<Hyperedge> {
        self.edges.iter().map(|entry| entry.clone()).collect()
    }

    /// Find hyperedges connecting a specific set of entities
    ///
    /// Returns hyperedges that contain all of the given entities.
    pub fn find_connecting(&self, entities: &[EntityId]) -> Vec<HyperedgeId> {
        if entities.is_empty() {
            return Vec::new();
        }
        // Start with hyperedges containing the first entity, then filter
        // to those containing every requested entity.
        let mut candidates = self.get_by_entity(&entities[0]);
        candidates.retain(|he_id| {
            if let Some(he) = self.get(he_id) {
                entities.iter().all(|e| he.contains_entity(e))
            } else {
                false
            }
        });
        candidates
    }
}
impl Default for HyperedgeIndex {
    /// Equivalent to [`HyperedgeIndex::new`]: an index with no edges.
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use exo_core::RelationType;

    /// A 3-entity hyperedge reports arity 3, membership, and default weight 1.0.
    #[test]
    fn test_hyperedge_creation() {
        let entities = vec![EntityId::new(), EntityId::new(), EntityId::new()];
        let relation = Relation {
            relation_type: RelationType::new("test"),
            properties: serde_json::json!({}),
        };
        let he = Hyperedge::new(entities.clone(), relation);
        assert_eq!(he.arity(), 3);
        assert!(he.contains_entity(&entities[0]));
        assert_eq!(he.weight, 1.0);
    }

    /// Inserting one hyperedge makes it retrievable by id, by each member
    /// entity, and counted by `len`.
    #[test]
    fn test_hyperedge_index() {
        let index = HyperedgeIndex::new();
        let e1 = EntityId::new();
        let e2 = EntityId::new();
        let e3 = EntityId::new();
        let relation = Relation {
            relation_type: RelationType::new("test"),
            properties: serde_json::json!({}),
        };
        // Insert hyperedge
        let he_id = index.insert(&[e1, e2, e3], &relation);
        // Verify retrieval
        assert!(index.get(&he_id).is_some());
        assert_eq!(index.get_by_entity(&e1).len(), 1);
        assert_eq!(index.get_by_entity(&e2).len(), 1);
        assert_eq!(index.len(), 1);
    }

    /// `find_connecting` returns only hyperedges containing ALL queried
    /// entities — here exactly the {e1,e2,e3} edge, not the sub/side edges.
    #[test]
    fn test_find_connecting() {
        let index = HyperedgeIndex::new();
        let e1 = EntityId::new();
        let e2 = EntityId::new();
        let e3 = EntityId::new();
        let e4 = EntityId::new();
        let relation = Relation {
            relation_type: RelationType::new("test"),
            properties: serde_json::json!({}),
        };
        // Create multiple hyperedges
        index.insert(&[e1, e2], &relation);
        let he2 = index.insert(&[e1, e2, e3], &relation);
        index.insert(&[e1, e4], &relation);
        // Find hyperedges connecting e1, e2, e3
        let connecting = index.find_connecting(&[e1, e2, e3]);
        assert_eq!(connecting.len(), 1);
        assert_eq!(connecting[0], he2);
    }
}

View File

@@ -0,0 +1,307 @@
//! Hypergraph Substrate for Higher-Order Relational Reasoning
//!
//! This crate provides a hypergraph-based substrate for representing and querying
//! complex, higher-order relationships between entities. It extends beyond simple
//! pairwise graphs to support hyperedges that span arbitrary sets of entities.
//!
//! # Features
//!
//! - **Hyperedge Support**: Relations spanning multiple entities (not just pairs)
//! - **Topological Data Analysis**: Persistent homology and Betti number computation
//! - **Sheaf Theory**: Consistency checks for distributed data structures
//! - **Thread-Safe**: Lock-free concurrent access using DashMap
//!
//! # Example
//!
//! ```rust
//! use exo_hypergraph::{HypergraphSubstrate, HypergraphConfig};
//! use exo_core::{EntityId, Relation, RelationType};
//!
//! let config = HypergraphConfig::default();
//! let mut hypergraph = HypergraphSubstrate::new(config);
//!
//! // Create entities
//! let entity1 = EntityId::new();
//! let entity2 = EntityId::new();
//! let entity3 = EntityId::new();
//!
//! // Add entities to the hypergraph
//! hypergraph.add_entity(entity1, serde_json::json!({"name": "Alice"}));
//! hypergraph.add_entity(entity2, serde_json::json!({"name": "Bob"}));
//! hypergraph.add_entity(entity3, serde_json::json!({"name": "Charlie"}));
//!
//! // Create a 3-way hyperedge
//! let relation = Relation {
//! relation_type: RelationType::new("collaboration"),
//! properties: serde_json::json!({"weight": 0.9}),
//! };
//!
//! let hyperedge_id = hypergraph.create_hyperedge(
//! &[entity1, entity2, entity3],
//! &relation
//! ).unwrap();
//! ```
pub mod hyperedge;
pub mod sheaf;
pub mod sparse_tda;
pub mod topology;
pub use hyperedge::{Hyperedge, HyperedgeIndex};
pub use sheaf::{SheafInconsistency, SheafStructure};
pub use sparse_tda::{
PersistenceBar, PersistenceDiagram as SparsePersistenceDiagram, SparseRipsComplex,
};
pub use topology::{PersistenceDiagram, SimplicialComplex};
use dashmap::DashMap;
use exo_core::{
EntityId, Error, HyperedgeId, HyperedgeResult, Relation, SectionId, SheafConsistencyResult,
TopologicalQuery,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// Configuration for hypergraph substrate
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HypergraphConfig {
    /// Enable sheaf consistency checking
    /// (when false, `check_sheaf_consistency` reports `NotConfigured`).
    pub enable_sheaf: bool,
    /// Maximum dimension for topological computations
    pub max_dimension: usize,
    /// Epsilon tolerance for topology operations
    // NOTE(review): not read anywhere in this file (`config` is marked
    // `dead_code` on the substrate) — presumably consumed by TDA code
    // elsewhere; confirm before removing.
    pub epsilon: f32,
}
impl Default for HypergraphConfig {
fn default() -> Self {
Self {
enable_sheaf: false,
max_dimension: 3,
epsilon: 1e-6,
}
}
}
/// Hypergraph substrate for higher-order relations
///
/// Provides a substrate for storing and querying hypergraphs, supporting:
/// - Hyperedges spanning multiple entities
/// - Topological data analysis (persistent homology, Betti numbers)
/// - Sheaf-theoretic consistency checks
pub struct HypergraphSubstrate {
    /// Configuration
    #[allow(dead_code)]
    config: HypergraphConfig,
    /// Entity storage (placeholder - could integrate with actual graph DB)
    entities: Arc<DashMap<EntityId, EntityRecord>>,
    /// Hyperedge index (relations spanning >2 entities)
    hyperedges: HyperedgeIndex,
    /// Simplicial complex for TDA
    // Mutated in `create_hyperedge`, hence the `&mut self` there.
    topology: SimplicialComplex,
    /// Sheaf structure for consistency (optional)
    /// `Some` only when `config.enable_sheaf` was set at construction.
    sheaf: Option<SheafStructure>,
}
/// Entity record (minimal placeholder)
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EntityRecord {
    // Duplicates the DashMap key; kept so the record is self-describing
    // when serialized on its own.
    id: EntityId,
    // Arbitrary caller-supplied JSON; no schema is enforced here.
    metadata: serde_json::Value,
}
impl HypergraphSubstrate {
    /// Create a new hypergraph substrate
    ///
    /// The sheaf structure is only allocated when
    /// `config.enable_sheaf` is true.
    pub fn new(config: HypergraphConfig) -> Self {
        let sheaf = if config.enable_sheaf {
            Some(SheafStructure::new())
        } else {
            None
        };
        Self {
            config,
            entities: Arc::new(DashMap::new()),
            hyperedges: HyperedgeIndex::new(),
            topology: SimplicialComplex::new(),
            sheaf,
        }
    }

    /// Add an entity to the hypergraph
    ///
    /// Re-inserting an existing id silently overwrites its metadata.
    pub fn add_entity(&self, id: EntityId, metadata: serde_json::Value) {
        self.entities.insert(id, EntityRecord { id, metadata });
    }

    /// Check if entity exists
    pub fn contains_entity(&self, id: &EntityId) -> bool {
        self.entities.contains_key(id)
    }

    /// Create hyperedge spanning multiple entities
    ///
    /// # Arguments
    ///
    /// * `entities` - Slice of entity IDs to connect
    /// * `relation` - Relation describing the connection
    ///
    /// # Returns
    ///
    /// The ID of the created hyperedge
    ///
    /// # Errors
    ///
    /// Returns `Error::NotFound` if any entity doesn't exist.
    // NOTE(review): if `sheaf.update_sections` ever returned Err, the edge
    // and simplex would already be inserted (partial state). Currently it
    // always returns Ok, so this cannot trigger — confirm if that changes.
    pub fn create_hyperedge(
        &mut self,
        entities: &[EntityId],
        relation: &Relation,
    ) -> Result<HyperedgeId, Error> {
        // Validate entity existence (from pseudocode)
        for entity in entities {
            if !self.contains_entity(entity) {
                return Err(Error::NotFound(format!("Entity not found: {}", entity)));
            }
        }
        // Create hyperedge in index
        let hyperedge_id = self.hyperedges.insert(entities, relation);
        // Update simplicial complex
        self.topology.add_simplex(entities);
        // Update sheaf sections if enabled
        if let Some(ref mut sheaf) = self.sheaf {
            sheaf.update_sections(hyperedge_id, entities)?;
        }
        Ok(hyperedge_id)
    }

    /// Query hyperedges containing a specific entity
    pub fn hyperedges_for_entity(&self, entity: &EntityId) -> Vec<HyperedgeId> {
        self.hyperedges.get_by_entity(entity)
    }

    /// Get hyperedge by ID
    pub fn get_hyperedge(&self, id: &HyperedgeId) -> Option<Hyperedge> {
        self.hyperedges.get(id)
    }

    /// Topological query: find persistent features
    ///
    /// Computes persistent homology features in the specified dimension
    /// over the given epsilon range. Delegates to the simplicial complex.
    pub fn persistent_homology(
        &self,
        dimension: usize,
        epsilon_range: (f32, f32),
    ) -> PersistenceDiagram {
        self.topology.persistent_homology(dimension, epsilon_range)
    }

    /// Query Betti numbers (topological invariants)
    ///
    /// Returns the Betti numbers up to max_dim, where:
    /// - β₀ = number of connected components
    /// - β₁ = number of 1-dimensional holes (loops)
    /// - β₂ = number of 2-dimensional holes (voids)
    /// - etc.
    pub fn betti_numbers(&self, max_dim: usize) -> Vec<usize> {
        (0..=max_dim)
            .map(|d| self.topology.betti_number(d))
            .collect()
    }

    /// Sheaf consistency: check local-to-global coherence
    ///
    /// Checks if local sections are consistent on their overlaps,
    /// following the sheaf axioms. Reports `NotConfigured` when the
    /// substrate was built with `enable_sheaf: false`.
    pub fn check_sheaf_consistency(&self, sections: &[SectionId]) -> SheafConsistencyResult {
        match &self.sheaf {
            Some(sheaf) => sheaf.check_consistency(sections),
            None => SheafConsistencyResult::NotConfigured,
        }
    }

    /// Execute a topological query
    ///
    /// Dispatches each `TopologicalQuery` variant to the matching method
    /// above and wraps the answer in a `HyperedgeResult`.
    pub fn query(&self, query: &TopologicalQuery) -> Result<HyperedgeResult, Error> {
        match query {
            TopologicalQuery::PersistentHomology {
                dimension,
                epsilon_range,
            } => {
                let diagram = self.persistent_homology(*dimension, *epsilon_range);
                Ok(HyperedgeResult::PersistenceDiagram(diagram.pairs))
            }
            TopologicalQuery::BettiNumbers { max_dimension } => {
                let betti = self.betti_numbers(*max_dimension);
                Ok(HyperedgeResult::BettiNumbers(betti))
            }
            TopologicalQuery::SheafConsistency { local_sections } => {
                let result = self.check_sheaf_consistency(local_sections);
                Ok(HyperedgeResult::SheafConsistency(result))
            }
        }
    }

    /// Get statistics about the hypergraph
    pub fn stats(&self) -> HypergraphStats {
        HypergraphStats {
            num_entities: self.entities.len(),
            num_hyperedges: self.hyperedges.len(),
            max_hyperedge_size: self.hyperedges.max_size(),
        }
    }
}
/// Statistics about the hypergraph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HypergraphStats {
    /// Number of stored entities.
    pub num_entities: usize,
    /// Number of stored hyperedges.
    pub num_hyperedges: usize,
    /// Largest arity among all hyperedges (0 when there are none).
    pub max_hyperedge_size: usize,
}
#[cfg(test)]
mod tests {
    use super::*;
    use exo_core::RelationType;

    /// End-to-end: registered entities can be joined by a 3-way hyperedge
    /// and queried back by id and by member entity.
    #[test]
    fn test_create_hyperedge() {
        let config = HypergraphConfig::default();
        let mut hg = HypergraphSubstrate::new(config);
        // Add entities
        let e1 = EntityId::new();
        let e2 = EntityId::new();
        let e3 = EntityId::new();
        hg.add_entity(e1, serde_json::json!({}));
        hg.add_entity(e2, serde_json::json!({}));
        hg.add_entity(e3, serde_json::json!({}));
        // Create 3-way hyperedge
        let relation = Relation {
            relation_type: RelationType::new("test"),
            properties: serde_json::json!({}),
        };
        let he_id = hg.create_hyperedge(&[e1, e2, e3], &relation).unwrap();
        // Verify
        assert!(hg.get_hyperedge(&he_id).is_some());
        assert_eq!(hg.hyperedges_for_entity(&e1).len(), 1);
    }

    /// Betti numbers of the empty complex are all zero.
    #[test]
    fn test_betti_numbers() {
        let config = HypergraphConfig::default();
        let hg = HypergraphSubstrate::new(config);
        // Empty hypergraph should have β₀ = 0 (no components)
        let betti = hg.betti_numbers(2);
        assert_eq!(betti, vec![0, 0, 0]);
    }
}

View File

@@ -0,0 +1,328 @@
//! Sheaf-theoretic structures for consistency checking
//!
//! Implements sheaf structures that enforce local-to-global consistency
//! across distributed data.
use dashmap::DashMap;
use exo_core::{EntityId, Error, HyperedgeId, SectionId, SheafConsistencyResult};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::sync::Arc;
/// Domain of a section (the entities it covers)
///
/// Stored as a set, so membership and intersection are O(1)/O(n) and the
/// order of entities is irrelevant to equality.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Domain {
    entities: HashSet<EntityId>,
}
impl Domain {
/// Create a new domain from entities
pub fn new(entities: impl IntoIterator<Item = EntityId>) -> Self {
Self {
entities: entities.into_iter().collect(),
}
}
/// Check if domain is empty
pub fn is_empty(&self) -> bool {
self.entities.is_empty()
}
/// Compute intersection with another domain
pub fn intersect(&self, other: &Domain) -> Domain {
let intersection = self
.entities
.intersection(&other.entities)
.copied()
.collect();
Domain {
entities: intersection,
}
}
/// Check if this domain contains an entity
pub fn contains(&self, entity: &EntityId) -> bool {
self.entities.contains(entity)
}
}
/// A section assigns data to a domain
///
/// In sheaf terms, an element of F(U): a local datum attached to the set
/// of entities U = `domain`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Section {
    /// Unique section identifier.
    pub id: SectionId,
    /// The entities this section covers.
    pub domain: Domain,
    /// The local data assigned to the domain (arbitrary JSON).
    pub data: serde_json::Value,
}
impl Section {
/// Create a new section
pub fn new(domain: Domain, data: serde_json::Value) -> Self {
Self {
id: SectionId::new(),
domain,
data,
}
}
}
/// Sheaf structure for consistency checking
///
/// A sheaf enforces that local data (sections) must agree on overlaps.
/// All maps are concurrent (`DashMap`) so the structure can be shared.
pub struct SheafStructure {
    /// Section storage
    sections: Arc<DashMap<SectionId, Section>>,
    /// Restriction maps (how to restrict a section to a subdomain)
    /// Key is (section_id, domain_hash) where domain_hash is a string representation
    // Acts as a memo cache for `restrict`; never evicted — unbounded growth
    // is acceptable only while domains stay small. TODO confirm.
    restriction_maps: Arc<DashMap<String, serde_json::Value>>,
    /// Hyperedge to section mapping
    hyperedge_sections: Arc<DashMap<HyperedgeId, Vec<SectionId>>>,
}
impl SheafStructure {
/// Create a new sheaf structure
pub fn new() -> Self {
Self {
sections: Arc::new(DashMap::new()),
restriction_maps: Arc::new(DashMap::new()),
hyperedge_sections: Arc::new(DashMap::new()),
}
}
/// Add a section to the sheaf
pub fn add_section(&self, section: Section) -> SectionId {
let id = section.id;
self.sections.insert(id, section);
id
}
/// Get a section by ID
pub fn get_section(&self, id: &SectionId) -> Option<Section> {
self.sections.get(id).map(|entry| entry.clone())
}
/// Restrict a section to a subdomain
///
/// This implements the restriction map ρ: F(U) → F(V) for V ⊆ U
pub fn restrict(&self, section: &Section, subdomain: &Domain) -> serde_json::Value {
// Create cache key as string (section_id + domain hash)
let cache_key = format!("{:?}-{:?}", section.id, subdomain.entities);
if let Some(cached) = self.restriction_maps.get(&cache_key) {
return cached.clone();
}
// Compute restriction (simplified: just filter data by domain)
let restricted = self.compute_restriction(&section.data, subdomain);
// Cache the result
self.restriction_maps.insert(cache_key, restricted.clone());
restricted
}
/// Compute restriction (placeholder implementation)
fn compute_restriction(
&self,
data: &serde_json::Value,
_subdomain: &Domain,
) -> serde_json::Value {
// Simplified: just clone the data
// A real implementation would filter data based on subdomain
data.clone()
}
/// Update sections when a hyperedge is created
pub fn update_sections(
&mut self,
hyperedge_id: HyperedgeId,
entities: &[EntityId],
) -> Result<(), Error> {
// Create a section for this hyperedge
let domain = Domain::new(entities.iter().copied());
let section = Section::new(domain, serde_json::json!({}));
let section_id = self.add_section(section);
// Associate with hyperedge
self.hyperedge_sections
.entry(hyperedge_id)
.or_insert_with(Vec::new)
.push(section_id);
Ok(())
}
/// Check sheaf consistency (from pseudocode: CheckSheafConsistency)
///
/// Verifies that local sections agree on their overlaps,
/// satisfying the sheaf axioms.
pub fn check_consistency(&self, section_ids: &[SectionId]) -> SheafConsistencyResult {
let mut inconsistencies = Vec::new();
// Get all sections
let sections: Vec<_> = section_ids
.iter()
.filter_map(|id| self.get_section(id))
.collect();
// Check all pairs of overlapping sections (from pseudocode)
for i in 0..sections.len() {
for j in (i + 1)..sections.len() {
let section_a = &sections[i];
let section_b = &sections[j];
let overlap = section_a.domain.intersect(&section_b.domain);
if overlap.is_empty() {
continue;
}
// Restriction maps (from pseudocode)
let restricted_a = self.restrict(section_a, &overlap);
let restricted_b = self.restrict(section_b, &overlap);
// Check agreement (from pseudocode)
if !approximately_equal(&restricted_a, &restricted_b, 1e-6) {
let discrepancy = compute_discrepancy(&restricted_a, &restricted_b);
inconsistencies.push(format!(
"Sections {} and {} disagree on overlap (discrepancy: {:.6})",
section_a.id.0, section_b.id.0, discrepancy
));
}
}
}
if inconsistencies.is_empty() {
SheafConsistencyResult::Consistent
} else {
SheafConsistencyResult::Inconsistent(inconsistencies)
}
}
/// Get sections associated with a hyperedge
pub fn get_hyperedge_sections(&self, hyperedge_id: &HyperedgeId) -> Vec<SectionId> {
self.hyperedge_sections
.get(hyperedge_id)
.map(|entry| entry.clone())
.unwrap_or_default()
}
}
impl Default for SheafStructure {
    /// Equivalent to [`SheafStructure::new`]: an empty sheaf.
    fn default() -> Self {
        Self::new()
    }
}
/// Sheaf inconsistency record
// NOTE(review): not currently constructed anywhere in this file —
// `check_consistency` reports inconsistencies as formatted strings instead.
// Confirm whether this structured form is the intended public API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SheafInconsistency {
    /// The pair of sections that disagree.
    pub sections: (SectionId, SectionId),
    /// The overlap domain on which they disagree.
    pub overlap: Domain,
    /// Scalar measure of how badly they disagree.
    pub discrepancy: f64,
}
/// Check if two JSON values are approximately equal
fn approximately_equal(a: &serde_json::Value, b: &serde_json::Value, epsilon: f64) -> bool {
match (a, b) {
(serde_json::Value::Number(na), serde_json::Value::Number(nb)) => {
let a_f64 = na.as_f64().unwrap_or(0.0);
let b_f64 = nb.as_f64().unwrap_or(0.0);
(a_f64 - b_f64).abs() < epsilon
}
(serde_json::Value::Array(aa), serde_json::Value::Array(ab)) => {
if aa.len() != ab.len() {
return false;
}
aa.iter()
.zip(ab.iter())
.all(|(x, y)| approximately_equal(x, y, epsilon))
}
(serde_json::Value::Object(oa), serde_json::Value::Object(ob)) => {
if oa.len() != ob.len() {
return false;
}
oa.iter().all(|(k, va)| {
ob.get(k)
.map(|vb| approximately_equal(va, vb, epsilon))
.unwrap_or(false)
})
}
_ => a == b,
}
}
/// Compute a scalar discrepancy between two JSON values.
///
/// Numbers: absolute difference. Arrays: mean element-wise discrepancy.
/// Objects: mean discrepancy over the union of keys, where a key present
/// on only one side counts as a full mismatch of 1.0. Anything else: 0.0
/// when equal, 1.0 otherwise.
///
/// Fix over the previous version: objects fell through to the exact-equality
/// arm (yielding only 0.0 or 1.0), which was inconsistent with
/// `approximately_equal`'s recursive, key-wise object comparison — object
/// sections flagged as "approximately unequal" got a meaningless 1.0.
fn compute_discrepancy(a: &serde_json::Value, b: &serde_json::Value) -> f64 {
    match (a, b) {
        (serde_json::Value::Number(na), serde_json::Value::Number(nb)) => {
            let a_f64 = na.as_f64().unwrap_or(0.0);
            let b_f64 = nb.as_f64().unwrap_or(0.0);
            (a_f64 - b_f64).abs()
        }
        (serde_json::Value::Array(aa), serde_json::Value::Array(ab)) => {
            let diffs: Vec<f64> = aa
                .iter()
                .zip(ab.iter())
                .map(|(x, y)| compute_discrepancy(x, y))
                .collect();
            // max(1) guards the empty-array case against division by zero.
            diffs.iter().sum::<f64>() / diffs.len().max(1) as f64
        }
        (serde_json::Value::Object(oa), serde_json::Value::Object(ob)) => {
            // Union of keys so asymmetric objects still register discrepancy.
            let keys: std::collections::HashSet<&String> = oa.keys().chain(ob.keys()).collect();
            if keys.is_empty() {
                return 0.0;
            }
            let total: f64 = keys
                .iter()
                .map(|k| match (oa.get(*k), ob.get(*k)) {
                    (Some(x), Some(y)) => compute_discrepancy(x, y),
                    _ => 1.0,
                })
                .sum();
            total / keys.len() as f64
        }
        _ => {
            if a == b {
                0.0
            } else {
                1.0
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Intersection keeps exactly the shared entities.
    #[test]
    fn test_domain_intersection() {
        let e1 = EntityId::new();
        let e2 = EntityId::new();
        let e3 = EntityId::new();
        let d1 = Domain::new(vec![e1, e2]);
        let d2 = Domain::new(vec![e2, e3]);
        let overlap = d1.intersect(&d2);
        assert!(!overlap.is_empty());
        assert!(overlap.contains(&e2));
        assert!(!overlap.contains(&e1));
    }

    /// Two sections with identical data on overlapping domains are
    /// reported consistent (restriction is currently the identity).
    #[test]
    fn test_sheaf_consistency() {
        let sheaf = SheafStructure::new();
        let e1 = EntityId::new();
        let e2 = EntityId::new();
        // Create two sections with same data on overlapping domains
        let domain1 = Domain::new(vec![e1, e2]);
        let section1 = Section::new(domain1, serde_json::json!({"value": 42}));
        let domain2 = Domain::new(vec![e2]);
        let section2 = Section::new(domain2, serde_json::json!({"value": 42}));
        let id1 = sheaf.add_section(section1);
        let id2 = sheaf.add_section(section2);
        // Should be consistent
        let result = sheaf.check_consistency(&[id1, id2]);
        assert!(matches!(result, SheafConsistencyResult::Consistent));
    }

    /// Numeric comparison honours the tolerance in both directions.
    #[test]
    fn test_approximately_equal() {
        let a = serde_json::json!(1.0);
        let b = serde_json::json!(1.0000001);
        assert!(approximately_equal(&a, &b, 1e-6));
        assert!(!approximately_equal(&a, &b, 1e-8));
    }
}

View File

@@ -0,0 +1,332 @@
//! Sparse Persistent Homology — ADR-029 Phase 2 integration.
//!
//! Standard persistent homology: O(n³) boundary matrix reduction.
//! This implementation: O(n · 1/ε) via Forward Push PPR approximation.
//!
//! Algorithm: Use PersonalizedPageRank (Forward Push) to build ε-approximate
//! k-hop neighborhood graph, then compute TDA only on the sparse neighborhood.
//! Reduces complexity from O(n³) to O(n/ε) for sparse graphs.
//!
//! ADR-029: ruvector-solver's Forward Push PPR is the canonical sparse TDA backend.
/// Sparse edge in the filtration complex
#[derive(Debug, Clone, Copy)]
pub struct SimplexEdge {
    /// First endpoint (node index).
    pub u: u32,
    /// Second endpoint (node index).
    pub v: u32,
    /// Filtration weight — the Euclidean distance between the endpoints.
    pub weight: f64,
}
/// A bar in the persistence diagram (birth, death, dimension)
#[derive(Debug, Clone)]
pub struct PersistenceBar {
    pub birth: f64,
    pub death: f64,
    pub dimension: usize,
    /// Persistence = death - birth
    pub persistence: f64,
}

impl PersistenceBar {
    /// Construct a bar for a feature of dimension `dim`, born at `birth`
    /// and dying at `death`; caches `persistence = death - birth`.
    pub fn new(birth: f64, death: f64, dim: usize) -> Self {
        let persistence = death - birth;
        PersistenceBar {
            birth,
            death,
            dimension: dim,
            persistence,
        }
    }

    /// A bar counts as significant when it outlives `threshold`.
    pub fn is_significant(&self, threshold: f64) -> bool {
        threshold < self.persistence
    }
}
/// Forward-Push PPR: O(1/ε) approximate k-hop neighborhood construction.
/// Simulates push-flow from source nodes to identify ε-dense neighborhoods.
pub struct ForwardPushPpr {
    /// Approximation parameter (smaller = more accurate, more work)
    pub epsilon: f64,
    /// Teleportation probability α (controls locality)
    pub alpha: f64,
}

impl ForwardPushPpr {
    /// Create a pusher with approximation parameter `epsilon` and the
    /// conventional teleportation probability α = 0.15.
    pub fn new(epsilon: f64) -> Self {
        Self {
            epsilon,
            alpha: 0.15,
        }
    }

    /// Compute approximate PPR scores from `source`.
    ///
    /// `adjacency` lists undirected weighted edges `(u, v, w)`; `n_nodes`
    /// bounds the node-id space. Returns `(node_id, score)` pairs for nodes
    /// whose approximate score exceeds `self.epsilon`.
    ///
    /// Performance fix over the previous version: the work queue was a
    /// `Vec` popped with `remove(0)` (O(n) per pop) and deduplicated with
    /// `contains` (O(n) per push), plus a full clone of each node's
    /// neighbor list per push — making the loop accidentally quadratic.
    /// Now uses a `VecDeque` with an in-queue flag array and borrows the
    /// neighbor list; the FIFO processing order is unchanged.
    pub fn push_from(
        &self,
        source: u32,
        adjacency: &[(u32, u32, f64)], // (u, v, weight) edges
        n_nodes: u32,
    ) -> Vec<(u32, f64)> {
        use std::collections::VecDeque;
        let n = n_nodes as usize;
        let mut ppr = vec![0.0f64; n];
        let mut residual = vec![0.0f64; n];
        residual[source as usize] = 1.0;
        // Build adjacency list once so each push is O(deg).
        let mut out_edges: Vec<Vec<(u32, f64)>> = vec![Vec::new(); n];
        let mut out_weights: Vec<f64> = vec![0.0f64; n];
        for &(u, v, w) in adjacency {
            out_edges[u as usize].push((v, w));
            out_edges[v as usize].push((u, w)); // undirected
            out_weights[u as usize] += w;
            out_weights[v as usize] += w;
        }
        let threshold = self.epsilon;
        let mut queue: VecDeque<u32> = VecDeque::new();
        let mut in_queue = vec![false; n];
        queue.push_back(source);
        in_queue[source as usize] = true;
        // Forward push iterations, capped to guarantee termination.
        let max_iters = (1.0 / self.epsilon) as usize * 2;
        let mut iter = 0;
        while let Some(u) = queue.pop_front() {
            in_queue[u as usize] = false;
            iter += 1;
            if iter > max_iters {
                break;
            }
            let d_u = out_weights[u as usize].max(1.0);
            let r_u = residual[u as usize];
            // Skip nodes whose residual mass fell below the push threshold.
            if r_u < threshold * d_u {
                continue;
            }
            // Push: keep α of the residual locally, spread the rest.
            ppr[u as usize] += self.alpha * r_u;
            let push_amount = (1.0 - self.alpha) * r_u;
            residual[u as usize] = 0.0;
            for &(v, w) in &out_edges[u as usize] {
                residual[v as usize] += push_amount * w / d_u;
                if residual[v as usize] >= threshold * out_weights[v as usize].max(1.0)
                    && !in_queue[v as usize]
                {
                    queue.push_back(v);
                    in_queue[v as usize] = true;
                }
            }
        }
        // Return nodes with significant PPR scores.
        ppr.into_iter()
            .enumerate()
            .filter(|(_, p)| *p > threshold)
            .map(|(i, p)| (i as u32, p))
            .collect()
    }
}
/// Sparse Vietoris-Rips complex builder
pub struct SparseRipsComplex {
    // Internal PPR engine used to select the ε-dense 1-skeleton; its own
    // epsilon is derived from (and much smaller than) the user epsilon.
    ppr: ForwardPushPpr,
    /// Maximum filtration radius
    pub max_radius: f64,
    /// User-facing sparsification parameter (controls how many distant edges to skip)
    pub epsilon: f64,
}
impl SparseRipsComplex {
    /// Build a sparse Rips complex with sparsification `epsilon` and
    /// filtration cutoff `max_radius`.
    pub fn new(epsilon: f64, max_radius: f64) -> Self {
        // PPR uses a smaller internal epsilon to ensure neighborhood connectivity;
        // the user epsilon governs filtration-level sparsification, not PPR convergence
        let ppr_epsilon = (epsilon * 0.01).max(1e-4);
        Self {
            ppr: ForwardPushPpr::new(ppr_epsilon),
            max_radius,
            epsilon,
        }
    }

    /// Build sparse 1-skeleton (edges) for filtration.
    /// Uses PPR to select only the ε-dense neighborhood, skipping distant edges.
    ///
    /// NOTE(review): candidate-edge construction is all-pairs O(n²) distance
    /// checks before sparsification — acceptable for the point counts used
    /// in tests; confirm for larger inputs.
    pub fn sparse_1_skeleton(&self, points: &[Vec<f64>]) -> Vec<SimplexEdge> {
        let n = points.len() as u32;
        // Build distance graph at max_radius with unit weights for stable PPR
        // (inverse-distance weights produce very large degree sums that break
        // the r[u]/d[u] >= epsilon threshold; unit weights keep d[u] = degree)
        let mut all_edges = Vec::new();
        for i in 0..n {
            for j in (i + 1)..n {
                let dist = euclidean_dist(&points[i as usize], &points[j as usize]);
                if dist <= self.max_radius {
                    all_edges.push((i, j, 1.0f64));
                }
            }
        }
        // Use PPR to find ε-dense subgraph; the (min, max) key normalises
        // direction so each undirected edge is kept once.
        let mut selected_edges = std::collections::HashSet::new();
        for source in 0..n {
            let neighbors = self.ppr.push_from(source, &all_edges, n);
            for (nbr, _) in neighbors {
                if nbr != source {
                    let key = (source.min(nbr), source.max(nbr));
                    selected_edges.insert(key);
                }
            }
        }
        // Convert to SimplexEdge with filtration weights (true distances,
        // recomputed since PPR ran on unit weights).
        selected_edges
            .into_iter()
            .filter_map(|(u, v)| {
                let dist = euclidean_dist(&points[u as usize], &points[v as usize]);
                if dist <= self.max_radius {
                    Some(SimplexEdge { u, v, weight: dist })
                } else {
                    None
                }
            })
            .collect()
    }

    /// Compute H0 persistence (connected components) from sparse 1-skeleton.
    ///
    /// Kruskal-style sweep: edges are processed in increasing weight order;
    /// each union of two components emits one finite bar (born at 0, dying
    /// at the merging edge's weight). Surviving components emit no bar.
    pub fn compute_h0(&self, n_points: usize, edges: &[SimplexEdge]) -> Vec<PersistenceBar> {
        // Union-Find for connected components
        let mut parent: Vec<usize> = (0..n_points).collect();
        // All vertices are born at filtration time 0 in a Rips complex.
        let birth: Vec<f64> = vec![0.0; n_points];
        let mut bars = Vec::new();
        // Find with path compression.
        fn find(parent: &mut Vec<usize>, x: usize) -> usize {
            if parent[x] != x {
                parent[x] = find(parent, parent[x]);
            }
            parent[x]
        }
        // Sort edges by weight (filtration order)
        let mut sorted_edges: Vec<&SimplexEdge> = edges.iter().collect();
        sorted_edges.sort_unstable_by(|a, b| {
            a.weight
                .partial_cmp(&b.weight)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        for edge in sorted_edges {
            let pu = find(&mut parent, edge.u as usize);
            let pv = find(&mut parent, edge.v as usize);
            if pu != pv {
                // Merge: kill the younger component
                // (births are all 0 here, so ties resolve to pu as elder).
                let birth_young = birth[pu].max(birth[pv]);
                bars.push(PersistenceBar::new(birth_young, edge.weight, 0));
                // Union
                let elder = if birth[pu] <= birth[pv] { pu } else { pv };
                let younger = if elder == pu { pv } else { pu };
                parent[younger] = elder;
            }
        }
        bars
    }

    /// Full sparse persistent homology pipeline (H0 + approximate H1).
    ///
    /// NOTE(review): the H1 bars are a coarse heuristic — it counts
    /// excess-over-spanning-tree edges and synthesises bars from their
    /// weights, not a true boundary-matrix reduction. Treat H1 output as
    /// indicative only.
    pub fn compute(&self, points: &[Vec<f64>]) -> PersistenceDiagram {
        let edges = self.sparse_1_skeleton(points);
        let h0_bars = self.compute_h0(points.len(), &edges);
        // H1 (loops): identify edges that create cycles in the sparse complex
        // Approximate: count edges above spanning tree count
        let h1_count = edges.len().saturating_sub(points.len().saturating_sub(1));
        let h1_bars: Vec<PersistenceBar> = edges
            .iter()
            .take(h1_count)
            .filter_map(|e| {
                if e.weight < self.max_radius * 0.8 {
                    Some(PersistenceBar::new(e.weight * 0.5, e.weight, 1))
                } else {
                    None
                }
            })
            .collect();
        PersistenceDiagram {
            h0: h0_bars,
            h1: h1_bars,
            n_points: points.len(),
        }
    }
}
/// Euclidean (L2) distance between two coordinate slices.
///
/// Like the original, silently truncates to the shorter slice when the
/// lengths differ (a consequence of `zip`).
fn euclidean_dist(a: &[f64], b: &[f64]) -> f64 {
    let mut sum_sq = 0.0f64;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        sum_sq += d * d;
    }
    sum_sq.sqrt()
}
/// Persistence diagram produced by the sparse TDA pipeline.
#[derive(Debug)]
pub struct PersistenceDiagram {
    /// H0: connected component bars
    pub h0: Vec<PersistenceBar>,
    /// H1: loop bars
    pub h1: Vec<PersistenceBar>,
    /// Number of input points the diagram was computed from.
    pub n_points: usize,
}

impl PersistenceDiagram {
    /// H0 bars whose persistence exceeds `threshold`.
    pub fn significant_h0(&self, threshold: f64) -> Vec<&PersistenceBar> {
        self.h0
            .iter()
            .filter(|b| b.is_significant(threshold))
            .collect()
    }

    /// Number of connected components (β₀) at the end of the filtration.
    ///
    /// `SparseRipsComplex::compute_h0` records exactly one finite-death bar
    /// per union-find merge, so the surviving component count is
    /// `n_points - finite_merges`.
    ///
    /// Fix over the previous version, which counted bars with
    /// `death >= 1e9` and added 1: merge bars always die at a finite edge
    /// weight (≤ max_radius), so that count was always 0 and β₀ came out
    /// as 1 regardless of how many components actually remained.
    pub fn betti_0(&self) -> usize {
        if self.n_points == 0 {
            return 0;
        }
        let finite_merges = self.h0.iter().filter(|b| b.death < 1e9).count();
        // At least one component survives a non-empty point set.
        self.n_points.saturating_sub(finite_merges).max(1)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// PPR from a triangle vertex must surface at least one scored node.
    #[test]
    fn test_ppr_push_returns_neighbors() {
        let ppr = ForwardPushPpr::new(0.01);
        // Triangle graph
        let edges = vec![(0u32, 1u32, 1.0), (1, 2, 1.0), (0, 2, 1.0)];
        let result = ppr.push_from(0, &edges, 3);
        assert!(!result.is_empty(), "PPR should find neighbors");
    }

    /// Points spaced 0.3 apart on a line, radius 2.0: skeleton is non-empty.
    #[test]
    fn test_sparse_rips_on_line() {
        let rips = SparseRipsComplex::new(0.1, 2.0);
        let points: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64 * 0.3]).collect();
        let edges = rips.sparse_1_skeleton(&points);
        assert!(!edges.is_empty(), "Nearby points should form edges");
    }

    /// Two clusters separated by a gap far beyond max_radius should still
    /// yield merge bars within each cluster.
    #[test]
    fn test_h0_detects_components() {
        let rips = SparseRipsComplex::new(0.05, 1.0);
        // Two clusters far apart
        let mut points: Vec<Vec<f64>> = (0..5).map(|i| vec![i as f64 * 0.1]).collect();
        points.extend((0..5).map(|i| vec![10.0 + i as f64 * 0.1]));
        let diagram = rips.compute(&points);
        // Should detect long-lived H0 bar from inter-cluster gap
        assert!(
            !diagram.h0.is_empty(),
            "Should find connected component bars"
        );
    }

    /// Significance is a strict comparison against persistence (2.4 here).
    #[test]
    fn test_persistence_bar_significance() {
        let bar = PersistenceBar::new(0.1, 2.5, 0);
        assert!(bar.is_significant(1.0));
        assert!(!bar.is_significant(3.0));
    }
}

View File

@@ -0,0 +1,371 @@
//! Topological Data Analysis (TDA) structures
//!
//! Implements simplicial complexes, persistent homology computation,
//! and Betti number calculations.
use exo_core::EntityId;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
/// A simplex (generalization of triangle to arbitrary dimensions)
///
/// Invariant: `vertices` is sorted by id and deduplicated (enforced in
/// [`Simplex::new`]), so `Eq`/`Hash` are insensitive to construction order.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Simplex {
    /// Vertices of the simplex (sorted by id, no duplicates)
    pub vertices: Vec<EntityId>,
}
impl Simplex {
    /// Create a new simplex from vertices
    ///
    /// Vertices are sorted by id and deduplicated so that two simplices built
    /// from the same vertex set compare equal regardless of input order.
    pub fn new(mut vertices: Vec<EntityId>) -> Self {
        vertices.sort_by_key(|v| v.0);
        // `dedup` only removes *consecutive* duplicates, so it must run after the sort.
        vertices.dedup();
        Self { vertices }
    }
    /// Get the dimension of this simplex (0 for point, 1 for edge, 2 for triangle, etc.)
    pub fn dimension(&self) -> usize {
        // saturating_sub keeps the degenerate empty simplex at dimension 0.
        self.vertices.len().saturating_sub(1)
    }
    /// Get the codimension-1 faces (facets) of this simplex
    ///
    /// Returns only the faces of one dimension lower, each obtained by
    /// dropping a single vertex — not every sub-simplex. Lower-dimensional
    /// faces are reached by applying `faces` recursively.
    pub fn faces(&self) -> Vec<Simplex> {
        if self.vertices.is_empty() {
            return vec![];
        }
        let mut faces = Vec::new();
        // Drop each vertex in turn to produce the codimension-1 faces.
        for i in 0..self.vertices.len() {
            let mut face_vertices = self.vertices.clone();
            face_vertices.remove(i);
            if !face_vertices.is_empty() {
                faces.push(Simplex::new(face_vertices));
            }
        }
        faces
    }
}
/// Simplicial complex for topological data analysis
///
/// A simplicial complex is a collection of simplices (points, edges, triangles, etc.)
/// that are "glued together" in a consistent way.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SimplicialComplex {
    /// All simplices in the complex, organized by dimension
    /// (key = dimension, value = set of all simplices of that dimension)
    simplices: HashMap<usize, HashSet<Simplex>>,
    /// Maximum dimension of any simplex inserted so far
    max_dimension: usize,
}
impl SimplicialComplex {
    /// Create a new empty simplicial complex
    pub fn new() -> Self {
        Self {
            simplices: HashMap::new(),
            max_dimension: 0,
        }
    }

    /// Add a simplex and all its faces to the complex
    ///
    /// The complex is kept closed under the face relation: adding a triangle
    /// also adds its three edges and three vertices.
    pub fn add_simplex(&mut self, vertices: &[EntityId]) {
        if vertices.is_empty() {
            return;
        }
        let simplex = Simplex::new(vertices.to_vec());
        let dim = simplex.dimension();
        // `HashSet::insert` returns false when the value was already present.
        // Simplices are only ever inserted through this method, which always
        // recurses into their faces, so a simplex already in the set implies
        // all of its faces are present too — stop here instead of re-walking
        // the (otherwise exponentially redundant) face lattice on duplicates.
        if !self
            .simplices
            .entry(dim)
            .or_insert_with(HashSet::new)
            .insert(simplex.clone())
        {
            return;
        }
        if dim > self.max_dimension {
            self.max_dimension = dim;
        }
        // Add all codimension-1 faces; each recursively adds its own faces.
        for face in simplex.faces() {
            self.add_simplex(&face.vertices);
        }
    }

    /// Get all simplices of a given dimension
    pub fn get_simplices(&self, dimension: usize) -> Vec<Simplex> {
        self.simplices
            .get(&dimension)
            .map(|set| set.iter().cloned().collect())
            .unwrap_or_default()
    }

    /// Get the number of simplices of a given dimension
    pub fn count_simplices(&self, dimension: usize) -> usize {
        self.simplices
            .get(&dimension)
            .map(|set| set.len())
            .unwrap_or(0)
    }

    /// Compute Betti number for a given dimension
    ///
    /// Betti numbers are topological invariants:
    /// - β₀ = number of connected components
    /// - β₁ = number of 1-dimensional holes (loops)
    /// - β₂ = number of 2-dimensional holes (voids)
    ///
    /// Only β₀ is implemented; higher dimensions return 0 (a full
    /// implementation requires boundary matrix computation and Smith
    /// normal form).
    pub fn betti_number(&self, dimension: usize) -> usize {
        if dimension == 0 {
            // β₀ = number of connected components
            self.count_connected_components()
        } else {
            // Stub for dimension > 0.
            0
        }
    }

    /// Count connected components (β₀) via union-find over the 1-skeleton
    fn count_connected_components(&self) -> usize {
        let vertices = self.get_simplices(0);
        if vertices.is_empty() {
            return 0;
        }
        // Union-find: each vertex starts as the root of its own component.
        let mut parent: HashMap<EntityId, EntityId> = HashMap::new();
        for simplex in &vertices {
            if let Some(v) = simplex.vertices.first() {
                parent.insert(*v, *v);
            }
        }
        // Each edge merges the components of its two endpoints.
        let edges = self.get_simplices(1);
        for edge in edges {
            if edge.vertices.len() == 2 {
                let v1 = edge.vertices[0];
                let v2 = edge.vertices[1];
                self.union(&mut parent, v1, v2);
            }
        }
        // The number of distinct roots is the component count.
        let mut roots = HashSet::new();
        for v in parent.keys() {
            roots.insert(self.find(&parent, *v));
        }
        roots.len()
    }

    /// Union-find: follow parent links until reaching the root of `x`
    fn find(&self, parent: &HashMap<EntityId, EntityId>, mut x: EntityId) -> EntityId {
        while parent.get(&x) != Some(&x) {
            if let Some(&p) = parent.get(&x) {
                x = p;
            } else {
                // `x` was never registered; treat it as its own root.
                break;
            }
        }
        x
    }

    /// Union-find: merge the components containing `x` and `y`
    fn union(&self, parent: &mut HashMap<EntityId, EntityId>, x: EntityId, y: EntityId) {
        let root_x = self.find(parent, x);
        let root_y = self.find(parent, y);
        if root_x != root_y {
            parent.insert(root_x, root_y);
        }
    }

    /// Build filtration (nested sequence of complexes) for persistent homology
    ///
    /// Stub: returns an empty filtration. A full implementation would assign
    /// filtration values to simplices based on some metric (e.g., edge
    /// weights, distances).
    pub fn filtration(&self, _epsilon_range: (f32, f32)) -> Filtration {
        Filtration {
            complexes: vec![],
            epsilon_values: vec![],
        }
    }

    /// Compute persistent homology (stub implementation)
    ///
    /// Returns a persistence diagram showing birth and death of topological
    /// features. This is a placeholder — a full implementation requires:
    /// - Building a filtration
    /// - Constructing boundary matrices
    /// - Column reduction algorithm
    pub fn persistent_homology(
        &self,
        _dimension: usize,
        _epsilon_range: (f32, f32),
    ) -> PersistenceDiagram {
        // Stub: return empty diagram.
        PersistenceDiagram { pairs: vec![] }
    }
}
impl Default for SimplicialComplex {
fn default() -> Self {
Self::new()
}
}
/// Filtration: nested sequence of simplicial complexes
///
/// Used for persistent homology computation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Filtration {
    /// Sequence of complexes
    pub complexes: Vec<SimplicialComplex>,
    /// Epsilon values at which complexes change
    /// (presumably index-aligned with `complexes` — TODO confirm once the
    /// filtration builder is implemented; currently both are always empty)
    pub epsilon_values: Vec<f32>,
}
impl Filtration {
    /// Get birth time of a simplex (stub)
    ///
    /// Always returns 0.0; a real implementation would look up the filtration
    /// value at which the indexed simplex first appears.
    pub fn birth_time(&self, _simplex_index: usize) -> f32 {
        0.0
    }
}
/// Persistence diagram showing birth and death of topological features
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistenceDiagram {
    /// Birth-death pairs (birth_time, death_time)
    /// death_time = infinity (f32::INFINITY) for features that never die
    pub pairs: Vec<(f32, f32)>,
}
impl PersistenceDiagram {
    /// Get persistent features (those with significant lifetime)
    ///
    /// A pair is kept when it never dies (infinite death time) or when its
    /// lifetime `death - birth` is at least `min_persistence`.
    pub fn significant_features(&self, min_persistence: f32) -> Vec<(f32, f32)> {
        let mut kept = Vec::new();
        for &(birth, death) in &self.pairs {
            if death.is_infinite() || death - birth >= min_persistence {
                kept.push((birth, death));
            }
        }
        kept
    }
}
/// Column reduction for persistent homology (from pseudocode)
///
/// This is the standard algorithm from computational topology.
/// Currently a stub that returns an empty matrix — a full implementation
/// requires a populated boundary matrix representation.
#[allow(dead_code)]
fn column_reduction(_matrix: &BoundaryMatrix) -> BoundaryMatrix {
    // Stub implementation
    BoundaryMatrix { columns: vec![] }
}
/// Boundary matrix for homology computation
#[derive(Debug, Clone)]
struct BoundaryMatrix {
    // One Vec per column; each inner Vec holds the row indices of that
    // column's nonzero entries (sparse column representation).
    columns: Vec<Vec<usize>>,
}
impl BoundaryMatrix {
    /// Stub: always `None`.
    /// NOTE(review): in the standard reduction algorithm, `low(j)` is the
    /// largest row index of a nonzero entry in column `j` — confirm intent
    /// when implementing.
    #[allow(dead_code)]
    fn low(&self, _col: usize) -> Option<usize> {
        None
    }
    /// Stub: always empty. Intended to return the nonzero row indices of a column.
    #[allow(dead_code)]
    fn column(&self, _index: usize) -> Vec<usize> {
        vec![]
    }
    /// Number of columns in the matrix
    #[allow(dead_code)]
    fn num_cols(&self) -> usize {
        self.columns.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simplex_dimension() {
        let a = EntityId::new();
        let b = EntityId::new();
        let c = EntityId::new();

        // Dimension is vertex count minus one.
        assert_eq!(Simplex::new(vec![a]).dimension(), 0); // point
        assert_eq!(Simplex::new(vec![a, b]).dimension(), 1); // edge
        assert_eq!(Simplex::new(vec![a, b, c]).dimension(), 2); // triangle
    }

    #[test]
    fn test_simplex_faces() {
        let a = EntityId::new();
        let b = EntityId::new();
        let c = EntityId::new();

        // A triangle's faces are its three bounding edges.
        let faces = Simplex::new(vec![a, b, c]).faces();
        assert_eq!(faces.len(), 3);
        for face in &faces {
            assert_eq!(face.dimension(), 1);
        }
    }

    #[test]
    fn test_simplicial_complex() {
        let a = EntityId::new();
        let b = EntityId::new();
        let c = EntityId::new();
        let mut complex = SimplicialComplex::new();

        // Adding one triangle must also add its 3 edges and 3 vertices.
        complex.add_simplex(&[a, b, c]);
        assert_eq!(complex.count_simplices(0), 3);
        assert_eq!(complex.count_simplices(1), 3);
        assert_eq!(complex.count_simplices(2), 1);

        // A single triangle is connected, so β₀ = 1.
        assert_eq!(complex.betti_number(0), 1);
    }

    #[test]
    fn test_betti_number_disconnected() {
        let mut complex = SimplicialComplex::new();

        // Two edges on disjoint vertex sets form two components.
        complex.add_simplex(&[EntityId::new(), EntityId::new()]);
        complex.add_simplex(&[EntityId::new(), EntityId::new()]);
        assert_eq!(complex.betti_number(0), 2);
    }
}