Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,389 @@
//! Distances between Persistence Diagrams
//!
//! Bottleneck and Wasserstein distances for comparing topological signatures.
use super::{BirthDeathPair, PersistenceDiagram};
/// Bottleneck distance between persistence diagrams
///
/// d_∞(D1, D2) = inf_γ sup_p ||p - γ(p)||_∞
///
/// where γ ranges over bijections between diagrams (with diagonal).
// Zero-sized marker type: all functionality lives in associated functions.
#[derive(Debug, Clone)]
pub struct BottleneckDistance;
impl BottleneckDistance {
    /// Compute the bottleneck distance between `d1` and `d2`, restricted
    /// to features of homological dimension `dim`.
    ///
    /// Essential (infinite) pairs are excluded; only finite birth/death
    /// pairs participate in the matching.
    pub fn compute(d1: &PersistenceDiagram, d2: &PersistenceDiagram, dim: usize) -> f64 {
        let pts1: Vec<(f64, f64)> = d1
            .pairs_of_dim(dim)
            .filter(|p| !p.is_essential())
            .map(|p| (p.birth, p.death.unwrap_or(f64::INFINITY)))
            .collect();
        let pts2: Vec<(f64, f64)> = d2
            .pairs_of_dim(dim)
            .filter(|p| !p.is_essential())
            .map(|p| (p.birth, p.death.unwrap_or(f64::INFINITY)))
            .collect();
        Self::bottleneck_finite(&pts1, &pts2)
    }
    /// Bottleneck distance between two finite point sets.
    ///
    /// The optimal value is always one of the candidate distances
    /// (point-to-point L∞ or point-to-diagonal), so we scan the sorted,
    /// de-duplicated candidates for the smallest threshold that admits a
    /// matching.
    fn bottleneck_finite(pts1: &[(f64, f64)], pts2: &[(f64, f64)]) -> f64 {
        if pts1.is_empty() && pts2.is_empty() {
            return 0.0;
        }
        // Collect candidate thresholds: all pairwise distances plus all
        // distances to the diagonal.
        let mut all_distances =
            Vec::with_capacity(pts1.len() * pts2.len() + pts1.len() + pts2.len());
        for &p1 in pts1 {
            for &p2 in pts2 {
                all_distances.push(Self::l_inf(p1, p2));
            }
        }
        all_distances.extend(pts1.iter().map(|&p| Self::diag_dist(p)));
        all_distances.extend(pts2.iter().map(|&p| Self::diag_dist(p)));
        if all_distances.is_empty() {
            return 0.0;
        }
        // Sort ascending and drop duplicates so each threshold is tested once.
        all_distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        all_distances.dedup();
        for &threshold in &all_distances {
            if Self::can_match(pts1, pts2, threshold) {
                return threshold;
            }
        }
        // Fallback: at the largest candidate everything reaches the diagonal.
        *all_distances.last().unwrap_or(&0.0)
    }
    /// Check whether a perfect matching (points may also be matched to the
    /// diagonal) exists with every matched distance <= `threshold`.
    ///
    /// Uses greedy first-fit assignment: fast, but may reject a threshold
    /// that an optimal bipartite matching would accept.
    fn can_match(pts1: &[(f64, f64)], pts2: &[(f64, f64)], threshold: f64) -> bool {
        let mut used2 = vec![false; pts2.len()];
        for &p1 in pts1 {
            // Try to match p1 to the first available point of pts2.
            let mut found = false;
            for (j, &p2) in pts2.iter().enumerate() {
                if !used2[j] && Self::l_inf(p1, p2) <= threshold {
                    used2[j] = true;
                    found = true;
                    break;
                }
            }
            // Otherwise p1 must be absorbable by the diagonal.
            if !found && Self::diag_dist(p1) > threshold {
                return false;
            }
        }
        // Every unmatched point of pts2 must also reach the diagonal.
        pts2.iter()
            .enumerate()
            .all(|(j, &p2)| used2[j] || Self::diag_dist(p2) <= threshold)
    }
    /// L∞ (Chebyshev) distance between two diagram points.
    fn l_inf(p1: (f64, f64), p2: (f64, f64)) -> f64 {
        (p1.0 - p2.0).abs().max((p1.1 - p2.1).abs())
    }
    /// L∞ distance from a point to the diagonal y = x.
    fn diag_dist(p: (f64, f64)) -> f64 {
        (p.1 - p.0) / 2.0
    }
}
/// Wasserstein distance between persistence diagrams
///
/// W_p(D1, D2) = (inf_γ Σ ||p - γ(p)||_∞^p)^{1/p}
#[derive(Debug, Clone)]
pub struct WassersteinDistance {
    /// Power p (usually 1 or 2)
    // NOTE(review): `new` clamps to p >= 1.0, but the field is public, so a
    // caller can still construct or set p < 1 directly — verify callers.
    pub p: f64,
}
impl WassersteinDistance {
    /// Build a Wasserstein distance with exponent `p`, clamped to at least 1.
    pub fn new(p: f64) -> Self {
        Self { p: p.max(1.0) }
    }
    /// W_p distance between the dimension-`dim` parts of two diagrams.
    /// Essential (infinite) pairs are ignored.
    pub fn compute(&self, d1: &PersistenceDiagram, d2: &PersistenceDiagram, dim: usize) -> f64 {
        let collect_finite = |d: &PersistenceDiagram| -> Vec<(f64, f64)> {
            d.pairs_of_dim(dim)
                .filter(|pair| !pair.is_essential())
                .map(|pair| (pair.birth, pair.death.unwrap_or(f64::INFINITY)))
                .collect()
        };
        let left = collect_finite(d1);
        let right = collect_finite(d2);
        self.wasserstein_finite(&left, &right)
    }
    /// Greedy (approximate, not optimal-transport) W_p between two finite
    /// point sets: each left point takes its cheapest available partner or
    /// the diagonal; leftover right points pay their diagonal cost.
    fn wasserstein_finite(&self, pts1: &[(f64, f64)], pts2: &[(f64, f64)]) -> f64 {
        if pts1.is_empty() && pts2.is_empty() {
            return 0.0;
        }
        let mut taken = vec![false; pts2.len()];
        let mut cost_sum = 0.0;
        for &lhs in pts1 {
            // Matching to the diagonal is always available as a baseline.
            let mut cheapest = Self::diag_dist(lhs).powf(self.p);
            let mut choice: Option<usize> = None;
            for (idx, &rhs) in pts2.iter().enumerate() {
                if taken[idx] {
                    continue;
                }
                let candidate = Self::l_inf(lhs, rhs).powf(self.p);
                if candidate < cheapest {
                    cheapest = candidate;
                    choice = Some(idx);
                }
            }
            cost_sum += cheapest;
            if let Some(idx) = choice {
                taken[idx] = true;
            }
        }
        // Unmatched right-hand points are sent to the diagonal.
        cost_sum += pts2
            .iter()
            .enumerate()
            .filter(|&(idx, _)| !taken[idx])
            .map(|(_, &rhs)| Self::diag_dist(rhs).powf(self.p))
            .sum::<f64>();
        cost_sum.powf(1.0 / self.p)
    }
    /// Chebyshev (L∞) distance between two points.
    fn l_inf(a: (f64, f64), b: (f64, f64)) -> f64 {
        (a.0 - b.0).abs().max((a.1 - b.1).abs())
    }
    /// L∞ distance from a point to the diagonal.
    fn diag_dist(pt: (f64, f64)) -> f64 {
        (pt.1 - pt.0) / 2.0
    }
}
/// Persistence landscape for machine learning
#[derive(Debug, Clone)]
pub struct PersistenceLandscape {
    /// Landscape functions λ_k(t)
    // landscapes[k][i] holds λ_{k+1} evaluated at grid[i].
    pub landscapes: Vec<Vec<f64>>,
    /// Grid points
    // Uniformly spaced sample locations covering the finite pairs' range.
    pub grid: Vec<f64>,
    /// Number of landscape functions
    pub num_landscapes: usize,
}
impl PersistenceLandscape {
    /// Compute landscape from persistence diagram
    ///
    /// Builds `num_landscapes` piecewise-linear functions λ_1 >= λ_2 >= ...
    /// sampled at `resolution` uniformly spaced grid points. Essential pairs
    /// and pairs with infinite death are dropped before sampling.
    pub fn from_diagram(
        diagram: &PersistenceDiagram,
        dim: usize,
        num_landscapes: usize,
        resolution: usize,
    ) -> Self {
        let pairs: Vec<(f64, f64)> = diagram
            .pairs_of_dim(dim)
            .filter(|p| !p.is_essential())
            .map(|p| (p.birth, p.death.unwrap_or(f64::INFINITY)))
            .filter(|p| p.1.is_finite())
            .collect();
        // No finite pairs: all-zero landscapes on a default [0, 1) grid.
        if pairs.is_empty() {
            return Self {
                landscapes: vec![vec![0.0; resolution]; num_landscapes],
                grid: (0..resolution)
                    .map(|i| i as f64 / resolution as f64)
                    .collect(),
                num_landscapes,
            };
        }
        // Determine grid
        // Span [min birth, max death]; the 1e-10 floor avoids a zero-width
        // range when every pair coincides.
        let min_t = pairs.iter().map(|p| p.0).fold(f64::INFINITY, f64::min);
        let max_t = pairs.iter().map(|p| p.1).fold(f64::NEG_INFINITY, f64::max);
        let range = (max_t - min_t).max(1e-10);
        let grid: Vec<f64> = (0..resolution)
            .map(|i| min_t + (i as f64 / (resolution - 1).max(1) as f64) * range)
            .collect();
        // Compute tent functions at each grid point
        let mut landscapes = vec![vec![0.0; resolution]; num_landscapes];
        for (gi, &t) in grid.iter().enumerate() {
            // Evaluate all tent functions at t
            // Each pair (b, d) contributes the tent min(t - b, d - t) on [b, d].
            let mut values: Vec<f64> = pairs
                .iter()
                .map(|&(b, d)| {
                    if t < b || t > d {
                        0.0
                    } else if t <= (b + d) / 2.0 {
                        t - b
                    } else {
                        d - t
                    }
                })
                .collect();
            // Sort descending
            values.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
            // Take top k
            // λ_k(t) is the k-th largest tent value; missing entries stay 0.
            for (k, &v) in values.iter().take(num_landscapes).enumerate() {
                landscapes[k][gi] = v;
            }
        }
        Self {
            landscapes,
            grid,
            num_landscapes,
        }
    }
    /// L2 distance between landscapes
    ///
    /// Riemann-sum approximation of the L2 norm of the difference; returns
    /// ∞ on shape mismatch. NOTE(review): only grid *lengths* are compared,
    /// not the grid values — comparing landscapes built over different
    /// ranges yields a number that is not meaningful. Verify callers use
    /// matching grids.
    pub fn l2_distance(&self, other: &Self) -> f64 {
        if self.grid.len() != other.grid.len() || self.num_landscapes != other.num_landscapes {
            return f64::INFINITY;
        }
        let n = self.grid.len();
        // Uniform grid spacing (1.0 fallback for a single-point grid).
        let dt = if n > 1 {
            (self.grid[n - 1] - self.grid[0]) / (n - 1) as f64
        } else {
            1.0
        };
        let mut total = 0.0;
        for k in 0..self.num_landscapes {
            for i in 0..n {
                let diff = self.landscapes[k][i] - other.landscapes[k][i];
                total += diff * diff * dt;
            }
        }
        total.sqrt()
    }
    /// Get feature vector (flattened landscape)
    ///
    /// Concatenates λ_1, λ_2, ... into one vector of length
    /// num_landscapes × resolution, suitable as an ML feature.
    pub fn to_vector(&self) -> Vec<f64> {
        self.landscapes
            .iter()
            .flat_map(|l| l.iter().copied())
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Shared fixture: two 0-dim pairs and one 1-dim pair, all finite.
    fn sample_diagram() -> PersistenceDiagram {
        let mut d = PersistenceDiagram::new();
        d.add(BirthDeathPair::finite(0, 0.0, 1.0));
        d.add(BirthDeathPair::finite(0, 0.5, 1.5));
        d.add(BirthDeathPair::finite(1, 0.2, 0.8));
        d
    }
    #[test]
    fn test_bottleneck_same() {
        // A diagram's distance to itself is zero.
        let d = sample_diagram();
        let dist = BottleneckDistance::compute(&d, &d, 0);
        assert!(dist < 1e-10);
    }
    #[test]
    fn test_bottleneck_different() {
        // Distinct diagrams must be strictly positive apart.
        let d1 = sample_diagram();
        let mut d2 = PersistenceDiagram::new();
        d2.add(BirthDeathPair::finite(0, 0.0, 2.0));
        let dist = BottleneckDistance::compute(&d1, &d2, 0);
        assert!(dist > 0.0);
    }
    #[test]
    fn test_wasserstein() {
        // W_1 of identical diagrams is zero.
        let d1 = sample_diagram();
        let d2 = sample_diagram();
        let w1 = WassersteinDistance::new(1.0);
        let dist = w1.compute(&d1, &d2, 0);
        assert!(dist < 1e-10);
    }
    #[test]
    fn test_persistence_landscape() {
        // Shape check: k landscapes, each sampled at `resolution` points.
        let d = sample_diagram();
        let landscape = PersistenceLandscape::from_diagram(&d, 0, 3, 20);
        assert_eq!(landscape.landscapes.len(), 3);
        assert_eq!(landscape.grid.len(), 20);
    }
    #[test]
    fn test_landscape_distance() {
        // Identical landscapes are at L2 distance zero.
        let d1 = sample_diagram();
        let l1 = PersistenceLandscape::from_diagram(&d1, 0, 3, 20);
        let l2 = PersistenceLandscape::from_diagram(&d1, 0, 3, 20);
        let dist = l1.l2_distance(&l2);
        assert!(dist < 1e-10);
    }
    #[test]
    fn test_landscape_vector() {
        let d = sample_diagram();
        let landscape = PersistenceLandscape::from_diagram(&d, 0, 2, 10);
        let vec = landscape.to_vector();
        assert_eq!(vec.len(), 20); // 2 landscapes × 10 points
    }
}

View File

@@ -0,0 +1,316 @@
//! Filtrations for Persistent Homology
//!
//! A filtration is a sequence of nested simplicial complexes.
use super::{PointCloud, Simplex, SimplicialComplex};
/// A filtered simplex (simplex with birth time)
#[derive(Debug, Clone)]
pub struct FilteredSimplex {
    /// The simplex
    pub simplex: Simplex,
    /// Birth time (filtration value)
    // Ordering across simplices is imposed later by `Filtration::sort`.
    pub birth: f64,
}
impl FilteredSimplex {
pub fn new(simplex: Simplex, birth: f64) -> Self {
Self { simplex, birth }
}
}
/// Filtration: sequence of simplicial complexes
#[derive(Debug, Clone)]
pub struct Filtration {
    /// Filtered simplices sorted by birth time
    // NOTE(review): only sorted after an explicit `sort()` call; `add`
    // appends in arbitrary order.
    pub simplices: Vec<FilteredSimplex>,
    /// Maximum dimension
    // Highest simplex dimension added so far.
    pub max_dim: usize,
}
impl Filtration {
/// Create empty filtration
pub fn new() -> Self {
Self {
simplices: Vec::new(),
max_dim: 0,
}
}
/// Add filtered simplex
pub fn add(&mut self, simplex: Simplex, birth: f64) {
self.max_dim = self.max_dim.max(simplex.dim());
self.simplices.push(FilteredSimplex::new(simplex, birth));
}
/// Sort by birth time (required before computing persistence)
pub fn sort(&mut self) {
// Sort by birth time, then by dimension (lower dimension first)
self.simplices.sort_by(|a, b| {
a.birth
.partial_cmp(&b.birth)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.simplex.dim().cmp(&b.simplex.dim()))
});
}
/// Get complex at filtration value t
pub fn complex_at(&self, t: f64) -> SimplicialComplex {
let simplices: Vec<Simplex> = self
.simplices
.iter()
.filter(|fs| fs.birth <= t)
.map(|fs| fs.simplex.clone())
.collect();
SimplicialComplex::from_simplices(simplices)
}
/// Number of simplices
pub fn len(&self) -> usize {
self.simplices.len()
}
/// Is empty?
pub fn is_empty(&self) -> bool {
self.simplices.is_empty()
}
/// Get filtration values
pub fn filtration_values(&self) -> Vec<f64> {
let mut values: Vec<f64> = self.simplices.iter().map(|fs| fs.birth).collect();
values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
values.dedup();
values
}
}
impl Default for Filtration {
fn default() -> Self {
Self::new()
}
}
/// Vietoris-Rips filtration
///
/// At scale ε, includes all simplices whose vertices are pairwise within distance ε.
#[derive(Debug, Clone)]
pub struct VietorisRips {
    /// Maximum dimension to compute
    // The builder only generates simplices up to dimension 3 (tetrahedra),
    // regardless of larger values here.
    pub max_dim: usize,
    /// Maximum filtration value
    // Simplices whose diameter exceeds this are omitted entirely.
    pub max_scale: f64,
}
impl VietorisRips {
    /// Create a Rips filtration builder.
    ///
    /// * `max_dim` — highest simplex dimension to generate (0 = vertices only).
    /// * `max_scale` — simplices with diameter above this are skipped.
    pub fn new(max_dim: usize, max_scale: f64) -> Self {
        Self { max_dim, max_scale }
    }
    /// Build the Vietoris-Rips filtration of `cloud`.
    ///
    /// Each simplex is born at its diameter (largest pairwise vertex
    /// distance). Enumeration is brute force — O(n^2)/O(n^3)/O(n^4) for
    /// edges/triangles/tetrahedra — so this is intended for small clouds.
    pub fn build(&self, cloud: &PointCloud) -> Filtration {
        let n = cloud.len();
        let dist = cloud.distance_matrix();
        let mut filtration = Filtration::new();
        // Vertices are always present from time 0.
        for i in 0..n {
            filtration.add(Simplex::vertex(i), 0.0);
        }
        // Edges at their length. Fix: respect the dimension cap — edges were
        // previously added even when max_dim == 0, unlike triangles and
        // tetrahedra which were already gated on max_dim.
        if self.max_dim >= 1 {
            for i in 0..n {
                for j in i + 1..n {
                    let d = dist[i * n + j];
                    if d <= self.max_scale {
                        filtration.add(Simplex::edge(i, j), d);
                    }
                }
            }
        }
        // Triangles at their diameter (max of the three edge lengths).
        if self.max_dim >= 2 {
            for i in 0..n {
                for j in i + 1..n {
                    for k in j + 1..n {
                        let diameter = dist[i * n + j].max(dist[i * n + k]).max(dist[j * n + k]);
                        if diameter <= self.max_scale {
                            filtration.add(Simplex::triangle(i, j, k), diameter);
                        }
                    }
                }
            }
        }
        // Tetrahedra at their diameter (max of the six edge lengths).
        if self.max_dim >= 3 {
            for i in 0..n {
                for j in i + 1..n {
                    for k in j + 1..n {
                        for l in k + 1..n {
                            let diameter = dist[i * n + j]
                                .max(dist[i * n + k])
                                .max(dist[i * n + l])
                                .max(dist[j * n + k])
                                .max(dist[j * n + l])
                                .max(dist[k * n + l]);
                            if diameter <= self.max_scale {
                                filtration.add(Simplex::new(vec![i, j, k, l]), diameter);
                            }
                        }
                    }
                }
            }
        }
        filtration.sort();
        filtration
    }
}
/// Alpha complex filtration (more efficient than Rips for low dimensions)
///
/// Based on Delaunay triangulation with radius filtering.
#[derive(Debug, Clone)]
pub struct AlphaComplex {
    /// Maximum alpha value
    // Simplices whose (approximate) alpha radius exceeds this are omitted.
    pub max_alpha: f64,
}
impl AlphaComplex {
    /// Create with maximum alpha
    pub fn new(max_alpha: f64) -> Self {
        Self { max_alpha }
    }
    /// Build filtration from point cloud (simplified version)
    ///
    /// Note: Full alpha complex requires Delaunay triangulation.
    /// This is a simplified version that approximates using distance thresholds.
    ///
    /// Generates only vertices, edges, and triangles (no tetrahedra).
    pub fn build(&self, cloud: &PointCloud) -> Filtration {
        let n = cloud.len();
        let dist = cloud.distance_matrix();
        let mut filtration = Filtration::new();
        // Vertices at time 0
        for i in 0..n {
            filtration.add(Simplex::vertex(i), 0.0);
        }
        // Edges: birth time is half the distance (radius, not diameter)
        for i in 0..n {
            for j in i + 1..n {
                let alpha = dist[i * n + j] / 2.0;
                if alpha <= self.max_alpha {
                    filtration.add(Simplex::edge(i, j), alpha);
                }
            }
        }
        // Triangles: birth time based on circumradius approximation
        for i in 0..n {
            for j in i + 1..n {
                for k in j + 1..n {
                    let d_ij = dist[i * n + j];
                    let d_ik = dist[i * n + k];
                    let d_jk = dist[j * n + k];
                    // Approximate circumradius
                    // Heron: area^2 = s(s-a)(s-b)(s-c); circumradius R = abc / (4·area).
                    // NOTE(review): the exact alpha value of an obtuse triangle
                    // is half its longest edge (< R), so this approximation can
                    // delay obtuse triangles. Degenerate (collinear) triples
                    // fall back to half the longest edge.
                    let s = (d_ij + d_ik + d_jk) / 2.0;
                    let area_sq = s * (s - d_ij) * (s - d_ik) * (s - d_jk);
                    let alpha = if area_sq > 0.0 {
                        (d_ij * d_ik * d_jk) / (4.0 * area_sq.sqrt())
                    } else {
                        d_ij.max(d_ik).max(d_jk) / 2.0
                    };
                    if alpha <= self.max_alpha {
                        filtration.add(Simplex::triangle(i, j, k), alpha);
                    }
                }
            }
        }
        filtration.sort();
        filtration
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Tests exercise filtration bookkeeping and both builders on tiny
    // hand-constructed point clouds.
    #[test]
    fn test_filtration_creation() {
        let mut filtration = Filtration::new();
        filtration.add(Simplex::vertex(0), 0.0);
        filtration.add(Simplex::vertex(1), 0.0);
        filtration.add(Simplex::edge(0, 1), 1.0);
        assert_eq!(filtration.len(), 3);
    }
    #[test]
    fn test_filtration_sort() {
        // Insert out of order; sort() must restore (birth, dim) order.
        let mut filtration = Filtration::new();
        filtration.add(Simplex::edge(0, 1), 1.0);
        filtration.add(Simplex::vertex(0), 0.0);
        filtration.add(Simplex::vertex(1), 0.0);
        filtration.sort();
        // Vertices should come before edge
        assert!(filtration.simplices[0].simplex.is_vertex());
        assert!(filtration.simplices[1].simplex.is_vertex());
        assert!(filtration.simplices[2].simplex.is_edge());
    }
    #[test]
    fn test_vietoris_rips() {
        // Triangle of points
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 0.5, 0.866], 2);
        let rips = VietorisRips::new(2, 2.0);
        let filtration = rips.build(&cloud);
        // Should have 3 vertices, 3 edges, 1 triangle
        let values = filtration.filtration_values();
        assert!(!values.is_empty());
    }
    #[test]
    fn test_complex_at() {
        // Three collinear points spaced 1 apart.
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 2.0, 0.0], 2);
        let rips = VietorisRips::new(1, 2.0);
        let filtration = rips.build(&cloud);
        // At scale 0.5, only vertices
        let complex_0 = filtration.complex_at(0.5);
        assert_eq!(complex_0.count_dim(0), 3);
        assert_eq!(complex_0.count_dim(1), 0);
        // At scale 1.5, vertices and adjacent edges
        let complex_1 = filtration.complex_at(1.5);
        assert_eq!(complex_1.count_dim(0), 3);
        assert!(complex_1.count_dim(1) >= 2); // At least edges 0-1 and 1-2
    }
    #[test]
    fn test_alpha_complex() {
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 0.0, 1.0], 2);
        let alpha = AlphaComplex::new(2.0);
        let filtration = alpha.build(&cloud);
        assert!(filtration.len() >= 3); // At least vertices
    }
}

View File

@@ -0,0 +1,216 @@
//! Persistent Homology and Topological Data Analysis
//!
//! Topological methods for analyzing shape and structure in data.
//!
//! ## Key Capabilities
//!
//! - **Persistent Homology**: Track topological features (components, loops, voids)
//! - **Betti Numbers**: Count topological features at each scale
//! - **Persistence Diagrams**: Visualize feature lifetimes
//! - **Bottleneck/Wasserstein Distance**: Compare topological signatures
//!
//! ## Integration with Mincut
//!
//! TDA complements mincut by providing:
//! - Long-term drift detection (shape changes over time)
//! - Coherence monitoring (are attention patterns stable?)
//! - Anomaly detection (topological outliers)
//!
//! ## Mathematical Background
//!
//! Given a filtration of simplicial complexes K_0 ⊆ K_1 ⊆ ... ⊆ K_n,
//! persistent homology tracks when features are born and die.
//!
//! Birth-death pairs form the persistence diagram.
mod distance;
mod filtration;
mod persistence;
mod simplex;
pub use distance::{BottleneckDistance, WassersteinDistance};
pub use filtration::{AlphaComplex, Filtration, VietorisRips};
pub use persistence::{BirthDeathPair, PersistenceDiagram, PersistentHomology};
pub use simplex::{Simplex, SimplicialComplex};
/// Betti numbers at a given scale
#[derive(Debug, Clone, PartialEq)]
pub struct BettiNumbers {
    /// β_0: number of connected components
    pub b0: usize,
    /// β_1: number of independent loops (1-cycles)
    pub b1: usize,
    /// β_2: number of enclosed voids (2-cycles)
    pub b2: usize,
}
impl BettiNumbers {
    /// Bundle the three Betti numbers.
    pub fn new(b0: usize, b1: usize, b2: usize) -> Self {
        BettiNumbers { b0, b1, b2 }
    }
    /// Sum of all tracked features.
    pub fn total(&self) -> usize {
        [self.b0, self.b1, self.b2].iter().sum()
    }
    /// Euler characteristic χ = β_0 - β_1 + β_2.
    pub fn euler_characteristic(&self) -> i64 {
        (self.b0 as i64) - (self.b1 as i64) + (self.b2 as i64)
    }
}
/// Point in Euclidean space
#[derive(Debug, Clone)]
pub struct Point {
    pub coords: Vec<f64>,
}
impl Point {
    /// Wrap a coordinate vector as a point.
    pub fn new(coords: Vec<f64>) -> Self {
        Point { coords }
    }
    /// Number of coordinates.
    pub fn dim(&self) -> usize {
        self.coords.len()
    }
    /// Euclidean distance to `other`.
    ///
    /// Coordinates beyond the shorter of the two vectors are ignored
    /// (pairwise `zip` semantics).
    pub fn distance(&self, other: &Point) -> f64 {
        self.distance_sq(other).sqrt()
    }
    /// Squared Euclidean distance (skips the square root).
    pub fn distance_sq(&self, other: &Point) -> f64 {
        let mut acc = 0.0;
        for (a, b) in self.coords.iter().zip(other.coords.iter()) {
            let delta = a - b;
            acc += delta * delta;
        }
        acc
    }
}
/// Point cloud for TDA
#[derive(Debug, Clone)]
pub struct PointCloud {
    /// Points
    pub points: Vec<Point>,
    /// Dimension of ambient space
    // Taken from the first point in `new`, or supplied explicitly in `from_flat`.
    pub ambient_dim: usize,
}
impl PointCloud {
    /// Create from points; the ambient dimension is taken from the first
    /// point (0 for an empty cloud).
    pub fn new(points: Vec<Point>) -> Self {
        let ambient_dim = points.first().map(|p| p.dim()).unwrap_or(0);
        Self {
            points,
            ambient_dim,
        }
    }
    /// Create from a row-major flat array; every `dim` consecutive values
    /// become one point. A trailing partial chunk becomes a short point.
    /// NOTE(review): `dim == 0` panics in `chunks` — verify callers.
    pub fn from_flat(data: &[f64], dim: usize) -> Self {
        let points: Vec<Point> = data
            .chunks(dim)
            .map(|chunk| Point::new(chunk.to_vec()))
            .collect();
        Self {
            points,
            ambient_dim: dim,
        }
    }
    /// Number of points
    pub fn len(&self) -> usize {
        self.points.len()
    }
    /// Is empty?
    pub fn is_empty(&self) -> bool {
        self.points.is_empty()
    }
    /// All pairwise Euclidean distances as a dense, symmetric, row-major
    /// n×n matrix with a zero diagonal.
    pub fn distance_matrix(&self) -> Vec<f64> {
        let n = self.points.len();
        let mut dist = vec![0.0; n * n];
        for i in 0..n {
            for j in i + 1..n {
                let d = self.points[i].distance(&self.points[j]);
                dist[i * n + j] = d;
                dist[j * n + i] = d;
            }
        }
        dist
    }
    /// Axis-aligned bounding box as (min corner, max corner), or `None`
    /// for an empty cloud.
    ///
    /// Fix: coordinates beyond `ambient_dim` are ignored (`take(dim)`)
    /// instead of indexing `min_coords`/`max_coords` out of bounds when a
    /// point happens to be longer than the first one.
    pub fn bounding_box(&self) -> Option<(Point, Point)> {
        if self.points.is_empty() {
            return None;
        }
        let dim = self.ambient_dim;
        let mut min_coords = vec![f64::INFINITY; dim];
        let mut max_coords = vec![f64::NEG_INFINITY; dim];
        for p in &self.points {
            for (i, &c) in p.coords.iter().enumerate().take(dim) {
                min_coords[i] = min_coords[i].min(c);
                max_coords[i] = max_coords[i].max(c);
            }
        }
        Some((Point::new(min_coords), Point::new(max_coords)))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Smoke tests for the small geometry helpers in this module.
    #[test]
    fn test_betti_numbers() {
        let betti = BettiNumbers::new(1, 2, 0);
        assert_eq!(betti.total(), 3);
        assert_eq!(betti.euler_characteristic(), -1);
    }
    #[test]
    fn test_point_distance() {
        // 3-4-5 right triangle.
        let p1 = Point::new(vec![0.0, 0.0]);
        let p2 = Point::new(vec![3.0, 4.0]);
        assert!((p1.distance(&p2) - 5.0).abs() < 1e-10);
    }
    #[test]
    fn test_point_cloud() {
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 0.0, 1.0], 2);
        assert_eq!(cloud.len(), 3);
        assert_eq!(cloud.ambient_dim, 2);
    }
    #[test]
    fn test_distance_matrix() {
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 0.0, 1.0], 2);
        let dist = cloud.distance_matrix();
        assert_eq!(dist.len(), 9);
        assert!((dist[0 * 3 + 1] - 1.0).abs() < 1e-10); // (0,0) to (1,0)
        assert!((dist[0 * 3 + 2] - 1.0).abs() < 1e-10); // (0,0) to (0,1)
    }
}

View File

@@ -0,0 +1,407 @@
//! Persistent Homology Computation
//!
//! Compute birth-death pairs from a filtration using the standard algorithm.
use super::{BettiNumbers, Filtration, Simplex};
use std::collections::{HashMap, HashSet};
/// Birth-death pair in persistence diagram
#[derive(Debug, Clone, PartialEq)]
pub struct BirthDeathPair {
    /// Dimension of the feature (0 = component, 1 = loop, ...)
    pub dimension: usize,
    /// Birth time
    pub birth: f64,
    /// Death time (None = essential, lives forever)
    pub death: Option<f64>,
}
impl BirthDeathPair {
    /// Interval [birth, death] for a feature that eventually dies.
    pub fn finite(dimension: usize, birth: f64, death: f64) -> Self {
        BirthDeathPair {
            dimension,
            birth,
            death: Some(death),
        }
    }
    /// Half-open interval [birth, ∞) for a feature that never dies.
    pub fn essential(dimension: usize, birth: f64) -> Self {
        BirthDeathPair {
            dimension,
            birth,
            death: None,
        }
    }
    /// Lifetime of the feature; ∞ for essential pairs.
    pub fn persistence(&self) -> f64 {
        self.death.map_or(f64::INFINITY, |d| d - self.birth)
    }
    /// Does the feature survive to the end of the filtration?
    pub fn is_essential(&self) -> bool {
        self.death.is_none()
    }
    /// Midpoint of the interval; ∞ for essential pairs.
    pub fn midpoint(&self) -> f64 {
        self.death.map_or(f64::INFINITY, |d| (self.birth + d) / 2.0)
    }
}
/// Persistence diagram: collection of birth-death pairs
#[derive(Debug, Clone)]
pub struct PersistenceDiagram {
    /// Birth-death pairs
    // Unordered; pushing directly bypasses the max_dim tracking in `add`.
    pub pairs: Vec<BirthDeathPair>,
    /// Maximum dimension
    // Highest `dimension` among pairs added via `add`.
    pub max_dim: usize,
}
impl PersistenceDiagram {
/// Create empty diagram
pub fn new() -> Self {
Self {
pairs: Vec::new(),
max_dim: 0,
}
}
/// Add a pair
pub fn add(&mut self, pair: BirthDeathPair) {
self.max_dim = self.max_dim.max(pair.dimension);
self.pairs.push(pair);
}
/// Get pairs of dimension d
pub fn pairs_of_dim(&self, d: usize) -> impl Iterator<Item = &BirthDeathPair> {
self.pairs.iter().filter(move |p| p.dimension == d)
}
/// Get Betti numbers at scale t
pub fn betti_at(&self, t: f64) -> BettiNumbers {
let mut b0 = 0;
let mut b1 = 0;
let mut b2 = 0;
for pair in &self.pairs {
let alive = pair.birth <= t && pair.death.map(|d| d > t).unwrap_or(true);
if alive {
match pair.dimension {
0 => b0 += 1,
1 => b1 += 1,
2 => b2 += 1,
_ => {}
}
}
}
BettiNumbers::new(b0, b1, b2)
}
/// Get total persistence (sum of lifetimes)
pub fn total_persistence(&self) -> f64 {
self.pairs
.iter()
.filter(|p| !p.is_essential())
.map(|p| p.persistence())
.sum()
}
/// Get average persistence
pub fn average_persistence(&self) -> f64 {
let finite: Vec<f64> = self
.pairs
.iter()
.filter(|p| !p.is_essential())
.map(|p| p.persistence())
.collect();
if finite.is_empty() {
0.0
} else {
finite.iter().sum::<f64>() / finite.len() as f64
}
}
/// Filter by minimum persistence
pub fn filter_by_persistence(&self, min_persistence: f64) -> Self {
Self {
pairs: self
.pairs
.iter()
.filter(|p| p.persistence() >= min_persistence)
.cloned()
.collect(),
max_dim: self.max_dim,
}
}
/// Number of features of each dimension
pub fn feature_counts(&self) -> Vec<usize> {
let mut counts = vec![0; self.max_dim + 1];
for pair in &self.pairs {
if pair.dimension <= self.max_dim {
counts[pair.dimension] += 1;
}
}
counts
}
}
impl Default for PersistenceDiagram {
fn default() -> Self {
Self::new()
}
}
/// Persistent homology computation
pub struct PersistentHomology {
    /// Working column representation (reduced boundary matrix)
    // One column per filtration simplex; `None` encodes the zero column.
    columns: Vec<Option<HashSet<usize>>>,
    /// Pivot to column mapping
    // pivot row index -> index of the reduced column owning that pivot.
    pivot_to_col: HashMap<usize, usize>,
    /// Birth times
    // Parallel to `columns`: filtration value of each simplex.
    birth_times: Vec<f64>,
    /// Simplex dimensions
    // Parallel to `columns`: dimension of each simplex.
    dimensions: Vec<usize>,
}
impl PersistentHomology {
    /// Compute persistence from filtration
    ///
    /// The filtration must already be sorted (faces before cofaces), as
    /// produced by `Filtration::sort`.
    pub fn compute(filtration: &Filtration) -> PersistenceDiagram {
        let mut ph = Self {
            columns: Vec::new(),
            pivot_to_col: HashMap::new(),
            birth_times: Vec::new(),
            dimensions: Vec::new(),
        };
        ph.run(filtration)
    }
    // Pipeline: index simplices, build the Z/2 boundary matrix, reduce it,
    // then read off the birth-death pairs.
    fn run(&mut self, filtration: &Filtration) -> PersistenceDiagram {
        let n = filtration.simplices.len();
        if n == 0 {
            return PersistenceDiagram::new();
        }
        // Build simplex index mapping
        // NOTE(review): duplicate simplices would collapse to one index here;
        // assumes the filtration contains each simplex at most once.
        let simplex_to_idx: HashMap<&Simplex, usize> = filtration
            .simplices
            .iter()
            .enumerate()
            .map(|(i, fs)| (&fs.simplex, i))
            .collect();
        // Initialize
        self.columns = Vec::with_capacity(n);
        self.birth_times = filtration.simplices.iter().map(|fs| fs.birth).collect();
        self.dimensions = filtration
            .simplices
            .iter()
            .map(|fs| fs.simplex.dim())
            .collect();
        // Build boundary matrix columns
        // Column j holds the filtration indices of the faces of simplex j.
        for fs in &filtration.simplices {
            let boundary = self.boundary(&fs.simplex, &simplex_to_idx);
            self.columns.push(if boundary.is_empty() {
                None
            } else {
                Some(boundary)
            });
        }
        // Reduce matrix
        self.reduce();
        // Extract persistence pairs
        self.extract_pairs()
    }
    /// Compute boundary of simplex as set of face indices
    // Faces absent from the index map (not in the filtration) are skipped.
    fn boundary(&self, simplex: &Simplex, idx_map: &HashMap<&Simplex, usize>) -> HashSet<usize> {
        let mut boundary = HashSet::new();
        for face in simplex.faces() {
            if let Some(&idx) = idx_map.get(&face) {
                boundary.insert(idx);
            }
        }
        boundary
    }
    /// Reduce using standard persistence algorithm
    // Left-to-right column reduction: while our pivot collides with an
    // earlier column's pivot, add that column (mod 2); stop when the pivot
    // is unique or the column cancels to zero.
    fn reduce(&mut self) {
        let n = self.columns.len();
        for j in 0..n {
            // Reduce column j
            while let Some(pivot) = self.get_pivot(j) {
                if let Some(&other) = self.pivot_to_col.get(&pivot) {
                    // Add column 'other' to column j (mod 2)
                    self.add_columns(j, other);
                } else {
                    // No collision, record pivot
                    self.pivot_to_col.insert(pivot, j);
                    break;
                }
            }
        }
    }
    /// Get pivot (largest index) of column
    fn get_pivot(&self, col: usize) -> Option<usize> {
        self.columns[col]
            .as_ref()
            .and_then(|c| c.iter().max().copied())
    }
    /// Add column src to column dst (XOR / mod 2)
    fn add_columns(&mut self, dst: usize, src: usize) {
        // Clone src first so dst can be borrowed mutably below.
        let src_col = self.columns[src].clone();
        if let (Some(ref mut dst_col), Some(ref src_col)) = (&mut self.columns[dst], &src_col) {
            // Symmetric difference
            let mut new_col = HashSet::new();
            for &idx in dst_col.iter() {
                if !src_col.contains(&idx) {
                    new_col.insert(idx);
                }
            }
            for &idx in src_col.iter() {
                if !dst_col.contains(&idx) {
                    new_col.insert(idx);
                }
            }
            if new_col.is_empty() {
                // Column cancelled out entirely -> zero column.
                self.columns[dst] = None;
            } else {
                *dst_col = new_col;
            }
        }
    }
    /// Extract birth-death pairs from reduced matrix
    // Each (pivot, col) entry means: simplex `pivot` created a feature that
    // simplex `col` killed. Unpaired zero columns are essential classes.
    fn extract_pairs(&self) -> PersistenceDiagram {
        let n = self.columns.len();
        let mut diagram = PersistenceDiagram::new();
        let mut paired = HashSet::new();
        // Process pivot pairs (death creates pair with birth)
        for (&pivot, &col) in &self.pivot_to_col {
            let birth = self.birth_times[pivot];
            let death = self.birth_times[col];
            let dim = self.dimensions[pivot];
            // Zero-length intervals are dropped as noise.
            if death > birth {
                diagram.add(BirthDeathPair::finite(dim, birth, death));
            }
            paired.insert(pivot);
            paired.insert(col);
        }
        // Remaining columns are essential (infinite persistence)
        for j in 0..n {
            if !paired.contains(&j) && self.columns[j].is_none() {
                let dim = self.dimensions[j];
                let birth = self.birth_times[j];
                diagram.add(BirthDeathPair::essential(dim, birth));
            }
        }
        diagram
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::homology::{PointCloud, VietorisRips};
    // Unit tests for pair bookkeeping plus small end-to-end persistence runs.
    #[test]
    fn test_birth_death_pair() {
        let finite = BirthDeathPair::finite(0, 0.0, 1.0);
        assert_eq!(finite.persistence(), 1.0);
        assert!(!finite.is_essential());
        let essential = BirthDeathPair::essential(0, 0.0);
        assert!(essential.is_essential());
        assert_eq!(essential.persistence(), f64::INFINITY);
    }
    #[test]
    fn test_persistence_diagram() {
        let mut diagram = PersistenceDiagram::new();
        diagram.add(BirthDeathPair::essential(0, 0.0));
        diagram.add(BirthDeathPair::finite(0, 0.0, 1.0));
        diagram.add(BirthDeathPair::finite(1, 0.5, 2.0));
        assert_eq!(diagram.pairs.len(), 3);
        let betti = diagram.betti_at(0.75);
        assert_eq!(betti.b0, 2); // Both 0-dim features alive
        assert_eq!(betti.b1, 1); // 1-dim feature alive
    }
    #[test]
    fn test_persistent_homology_simple() {
        // Two points
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0], 2);
        let rips = VietorisRips::new(1, 2.0);
        let filtration = rips.build(&cloud);
        let diagram = PersistentHomology::compute(&filtration);
        // Should have:
        // - One essential H0 (final connected component)
        // - One finite H0 that dies when edge connects the points
        let h0_pairs: Vec<_> = diagram.pairs_of_dim(0).collect();
        assert!(!h0_pairs.is_empty());
    }
    #[test]
    fn test_persistent_homology_triangle() {
        // Three points forming triangle
        let cloud = PointCloud::from_flat(&[0.0, 0.0, 1.0, 0.0, 0.5, 0.866], 2);
        let rips = VietorisRips::new(2, 2.0);
        let filtration = rips.build(&cloud);
        let diagram = PersistentHomology::compute(&filtration);
        // Should have H0 features (components merging)
        let h0_count = diagram.pairs_of_dim(0).count();
        assert!(h0_count > 0);
    }
    #[test]
    fn test_filter_by_persistence() {
        // Essential pair has infinite persistence, so it always survives.
        let mut diagram = PersistenceDiagram::new();
        diagram.add(BirthDeathPair::finite(0, 0.0, 0.1));
        diagram.add(BirthDeathPair::finite(0, 0.0, 1.0));
        diagram.add(BirthDeathPair::essential(0, 0.0));
        let filtered = diagram.filter_by_persistence(0.5);
        assert_eq!(filtered.pairs.len(), 2); // Only persistence >= 0.5
    }
    #[test]
    fn test_feature_counts() {
        let mut diagram = PersistenceDiagram::new();
        diagram.add(BirthDeathPair::finite(0, 0.0, 1.0));
        diagram.add(BirthDeathPair::finite(0, 0.0, 1.0));
        diagram.add(BirthDeathPair::finite(1, 0.0, 1.0));
        let counts = diagram.feature_counts();
        assert_eq!(counts[0], 2);
        assert_eq!(counts[1], 1);
    }
}

View File

@@ -0,0 +1,292 @@
//! Simplicial Complexes
//!
//! Basic building blocks for topological data analysis.
use std::collections::{HashMap, HashSet};
/// A simplex (k-simplex has k+1 vertices)
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Simplex {
    /// Sorted vertex indices
    pub vertices: Vec<usize>,
}
impl Simplex {
    /// Create a simplex from vertices; input is sorted and de-duplicated so
    /// that equal vertex sets compare (and hash) equal.
    pub fn new(mut vertices: Vec<usize>) -> Self {
        vertices.sort_unstable();
        vertices.dedup();
        Self { vertices }
    }
    /// Create 0-simplex (vertex)
    pub fn vertex(v: usize) -> Self {
        Self { vertices: vec![v] }
    }
    /// Create 1-simplex (edge)
    pub fn edge(v0: usize, v1: usize) -> Self {
        Self::new(vec![v0, v1])
    }
    /// Create 2-simplex (triangle)
    pub fn triangle(v0: usize, v1: usize, v2: usize) -> Self {
        Self::new(vec![v0, v1, v2])
    }
    /// Dimension (vertex count - 1). The degenerate empty simplex reports
    /// dimension 0, same as a vertex.
    pub fn dim(&self) -> usize {
        // saturating_sub keeps the empty-simplex case at 0 without a branch.
        self.vertices.len().saturating_sub(1)
    }
    /// Is this a vertex (0-simplex)?
    pub fn is_vertex(&self) -> bool {
        self.vertices.len() == 1
    }
    /// Is this an edge (1-simplex)?
    pub fn is_edge(&self) -> bool {
        self.vertices.len() == 2
    }
    /// All codimension-1 faces, each obtained by dropping one vertex.
    /// Vertices (and the empty simplex) have no faces.
    pub fn faces(&self) -> Vec<Simplex> {
        if self.vertices.len() <= 1 {
            return vec![];
        }
        (0..self.vertices.len())
            .map(|i| {
                let face_verts: Vec<usize> = self
                    .vertices
                    .iter()
                    .enumerate()
                    .filter(|&(j, _)| j != i)
                    .map(|(_, &v)| v)
                    .collect();
                Simplex::new(face_verts)
            })
            .collect()
    }
    /// True when `self` is a *proper* face of `other`: strictly fewer
    /// vertices, all of them contained in `other`.
    pub fn is_face_of(&self, other: &Simplex) -> bool {
        if self.vertices.len() >= other.vertices.len() {
            return false;
        }
        self.vertices.iter().all(|v| other.vertices.contains(v))
    }
    /// True when the two simplices share at least one vertex.
    /// Short-circuits on the first hit instead of allocating the full
    /// intersection as the previous implementation did.
    pub fn shares_face_with(&self, other: &Simplex) -> bool {
        self.vertices.iter().any(|v| other.vertices.contains(v))
    }
}
/// Simplicial complex (collection of simplices)
#[derive(Debug, Clone)]
pub struct SimplicialComplex {
    /// Simplices organized by dimension
    // simplices[d] holds all d-simplices; index 0 always exists after new().
    simplices: Vec<HashSet<Simplex>>,
    /// Maximum dimension
    // Highest dimension ever added (never decreases; there is no removal).
    max_dim: usize,
}
impl SimplicialComplex {
    /// Create empty complex
    pub fn new() -> Self {
        Self {
            simplices: vec![HashSet::new()],
            max_dim: 0,
        }
    }
    /// Create from list of simplices (automatically adds faces)
    pub fn from_simplices(simplices: Vec<Simplex>) -> Self {
        let mut complex = Self::new();
        for s in simplices {
            complex.add(s);
        }
        complex
    }
    /// Add simplex and all its faces
    ///
    /// Keeps the complex closed under taking faces, so it remains a valid
    /// simplicial complex.
    pub fn add(&mut self, simplex: Simplex) {
        let dim = simplex.dim();
        // Ensure we have enough dimension levels
        while self.simplices.len() <= dim {
            self.simplices.push(HashSet::new());
        }
        self.max_dim = self.max_dim.max(dim);
        // Add all faces recursively
        self.add_with_faces(simplex);
    }
    // Recursively inserts `simplex` and its entire face lattice.
    // Faces have strictly smaller dimension, so the buckets allocated in
    // `add` always cover the recursion.
    fn add_with_faces(&mut self, simplex: Simplex) {
        let dim = simplex.dim();
        if self.simplices[dim].contains(&simplex) {
            return; // Already present
        }
        // Add faces first
        for face in simplex.faces() {
            self.add_with_faces(face);
        }
        // Add this simplex
        self.simplices[dim].insert(simplex);
    }
    /// Check if simplex is in complex
    pub fn contains(&self, simplex: &Simplex) -> bool {
        let dim = simplex.dim();
        if dim >= self.simplices.len() {
            return false;
        }
        self.simplices[dim].contains(simplex)
    }
    /// Get all simplices of dimension d
    ///
    /// Empty iterator when dimension `d` has no bucket.
    pub fn simplices_of_dim(&self, d: usize) -> impl Iterator<Item = &Simplex> {
        self.simplices.get(d).into_iter().flat_map(|s| s.iter())
    }
    /// Get all simplices
    pub fn all_simplices(&self) -> impl Iterator<Item = &Simplex> {
        self.simplices.iter().flat_map(|s| s.iter())
    }
    /// Number of simplices of dimension d
    pub fn count_dim(&self, d: usize) -> usize {
        self.simplices.get(d).map(|s| s.len()).unwrap_or(0)
    }
    /// Total number of simplices
    pub fn size(&self) -> usize {
        self.simplices.iter().map(|s| s.len()).sum()
    }
    /// Maximum dimension
    pub fn dimension(&self) -> usize {
        self.max_dim
    }
    /// f-vector: (f_0, f_1, f_2, ...) = counts of each dimension
    pub fn f_vector(&self) -> Vec<usize> {
        self.simplices.iter().map(|s| s.len()).collect()
    }
    /// Euler characteristic via f-vector
    ///
    /// χ = Σ (-1)^d · f_d, the alternating sum of simplex counts.
    pub fn euler_characteristic(&self) -> i64 {
        self.simplices
            .iter()
            .enumerate()
            .map(|(d, s)| {
                let sign = if d % 2 == 0 { 1 } else { -1 };
                sign * s.len() as i64
            })
            .sum()
    }
    /// Get vertex set
    pub fn vertices(&self) -> HashSet<usize> {
        self.simplices_of_dim(0)
            .flat_map(|s| s.vertices.iter().copied())
            .collect()
    }
    /// Get edges as pairs
    ///
    /// Vertex pairs come out sorted within each tuple because `Simplex`
    /// stores its vertices sorted.
    pub fn edges(&self) -> Vec<(usize, usize)> {
        self.simplices_of_dim(1)
            .filter_map(|s| {
                if s.vertices.len() == 2 {
                    Some((s.vertices[0], s.vertices[1]))
                } else {
                    None
                }
            })
            .collect()
    }
}
impl Default for SimplicialComplex {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Tests for simplex construction, the face lattice, and complex counting.
    #[test]
    fn test_simplex_creation() {
        let vertex = Simplex::vertex(0);
        assert_eq!(vertex.dim(), 0);
        let edge = Simplex::edge(0, 1);
        assert_eq!(edge.dim(), 1);
        let triangle = Simplex::triangle(0, 1, 2);
        assert_eq!(triangle.dim(), 2);
    }
    #[test]
    fn test_simplex_faces() {
        // A triangle's codimension-1 faces are its three edges.
        let triangle = Simplex::triangle(0, 1, 2);
        let faces = triangle.faces();
        assert_eq!(faces.len(), 3);
        assert!(faces.contains(&Simplex::edge(0, 1)));
        assert!(faces.contains(&Simplex::edge(0, 2)));
        assert!(faces.contains(&Simplex::edge(1, 2)));
    }
    #[test]
    fn test_simplicial_complex() {
        // Adding a triangle must also add its edges and vertices (closure).
        let mut complex = SimplicialComplex::new();
        complex.add(Simplex::triangle(0, 1, 2));
        // Should have 1 triangle, 3 edges, 3 vertices
        assert_eq!(complex.count_dim(0), 3);
        assert_eq!(complex.count_dim(1), 3);
        assert_eq!(complex.count_dim(2), 1);
        assert_eq!(complex.euler_characteristic(), 1); // 3 - 3 + 1 = 1
    }
    #[test]
    fn test_f_vector() {
        let complex = SimplicialComplex::from_simplices(vec![
            Simplex::triangle(0, 1, 2),
            Simplex::triangle(1, 2, 3),
        ]);
        let f = complex.f_vector();
        assert_eq!(f[0], 4); // 4 vertices
        assert_eq!(f[1], 5); // 5 edges (shared edge 1-2)
        assert_eq!(f[2], 2); // 2 triangles
    }
    #[test]
    fn test_is_face_of() {
        let edge = Simplex::edge(0, 1);
        let triangle = Simplex::triangle(0, 1, 2);
        assert!(edge.is_face_of(&triangle));
        assert!(!triangle.is_face_of(&edge));
    }
}