Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,441 @@
//! Spectral Clustering
//!
//! Graph partitioning using spectral methods.
//! Efficient approximation via Chebyshev polynomials.
use super::ScaledLaplacian;
/// Spectral clustering configuration
#[derive(Debug, Clone)]
pub struct ClusteringConfig {
/// Number of clusters
pub k: usize,
/// Number of eigenvectors to use
pub num_eigenvectors: usize,
/// Power iteration steps for eigenvector approximation
pub power_iters: usize,
/// K-means iterations
pub kmeans_iters: usize,
/// Random seed
pub seed: u64,
}
impl Default for ClusteringConfig {
fn default() -> Self {
Self {
k: 2,
num_eigenvectors: 10,
power_iters: 50,
kmeans_iters: 20,
seed: 42,
}
}
}
/// Spectral clustering result
#[derive(Debug, Clone)]
pub struct ClusteringResult {
/// Cluster assignment for each vertex
pub assignments: Vec<usize>,
/// Eigenvector embedding (n × k)
pub embedding: Vec<Vec<f64>>,
/// Number of clusters
pub k: usize,
}
impl ClusteringResult {
/// Get vertices in cluster c
pub fn cluster(&self, c: usize) -> Vec<usize> {
self.assignments
.iter()
.enumerate()
.filter(|(_, &a)| a == c)
.map(|(i, _)| i)
.collect()
}
/// Cluster sizes
pub fn cluster_sizes(&self) -> Vec<usize> {
let mut sizes = vec![0; self.k];
for &a in &self.assignments {
if a < self.k {
sizes[a] += 1;
}
}
sizes
}
}
/// Spectral clustering
#[derive(Debug, Clone)]
pub struct SpectralClustering {
/// Configuration
config: ClusteringConfig,
}
impl SpectralClustering {
/// Create with configuration
pub fn new(config: ClusteringConfig) -> Self {
Self { config }
}
/// Create with just number of clusters
pub fn with_k(k: usize) -> Self {
Self::new(ClusteringConfig {
k,
num_eigenvectors: k,
..Default::default()
})
}
/// Cluster graph using normalized Laplacian eigenvectors
pub fn cluster(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
let n = laplacian.n;
let k = self.config.k.min(n);
let num_eig = self.config.num_eigenvectors.min(n);
// Compute approximate eigenvectors of Laplacian
// We want the k smallest eigenvalues (smoothest eigenvectors)
// Use inverse power method on shifted Laplacian
let embedding = self.compute_embedding(laplacian, num_eig);
// Run k-means on embedding
let assignments = self.kmeans(&embedding, k);
ClusteringResult {
assignments,
embedding,
k,
}
}
/// Cluster using Fiedler vector (k=2)
pub fn bipartition(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
let n = laplacian.n;
// Compute Fiedler vector (second smallest eigenvector)
let fiedler = self.compute_fiedler(laplacian);
// Partition by sign
let assignments: Vec<usize> = fiedler
.iter()
.map(|&v| if v >= 0.0 { 0 } else { 1 })
.collect();
ClusteringResult {
assignments,
embedding: vec![fiedler],
k: 2,
}
}
/// Compute spectral embedding (k smallest non-trivial eigenvectors)
fn compute_embedding(&self, laplacian: &ScaledLaplacian, k: usize) -> Vec<Vec<f64>> {
let n = laplacian.n;
if k == 0 || n == 0 {
return vec![];
}
// Initialize random vectors
let mut vectors: Vec<Vec<f64>> = (0..k)
.map(|i| {
(0..n)
.map(|j| {
let x = ((j * 2654435769 + i * 1103515245 + self.config.seed as usize)
as f64
/ 4294967296.0)
* 2.0
- 1.0;
x
})
.collect()
})
.collect();
// Power iteration to find smallest eigenvectors
// We use (I - L_scaled) which has largest eigenvalue where L_scaled has smallest
for _ in 0..self.config.power_iters {
for i in 0..k {
// Apply (I - L_scaled) = (2I - L)/λ_max approximately
// Simpler: just use deflated power iteration on L for smallest
let mut y = vec![0.0; n];
let lx = laplacian.apply(&vectors[i]);
// We want small eigenvalues, so use (λ_max*I - L)
let shift = 2.0; // Approximate max eigenvalue of scaled Laplacian
for j in 0..n {
y[j] = shift * vectors[i][j] - lx[j];
}
// Orthogonalize against previous vectors and constant vector
// First, remove constant component (eigenvalue 0)
let mean: f64 = y.iter().sum::<f64>() / n as f64;
for j in 0..n {
y[j] -= mean;
}
// Then orthogonalize against previous eigenvectors
for prev in 0..i {
let dot: f64 = y.iter().zip(vectors[prev].iter()).map(|(a, b)| a * b).sum();
for j in 0..n {
y[j] -= dot * vectors[prev][j];
}
}
// Normalize
let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm > 1e-15 {
for j in 0..n {
y[j] /= norm;
}
}
vectors[i] = y;
}
}
vectors
}
/// Compute Fiedler vector (second smallest eigenvector)
fn compute_fiedler(&self, laplacian: &ScaledLaplacian) -> Vec<f64> {
let embedding = self.compute_embedding(laplacian, 1);
if embedding.is_empty() {
return vec![0.0; laplacian.n];
}
embedding[0].clone()
}
/// K-means clustering on embedding
fn kmeans(&self, embedding: &[Vec<f64>], k: usize) -> Vec<usize> {
if embedding.is_empty() {
return vec![];
}
let n = embedding[0].len();
let dim = embedding.len();
if n == 0 || k == 0 {
return vec![];
}
// Initialize centroids (k-means++ style)
let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
// First centroid: random point
let first = (self.config.seed as usize) % n;
centroids.push((0..dim).map(|d| embedding[d][first]).collect());
// Remaining centroids: proportional to squared distance
for _ in 1..k {
let mut distances: Vec<f64> = (0..n)
.map(|i| {
centroids
.iter()
.map(|c| {
(0..dim)
.map(|d| (embedding[d][i] - c[d]).powi(2))
.sum::<f64>()
})
.fold(f64::INFINITY, f64::min)
})
.collect();
let total: f64 = distances.iter().sum();
if total > 0.0 {
let threshold = (self.config.seed as f64 / 4294967296.0) * total;
let mut cumsum = 0.0;
let mut chosen = 0;
for (i, &d) in distances.iter().enumerate() {
cumsum += d;
if cumsum >= threshold {
chosen = i;
break;
}
}
centroids.push((0..dim).map(|d| embedding[d][chosen]).collect());
} else {
// Degenerate case
centroids.push(vec![0.0; dim]);
}
}
// K-means iterations
let mut assignments = vec![0; n];
for _ in 0..self.config.kmeans_iters {
// Assign points to nearest centroid
for i in 0..n {
let mut best_cluster = 0;
let mut best_dist = f64::INFINITY;
for (c, centroid) in centroids.iter().enumerate() {
let dist: f64 = (0..dim)
.map(|d| (embedding[d][i] - centroid[d]).powi(2))
.sum();
if dist < best_dist {
best_dist = dist;
best_cluster = c;
}
}
assignments[i] = best_cluster;
}
// Update centroids
let mut counts = vec![0usize; k];
for centroid in centroids.iter_mut() {
for v in centroid.iter_mut() {
*v = 0.0;
}
}
for (i, &c) in assignments.iter().enumerate() {
counts[c] += 1;
for d in 0..dim {
centroids[c][d] += embedding[d][i];
}
}
for (c, centroid) in centroids.iter_mut().enumerate() {
if counts[c] > 0 {
for v in centroid.iter_mut() {
*v /= counts[c] as f64;
}
}
}
}
assignments
}
/// Compute normalized cut value for a bipartition
pub fn normalized_cut(&self, laplacian: &ScaledLaplacian, partition: &[bool]) -> f64 {
let n = laplacian.n;
if n == 0 {
return 0.0;
}
// Compute cut and volumes
let mut cut = 0.0;
let mut vol_a = 0.0;
let mut vol_b = 0.0;
// For each entry in Laplacian
for &(i, j, v) in &laplacian.entries {
if i < n && j < n && i != j {
// This is an edge (negative Laplacian entry)
let w = -v; // Edge weight
if w > 0.0 && partition[i] != partition[j] {
cut += w;
}
}
if i == j && i < n {
// Diagonal = degree
if partition[i] {
vol_a += v;
} else {
vol_b += v;
}
}
}
// NCut = cut/vol(A) + cut/vol(B)
let ncut = if vol_a > 0.0 { cut / vol_a } else { 0.0 }
+ if vol_b > 0.0 { cut / vol_b } else { 0.0 };
ncut
}
}
#[cfg(test)]
mod tests {
use super::*;
fn two_cliques_graph() -> ScaledLaplacian {
// Two cliques of size 3 connected by one edge
let edges = vec![
// Clique 1
(0, 1, 1.0),
(0, 2, 1.0),
(1, 2, 1.0),
// Clique 2
(3, 4, 1.0),
(3, 5, 1.0),
(4, 5, 1.0),
// Bridge
(2, 3, 0.1),
];
ScaledLaplacian::from_sparse_adjacency(&edges, 6)
}
#[test]
fn test_spectral_clustering() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
// Should roughly separate the two cliques
let sizes = result.cluster_sizes();
assert_eq!(sizes.iter().sum::<usize>(), 6);
}
#[test]
fn test_bipartition() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.bipartition(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
}
#[test]
fn test_cluster_extraction() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
let c0 = result.cluster(0);
let c1 = result.cluster(1);
// All vertices assigned
assert_eq!(c0.len() + c1.len(), 6);
}
#[test]
fn test_normalized_cut() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
// Good partition: separate cliques
let good_partition = vec![true, true, true, false, false, false];
let good_ncut = clustering.normalized_cut(&laplacian, &good_partition);
// Bad partition: mix cliques
let bad_partition = vec![true, false, true, false, true, false];
let bad_ncut = clustering.normalized_cut(&laplacian, &bad_partition);
// Good partition should have lower normalized cut
// (This is a heuristic test, actual values depend on graph structure)
assert!(good_ncut >= 0.0);
assert!(bad_ncut >= 0.0);
}
#[test]
fn test_single_node() {
let laplacian = ScaledLaplacian::from_sparse_adjacency(&[], 1);
let clustering = SpectralClustering::with_k(1);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 1);
assert_eq!(result.assignments[0], 0);
}
}