wifi-densepose/crates/ruvector-math/src/spectral/clustering.rs
//! Spectral Clustering
//!
//! Graph partitioning using spectral methods: an approximate eigenvector
//! embedding of the scaled Laplacian (shifted power iteration with
//! deflation), followed by k-means on the embedded vertices.
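//!
//! # Example
//!
//! A minimal usage sketch, marked `ignore` because it depends on the
//! surrounding crate; `ScaledLaplacian::from_sparse_adjacency` is the
//! constructor used by the tests below:
//!
//! ```ignore
//! // Assuming `ScaledLaplacian` and `SpectralClustering` are in scope.
//! // A path graph on three vertices: 0 - 1 - 2.
//! let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];
//! let laplacian = ScaledLaplacian::from_sparse_adjacency(&edges, 3);
//! let result = SpectralClustering::with_k(2).cluster(&laplacian);
//! assert_eq!(result.assignments.len(), 3);
//! ```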
use super::ScaledLaplacian;
/// Spectral clustering configuration
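///
/// Construction sketch (field values are illustrative):
///
/// ```ignore
/// let config = ClusteringConfig {
///     k: 4,
///     num_eigenvectors: 4,
///     ..Default::default()
/// };
/// let clustering = SpectralClustering::new(config);
/// ```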
#[derive(Debug, Clone)]
pub struct ClusteringConfig {
/// Number of clusters
pub k: usize,
/// Number of eigenvectors to use
pub num_eigenvectors: usize,
/// Power iteration steps for eigenvector approximation
pub power_iters: usize,
/// K-means iterations
pub kmeans_iters: usize,
/// Random seed
pub seed: u64,
}
impl Default for ClusteringConfig {
fn default() -> Self {
Self {
k: 2,
num_eigenvectors: 10,
power_iters: 50,
kmeans_iters: 20,
seed: 42,
}
}
}
/// Spectral clustering result
#[derive(Debug, Clone)]
pub struct ClusteringResult {
/// Cluster assignment for each vertex
pub assignments: Vec<usize>,
/// Eigenvector embedding: one vector of length `n` per computed
/// eigenvector, so `embedding[d][i]` is coordinate `d` of vertex `i`
pub embedding: Vec<Vec<f64>>,
/// Number of clusters
pub k: usize,
}
impl ClusteringResult {
/// Get vertices in cluster c
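///
/// ```ignore
/// // Given a two-cluster `ClusteringResult`, every vertex index shows
/// // up in exactly one of the two membership lists:
/// let members_0 = result.cluster(0);
/// let members_1 = result.cluster(1);
/// assert_eq!(members_0.len() + members_1.len(), result.assignments.len());
/// ```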
pub fn cluster(&self, c: usize) -> Vec<usize> {
self.assignments
.iter()
.enumerate()
.filter(|(_, &a)| a == c)
.map(|(i, _)| i)
.collect()
}
/// Cluster sizes
pub fn cluster_sizes(&self) -> Vec<usize> {
let mut sizes = vec![0; self.k];
for &a in &self.assignments {
if a < self.k {
sizes[a] += 1;
}
}
sizes
}
}
/// Spectral clustering
#[derive(Debug, Clone)]
pub struct SpectralClustering {
/// Configuration
config: ClusteringConfig,
}
impl SpectralClustering {
/// Create with configuration
pub fn new(config: ClusteringConfig) -> Self {
Self { config }
}
/// Create with just number of clusters
pub fn with_k(k: usize) -> Self {
Self::new(ClusteringConfig {
k,
num_eigenvectors: k,
..Default::default()
})
}
/// Cluster graph using normalized Laplacian eigenvectors
pub fn cluster(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
let n = laplacian.n;
let k = self.config.k.min(n);
let num_eig = self.config.num_eigenvectors.min(n);
// Compute approximate eigenvectors of the Laplacian. We want the
// k smallest non-trivial eigenvalues (the smoothest eigenvectors),
// found by shifted power iteration with deflation in compute_embedding.
let embedding = self.compute_embedding(laplacian, num_eig);
// Run k-means on embedding
let assignments = self.kmeans(&embedding, k);
ClusteringResult {
assignments,
embedding,
k,
}
}
/// Cluster using Fiedler vector (k=2)
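///
/// Sketch (assuming `laplacian` was built as in the tests below):
///
/// ```ignore
/// let result = SpectralClustering::with_k(2).bipartition(&laplacian);
/// // Vertices with a non-negative Fiedler entry go to cluster 0,
/// // the rest to cluster 1.
/// assert_eq!(result.k, 2);
/// ```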
pub fn bipartition(&self, laplacian: &ScaledLaplacian) -> ClusteringResult {
// Compute Fiedler vector (second smallest eigenvector)
let fiedler = self.compute_fiedler(laplacian);
// Partition by sign
let assignments: Vec<usize> = fiedler
.iter()
.map(|&v| if v >= 0.0 { 0 } else { 1 })
.collect();
ClusteringResult {
assignments,
embedding: vec![fiedler],
k: 2,
}
}
/// Compute spectral embedding (k smallest non-trivial eigenvectors)
fn compute_embedding(&self, laplacian: &ScaledLaplacian, k: usize) -> Vec<Vec<f64>> {
let n = laplacian.n;
if k == 0 || n == 0 {
return vec![];
}
// Initialize deterministic pseudo-random vectors in [-1, 1)
// (multiplicative hashing of (i, j, seed), reduced mod 2^32 so the
// result stays in range and cannot overflow on 32-bit targets)
let mut vectors: Vec<Vec<f64>> = (0..k)
.map(|i| {
(0..n)
.map(|j| {
let h = (j as u64)
.wrapping_mul(2654435769)
.wrapping_add((i as u64).wrapping_mul(1103515245))
.wrapping_add(self.config.seed)
% 4294967296;
(h as f64 / 4294967296.0) * 2.0 - 1.0
})
.collect()
})
.collect();
// Shifted power iteration: (shift·I - L) has its largest eigenvalues
// exactly where L has its smallest, so repeated application converges
// to the smoothest (smallest-eigenvalue) eigenvectors of L.
for _ in 0..self.config.power_iters {
for i in 0..k {
// Compute y = (shift·I - L) x
let mut y = vec![0.0; n];
let lx = laplacian.apply(&vectors[i]);
let shift = 2.0; // Upper bound on the scaled Laplacian's spectrum
for j in 0..n {
y[j] = shift * vectors[i][j] - lx[j];
}
// Orthogonalize against previous vectors and constant vector
// First, remove constant component (eigenvalue 0)
let mean: f64 = y.iter().sum::<f64>() / n as f64;
for j in 0..n {
y[j] -= mean;
}
// Then orthogonalize against previous eigenvectors
for prev in 0..i {
let dot: f64 = y.iter().zip(vectors[prev].iter()).map(|(a, b)| a * b).sum();
for j in 0..n {
y[j] -= dot * vectors[prev][j];
}
}
// Normalize
let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm > 1e-15 {
for j in 0..n {
y[j] /= norm;
}
}
vectors[i] = y;
}
}
vectors
}
/// Compute Fiedler vector (second smallest eigenvector)
fn compute_fiedler(&self, laplacian: &ScaledLaplacian) -> Vec<f64> {
let embedding = self.compute_embedding(laplacian, 1);
if embedding.is_empty() {
return vec![0.0; laplacian.n];
}
embedding[0].clone()
}
/// K-means clustering on embedding
fn kmeans(&self, embedding: &[Vec<f64>], k: usize) -> Vec<usize> {
if embedding.is_empty() {
return vec![];
}
let n = embedding[0].len();
let dim = embedding.len();
if n == 0 || k == 0 {
return vec![];
}
// Initialize centroids (k-means++ style)
let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
// First centroid: a point chosen deterministically from the seed
let first = (self.config.seed as usize) % n;
centroids.push((0..dim).map(|d| embedding[d][first]).collect());
// Remaining centroids: pick each with probability proportional to its
// squared distance from the nearest existing centroid (k-means++)
for c in 1..k {
let mut distances: Vec<f64> = (0..n)
.map(|i| {
centroids
.iter()
.map(|c| {
(0..dim)
.map(|d| (embedding[d][i] - c[d]).powi(2))
.sum::<f64>()
})
.fold(f64::INFINITY, f64::min)
})
.collect();
let total: f64 = distances.iter().sum();
if total > 0.0 {
// Deterministic pseudo-random fraction in [0, 1), varied per
// centroid so successive rounds use different thresholds
let r = (self.config.seed.wrapping_add(c as u64).wrapping_mul(2654435769)
% 4294967296) as f64
/ 4294967296.0;
let threshold = r * total;
let mut cumsum = 0.0;
let mut chosen = 0;
for (i, &d) in distances.iter().enumerate() {
cumsum += d;
if cumsum >= threshold {
chosen = i;
break;
}
}
centroids.push((0..dim).map(|d| embedding[d][chosen]).collect());
} else {
// Degenerate case
centroids.push(vec![0.0; dim]);
}
}
// K-means iterations
let mut assignments = vec![0; n];
for _ in 0..self.config.kmeans_iters {
// Assign points to nearest centroid
for i in 0..n {
let mut best_cluster = 0;
let mut best_dist = f64::INFINITY;
for (c, centroid) in centroids.iter().enumerate() {
let dist: f64 = (0..dim)
.map(|d| (embedding[d][i] - centroid[d]).powi(2))
.sum();
if dist < best_dist {
best_dist = dist;
best_cluster = c;
}
}
assignments[i] = best_cluster;
}
// Update centroids
let mut counts = vec![0usize; k];
for centroid in centroids.iter_mut() {
for v in centroid.iter_mut() {
*v = 0.0;
}
}
for (i, &c) in assignments.iter().enumerate() {
counts[c] += 1;
for d in 0..dim {
centroids[c][d] += embedding[d][i];
}
}
for (c, centroid) in centroids.iter_mut().enumerate() {
if counts[c] > 0 {
for v in centroid.iter_mut() {
*v /= counts[c] as f64;
}
}
}
}
assignments
}
/// Compute normalized cut value for a bipartition
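///
/// Usage sketch comparing two bipartitions of the same graph (absolute
/// values depend on the Laplacian's scaling; the ordering is what matters):
///
/// ```ignore
/// let separate = vec![true, true, true, false, false, false];
/// let mixed = vec![true, false, true, false, true, false];
/// let clustering = SpectralClustering::with_k(2);
/// // Cutting fewer / lighter edges should yield the smaller NCut.
/// assert!(clustering.normalized_cut(&laplacian, &separate)
///     <= clustering.normalized_cut(&laplacian, &mixed));
/// ```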
pub fn normalized_cut(&self, laplacian: &ScaledLaplacian, partition: &[bool]) -> f64 {
let n = laplacian.n;
if n == 0 {
return 0.0;
}
// Compute cut and volumes
let mut cut = 0.0;
let mut vol_a = 0.0;
let mut vol_b = 0.0;
// For each entry in Laplacian
for &(i, j, v) in &laplacian.entries {
if i < n && j < n && i != j {
// This is an edge (negative Laplacian entry)
let w = -v; // Edge weight
if w > 0.0 && partition[i] != partition[j] {
cut += w;
}
}
if i == j && i < n {
// Diagonal = degree
if partition[i] {
vol_a += v;
} else {
vol_b += v;
}
}
}
// NCut = cut/vol(A) + cut/vol(B)
let ncut = if vol_a > 0.0 { cut / vol_a } else { 0.0 }
+ if vol_b > 0.0 { cut / vol_b } else { 0.0 };
ncut
}
}
#[cfg(test)]
mod tests {
use super::*;
fn two_cliques_graph() -> ScaledLaplacian {
// Two cliques of size 3 connected by one edge
let edges = vec![
// Clique 1
(0, 1, 1.0),
(0, 2, 1.0),
(1, 2, 1.0),
// Clique 2
(3, 4, 1.0),
(3, 5, 1.0),
(4, 5, 1.0),
// Bridge
(2, 3, 0.1),
];
ScaledLaplacian::from_sparse_adjacency(&edges, 6)
}
#[test]
fn test_spectral_clustering() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
// Should roughly separate the two cliques
let sizes = result.cluster_sizes();
assert_eq!(sizes.iter().sum::<usize>(), 6);
}
#[test]
fn test_bipartition() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.bipartition(&laplacian);
assert_eq!(result.assignments.len(), 6);
assert_eq!(result.k, 2);
}
#[test]
fn test_cluster_extraction() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
let result = clustering.cluster(&laplacian);
let c0 = result.cluster(0);
let c1 = result.cluster(1);
// All vertices assigned
assert_eq!(c0.len() + c1.len(), 6);
}
#[test]
fn test_normalized_cut() {
let laplacian = two_cliques_graph();
let clustering = SpectralClustering::with_k(2);
// Good partition: separate cliques
let good_partition = vec![true, true, true, false, false, false];
let good_ncut = clustering.normalized_cut(&laplacian, &good_partition);
// Bad partition: mix cliques
let bad_partition = vec![true, false, true, false, true, false];
let bad_ncut = clustering.normalized_cut(&laplacian, &bad_partition);
// Good partition should have lower normalized cut
// (This is a heuristic test, actual values depend on graph structure)
assert!(good_ncut >= 0.0);
assert!(bad_ncut >= 0.0);
}
#[test]
fn test_single_node() {
let laplacian = ScaledLaplacian::from_sparse_adjacency(&[], 1);
let clustering = SpectralClustering::with_k(1);
let result = clustering.cluster(&laplacian);
assert_eq!(result.assignments.len(), 1);
assert_eq!(result.assignments[0], 0);
}
}