Files
wifi-densepose/examples/vibecast-7sense/tests/integration/analysis_test.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

813 lines
25 KiB
Rust

//! Integration tests for Analysis Context
//!
//! Tests for HDBSCAN clustering, cluster assignment, motif detection,
//! entropy calculation, and transition matrix operations.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::collections::{HashMap, HashSet};
// ============================================================================
// HDBSCAN Clustering Tests
// ============================================================================
mod hdbscan_clustering {
    use super::*;

    /// Two groups of lightly-noised vectors around well-separated bases
    /// should produce at least one cluster.
    #[test]
    fn test_cluster_with_clear_groups() {
        let service = MockClusteringService::with_params(5, 3);
        // Create two well-separated clusters
        let base1 = create_deterministic_vector(1536, 0);
        let base2 = create_deterministic_vector(1536, 1000);
        let mut embeddings = Vec::new();
        // Cluster 1: variations around base1
        for i in 0..15 {
            let noisy: Vec<f32> = base1.iter().map(|v| v + (i as f32 * 0.001)).collect();
            embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy)));
        }
        // Cluster 2: variations around base2
        for i in 0..15 {
            let noisy: Vec<f32> = base2.iter().map(|v| v + (i as f32 * 0.001)).collect();
            embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy)));
        }
        let clusters = service.cluster_hdbscan(&embeddings).unwrap();
        // `!is_empty()` is the idiomatic form of `len() >= 1` (clippy::len_zero).
        assert!(!clusters.is_empty(), "Should find at least one cluster");
    }

    /// Fewer points than `min_cluster_size` must not form any cluster.
    #[test]
    fn test_cluster_with_insufficient_data() {
        let service = MockClusteringService::with_params(10, 5);
        // Only 3 embeddings - below min_cluster_size
        let embeddings: Vec<Embedding> = (0..3).map(|_| create_test_embedding()).collect();
        let clusters = service.cluster_hdbscan(&embeddings).unwrap();
        // `is_empty()` is the idiomatic form of `len() == 0` (clippy::len_zero).
        assert!(clusters.is_empty(), "Should not form clusters with too few points");
    }

    /// Fixture clusters should be tagged with the HDBSCAN method.
    #[test]
    fn test_cluster_method_assignment() {
        let cluster = create_test_cluster();
        assert_eq!(cluster.method, ClusteringMethod::Hdbscan);
    }

    /// Cohesion and separation are normalized scores in [0, 1].
    #[test]
    fn test_cluster_cohesion_in_valid_range() {
        let cluster = create_test_cluster();
        // Range `contains` reads clearer than a pair of comparisons
        // (clippy::manual_range_contains).
        assert!((0.0..=1.0).contains(&cluster.cohesion));
        assert!((0.0..=1.0).contains(&cluster.separation));
    }

    /// A cluster fixture with N members reports exactly N member ids and
    /// a non-empty centroid.
    #[test]
    fn test_cluster_has_members() {
        let cluster = create_test_cluster_with_members(20);
        assert_eq!(cluster.member_ids.len(), 20);
        assert!(!cluster.centroid.is_empty());
    }

    /// Centroids are L2-normalized: their Euclidean norm is ~1.
    #[test]
    fn test_cluster_centroid_is_normalized() {
        let cluster = create_test_cluster_with_members(10);
        let norm: f32 = cluster.centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!(
            (norm - 1.0).abs() < 0.0001,
            "Centroid should be normalized"
        );
    }

    /// Fixture batches produce the requested number of clusters, each
    /// with a unique id.
    #[test]
    fn test_multiple_clusters() {
        let clusters = create_test_clusters(5);
        assert_eq!(clusters.len(), 5);
        // Each cluster should have unique ID
        let ids: HashSet<_> = clusters.iter().map(|c| c.id.0).collect();
        assert_eq!(ids.len(), 5, "All cluster IDs should be unique");
    }
}
// ============================================================================
// Cluster Assignment Tests
// ============================================================================
mod cluster_assignment {
    use super::*;

    /// An embedding identical to a centroid must be assigned to that
    /// centroid's cluster with positive confidence.
    #[test]
    fn test_assign_embedding_to_nearest_cluster() {
        let service = MockClusteringService::new();
        // Clusters with known centroids to match against.
        let clusters = create_test_clusters(3);
        // Query vector copied from the first cluster's centroid.
        let query = create_test_embedding_with_vector(clusters[0].centroid.clone());
        let result = service.assign_to_cluster(&query, &clusters).unwrap();
        assert!(result.is_some(), "Should assign to a cluster");
        let assigned = result.unwrap();
        assert_eq!(assigned.cluster_id, clusters[0].id);
        assert!(assigned.confidence > 0.0);
    }

    /// Confidence should decrease as distance to the centroid grows.
    #[test]
    fn test_assignment_confidence_based_on_distance() {
        let service = MockClusteringService::new();
        let clusters = create_test_clusters(2);
        // Exactly on the centroid.
        let near = create_test_embedding_with_vector(clusters[0].centroid.clone());
        let near_result = service
            .assign_to_cluster(&near, &clusters)
            .unwrap()
            .unwrap();
        // Shifted away from the centroid, then re-normalized.
        let shifted: Vec<f32> = clusters[0].centroid.iter().map(|v| v + 0.5).collect();
        let far = create_test_embedding_with_vector(l2_normalize(&shifted));
        let far_result = service
            .assign_to_cluster(&far, &clusters)
            .unwrap()
            .unwrap();
        assert!(
            near_result.confidence > far_result.confidence,
            "Closer embeddings should have higher confidence"
        );
    }

    /// With no clusters available there is nothing to assign to.
    #[test]
    fn test_no_assignment_to_empty_clusters() {
        let service = MockClusteringService::new();
        let query = create_test_embedding();
        let no_clusters: Vec<Cluster> = Vec::new();
        let result = service.assign_to_cluster(&query, &no_clusters).unwrap();
        assert!(result.is_none());
    }

    /// Assignments carry a non-negative distance-to-centroid metric.
    #[test]
    fn test_assignment_includes_distance_to_centroid() {
        let service = MockClusteringService::new();
        let clusters = create_test_clusters(1);
        let query = create_test_embedding();
        let assigned = service
            .assign_to_cluster(&query, &clusters)
            .unwrap()
            .unwrap();
        assert!(
            assigned.distance_to_centroid >= 0.0,
            "Distance should be non-negative"
        );
    }

    /// An embedding equidistant from two centroids should not be assigned
    /// with near-certain confidence.
    #[test]
    fn test_soft_assignment_concept() {
        let service = MockClusteringService::new();
        // Two distinct centroid bases.
        let base1 = create_deterministic_vector(1536, 0);
        let base2 = create_deterministic_vector(1536, 100);
        // Both clusters share everything except the centroid.
        let make_cluster = |centroid: Vec<f32>| Cluster {
            id: ClusterId::new(),
            method: ClusteringMethod::Hdbscan,
            member_ids: vec![],
            centroid,
            cohesion: 0.8,
            separation: 0.6,
        };
        let clusters = vec![
            make_cluster(l2_normalize(&base1)),
            make_cluster(l2_normalize(&base2)),
        ];
        // Element-wise midpoint between the two bases.
        let midpoint: Vec<f32> = base1
            .iter()
            .zip(base2.iter())
            .map(|(x, y)| (x + y) / 2.0)
            .collect();
        let boundary = create_test_embedding_with_vector(l2_normalize(&midpoint));
        let assigned = service
            .assign_to_cluster(&boundary, &clusters)
            .unwrap()
            .unwrap();
        // Confidence should reflect the ambiguity of a boundary point.
        assert!(assigned.confidence < 0.9, "Boundary point should have lower confidence");
    }
}
// ============================================================================
// Motif Detection Tests
// ============================================================================
mod motif_detection {
    use super::*;

    /// A repeated [0,1,2] subsequence across sequences should surface as
    /// a motif of length >= 2.
    #[test]
    fn test_detect_motifs_in_sequences() {
        let service = MockMotifDetectionService::new();
        // Create sequences with repeating patterns
        let cluster_ids: Vec<ClusterId> = (0..5).map(|_| ClusterId::new()).collect();
        let sequences: Vec<Vec<ClusterId>> = vec![
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[3],
            ],
            vec![
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
        ];
        let motifs = service.detect_motifs(&sequences).unwrap();
        // Should find the [0,1,2] pattern that appears multiple times
        assert!(
            motifs.iter().any(|m| m.pattern.len() >= 2),
            "Should find at least one motif"
        );
    }

    /// Fixture motifs have a positive occurrence count and a 3-element
    /// pattern.
    #[test]
    fn test_motif_occurrence_count() {
        let motif = create_test_motif();
        assert!(motif.occurrence_count > 0);
        assert_eq!(motif.pattern.len(), 3);
    }

    /// Motif confidence is a normalized score.
    #[test]
    fn test_motif_confidence_calculation() {
        let motif = create_test_motif();
        assert!(
            motif.confidence >= 0.0 && motif.confidence <= 1.0,
            "Confidence should be in [0, 1]"
        );
    }

    /// Sequences of freshly-minted (all-unique) cluster ids contain no
    /// repeats for the detector to latch onto.
    #[test]
    fn test_no_motifs_in_random_sequences() {
        let service = MockMotifDetectionService::new();
        // Create completely random sequences with no patterns
        let sequences: Vec<Vec<ClusterId>> = (0..5)
            .map(|_| (0..10).map(|_| ClusterId::new()).collect())
            .collect();
        // Random sequences are unlikely to have recurring motifs (though
        // technically possible with the mock implementation), so no
        // assertion is made on the result. The leading underscore
        // documents that and silences the unused-variable warning the
        // original `let motifs` produced.
        let _motifs = service.detect_motifs(&sequences).unwrap();
    }

    /// Detecting motifs over no sequences yields no motifs.
    #[test]
    fn test_empty_sequence_handling() {
        let service = MockMotifDetectionService::new();
        let empty_sequences: Vec<Vec<ClusterId>> = vec![];
        let motifs = service.detect_motifs(&empty_sequences).unwrap();
        assert_eq!(motifs.len(), 0);
    }

    /// Average duration scales with pattern length and segment duration.
    #[test]
    fn test_motif_duration_estimation() {
        let motif = create_test_motif();
        // 3-element motif at 5s per segment
        assert!(motif.avg_duration_ms >= 5000);
    }
}
// ============================================================================
// Entropy Calculation Tests
// ============================================================================
mod entropy_calculation {
    use super::*;

    /// A uniform transition matrix attains the maximum entropy rate,
    /// log2(n) bits per step.
    #[test]
    fn test_entropy_rate_uniform_distribution() {
        let n = 4;
        let states: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        let p = 1.0 / n as f32;
        let matrix = TransitionMatrix {
            cluster_ids: states.clone(),
            probabilities: vec![vec![p; n]; n],
            observations: vec![vec![10; n]; n],
        };
        let entropy = compute_entropy_rate(&matrix);
        // log2(4) = 2 bits for n = 4.
        let max_entropy = (n as f32).log2();
        assert!(
            (entropy - max_entropy).abs() < 0.1,
            "Uniform distribution should have maximum entropy: {} vs {}",
            entropy,
            max_entropy
        );
    }

    /// A deterministic cycle (each state always moves to the next) has
    /// an entropy rate of zero.
    #[test]
    fn test_entropy_rate_deterministic() {
        let n = 4;
        let states: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        // Row i puts all mass on state (i + 1) mod n.
        let mut probabilities = vec![vec![0.0; n]; n];
        for (i, row) in probabilities.iter_mut().enumerate() {
            row[(i + 1) % n] = 1.0;
        }
        let matrix = TransitionMatrix {
            cluster_ids: states,
            probabilities,
            observations: vec![vec![10; n]; n],
        };
        let entropy = compute_entropy_rate(&matrix);
        assert!(
            entropy < 0.1,
            "Deterministic transitions should have near-zero entropy: {}",
            entropy
        );
    }

    /// Entropy is non-negative for any generated matrix.
    #[test]
    fn test_entropy_rate_non_negative() {
        for _ in 0..10 {
            let sample = create_test_transition_matrix(5);
            let entropy = compute_entropy_rate(&sample);
            assert!(
                entropy >= 0.0,
                "Entropy should never be negative: {}",
                entropy
            );
        }
    }

    /// A sticky matrix (0.8 self-loops) must have lower entropy than a
    /// fully uniform one.
    #[test]
    fn test_entropy_increases_with_randomness() {
        let n = 4;
        let states: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        // Predictable: heavy self-loops, remaining mass spread evenly.
        let off_diag = 0.2 / (n - 1) as f32;
        let sticky_probs: Vec<Vec<f32>> = (0..n)
            .map(|i| {
                (0..n)
                    .map(|j| if i == j { 0.8 } else { off_diag })
                    .collect()
            })
            .collect();
        let low_entropy_matrix = TransitionMatrix {
            cluster_ids: states.clone(),
            probabilities: sticky_probs,
            observations: vec![vec![10; n]; n],
        };
        // Unpredictable: uniform over all states.
        let p = 1.0 / n as f32;
        let high_entropy_matrix = TransitionMatrix {
            cluster_ids: states,
            probabilities: vec![vec![p; n]; n],
            observations: vec![vec![10; n]; n],
        };
        let low_entropy = compute_entropy_rate(&low_entropy_matrix);
        let high_entropy = compute_entropy_rate(&high_entropy_matrix);
        assert!(
            high_entropy > low_entropy,
            "More uniform distribution should have higher entropy: {} vs {}",
            high_entropy,
            low_entropy
        );
    }

    /// A matrix with no states has zero entropy by convention.
    #[test]
    fn test_empty_matrix_entropy() {
        let empty = TransitionMatrix {
            cluster_ids: vec![],
            probabilities: vec![],
            observations: vec![],
        };
        assert_eq!(compute_entropy_rate(&empty), 0.0);
    }
}
// ============================================================================
// Transition Matrix Tests
// ============================================================================
mod transition_matrix {
    use super::*;

    /// The fixture produces an n x n matrix with one row/column per
    /// cluster id.
    #[test]
    fn test_create_transition_matrix() {
        let matrix = create_test_transition_matrix(5);
        assert_eq!(matrix.cluster_ids.len(), 5);
        assert_eq!(matrix.probabilities.len(), 5);
        assert_eq!(matrix.probabilities[0].len(), 5);
    }

    /// Each row is a probability distribution: it must sum to 1.
    #[test]
    fn test_transition_matrix_rows_sum_to_one() {
        let matrix = create_test_transition_matrix(5);
        for (i, row) in matrix.probabilities.iter().enumerate() {
            let row_sum: f32 = row.iter().copied().sum();
            assert!(
                (row_sum - 1.0).abs() < 0.0001,
                "Row {} should sum to 1.0, got {}",
                i,
                row_sum
            );
        }
    }

    /// No cell of the probability matrix may be negative.
    #[test]
    fn test_transition_matrix_probabilities_non_negative() {
        let matrix = create_test_transition_matrix(5);
        for (i, row) in matrix.probabilities.iter().enumerate() {
            for (j, prob) in row.iter().copied().enumerate() {
                assert!(
                    prob >= 0.0,
                    "Probability at ({}, {}) should be non-negative: {}",
                    i,
                    j,
                    prob
                );
            }
        }
    }

    /// The observation-count matrix mirrors the probability matrix's
    /// shape and holds positive counts.
    #[test]
    fn test_observations_matrix() {
        let matrix = create_test_transition_matrix(4);
        assert_eq!(matrix.observations.len(), 4);
        assert_eq!(matrix.observations[0].len(), 4);
        // All observations should be positive
        for row in &matrix.observations {
            for &count in row {
                assert!(count > 0);
            }
        }
    }

    /// Counting adjacent pairs over a known sequence reproduces the
    /// expected first-order transition counts.
    #[test]
    fn test_build_transition_matrix_from_sequence() {
        let cluster_ids: Vec<ClusterId> = (0..3).map(|_| ClusterId::new()).collect();
        let sequence = vec![
            cluster_ids[0],
            cluster_ids[1],
            cluster_ids[0],
            cluster_ids[2],
            cluster_ids[1],
            cluster_ids[0],
        ];
        // Count transitions
        let mut counts: HashMap<(usize, usize), u32> = HashMap::new();
        for window in sequence.windows(2) {
            let from_idx = cluster_ids.iter().position(|c| *c == window[0]).unwrap();
            let to_idx = cluster_ids.iter().position(|c| *c == window[1]).unwrap();
            *counts.entry((from_idx, to_idx)).or_insert(0) += 1;
        }
        // Sequence: [0, 1, 0, 2, 1, 0]
        // Transitions: 0->1 (1x), 1->0 (2x), 0->2 (1x), 2->1 (1x)
        assert_eq!(*counts.get(&(0, 1)).unwrap_or(&0), 1);
        assert_eq!(*counts.get(&(1, 0)).unwrap_or(&0), 2);
        // The comment above lists 0->2 and 2->1 as well; assert them so
        // the test actually verifies every expected transition.
        assert_eq!(*counts.get(&(0, 2)).unwrap_or(&0), 1);
        assert_eq!(*counts.get(&(2, 1)).unwrap_or(&0), 1);
        // A 6-element sequence yields exactly 5 transitions in total.
        assert_eq!(counts.values().sum::<u32>(), 5);
    }
}
// ============================================================================
// Sequence Analysis Tests
// ============================================================================
mod sequence_analysis {
    use super::*;

    /// Generated segments must be non-overlapping and in ascending
    /// temporal order.
    #[test]
    fn test_sequence_segment_ordering() {
        let segments = create_segment_sequence(10, 500);
        // `windows(2)` avoids the `len() - 1` underflow the original
        // index loop would panic on if the sequence were ever empty,
        // and is the idiomatic adjacent-pair iteration.
        for pair in segments.windows(2) {
            assert!(
                pair[0].end_ms <= pair[1].start_ms,
                "Segments should be in temporal order"
            );
        }
    }

    /// Lower entropy rate corresponds to more stereotyped (predictable)
    /// transition behavior.
    #[test]
    fn test_stereotypy_calculation() {
        // Stereotypy = measure of how predictable transitions are
        // High stereotypy = consistent patterns
        // Low stereotypy = varied patterns
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        // Highly stereotyped (deterministic cycle)
        let mut stereotyped_probs = vec![vec![0.0; n]; n];
        for i in 0..n {
            stereotyped_probs[i][(i + 1) % n] = 1.0;
        }
        // Low stereotypy (uniform)
        let uniform_prob = 1.0 / n as f32;
        let stereotyped_matrix = TransitionMatrix {
            cluster_ids: cluster_ids.clone(),
            probabilities: stereotyped_probs,
            observations: vec![vec![10; n]; n],
        };
        let varied_matrix = TransitionMatrix {
            cluster_ids,
            probabilities: vec![vec![uniform_prob; n]; n],
            observations: vec![vec![10; n]; n],
        };
        let stereotyped_entropy = compute_entropy_rate(&stereotyped_matrix);
        let varied_entropy = compute_entropy_rate(&varied_matrix);
        // Stereotyped should have lower entropy (more predictable)
        assert!(stereotyped_entropy < varied_entropy);
    }

    /// Motif density = fraction of segments that belong to any motif.
    #[test]
    fn test_motif_density() {
        let total_segments = 100;
        let motif_segments = 60;
        let density = motif_segments as f32 / total_segments as f32;
        assert!((density - 0.6).abs() < 0.001);
    }
}
// ============================================================================
// Anomaly Detection Tests
// ============================================================================
mod anomaly_detection {
    use super::*;

    /// Simplified local-outlier-factor proxy: the mean cosine distance
    /// from `embedding` to `neighbors`, scaled by 10. Real LOF compares
    /// local densities to the neighbors' densities; the scaled mean is
    /// sufficient for these tests. Returns 1.0 when there are no
    /// neighbors to compare against.
    fn compute_local_outlier_factor(
        embedding: &Embedding,
        neighbors: &[Embedding],
    ) -> f32 {
        if neighbors.is_empty() {
            return 1.0;
        }
        // Mean distance to the neighborhood.
        let total: f32 = neighbors
            .iter()
            .map(|n| cosine_distance(&embedding.vector, &n.vector))
            .sum();
        let mean = total / neighbors.len() as f32;
        // LOF > 1 indicates anomaly; 10.0 is a scale factor for detection.
        mean * 10.0
    }

    /// A point far from a tight cluster should score well above 1.0.
    #[test]
    fn test_detect_outlier_embedding() {
        // Tight crowd of near-identical embeddings.
        let anchor = create_deterministic_vector(1536, 0);
        let crowd: Vec<Embedding> = (0..20)
            .map(|step| {
                let jittered: Vec<f32> =
                    anchor.iter().map(|v| v + (step as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&jittered))
            })
            .collect();
        // A vector built from a very different seed.
        let stray_base = create_deterministic_vector(1536, 1000);
        let stray = create_test_embedding_with_vector(l2_normalize(&stray_base));
        let lof = compute_local_outlier_factor(&stray, &crowd);
        assert!(lof > 1.0, "Outlier should have high LOF: {}", lof);
    }

    /// A point inside the cluster should keep a modest score.
    #[test]
    fn test_normal_embedding_not_anomalous() {
        let anchor = create_deterministic_vector(1536, 0);
        let crowd: Vec<Embedding> = (0..20)
            .map(|step| {
                let jittered: Vec<f32> =
                    anchor.iter().map(|v| v + (step as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&jittered))
            })
            .collect();
        // Score one member against everyone else in the crowd.
        let probe = &crowd[10];
        let others: Vec<Embedding> = crowd
            .iter()
            .filter(|e| e.id != probe.id)
            .cloned()
            .collect();
        let lof = compute_local_outlier_factor(probe, &others);
        assert!(
            lof < 5.0,
            "Normal point should have low LOF: {}",
            lof
        );
    }
}
// ============================================================================
// Cluster Validation Tests
// ============================================================================
mod cluster_validation {
    use super::*;

    /// Silhouette coefficient for one point: (b - a) / max(a, b), where
    /// `a` is the mean distance to the rest of its own cluster and `b`
    /// is the mean distance to the other cluster (1.0 when that cluster
    /// is empty). Returns 0.0 when the own cluster is empty or both
    /// means are zero.
    /// NOTE(review): `a` divides by len - 1, which assumes `embedding`
    /// itself appears in `own_cluster_members` — confirm at call sites.
    fn compute_silhouette_score(
        embedding: &Embedding,
        own_cluster_members: &[Embedding],
        other_cluster_members: &[Embedding],
    ) -> f32 {
        if own_cluster_members.is_empty() {
            return 0.0;
        }
        // a = average distance to own cluster members (self excluded).
        let intra_total: f32 = own_cluster_members
            .iter()
            .filter(|e| e.id != embedding.id)
            .map(|e| cosine_distance(&embedding.vector, &e.vector))
            .sum();
        let a = intra_total / (own_cluster_members.len() - 1).max(1) as f32;
        // b = average distance to the nearest other cluster.
        let b = if other_cluster_members.is_empty() {
            1.0
        } else {
            let inter_total: f32 = other_cluster_members
                .iter()
                .map(|e| cosine_distance(&embedding.vector, &e.vector))
                .sum();
            inter_total / other_cluster_members.len() as f32
        };
        // Silhouette = (b - a) / max(a, b), guarding the zero case.
        let denom = a.max(b);
        if denom > 0.0 { (b - a) / denom } else { 0.0 }
    }

    /// Points in well-separated clusters should score positively.
    #[test]
    fn test_silhouette_score_well_separated_clusters() {
        let seed_a = create_deterministic_vector(1536, 0);
        let seed_b = create_deterministic_vector(1536, 1000);
        let group_a: Vec<Embedding> = (0..10)
            .map(|k| {
                let jittered: Vec<f32> =
                    seed_a.iter().map(|v| v + (k as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&jittered))
            })
            .collect();
        let group_b: Vec<Embedding> = (0..10)
            .map(|k| {
                let jittered: Vec<f32> =
                    seed_b.iter().map(|v| v + (k as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&jittered))
            })
            .collect();
        // Score a point from the middle of group A.
        let score = compute_silhouette_score(&group_a[5], &group_a, &group_b);
        assert!(
            score > 0.0,
            "Well-separated clusters should have positive silhouette: {}",
            score
        );
    }

    /// The silhouette coefficient is bounded to [-1, 1] by construction.
    #[test]
    fn test_silhouette_score_range() {
        let batch = create_embedding_batch(20);
        // Arbitrary split into two halves.
        let half_a: Vec<Embedding> = batch[0..10].to_vec();
        let half_b: Vec<Embedding> = batch[10..20].to_vec();
        for emb in &half_a {
            let score = compute_silhouette_score(emb, &half_a, &half_b);
            assert!(
                (-1.0..=1.0).contains(&score),
                "Silhouette should be in [-1, 1]: {}",
                score
            );
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: embeddings -> clustering -> transition
    /// matrix -> entropy -> motif detection.
    #[test]
    fn test_analysis_integration_smoke_test() {
        // Fixture embeddings.
        let batch = create_embedding_batch(50);
        // Cluster them.
        let clustering = MockClusteringService::with_params(5, 3);
        let found = clustering.cluster_hdbscan(&batch).unwrap();
        // Build a matrix sized by the discovered clusters (min 3 states).
        let matrix = create_test_transition_matrix(found.len().max(3));
        // Entropy of any valid matrix is non-negative.
        let entropy = compute_entropy_rate(&matrix);
        assert!(entropy >= 0.0);
        // Detect motifs over trivial per-cluster sequences.
        let motif_service = MockMotifDetectionService::new();
        let sequences: Vec<Vec<ClusterId>> = found
            .iter()
            .map(|c| vec![c.id, c.id, c.id])
            .collect();
        let _motifs = motif_service.detect_motifs(&sequences).unwrap();
    }
}