//! Integration tests for Analysis Context //! //! Tests for HDBSCAN clustering, cluster assignment, motif detection, //! entropy calculation, and transition matrix operations. use vibecast_tests::fixtures::*; use vibecast_tests::mocks::*; use std::collections::{HashMap, HashSet}; // ============================================================================ // HDBSCAN Clustering Tests // ============================================================================ mod hdbscan_clustering { use super::*; #[test] fn test_cluster_with_clear_groups() { let service = MockClusteringService::with_params(5, 3); // Create two well-separated clusters let base1 = create_deterministic_vector(1536, 0); let base2 = create_deterministic_vector(1536, 1000); let mut embeddings = Vec::new(); // Cluster 1: variations around base1 for i in 0..15 { let noisy: Vec = base1.iter().map(|v| v + (i as f32 * 0.001)).collect(); embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy))); } // Cluster 2: variations around base2 for i in 0..15 { let noisy: Vec = base2.iter().map(|v| v + (i as f32 * 0.001)).collect(); embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy))); } let clusters = service.cluster_hdbscan(&embeddings).unwrap(); assert!(clusters.len() >= 1, "Should find at least one cluster"); } #[test] fn test_cluster_with_insufficient_data() { let service = MockClusteringService::with_params(10, 5); // Only 3 embeddings - below min_cluster_size let embeddings: Vec = (0..3).map(|_| create_test_embedding()).collect(); let clusters = service.cluster_hdbscan(&embeddings).unwrap(); assert_eq!(clusters.len(), 0, "Should not form clusters with too few points"); } #[test] fn test_cluster_method_assignment() { let cluster = create_test_cluster(); assert_eq!(cluster.method, ClusteringMethod::Hdbscan); } #[test] fn test_cluster_cohesion_in_valid_range() { let cluster = create_test_cluster(); assert!(cluster.cohesion >= 0.0 && cluster.cohesion <= 1.0); assert!(cluster.separation >= 0.0 && cluster.separation <= 1.0); } #[test] fn test_cluster_has_members() { let cluster = create_test_cluster_with_members(20); assert_eq!(cluster.member_ids.len(), 20); assert!(!cluster.centroid.is_empty()); } #[test] fn test_cluster_centroid_is_normalized() { let cluster = create_test_cluster_with_members(10); let norm: f32 = cluster.centroid.iter().map(|x| x * x).sum::().sqrt(); assert!( (norm - 1.0).abs() < 0.0001, "Centroid should be normalized" ); } #[test] fn test_multiple_clusters() { let clusters = create_test_clusters(5); assert_eq!(clusters.len(), 5); // Each cluster should have unique ID let ids: HashSet<_> = clusters.iter().map(|c| c.id.0).collect(); assert_eq!(ids.len(), 5, "All cluster IDs should be unique"); } } // ============================================================================ // Cluster Assignment Tests // ============================================================================ mod cluster_assignment { use super::*; #[test] fn test_assign_embedding_to_nearest_cluster() { let service = MockClusteringService::new(); // Create clusters with known centroids let clusters = create_test_clusters(3); // Create embedding similar to first cluster's centroid let embedding = create_test_embedding_with_vector(clusters[0].centroid.clone()); let assignment = service.assign_to_cluster(&embedding, &clusters).unwrap(); assert!(assignment.is_some(), "Should assign to a cluster"); let assignment = assignment.unwrap(); assert_eq!(assignment.cluster_id, clusters[0].id); assert!(assignment.confidence > 0.0); } #[test] fn test_assignment_confidence_based_on_distance() { let service = MockClusteringService::new(); let clusters = create_test_clusters(2); // Very close to centroid let close_embedding = create_test_embedding_with_vector(clusters[0].centroid.clone()); let close_assignment = service .assign_to_cluster(&close_embedding, &clusters) .unwrap() .unwrap(); // Farther from centroid let far_vector: Vec = clusters[0] .centroid .iter() .map(|v| v + 0.5) .collect(); let far_embedding = create_test_embedding_with_vector(l2_normalize(&far_vector)); let far_assignment = service .assign_to_cluster(&far_embedding, &clusters) .unwrap() .unwrap(); assert!( close_assignment.confidence > far_assignment.confidence, "Closer embeddings should have higher confidence" ); } #[test] fn test_no_assignment_to_empty_clusters() { let service = MockClusteringService::new(); let embedding = create_test_embedding(); let empty_clusters: Vec = vec![]; let assignment = service.assign_to_cluster(&embedding, &empty_clusters).unwrap(); assert!(assignment.is_none()); } #[test] fn test_assignment_includes_distance_to_centroid() { let service = MockClusteringService::new(); let clusters = create_test_clusters(1); let embedding = create_test_embedding(); let assignment = service .assign_to_cluster(&embedding, &clusters) .unwrap() .unwrap(); assert!( assignment.distance_to_centroid >= 0.0, "Distance should be non-negative" ); } #[test] fn test_soft_assignment_concept() { // Test that an embedding near cluster boundary has lower confidence let service = MockClusteringService::new(); // Create two clusters let base1 = create_deterministic_vector(1536, 0); let base2 = create_deterministic_vector(1536, 100); let clusters = vec![ Cluster { id: ClusterId::new(), method: ClusteringMethod::Hdbscan, member_ids: vec![], centroid: l2_normalize(&base1), cohesion: 0.8, separation: 0.6, }, Cluster { id: ClusterId::new(), method: ClusteringMethod::Hdbscan, member_ids: vec![], centroid: l2_normalize(&base2), cohesion: 0.8, separation: 0.6, }, ]; // Point exactly between clusters let midpoint: Vec = base1 .iter() .zip(base2.iter()) .map(|(a, b)| (a + b) / 2.0) .collect(); let mid_embedding = create_test_embedding_with_vector(l2_normalize(&midpoint)); let assignment = service .assign_to_cluster(&mid_embedding, &clusters) .unwrap() .unwrap(); // Confidence should reflect uncertainty assert!(assignment.confidence < 0.9, "Boundary point should have lower confidence"); } } // ============================================================================ // Motif Detection Tests // ============================================================================ mod motif_detection { use super::*; #[test] fn test_detect_motifs_in_sequences() { let service = MockMotifDetectionService::new(); // Create sequences with repeating patterns let cluster_ids: Vec = (0..5).map(|_| ClusterId::new()).collect(); let sequences: Vec> = vec![ vec![ cluster_ids[0], cluster_ids[1], cluster_ids[2], cluster_ids[0], cluster_ids[1], cluster_ids[2], ], vec![ cluster_ids[0], cluster_ids[1], cluster_ids[2], cluster_ids[3], ], vec![ cluster_ids[2], cluster_ids[0], cluster_ids[1], cluster_ids[2], ], ]; let motifs = service.detect_motifs(&sequences).unwrap(); // Should find the [0,1,2] pattern that appears multiple times assert!( motifs.iter().any(|m| m.pattern.len() >= 2), "Should find at least one motif" ); } #[test] fn test_motif_occurrence_count() { let motif = create_test_motif(); assert!(motif.occurrence_count > 0); assert_eq!(motif.pattern.len(), 3); } #[test] fn test_motif_confidence_calculation() { let motif = create_test_motif(); assert!( motif.confidence >= 0.0 && motif.confidence <= 1.0, "Confidence should be in [0, 1]" ); } #[test] fn test_no_motifs_in_random_sequences() { let service = MockMotifDetectionService::new(); // Create completely random sequences with no patterns let sequences: Vec> = (0..5) .map(|_| (0..10).map(|_| ClusterId::new()).collect()) .collect(); let motifs = service.detect_motifs(&sequences).unwrap(); // Random sequences unlikely to have recurring motifs // (though technically possible with mock implementation) } #[test] fn test_empty_sequence_handling() { let service = MockMotifDetectionService::new(); let empty_sequences: Vec> = vec![]; let motifs = service.detect_motifs(&empty_sequences).unwrap(); assert_eq!(motifs.len(), 0); } #[test] fn test_motif_duration_estimation() { let motif = create_test_motif(); // 3-element motif at 5s per segment assert!(motif.avg_duration_ms >= 5000); } } // ============================================================================ // Entropy Calculation Tests // ============================================================================ mod entropy_calculation { use super::*; #[test] fn test_entropy_rate_uniform_distribution() { // Create transition matrix with uniform distribution let n = 4; let cluster_ids: Vec = (0..n).map(|_| ClusterId::new()).collect(); let uniform_prob = 1.0 / n as f32; let matrix = TransitionMatrix { cluster_ids: cluster_ids.clone(), probabilities: vec![vec![uniform_prob; n]; n], observations: vec![vec![10; n]; n], }; let entropy = compute_entropy_rate(&matrix); // Maximum entropy for uniform distribution = log2(n) = 2 bits for n=4 let max_entropy = (n as f32).log2(); assert!( (entropy - max_entropy).abs() < 0.1, "Uniform distribution should have maximum entropy: {} vs {}", entropy, max_entropy ); } #[test] fn test_entropy_rate_deterministic() { // Create transition matrix with deterministic transitions let n = 4; let cluster_ids: Vec = (0..n).map(|_| ClusterId::new()).collect(); // Each state always transitions to the next state let mut probabilities = vec![vec![0.0; n]; n]; for i in 0..n { probabilities[i][(i + 1) % n] = 1.0; } let matrix = TransitionMatrix { cluster_ids, probabilities, observations: vec![vec![10; n]; n], }; let entropy = compute_entropy_rate(&matrix); // Deterministic transitions should have zero entropy assert!( entropy < 0.1, "Deterministic transitions should have near-zero entropy: {}", entropy ); } #[test] fn test_entropy_rate_non_negative() { for _ in 0..10 { let matrix = create_test_transition_matrix(5); let entropy = compute_entropy_rate(&matrix); assert!( entropy >= 0.0, "Entropy should never be negative: {}", entropy ); } } #[test] fn test_entropy_increases_with_randomness() { // Low entropy (predictable) let n = 4; let cluster_ids: Vec = (0..n).map(|_| ClusterId::new()).collect(); let mut low_rand_probs = vec![vec![0.0; n]; n]; for i in 0..n { low_rand_probs[i][i] = 0.8; // High self-loop probability for j in 0..n { if i != j { low_rand_probs[i][j] = 0.2 / (n - 1) as f32; } } } let low_entropy_matrix = TransitionMatrix { cluster_ids: cluster_ids.clone(), probabilities: low_rand_probs, observations: vec![vec![10; n]; n], }; // High entropy (uniform) let uniform_prob = 1.0 / n as f32; let high_entropy_matrix = TransitionMatrix { cluster_ids, probabilities: vec![vec![uniform_prob; n]; n], observations: vec![vec![10; n]; n], }; let low_entropy = compute_entropy_rate(&low_entropy_matrix); let high_entropy = compute_entropy_rate(&high_entropy_matrix); assert!( high_entropy > low_entropy, "More uniform distribution should have higher entropy: {} vs {}", high_entropy, low_entropy ); } #[test] fn test_empty_matrix_entropy() { let matrix = TransitionMatrix { cluster_ids: vec![], probabilities: vec![], observations: vec![], }; let entropy = compute_entropy_rate(&matrix); assert_eq!(entropy, 0.0); } } // ============================================================================ // Transition Matrix Tests // ============================================================================ mod transition_matrix { use super::*; #[test] fn test_create_transition_matrix() { let matrix = create_test_transition_matrix(5); assert_eq!(matrix.cluster_ids.len(), 5); assert_eq!(matrix.probabilities.len(), 5); assert_eq!(matrix.probabilities[0].len(), 5); } #[test] fn test_transition_matrix_rows_sum_to_one() { let matrix = create_test_transition_matrix(5); for (i, row) in matrix.probabilities.iter().enumerate() { let row_sum: f32 = row.iter().copied().sum(); assert!( (row_sum - 1.0).abs() < 0.0001, "Row {} should sum to 1.0, got {}", i, row_sum ); } } #[test] fn test_transition_matrix_probabilities_non_negative() { let matrix = create_test_transition_matrix(5); for (i, row) in matrix.probabilities.iter().enumerate() { for (j, prob) in row.iter().copied().enumerate() { assert!( prob >= 0.0, "Probability at ({}, {}) should be non-negative: {}", i, j, prob ); } } } #[test] fn test_observations_matrix() { let matrix = create_test_transition_matrix(4); assert_eq!(matrix.observations.len(), 4); assert_eq!(matrix.observations[0].len(), 4); // All observations should be positive for row in &matrix.observations { for &count in row { assert!(count > 0); } } } #[test] fn test_build_transition_matrix_from_sequence() { let cluster_ids: Vec = (0..3).map(|_| ClusterId::new()).collect(); let sequence = vec![ cluster_ids[0], cluster_ids[1], cluster_ids[0], cluster_ids[2], cluster_ids[1], cluster_ids[0], ]; // Count transitions let mut counts: HashMap<(usize, usize), u32> = HashMap::new(); for window in sequence.windows(2) { let from_idx = cluster_ids.iter().position(|c| *c == window[0]).unwrap(); let to_idx = cluster_ids.iter().position(|c| *c == window[1]).unwrap(); *counts.entry((from_idx, to_idx)).or_insert(0) += 1; } // Sequence: [0, 1, 0, 2, 1, 0] // Transitions: 0->1 (1x), 1->0 (2x), 0->2 (1x), 2->1 (1x) assert_eq!(*counts.get(&(0, 1)).unwrap_or(&0), 1); assert_eq!(*counts.get(&(1, 0)).unwrap_or(&0), 2); } } // ============================================================================ // Sequence Analysis Tests // ============================================================================ mod sequence_analysis { use super::*; #[test] fn test_sequence_segment_ordering() { let segments = create_segment_sequence(10, 500); for i in 0..segments.len() - 1 { assert!( segments[i].end_ms <= segments[i + 1].start_ms, "Segments should be in temporal order" ); } } #[test] fn test_stereotypy_calculation() { // Stereotypy = measure of how predictable transitions are // High stereotypy = consistent patterns // Low stereotypy = varied patterns let n = 4; let cluster_ids: Vec = (0..n).map(|_| ClusterId::new()).collect(); // Highly stereotyped (deterministic cycle) let mut stereotyped_probs = vec![vec![0.0; n]; n]; for i in 0..n { stereotyped_probs[i][(i + 1) % n] = 1.0; } // Low stereotypy (uniform) let uniform_prob = 1.0 / n as f32; let stereotyped_matrix = TransitionMatrix { cluster_ids: cluster_ids.clone(), probabilities: stereotyped_probs, observations: vec![vec![10; n]; n], }; let varied_matrix = TransitionMatrix { cluster_ids, probabilities: vec![vec![uniform_prob; n]; n], observations: vec![vec![10; n]; n], }; let stereotyped_entropy = compute_entropy_rate(&stereotyped_matrix); let varied_entropy = compute_entropy_rate(&varied_matrix); // Stereotyped should have lower entropy (more predictable) assert!(stereotyped_entropy < varied_entropy); } #[test] fn test_motif_density() { // Motif density = ratio of segments that are part of motifs let total_segments = 100; let motif_segments = 60; let density = motif_segments as f32 / total_segments as f32; assert!((density - 0.6).abs() < 0.001); } } // ============================================================================ // Anomaly Detection Tests // ============================================================================ mod anomaly_detection { use super::*; fn compute_local_outlier_factor( embedding: &Embedding, neighbors: &[Embedding], ) -> f32 { if neighbors.is_empty() { return 1.0; } // Compute average distance to neighbors let avg_distance: f32 = neighbors .iter() .map(|n| cosine_distance(&embedding.vector, &n.vector)) .sum::() / neighbors.len() as f32; // LOF > 1 indicates anomaly // This is simplified; real LOF compares local density to neighbors' densities avg_distance * 10.0 // Scale factor for detection } #[test] fn test_detect_outlier_embedding() { // Create cluster of normal embeddings let base = create_deterministic_vector(1536, 0); let normal_embeddings: Vec = (0..20) .map(|i| { let noisy: Vec = base.iter().map(|v| v + (i as f32 * 0.001)).collect(); create_test_embedding_with_vector(l2_normalize(&noisy)) }) .collect(); // Create outlier (very different) let outlier_base = create_deterministic_vector(1536, 1000); let outlier = create_test_embedding_with_vector(l2_normalize(&outlier_base)); // Compute LOF for outlier let lof = compute_local_outlier_factor(&outlier, &normal_embeddings); // LOF should be high for outlier assert!(lof > 1.0, "Outlier should have high LOF: {}", lof); } #[test] fn test_normal_embedding_not_anomalous() { let base = create_deterministic_vector(1536, 0); let embeddings: Vec = (0..20) .map(|i| { let noisy: Vec = base.iter().map(|v| v + (i as f32 * 0.001)).collect(); create_test_embedding_with_vector(l2_normalize(&noisy)) }) .collect(); // Check LOF for a normal point let test_point = &embeddings[10]; let neighbors: Vec = embeddings .iter() .filter(|e| e.id != test_point.id) .cloned() .collect(); let lof = compute_local_outlier_factor(test_point, &neighbors); // Should be relatively low for normal point assert!( lof < 5.0, "Normal point should have low LOF: {}", lof ); } } // ============================================================================ // Cluster Validation Tests // ============================================================================ mod cluster_validation { use super::*; fn compute_silhouette_score( embedding: &Embedding, own_cluster_members: &[Embedding], other_cluster_members: &[Embedding], ) -> f32 { if own_cluster_members.is_empty() { return 0.0; } // a = average distance to own cluster members let a: f32 = own_cluster_members .iter() .filter(|e| e.id != embedding.id) .map(|e| cosine_distance(&embedding.vector, &e.vector)) .sum::() / (own_cluster_members.len() - 1).max(1) as f32; // b = average distance to nearest other cluster let b: f32 = if other_cluster_members.is_empty() { 1.0 } else { other_cluster_members .iter() .map(|e| cosine_distance(&embedding.vector, &e.vector)) .sum::() / other_cluster_members.len() as f32 }; // Silhouette = (b - a) / max(a, b) let max_ab = a.max(b); if max_ab > 0.0 { (b - a) / max_ab } else { 0.0 } } #[test] fn test_silhouette_score_well_separated_clusters() { // Create well-separated clusters let base1 = create_deterministic_vector(1536, 0); let base2 = create_deterministic_vector(1536, 1000); let cluster1: Vec = (0..10) .map(|i| { let noisy: Vec = base1.iter().map(|v| v + (i as f32 * 0.001)).collect(); create_test_embedding_with_vector(l2_normalize(&noisy)) }) .collect(); let cluster2: Vec = (0..10) .map(|i| { let noisy: Vec = base2.iter().map(|v| v + (i as f32 * 0.001)).collect(); create_test_embedding_with_vector(l2_normalize(&noisy)) }) .collect(); // Compute silhouette for point in cluster 1 let score = compute_silhouette_score(&cluster1[5], &cluster1, &cluster2); // Should be positive (closer to own cluster) assert!( score > 0.0, "Well-separated clusters should have positive silhouette: {}", score ); } #[test] fn test_silhouette_score_range() { let embeddings = create_embedding_batch(20); // Split into two arbitrary clusters let cluster1: Vec = embeddings[0..10].to_vec(); let cluster2: Vec = embeddings[10..20].to_vec(); for emb in &cluster1 { let score = compute_silhouette_score(emb, &cluster1, &cluster2); assert!( score >= -1.0 && score <= 1.0, "Silhouette should be in [-1, 1]: {}", score ); } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_analysis_integration_smoke_test() { // Create embeddings let embeddings = create_embedding_batch(50); // Run clustering let service = MockClusteringService::with_params(5, 3); let clusters = service.cluster_hdbscan(&embeddings).unwrap(); // Create transition matrix let matrix = create_test_transition_matrix(clusters.len().max(3)); // Compute entropy let entropy = compute_entropy_rate(&matrix); assert!(entropy >= 0.0); // Detect motifs let motif_service = MockMotifDetectionService::new(); let sequences: Vec> = clusters .iter() .map(|c| vec![c.id, c.id, c.id]) .collect(); let _motifs = motif_service.detect_motifs(&sequences).unwrap(); } }