Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
812
vendor/ruvector/examples/vibecast-7sense/tests/integration/analysis_test.rs
vendored
Normal file
812
vendor/ruvector/examples/vibecast-7sense/tests/integration/analysis_test.rs
vendored
Normal file
@@ -0,0 +1,812 @@
|
||||
//! Integration tests for Analysis Context
|
||||
//!
|
||||
//! Tests for HDBSCAN clustering, cluster assignment, motif detection,
|
||||
//! entropy calculation, and transition matrix operations.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// ============================================================================
|
||||
// HDBSCAN Clustering Tests
|
||||
// ============================================================================
|
||||
|
||||
mod hdbscan_clustering {
    use super::*;

    /// Two well-separated groups of lightly perturbed points should yield
    /// at least one cluster from the mock HDBSCAN service.
    #[test]
    fn test_cluster_with_clear_groups() {
        let service = MockClusteringService::with_params(5, 3);

        // Anchor vectors for two well-separated clusters.
        let anchor_a = create_deterministic_vector(1536, 0);
        let anchor_b = create_deterministic_vector(1536, 1000);

        let mut embeddings = Vec::new();
        // 15 small perturbations around each anchor, in order (A first, then B).
        for anchor in [&anchor_a, &anchor_b] {
            for step in 0..15 {
                let jittered: Vec<f32> =
                    anchor.iter().map(|v| v + (step as f32 * 0.001)).collect();
                embeddings.push(create_test_embedding_with_vector(l2_normalize(&jittered)));
            }
        }

        let clusters = service.cluster_hdbscan(&embeddings).unwrap();

        assert!(!clusters.is_empty(), "Should find at least one cluster");
    }

    /// Fewer points than `min_cluster_size` must not form any cluster.
    #[test]
    fn test_cluster_with_insufficient_data() {
        let service = MockClusteringService::with_params(10, 5);

        // Only 3 embeddings - below min_cluster_size.
        let embeddings: Vec<Embedding> = (0..3).map(|_| create_test_embedding()).collect();

        let clusters = service.cluster_hdbscan(&embeddings).unwrap();

        assert_eq!(clusters.len(), 0, "Should not form clusters with too few points");
    }

    /// Fixture clusters are tagged with the HDBSCAN clustering method.
    #[test]
    fn test_cluster_method_assignment() {
        assert_eq!(create_test_cluster().method, ClusteringMethod::Hdbscan);
    }

    /// Cohesion and separation are normalized quality scores in [0, 1].
    #[test]
    fn test_cluster_cohesion_in_valid_range() {
        let cluster = create_test_cluster();

        assert!((0.0..=1.0).contains(&cluster.cohesion));
        assert!((0.0..=1.0).contains(&cluster.separation));
    }

    /// A populated fixture cluster exposes its members and a non-empty centroid.
    #[test]
    fn test_cluster_has_members() {
        let cluster = create_test_cluster_with_members(20);

        assert_eq!(cluster.member_ids.len(), 20);
        assert!(!cluster.centroid.is_empty());
    }

    /// The fixture centroid is expected to have unit L2 norm.
    #[test]
    fn test_cluster_centroid_is_normalized() {
        let cluster = create_test_cluster_with_members(10);

        let norm = cluster
            .centroid
            .iter()
            .map(|x| x * x)
            .sum::<f32>()
            .sqrt();
        assert!((norm - 1.0).abs() < 0.0001, "Centroid should be normalized");
    }

    /// The batch fixture produces the requested count with distinct IDs.
    #[test]
    fn test_multiple_clusters() {
        let clusters = create_test_clusters(5);

        assert_eq!(clusters.len(), 5);

        // Collecting into a set deduplicates, so equal sizes means unique IDs.
        let unique_ids: HashSet<_> = clusters.iter().map(|c| c.id.0).collect();
        assert_eq!(unique_ids.len(), 5, "All cluster IDs should be unique");
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Cluster Assignment Tests
|
||||
// ============================================================================
|
||||
|
||||
mod cluster_assignment {
    use super::*;

    /// An embedding identical to a centroid should be assigned to that cluster
    /// with positive confidence.
    #[test]
    fn test_assign_embedding_to_nearest_cluster() {
        let service = MockClusteringService::new();
        let clusters = create_test_clusters(3);

        // Probe that matches the first cluster's centroid exactly.
        let probe = create_test_embedding_with_vector(clusters[0].centroid.clone());

        let assignment = service
            .assign_to_cluster(&probe, &clusters)
            .unwrap()
            .expect("Should assign to a cluster");

        assert_eq!(assignment.cluster_id, clusters[0].id);
        assert!(assignment.confidence > 0.0);
    }

    /// Confidence should fall as the probe moves away from the centroid.
    #[test]
    fn test_assignment_confidence_based_on_distance() {
        let service = MockClusteringService::new();
        let clusters = create_test_clusters(2);

        // Probe sitting exactly on the centroid.
        let near_probe = create_test_embedding_with_vector(clusters[0].centroid.clone());
        let near = service
            .assign_to_cluster(&near_probe, &clusters)
            .unwrap()
            .unwrap();

        // Probe shifted uniformly away from the centroid, then re-normalized.
        let shifted: Vec<f32> = clusters[0].centroid.iter().map(|v| v + 0.5).collect();
        let far_probe = create_test_embedding_with_vector(l2_normalize(&shifted));
        let far = service
            .assign_to_cluster(&far_probe, &clusters)
            .unwrap()
            .unwrap();

        assert!(
            near.confidence > far.confidence,
            "Closer embeddings should have higher confidence"
        );
    }

    /// With no clusters available, assignment yields `None`.
    #[test]
    fn test_no_assignment_to_empty_clusters() {
        let service = MockClusteringService::new();
        let probe = create_test_embedding();
        let no_clusters: Vec<Cluster> = Vec::new();

        let assignment = service.assign_to_cluster(&probe, &no_clusters).unwrap();
        assert!(assignment.is_none());
    }

    /// Every assignment records a non-negative distance to its centroid.
    #[test]
    fn test_assignment_includes_distance_to_centroid() {
        let service = MockClusteringService::new();
        let clusters = create_test_clusters(1);
        let probe = create_test_embedding();

        let assignment = service
            .assign_to_cluster(&probe, &clusters)
            .unwrap()
            .unwrap();

        assert!(
            assignment.distance_to_centroid >= 0.0,
            "Distance should be non-negative"
        );
    }

    /// A point midway between two centroids should not get full confidence.
    #[test]
    fn test_soft_assignment_concept() {
        let service = MockClusteringService::new();

        let anchor_a = create_deterministic_vector(1536, 0);
        let anchor_b = create_deterministic_vector(1536, 100);

        // Two clusters with identical quality scores but different centroids.
        let make_cluster = |centroid: Vec<f32>| Cluster {
            id: ClusterId::new(),
            method: ClusteringMethod::Hdbscan,
            member_ids: vec![],
            centroid,
            cohesion: 0.8,
            separation: 0.6,
        };
        let clusters = vec![
            make_cluster(l2_normalize(&anchor_a)),
            make_cluster(l2_normalize(&anchor_b)),
        ];

        // Probe exactly halfway between the two anchors.
        let halfway: Vec<f32> = anchor_a
            .iter()
            .zip(anchor_b.iter())
            .map(|(a, b)| (a + b) / 2.0)
            .collect();
        let probe = create_test_embedding_with_vector(l2_normalize(&halfway));

        let assignment = service
            .assign_to_cluster(&probe, &clusters)
            .unwrap()
            .unwrap();

        // Confidence should reflect the ambiguity of a boundary point.
        assert!(assignment.confidence < 0.9, "Boundary point should have lower confidence");
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Motif Detection Tests
|
||||
// ============================================================================
|
||||
|
||||
mod motif_detection {
    use super::*;

    /// A pattern that recurs within and across sequences should surface
    /// as a motif of length >= 2.
    #[test]
    fn test_detect_motifs_in_sequences() {
        let service = MockMotifDetectionService::new();

        let cluster_ids: Vec<ClusterId> = (0..5).map(|_| ClusterId::new()).collect();

        // The [0, 1, 2] pattern appears in all three sequences.
        let sequences: Vec<Vec<ClusterId>> = vec![
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[3],
            ],
            vec![
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
        ];

        let motifs = service.detect_motifs(&sequences).unwrap();

        // Should find the [0,1,2] pattern that appears multiple times.
        assert!(
            motifs.iter().any(|m| m.pattern.len() >= 2),
            "Should find at least one motif"
        );
    }

    /// Fixture motif has a positive occurrence count and a 3-step pattern.
    #[test]
    fn test_motif_occurrence_count() {
        let motif = create_test_motif();

        assert!(motif.occurrence_count > 0);
        assert_eq!(motif.pattern.len(), 3);
    }

    /// Motif confidence is a normalized score in [0, 1].
    #[test]
    fn test_motif_confidence_calculation() {
        let motif = create_test_motif();

        assert!(
            motif.confidence >= 0.0 && motif.confidence <= 1.0,
            "Confidence should be in [0, 1]"
        );
    }

    /// Detection over random sequences should complete without error.
    #[test]
    fn test_no_motifs_in_random_sequences() {
        let service = MockMotifDetectionService::new();

        // Completely random sequences: every ID is fresh, so no shared patterns.
        let sequences: Vec<Vec<ClusterId>> = (0..5)
            .map(|_| (0..10).map(|_| ClusterId::new()).collect())
            .collect();

        // Random sequences are unlikely to contain recurring motifs, though the
        // mock implementation makes no hard guarantee, so only successful
        // completion is checked. Bound as `_motifs` to avoid an
        // unused-variable warning.
        let _motifs = service.detect_motifs(&sequences).unwrap();
    }

    /// No input sequences means no motifs.
    #[test]
    fn test_empty_sequence_handling() {
        let service = MockMotifDetectionService::new();
        let empty_sequences: Vec<Vec<ClusterId>> = vec![];

        let motifs = service.detect_motifs(&empty_sequences).unwrap();
        assert_eq!(motifs.len(), 0);
    }

    /// A 3-element motif at 5s per segment should last at least 5000 ms.
    #[test]
    fn test_motif_duration_estimation() {
        let motif = create_test_motif();

        assert!(motif.avg_duration_ms >= 5000);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Entropy Calculation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod entropy_calculation {
    use super::*;

    /// A uniform transition matrix attains the maximum entropy rate log2(n).
    #[test]
    fn test_entropy_rate_uniform_distribution() {
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        let p = 1.0 / n as f32;

        let matrix = TransitionMatrix {
            cluster_ids: cluster_ids.clone(),
            probabilities: vec![vec![p; n]; n],
            observations: vec![vec![10; n]; n],
        };

        let entropy = compute_entropy_rate(&matrix);

        // Maximum entropy for a uniform distribution = log2(n) = 2 bits for n=4.
        let max_entropy = (n as f32).log2();
        assert!(
            (entropy - max_entropy).abs() < 0.1,
            "Uniform distribution should have maximum entropy: {} vs {}",
            entropy,
            max_entropy
        );
    }

    /// Deterministic transitions (one outgoing edge per state) carry no entropy.
    #[test]
    fn test_entropy_rate_deterministic() {
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();

        // Each state always hops to its successor, forming a fixed cycle.
        let mut probabilities = vec![vec![0.0; n]; n];
        for (i, row) in probabilities.iter_mut().enumerate() {
            row[(i + 1) % n] = 1.0;
        }

        let matrix = TransitionMatrix {
            cluster_ids,
            probabilities,
            observations: vec![vec![10; n]; n],
        };

        let entropy = compute_entropy_rate(&matrix);

        assert!(
            entropy < 0.1,
            "Deterministic transitions should have near-zero entropy: {}",
            entropy
        );
    }

    /// Entropy rate is non-negative for arbitrary fixture matrices.
    #[test]
    fn test_entropy_rate_non_negative() {
        for _ in 0..10 {
            let matrix = create_test_transition_matrix(5);
            let entropy = compute_entropy_rate(&matrix);

            assert!(
                entropy >= 0.0,
                "Entropy should never be negative: {}",
                entropy
            );
        }
    }

    /// A near-deterministic matrix must score below a uniform one.
    #[test]
    fn test_entropy_increases_with_randomness() {
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();

        // Predictable: strong self-loops with a small leak to the other states.
        let mut sticky_probs = vec![vec![0.0; n]; n];
        for i in 0..n {
            sticky_probs[i][i] = 0.8; // high self-loop probability
            for j in 0..n {
                if i != j {
                    sticky_probs[i][j] = 0.2 / (n - 1) as f32;
                }
            }
        }

        let low_entropy_matrix = TransitionMatrix {
            cluster_ids: cluster_ids.clone(),
            probabilities: sticky_probs,
            observations: vec![vec![10; n]; n],
        };

        // Unpredictable: perfectly uniform rows.
        let high_entropy_matrix = TransitionMatrix {
            cluster_ids,
            probabilities: vec![vec![1.0 / n as f32; n]; n],
            observations: vec![vec![10; n]; n],
        };

        let low_entropy = compute_entropy_rate(&low_entropy_matrix);
        let high_entropy = compute_entropy_rate(&high_entropy_matrix);

        assert!(
            high_entropy > low_entropy,
            "More uniform distribution should have higher entropy: {} vs {}",
            high_entropy,
            low_entropy
        );
    }

    /// An empty matrix is defined to have zero entropy.
    #[test]
    fn test_empty_matrix_entropy() {
        let matrix = TransitionMatrix {
            cluster_ids: vec![],
            probabilities: vec![],
            observations: vec![],
        };

        assert_eq!(compute_entropy_rate(&matrix), 0.0);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Transition Matrix Tests
|
||||
// ============================================================================
|
||||
|
||||
mod transition_matrix {
    use super::*;

    /// The fixture builds a square n x n probability matrix.
    #[test]
    fn test_create_transition_matrix() {
        let matrix = create_test_transition_matrix(5);

        assert_eq!(matrix.cluster_ids.len(), 5);
        assert_eq!(matrix.probabilities.len(), 5);
        assert_eq!(matrix.probabilities[0].len(), 5);
    }

    /// Every row is a probability distribution and must sum to 1.
    #[test]
    fn test_transition_matrix_rows_sum_to_one() {
        let matrix = create_test_transition_matrix(5);

        for (i, row) in matrix.probabilities.iter().enumerate() {
            let total: f32 = row.iter().sum();
            assert!(
                (total - 1.0).abs() < 0.0001,
                "Row {} should sum to 1.0, got {}",
                i,
                total
            );
        }
    }

    /// Probabilities can never be negative.
    #[test]
    fn test_transition_matrix_probabilities_non_negative() {
        let matrix = create_test_transition_matrix(5);

        for (i, row) in matrix.probabilities.iter().enumerate() {
            for (j, &prob) in row.iter().enumerate() {
                assert!(
                    prob >= 0.0,
                    "Probability at ({}, {}) should be non-negative: {}",
                    i,
                    j,
                    prob
                );
            }
        }
    }

    /// Observation counts mirror the matrix shape and are strictly positive.
    #[test]
    fn test_observations_matrix() {
        let matrix = create_test_transition_matrix(4);

        assert_eq!(matrix.observations.len(), 4);
        assert_eq!(matrix.observations[0].len(), 4);

        // All observations should be positive.
        for row in &matrix.observations {
            for &count in row {
                assert!(count > 0);
            }
        }
    }

    /// Tallying consecutive pairs over a sequence reproduces known counts.
    #[test]
    fn test_build_transition_matrix_from_sequence() {
        let ids: Vec<ClusterId> = (0..3).map(|_| ClusterId::new()).collect();
        // Sequence by index: [0, 1, 0, 2, 1, 0]
        let sequence = vec![ids[0], ids[1], ids[0], ids[2], ids[1], ids[0]];

        // Count each (from, to) transition over a width-2 sliding window.
        let mut counts: HashMap<(usize, usize), u32> = HashMap::new();
        for pair in sequence.windows(2) {
            let from = ids.iter().position(|c| *c == pair[0]).unwrap();
            let to = ids.iter().position(|c| *c == pair[1]).unwrap();
            *counts.entry((from, to)).or_insert(0) += 1;
        }

        // Transitions: 0->1 (1x), 1->0 (2x), 0->2 (1x), 2->1 (1x)
        assert_eq!(counts.get(&(0, 1)).copied().unwrap_or(0), 1);
        assert_eq!(counts.get(&(1, 0)).copied().unwrap_or(0), 2);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Sequence Analysis Tests
|
||||
// ============================================================================
|
||||
|
||||
mod sequence_analysis {
    use super::*;

    /// Consecutive fixture segments must not overlap in time.
    #[test]
    fn test_sequence_segment_ordering() {
        let segments = create_segment_sequence(10, 500);

        for pair in segments.windows(2) {
            assert!(
                pair[0].end_ms <= pair[1].start_ms,
                "Segments should be in temporal order"
            );
        }
    }

    /// Stereotypy is how predictable transitions are: a deterministic cycle
    /// (high stereotypy) should carry less entropy than uniform transitions
    /// (low stereotypy).
    #[test]
    fn test_stereotypy_calculation() {
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();

        // Highly stereotyped behaviour: a deterministic cycle through states.
        let mut cycle_probs = vec![vec![0.0; n]; n];
        for (i, row) in cycle_probs.iter_mut().enumerate() {
            row[(i + 1) % n] = 1.0;
        }

        let stereotyped_matrix = TransitionMatrix {
            cluster_ids: cluster_ids.clone(),
            probabilities: cycle_probs,
            observations: vec![vec![10; n]; n],
        };

        // Low stereotypy: uniform transitions to every state.
        let varied_matrix = TransitionMatrix {
            cluster_ids,
            probabilities: vec![vec![1.0 / n as f32; n]; n],
            observations: vec![vec![10; n]; n],
        };

        let stereotyped_entropy = compute_entropy_rate(&stereotyped_matrix);
        let varied_entropy = compute_entropy_rate(&varied_matrix);

        // Stereotyped should have lower entropy (more predictable).
        assert!(stereotyped_entropy < varied_entropy);
    }

    /// Motif density is the fraction of segments belonging to motifs.
    #[test]
    fn test_motif_density() {
        let total_segments = 100;
        let motif_segments = 60;

        let density = motif_segments as f32 / total_segments as f32;
        assert!((density - 0.6).abs() < 0.001);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Anomaly Detection Tests
|
||||
// ============================================================================
|
||||
|
||||
mod anomaly_detection {
|
||||
use super::*;
|
||||
|
||||
fn compute_local_outlier_factor(
|
||||
embedding: &Embedding,
|
||||
neighbors: &[Embedding],
|
||||
) -> f32 {
|
||||
if neighbors.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
// Compute average distance to neighbors
|
||||
let avg_distance: f32 = neighbors
|
||||
.iter()
|
||||
.map(|n| cosine_distance(&embedding.vector, &n.vector))
|
||||
.sum::<f32>()
|
||||
/ neighbors.len() as f32;
|
||||
|
||||
// LOF > 1 indicates anomaly
|
||||
// This is simplified; real LOF compares local density to neighbors' densities
|
||||
avg_distance * 10.0 // Scale factor for detection
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_outlier_embedding() {
|
||||
// Create cluster of normal embeddings
|
||||
let base = create_deterministic_vector(1536, 0);
|
||||
let normal_embeddings: Vec<Embedding> = (0..20)
|
||||
.map(|i| {
|
||||
let noisy: Vec<f32> = base.iter().map(|v| v + (i as f32 * 0.001)).collect();
|
||||
create_test_embedding_with_vector(l2_normalize(&noisy))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Create outlier (very different)
|
||||
let outlier_base = create_deterministic_vector(1536, 1000);
|
||||
let outlier = create_test_embedding_with_vector(l2_normalize(&outlier_base));
|
||||
|
||||
// Compute LOF for outlier
|
||||
let lof = compute_local_outlier_factor(&outlier, &normal_embeddings);
|
||||
|
||||
// LOF should be high for outlier
|
||||
assert!(lof > 1.0, "Outlier should have high LOF: {}", lof);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normal_embedding_not_anomalous() {
|
||||
let base = create_deterministic_vector(1536, 0);
|
||||
let embeddings: Vec<Embedding> = (0..20)
|
||||
.map(|i| {
|
||||
let noisy: Vec<f32> = base.iter().map(|v| v + (i as f32 * 0.001)).collect();
|
||||
create_test_embedding_with_vector(l2_normalize(&noisy))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Check LOF for a normal point
|
||||
let test_point = &embeddings[10];
|
||||
let neighbors: Vec<Embedding> = embeddings
|
||||
.iter()
|
||||
.filter(|e| e.id != test_point.id)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let lof = compute_local_outlier_factor(test_point, &neighbors);
|
||||
|
||||
// Should be relatively low for normal point
|
||||
assert!(
|
||||
lof < 5.0,
|
||||
"Normal point should have low LOF: {}",
|
||||
lof
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Cluster Validation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod cluster_validation {
|
||||
use super::*;
|
||||
|
||||
fn compute_silhouette_score(
|
||||
embedding: &Embedding,
|
||||
own_cluster_members: &[Embedding],
|
||||
other_cluster_members: &[Embedding],
|
||||
) -> f32 {
|
||||
if own_cluster_members.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// a = average distance to own cluster members
|
||||
let a: f32 = own_cluster_members
|
||||
.iter()
|
||||
.filter(|e| e.id != embedding.id)
|
||||
.map(|e| cosine_distance(&embedding.vector, &e.vector))
|
||||
.sum::<f32>()
|
||||
/ (own_cluster_members.len() - 1).max(1) as f32;
|
||||
|
||||
// b = average distance to nearest other cluster
|
||||
let b: f32 = if other_cluster_members.is_empty() {
|
||||
1.0
|
||||
} else {
|
||||
other_cluster_members
|
||||
.iter()
|
||||
.map(|e| cosine_distance(&embedding.vector, &e.vector))
|
||||
.sum::<f32>()
|
||||
/ other_cluster_members.len() as f32
|
||||
};
|
||||
|
||||
// Silhouette = (b - a) / max(a, b)
|
||||
let max_ab = a.max(b);
|
||||
if max_ab > 0.0 {
|
||||
(b - a) / max_ab
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_silhouette_score_well_separated_clusters() {
|
||||
// Create well-separated clusters
|
||||
let base1 = create_deterministic_vector(1536, 0);
|
||||
let base2 = create_deterministic_vector(1536, 1000);
|
||||
|
||||
let cluster1: Vec<Embedding> = (0..10)
|
||||
.map(|i| {
|
||||
let noisy: Vec<f32> = base1.iter().map(|v| v + (i as f32 * 0.001)).collect();
|
||||
create_test_embedding_with_vector(l2_normalize(&noisy))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let cluster2: Vec<Embedding> = (0..10)
|
||||
.map(|i| {
|
||||
let noisy: Vec<f32> = base2.iter().map(|v| v + (i as f32 * 0.001)).collect();
|
||||
create_test_embedding_with_vector(l2_normalize(&noisy))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Compute silhouette for point in cluster 1
|
||||
let score = compute_silhouette_score(&cluster1[5], &cluster1, &cluster2);
|
||||
|
||||
// Should be positive (closer to own cluster)
|
||||
assert!(
|
||||
score > 0.0,
|
||||
"Well-separated clusters should have positive silhouette: {}",
|
||||
score
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_silhouette_score_range() {
|
||||
let embeddings = create_embedding_batch(20);
|
||||
|
||||
// Split into two arbitrary clusters
|
||||
let cluster1: Vec<Embedding> = embeddings[0..10].to_vec();
|
||||
let cluster2: Vec<Embedding> = embeddings[10..20].to_vec();
|
||||
|
||||
for emb in &cluster1 {
|
||||
let score = compute_silhouette_score(emb, &cluster1, &cluster2);
|
||||
assert!(
|
||||
score >= -1.0 && score <= 1.0,
|
||||
"Silhouette should be in [-1, 1]: {}",
|
||||
score
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: cluster a batch, build a transition matrix,
    /// compute its entropy, and run motif detection, verifying each stage
    /// completes without error.
    #[test]
    fn test_analysis_integration_smoke_test() {
        // Cluster a batch of fixture embeddings.
        let embeddings = create_embedding_batch(50);
        let clustering = MockClusteringService::with_params(5, 3);
        let clusters = clustering.cluster_hdbscan(&embeddings).unwrap();

        // Entropy over a transition matrix sized to the cluster count
        // (at least 3x3 even if clustering found fewer).
        let matrix = create_test_transition_matrix(clusters.len().max(3));
        let entropy = compute_entropy_rate(&matrix);
        assert!(entropy >= 0.0);

        // Motif detection over trivial per-cluster sequences.
        let motif_service = MockMotifDetectionService::new();
        let sequences: Vec<Vec<ClusterId>> = clusters
            .iter()
            .map(|c| vec![c.id, c.id, c.id])
            .collect();
        let _motifs = motif_service.detect_motifs(&sequences).unwrap();
    }
}
|
||||
753
vendor/ruvector/examples/vibecast-7sense/tests/integration/api_test.rs
vendored
Normal file
753
vendor/ruvector/examples/vibecast-7sense/tests/integration/api_test.rs
vendored
Normal file
@@ -0,0 +1,753 @@
|
||||
//! Integration tests for API Context
|
||||
//!
|
||||
//! Tests for REST endpoints, GraphQL queries/mutations, rate limiting,
|
||||
//! and error responses.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
// ============================================================================
|
||||
// REST Endpoint Tests
|
||||
// ============================================================================
|
||||
|
||||
mod rest_endpoints {
    use super::*;

    // Mock API paths exercised by the tests below.
    const RECORDINGS_PATH: &str = "/api/v1/recordings";
    // Not referenced by any test in this module yet; kept for parity with the
    // API surface and silenced to avoid a dead_code warning.
    #[allow(dead_code)]
    const SEGMENTS_PATH: &str = "/api/v1/segments";
    const EMBEDDINGS_PATH: &str = "/api/v1/embeddings";
    const CLUSTERS_PATH: &str = "/api/v1/clusters";
    const INTERPRETATIONS_PATH: &str = "/api/v1/interpretations";
    const SEARCH_PATH: &str = "/api/v1/search";
    const HEALTH_PATH: &str = "/api/v1/health";

    /// Listing recordings returns 200 with a recordings payload.
    #[test]
    fn test_recordings_list_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"recordings": [{"id": "uuid1", "duration_ms": 60000}]}"#,
        );

        let response = client.get(RECORDINGS_PATH).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("recordings"));
    }

    /// Creating a recording returns 201 with the new resource's id.
    #[test]
    fn test_recordings_create_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            201,
            r#"{"id": "new-uuid", "status": "created"}"#,
        );

        let body = r#"{"source": "upload", "metadata": {}}"#;
        let response = client.post(RECORDINGS_PATH, body).unwrap();

        assert_eq!(response.status, 201);
        assert!(response.body.contains("id"));
    }

    /// Fetching a recording's segments returns 200 with a segments payload.
    #[test]
    fn test_segments_by_recording_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"segments": [{"id": "seg1", "start_ms": 0, "end_ms": 5000}]}"#,
        );

        let path = format!("{}/recording123/segments", RECORDINGS_PATH);
        let response = client.get(&path).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("segments"));
    }

    /// Embedding generation is asynchronous: expect 202 plus a job id.
    #[test]
    fn test_embedding_generation_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            202,
            r#"{"job_id": "job123", "status": "processing"}"#,
        );

        let body = r#"{"segment_ids": ["seg1", "seg2"], "model": "perch2"}"#;
        let response = client.post(EMBEDDINGS_PATH, body).unwrap();

        assert_eq!(response.status, 202); // Accepted for async processing
        assert!(response.body.contains("job_id"));
    }

    /// Similarity search returns 200 with a results list.
    #[test]
    fn test_similarity_search_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"results": [{"segment_id": "seg1", "distance": 0.1}], "count": 1}"#,
        );

        let body = r#"{"query_segment_id": "query1", "k": 10}"#;
        let response = client.post(SEARCH_PATH, body).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("results"));
    }

    /// Interpretation responses carry statements plus a confidence score.
    #[test]
    fn test_interpretation_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"interpretation": {"statements": ["Similar to alarm calls"], "confidence": 0.85}}"#,
        );

        let body = r#"{"segment_id": "seg1", "include_citations": true}"#;
        let response = client.post(INTERPRETATIONS_PATH, body).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("interpretation"));
        assert!(response.body.contains("confidence"));
    }

    /// Health check reports overall and per-component status.
    #[test]
    fn test_health_check_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"status": "healthy", "version": "1.0.0", "components": {"database": "ok", "index": "ok"}}"#,
        );

        let response = client.get(HEALTH_PATH).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("healthy"));
    }

    /// Listing clusters returns 200 with a clusters payload.
    #[test]
    fn test_cluster_list_endpoint() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"clusters": [{"id": "c1", "member_count": 50}], "total": 1}"#,
        );

        let response = client.get(CLUSTERS_PATH).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("clusters"));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// GraphQL Tests
|
||||
// ============================================================================
|
||||
|
||||
mod graphql {
    use super::*;

    const GRAPHQL_PATH: &str = "/graphql";

    /// Wrap a raw GraphQL document in a JSON request body: `{"query": "..."}`.
    ///
    /// Performs proper JSON string escaping: backslashes are escaped before
    /// quotes. Callers pass the query with *plain* quotes (e.g. `id: "rec1"`).
    ///
    /// Bug fixed: callers previously pre-escaped quotes as `\"` and this
    /// helper then escaped the `"` again, producing `\\"` — an escaped
    /// backslash followed by an *unescaped* quote that terminated the JSON
    /// string early, i.e. an invalid request body.
    fn create_graphql_query(query: &str) -> String {
        let escaped = query.replace('\\', "\\\\").replace('"', "\\\"");
        format!(r#"{{"query": "{}"}}"#, escaped)
    }

    /// Top-level list query returns recordings under `data`.
    #[test]
    fn test_graphql_recordings_query() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"recordings": [{"id": "rec1", "duration_ms": 60000}]}}"#,
        );

        let query = create_graphql_query("{ recordings { id duration_ms } }");
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("data"));
        assert!(response.body.contains("recordings"));
    }

    /// Nested selection: a recording together with its segments.
    #[test]
    fn test_graphql_recording_with_segments() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"recording": {"id": "rec1", "segments": [{"id": "seg1"}]}}}"#,
        );

        // Plain quotes here — create_graphql_query handles the JSON escaping.
        let query = create_graphql_query("{ recording(id: \"rec1\") { id segments { id } } }");
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("segments"));
    }

    /// A segment resolves its embedding sub-object.
    #[test]
    fn test_graphql_segment_with_embedding() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"segment": {"id": "seg1", "embedding": {"id": "emb1", "norm": 1.0}}}}"#,
        );

        let query = create_graphql_query("{ segment(id: \"seg1\") { id embedding { id norm } } }");
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("embedding"));
    }

    /// k-NN similarity search exposed as a GraphQL field.
    #[test]
    fn test_graphql_similarity_search() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"similarSegments": [{"segment": {"id": "s1"}, "distance": 0.1}]}}"#,
        );

        let query = create_graphql_query(
            "{ similarSegments(segmentId: \"seg1\", k: 10) { segment { id } distance } }",
        );
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("similarSegments"));
    }

    /// Mutation creating a recording returns the new id and status.
    #[test]
    fn test_graphql_create_recording_mutation() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"createRecording": {"id": "new-rec", "status": "INGESTED"}}}"#,
        );

        let mutation = create_graphql_query(
            "mutation { createRecording(input: {source: \"upload\"}) { id status } }",
        );
        let response = client.post(GRAPHQL_PATH, &mutation).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("createRecording"));
    }

    /// Batch embedding generation is asynchronous and returns a job handle.
    #[test]
    fn test_graphql_generate_embeddings_mutation() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"generateEmbeddings": {"jobId": "job1", "status": "PROCESSING"}}}"#,
        );

        let mutation = create_graphql_query(
            "mutation { generateEmbeddings(segmentIds: [\"s1\", \"s2\"]) { jobId status } }",
        );
        let response = client.post(GRAPHQL_PATH, &mutation).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("generateEmbeddings"));
    }

    /// Clustering mutation returns the session id and resulting cluster count.
    #[test]
    fn test_graphql_run_clustering_mutation() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"runClustering": {"sessionId": "sess1", "clusterCount": 15}}}"#,
        );

        // No string arguments in this document, so no quotes to escape.
        let mutation = create_graphql_query(
            "mutation { runClustering(method: HDBSCAN, params: {minClusterSize: 5}) { sessionId clusterCount } }",
        );
        let response = client.post(GRAPHQL_PATH, &mutation).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("runClustering"));
    }

    /// GraphQL errors come back in the `errors` array with HTTP 200.
    #[test]
    fn test_graphql_error_response() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": null, "errors": [{"message": "Segment not found", "path": ["segment"]}]}"#,
        );

        let query = create_graphql_query("{ segment(id: \"nonexistent\") { id } }");
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200); // GraphQL returns 200 even for errors
        assert!(response.body.contains("errors"));
    }

    /// Deeply nested selection resolves through segment -> embedding -> cluster.
    #[test]
    fn test_graphql_nested_query() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{"data": {"recording": {"segments": [{"embedding": {"cluster": {"id": "c1"}}}]}}}"#,
        );

        let query = create_graphql_query(
            "{ recording(id: \"r1\") { segments { embedding { cluster { id } } } } }",
        );
        let response = client.post(GRAPHQL_PATH, &query).unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("cluster"));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Rate Limiting Tests
|
||||
// ============================================================================
|
||||
|
||||
mod rate_limiting {
    use super::*;

    /// Requests below the configured rate must all pass.
    #[test]
    fn test_rate_limiter_allows_under_limit() {
        let limiter = MockRateLimiter::new(100); // capacity: 100 requests/second

        // Only half the budget is consumed, so every check succeeds.
        for _ in 0..50 {
            assert!(limiter.check(), "Should allow requests under limit");
        }
    }

    /// Once the budget is spent, further requests are rejected.
    #[test]
    fn test_rate_limiter_blocks_over_limit() {
        let limiter = MockRateLimiter::new(10); // capacity: 10 requests/second

        // Burn through the whole budget.
        (0..10).for_each(|_| {
            limiter.check();
        });

        // The 11th request must be rejected.
        assert!(!limiter.check(), "Should block requests over limit");
    }

    /// Sliding-window semantics: exactly the first `limit` checks pass.
    #[test]
    fn test_rate_limiter_sliding_window() {
        let limiter = MockRateLimiter::new(5);

        // The first five requests fit inside the window...
        for _ in 0..5 {
            assert!(limiter.check());
        }

        // ...the sixth does not.
        assert!(!limiter.check());

        // After window slides (simulated by new check), requests should be allowed
        // In real implementation, would wait for time to pass
    }

    /// A rate-limited call is surfaced to clients as HTTP 429.
    #[test]
    fn test_rate_limit_response_code() {
        let api = MockApiClient::new();
        api.queue_response(429, r#"{"error": "Too Many Requests", "retry_after": 60}"#);

        let reply = api.get("/api/v1/recordings").unwrap();

        assert_eq!(reply.status, 429);
        assert!(reply.body.contains("Too Many Requests"));
    }

    /// Standard X-RateLimit-* headers travel on successful responses.
    #[test]
    fn test_rate_limit_headers() {
        let mut response = MockResponse {
            status: 200,
            body: "{}".to_string(),
            headers: HashMap::new(),
        };

        let rate_headers = [
            ("X-RateLimit-Limit", "100"),
            ("X-RateLimit-Remaining", "95"),
            ("X-RateLimit-Reset", "1609459200"),
        ];
        for (name, value) in rate_headers {
            response.headers.insert(name.to_string(), value.to_string());
        }

        assert_eq!(response.headers.get("X-RateLimit-Limit").unwrap(), "100");
        assert_eq!(response.headers.get("X-RateLimit-Remaining").unwrap(), "95");
    }

    /// Heavier operations get tighter budgets than plain reads.
    #[test]
    fn test_different_rate_limits_per_endpoint() {
        let search_limiter = MockRateLimiter::new(10); // 10/sec for search
        let read_limiter = MockRateLimiter::new(100); // 100/sec for reads
        let write_limiter = MockRateLimiter::new(20); // 20/sec for writes

        // Reads: 50 checks all fit inside the 100/sec budget.
        for _ in 0..50 {
            assert!(read_limiter.check());
        }

        // Search: exactly ten pass, then the limiter trips.
        for _ in 0..10 {
            assert!(search_limiter.check());
        }
        assert!(!search_limiter.check());
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Error Response Tests
|
||||
// ============================================================================
|
||||
|
||||
mod error_responses {
    use super::*;

    /// Unknown resource ids surface HTTP 404 with a machine-readable code.
    #[test]
    fn test_404_not_found() {
        let client = MockApiClient::new();
        client.queue_response(
            404,
            r#"{"error": "Not Found", "message": "Recording not found", "code": "RECORDING_NOT_FOUND"}"#,
        );

        let response = client.get("/api/v1/recordings/nonexistent").unwrap();

        assert_eq!(response.status, 404);
        assert!(response.body.contains("Not Found"));
    }

    /// Malformed input produces HTTP 400 naming the offending field.
    #[test]
    fn test_400_bad_request() {
        let client = MockApiClient::new();
        client.queue_response(
            400,
            r#"{"error": "Bad Request", "message": "Invalid segment_id format", "field": "segment_id"}"#,
        );

        let response = client.post("/api/v1/embeddings", r#"{"segment_id": "invalid"}"#).unwrap();

        assert_eq!(response.status, 400);
        assert!(response.body.contains("Bad Request"));
    }

    /// Semantically invalid values (here: out-of-range k) yield HTTP 422
    /// with a per-field error list.
    #[test]
    fn test_422_validation_error() {
        let client = MockApiClient::new();
        client.queue_response(
            422,
            r#"{"error": "Validation Error", "errors": [{"field": "k", "message": "must be between 1 and 100"}]}"#,
        );

        let response = client.post("/api/v1/search", r#"{"k": 1000}"#).unwrap();

        assert_eq!(response.status, 422);
        assert!(response.body.contains("Validation Error"));
    }

    /// Server faults return 500 and include a request_id for tracing.
    #[test]
    fn test_500_internal_error() {
        let client = MockApiClient::new();
        client.queue_response(
            500,
            r#"{"error": "Internal Server Error", "message": "An unexpected error occurred", "request_id": "req-123"}"#,
        );

        let response = client.get("/api/v1/recordings").unwrap();

        assert_eq!(response.status, 500);
        assert!(response.body.contains("Internal Server Error"));
        assert!(response.body.contains("request_id"));
    }

    /// Temporary outages (index rebuild) return 503 with a retry hint.
    #[test]
    fn test_503_service_unavailable() {
        let client = MockApiClient::new();
        client.queue_response(
            503,
            r#"{"error": "Service Unavailable", "message": "Index is rebuilding", "retry_after": 300}"#,
        );

        let response = client.get("/api/v1/search").unwrap();

        assert_eq!(response.status, 503);
        assert!(response.body.contains("Service Unavailable"));
    }

    /// Every error body carries the same three keys: error, message, code.
    #[test]
    fn test_error_response_format() {
        // All errors should have consistent format
        let error_bodies = vec![
            r#"{"error": "Not Found", "message": "Resource not found", "code": "NOT_FOUND"}"#,
            r#"{"error": "Bad Request", "message": "Invalid input", "code": "INVALID_INPUT"}"#,
            r#"{"error": "Internal Server Error", "message": "Server error", "code": "INTERNAL_ERROR"}"#,
        ];

        for body in error_bodies {
            assert!(body.contains("error"));
            assert!(body.contains("message"));
            assert!(body.contains("code"));
        }
    }

    /// Multiple simultaneous validation failures are reported in `details`.
    #[test]
    fn test_error_with_details() {
        let client = MockApiClient::new();
        client.queue_response(
            400,
            r#"{
            "error": "Bad Request",
            "message": "Multiple validation errors",
            "details": [
                {"field": "sample_rate", "error": "must be 32000"},
                {"field": "channels", "error": "must be 1 (mono)"}
            ]
        }"#,
        );

        let response = client.post("/api/v1/recordings", "{}").unwrap();

        assert_eq!(response.status, 400);
        assert!(response.body.contains("details"));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Authentication Tests
|
||||
// ============================================================================
|
||||
|
||||
mod authentication {
    use super::*;

    /// A request with no token is rejected with 401.
    #[test]
    fn test_unauthorized_without_token() {
        let api = MockApiClient::new();
        api.queue_response(
            401,
            r#"{"error": "Unauthorized", "message": "Missing or invalid authentication token"}"#,
        );

        let reply = api.get("/api/v1/recordings").unwrap();

        assert_eq!(reply.status, 401);
        assert!(reply.body.contains("Unauthorized"));
    }

    /// Authenticated but under-privileged callers get 403 on admin routes.
    #[test]
    fn test_forbidden_insufficient_permissions() {
        let api = MockApiClient::new();
        api.queue_response(
            403,
            r#"{"error": "Forbidden", "message": "Insufficient permissions to access this resource"}"#,
        );

        let reply = api.get("/api/v1/admin/settings").unwrap();

        assert_eq!(reply.status, 403);
        assert!(reply.body.contains("Forbidden"));
    }

    /// Expired tokens are distinguishable by the TOKEN_EXPIRED code.
    #[test]
    fn test_token_expired() {
        let api = MockApiClient::new();
        api.queue_response(
            401,
            r#"{"error": "Unauthorized", "message": "Token expired", "code": "TOKEN_EXPIRED"}"#,
        );

        let reply = api.get("/api/v1/recordings").unwrap();

        assert_eq!(reply.status, 401);
        assert!(reply.body.contains("TOKEN_EXPIRED"));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Pagination Tests
|
||||
// ============================================================================
|
||||
|
||||
mod pagination {
    use super::*;

    /// Offset/limit style: the response carries a `pagination` envelope
    /// with page, per_page, total and total_pages.
    #[test]
    fn test_paginated_response() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{
            "data": [{"id": "rec1"}, {"id": "rec2"}],
            "pagination": {
                "page": 1,
                "per_page": 20,
                "total": 100,
                "total_pages": 5
            }
        }"#,
        );

        let response = client.get("/api/v1/recordings?page=1&per_page=20").unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("pagination"));
        assert!(response.body.contains("total_pages"));
    }

    /// Cursor style: opaque base64 cursors plus a `has_more` flag.
    #[test]
    fn test_cursor_based_pagination() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{
            "data": [{"id": "rec1"}, {"id": "rec2"}],
            "cursors": {
                "next": "eyJpZCI6InJlYzIifQ==",
                "previous": null
            },
            "has_more": true
        }"#,
        );

        let response = client.get("/api/v1/recordings?limit=20").unwrap();

        assert_eq!(response.status, 200);
        assert!(response.body.contains("cursors"));
        assert!(response.body.contains("has_more"));
    }

    /// Negative page numbers are rejected with 400.
    #[test]
    fn test_invalid_page_parameter() {
        let client = MockApiClient::new();
        client.queue_response(
            400,
            r#"{"error": "Bad Request", "message": "Page must be a positive integer"}"#,
        );

        let response = client.get("/api/v1/recordings?page=-1").unwrap();

        assert_eq!(response.status, 400);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Content Negotiation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod content_negotiation {
    use super::*;

    /// Successful responses declare an application/json content type.
    #[test]
    fn test_json_response() {
        let mut headers = HashMap::new();
        headers.insert("Content-Type".to_string(), "application/json".to_string());

        let response = MockResponse {
            status: 200,
            body: r#"{"data": []}"#.to_string(),
            headers,
        };

        assert_eq!(
            response.headers.get("Content-Type").unwrap(),
            "application/json"
        );
    }

    /// Posting a non-JSON payload yields HTTP 415.
    #[test]
    fn test_unsupported_media_type() {
        let api = MockApiClient::new();
        api.queue_response(
            415,
            r#"{"error": "Unsupported Media Type", "message": "Only application/json is supported"}"#,
        );

        // Assuming XML was sent
        let reply = api.post("/api/v1/recordings", "<xml></xml>").unwrap();

        assert_eq!(reply.status, 415);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// API Request Tracking Tests
|
||||
// ============================================================================
|
||||
|
||||
mod request_tracking {
    use super::*;

    /// Every GET/POST increments the client's request counter.
    #[test]
    fn test_request_count_tracking() {
        let api = MockApiClient::new();

        assert_eq!(api.request_count(), 0);

        api.get("/path1").unwrap();
        assert_eq!(api.request_count(), 1);

        api.post("/path2", "{}").unwrap();
        assert_eq!(api.request_count(), 2);

        api.get("/path3").unwrap();
        assert_eq!(api.request_count(), 3);
    }

    /// Queued responses are served strictly first-in, first-out.
    #[test]
    fn test_response_queuing() {
        let api = MockApiClient::new();

        for (code, payload) in [(200, "first"), (201, "second"), (202, "third")] {
            api.queue_response(code, payload);
        }

        let statuses: Vec<_> = (0..3).map(|_| api.get("/").unwrap().status).collect();

        assert_eq!(statuses, vec![200, 201, 202]);
    }

    /// With an empty queue the mock falls back to a generic 200 / "{}" reply.
    #[test]
    fn test_default_response_when_queue_empty() {
        let api = MockApiClient::new();

        let reply = api.get("/").unwrap();
        assert_eq!(reply.status, 200);
        assert_eq!(reply.body, "{}");
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: list, create, search — then verify the
    /// client tracked exactly three requests.
    #[test]
    fn test_api_integration_smoke_test() {
        let api = MockApiClient::new();

        // 1. List recordings.
        api.queue_response(200, r#"{"recordings": []}"#);
        assert_eq!(api.get("/api/v1/recordings").unwrap().status, 200);

        // 2. Create a recording.
        api.queue_response(201, r#"{"id": "new-rec"}"#);
        assert_eq!(api.post("/api/v1/recordings", "{}").unwrap().status, 201);

        // 3. Run a search.
        api.queue_response(200, r#"{"results": []}"#);
        assert_eq!(api.post("/api/v1/search", r#"{"k": 10}"#).unwrap().status, 200);

        // All three calls were counted.
        assert_eq!(api.request_count(), 3);
    }
}
|
||||
729
vendor/ruvector/examples/vibecast-7sense/tests/integration/audio_test.rs
vendored
Normal file
729
vendor/ruvector/examples/vibecast-7sense/tests/integration/audio_test.rs
vendored
Normal file
@@ -0,0 +1,729 @@
|
||||
//! Integration tests for Audio Ingestion Context
|
||||
//!
|
||||
//! Tests for audio file loading, resampling, segmentation, and spectrogram generation.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
use std::io::Cursor;
|
||||
|
||||
// ============================================================================
|
||||
// Audio File Loading Tests
|
||||
// ============================================================================
|
||||
|
||||
mod audio_loading {
    use super::*;

    /// A generated test WAV must carry a canonical RIFF/WAVE header at 32 kHz.
    #[test]
    fn test_load_wav_file_32khz() {
        // Create test WAV data at 32kHz
        let wav_bytes = create_test_wav_bytes(5000); // 5 seconds

        // Verify the WAV magic chunks.
        assert_eq!(&wav_bytes[0..4], b"RIFF");
        assert_eq!(&wav_bytes[8..12], b"WAVE");
        assert_eq!(&wav_bytes[12..16], b"fmt ");

        // Sample rate lives at byte offset 24 (little-endian u32).
        let sample_rate =
            u32::from_le_bytes([wav_bytes[24], wav_bytes[25], wav_bytes[26], wav_bytes[27]]);
        assert_eq!(sample_rate, 32000);
    }

    /// Sample count must match duration x sample rate exactly.
    #[test]
    fn test_load_audio_correct_duration() {
        let duration_ms = 5000;
        let samples = create_test_audio_samples(duration_ms, 32000);

        let expected_samples = (duration_ms as f64 * 32000.0 / 1000.0) as usize;
        assert_eq!(samples.len(), expected_samples);
    }

    /// All samples are finite and normalized to [-1.0, 1.0].
    #[test]
    fn test_audio_samples_in_valid_range() {
        let samples = create_test_audio_samples(1000, 32000);

        for (i, sample) in samples.iter().enumerate() {
            // Idiomatic range check (clippy: manual_range_contains).
            assert!(
                (-1.0..=1.0).contains(sample),
                "Sample {} out of range: {}",
                i,
                sample
            );
            // `is_finite()` covers both NaN and +/-Inf in one call.
            assert!(sample.is_finite(), "Sample {} is NaN or Inf", i);
        }
    }

    /// Default format matches the Perch 2.0 ingestion contract.
    #[test]
    fn test_audio_format_validation() {
        let format = AudioFormat::default();

        assert_eq!(format.sample_rate, 32000, "Must be 32kHz for Perch 2.0");
        assert_eq!(format.channels, 1, "Must be mono");
        assert!(format.bit_depth >= 16, "Minimum 16-bit");
    }

    /// Sample counts scale linearly with the requested duration.
    #[test]
    fn test_load_different_durations() {
        let durations = vec![1000, 5000, 10000, 30000, 60000];

        for duration in durations {
            let samples = create_test_audio_samples(duration, 32000);
            // 32 samples per millisecond at 32 kHz.
            let expected = (duration as f64 * 32.0) as usize;
            assert_eq!(
                samples.len(),
                expected,
                "Wrong sample count for {}ms",
                duration
            );
        }
    }

    /// Generated WAV bytes have a parseable canonical 44-byte header
    /// followed by a non-empty data chunk.
    #[test]
    fn test_wav_bytes_parseable() {
        let wav_bytes = create_test_wav_bytes(5000);

        // Basic WAV structure validation
        assert!(wav_bytes.len() > 44, "WAV too short for valid header");

        // The "data" chunk id sits at offset 36 in a canonical header.
        let data_marker = &wav_bytes[36..40];
        assert_eq!(data_marker, b"data");

        // The chunk size follows the id (little-endian u32) and must be non-zero.
        let data_size =
            u32::from_le_bytes([wav_bytes[40], wav_bytes[41], wav_bytes[42], wav_bytes[43]]);
        assert!(data_size > 0);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Resampling Tests
|
||||
// ============================================================================
|
||||
|
||||
mod resampling {
    use super::*;

    /// Mock resampler that converts audio to target sample rate
    /// using naive linear interpolation (sufficient for the length
    /// and energy checks below — not production quality).
    struct MockResampler {
        // Output sample rate in Hz.
        target_rate: u32,
    }

    impl MockResampler {
        /// Build a resampler targeting `target_rate` Hz.
        fn new(target_rate: u32) -> Self {
            Self { target_rate }
        }

        /// Resample `samples` from `source_rate` to the target rate.
        ///
        /// Identity rates short-circuit to a plain copy. Otherwise each
        /// output sample is linearly interpolated between the two nearest
        /// input samples; the upper index is clamped so the final output
        /// sample never reads past the end of the input.
        /// NOTE(review): panics on an empty `samples` slice only if the
        /// rates differ AND `new_len > 0`, which cannot happen (new_len
        /// is 0 for empty input), so the `len() - 1` below is safe.
        fn resample(&self, samples: &[f32], source_rate: u32) -> Vec<f32> {
            if source_rate == self.target_rate {
                return samples.to_vec();
            }

            let ratio = self.target_rate as f64 / source_rate as f64;
            let new_len = (samples.len() as f64 * ratio) as usize;

            // Simple linear interpolation resampling
            (0..new_len)
                .map(|i| {
                    // Fractional position of output sample i in the source signal.
                    let src_idx = i as f64 / ratio;
                    let idx0 = src_idx.floor() as usize;
                    // Clamp the neighbor index at the last sample.
                    let idx1 = (idx0 + 1).min(samples.len() - 1);
                    let frac = src_idx - idx0 as f64;

                    samples[idx0] * (1.0 - frac as f32) + samples[idx1] * frac as f32
                })
                .collect()
        }
    }

    /// 44.1 kHz -> 32 kHz: output length scales by the rate ratio (+-1 sample).
    #[test]
    fn test_resample_44100_to_32000() {
        let source_rate = 44100;
        let target_rate = 32000;
        let duration_ms = 1000;

        // Create 44.1kHz audio
        let samples: Vec<f32> = (0..(source_rate * duration_ms / 1000) as usize)
            .map(|i| (i as f32 * 0.01).sin())
            .collect();

        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);

        let expected_len = (target_rate * duration_ms / 1000) as usize;
        // Allow 1 sample tolerance due to rounding
        assert!(
            (resampled.len() as i64 - expected_len as i64).abs() <= 1,
            "Expected ~{} samples, got {}",
            expected_len,
            resampled.len()
        );
    }

    /// 48 kHz -> 32 kHz: same length contract with an integer 3:2 ratio.
    #[test]
    fn test_resample_48000_to_32000() {
        let source_rate = 48000;
        let target_rate = 32000;
        let duration_ms = 1000;

        let samples: Vec<f32> = (0..(source_rate * duration_ms / 1000) as usize)
            .map(|i| (i as f32 * 0.01).sin())
            .collect();

        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);

        let expected_len = (target_rate * duration_ms / 1000) as usize;
        assert!(
            (resampled.len() as i64 - expected_len as i64).abs() <= 1,
            "Expected ~{} samples, got {}",
            expected_len,
            resampled.len()
        );
    }

    /// Mean-square energy is approximately preserved (within 10%)
    /// by the linear-interpolation resampler.
    #[test]
    fn test_resample_preserves_energy() {
        let source_rate = 44100;
        let target_rate = 32000;

        let samples: Vec<f32> = (0..44100).map(|i| (i as f32 * 0.01).sin()).collect();

        let source_energy: f32 = samples.iter().map(|x| x * x).sum::<f32>() / samples.len() as f32;

        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);

        let target_energy: f32 =
            resampled.iter().map(|x| x * x).sum::<f32>() / resampled.len() as f32;

        // Energy should be approximately preserved
        let energy_diff = (source_energy - target_energy).abs() / source_energy;
        assert!(
            energy_diff < 0.1,
            "Energy changed by {:.1}%",
            energy_diff * 100.0
        );
    }

    /// Resampling 32 kHz audio to 32 kHz is a no-op (identity copy).
    #[test]
    fn test_resample_identity_at_32000() {
        let samples = create_test_audio_samples(1000, 32000);

        let resampler = MockResampler::new(32000);
        let resampled = resampler.resample(&samples, 32000);

        assert_eq!(samples.len(), resampled.len());
        for (a, b) in samples.iter().zip(resampled.iter()) {
            assert!((a - b).abs() < 0.0001);
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Segmentation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod segmentation {
    use super::*;

    /// Mock energy-based segmenter
    /// (sliding mean-square-energy window with a fixed threshold).
    struct MockSegmenter {
        // Energy analysis window length in milliseconds.
        window_ms: u64,
        // Hop between successive analysis windows in milliseconds.
        hop_ms: u64,
        // Mean-square energy level above which a window counts as "signal".
        threshold: f32,
        // Candidate segments shorter than this are discarded.
        min_duration_ms: u64,
        // Sample rate the input audio is assumed to have (Hz).
        sample_rate: u32,
    }

    impl MockSegmenter {
        /// Default parameters: 100 ms window, 50 ms hop, 0.1 energy
        /// threshold, 500 ms minimum segment length, 32 kHz input.
        fn new() -> Self {
            Self {
                window_ms: 100,
                hop_ms: 50,
                threshold: 0.1,
                min_duration_ms: 500,
                sample_rate: 32000,
            }
        }

        /// Split `samples` into call segments attributed to `recording_id`.
        ///
        /// Computes mean-square energy per hopped window, then emits one
        /// segment for each run of windows above the threshold lasting at
        /// least `min_duration_ms`. A run still open when the audio ends is
        /// dropped (mock simplification). Quality fields (snr, clipping /
        /// overlap scores, grade) are filled with fixed placeholder values.
        fn segment(&self, samples: &[f32], recording_id: RecordingId) -> Vec<CallSegment> {
            let window_size = (self.window_ms as usize * self.sample_rate as usize) / 1000;
            let hop_size = (self.hop_ms as usize * self.sample_rate as usize) / 1000;

            // Compute energy per window
            let mut energies: Vec<f32> = Vec::new();
            let mut i = 0;
            while i + window_size <= samples.len() {
                let energy: f32 =
                    samples[i..i + window_size].iter().map(|x| x * x).sum::<f32>() / window_size as f32;
                energies.push(energy);
                i += hop_size;
            }

            // Find segments above threshold
            let mut segments = Vec::new();
            let mut in_segment = false;
            let mut segment_start = 0;

            for (i, energy) in energies.iter().enumerate() {
                // Window index -> start time of the window in milliseconds.
                let time_ms = (i * self.hop_ms as usize) as u64;

                if *energy > self.threshold && !in_segment {
                    // Rising edge: signal just crossed the threshold.
                    in_segment = true;
                    segment_start = time_ms;
                } else if *energy <= self.threshold && in_segment {
                    // Falling edge: close the segment if it is long enough.
                    in_segment = false;
                    let duration = time_ms - segment_start;
                    if duration >= self.min_duration_ms {
                        segments.push(CallSegment {
                            id: SegmentId::new(),
                            recording_id,
                            start_ms: segment_start,
                            end_ms: time_ms,
                            // Placeholder SNR; a real segmenter would estimate it.
                            snr: 15.0,
                            // Energy of the window where this segment began
                            // (start time in ms / hop in ms = window index).
                            energy: energies[segment_start as usize / self.hop_ms as usize],
                            clipping_score: 0.0,
                            overlap_score: 0.0,
                            quality_grade: QualityGrade::Good,
                        });
                    }
                }
            }

            segments
        }
    }

    /// A loud burst surrounded by silence must yield at least one segment.
    #[test]
    fn test_segmentation_detects_calls() {
        let segmenter = MockSegmenter::new();
        let recording_id = RecordingId::new();

        // Create audio with clear signal/silence pattern
        let mut samples = vec![0.0f32; 64000]; // 2 seconds
        // Add a loud "call" at 200-1200ms (1 second of signal)
        for i in 6400..38400 {
            samples[i] = 0.8 * ((i as f32 * 0.05).sin()); // Louder signal
        }
        // Silence at start and end

        let segments = segmenter.segment(&samples, recording_id);

        assert!(
            !segments.is_empty(),
            "Should detect at least one segment"
        );
    }

    /// Fixture-generated segment sequences must never overlap in time.
    #[test]
    fn test_segmentation_non_overlapping() {
        let segments = create_segment_sequence(5, 500);

        for i in 0..segments.len() - 1 {
            assert!(
                segments[i].end_ms <= segments[i + 1].start_ms,
                "Segments {} and {} overlap",
                i,
                i + 1
            );
        }
    }

    /// Fixture segments are fixed-length 5 s windows (Perch contract).
    #[test]
    fn test_segment_duration_constraint() {
        let segments = create_segment_sequence(10, 0);

        for segment in &segments {
            let duration = segment.end_ms - segment.start_ms;
            assert_eq!(duration, 5000, "Perch segments should be 5 seconds");
        }
    }

    /// Every segment carries a positive, plausible SNR estimate.
    #[test]
    fn test_segmentation_snr_computation() {
        let segments = create_segment_sequence(5, 500);

        for segment in &segments {
            assert!(segment.snr > 0.0, "SNR should be positive");
            assert!(segment.snr < 100.0, "SNR should be realistic");
        }
    }

    /// No segment may extend past the end of its source recording.
    #[test]
    fn test_segment_within_recording_bounds() {
        let recording = create_test_recording_with_duration(60000);
        let segments = create_segment_sequence(10, 500);

        for segment in &segments {
            assert!(
                segment.end_ms <= recording.duration_ms,
                "Segment extends beyond recording"
            );
        }
    }

    /// Assigning a recording id to a segment is preserved verbatim.
    #[test]
    fn test_segmentation_preserves_recording_id() {
        let recording_id = RecordingId::new();
        let mut segment = create_test_segment();
        segment.recording_id = recording_id;

        assert_eq!(segment.recording_id, recording_id);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Spectrogram Generation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod spectrogram {
|
||||
use super::*;
|
||||
|
||||
const MEL_BINS: usize = 128;
|
||||
const MEL_FRAMES: usize = 500;
|
||||
|
||||
#[test]
fn test_spectrogram_dimensions() {
    // A fixture mel spectrogram is MEL_FRAMES x MEL_BINS.
    let spec = create_test_spectrogram();

    assert_eq!(spec.len(), MEL_FRAMES, "Should have 500 frames");
    assert_eq!(spec[0].len(), MEL_BINS, "Should have 128 mel bins");
}
|
||||
|
||||
#[test]
fn test_spectrogram_values_non_negative() {
    // Mel magnitudes are energies, so no bin may dip below zero.
    let spec = create_test_spectrogram();

    for frame_idx in 0..spec.len() {
        let frame = &spec[frame_idx];
        for bin_idx in 0..frame.len() {
            let value = frame[bin_idx];
            assert!(
                value >= 0.0,
                "Frame {} bin {} has negative value: {}",
                frame_idx,
                bin_idx,
                value
            );
        }
    }
}
|
||||
|
||||
/// Every spectrogram cell must be a finite number — a NaN or infinity
/// anywhere would poison downstream embedding computation.
#[test]
fn test_spectrogram_no_nan_or_inf() {
    let spectrogram = create_test_spectrogram();

    for (frame_idx, frame) in spectrogram.iter().enumerate() {
        for (bin_idx, value) in frame.iter().enumerate() {
            // `is_finite()` is the idiomatic single check for NaN and +/-Inf.
            assert!(
                value.is_finite(),
                "Frame {} bin {} is NaN/Inf",
                frame_idx,
                bin_idx
            );
        }
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_spectrogram_energy_distribution() {
|
||||
let spectrogram = create_test_spectrogram();
|
||||
|
||||
// Compute total energy per frame
|
||||
let frame_energies: Vec<f32> = spectrogram
|
||||
.iter()
|
||||
.map(|frame| frame.iter().sum())
|
||||
.collect();
|
||||
|
||||
// Energy should vary (not all zeros or all same)
|
||||
let min_energy = frame_energies
|
||||
.iter()
|
||||
.cloned()
|
||||
.fold(f32::INFINITY, f32::min);
|
||||
let max_energy = frame_energies
|
||||
.iter()
|
||||
.cloned()
|
||||
.fold(f32::NEG_INFINITY, f32::max);
|
||||
|
||||
assert!(
|
||||
max_energy > min_energy * 1.1,
|
||||
"Energy should vary across frames"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_spectrogram_from_audio_samples() {
|
||||
let samples = create_test_audio_samples(5000, 32000);
|
||||
|
||||
// Simple mock spectrogram computation
|
||||
let hop_size = samples.len() / MEL_FRAMES;
|
||||
let spectrogram: Vec<Vec<f32>> = (0..MEL_FRAMES)
|
||||
.map(|frame| {
|
||||
let start = frame * hop_size;
|
||||
let end = (start + hop_size).min(samples.len());
|
||||
let chunk = &samples[start..end];
|
||||
|
||||
// Mock mel filterbank (simplified)
|
||||
(0..MEL_BINS)
|
||||
.map(|bin| {
|
||||
let freq_start = bin * chunk.len() / MEL_BINS;
|
||||
let freq_end = ((bin + 1) * chunk.len() / MEL_BINS).min(chunk.len());
|
||||
if freq_start < freq_end {
|
||||
chunk[freq_start..freq_end]
|
||||
.iter()
|
||||
.map(|x| x.abs())
|
||||
.sum::<f32>()
|
||||
/ (freq_end - freq_start) as f32
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
assert_eq!(spectrogram.len(), MEL_FRAMES);
|
||||
assert_eq!(spectrogram[0].len(), MEL_BINS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_spectrogram_temporal_resolution() {
|
||||
// 5 seconds at 32kHz = 160000 samples
|
||||
// 500 frames means ~10ms per frame
|
||||
let samples_per_frame = 160000 / MEL_FRAMES;
|
||||
let ms_per_frame = (samples_per_frame as f64 / 32.0) as u64;
|
||||
|
||||
assert!(
|
||||
ms_per_frame >= 9 && ms_per_frame <= 11,
|
||||
"Frame duration should be ~10ms, got {}ms",
|
||||
ms_per_frame
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_spectrogram_frequency_range() {
|
||||
// Perch 2.0 uses 60Hz to 16000Hz
|
||||
// With 128 mel bins, each bin covers approximately 125Hz
|
||||
|
||||
let min_freq = 60.0;
|
||||
let max_freq = 16000.0;
|
||||
let hz_per_bin = (max_freq - min_freq) / MEL_BINS as f32;
|
||||
|
||||
assert!(
|
||||
hz_per_bin > 100.0 && hz_per_bin < 150.0,
|
||||
"Each mel bin should cover ~125Hz, got {}Hz",
|
||||
hz_per_bin
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Recording Repository Integration Tests
|
||||
// ============================================================================
|
||||
|
||||
mod repository_integration {
|
||||
use super::*;
|
||||
use chrono::Duration as ChronoDuration;
|
||||
|
||||
#[test]
|
||||
fn test_recording_crud_operations() {
|
||||
let repo = MockRecordingRepository::new();
|
||||
|
||||
// Create
|
||||
let recording = create_test_recording();
|
||||
let id = recording.id;
|
||||
repo.save(recording).unwrap();
|
||||
|
||||
// Read
|
||||
let found = repo.find_by_id(&id).unwrap().unwrap();
|
||||
assert_eq!(found.id, id);
|
||||
|
||||
// Count
|
||||
assert_eq!(repo.count(), 1);
|
||||
|
||||
// Delete
|
||||
repo.delete(&id).unwrap();
|
||||
assert_eq!(repo.count(), 0);
|
||||
assert!(repo.find_by_id(&id).unwrap().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_recordings_by_sensor() {
|
||||
let repo = MockRecordingRepository::new();
|
||||
|
||||
// Add recordings from different sensors
|
||||
for i in 0..5 {
|
||||
let mut recording = create_test_recording();
|
||||
recording.sensor_id = format!("SENSOR_{}", i % 2);
|
||||
repo.save(recording).unwrap();
|
||||
}
|
||||
|
||||
let sensor0_recordings = repo.find_by_sensor_id("SENSOR_0").unwrap();
|
||||
let sensor1_recordings = repo.find_by_sensor_id("SENSOR_1").unwrap();
|
||||
|
||||
assert_eq!(sensor0_recordings.len(), 3);
|
||||
assert_eq!(sensor1_recordings.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_recordings_by_date_range() {
|
||||
let repo = MockRecordingRepository::new();
|
||||
let now = chrono::Utc::now();
|
||||
|
||||
// Add recordings at different times
|
||||
for i in 0..5 {
|
||||
let mut recording = create_test_recording();
|
||||
recording.start_timestamp = now - ChronoDuration::hours(i as i64);
|
||||
repo.save(recording).unwrap();
|
||||
}
|
||||
|
||||
// Find recordings from last 2 hours
|
||||
let start = now - ChronoDuration::hours(2);
|
||||
let end = now + ChronoDuration::hours(1);
|
||||
let recent = repo.find_by_date_range(start, end).unwrap();
|
||||
|
||||
assert_eq!(recent.len(), 3); // 0, 1, 2 hours ago
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_repository_by_recording() {
|
||||
let repo = MockSegmentRepository::new();
|
||||
let recording_id = RecordingId::new();
|
||||
|
||||
// Add segments for this recording
|
||||
for i in 0..5 {
|
||||
let segment = CallSegment {
|
||||
recording_id,
|
||||
start_ms: i * 5500,
|
||||
end_ms: i * 5500 + 5000,
|
||||
..Default::default()
|
||||
};
|
||||
repo.save(segment).unwrap();
|
||||
}
|
||||
|
||||
// Add segments for another recording
|
||||
let other_id = RecordingId::new();
|
||||
for i in 0..3 {
|
||||
let segment = CallSegment {
|
||||
recording_id: other_id,
|
||||
start_ms: i * 5500,
|
||||
end_ms: i * 5500 + 5000,
|
||||
..Default::default()
|
||||
};
|
||||
repo.save(segment).unwrap();
|
||||
}
|
||||
|
||||
let segments = repo.find_by_recording(&recording_id).unwrap();
|
||||
assert_eq!(segments.len(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_repository_by_time_range() {
|
||||
let repo = MockSegmentRepository::new();
|
||||
let recording_id = RecordingId::new();
|
||||
|
||||
// Add segments spanning 0-30 seconds
|
||||
for i in 0..6 {
|
||||
let segment = CallSegment {
|
||||
recording_id,
|
||||
start_ms: i * 5000,
|
||||
end_ms: (i + 1) * 5000,
|
||||
..Default::default()
|
||||
};
|
||||
repo.save(segment).unwrap();
|
||||
}
|
||||
|
||||
// Find segments in 10-20 second range
|
||||
let segments = repo
|
||||
.find_by_time_range(&recording_id, 10000, 20000)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(segments.len(), 2); // Segments at 10-15s and 15-20s
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Quality Assessment Tests
|
||||
// ============================================================================
|
||||
|
||||
mod quality_assessment {
    use super::*;

    #[test]
    fn test_quality_grade_from_snr() {
        // Boundary and interior SNR values for every grade bucket.
        let cases = [
            (25.0, QualityGrade::Excellent),
            (20.1, QualityGrade::Excellent),
            (15.0, QualityGrade::Good),
            (10.1, QualityGrade::Good),
            (7.0, QualityGrade::Fair),
            (5.1, QualityGrade::Fair),
            (3.0, QualityGrade::Poor),
            (0.1, QualityGrade::Poor),
            (-5.0, QualityGrade::Unusable),
        ];
        for (snr, expected) in cases {
            assert_eq!(QualityGrade::from_snr(snr), expected);
        }
    }

    #[test]
    fn test_find_segments_by_quality() {
        let repo = MockSegmentRepository::new();

        // One segment per quality bucket, from excellent down to unusable.
        for snr in [25.0, 15.0, 7.0, 3.0, -5.0] {
            repo.save(create_test_segment_with_snr(snr)).unwrap();
        }

        // "Good" acts as a minimum threshold: Excellent + Good qualify.
        assert_eq!(repo.find_by_quality(QualityGrade::Good).unwrap().len(), 2);

        // "Fair" admits Excellent, Good and Fair.
        assert_eq!(repo.find_by_quality(QualityGrade::Fair).unwrap().len(), 3);
    }

    #[test]
    fn test_segment_clipping_detection() {
        let mut seg = create_test_segment();

        // No clipping at all.
        seg.clipping_score = 0.0;
        assert!(seg.clipping_score < 0.01);

        // Minor clipping stays under the 0.1 threshold.
        seg.clipping_score = 0.05;
        assert!(seg.clipping_score < 0.1);

        // Severe clipping clearly exceeds 0.2.
        seg.clipping_score = 0.3;
        assert!(seg.clipping_score > 0.2);
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // End-to-end sanity check wiring the recording, segment and
    // spectrogram fixtures together.
    #[test]
    fn test_audio_integration_smoke_test() {
        let recording = create_test_recording();
        let segments = create_segment_sequence(5, 500);
        let spectrogram = create_test_spectrogram();

        // The last segment must fit inside the recording, and the
        // spectrogram must have the expected 500x128 geometry.
        let last_end = segments.last().unwrap().end_ms;
        assert!(recording.duration_ms >= last_end);
        assert_eq!(spectrogram.len(), 500);
        assert_eq!(spectrogram[0].len(), 128);
    }
}
|
||||
603
vendor/ruvector/examples/vibecast-7sense/tests/integration/embedding_test.rs
vendored
Normal file
603
vendor/ruvector/examples/vibecast-7sense/tests/integration/embedding_test.rs
vendored
Normal file
@@ -0,0 +1,603 @@
|
||||
//! Integration tests for Embedding Context
|
||||
//!
|
||||
//! Tests for ONNX model loading, embedding generation, L2 normalization,
|
||||
//! quantization/dequantization, and batch embedding operations.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
|
||||
// ============================================================================
|
||||
// ONNX Model Loading Tests
|
||||
// ============================================================================
|
||||
|
||||
mod model_loading {
    use super::*;

    #[test]
    fn test_model_version_configuration() {
        // Default model metadata matches Perch 2.0 with 1536-d output.
        let model = ModelVersion::default();

        assert_eq!(model.name, "perch");
        assert_eq!(model.version, "2.0");
        assert_eq!(model.dimensions, 1536);
    }

    #[test]
    fn test_mock_model_adapter_creation() {
        // A freshly built adapter reports the default model metadata.
        let adapter = MockEmbeddingModelAdapter::new();
        let version = adapter.model_version();

        assert_eq!(version.name, "perch");
        assert_eq!(version.dimensions, 1536);
    }

    #[test]
    fn test_model_adapter_with_custom_dimensions() {
        // The builder can override the output dimensionality.
        let adapter = MockEmbeddingModelAdapter::new().with_dimensions(768);
        let version = adapter.model_version();

        assert_eq!(version.dimensions, 768);
    }

    #[test]
    fn test_model_output_dimensions_match_config() {
        // The emitted vector length must agree with the advertised config.
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let version = adapter.model_version();
        let embedding = adapter.embed(&audio).unwrap();

        assert_eq!(embedding.len(), version.dimensions);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Embedding Generation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod embedding_generation {
    use super::*;

    #[test]
    fn test_generate_embedding_from_audio() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let embedding = adapter.embed(&audio).unwrap();

        assert_eq!(embedding.len(), 1536);
        // Every component must be a finite number.
        let all_finite = embedding.iter().all(|x| !x.is_nan() && !x.is_infinite());
        assert!(all_finite, "Embedding should not contain NaN or Inf");
    }

    #[test]
    fn test_embedding_output_is_normalized() {
        // The model promises unit-length (L2-normalized) output vectors.
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let embedding = adapter.embed(&audio).unwrap();

        let norm = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!(
            (norm - 1.0).abs() < 0.0001,
            "Embedding should be L2-normalized, got norm {}",
            norm
        );
    }

    #[test]
    fn test_embedding_deterministic() {
        // Embedding the identical signal twice must be bit-for-bit equal.
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let first = adapter.embed(&audio).unwrap();
        let second = adapter.embed(&audio).unwrap();

        assert_eq!(first, second, "Same input should produce same output");
    }

    #[test]
    fn test_different_audio_produces_different_embeddings() {
        let adapter = MockEmbeddingModelAdapter::new();

        let audio_a = create_test_audio_samples(5000, 32000);
        // Halve the amplitude to get a distinct but related signal.
        let audio_b: Vec<f32> = audio_a.iter().map(|x| x * 0.5).collect();

        let emb_a = adapter.embed(&audio_a).unwrap();
        let emb_b = adapter.embed(&audio_b).unwrap();

        assert!(
            cosine_distance(&emb_a, &emb_b) > 0.01,
            "Different audio should produce different embeddings"
        );
    }

    #[test]
    fn test_embedding_entity_creation() {
        // The fixture embedding carries the expected vector, norm and model.
        let embedding = create_test_embedding();

        assert_eq!(embedding.vector.len(), 1536);
        assert!(embedding.norm > 0.0);
        assert_eq!(embedding.model_version.name, "perch");
    }

    #[test]
    fn test_embedding_with_specific_vector() {
        // Build an embedding from an explicit all-ones vector and check
        // that the stored norm matches the vector's actual L2 norm.
        let embedding = create_test_embedding_with_vector(vec![1.0; 1536]);

        assert_eq!(embedding.vector.len(), 1536);
        let norm = embedding.vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - embedding.norm).abs() < 0.0001);
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// L2 Normalization Tests
|
||||
// ============================================================================
|
||||
|
||||
mod normalization {
    use super::*;

    #[test]
    fn test_l2_normalize_unit_vector() {
        // A vector that is already unit length must pass through unchanged.
        let vector = vec![1.0, 0.0, 0.0];
        let normalized = l2_normalize(&vector);

        assert_eq!(normalized, vec![1.0, 0.0, 0.0]);
    }

    #[test]
    fn test_l2_normalize_simple_vector() {
        // 3-4-5 triangle: (3, 4) normalizes to (0.6, 0.8).
        let vector = vec![3.0, 4.0];
        let normalized = l2_normalize(&vector);

        assert!((normalized[0] - 0.6).abs() < 0.0001);
        assert!((normalized[1] - 0.8).abs() < 0.0001);
    }

    #[test]
    fn test_l2_normalize_preserves_direction() {
        // Normalization rescales but must not rotate: component ratios
        // before and after must agree.
        let vector = vec![1.0, 2.0, 3.0, 4.0];
        let normalized = l2_normalize(&vector);

        let original_ratio = vector[1] / vector[0];
        let normalized_ratio = normalized[1] / normalized[0];
        assert!((original_ratio - normalized_ratio).abs() < 0.0001);
    }

    #[test]
    fn test_l2_normalize_high_dimensional() {
        // A random 1536-d vector normalizes to unit length.
        let vector = create_random_vector(1536);
        let normalized = l2_normalize(&vector);

        assert_normalized(&normalized, 0.0001);
    }

    #[test]
    fn test_l2_normalize_zero_vector() {
        // The zero vector has no direction; normalization must leave it
        // as all zeros rather than dividing by zero.
        let vector = vec![0.0; 10];
        let normalized = l2_normalize(&vector);

        assert!(normalized.iter().all(|x| *x == 0.0));
    }

    #[test]
    fn test_l2_normalize_idempotent() {
        // Normalizing twice must give the same result as normalizing once.
        let vector = create_random_vector(1536);
        let once = l2_normalize(&vector);
        let twice = l2_normalize(&once);

        for (a, b) in once.iter().zip(twice.iter()) {
            assert!((a - b).abs() < 0.0001);
        }
    }

    #[test]
    fn test_normalized_vector_creation() {
        // The factory helper must itself produce unit-length vectors.
        let vector = create_normalized_vector(1536);
        assert_normalized(&vector, 0.0001);
    }

    #[test]
    fn test_batch_normalization() {
        // Normalizing a batch of deterministic vectors yields unit vectors.
        let vectors: Vec<Vec<f32>> =
            (0..10).map(|i| create_deterministic_vector(1536, i)).collect();

        // Fixed: the original `enumerate()`d here but never used the index,
        // which produces an unused-variable warning.
        for norm_vec in vectors.iter().map(|v| l2_normalize(v)) {
            assert_normalized(&norm_vec, 0.0001);
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Quantization/Dequantization Tests
|
||||
// ============================================================================
|
||||
|
||||
mod quantization {
    use super::*;

    /// Quantize an f32 vector to i8 (scalar quantization).
    ///
    /// Maps the input range [min, max] linearly onto [-127, 127] around the
    /// range midpoint — an affine scheme that uses the signed i8 range
    /// symmetrically. (The original comment called this "symmetric
    /// quantization"; strictly it is midpoint/affine, since the zero point
    /// is the range midpoint rather than 0.0.)
    ///
    /// Returns the quantized codes plus the `(scale, zero_point)` pair
    /// required by [`dequantize_i8`] to reconstruct approximate values.
    fn quantize_i8(vector: &[f32]) -> (Vec<i8>, f32, f32) {
        let min_val = vector.iter().cloned().fold(f32::INFINITY, f32::min);
        let max_val = vector.iter().cloned().fold(f32::NEG_INFINITY, f32::max);

        // Map [min, max] to [-127, 127].
        let scale = (max_val - min_val) / 254.0; // 254 = 127 - (-127)
        let zero_point = (min_val + max_val) / 2.0; // Midpoint of input range

        let quantized: Vec<i8> = vector
            .iter()
            .map(|v| {
                if scale == 0.0 {
                    // Constant input: every value maps to the midpoint code.
                    0i8
                } else {
                    let scaled = ((v - zero_point) / scale).round();
                    (scaled as i16).clamp(-127, 127) as i8
                }
            })
            .collect();

        (quantized, scale, zero_point)
    }

    /// Dequantize an i8 vector back to f32 using the stored scale/zero_point.
    fn dequantize_i8(quantized: &[i8], scale: f32, zero_point: f32) -> Vec<f32> {
        quantized
            .iter()
            .map(|q| (*q as f32) * scale + zero_point)
            .collect()
    }

    #[test]
    fn test_quantization_roundtrip() {
        // Quantize -> dequantize must reconstruct the vector with small error.
        let original = create_random_vector(1536);
        let (quantized, scale, zero_point) = quantize_i8(&original);
        let dequantized = dequantize_i8(&quantized, scale, zero_point);

        assert_eq!(quantized.len(), original.len());
        assert_eq!(dequantized.len(), original.len());

        // Check reconstruction error is small (RMSE over all components).
        let mse: f32 = original
            .iter()
            .zip(dequantized.iter())
            .map(|(o, d)| (o - d).powi(2))
            .sum::<f32>()
            / original.len() as f32;

        let rmse = mse.sqrt();
        assert!(rmse < 0.1, "Reconstruction RMSE {} is too high", rmse);
    }

    #[test]
    fn test_quantization_compression_ratio() {
        // i8 codes are a quarter the size of f32 components.
        let original = create_random_vector(1536);
        let (quantized, _, _) = quantize_i8(&original);

        let original_bytes = original.len() * std::mem::size_of::<f32>();
        let quantized_bytes = quantized.len() * std::mem::size_of::<i8>();

        let compression_ratio = original_bytes as f32 / quantized_bytes as f32;
        assert!(
            compression_ratio >= 3.9,
            "i8 quantization should achieve ~4x compression, got {}x",
            compression_ratio
        );
    }

    #[test]
    fn test_quantization_preserves_relative_order() {
        let vector1 = vec![0.1, 0.5, 0.9];
        let vector2 = vec![0.2, 0.4, 0.8];

        // Fixed: scale/zero_point were bound to named variables (s1, z1,
        // s2, z2) but never read, producing unused-variable warnings.
        let (q1, _, _) = quantize_i8(&vector1);
        let (q2, _, _) = quantize_i8(&vector2);

        // Quantization is monotone, so ordering within a vector survives.
        assert!(q1[0] < q1[1] && q1[1] < q1[2]);
        assert!(q2[0] < q2[1] && q2[1] < q2[2]);
    }

    #[test]
    fn test_quantization_similarity_preservation() {
        // Two nearly identical unit vectors should stay nearly identical
        // after a quantize/dequantize roundtrip (cosine similarity drift
        // bounded by 0.1).
        let v1 = create_deterministic_vector(1536, 0);
        let v2: Vec<f32> = v1.iter().map(|x| x + 0.01).collect();
        let v2 = l2_normalize(&v2);
        let v1 = l2_normalize(&v1);

        let original_similarity = 1.0 - cosine_distance(&v1, &v2);

        // Quantize both independently, then compare after reconstruction.
        let (q1, s1, z1) = quantize_i8(&v1);
        let (q2, s2, z2) = quantize_i8(&v2);

        let d1 = dequantize_i8(&q1, s1, z1);
        let d2 = dequantize_i8(&q2, s2, z2);

        let quantized_similarity = 1.0 - cosine_distance(&d1, &d2);

        assert!(
            (original_similarity - quantized_similarity).abs() < 0.1,
            "Similarity should be preserved: original={}, quantized={}",
            original_similarity,
            quantized_similarity
        );
    }

    #[test]
    fn test_product_quantization_concept() {
        // Product quantization splits the vector into independent
        // subvectors; here we only verify the partitioning and that each
        // chunk quantizes on its own.
        let vector = create_random_vector(1536);
        let num_subvectors = 48;
        let subvector_dim = 1536 / num_subvectors;

        let subvectors: Vec<&[f32]> = vector.chunks(subvector_dim).collect();
        assert_eq!(subvectors.len(), num_subvectors);

        // Each subvector can be quantized independently.
        for subvec in &subvectors {
            let (quantized, _, _) = quantize_i8(subvec);
            assert_eq!(quantized.len(), subvector_dim);
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Batch Embedding Tests
|
||||
// ============================================================================
|
||||
|
||||
mod batch_embedding {
    use super::*;

    #[test]
    fn test_batch_embed_multiple_segments() {
        // Each entry of a 5-element batch yields a normalized 1536-d vector.
        let adapter = MockEmbeddingModelAdapter::new();

        // Fixed: the closure bound an index `i` it never used, triggering
        // an unused-variable warning.
        let audio_batch: Vec<Vec<f32>> = (0..5)
            .map(|_| create_test_audio_samples(5000, 32000))
            .collect();

        let embeddings = adapter.embed_batch(&audio_batch).unwrap();

        assert_eq!(embeddings.len(), 5);
        for embedding in &embeddings {
            assert_eq!(embedding.len(), 1536);
            assert_normalized(embedding, 0.0001);
        }
    }

    #[test]
    fn test_batch_embedding_consistency() {
        // A batch of one must agree with the single-item API.
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let single = adapter.embed(&audio).unwrap();
        let batch = adapter.embed_batch(&vec![audio]).unwrap();

        assert_eq!(single, batch[0], "Single and batch should produce same result");
    }

    #[test]
    fn test_batch_embedding_performance_scaling() {
        // Batches of several sizes all come back complete.
        let adapter = MockEmbeddingModelAdapter::new();

        for batch_size in [1, 10, 50, 100] {
            let audio_batch: Vec<Vec<f32>> = (0..batch_size)
                .map(|_| create_test_audio_samples(5000, 32000))
                .collect();

            let embeddings = adapter.embed_batch(&audio_batch).unwrap();
            assert_eq!(embeddings.len(), batch_size);
        }
    }

    #[test]
    fn test_batch_embedding_handles_empty_batch() {
        // An empty batch is valid input and returns an empty result.
        let adapter = MockEmbeddingModelAdapter::new();
        let empty_batch: Vec<Vec<f32>> = vec![];

        let embeddings = adapter.embed_batch(&empty_batch).unwrap();
        assert_eq!(embeddings.len(), 0);
    }

    #[test]
    fn test_embedding_batch_factory() {
        // The fixture factory produces the requested count of 1536-d vectors.
        let embeddings = create_embedding_batch(10);

        assert_eq!(embeddings.len(), 10);
        for embedding in &embeddings {
            assert_eq!(embedding.vector.len(), 1536);
        }
    }

    #[test]
    fn test_similar_embeddings_factory() {
        // Embeddings derived from a base vector stay within cosine
        // distance 0.5 of it.
        let base = create_normalized_vector(1536);
        let similar = create_similar_embeddings(&base, 5, 0.1);

        assert_eq!(similar.len(), 5);

        for emb in &similar {
            let distance = cosine_distance(&base, &emb.vector);
            assert!(distance < 0.5, "Similar embedding should be close to base");
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Embedding Repository Tests
|
||||
// ============================================================================
|
||||
|
||||
mod repository {
    use super::*;

    #[test]
    fn test_embedding_repository_crud() {
        // Full create/read/delete cycle, including the segment-id index.
        let repo = MockEmbeddingRepository::new();

        let embedding = create_test_embedding();
        let emb_id = embedding.id;
        let seg_id = embedding.segment_id;

        repo.save(embedding).unwrap();
        assert_eq!(repo.count(), 1);

        let by_id = repo.find_by_id(&emb_id).unwrap().unwrap();
        assert_eq!(by_id.id, emb_id);

        let by_segment = repo.find_by_segment(&seg_id).unwrap().unwrap();
        assert_eq!(by_segment.id, emb_id);

        repo.delete(&emb_id).unwrap();
        assert_eq!(repo.count(), 0);
    }

    #[test]
    fn test_embedding_repository_batch_save() {
        // Saving a batch of ten stores exactly ten entries.
        let repo = MockEmbeddingRepository::new();

        repo.batch_save(create_embedding_batch(10)).unwrap();
        assert_eq!(repo.count(), 10);
    }

    #[test]
    fn test_embedding_repository_find_by_model() {
        let repo = MockEmbeddingRepository::new();

        // Alternate model names: indices 0,2,4 -> "perch" (3 items),
        // indices 1,3 -> "birdnet" (2 items).
        for i in 0..5 {
            let mut embedding = create_test_embedding();
            embedding.model_version.name = if i % 2 == 0 {
                "perch".to_string()
            } else {
                "birdnet".to_string()
            };
            repo.save(embedding).unwrap();
        }

        assert_eq!(repo.find_by_model("perch").unwrap().len(), 3);
        assert_eq!(repo.find_by_model("birdnet").unwrap().len(), 2);
    }

    #[test]
    fn test_embedding_repository_get_all_vectors() {
        let repo = MockEmbeddingRepository::new();

        for emb in create_embedding_batch(5) {
            repo.save(emb).unwrap();
        }

        let all_vectors = repo.get_all_vectors();
        assert_eq!(all_vectors.len(), 5);

        // Every stored vector keeps its full 1536 dimensions.
        assert!(all_vectors.iter().all(|(_, v)| v.len() == 1536));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Model Error Handling Tests
|
||||
// ============================================================================
|
||||
|
||||
mod error_handling {
    use super::*;

    #[test]
    fn test_model_with_failure_rate() {
        // A 0% failure rate must never produce an error.
        let adapter = MockEmbeddingModelAdapter::new().with_failure_rate(0.0);

        let audio = create_test_audio_samples(5000, 32000);
        assert!(adapter.embed(&audio).is_ok());
    }

    #[test]
    fn test_invalid_vector_dimensions_detected() {
        // A 768-d vector (e.g. from a different model) is detectable by length.
        let embedding = create_test_embedding_with_vector(vec![1.0; 768]);
        assert_eq!(embedding.vector.len(), 768);
        assert_ne!(embedding.vector.len(), 1536); // Not Perch dimensions
    }

    #[test]
    fn test_embedding_validation() {
        // Factory-produced embeddings all pass the shared validator.
        let embeddings = create_embedding_batch(10);
        assert_valid_embeddings(&embeddings, 1536);
    }

    #[test]
    fn test_dimension_assertion() {
        // Matching dimensions pass the assertion helper.
        let vector = create_random_vector(1536);
        assert_dimensions(&vector, 1536);
    }

    #[test]
    #[should_panic]
    fn test_dimension_assertion_fails_on_mismatch() {
        // Mismatched dimensions must panic.
        let vector = create_random_vector(768);
        assert_dimensions(&vector, 1536);
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // End-to-end sanity check: audio -> embedding -> repository.
    #[test]
    fn test_embedding_integration_smoke_test() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);

        let vector = adapter.embed(&audio).unwrap();

        // The model output must be a unit-length 1536-d vector.
        assert_eq!(vector.len(), 1536);
        assert_normalized(&vector, 0.0001);

        // Persist it and confirm the repository sees exactly one entry.
        let repo = MockEmbeddingRepository::new();
        repo.save(create_test_embedding_with_vector(vector)).unwrap();

        assert_eq!(repo.count(), 1);
    }
}
|
||||
664
vendor/ruvector/examples/vibecast-7sense/tests/integration/interpretation_test.rs
vendored
Normal file
664
vendor/ruvector/examples/vibecast-7sense/tests/integration/interpretation_test.rs
vendored
Normal file
@@ -0,0 +1,664 @@
|
||||
//! Integration tests for Interpretation Context
|
||||
//!
|
||||
//! Tests for evidence pack building, claim generation with citations,
|
||||
//! and validation that all claims have evidence references.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
use std::collections::HashSet;
|
||||
|
||||
// ============================================================================
|
||||
// Evidence Pack Building Tests
|
||||
// ============================================================================
|
||||
|
||||
mod evidence_pack_building {
    use super::*;

    #[test]
    fn test_create_evidence_pack() {
        // The default fixture pack carries neighbors, exemplars and a
        // positive SNR reading.
        let pack = create_test_evidence_pack();

        assert!(!pack.neighbors.is_empty());
        assert!(!pack.exemplars.is_empty());
        assert!(pack.signal_quality.snr > 0.0);
    }

    #[test]
    fn test_evidence_pack_with_neighbors() {
        let pack = create_test_evidence_pack_with_neighbors(10);

        assert_eq!(pack.neighbors.len(), 10);

        // Distances are non-negative and every neighbor carries a
        // positive relevance score.
        for neighbor in &pack.neighbors {
            assert!(neighbor.distance >= 0.0);
            assert!(neighbor.relevance > 0.0);
        }
    }

    #[test]
    fn test_evidence_pack_builder() {
        // The builder caps neighbors at 15 and exemplars at 3.
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(15)
            .with_exemplar_count(3);

        let segment = create_test_segment();
        let search_results = create_search_results(20);
        let clusters = create_test_clusters(5);

        let pack = builder.build(&segment, &search_results, &clusters).unwrap();

        assert_eq!(pack.neighbors.len(), 15);
        assert!(pack.exemplars.len() <= 3);
    }

    #[test]
    fn test_evidence_pack_signal_quality() {
        // An SNR of 25dB must be carried through and graded Excellent.
        let builder = MockEvidencePackBuilder::new();

        let segment = create_test_segment_with_snr(25.0);
        let search_results = create_search_results(10);
        let clusters = create_test_clusters(2);

        let pack = builder.build(&segment, &search_results, &clusters).unwrap();

        assert_eq!(pack.signal_quality.snr, 25.0);
        assert!(matches!(
            pack.signal_quality.quality_grade,
            Some(QualityGrade::Excellent)
        ));
    }

    #[test]
    fn test_evidence_pack_includes_cluster_ids() {
        let pack = create_test_evidence_pack_with_neighbors(10);

        // At least one neighbor must carry a cluster assignment.
        let has_cluster = pack.neighbors.iter().any(|n| n.cluster_id.is_some());
        assert!(
            has_cluster,
            "At least some neighbors should have cluster assignments"
        );
    }

    #[test]
    fn test_evidence_pack_relevance_scoring() {
        // Relevance is defined as 1 / (1 + distance); check each neighbor.
        let pack = create_test_evidence_pack_with_neighbors(10);

        for (i, neighbor) in pack.neighbors.iter().enumerate() {
            let expected_relevance = 1.0 / (1.0 + neighbor.distance);
            assert!(
                (neighbor.relevance - expected_relevance).abs() < 0.001,
                "Neighbor {} has wrong relevance: {} vs expected {}",
                i,
                neighbor.relevance,
                expected_relevance
            );
        }
    }

    #[test]
    fn test_evidence_pack_from_empty_search() {
        // No search hits -> a valid pack with zero neighbors.
        let builder = MockEvidencePackBuilder::new();

        let segment = create_test_segment();
        let empty_results: Vec<SearchResult> = vec![];
        let clusters = create_test_clusters(2);

        let pack = builder.build(&segment, &empty_results, &clusters).unwrap();

        assert_eq!(pack.neighbors.len(), 0);
    }

    #[test]
    fn test_evidence_pack_timestamp() {
        // A freshly built pack's created_at should lie within the last minute.
        let pack = create_test_evidence_pack();

        let age = chrono::Utc::now() - pack.created_at;
        assert!(
            age.num_seconds() < 60,
            "Evidence pack should be recently created"
        );
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Claim Generation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod claim_generation {
    use super::*;

    /// A generated interpretation contains statements and a positive confidence.
    #[test]
    fn test_generate_interpretation_from_evidence() {
        let generator = MockInterpretationGenerator::new();
        let pack = create_test_evidence_pack();

        let interpretation = generator.generate(&pack).unwrap();

        assert!(interpretation.confidence > 0.0);
        assert!(!interpretation.statements.is_empty());
    }

    /// Interpretations are evidence-backed: the citation list is never empty.
    #[test]
    fn test_interpretation_includes_citations() {
        let generator = MockInterpretationGenerator::new();
        let pack = create_test_evidence_pack();

        let interpretation = generator.generate(&pack).unwrap();

        assert!(
            !interpretation.citations.is_empty(),
            "Interpretation should have citations"
        );
    }

    /// The generator's own validator confirms each claim is cited.
    #[test]
    fn test_all_claims_have_citations() {
        let generator = MockInterpretationGenerator::new();
        let pack = create_test_evidence_pack();

        let interpretation = generator.generate(&pack).unwrap();

        // Verify all statements have at least one citation
        assert!(
            generator.validate_citations(&interpretation),
            "All claims should have corresponding citations"
        );
    }

    /// Neighbor evidence must appear among the cited evidence types.
    #[test]
    fn test_citation_evidence_types() {
        let generator = MockInterpretationGenerator::new();
        let pack = create_test_evidence_pack();

        let interpretation = generator.generate(&pack).unwrap();

        let kinds: HashSet<_> = interpretation
            .citations
            .iter()
            .map(|c| &c.evidence_type)
            .collect();

        // Should have neighbor citations at minimum
        assert!(
            kinds.contains(&EvidenceType::Neighbor),
            "Should cite neighbors as evidence"
        );
    }

    /// Citation strength is a normalized score in the unit interval.
    #[test]
    fn test_citation_strength_values() {
        let citations = create_test_citations(10);

        assert!(
            citations.iter().all(|c| (0.0..=1.0).contains(&c.strength)),
            "Citation strength should be in [0, 1]"
        );
    }

    /// Confidence is positive for both rich and sparse evidence.
    #[test]
    fn test_interpretation_confidence_from_citations() {
        let generator = MockInterpretationGenerator::new();

        // High-quality evidence
        let good_pack = create_test_evidence_pack_with_neighbors(20);
        let good = generator.generate(&good_pack).unwrap();

        // Low-quality evidence (fewer neighbors)
        let poor_pack = create_test_evidence_pack_with_neighbors(2);
        let poor = generator.generate(&poor_pack).unwrap();

        // Both should have non-zero confidence
        assert!(good.confidence > 0.0);
        assert!(poor.confidence > 0.0);
    }

    /// Factory-built interpretations reference the pack they came from
    /// and are non-empty on both statements and citations.
    #[test]
    fn test_factory_interpretation() {
        let pack_id = EvidencePackId::new();
        let interpretation = create_test_interpretation(pack_id);

        assert_eq!(interpretation.evidence_pack_id, pack_id);
        assert!(!interpretation.citations.is_empty());
        assert!(!interpretation.statements.is_empty());
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Citation Validation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod citation_validation {
    use super::*;

    /// Every citation, regardless of evidence kind, must reference a
    /// non-empty evidence identifier.
    #[test]
    fn test_citation_links_to_valid_evidence() {
        let evidence_pack = create_test_evidence_pack();
        let generator = MockInterpretationGenerator::new();

        let interpretation = generator.generate(&evidence_pack).unwrap();

        // Each citation should reference valid evidence. The requirement is
        // identical for all four evidence kinds, so the arms are merged; the
        // match stays exhaustive so a new variant forces a compile error.
        for citation in &interpretation.citations {
            match &citation.evidence_type {
                EvidenceType::Neighbor
                | EvidenceType::Exemplar
                | EvidenceType::Cluster
                | EvidenceType::Motif => {
                    assert!(!citation.evidence_id.is_empty());
                }
            }
        }
    }

    /// Citations must never point at claims that do not exist.
    #[test]
    fn test_no_orphan_citations() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();

        let interpretation = generator.generate(&evidence_pack).unwrap();

        // All citations should reference an existing claim
        for citation in &interpretation.citations {
            assert!(
                interpretation.statements.contains(&citation.claim),
                "Citation references non-existent claim: {}",
                citation.claim
            );
        }
    }

    /// Evidence identifiers are serialized UUIDs.
    #[test]
    fn test_citation_uuid_format() {
        let citations = create_test_citations(5);

        for citation in &citations {
            // Evidence ID should be valid UUID string
            assert!(
                uuid::Uuid::parse_str(&citation.evidence_id).is_ok(),
                "Evidence ID should be valid UUID: {}",
                citation.evidence_id
            );
        }
    }

    /// Every statement must be backed by at least one citation.
    #[test]
    fn test_citation_claim_matching() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();

        let interpretation = generator.generate(&evidence_pack).unwrap();

        // Collect the set of cited claims (borrowed, no cloning needed)
        let cited_claims: HashSet<&String> =
            interpretation.citations.iter().map(|c| &c.claim).collect();

        // Every statement should have at least one citation
        for statement in &interpretation.statements {
            assert!(
                cited_claims.contains(statement),
                "Statement has no citation: {}",
                statement
            );
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// RAB (Retrieval-Augmented Bioacoustics) Pattern Tests
|
||||
// ============================================================================
|
||||
|
||||
mod rab_pattern {
    use super::*;

    /// With ample candidates (50 results), RAB must satisfy the requested
    /// neighbor depth (10).
    #[test]
    fn test_rab_retrieval_depth() {
        // RAB should retrieve sufficient evidence
        let builder = MockEvidencePackBuilder::new().with_neighbor_count(10);

        let segment = create_test_segment();
        let search_results = create_search_results(50);
        let clusters = create_test_clusters(5);

        let pack = builder.build(&segment, &search_results, &clusters).unwrap();

        assert!(
            pack.neighbors.len() >= 10,
            "RAB should retrieve requested depth"
        );
    }

    /// Packs must mix evidence kinds: nearest neighbors AND exemplars.
    #[test]
    fn test_rab_evidence_diversity() {
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(10)
            .with_exemplar_count(5);

        let segment = create_test_segment();
        let search_results = create_search_results(20);
        let clusters = create_test_clusters(5);

        let pack = builder.build(&segment, &search_results, &clusters).unwrap();

        // Should include both neighbors and exemplars
        assert!(!pack.neighbors.is_empty(), "Should have neighbors");
        assert!(!pack.exemplars.is_empty(), "Should have exemplars");
    }

    /// Statements must stay constrained to the evidence: each one has to
    /// use at least one objective, structural descriptor keyword.
    #[test]
    fn test_rab_constrained_interpretation() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();

        let interpretation = generator.generate(&evidence_pack).unwrap();

        // Statements should be descriptive (constrained to evidence)
        for statement in &interpretation.statements {
            // Check for structural descriptors (objective language)
            let is_structural = statement.contains("similar")
                || statement.contains("distance")
                || statement.contains("cluster")
                || statement.contains("neighbor")
                || statement.contains("aligns");

            assert!(
                is_structural,
                "Statement should use structural descriptors: {}",
                statement
            );
        }
    }

    /// Transparency: averaged over the interpretation, each statement
    /// carries at least one citation.
    #[test]
    fn test_rab_transparency() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();

        let interpretation = generator.generate(&evidence_pack).unwrap();

        // Every interpretation should be traceable to evidence
        let citation_count = interpretation.citations.len();
        let statement_count = interpretation.statements.len();

        // Average citations per statement; max(1) guards division by zero
        let avg_citations = citation_count as f32 / statement_count.max(1) as f32;

        assert!(
            avg_citations >= 1.0,
            "Each statement should have at least one citation on average"
        );
    }

    /// Richer evidence (more neighbors/exemplars, higher SNR) must not
    /// produce fewer citations than sparse evidence.
    #[test]
    fn test_rab_confidence_reflects_evidence_quality() {
        let generator = MockInterpretationGenerator::new();

        // Rich evidence
        let rich_pack = EvidencePack {
            neighbors: create_test_neighbors(20),
            exemplars: (0..5).map(|_| EmbeddingId::new()).collect(),
            signal_quality: SignalQuality {
                snr: 25.0,
                quality_grade: Some(QualityGrade::Excellent),
                ..Default::default()
            },
            ..Default::default()
        };

        // Sparse evidence
        let sparse_pack = EvidencePack {
            neighbors: create_test_neighbors(2),
            exemplars: vec![],
            signal_quality: SignalQuality {
                snr: 5.0,
                quality_grade: Some(QualityGrade::Fair),
                ..Default::default()
            },
            ..Default::default()
        };

        let rich_interp = generator.generate(&rich_pack).unwrap();
        let sparse_interp = generator.generate(&sparse_pack).unwrap();

        // Rich evidence should yield higher confidence
        assert!(
            rich_interp.citations.len() >= sparse_interp.citations.len(),
            "Rich evidence should produce more citations"
        );
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Structural Descriptor Tests
|
||||
// ============================================================================
|
||||
|
||||
mod structural_descriptors {
    /// The mean frequency of a pitch contour must lie between its
    /// minimum and maximum frequencies.
    #[test]
    fn test_pitch_contour_description() {
        // Mock pitch contour stats
        struct PitchContour {
            min_freq: f32,
            max_freq: f32,
            mean_freq: f32,
            contour_type: String,
        }

        let contour = PitchContour {
            min_freq: 2000.0,
            max_freq: 4000.0,
            mean_freq: 3000.0,
            contour_type: "ascending".to_string(),
        };

        assert!(contour.min_freq < contour.max_freq);
        assert!((contour.min_freq..=contour.max_freq).contains(&contour.mean_freq));
    }

    /// Harmonicity and flatness are unit-interval ratios; the spectral
    /// centroid must fall inside the audible band (20 Hz - 20 kHz).
    #[test]
    fn test_spectral_texture_metrics() {
        struct SpectralTexture {
            harmonicity: f32,
            spectral_centroid: f32,
            spectral_flatness: f32,
        }

        let texture = SpectralTexture {
            harmonicity: 0.8,
            spectral_centroid: 3500.0,
            spectral_flatness: 0.2,
        };

        // Harmonicity and flatness should be in [0, 1]
        assert!((0.0..=1.0).contains(&texture.harmonicity));
        assert!((0.0..=1.0).contains(&texture.spectral_flatness));
        // Centroid should be in audible range
        assert!((20.0..=20000.0).contains(&texture.spectral_centroid));
    }

    /// With N syllables there are exactly N-1 inter-syllable intervals,
    /// and rhythm regularity is a unit-interval score.
    #[test]
    fn test_rhythm_profile() {
        struct RhythmProfile {
            duration_ms: u64,
            syllable_count: u32,
            inter_syllable_intervals: Vec<u64>,
            regularity: f32,
        }

        let profile = RhythmProfile {
            duration_ms: 2500,
            syllable_count: 4,
            inter_syllable_intervals: vec![200, 210, 205],
            regularity: 0.95,
        };

        let expected_gaps = profile.syllable_count as usize - 1;
        assert_eq!(profile.inter_syllable_intervals.len(), expected_gaps);
        assert!((0.0..=1.0).contains(&profile.regularity));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Hypothesis Generation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod hypothesis_generation {
    use super::*;

    /// A hypothesis must carry a non-empty statement and at least one
    /// supporting evidence reference.
    #[test]
    fn test_hypothesis_testability() {
        #[derive(Debug)]
        enum Testability {
            High,
            Medium,
            Low,
        }

        struct Hypothesis {
            statement: String,
            testability: Testability,
            supporting_evidence: Vec<String>,
        }

        let hypothesis = Hypothesis {
            statement: "Similar calls may indicate territorial behavior".to_string(),
            testability: Testability::Medium,
            supporting_evidence: vec!["neighbor_1".into(), "cluster_1".into()],
        };

        assert!(!hypothesis.supporting_evidence.is_empty());
        assert!(!hypothesis.statement.is_empty());
    }

    /// Hypotheses must be phrased in terms of observable evidence
    /// (neighbor counts and distances), not free speculation.
    #[test]
    fn test_hypothesis_grounded_in_evidence() {
        let evidence_pack = create_test_evidence_pack();

        let neighbor_count = evidence_pack.neighbors.len();
        let mean_distance = evidence_pack
            .neighbors
            .iter()
            .map(|n| n.distance)
            .sum::<f32>()
            / neighbor_count as f32;

        // A valid hypothesis should reference observable patterns
        let hypothesis = format!(
            "Based on {} similar neighbors with average distance {:.3}, this call type may be common in this habitat.",
            neighbor_count, mean_distance
        );

        assert!(hypothesis.contains("neighbor"));
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Monitoring Summary Tests
|
||||
// ============================================================================
|
||||
|
||||
mod monitoring_summary {
    /// Summary diversity metrics reported for a monitoring window.
    struct DiversityMetrics {
        species_richness: u32,
        shannon_index: f32,
        simpson_index: f32,
        evenness: f32,
    }

    /// Shannon diversity H' = -sum(p_i * ln(p_i)) over non-zero counts.
    /// An empty community (total count 0) has zero entropy by convention.
    fn compute_shannon_index(counts: &[u32]) -> f32 {
        let total: u32 = counts.iter().sum();
        if total == 0 {
            return 0.0;
        }

        let total_f = total as f32;
        let mut entropy = 0.0f32;
        for &count in counts {
            if count > 0 {
                let p = count as f32 / total_f;
                entropy -= p * p.ln();
            }
        }
        entropy
    }

    /// A uniform distribution over k species reaches the maximum
    /// possible entropy, ln(k).
    #[test]
    fn test_shannon_index_uniform() {
        // Uniform distribution should maximize entropy
        let counts = vec![10, 10, 10, 10];
        let h = compute_shannon_index(&counts);

        let max_h = (counts.len() as f32).ln();
        assert!(
            (h - max_h).abs() < 0.001,
            "Uniform distribution should have max entropy"
        );
    }

    /// A community dominated by one species has (near-)zero entropy.
    #[test]
    fn test_shannon_index_single_species() {
        // Single species should have zero entropy
        let h = compute_shannon_index(&[100, 0, 0, 0]);

        assert!(h < 0.001, "Single species should have near-zero entropy");
    }

    /// Simpson index and evenness are unit-interval scores; Shannon is
    /// non-negative and unbounded above.
    #[test]
    fn test_diversity_metrics_valid_ranges() {
        let metrics = DiversityMetrics {
            species_richness: 15,
            shannon_index: 2.5,
            simpson_index: 0.85,
            evenness: 0.9,
        };

        assert!(metrics.shannon_index >= 0.0);
        assert!((0.0..=1.0).contains(&metrics.simpson_index));
        assert!((0.0..=1.0).contains(&metrics.evenness));
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: build an evidence pack, generate an
    /// interpretation, and verify the claim/citation contract holds.
    #[test]
    fn test_interpretation_integration_smoke_test() {
        // Build evidence pack
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(10)
            .with_exemplar_count(5);

        let segment = create_test_segment();
        let search_results = create_search_results(20);
        let clusters = create_test_clusters(5);
        let evidence_pack = builder
            .build(&segment, &search_results, &clusters)
            .unwrap();

        // Generate interpretation
        let generator = MockInterpretationGenerator::new();
        let interpretation = generator.generate(&evidence_pack).unwrap();

        // Verify structure
        assert!(interpretation.confidence > 0.0);
        assert!(!interpretation.statements.is_empty());
        assert!(!interpretation.citations.is_empty());

        // Verify all claims have citations
        assert!(generator.validate_citations(&interpretation));
    }
}
|
||||
219
vendor/ruvector/examples/vibecast-7sense/tests/integration/mod.rs
vendored
Normal file
219
vendor/ruvector/examples/vibecast-7sense/tests/integration/mod.rs
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
//! Integration tests for 7sense bioacoustics platform
|
||||
//!
|
||||
//! This module organizes integration tests across all six bounded contexts:
|
||||
//! - Audio Ingestion Context
|
||||
//! - Embedding Context
|
||||
//! - Vector Space Context
|
||||
//! - Learning Context (via Analysis)
|
||||
//! - Analysis Context
|
||||
//! - Interpretation Context
|
||||
//!
|
||||
//! Tests are organized by context and follow the domain-driven design boundaries.
|
||||
//!
|
||||
//! Note: Individual test files (audio_test.rs, etc.) are compiled as separate
|
||||
//! test binaries via [[test]] entries in Cargo.toml.
|
||||
|
||||
// Re-export commonly used test utilities
|
||||
pub use crate::fixtures::*;
|
||||
pub use crate::mocks::*;
|
||||
|
||||
/// Common test configuration
|
||||
#[derive(Debug, Clone)]
pub struct TestConfig {
    /// Sample rate for audio (32kHz for Perch 2.0)
    pub sample_rate: u32,
    /// Embedding dimensions (1536 for Perch 2.0)
    pub embedding_dims: usize,
    /// Default segment duration in ms
    pub segment_duration_ms: u64,
    /// HNSW M parameter (max graph connections per node)
    pub hnsw_m: usize,
    /// HNSW ef_construction (candidate-list size while building the index)
    pub hnsw_ef_construction: usize,
    /// HNSW ef_search (candidate-list size at query time)
    pub hnsw_ef_search: usize,
    /// Minimum cluster size for HDBSCAN
    pub min_cluster_size: usize,
    /// Target recall@10 for vector search
    pub target_recall_at_10: f32,
}
|
||||
|
||||
impl Default for TestConfig {
    /// Production-like defaults: Perch 2.0 audio/embedding constants
    /// (32 kHz, 1536 dims) plus the suite's standard HNSW and HDBSCAN
    /// parameters.
    fn default() -> Self {
        Self {
            sample_rate: 32000,
            embedding_dims: 1536,
            segment_duration_ms: 5000,
            hnsw_m: 16,
            hnsw_ef_construction: 200,
            hnsw_ef_search: 100,
            min_cluster_size: 5,
            target_recall_at_10: 0.95,
        }
    }
}
|
||||
|
||||
impl TestConfig {
|
||||
/// Create config for fast tests (lower quality but faster)
|
||||
pub fn fast() -> Self {
|
||||
Self {
|
||||
hnsw_m: 8,
|
||||
hnsw_ef_construction: 50,
|
||||
hnsw_ef_search: 20,
|
||||
min_cluster_size: 3,
|
||||
target_recall_at_10: 0.90,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create config for high-quality tests (slower but more accurate)
|
||||
pub fn high_quality() -> Self {
|
||||
Self {
|
||||
hnsw_m: 32,
|
||||
hnsw_ef_construction: 400,
|
||||
hnsw_ef_search: 200,
|
||||
min_cluster_size: 10,
|
||||
target_recall_at_10: 0.99,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared test context that can be used across integration tests
|
||||
pub struct IntegrationTestContext {
    // Configuration the mocks were wired from
    pub config: TestConfig,
    // In-memory repositories for the three aggregate types
    pub recording_repo: MockRecordingRepository,
    pub segment_repo: MockSegmentRepository,
    pub embedding_repo: MockEmbeddingRepository,
    // HNSW-backed similarity index (configured from `config`)
    pub vector_index: MockVectorIndex,
    // HDBSCAN clustering stand-in (min cluster size from `config`)
    pub clustering_service: MockClusteringService,
    // Evidence-pack construction and interpretation generation mocks
    pub evidence_builder: MockEvidencePackBuilder,
    pub interpretation_generator: MockInterpretationGenerator,
}
|
||||
|
||||
impl Default for IntegrationTestContext {
    /// Equivalent to [`IntegrationTestContext::new`] (default `TestConfig`).
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl IntegrationTestContext {
|
||||
pub fn new() -> Self {
|
||||
let config = TestConfig::default();
|
||||
Self {
|
||||
config: config.clone(),
|
||||
recording_repo: MockRecordingRepository::new(),
|
||||
segment_repo: MockSegmentRepository::new(),
|
||||
embedding_repo: MockEmbeddingRepository::new(),
|
||||
vector_index: MockVectorIndex::with_config(HnswConfig {
|
||||
m: config.hnsw_m,
|
||||
ef_construction: config.hnsw_ef_construction,
|
||||
ef_search: config.hnsw_ef_search,
|
||||
max_layers: 6,
|
||||
}),
|
||||
clustering_service: MockClusteringService::with_params(config.min_cluster_size, 3),
|
||||
evidence_builder: MockEvidencePackBuilder::new(),
|
||||
interpretation_generator: MockInterpretationGenerator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_config(config: TestConfig) -> Self {
|
||||
Self {
|
||||
config: config.clone(),
|
||||
recording_repo: MockRecordingRepository::new(),
|
||||
segment_repo: MockSegmentRepository::new(),
|
||||
embedding_repo: MockEmbeddingRepository::new(),
|
||||
vector_index: MockVectorIndex::with_config(HnswConfig {
|
||||
m: config.hnsw_m,
|
||||
ef_construction: config.hnsw_ef_construction,
|
||||
ef_search: config.hnsw_ef_search,
|
||||
max_layers: 6,
|
||||
}),
|
||||
clustering_service: MockClusteringService::with_params(config.min_cluster_size, 3),
|
||||
evidence_builder: MockEvidencePackBuilder::new(),
|
||||
interpretation_generator: MockInterpretationGenerator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Populate context with test data
|
||||
pub fn with_test_data(self, num_recordings: usize, segments_per_recording: usize) -> Self {
|
||||
for _ in 0..num_recordings {
|
||||
let recording = create_test_recording();
|
||||
let recording_id = recording.id;
|
||||
self.recording_repo.save(recording).unwrap();
|
||||
|
||||
for i in 0..segments_per_recording {
|
||||
let start_ms = i as u64 * 5500;
|
||||
let segment = CallSegment {
|
||||
id: SegmentId::new(),
|
||||
recording_id,
|
||||
start_ms,
|
||||
end_ms: start_ms + 5000,
|
||||
..Default::default()
|
||||
};
|
||||
let segment_id = segment.id;
|
||||
self.segment_repo.save(segment).unwrap();
|
||||
self.recording_repo
|
||||
.add_segment_link(recording_id, segment_id);
|
||||
|
||||
let embedding = Embedding {
|
||||
segment_id,
|
||||
..Default::default()
|
||||
};
|
||||
let embedding_id = embedding.id;
|
||||
let vector = embedding.vector.clone();
|
||||
self.embedding_repo.save(embedding).unwrap();
|
||||
self.vector_index.insert(embedding_id, vector).unwrap();
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper macro for test-context setup.
///
/// Variants:
/// - `setup_test!()` — context with the default `TestConfig`
/// - `setup_test!(fast)` — faster, lower-quality HNSW/cluster parameters
/// - `setup_test!(high_quality)` — slower, higher-accuracy parameters
/// - `setup_test!(populated)` — default config pre-loaded with
///   5 recordings x 10 segments
#[macro_export]
macro_rules! setup_test {
    () => {
        IntegrationTestContext::new()
    };
    (fast) => {
        IntegrationTestContext::with_config(TestConfig::fast())
    };
    (high_quality) => {
        IntegrationTestContext::with_config(TestConfig::high_quality())
    };
    (populated) => {
        IntegrationTestContext::new().with_test_data(5, 10)
    };
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A default context exposes the Perch 2.0 constants.
    #[test]
    fn test_integration_context_creation() {
        let ctx = IntegrationTestContext::new();
        assert_eq!(ctx.config.embedding_dims, 1536);
        assert_eq!(ctx.config.sample_rate, 32000);
    }

    /// Populating 2 recordings x 5 segments yields 10 segments, 10
    /// embeddings, and 10 indexed vectors.
    #[test]
    fn test_context_with_test_data() {
        let ctx = IntegrationTestContext::new().with_test_data(2, 5);

        let expected_segments = 2 * 5;
        assert_eq!(ctx.recording_repo.count(), 2);
        assert_eq!(ctx.segment_repo.count(), expected_segments);
        assert_eq!(ctx.embedding_repo.count(), expected_segments);
        assert_eq!(ctx.vector_index.count(), expected_segments);
    }

    /// The fast preset trades accuracy for speed relative to high_quality.
    #[test]
    fn test_config_variants() {
        let fast = TestConfig::fast();
        let hq = TestConfig::high_quality();

        assert!(hq.hnsw_m > fast.hnsw_m);
        assert!(hq.hnsw_ef_construction > fast.hnsw_ef_construction);
        assert!(hq.target_recall_at_10 > fast.target_recall_at_10);
    }
}
|
||||
653
vendor/ruvector/examples/vibecast-7sense/tests/integration/vector_test.rs
vendored
Normal file
653
vendor/ruvector/examples/vibecast-7sense/tests/integration/vector_test.rs
vendored
Normal file
@@ -0,0 +1,653 @@
|
||||
//! Integration tests for Vector Space Context
|
||||
//!
|
||||
//! Tests for HNSW index creation, vector insertion, k-NN search accuracy,
|
||||
//! index persistence, and batch insertion performance.
|
||||
|
||||
use vibecast_tests::fixtures::*;
|
||||
use vibecast_tests::mocks::*;
|
||||
use std::collections::HashSet;
|
||||
use std::time::Instant;
|
||||
|
||||
// ============================================================================
|
||||
// HNSW Index Creation Tests
|
||||
// ============================================================================
|
||||
|
||||
mod index_creation {
    use super::*;

    /// A brand-new index starts empty.
    #[test]
    fn test_create_empty_index() {
        assert_eq!(MockVectorIndex::new().count(), 0);
    }

    /// Custom HNSW parameters are accepted; the index still starts empty.
    #[test]
    fn test_create_index_with_config() {
        let config = HnswConfig {
            m: 32,
            ef_construction: 400,
            ef_search: 200,
            max_layers: 8,
        };

        assert_eq!(MockVectorIndex::with_config(config).count(), 0);
    }

    /// Defaults mirror the standard HNSW settings used across the suite.
    #[test]
    fn test_default_hnsw_config() {
        let config = HnswConfig::default();

        assert_eq!(
            (
                config.m,
                config.ef_construction,
                config.ef_search,
                config.max_layers
            ),
            (16, 200, 100, 6)
        );
    }

    /// Every supported distance metric yields a usable (empty) index.
    #[test]
    fn test_index_with_distance_metrics() {
        let metrics = [
            DistanceMetric::Cosine,
            DistanceMetric::Euclidean,
            DistanceMetric::Poincare,
        ];

        // All should be created successfully
        for metric in metrics {
            let index = MockVectorIndex::new().with_distance_metric(metric);
            assert_eq!(index.count(), 0);
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Vector Insertion Tests
|
||||
// ============================================================================
|
||||
|
||||
mod vector_insertion {
    use super::*;

    /// Inserting one vector bumps the count, and the stored entry keeps
    /// its embedding-id association.
    #[test]
    fn test_insert_single_vector() {
        let index = MockVectorIndex::new();
        let vector = create_normalized_vector(1536);
        let embedding_id = EmbeddingId::new();

        let vector_id = index.insert(embedding_id, vector).unwrap();
        assert_eq!(index.count(), 1);

        let retrieved = index.get(&vector_id).unwrap().unwrap();
        assert_eq!(retrieved.embedding_id, embedding_id);
    }

    /// Count tracks bulk insertions exactly.
    #[test]
    fn test_insert_multiple_vectors() {
        let index = MockVectorIndex::new();

        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        assert_eq!(index.count(), 100);
    }

    /// Unit-norm vectors are accepted like any other.
    #[test]
    fn test_insert_normalized_vectors() {
        let index = MockVectorIndex::new();

        // Fix: the loop variable was bound as `i` but never used
        // (create_normalized_vector takes no seed), triggering an
        // unused-variable lint; `_` states the intent.
        for _ in 0..10 {
            let vector = create_normalized_vector(1536);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        assert_eq!(index.count(), 10);
    }

    /// Round-trip: the stored vector is identical to the input.
    #[test]
    fn test_insert_preserves_vector_data() {
        let index = MockVectorIndex::new();
        let original_vector = create_deterministic_vector(1536, 42);
        let embedding_id = EmbeddingId::new();

        let vector_id = index.insert(embedding_id, original_vector.clone()).unwrap();

        let retrieved = index.get(&vector_id).unwrap().unwrap();
        assert_eq!(retrieved.vector, original_vector);
    }

    /// Every inserted vector is assigned to some HNSW layer.
    #[test]
    fn test_insert_assigns_layer() {
        let index = MockVectorIndex::new();

        let mut layers_seen = HashSet::new();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            let vector_id = index.insert(EmbeddingId::new(), vector).unwrap();
            let indexed = index.get(&vector_id).unwrap().unwrap();
            layers_seen.insert(indexed.layer);
        }

        // HNSW should assign vectors to multiple layers
        assert!(layers_seen.len() >= 1, "Should have at least one layer");
    }

    /// Removal deletes the entry; subsequent lookups return None.
    #[test]
    fn test_remove_vector() {
        let index = MockVectorIndex::new();
        let vector = create_normalized_vector(1536);
        let vector_id = index.insert(EmbeddingId::new(), vector).unwrap();

        assert_eq!(index.count(), 1);

        index.remove(&vector_id).unwrap();
        assert_eq!(index.count(), 0);
        assert!(index.get(&vector_id).unwrap().is_none());
    }
}
|
||||
|
||||
// ============================================================================
// k-NN Search Accuracy Tests
// ============================================================================

mod knn_search {
    use super::*;

    /// `search` with k=10 against 100 stored vectors returns exactly 10 hits.
    #[test]
    fn test_search_returns_k_results() {
        let index = MockVectorIndex::new();

        // Insert 100 vectors
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        let query = create_deterministic_vector(1536, 50);
        let results = index.search(&query, 10).unwrap();

        assert_eq!(results.len(), 10);
    }

    /// Search results must be ordered by ascending distance.
    #[test]
    fn test_search_results_sorted_by_distance() {
        let index = MockVectorIndex::new();

        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        let query = create_deterministic_vector(1536, 50);
        let results = index.search(&query, 20).unwrap();

        // Verify sorted by distance. `windows(2)` checks each adjacent pair
        // and is safe on an empty or single-element Vec, unlike the previous
        // `0..results.len() - 1` range, which underflows `usize` (and
        // panics) when `results` is empty.
        for pair in results.windows(2) {
            assert!(
                pair[0].distance <= pair[1].distance,
                "Results should be sorted by distance"
            );
        }
    }

    /// Searching with a stored vector itself must return that vector first,
    /// with near-zero distance.
    #[test]
    fn test_search_finds_exact_match() {
        let index = MockVectorIndex::new();

        let target_vector = create_deterministic_vector(1536, 42);
        let target_id = index.insert(EmbeddingId::new(), target_vector.clone()).unwrap();

        // Insert other vectors (skip seed 42 so the target stays unique)
        for i in 0..50 {
            if i != 42 {
                let vector = create_deterministic_vector(1536, i);
                index.insert(EmbeddingId::new(), vector).unwrap();
            }
        }

        let results = index.search(&target_vector, 1).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].vector_id, target_id);
        assert!(results[0].distance < 0.0001, "Exact match should have near-zero distance");
    }

    /// Average recall@10 over 20 evenly spaced queries must reach 0.95,
    /// using brute-force cosine distance as the ground truth.
    #[test]
    fn test_recall_at_10_meets_threshold() {
        let index = MockVectorIndex::new();

        // Insert vectors with known structure, remembering id + data for
        // the brute-force ground-truth pass below.
        let num_vectors = 1000;
        let mut all_vectors: Vec<(VectorId, Vec<f32>)> = Vec::new();

        for i in 0..num_vectors {
            let vector = create_deterministic_vector(1536, i);
            let vector_id = index.insert(EmbeddingId::new(), vector.clone()).unwrap();
            all_vectors.push((vector_id, vector));
        }

        // Test recall with multiple queries spread evenly across the index.
        // NOTE: num_queries must divide num_vectors for the step_by count to
        // match the divisor used in the average below.
        let mut total_recall = 0.0;
        let num_queries = 20;

        for query_idx in (0..num_vectors).step_by(num_vectors / num_queries) {
            let query = &all_vectors[query_idx].1;

            // Compute true k-NN (brute force)
            let mut true_distances: Vec<(VectorId, f32)> = all_vectors
                .iter()
                .map(|(id, v)| (*id, cosine_distance(query, v)))
                .collect();
            true_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
            let true_top_10: HashSet<VectorId> =
                true_distances.iter().take(10).map(|(id, _)| *id).collect();

            // Get approximate k-NN
            let approx_results = index.search(query, 10).unwrap();
            let approx_top_10: HashSet<VectorId> =
                approx_results.iter().map(|r| r.vector_id).collect();

            // Recall = |true ∩ approx| / k
            let intersection_count = true_top_10.intersection(&approx_top_10).count();
            let recall = intersection_count as f32 / 10.0;
            total_recall += recall;
        }

        let avg_recall = total_recall / num_queries as f32;
        assert!(
            avg_recall >= 0.95,
            "Recall@10 should be >= 0.95, got {}",
            avg_recall
        );
    }

    /// `search` honors arbitrary k values up to the index size.
    #[test]
    fn test_search_with_varying_k() {
        let index = MockVectorIndex::new();

        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        let query = create_normalized_vector(1536);

        for k in [1, 5, 10, 20, 50] {
            let results = index.search(&query, k).unwrap();
            assert_eq!(results.len(), k, "Should return exactly k={} results", k);
        }
    }

    /// Searching an empty index returns no results — and no error.
    #[test]
    fn test_search_empty_index() {
        let index = MockVectorIndex::new();
        let query = create_normalized_vector(1536);

        let results = index.search(&query, 10).unwrap();
        assert_eq!(results.len(), 0);
    }

    /// When k exceeds the number of stored vectors, all of them come back.
    #[test]
    fn test_search_k_larger_than_index_size() {
        let index = MockVectorIndex::new();

        for i in 0..5 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        let query = create_normalized_vector(1536);
        let results = index.search(&query, 100).unwrap();

        assert_eq!(results.len(), 5, "Should return all vectors if k > index size");
    }
}
|
||||
|
||||
// ============================================================================
// Neighbor Graph Tests
// ============================================================================

mod neighbor_graph {
    use super::*;

    /// `get_neighbors` returns the requested number of neighbors and never
    /// includes the query vector itself.
    #[test]
    fn test_get_neighbors() {
        let index = MockVectorIndex::new();

        // Insert vectors, keeping every returned id.
        let ids: Vec<_> = (0..20)
            .map(|seed| {
                index
                    .insert(EmbeddingId::new(), create_deterministic_vector(1536, seed))
                    .unwrap()
            })
            .collect();

        // Ask for the five nearest neighbors of the first vector.
        let neighbors = index.get_neighbors(&ids[0], 5).unwrap();

        assert_eq!(neighbors.len(), 5);
        // The query vector must not appear in its own neighbor list.
        assert!(neighbors.iter().all(|n| n.vector_id != ids[0]));
    }

    /// A `SimilarityEdge` stores its endpoints and distance exactly as given.
    #[test]
    fn test_similarity_edge_creation() {
        let (source_id, target_id) = (VectorId::new(), VectorId::new());

        let edge = SimilarityEdge {
            source_id,
            target_id,
            distance: 0.15,
            edge_type: "SIMILAR".to_string(),
        };

        assert_eq!(edge.source_id, source_id);
        assert_eq!(edge.target_id, target_id);
        assert!(edge.distance < 0.2);
    }

    /// Fixture-produced search results carry 1-based ranks and
    /// non-decreasing distances.
    #[test]
    fn test_search_result_ranking() {
        let results = create_search_results(10);

        for (i, result) in results.iter().enumerate() {
            assert_eq!(result.rank, i + 1, "Rank should be 1-indexed");
            if i > 0 {
                assert!(
                    result.distance >= results[i - 1].distance,
                    "Distance should be non-decreasing"
                );
            }
        }
    }
}
|
||||
|
||||
// ============================================================================
// Index Persistence Tests
// ============================================================================

mod persistence {
    use super::*;

    /// Saving a populated index yields a non-empty byte blob that feeds back
    /// into `load_from_bytes` without error.
    #[test]
    fn test_save_and_load_index() {
        let index = MockVectorIndex::new();

        // Insert vectors
        for i in 0..50 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        // Assert the count instead of leaving it as a dead binding
        // (the original `original_count` was never used — warning).
        let original_count = index.count();
        assert_eq!(original_count, 50);

        // Save to bytes
        let bytes = index.save_to_bytes().unwrap();
        assert!(!bytes.is_empty());

        // Load from bytes (mock - doesn't restore actual data). The `_`
        // prefix documents that the loaded index is deliberately unused
        // until the mock round-trips real data.
        let _loaded = MockVectorIndex::load_from_bytes(&bytes).unwrap();

        // In real implementation, this would verify:
        // assert_eq!(_loaded.count(), original_count);
    }

    /// The serialized format begins with the vector count as a
    /// little-endian u64 header.
    #[test]
    fn test_persistence_format() {
        let index = MockVectorIndex::new();

        for i in 0..10 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }

        let bytes = index.save_to_bytes().unwrap();

        // Check header (count as u64, little-endian)
        assert!(bytes.len() >= 8);
        let count = u64::from_le_bytes([
            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
        ]);
        assert_eq!(count, 10);
    }
}
|
||||
|
||||
// ============================================================================
// Batch Insertion Performance Tests
// ============================================================================

mod batch_performance {
    use super::*;

    /// `insert_batch` stores every embedding and returns one id per input.
    #[test]
    fn test_batch_insert() {
        let index = MockVectorIndex::new();

        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..100)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();

        let vector_ids = index.insert_batch(embeddings).unwrap();

        assert_eq!(vector_ids.len(), 100);
        assert_eq!(index.count(), 100);
    }

    /// A 1000-vector batch must sustain more than 1000 insertions/second.
    #[test]
    fn test_batch_insert_performance() {
        let index = MockVectorIndex::new();
        let batch_size = 1000;

        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..batch_size)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();

        let start = Instant::now();
        let vector_ids = index.insert_batch(embeddings).unwrap();
        let duration = start.elapsed();

        assert_eq!(vector_ids.len(), batch_size);

        // Should complete reasonably fast (mock implementation)
        let vectors_per_second = batch_size as f64 / duration.as_secs_f64();
        assert!(
            vectors_per_second > 1000.0,
            "Batch insertion should be fast, got {} vec/sec",
            vectors_per_second
        );
    }

    /// Incremental and batch insertion must agree on the final count.
    /// Timings are captured but intentionally unasserted (mock timings are
    /// too noisy for CI); the `_` prefix silences the unused-variable
    /// warnings the original bindings produced.
    #[test]
    fn test_incremental_vs_batch_insert() {
        // Test incremental insertion
        let index1 = MockVectorIndex::new();
        let start1 = Instant::now();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index1.insert(EmbeddingId::new(), vector).unwrap();
        }
        let _duration1 = start1.elapsed();

        // Test batch insertion
        let index2 = MockVectorIndex::new();
        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..100)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();

        let start2 = Instant::now();
        index2.insert_batch(embeddings).unwrap();
        let _duration2 = start2.elapsed();

        assert_eq!(index1.count(), index2.count());
    }

    /// Search latency is sampled at several index sizes; with the mock we
    /// only assert each batch of 100 searches finishes within 10 seconds.
    #[test]
    fn test_scaling_with_index_size() {
        let index = MockVectorIndex::new();

        let sizes = vec![100, 500, 1000];
        let mut search_times = Vec::new();

        for size in &sizes {
            // Grow the (shared) index up to the target size.
            while index.count() < *size {
                let vector = create_deterministic_vector(1536, index.count());
                index.insert(EmbeddingId::new(), vector).unwrap();
            }

            // Measure 100 searches at this size.
            let query = create_normalized_vector(1536);
            let start = Instant::now();
            for _ in 0..100 {
                index.search(&query, 10).unwrap();
            }
            search_times.push(start.elapsed());
        }

        // Search time should scale sub-linearly with index size (HNSW property)
        // With mock implementation, just verify search completes
        for (i, time) in search_times.iter().enumerate() {
            assert!(
                time.as_millis() < 10000,
                "Search at size {} took too long",
                sizes[i]
            );
        }
    }
}
|
||||
|
||||
// ============================================================================
// Distance Metric Tests
// ============================================================================

mod distance_metrics {
    use super::*;

    /// A vector compared against itself has cosine distance ~0.
    #[test]
    fn test_cosine_distance_identical_vectors() {
        let vector = create_normalized_vector(1536);
        assert!(
            cosine_distance(&vector, &vector) < 0.0001,
            "Identical vectors should have distance ~0"
        );
    }

    /// Perpendicular unit vectors sit at cosine distance 1.
    #[test]
    fn test_cosine_distance_orthogonal_vectors() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];
        let dist = cosine_distance(&x_axis, &y_axis);
        assert!((dist - 1.0).abs() < 0.0001, "Orthogonal vectors should have distance 1");
    }

    /// Antiparallel vectors sit at the metric's maximum, 2.
    #[test]
    fn test_cosine_distance_opposite_vectors() {
        let forward = vec![1.0, 0.0];
        let backward = vec![-1.0, 0.0];
        let dist = cosine_distance(&forward, &backward);
        assert!((dist - 2.0).abs() < 0.0001, "Opposite vectors should have distance 2");
    }

    /// Euclidean distance from a vector to itself is 0.
    #[test]
    fn test_euclidean_distance_identical_vectors() {
        let vector = create_normalized_vector(1536);
        assert!(
            euclidean_distance(&vector, &vector) < 0.0001,
            "Identical vectors should have distance 0"
        );
    }

    /// The classic 3-4-5 right triangle pins the Euclidean metric.
    #[test]
    fn test_euclidean_distance_known_value() {
        let origin = vec![0.0, 0.0];
        let point = vec![3.0, 4.0];
        let dist = euclidean_distance(&origin, &point);
        assert!((dist - 5.0).abs() < 0.0001, "3-4-5 triangle");
    }

    /// Both metrics must be symmetric: d(a, b) == d(b, a).
    #[test]
    fn test_distance_metric_symmetry() {
        let a = create_normalized_vector(1536);
        let b = create_deterministic_vector(1536, 42);

        let forward = cosine_distance(&a, &b);
        let backward = cosine_distance(&b, &a);
        assert!((forward - backward).abs() < 0.0001, "Cosine distance should be symmetric");

        let forward = euclidean_distance(&a, &b);
        let backward = euclidean_distance(&b, &a);
        assert!((forward - backward).abs() < 0.0001, "Euclidean distance should be symmetric");
    }

    /// d(a, c) <= d(a, b) + d(b, c), with a small float tolerance.
    #[test]
    fn test_triangle_inequality() {
        let a = create_deterministic_vector(100, 0);
        let b = create_deterministic_vector(100, 1);
        let c = create_deterministic_vector(100, 2);

        let ab = euclidean_distance(&a, &b);
        let bc = euclidean_distance(&b, &c);
        let ac = euclidean_distance(&a, &c);

        assert!(
            ac <= ab + bc + 0.0001,
            "Triangle inequality should hold: {} <= {} + {}",
            ac,
            ab,
            bc
        );
    }
}
|
||||
|
||||
// ============================================================================
// Indexed Vector Tests
// ============================================================================

mod indexed_vector {
    use super::*;

    /// The default `IndexedVector` carries a 1536-dim, L2-normalized vector.
    #[test]
    fn test_indexed_vector_creation() {
        let indexed = IndexedVector::default();

        assert_eq!(indexed.vector.len(), 1536);
        assert_normalized(&indexed.vector, 0.0001);
    }

    /// `create_indexed_vectors` must produce deterministic fixtures that
    /// match `create_deterministic_vector` seed-for-seed.
    #[test]
    fn test_create_indexed_vectors() {
        let vectors = create_indexed_vectors(10);
        assert_eq!(vectors.len(), 10);

        for (seed, item) in vectors.iter().enumerate() {
            assert_eq!(item.vector.len(), 1536);
            // Verify deterministic generation
            assert_eq!(item.vector, create_deterministic_vector(1536, seed));
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: populate, query, and sanity-check results.
    #[test]
    fn test_vector_space_integration_smoke_test() {
        let index = MockVectorIndex::new();

        // Populate the index with deterministic vectors.
        for seed in 0..100 {
            index
                .insert(EmbeddingId::new(), create_deterministic_vector(1536, seed))
                .unwrap();
        }

        // Query with a vector that is already present (seed 50).
        let results = index
            .search(&create_deterministic_vector(1536, 50), 10)
            .unwrap();

        // Verify result count and ascending distance order (first vs last).
        assert_eq!(results.len(), 10);
        assert!(results[0].distance < results[9].distance);

        // The inserted seed-50 vector should be recovered almost exactly.
        assert!(
            results.iter().any(|r| r.distance < 0.01),
            "Should find near-exact match"
        );
    }
}
|
||||
Reference in New Issue
Block a user