Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,812 @@
//! Integration tests for Analysis Context
//!
//! Tests for HDBSCAN clustering, cluster assignment, motif detection,
//! entropy calculation, and transition matrix operations.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::collections::{HashMap, HashSet};
// ============================================================================
// HDBSCAN Clustering Tests
// ============================================================================
mod hdbscan_clustering {
    use super::*;

    /// Two well-separated groups of near-identical vectors should produce at
    /// least one cluster from the mock HDBSCAN service.
    #[test]
    fn test_cluster_with_clear_groups() {
        let service = MockClusteringService::with_params(5, 3);
        // Create two well-separated clusters.
        let base1 = create_deterministic_vector(1536, 0);
        let base2 = create_deterministic_vector(1536, 1000);
        let mut embeddings = Vec::new();
        // Cluster 1: small perturbations around base1.
        for i in 0..15 {
            let noisy: Vec<f32> = base1.iter().map(|v| v + (i as f32 * 0.001)).collect();
            embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy)));
        }
        // Cluster 2: small perturbations around base2.
        for i in 0..15 {
            let noisy: Vec<f32> = base2.iter().map(|v| v + (i as f32 * 0.001)).collect();
            embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy)));
        }
        let clusters = service.cluster_hdbscan(&embeddings).unwrap();
        // `!is_empty()` is the idiomatic form of `len() >= 1` (clippy::len_zero).
        assert!(!clusters.is_empty(), "Should find at least one cluster");
    }

    /// Fewer points than `min_cluster_size` must yield no clusters.
    #[test]
    fn test_cluster_with_insufficient_data() {
        let service = MockClusteringService::with_params(10, 5);
        // Only 3 embeddings - below min_cluster_size.
        let embeddings: Vec<Embedding> = (0..3).map(|_| create_test_embedding()).collect();
        let clusters = service.cluster_hdbscan(&embeddings).unwrap();
        assert_eq!(clusters.len(), 0, "Should not form clusters with too few points");
    }

    /// Fixture clusters are tagged with the HDBSCAN method.
    #[test]
    fn test_cluster_method_assignment() {
        let cluster = create_test_cluster();
        assert_eq!(cluster.method, ClusteringMethod::Hdbscan);
    }

    /// Cohesion and separation scores are normalized to [0, 1].
    #[test]
    fn test_cluster_cohesion_in_valid_range() {
        let cluster = create_test_cluster();
        // `RangeInclusive::contains` is clearer than paired comparisons.
        assert!((0.0..=1.0).contains(&cluster.cohesion));
        assert!((0.0..=1.0).contains(&cluster.separation));
    }

    /// A cluster fixture carries its members and a non-empty centroid.
    #[test]
    fn test_cluster_has_members() {
        let cluster = create_test_cluster_with_members(20);
        assert_eq!(cluster.member_ids.len(), 20);
        assert!(!cluster.centroid.is_empty());
    }

    /// Centroids are expected to be L2-normalized (unit length).
    #[test]
    fn test_cluster_centroid_is_normalized() {
        let cluster = create_test_cluster_with_members(10);
        let norm: f32 = cluster.centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!(
            (norm - 1.0).abs() < 0.0001,
            "Centroid should be normalized"
        );
    }

    /// Each generated cluster gets a distinct identifier.
    #[test]
    fn test_multiple_clusters() {
        let clusters = create_test_clusters(5);
        assert_eq!(clusters.len(), 5);
        // Collect into a set to detect duplicate IDs.
        let ids: HashSet<_> = clusters.iter().map(|c| c.id.0).collect();
        assert_eq!(ids.len(), 5, "All cluster IDs should be unique");
    }
}
// ============================================================================
// Cluster Assignment Tests
// ============================================================================
mod cluster_assignment {
    use super::*;

    /// An embedding equal to a cluster's centroid must land in that cluster.
    #[test]
    fn test_assign_embedding_to_nearest_cluster() {
        let svc = MockClusteringService::new();
        // Clusters with known centroids.
        let clusters = create_test_clusters(3);
        // Query vector copies the first cluster's centroid exactly.
        let query = create_test_embedding_with_vector(clusters[0].centroid.clone());
        let result = svc.assign_to_cluster(&query, &clusters).unwrap();
        assert!(result.is_some(), "Should assign to a cluster");
        let result = result.unwrap();
        assert_eq!(result.cluster_id, clusters[0].id);
        assert!(result.confidence > 0.0);
    }

    /// Confidence should fall as the query drifts away from the centroid.
    #[test]
    fn test_assignment_confidence_based_on_distance() {
        let svc = MockClusteringService::new();
        let clusters = create_test_clusters(2);
        // A point sitting exactly on the centroid.
        let near = create_test_embedding_with_vector(clusters[0].centroid.clone());
        let near_result = svc.assign_to_cluster(&near, &clusters).unwrap().unwrap();
        // The same centroid shifted by a constant offset, then renormalized.
        let shifted: Vec<f32> = clusters[0].centroid.iter().map(|v| v + 0.5).collect();
        let far = create_test_embedding_with_vector(l2_normalize(&shifted));
        let far_result = svc.assign_to_cluster(&far, &clusters).unwrap().unwrap();
        assert!(
            near_result.confidence > far_result.confidence,
            "Closer embeddings should have higher confidence"
        );
    }

    /// With no clusters available there is nothing to assign to.
    #[test]
    fn test_no_assignment_to_empty_clusters() {
        let svc = MockClusteringService::new();
        let query = create_test_embedding();
        let no_clusters: Vec<Cluster> = Vec::new();
        let result = svc.assign_to_cluster(&query, &no_clusters).unwrap();
        assert!(result.is_none());
    }

    /// Assignments report a non-negative distance to the chosen centroid.
    #[test]
    fn test_assignment_includes_distance_to_centroid() {
        let svc = MockClusteringService::new();
        let clusters = create_test_clusters(1);
        let query = create_test_embedding();
        let result = svc.assign_to_cluster(&query, &clusters).unwrap().unwrap();
        assert!(
            result.distance_to_centroid >= 0.0,
            "Distance should be non-negative"
        );
    }

    /// A point midway between two centroids should not be assigned with
    /// near-certain confidence (soft-assignment concept).
    #[test]
    fn test_soft_assignment_concept() {
        let svc = MockClusteringService::new();
        // Two synthetic clusters built from deterministic base vectors.
        let base1 = create_deterministic_vector(1536, 0);
        let base2 = create_deterministic_vector(1536, 100);
        // Shared builder keeps the two literals in sync.
        let make_cluster = |centroid: Vec<f32>| Cluster {
            id: ClusterId::new(),
            method: ClusteringMethod::Hdbscan,
            member_ids: vec![],
            centroid,
            cohesion: 0.8,
            separation: 0.6,
        };
        let clusters = vec![
            make_cluster(l2_normalize(&base1)),
            make_cluster(l2_normalize(&base2)),
        ];
        // Element-wise midpoint of the two bases.
        let midpoint: Vec<f32> = base1
            .iter()
            .zip(base2.iter())
            .map(|(a, b)| (a + b) / 2.0)
            .collect();
        let query = create_test_embedding_with_vector(l2_normalize(&midpoint));
        let result = svc.assign_to_cluster(&query, &clusters).unwrap().unwrap();
        // The boundary position should be reflected as reduced confidence.
        assert!(result.confidence < 0.9, "Boundary point should have lower confidence");
    }
}
// ============================================================================
// Motif Detection Tests
// ============================================================================
mod motif_detection {
    use super::*;

    /// A subsequence repeated across sequences should surface as a motif.
    #[test]
    fn test_detect_motifs_in_sequences() {
        let service = MockMotifDetectionService::new();
        // Create sequences with repeating patterns.
        let cluster_ids: Vec<ClusterId> = (0..5).map(|_| ClusterId::new()).collect();
        let sequences: Vec<Vec<ClusterId>> = vec![
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
            vec![
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
                cluster_ids[3],
            ],
            vec![
                cluster_ids[2],
                cluster_ids[0],
                cluster_ids[1],
                cluster_ids[2],
            ],
        ];
        let motifs = service.detect_motifs(&sequences).unwrap();
        // Should find the [0,1,2] pattern that appears multiple times.
        assert!(
            motifs.iter().any(|m| m.pattern.len() >= 2),
            "Should find at least one motif"
        );
    }

    /// Motif fixtures report how often the pattern occurred.
    #[test]
    fn test_motif_occurrence_count() {
        let motif = create_test_motif();
        assert!(motif.occurrence_count > 0);
        assert_eq!(motif.pattern.len(), 3);
    }

    /// Confidence is normalized to the unit interval.
    #[test]
    fn test_motif_confidence_calculation() {
        let motif = create_test_motif();
        assert!(
            (0.0..=1.0).contains(&motif.confidence),
            "Confidence should be in [0, 1]"
        );
    }

    /// Detection on fully random sequences must at least not fail.
    #[test]
    fn test_no_motifs_in_random_sequences() {
        let service = MockMotifDetectionService::new();
        // Create completely random sequences with no patterns.
        let sequences: Vec<Vec<ClusterId>> = (0..5)
            .map(|_| (0..10).map(|_| ClusterId::new()).collect())
            .collect();
        // Random sequences are unlikely to contain recurring motifs (though it
        // is technically possible with the mock implementation), so we only
        // check that detection succeeds. The leading underscore silences the
        // unused-variable warning the original binding produced.
        let _motifs = service.detect_motifs(&sequences).unwrap();
    }

    /// Empty input yields an empty motif list rather than an error.
    #[test]
    fn test_empty_sequence_handling() {
        let service = MockMotifDetectionService::new();
        let empty_sequences: Vec<Vec<ClusterId>> = vec![];
        let motifs = service.detect_motifs(&empty_sequences).unwrap();
        assert_eq!(motifs.len(), 0);
    }

    /// A 3-element motif at 5 s per segment must last at least 5000 ms.
    #[test]
    fn test_motif_duration_estimation() {
        let motif = create_test_motif();
        // 3-element motif at 5s per segment.
        assert!(motif.avg_duration_ms >= 5000);
    }
}
// ============================================================================
// Entropy Calculation Tests
// ============================================================================
mod entropy_calculation {
use super::*;
// These tests pin down `compute_entropy_rate` at its two analytic extremes.
// The log2-based expectation below implies the result is Shannon entropy
// measured in bits; the exact row weighting is defined by the implementation
// — TODO confirm against `compute_entropy_rate`'s docs.
// Uniform rows maximize entropy: H = log2(n) bits per transition.
#[test]
fn test_entropy_rate_uniform_distribution() {
// Create transition matrix with uniform distribution
let n = 4;
let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
let uniform_prob = 1.0 / n as f32;
let matrix = TransitionMatrix {
cluster_ids: cluster_ids.clone(),
probabilities: vec![vec![uniform_prob; n]; n],
observations: vec![vec![10; n]; n],
};
let entropy = compute_entropy_rate(&matrix);
// Maximum entropy for uniform distribution = log2(n) = 2 bits for n=4
let max_entropy = (n as f32).log2();
assert!(
(entropy - max_entropy).abs() < 0.1,
"Uniform distribution should have maximum entropy: {} vs {}",
entropy,
max_entropy
);
}
// A deterministic chain (each state has exactly one successor) carries no
// information per step, so its entropy rate should be ~0.
#[test]
fn test_entropy_rate_deterministic() {
// Create transition matrix with deterministic transitions
let n = 4;
let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
// Each state always transitions to the next state
let mut probabilities = vec![vec![0.0; n]; n];
for i in 0..n {
probabilities[i][(i + 1) % n] = 1.0;
}
let matrix = TransitionMatrix {
cluster_ids,
probabilities,
observations: vec![vec![10; n]; n],
};
let entropy = compute_entropy_rate(&matrix);
// Deterministic transitions should have zero entropy
assert!(
entropy < 0.1,
"Deterministic transitions should have near-zero entropy: {}",
entropy
);
}
// Shannon entropy is non-negative by definition; check over several random
// fixture matrices.
#[test]
fn test_entropy_rate_non_negative() {
for _ in 0..10 {
let matrix = create_test_transition_matrix(5);
let entropy = compute_entropy_rate(&matrix);
assert!(
entropy >= 0.0,
"Entropy should never be negative: {}",
entropy
);
}
}
// Monotonicity sanity check: a strongly self-looping (predictable) chain must
// score strictly lower than a fully uniform one.
#[test]
fn test_entropy_increases_with_randomness() {
// Low entropy (predictable)
let n = 4;
let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
let mut low_rand_probs = vec![vec![0.0; n]; n];
for i in 0..n {
low_rand_probs[i][i] = 0.8; // High self-loop probability
for j in 0..n {
if i != j {
// Remaining 0.2 mass spread evenly over the other states.
low_rand_probs[i][j] = 0.2 / (n - 1) as f32;
}
}
}
let low_entropy_matrix = TransitionMatrix {
cluster_ids: cluster_ids.clone(),
probabilities: low_rand_probs,
observations: vec![vec![10; n]; n],
};
// High entropy (uniform)
let uniform_prob = 1.0 / n as f32;
let high_entropy_matrix = TransitionMatrix {
cluster_ids,
probabilities: vec![vec![uniform_prob; n]; n],
observations: vec![vec![10; n]; n],
};
let low_entropy = compute_entropy_rate(&low_entropy_matrix);
let high_entropy = compute_entropy_rate(&high_entropy_matrix);
assert!(
high_entropy > low_entropy,
"More uniform distribution should have higher entropy: {} vs {}",
high_entropy,
low_entropy
);
}
// Degenerate input: an empty matrix is defined to yield zero entropy rather
// than an error.
#[test]
fn test_empty_matrix_entropy() {
let matrix = TransitionMatrix {
cluster_ids: vec![],
probabilities: vec![],
observations: vec![],
};
let entropy = compute_entropy_rate(&matrix);
assert_eq!(entropy, 0.0);
}
}
// ============================================================================
// Transition Matrix Tests
// ============================================================================
mod transition_matrix {
    use super::*;

    /// The fixture builds an n x n stochastic matrix over n cluster ids.
    #[test]
    fn test_create_transition_matrix() {
        let matrix = create_test_transition_matrix(5);
        assert_eq!(matrix.cluster_ids.len(), 5);
        assert_eq!(matrix.probabilities.len(), 5);
        assert_eq!(matrix.probabilities[0].len(), 5);
    }

    /// Every row of a stochastic matrix must sum to one.
    #[test]
    fn test_transition_matrix_rows_sum_to_one() {
        let matrix = create_test_transition_matrix(5);
        for (i, row) in matrix.probabilities.iter().enumerate() {
            let total: f32 = row.iter().sum();
            assert!(
                (total - 1.0).abs() < 0.0001,
                "Row {} should sum to 1.0, got {}",
                i,
                total
            );
        }
    }

    /// Probabilities can never be negative.
    #[test]
    fn test_transition_matrix_probabilities_non_negative() {
        let matrix = create_test_transition_matrix(5);
        for (i, row) in matrix.probabilities.iter().enumerate() {
            for (j, &prob) in row.iter().enumerate() {
                assert!(
                    prob >= 0.0,
                    "Probability at ({}, {}) should be non-negative: {}",
                    i,
                    j,
                    prob
                );
            }
        }
    }

    /// Raw observation counts mirror the matrix shape and are all positive.
    #[test]
    fn test_observations_matrix() {
        let matrix = create_test_transition_matrix(4);
        assert_eq!(matrix.observations.len(), 4);
        assert_eq!(matrix.observations[0].len(), 4);
        // All observations should be positive.
        assert!(matrix
            .observations
            .iter()
            .all(|row| row.iter().all(|&count| count > 0)));
    }

    /// Counting transitions from a toy sequence gives the expected tallies.
    #[test]
    fn test_build_transition_matrix_from_sequence() {
        let cluster_ids: Vec<ClusterId> = (0..3).map(|_| ClusterId::new()).collect();
        let sequence = vec![
            cluster_ids[0],
            cluster_ids[1],
            cluster_ids[0],
            cluster_ids[2],
            cluster_ids[1],
            cluster_ids[0],
        ];
        // Tally (from, to) index pairs over consecutive elements.
        let mut counts: HashMap<(usize, usize), u32> = HashMap::new();
        for pair in sequence.windows(2) {
            let from = cluster_ids.iter().position(|c| *c == pair[0]).unwrap();
            let to = cluster_ids.iter().position(|c| *c == pair[1]).unwrap();
            *counts.entry((from, to)).or_default() += 1;
        }
        // Sequence: [0, 1, 0, 2, 1, 0]
        // Transitions: 0->1 (1x), 1->0 (2x), 0->2 (1x), 2->1 (1x)
        assert_eq!(counts.get(&(0, 1)).copied().unwrap_or(0), 1);
        assert_eq!(counts.get(&(1, 0)).copied().unwrap_or(0), 2);
    }
}
// ============================================================================
// Sequence Analysis Tests
// ============================================================================
mod sequence_analysis {
    use super::*;

    /// Generated segment sequences must be non-overlapping and time-ordered.
    #[test]
    fn test_sequence_segment_ordering() {
        let segments = create_segment_sequence(10, 500);
        // `windows(2)` replaces the original `0..len() - 1` index loop, which
        // would underflow (and panic) on an empty slice.
        for pair in segments.windows(2) {
            assert!(
                pair[0].end_ms <= pair[1].start_ms,
                "Segments should be in temporal order"
            );
        }
    }

    /// A deterministic transition cycle (high stereotypy) must have lower
    /// entropy than uniform transitions (low stereotypy).
    #[test]
    fn test_stereotypy_calculation() {
        // Stereotypy = measure of how predictable transitions are
        // High stereotypy = consistent patterns
        // Low stereotypy = varied patterns
        let n = 4;
        let cluster_ids: Vec<ClusterId> = (0..n).map(|_| ClusterId::new()).collect();
        // Highly stereotyped (deterministic cycle)
        let mut stereotyped_probs = vec![vec![0.0; n]; n];
        for i in 0..n {
            stereotyped_probs[i][(i + 1) % n] = 1.0;
        }
        // Low stereotypy (uniform)
        let uniform_prob = 1.0 / n as f32;
        let stereotyped_matrix = TransitionMatrix {
            cluster_ids: cluster_ids.clone(),
            probabilities: stereotyped_probs,
            observations: vec![vec![10; n]; n],
        };
        let varied_matrix = TransitionMatrix {
            cluster_ids,
            probabilities: vec![vec![uniform_prob; n]; n],
            observations: vec![vec![10; n]; n],
        };
        let stereotyped_entropy = compute_entropy_rate(&stereotyped_matrix);
        let varied_entropy = compute_entropy_rate(&varied_matrix);
        // Stereotyped should have lower entropy (more predictable)
        assert!(stereotyped_entropy < varied_entropy);
    }

    /// Motif density is the fraction of segments covered by motifs.
    #[test]
    fn test_motif_density() {
        // Motif density = ratio of segments that are part of motifs
        let total_segments = 100;
        let motif_segments = 60;
        let density = motif_segments as f32 / total_segments as f32;
        assert!((density - 0.6).abs() < 0.001);
    }
}
// ============================================================================
// Anomaly Detection Tests
// ============================================================================
mod anomaly_detection {
    use super::*;

    /// Simplified local-outlier-factor proxy: the mean cosine distance to the
    /// neighbor set, scaled by 10. A real LOF would compare local densities;
    /// this approximation is only strong enough for the tests below.
    fn compute_local_outlier_factor(
        embedding: &Embedding,
        neighbors: &[Embedding],
    ) -> f32 {
        if neighbors.is_empty() {
            return 1.0;
        }
        // Mean distance from the query to every neighbor.
        let total: f32 = neighbors
            .iter()
            .map(|n| cosine_distance(&embedding.vector, &n.vector))
            .sum();
        let mean = total / neighbors.len() as f32;
        // Scale so that far-away points clearly exceed the LOF > 1 threshold.
        mean * 10.0
    }

    /// A point far from a tight cluster should score as an outlier.
    #[test]
    fn test_detect_outlier_embedding() {
        // Tight cluster of near-identical normal embeddings.
        let base = create_deterministic_vector(1536, 0);
        let normal_embeddings: Vec<Embedding> = (0..20)
            .map(|i| {
                let noisy: Vec<f32> = base.iter().map(|v| v + (i as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&noisy))
            })
            .collect();
        // A vector built from a distant seed acts as the outlier.
        let outlier_base = create_deterministic_vector(1536, 1000);
        let outlier = create_test_embedding_with_vector(l2_normalize(&outlier_base));
        let lof = compute_local_outlier_factor(&outlier, &normal_embeddings);
        assert!(lof > 1.0, "Outlier should have high LOF: {}", lof);
    }

    /// A member of the cluster itself should not be flagged as anomalous.
    #[test]
    fn test_normal_embedding_not_anomalous() {
        let base = create_deterministic_vector(1536, 0);
        let embeddings: Vec<Embedding> = (0..20)
            .map(|i| {
                let noisy: Vec<f32> = base.iter().map(|v| v + (i as f32 * 0.001)).collect();
                create_test_embedding_with_vector(l2_normalize(&noisy))
            })
            .collect();
        // Score one interior point against all of its peers.
        let probe = &embeddings[10];
        let peers: Vec<Embedding> = embeddings
            .iter()
            .filter(|e| e.id != probe.id)
            .cloned()
            .collect();
        let lof = compute_local_outlier_factor(probe, &peers);
        assert!(
            lof < 5.0,
            "Normal point should have low LOF: {}",
            lof
        );
    }
}
// ============================================================================
// Cluster Validation Tests
// ============================================================================
mod cluster_validation {
use super::*;
// Silhouette coefficient for a single point: s = (b - a) / max(a, b), where
// a = mean distance to the point's own cluster and b = mean distance to the
// other cluster. Range is [-1, 1]; positive means the point sits closer to
// its own cluster than to the other one.
// NOTE(review): the divisor `(len - 1).max(1)` assumes `embedding` itself is
// one of `own_cluster_members` (the filter skips it by id); if the point is
// not actually a member, the mean divides by one element too few — confirm
// at call sites. Both tests below do pass a member, so the assumption holds
// here.
fn compute_silhouette_score(
embedding: &Embedding,
own_cluster_members: &[Embedding],
other_cluster_members: &[Embedding],
) -> f32 {
// No own-cluster information: score is defined as neutral 0.
if own_cluster_members.is_empty() {
return 0.0;
}
// a = average distance to own cluster members
let a: f32 = own_cluster_members
.iter()
.filter(|e| e.id != embedding.id)
.map(|e| cosine_distance(&embedding.vector, &e.vector))
.sum::<f32>()
/ (own_cluster_members.len() - 1).max(1) as f32;
// b = average distance to nearest other cluster
// (falls back to 1.0 when there is no other cluster to compare against)
let b: f32 = if other_cluster_members.is_empty() {
1.0
} else {
other_cluster_members
.iter()
.map(|e| cosine_distance(&embedding.vector, &e.vector))
.sum::<f32>()
/ other_cluster_members.len() as f32
};
// Silhouette = (b - a) / max(a, b)
// Guard against division by zero when both means are 0 (identical points).
let max_ab = a.max(b);
if max_ab > 0.0 {
(b - a) / max_ab
} else {
0.0
}
}
// Two tight, far-apart groups: a member of group 1 should score positive.
#[test]
fn test_silhouette_score_well_separated_clusters() {
// Create well-separated clusters
let base1 = create_deterministic_vector(1536, 0);
let base2 = create_deterministic_vector(1536, 1000);
let cluster1: Vec<Embedding> = (0..10)
.map(|i| {
let noisy: Vec<f32> = base1.iter().map(|v| v + (i as f32 * 0.001)).collect();
create_test_embedding_with_vector(l2_normalize(&noisy))
})
.collect();
let cluster2: Vec<Embedding> = (0..10)
.map(|i| {
let noisy: Vec<f32> = base2.iter().map(|v| v + (i as f32 * 0.001)).collect();
create_test_embedding_with_vector(l2_normalize(&noisy))
})
.collect();
// Compute silhouette for point in cluster 1
let score = compute_silhouette_score(&cluster1[5], &cluster1, &cluster2);
// Should be positive (closer to own cluster)
assert!(
score > 0.0,
"Well-separated clusters should have positive silhouette: {}",
score
);
}
// Regardless of how arbitrary the split is, the score must stay in [-1, 1].
#[test]
fn test_silhouette_score_range() {
let embeddings = create_embedding_batch(20);
// Split into two arbitrary clusters
let cluster1: Vec<Embedding> = embeddings[0..10].to_vec();
let cluster2: Vec<Embedding> = embeddings[10..20].to_vec();
for emb in &cluster1 {
let score = compute_silhouette_score(emb, &cluster1, &cluster2);
assert!(
score >= -1.0 && score <= 1.0,
"Silhouette should be in [-1, 1]: {}",
score
);
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: embeddings -> clustering -> transition matrix
    /// -> entropy -> motif detection, checking only that every stage runs.
    #[test]
    fn test_analysis_integration_smoke_test() {
        // Stage 1: a batch of embeddings.
        let embeddings = create_embedding_batch(50);
        // Stage 2: clustering.
        let clustering = MockClusteringService::with_params(5, 3);
        let clusters = clustering.cluster_hdbscan(&embeddings).unwrap();
        // Stage 3: transition matrix sized by the cluster count (min 3).
        let matrix = create_test_transition_matrix(clusters.len().max(3));
        // Stage 4: entropy must be well-defined.
        let entropy = compute_entropy_rate(&matrix);
        assert!(entropy >= 0.0);
        // Stage 5: motif detection over trivial per-cluster sequences.
        let motif_service = MockMotifDetectionService::new();
        let sequences: Vec<Vec<ClusterId>> = clusters
            .iter()
            .map(|c| vec![c.id; 3])
            .collect();
        let _motifs = motif_service.detect_motifs(&sequences).unwrap();
    }
}

View File

@@ -0,0 +1,753 @@
//! Integration tests for API Context
//!
//! Tests for REST endpoints, GraphQL queries/mutations, rate limiting,
//! and error responses.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::collections::HashMap;
use std::time::{Duration, Instant};
// ============================================================================
// REST Endpoint Tests
// ============================================================================
mod rest_endpoints {
use super::*;
// Mock API paths
const RECORDINGS_PATH: &str = "/api/v1/recordings";
// NOTE(review): SEGMENTS_PATH is declared but only the nested
// /recordings/{id}/segments route is exercised below.
const SEGMENTS_PATH: &str = "/api/v1/segments";
const EMBEDDINGS_PATH: &str = "/api/v1/embeddings";
const CLUSTERS_PATH: &str = "/api/v1/clusters";
const INTERPRETATIONS_PATH: &str = "/api/v1/interpretations";
const SEARCH_PATH: &str = "/api/v1/search";
const HEALTH_PATH: &str = "/api/v1/health";
/// GET /recordings returns 200 with a `recordings` collection.
#[test]
fn test_recordings_list_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"recordings": [{"id": "uuid1", "duration_ms": 60000}]}"#,
);
let response = client.get(RECORDINGS_PATH).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("recordings"));
}
/// POST /recordings creates a resource and returns 201 with its id.
#[test]
fn test_recordings_create_endpoint() {
let client = MockApiClient::new();
client.queue_response(
201,
r#"{"id": "new-uuid", "status": "created"}"#,
);
let body = r#"{"source": "upload", "metadata": {}}"#;
let response = client.post(RECORDINGS_PATH, body).unwrap();
assert_eq!(response.status, 201);
assert!(response.body.contains("id"));
}
/// GET /recordings/{id}/segments lists the recording's segments.
#[test]
fn test_segments_by_recording_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"segments": [{"id": "seg1", "start_ms": 0, "end_ms": 5000}]}"#,
);
let path = format!("{}/recording123/segments", RECORDINGS_PATH);
let response = client.get(&path).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("segments"));
}
/// POST /embeddings is asynchronous: 202 Accepted plus a job id.
#[test]
fn test_embedding_generation_endpoint() {
let client = MockApiClient::new();
client.queue_response(
202,
r#"{"job_id": "job123", "status": "processing"}"#,
);
let body = r#"{"segment_ids": ["seg1", "seg2"], "model": "perch2"}"#;
let response = client.post(EMBEDDINGS_PATH, body).unwrap();
assert_eq!(response.status, 202); // Accepted for async processing
assert!(response.body.contains("job_id"));
}
/// POST /search performs k-NN similarity search over segments.
#[test]
fn test_similarity_search_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"results": [{"segment_id": "seg1", "distance": 0.1}], "count": 1}"#,
);
let body = r#"{"query_segment_id": "query1", "k": 10}"#;
let response = client.post(SEARCH_PATH, body).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("results"));
}
/// POST /interpretations returns statements with a confidence score.
#[test]
fn test_interpretation_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"interpretation": {"statements": ["Similar to alarm calls"], "confidence": 0.85}}"#,
);
let body = r#"{"segment_id": "seg1", "include_citations": true}"#;
let response = client.post(INTERPRETATIONS_PATH, body).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("interpretation"));
assert!(response.body.contains("confidence"));
}
/// GET /health reports overall status plus per-component health.
#[test]
fn test_health_check_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"status": "healthy", "version": "1.0.0", "components": {"database": "ok", "index": "ok"}}"#,
);
let response = client.get(HEALTH_PATH).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("healthy"));
}
/// GET /clusters lists clusters with member counts and a total.
#[test]
fn test_cluster_list_endpoint() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"clusters": [{"id": "c1", "member_count": 50}], "total": 1}"#,
);
let response = client.get(CLUSTERS_PATH).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("clusters"));
}
}
// ============================================================================
// GraphQL Tests
// ============================================================================
mod graphql {
use super::*;
const GRAPHQL_PATH: &str = "/graphql";
// Wraps a raw GraphQL document into a JSON request body of the form
// {"query": "..."}, escaping any double quotes in the document. Callers that
// need quotes *inside* the GraphQL string pass them pre-escaped (the \\\"
// sequences in the literals below), so they end up double-escaped in the
// final JSON.
fn create_graphql_query(query: &str) -> String {
format!(r#"{{"query": "{}"}}"#, query.replace('"', "\\\""))
}
/// Plain top-level query returns a `data` envelope.
#[test]
fn test_graphql_recordings_query() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"recordings": [{"id": "rec1", "duration_ms": 60000}]}}"#,
);
let query = create_graphql_query("{ recordings { id duration_ms } }");
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("data"));
assert!(response.body.contains("recordings"));
}
/// One level of nesting: a recording with its segments.
#[test]
fn test_graphql_recording_with_segments() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"recording": {"id": "rec1", "segments": [{"id": "seg1"}]}}}"#,
);
let query = create_graphql_query(
"{ recording(id: \\\"rec1\\\") { id segments { id } } }",
);
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("segments"));
}
/// Segment resolves its associated embedding.
#[test]
fn test_graphql_segment_with_embedding() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"segment": {"id": "seg1", "embedding": {"id": "emb1", "norm": 1.0}}}}"#,
);
let query = create_graphql_query(
"{ segment(id: \\\"seg1\\\") { id embedding { id norm } } }",
);
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("embedding"));
}
/// Similarity search exposed as a query with a `k` argument.
#[test]
fn test_graphql_similarity_search() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"similarSegments": [{"segment": {"id": "s1"}, "distance": 0.1}]}}"#,
);
let query = create_graphql_query(
"{ similarSegments(segmentId: \\\"seg1\\\", k: 10) { segment { id } distance } }",
);
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("similarSegments"));
}
/// Mutation creating a recording returns the new id and status.
#[test]
fn test_graphql_create_recording_mutation() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"createRecording": {"id": "new-rec", "status": "INGESTED"}}}"#,
);
let mutation = create_graphql_query(
"mutation { createRecording(input: {source: \\\"upload\\\"}) { id status } }",
);
let response = client.post(GRAPHQL_PATH, &mutation).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("createRecording"));
}
/// Mutation kicking off async embedding generation returns a job handle.
#[test]
fn test_graphql_generate_embeddings_mutation() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"generateEmbeddings": {"jobId": "job1", "status": "PROCESSING"}}}"#,
);
let mutation = create_graphql_query(
"mutation { generateEmbeddings(segmentIds: [\\\"s1\\\", \\\"s2\\\"]) { jobId status } }",
);
let response = client.post(GRAPHQL_PATH, &mutation).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("generateEmbeddings"));
}
/// Mutation running clustering returns session metadata.
#[test]
fn test_graphql_run_clustering_mutation() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"runClustering": {"sessionId": "sess1", "clusterCount": 15}}}"#,
);
let mutation = create_graphql_query(
"mutation { runClustering(method: HDBSCAN, params: {minClusterSize: 5}) { sessionId clusterCount } }",
);
let response = client.post(GRAPHQL_PATH, &mutation).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("runClustering"));
}
/// Resolver errors come back in the `errors` array with HTTP 200.
#[test]
fn test_graphql_error_response() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": null, "errors": [{"message": "Segment not found", "path": ["segment"]}]}"#,
);
let query = create_graphql_query("{ segment(id: \\\"nonexistent\\\") { id } }");
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200); // GraphQL returns 200 even for errors
assert!(response.body.contains("errors"));
}
/// Deep nesting: recording -> segments -> embedding -> cluster.
#[test]
fn test_graphql_nested_query() {
let client = MockApiClient::new();
client.queue_response(
200,
r#"{"data": {"recording": {"segments": [{"embedding": {"cluster": {"id": "c1"}}}]}}}"#,
);
let query = create_graphql_query(
"{ recording(id: \\\"r1\\\") { segments { embedding { cluster { id } } } } }",
);
let response = client.post(GRAPHQL_PATH, &query).unwrap();
assert_eq!(response.status, 200);
assert!(response.body.contains("cluster"));
}
}
// ============================================================================
// Rate Limiting Tests
// ============================================================================
mod rate_limiting {
    use super::*;

    /// Requests below the configured rate must all pass.
    #[test]
    fn test_rate_limiter_allows_under_limit() {
        let limiter = MockRateLimiter::new(100); // 100 requests/second
        // Should allow first requests
        for _ in 0..50 {
            assert!(limiter.check(), "Should allow requests under limit");
        }
    }

    /// Once the budget is spent, further requests are rejected.
    #[test]
    fn test_rate_limiter_blocks_over_limit() {
        let limiter = MockRateLimiter::new(10); // 10 requests/second
        // Exhaust limit
        for _ in 0..10 {
            limiter.check();
        }
        // Next request should be blocked
        assert!(!limiter.check(), "Should block requests over limit");
    }

    /// Sliding-window behavior: exactly `limit` requests pass, then blocking
    /// begins. (Window expiry itself would require simulated time.)
    #[test]
    fn test_rate_limiter_sliding_window() {
        let limiter = MockRateLimiter::new(5);
        // Make 5 requests
        for _ in 0..5 {
            assert!(limiter.check());
        }
        // 6th should be blocked
        assert!(!limiter.check());
        // After window slides (simulated by new check), requests should be allowed
        // In real implementation, would wait for time to pass
    }

    /// A rate-limited API call must surface HTTP 429.
    #[test]
    fn test_rate_limit_response_code() {
        // When rate limited, API should return 429
        let client = MockApiClient::new();
        client.queue_response(
            429,
            r#"{"error": "Too Many Requests", "retry_after": 60}"#,
        );
        let response = client.get("/api/v1/recordings").unwrap();
        assert_eq!(response.status, 429);
        assert!(response.body.contains("Too Many Requests"));
    }

    /// Standard X-RateLimit-* headers round-trip through the response map.
    #[test]
    fn test_rate_limit_headers() {
        let mut response = MockResponse {
            status: 200,
            body: "{}".to_string(),
            headers: HashMap::new(),
        };
        response.headers.insert("X-RateLimit-Limit".to_string(), "100".to_string());
        response.headers.insert("X-RateLimit-Remaining".to_string(), "95".to_string());
        response.headers.insert("X-RateLimit-Reset".to_string(), "1609459200".to_string());
        assert_eq!(response.headers.get("X-RateLimit-Limit").unwrap(), "100");
        assert_eq!(response.headers.get("X-RateLimit-Remaining").unwrap(), "95");
    }

    /// Endpoint classes get different budgets; reads are most permissive.
    #[test]
    fn test_different_rate_limits_per_endpoint() {
        // Heavy operations should have lower limits
        let search_limiter = MockRateLimiter::new(10); // 10/sec for search
        let read_limiter = MockRateLimiter::new(100); // 100/sec for reads
        // Underscore prefix: constructed to document the intended write tier
        // but not exercised yet; silences the unused-variable warning the
        // original binding produced.
        let _write_limiter = MockRateLimiter::new(20); // 20/sec for writes
        // Reads should be most permissive
        for _ in 0..50 {
            assert!(read_limiter.check());
        }
        // Search should be more restrictive
        for _ in 0..10 {
            assert!(search_limiter.check());
        }
        assert!(!search_limiter.check());
    }
}
// ============================================================================
// Error Response Tests
// ============================================================================
mod error_responses {
use super::*;
// Unknown resource id: 404 with a machine-readable error/message/code payload.
#[test]
fn test_404_not_found() {
let client = MockApiClient::new();
client.queue_response(
404,
r#"{"error": "Not Found", "message": "Recording not found", "code": "RECORDING_NOT_FOUND"}"#,
);
let response = client.get("/api/v1/recordings/nonexistent").unwrap();
assert_eq!(response.status, 404);
assert!(response.body.contains("Not Found"));
}
// Malformed input: 400 with the offending field named in the payload.
#[test]
fn test_400_bad_request() {
let client = MockApiClient::new();
client.queue_response(
400,
r#"{"error": "Bad Request", "message": "Invalid segment_id format", "field": "segment_id"}"#,
);
let response = client.post("/api/v1/embeddings", r#"{"segment_id": "invalid"}"#).unwrap();
assert_eq!(response.status, 400);
assert!(response.body.contains("Bad Request"));
}
// Semantically invalid input (k out of range): 422 with per-field errors.
#[test]
fn test_422_validation_error() {
let client = MockApiClient::new();
client.queue_response(
422,
r#"{"error": "Validation Error", "errors": [{"field": "k", "message": "must be between 1 and 100"}]}"#,
);
let response = client.post("/api/v1/search", r#"{"k": 1000}"#).unwrap();
assert_eq!(response.status, 422);
assert!(response.body.contains("Validation Error"));
}
#[test]
fn test_500_internal_error() {
let client = MockApiClient::new();
client.queue_response(
500,
r#"{"error": "Internal Server Error", "message": "An unexpected error occurred", "request_id": "req-123"}"#,
);
let response = client.get("/api/v1/recordings").unwrap();
assert_eq!(response.status, 500);
assert!(response.body.contains("Internal Server Error"));
assert!(response.body.contains("request_id"));
}
#[test]
fn test_503_service_unavailable() {
let client = MockApiClient::new();
client.queue_response(
503,
r#"{"error": "Service Unavailable", "message": "Index is rebuilding", "retry_after": 300}"#,
);
let response = client.get("/api/v1/search").unwrap();
assert_eq!(response.status, 503);
assert!(response.body.contains("Service Unavailable"));
}
#[test]
fn test_error_response_format() {
// All errors should have consistent format
let error_bodies = vec![
r#"{"error": "Not Found", "message": "Resource not found", "code": "NOT_FOUND"}"#,
r#"{"error": "Bad Request", "message": "Invalid input", "code": "INVALID_INPUT"}"#,
r#"{"error": "Internal Server Error", "message": "Server error", "code": "INTERNAL_ERROR"}"#,
];
for body in error_bodies {
assert!(body.contains("error"));
assert!(body.contains("message"));
assert!(body.contains("code"));
}
}
#[test]
fn test_error_with_details() {
let client = MockApiClient::new();
client.queue_response(
400,
r#"{
"error": "Bad Request",
"message": "Multiple validation errors",
"details": [
{"field": "sample_rate", "error": "must be 32000"},
{"field": "channels", "error": "must be 1 (mono)"}
]
}"#,
);
let response = client.post("/api/v1/recordings", "{}").unwrap();
assert_eq!(response.status, 400);
assert!(response.body.contains("details"));
}
}
// ============================================================================
// Authentication Tests
// ============================================================================
// Authentication and authorization failure modes of the mock API.
mod authentication {
    use super::*;

    /// Requests without credentials are rejected with 401.
    #[test]
    fn test_unauthorized_without_token() {
        let api = MockApiClient::new();
        api.queue_response(
            401,
            r#"{"error": "Unauthorized", "message": "Missing or invalid authentication token"}"#,
        );
        let reply = api.get("/api/v1/recordings").unwrap();
        assert_eq!(reply.status, 401);
        assert!(reply.body.contains("Unauthorized"));
    }

    /// Authenticated but under-privileged callers get 403 on admin routes.
    #[test]
    fn test_forbidden_insufficient_permissions() {
        let api = MockApiClient::new();
        api.queue_response(
            403,
            r#"{"error": "Forbidden", "message": "Insufficient permissions to access this resource"}"#,
        );
        let reply = api.get("/api/v1/admin/settings").unwrap();
        assert_eq!(reply.status, 403);
        assert!(reply.body.contains("Forbidden"));
    }

    /// Expired tokens surface a machine-readable TOKEN_EXPIRED code.
    #[test]
    fn test_token_expired() {
        let api = MockApiClient::new();
        api.queue_response(
            401,
            r#"{"error": "Unauthorized", "message": "Token expired", "code": "TOKEN_EXPIRED"}"#,
        );
        let reply = api.get("/api/v1/recordings").unwrap();
        assert_eq!(reply.status, 401);
        assert!(reply.body.contains("TOKEN_EXPIRED"));
    }
}
// ============================================================================
// Pagination Tests
// ============================================================================
// Verifies both offset (page/per_page) and cursor pagination contracts.
mod pagination {
    use super::*;

    /// Offset pagination: envelope carries page, per_page, total, total_pages.
    #[test]
    fn test_paginated_response() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{
                "data": [{"id": "rec1"}, {"id": "rec2"}],
                "pagination": {
                    "page": 1,
                    "per_page": 20,
                    "total": 100,
                    "total_pages": 5
                }
            }"#,
        );
        let response = client.get("/api/v1/recordings?page=1&per_page=20").unwrap();
        assert_eq!(response.status, 200);
        assert!(response.body.contains("pagination"));
        assert!(response.body.contains("total_pages"));
    }

    /// Cursor pagination: opaque next/previous cursors plus a has_more flag.
    #[test]
    fn test_cursor_based_pagination() {
        let client = MockApiClient::new();
        client.queue_response(
            200,
            r#"{
                "data": [{"id": "rec1"}, {"id": "rec2"}],
                "cursors": {
                    "next": "eyJpZCI6InJlYzIifQ==",
                    "previous": null
                },
                "has_more": true
            }"#,
        );
        let response = client.get("/api/v1/recordings?limit=20").unwrap();
        assert_eq!(response.status, 200);
        assert!(response.body.contains("cursors"));
        assert!(response.body.contains("has_more"));
    }

    /// Negative page numbers are rejected with 400.
    #[test]
    fn test_invalid_page_parameter() {
        let client = MockApiClient::new();
        client.queue_response(
            400,
            r#"{"error": "Bad Request", "message": "Page must be a positive integer"}"#,
        );
        let response = client.get("/api/v1/recordings?page=-1").unwrap();
        assert_eq!(response.status, 400);
    }
}
// ============================================================================
// Content Negotiation Tests
// ============================================================================
// Content-type handling: JSON in, JSON out; anything else is rejected.
mod content_negotiation {
    use super::*;

    /// A JSON payload must be labelled with the application/json content type.
    #[test]
    fn test_json_response() {
        let mut headers = HashMap::new();
        headers.insert("Content-Type".to_string(), "application/json".to_string());
        let response = MockResponse {
            status: 200,
            body: r#"{"data": []}"#.to_string(),
            headers,
        };
        let content_type = response.headers.get("Content-Type").unwrap();
        assert_eq!(content_type, "application/json");
    }

    /// Posting a non-JSON body (e.g. XML) is rejected with 415.
    #[test]
    fn test_unsupported_media_type() {
        let api = MockApiClient::new();
        api.queue_response(
            415,
            r#"{"error": "Unsupported Media Type", "message": "Only application/json is supported"}"#,
        );
        let reply = api.post("/api/v1/recordings", "<xml></xml>").unwrap();
        assert_eq!(reply.status, 415);
    }
}
// ============================================================================
// API Request Tracking Tests
// ============================================================================
// Behaviour of the mock client itself: request counting and response queuing.
mod request_tracking {
    use super::*;

    /// The counter increments once per request, regardless of HTTP verb.
    #[test]
    fn test_request_count_tracking() {
        let api = MockApiClient::new();
        assert_eq!(api.request_count(), 0);
        api.get("/path1").unwrap();
        assert_eq!(api.request_count(), 1);
        api.post("/path2", "{}").unwrap();
        assert_eq!(api.request_count(), 2);
        api.get("/path3").unwrap();
        assert_eq!(api.request_count(), 3);
    }

    /// Queued responses are served strictly in FIFO order.
    #[test]
    fn test_response_queuing() {
        let api = MockApiClient::new();
        for &(status, body) in &[(200, "first"), (201, "second"), (202, "third")] {
            api.queue_response(status, body);
        }
        let statuses: Vec<_> = (0..3).map(|_| api.get("/").unwrap().status).collect();
        assert_eq!(statuses, vec![200, 201, 202]);
    }

    /// With nothing queued the mock falls back to a 200 / "{}" response.
    #[test]
    fn test_default_response_when_queue_empty() {
        let api = MockApiClient::new();
        let reply = api.get("/").unwrap();
        assert_eq!(reply.status, 200);
        assert_eq!(reply.body, "{}");
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end smoke test: list, create, and search through the mock API,
    /// verifying each status code and the total request count.
    #[test]
    fn test_api_integration_smoke_test() {
        let api = MockApiClient::new();

        // List recordings.
        api.queue_response(200, r#"{"recordings": []}"#);
        assert_eq!(api.get("/api/v1/recordings").unwrap().status, 200);

        // Create a recording.
        api.queue_response(201, r#"{"id": "new-rec"}"#);
        assert_eq!(api.post("/api/v1/recordings", "{}").unwrap().status, 201);

        // Run a search.
        api.queue_response(200, r#"{"results": []}"#);
        assert_eq!(api.post("/api/v1/search", r#"{"k": 10}"#).unwrap().status, 200);

        // Every call above was tracked.
        assert_eq!(api.request_count(), 3);
    }
}

View File

@@ -0,0 +1,729 @@
//! Integration tests for Audio Ingestion Context
//!
//! Tests for audio file loading, resampling, segmentation, and spectrogram generation.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::io::Cursor;
// ============================================================================
// Audio File Loading Tests
// ============================================================================
// Fixture-level checks on WAV generation and raw sample loading.
mod audio_loading {
    use super::*;

    /// The generated WAV fixture must carry a valid RIFF/WAVE header at 32 kHz.
    #[test]
    fn test_load_wav_file_32khz() {
        // Create test WAV data at 32kHz
        let wav_bytes = create_test_wav_bytes(5000); // 5 seconds
        // Verify WAV header
        assert_eq!(&wav_bytes[0..4], b"RIFF");
        assert_eq!(&wav_bytes[8..12], b"WAVE");
        assert_eq!(&wav_bytes[12..16], b"fmt ");
        // Parse sample rate from header (bytes 24..28, little-endian u32)
        let sample_rate =
            u32::from_le_bytes([wav_bytes[24], wav_bytes[25], wav_bytes[26], wav_bytes[27]]);
        assert_eq!(sample_rate, 32000);
    }

    /// Sample count must equal duration_ms * sample_rate / 1000 exactly.
    #[test]
    fn test_load_audio_correct_duration() {
        let duration_ms = 5000;
        let samples = create_test_audio_samples(duration_ms, 32000);
        let expected_samples = (duration_ms as f64 * 32000.0 / 1000.0) as usize;
        assert_eq!(samples.len(), expected_samples);
    }

    /// Every sample must be finite and within the normalized [-1.0, 1.0] range.
    #[test]
    fn test_audio_samples_in_valid_range() {
        let samples = create_test_audio_samples(1000, 32000);
        for (i, sample) in samples.iter().enumerate() {
            assert!(
                *sample >= -1.0 && *sample <= 1.0,
                "Sample {} out of range: {}",
                i,
                sample
            );
            assert!(
                !sample.is_nan() && !sample.is_infinite(),
                "Sample {} is NaN or Inf",
                i
            );
        }
    }

    /// Default format matches the Perch 2.0 input contract: 32 kHz mono, >=16-bit.
    #[test]
    fn test_audio_format_validation() {
        let format = AudioFormat::default();
        assert_eq!(format.sample_rate, 32000, "Must be 32kHz for Perch 2.0");
        assert_eq!(format.channels, 1, "Must be mono");
        assert!(format.bit_depth >= 16, "Minimum 16-bit");
    }

    /// Sample counts scale linearly with duration (32 samples per millisecond).
    #[test]
    fn test_load_different_durations() {
        let durations = vec![1000, 5000, 10000, 30000, 60000];
        for duration in durations {
            let samples = create_test_audio_samples(duration, 32000);
            let expected = (duration as f64 * 32.0) as usize;
            assert_eq!(
                samples.len(),
                expected,
                "Wrong sample count for {}ms",
                duration
            );
        }
    }

    /// Structural sanity of the WAV fixture: header size and data chunk.
    #[test]
    fn test_wav_bytes_parseable() {
        let wav_bytes = create_test_wav_bytes(5000);
        // Basic WAV structure validation (44-byte canonical header)
        assert!(wav_bytes.len() > 44, "WAV too short for valid header");
        // Verify data chunk marker at the fixed canonical offset
        let data_marker = &wav_bytes[36..40];
        assert_eq!(data_marker, b"data");
        // Verify data size (little-endian u32 following the marker)
        let data_size =
            u32::from_le_bytes([wav_bytes[40], wav_bytes[41], wav_bytes[42], wav_bytes[43]]);
        assert!(data_size > 0);
    }
}
// ============================================================================
// Resampling Tests
// ============================================================================
// Linear-interpolation resampling to the Perch 2.0 input rate (32 kHz).
mod resampling {
    use super::*;

    /// Mock resampler that converts audio to target sample rate
    /// using linear interpolation between neighbouring source samples.
    struct MockResampler {
        // Output sample rate in Hz.
        target_rate: u32,
    }

    impl MockResampler {
        fn new(target_rate: u32) -> Self {
            Self { target_rate }
        }

        /// Resample `samples` from `source_rate` to the configured target rate.
        /// Identical rates short-circuit to a plain copy.
        fn resample(&self, samples: &[f32], source_rate: u32) -> Vec<f32> {
            if source_rate == self.target_rate {
                return samples.to_vec();
            }
            let ratio = self.target_rate as f64 / source_rate as f64;
            let new_len = (samples.len() as f64 * ratio) as usize;
            // Simple linear interpolation resampling: each output sample is a
            // weighted blend of the two source samples bracketing its position.
            (0..new_len)
                .map(|i| {
                    let src_idx = i as f64 / ratio;
                    let idx0 = src_idx.floor() as usize;
                    // Clamp so the last output still has a valid right neighbour.
                    let idx1 = (idx0 + 1).min(samples.len() - 1);
                    let frac = src_idx - idx0 as f64;
                    samples[idx0] * (1.0 - frac as f32) + samples[idx1] * frac as f32
                })
                .collect()
        }
    }

    /// Downsampling 44.1 kHz -> 32 kHz yields ~32000 samples per second (+-1 for rounding).
    #[test]
    fn test_resample_44100_to_32000() {
        let source_rate = 44100;
        let target_rate = 32000;
        let duration_ms = 1000;
        // Create 44.1kHz audio
        let samples: Vec<f32> = (0..(source_rate * duration_ms / 1000) as usize)
            .map(|i| (i as f32 * 0.01).sin())
            .collect();
        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);
        let expected_len = (target_rate * duration_ms / 1000) as usize;
        // Allow 1 sample tolerance due to rounding
        assert!(
            (resampled.len() as i64 - expected_len as i64).abs() <= 1,
            "Expected ~{} samples, got {}",
            expected_len,
            resampled.len()
        );
    }

    /// Same contract for the common 48 kHz source rate.
    #[test]
    fn test_resample_48000_to_32000() {
        let source_rate = 48000;
        let target_rate = 32000;
        let duration_ms = 1000;
        let samples: Vec<f32> = (0..(source_rate * duration_ms / 1000) as usize)
            .map(|i| (i as f32 * 0.01).sin())
            .collect();
        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);
        let expected_len = (target_rate * duration_ms / 1000) as usize;
        assert!(
            (resampled.len() as i64 - expected_len as i64).abs() <= 1,
            "Expected ~{} samples, got {}",
            expected_len,
            resampled.len()
        );
    }

    /// Mean-square energy must survive resampling to within 10%.
    #[test]
    fn test_resample_preserves_energy() {
        let source_rate = 44100;
        let target_rate = 32000;
        let samples: Vec<f32> = (0..44100).map(|i| (i as f32 * 0.01).sin()).collect();
        let source_energy: f32 = samples.iter().map(|x| x * x).sum::<f32>() / samples.len() as f32;
        let resampler = MockResampler::new(target_rate);
        let resampled = resampler.resample(&samples, source_rate);
        let target_energy: f32 =
            resampled.iter().map(|x| x * x).sum::<f32>() / resampled.len() as f32;
        // Energy should be approximately preserved
        let energy_diff = (source_energy - target_energy).abs() / source_energy;
        assert!(
            energy_diff < 0.1,
            "Energy changed by {:.1}%",
            energy_diff * 100.0
        );
    }

    /// 32 kHz -> 32 kHz is the identity (copy path, no interpolation).
    #[test]
    fn test_resample_identity_at_32000() {
        let samples = create_test_audio_samples(1000, 32000);
        let resampler = MockResampler::new(32000);
        let resampled = resampler.resample(&samples, 32000);
        assert_eq!(samples.len(), resampled.len());
        for (a, b) in samples.iter().zip(resampled.iter()) {
            assert!((a - b).abs() < 0.0001);
        }
    }
}
// ============================================================================
// Segmentation Tests
// ============================================================================
// Energy-threshold segmentation of recordings into call segments.
mod segmentation {
    use super::*;

    /// Mock energy-based segmenter: slides a window over the audio, thresholds
    /// per-window mean-square energy, and emits segments for sustained activity.
    struct MockSegmenter {
        // Analysis window length in milliseconds.
        window_ms: u64,
        // Hop between consecutive windows in milliseconds.
        hop_ms: u64,
        // Energy a window must exceed to count as activity.
        threshold: f32,
        // Activity shorter than this is discarded.
        min_duration_ms: u64,
        // Input sample rate in Hz.
        sample_rate: u32,
    }

    impl MockSegmenter {
        fn new() -> Self {
            Self {
                window_ms: 100,
                hop_ms: 50,
                threshold: 0.1,
                min_duration_ms: 500,
                sample_rate: 32000,
            }
        }

        /// Return the detected call segments for `samples`, tagged with `recording_id`.
        fn segment(&self, samples: &[f32], recording_id: RecordingId) -> Vec<CallSegment> {
            let window_size = (self.window_ms as usize * self.sample_rate as usize) / 1000;
            let hop_size = (self.hop_ms as usize * self.sample_rate as usize) / 1000;
            // Compute mean-square energy per (overlapping) window
            let mut energies: Vec<f32> = Vec::new();
            let mut i = 0;
            while i + window_size <= samples.len() {
                let energy: f32 =
                    samples[i..i + window_size].iter().map(|x| x * x).sum::<f32>() / window_size as f32;
                energies.push(energy);
                i += hop_size;
            }
            // Find segments above threshold via a rising/falling-edge walk.
            // NOTE(review): a segment still open when the energy sequence ends
            // is never flushed, so trailing activity is silently dropped —
            // confirm this is intended for the mock.
            let mut segments = Vec::new();
            let mut in_segment = false;
            let mut segment_start = 0;
            for (i, energy) in energies.iter().enumerate() {
                // Window index -> timeline position in ms (one hop per window).
                let time_ms = (i * self.hop_ms as usize) as u64;
                if *energy > self.threshold && !in_segment {
                    in_segment = true;
                    segment_start = time_ms;
                } else if *energy <= self.threshold && in_segment {
                    in_segment = false;
                    let duration = time_ms - segment_start;
                    if duration >= self.min_duration_ms {
                        segments.push(CallSegment {
                            id: SegmentId::new(),
                            recording_id,
                            start_ms: segment_start,
                            end_ms: time_ms,
                            // Fixed placeholder SNR; a real segmenter would measure it.
                            snr: 15.0,
                            // Energy of the segment's first window (start_ms / hop_ms).
                            energy: energies[segment_start as usize / self.hop_ms as usize],
                            clipping_score: 0.0,
                            overlap_score: 0.0,
                            quality_grade: QualityGrade::Good,
                        });
                    }
                }
            }
            segments
        }
    }

    /// A loud 1 s burst surrounded by silence must yield at least one segment.
    #[test]
    fn test_segmentation_detects_calls() {
        let segmenter = MockSegmenter::new();
        let recording_id = RecordingId::new();
        // Create audio with clear signal/silence pattern
        let mut samples = vec![0.0f32; 64000]; // 2 seconds
        // Add a loud "call" at 200-1200ms (1 second of signal)
        for i in 6400..38400 {
            samples[i] = 0.8 * ((i as f32 * 0.05).sin()); // Louder signal
        }
        // Silence at start and end
        let segments = segmenter.segment(&samples, recording_id);
        assert!(
            !segments.is_empty(),
            "Should detect at least one segment"
        );
    }

    /// Fixture segment sequences must be ordered and non-overlapping.
    #[test]
    fn test_segmentation_non_overlapping() {
        let segments = create_segment_sequence(5, 500);
        for i in 0..segments.len() - 1 {
            assert!(
                segments[i].end_ms <= segments[i + 1].start_ms,
                "Segments {} and {} overlap",
                i,
                i + 1
            );
        }
    }

    /// Perch consumes fixed-length 5-second windows.
    #[test]
    fn test_segment_duration_constraint() {
        let segments = create_segment_sequence(10, 0);
        for segment in &segments {
            let duration = segment.end_ms - segment.start_ms;
            assert_eq!(duration, 5000, "Perch segments should be 5 seconds");
        }
    }

    /// Fixture SNR values must be positive and physically plausible.
    #[test]
    fn test_segmentation_snr_computation() {
        let segments = create_segment_sequence(5, 500);
        for segment in &segments {
            assert!(segment.snr > 0.0, "SNR should be positive");
            assert!(segment.snr < 100.0, "SNR should be realistic");
        }
    }

    /// No segment may extend past the end of its recording.
    #[test]
    fn test_segment_within_recording_bounds() {
        let recording = create_test_recording_with_duration(60000);
        let segments = create_segment_sequence(10, 500);
        for segment in &segments {
            assert!(
                segment.end_ms <= recording.duration_ms,
                "Segment extends beyond recording"
            );
        }
    }

    /// The recording_id assigned to a segment round-trips unchanged.
    #[test]
    fn test_segmentation_preserves_recording_id() {
        let recording_id = RecordingId::new();
        let mut segment = create_test_segment();
        segment.recording_id = recording_id;
        assert_eq!(segment.recording_id, recording_id);
    }
}
// ============================================================================
// Spectrogram Generation Tests
// ============================================================================
// Shape and value invariants of the Perch 2.0 mel-spectrogram fixtures.
mod spectrogram {
    use super::*;

    // Canonical Perch 2.0 spectrogram shape: 500 frames x 128 mel bins.
    const MEL_BINS: usize = 128;
    const MEL_FRAMES: usize = 500;

    /// The fixture spectrogram has the canonical 500x128 shape.
    #[test]
    fn test_spectrogram_dimensions() {
        let spectrogram = create_test_spectrogram();
        assert_eq!(spectrogram.len(), MEL_FRAMES, "Should have 500 frames");
        assert_eq!(
            spectrogram[0].len(),
            MEL_BINS,
            "Should have 128 mel bins"
        );
    }

    /// Magnitude spectrograms contain no negative values.
    #[test]
    fn test_spectrogram_values_non_negative() {
        let spectrogram = create_test_spectrogram();
        for (frame_idx, frame) in spectrogram.iter().enumerate() {
            for (bin_idx, value) in frame.iter().enumerate() {
                assert!(
                    *value >= 0.0,
                    "Frame {} bin {} has negative value: {}",
                    frame_idx,
                    bin_idx,
                    value
                );
            }
        }
    }

    /// Every cell of the grid must be finite.
    #[test]
    fn test_spectrogram_no_nan_or_inf() {
        let spectrogram = create_test_spectrogram();
        for (frame_idx, frame) in spectrogram.iter().enumerate() {
            for (bin_idx, value) in frame.iter().enumerate() {
                assert!(
                    !value.is_nan() && !value.is_infinite(),
                    "Frame {} bin {} is NaN/Inf",
                    frame_idx,
                    bin_idx
                );
            }
        }
    }

    /// Per-frame energy must actually vary across the fixture (not flat).
    #[test]
    fn test_spectrogram_energy_distribution() {
        let spectrogram = create_test_spectrogram();
        // Compute total energy per frame
        let frame_energies: Vec<f32> = spectrogram
            .iter()
            .map(|frame| frame.iter().sum())
            .collect();
        // Energy should vary (not all zeros or all same)
        let min_energy = frame_energies
            .iter()
            .cloned()
            .fold(f32::INFINITY, f32::min);
        let max_energy = frame_energies
            .iter()
            .cloned()
            .fold(f32::NEG_INFINITY, f32::max);
        assert!(
            max_energy > min_energy * 1.1,
            "Energy should vary across frames"
        );
    }

    /// A crude framewise reduction of raw samples yields the right shape.
    #[test]
    fn test_spectrogram_from_audio_samples() {
        let samples = create_test_audio_samples(5000, 32000);
        // Simple mock spectrogram computation
        let hop_size = samples.len() / MEL_FRAMES;
        let spectrogram: Vec<Vec<f32>> = (0..MEL_FRAMES)
            .map(|frame| {
                let start = frame * hop_size;
                let end = (start + hop_size).min(samples.len());
                let chunk = &samples[start..end];
                // Mock mel filterbank (simplified): mean |x| per frequency slice.
                (0..MEL_BINS)
                    .map(|bin| {
                        let freq_start = bin * chunk.len() / MEL_BINS;
                        let freq_end = ((bin + 1) * chunk.len() / MEL_BINS).min(chunk.len());
                        if freq_start < freq_end {
                            chunk[freq_start..freq_end]
                                .iter()
                                .map(|x| x.abs())
                                .sum::<f32>()
                                / (freq_end - freq_start) as f32
                        } else {
                            0.0
                        }
                    })
                    .collect()
            })
            .collect();
        assert_eq!(spectrogram.len(), MEL_FRAMES);
        assert_eq!(spectrogram[0].len(), MEL_BINS);
    }

    /// 160000 samples across 500 frames is roughly 10 ms of audio per frame.
    #[test]
    fn test_spectrogram_temporal_resolution() {
        // 5 seconds at 32kHz = 160000 samples
        // 500 frames means ~10ms per frame
        let samples_per_frame = 160000 / MEL_FRAMES;
        let ms_per_frame = (samples_per_frame as f64 / 32.0) as u64;
        assert!(
            ms_per_frame >= 9 && ms_per_frame <= 11,
            "Frame duration should be ~10ms, got {}ms",
            ms_per_frame
        );
    }

    /// 60 Hz-16 kHz over 128 bins is ~125 Hz per bin (linear approximation).
    #[test]
    fn test_spectrogram_frequency_range() {
        // Perch 2.0 uses 60Hz to 16000Hz
        // With 128 mel bins, each bin covers approximately 125Hz
        let min_freq = 60.0;
        let max_freq = 16000.0;
        let hz_per_bin = (max_freq - min_freq) / MEL_BINS as f32;
        assert!(
            hz_per_bin > 100.0 && hz_per_bin < 150.0,
            "Each mel bin should cover ~125Hz, got {}Hz",
            hz_per_bin
        );
    }
}
// ============================================================================
// Recording Repository Integration Tests
// ============================================================================
// CRUD and query behaviour of the in-memory recording/segment repositories.
mod repository_integration {
    use super::*;
    use chrono::Duration as ChronoDuration;

    /// Full create / read / count / delete cycle for a recording.
    #[test]
    fn test_recording_crud_operations() {
        let repo = MockRecordingRepository::new();
        // Create
        let recording = create_test_recording();
        let id = recording.id;
        repo.save(recording).unwrap();
        // Read
        let found = repo.find_by_id(&id).unwrap().unwrap();
        assert_eq!(found.id, id);
        // Count
        assert_eq!(repo.count(), 1);
        // Delete
        repo.delete(&id).unwrap();
        assert_eq!(repo.count(), 0);
        assert!(repo.find_by_id(&id).unwrap().is_none());
    }

    /// Sensor lookup partitions the store (5 recordings alternate over 2 sensors).
    #[test]
    fn test_find_recordings_by_sensor() {
        let repo = MockRecordingRepository::new();
        // Add recordings from different sensors
        for i in 0..5 {
            let mut recording = create_test_recording();
            recording.sensor_id = format!("SENSOR_{}", i % 2);
            repo.save(recording).unwrap();
        }
        let sensor0_recordings = repo.find_by_sensor_id("SENSOR_0").unwrap();
        let sensor1_recordings = repo.find_by_sensor_id("SENSOR_1").unwrap();
        assert_eq!(sensor0_recordings.len(), 3);
        assert_eq!(sensor1_recordings.len(), 2);
    }

    /// Date-range query: recordings at 0..4 h ago, window covers the last 2 h.
    #[test]
    fn test_find_recordings_by_date_range() {
        let repo = MockRecordingRepository::new();
        let now = chrono::Utc::now();
        // Add recordings at different times
        for i in 0..5 {
            let mut recording = create_test_recording();
            recording.start_timestamp = now - ChronoDuration::hours(i as i64);
            repo.save(recording).unwrap();
        }
        // Find recordings from last 2 hours
        let start = now - ChronoDuration::hours(2);
        let end = now + ChronoDuration::hours(1);
        let recent = repo.find_by_date_range(start, end).unwrap();
        assert_eq!(recent.len(), 3); // 0, 1, 2 hours ago
    }

    /// Segment lookup by recording must not leak another recording's segments.
    #[test]
    fn test_segment_repository_by_recording() {
        let repo = MockSegmentRepository::new();
        let recording_id = RecordingId::new();
        // Add segments for this recording
        for i in 0..5 {
            let segment = CallSegment {
                recording_id,
                start_ms: i * 5500,
                end_ms: i * 5500 + 5000,
                ..Default::default()
            };
            repo.save(segment).unwrap();
        }
        // Add segments for another recording
        let other_id = RecordingId::new();
        for i in 0..3 {
            let segment = CallSegment {
                recording_id: other_id,
                start_ms: i * 5500,
                end_ms: i * 5500 + 5000,
                ..Default::default()
            };
            repo.save(segment).unwrap();
        }
        let segments = repo.find_by_recording(&recording_id).unwrap();
        assert_eq!(segments.len(), 5);
    }

    /// Time-range query over 6 back-to-back 5 s segments spanning 0-30 s.
    #[test]
    fn test_segment_repository_by_time_range() {
        let repo = MockSegmentRepository::new();
        let recording_id = RecordingId::new();
        // Add segments spanning 0-30 seconds
        for i in 0..6 {
            let segment = CallSegment {
                recording_id,
                start_ms: i * 5000,
                end_ms: (i + 1) * 5000,
                ..Default::default()
            };
            repo.save(segment).unwrap();
        }
        // Find segments in 10-20 second range
        let segments = repo
            .find_by_time_range(&recording_id, 10000, 20000)
            .unwrap();
        assert_eq!(segments.len(), 2); // Segments at 10-15s and 15-20s
    }
}
// ============================================================================
// Quality Assessment Tests
// ============================================================================
// SNR-to-grade mapping and quality-filtered segment queries.
mod quality_assessment {
    use super::*;

    /// Each (snr, expected grade) pair probes one side of a grading threshold.
    #[test]
    fn test_quality_grade_from_snr() {
        let cases = vec![
            (25.0, QualityGrade::Excellent),
            (20.1, QualityGrade::Excellent),
            (15.0, QualityGrade::Good),
            (10.1, QualityGrade::Good),
            (7.0, QualityGrade::Fair),
            (5.1, QualityGrade::Fair),
            (3.0, QualityGrade::Poor),
            (0.1, QualityGrade::Poor),
            (-5.0, QualityGrade::Unusable),
        ];
        for (snr, expected) in cases {
            assert_eq!(QualityGrade::from_snr(snr), expected, "snr = {}", snr);
        }
    }

    /// Filtering by a minimum grade includes that grade and everything better.
    #[test]
    fn test_find_segments_by_quality() {
        let repo = MockSegmentRepository::new();
        // One segment per quality tier.
        for &snr in &[25.0, 15.0, 7.0, 3.0, -5.0] {
            repo.save(create_test_segment_with_snr(snr)).unwrap();
        }
        // "Good" admits Excellent and Good.
        assert_eq!(repo.find_by_quality(QualityGrade::Good).unwrap().len(), 2);
        // "Fair" additionally admits Fair.
        assert_eq!(repo.find_by_quality(QualityGrade::Fair).unwrap().len(), 3);
    }

    /// Walk a single segment through increasing clipping severity.
    #[test]
    fn test_segment_clipping_detection() {
        let mut segment = create_test_segment();

        segment.clipping_score = 0.0; // pristine
        assert!(segment.clipping_score < 0.01);

        segment.clipping_score = 0.05; // minor clipping
        assert!(segment.clipping_score < 0.1);

        segment.clipping_score = 0.3; // severe clipping
        assert!(segment.clipping_score > 0.2);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test tying the audio fixtures together: a recording, its
    /// segment sequence, and a spectrogram with the expected Perch shape.
    #[test]
    fn test_audio_integration_smoke_test() {
        let recording = create_test_recording();
        let segments = create_segment_sequence(5, 500);
        let spectrogram = create_test_spectrogram();

        // The recording must be long enough to contain its last segment.
        let last_end = segments.last().unwrap().end_ms;
        assert!(recording.duration_ms >= last_end);

        // Canonical 500 x 128 spectrogram shape.
        assert_eq!((spectrogram.len(), spectrogram[0].len()), (500, 128));
    }
}

View File

@@ -0,0 +1,603 @@
//! Integration tests for Embedding Context
//!
//! Tests for ONNX model loading, embedding generation, L2 normalization,
//! quantization/dequantization, and batch embedding operations.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
// ============================================================================
// ONNX Model Loading Tests
// ============================================================================
// Model metadata and adapter configuration checks.
mod model_loading {
    use super::*;

    /// Default model metadata must describe Perch 2.0 with 1536-dim output.
    #[test]
    fn test_model_version_configuration() {
        let model = ModelVersion::default();
        assert_eq!(model.name, "perch");
        assert_eq!(model.version, "2.0");
        assert_eq!(model.dimensions, 1536);
    }

    /// A freshly constructed adapter reports the Perch model version.
    #[test]
    fn test_mock_model_adapter_creation() {
        let version = MockEmbeddingModelAdapter::new().model_version();
        assert_eq!(version.name, "perch");
        assert_eq!(version.dimensions, 1536);
    }

    /// The builder-style override changes the reported dimensionality.
    #[test]
    fn test_model_adapter_with_custom_dimensions() {
        let version = MockEmbeddingModelAdapter::new()
            .with_dimensions(768)
            .model_version();
        assert_eq!(version.dimensions, 768);
    }

    /// The embedding length must agree with the configured model dimensions.
    #[test]
    fn test_model_output_dimensions_match_config() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);
        let embedding = adapter.embed(&audio).unwrap();
        assert_eq!(embedding.len(), adapter.model_version().dimensions);
    }
}
// ============================================================================
// Embedding Generation Tests
// ============================================================================
// Behavioural checks on the embedding adapter and embedding fixtures.
mod embedding_generation {
    use super::*;

    /// The adapter produces a finite 1536-dim vector from 5 s of audio.
    #[test]
    fn test_generate_embedding_from_audio() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);
        let embedding = adapter.embed(&audio).unwrap();
        assert_eq!(embedding.len(), 1536);
        assert!(
            embedding.iter().all(|x| !x.is_nan() && !x.is_infinite()),
            "Embedding should not contain NaN or Inf"
        );
    }

    /// Model output must be L2-normalized (unit Euclidean norm).
    #[test]
    fn test_embedding_output_is_normalized() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);
        let embedding = adapter.embed(&audio).unwrap();
        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!(
            (norm - 1.0).abs() < 0.0001,
            "Embedding should be L2-normalized, got norm {}",
            norm
        );
    }

    /// Embedding the same audio twice must yield identical vectors.
    #[test]
    fn test_embedding_deterministic() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);
        let embedding1 = adapter.embed(&audio).unwrap();
        let embedding2 = adapter.embed(&audio).unwrap();
        assert_eq!(embedding1, embedding2, "Same input should produce same output");
    }

    /// Distinct inputs (here: a rescaled copy) must map to distinct embeddings.
    #[test]
    fn test_different_audio_produces_different_embeddings() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio1 = create_test_audio_samples(5000, 32000);
        let audio2: Vec<f32> = audio1.iter().map(|x| x * 0.5).collect();
        let embedding1 = adapter.embed(&audio1).unwrap();
        let embedding2 = adapter.embed(&audio2).unwrap();
        let distance = cosine_distance(&embedding1, &embedding2);
        assert!(
            distance > 0.01,
            "Different audio should produce different embeddings"
        );
    }

    /// Fixture embeddings carry the expected dimensions, norm, and model tag.
    #[test]
    fn test_embedding_entity_creation() {
        let embedding = create_test_embedding();
        assert_eq!(embedding.vector.len(), 1536);
        assert!(embedding.norm > 0.0);
        assert_eq!(embedding.model_version.name, "perch");
    }

    /// The stored `norm` field must agree with the actual vector norm.
    #[test]
    fn test_embedding_with_specific_vector() {
        let vector = vec![1.0; 1536];
        let embedding = create_test_embedding_with_vector(vector.clone());
        assert_eq!(embedding.vector.len(), 1536);
        let norm: f32 = embedding.vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - embedding.norm).abs() < 0.0001);
    }
}
// ============================================================================
// L2 Normalization Tests
// ============================================================================
// Tests for the L2-normalization helper used to unit-scale embeddings.
mod normalization {
    use super::*;

    /// An already-unit basis vector passes through unchanged.
    #[test]
    fn test_l2_normalize_unit_vector() {
        let vector = vec![1.0, 0.0, 0.0];
        let normalized = l2_normalize(&vector);
        assert_eq!(normalized, vec![1.0, 0.0, 0.0]);
    }

    /// A 3-4-5 triangle normalizes to (0.6, 0.8).
    #[test]
    fn test_l2_normalize_simple_vector() {
        let vector = vec![3.0, 4.0];
        let normalized = l2_normalize(&vector);
        assert!((normalized[0] - 0.6).abs() < 0.0001);
        assert!((normalized[1] - 0.8).abs() < 0.0001);
    }

    /// Normalization only rescales: component ratios are unchanged.
    #[test]
    fn test_l2_normalize_preserves_direction() {
        let vector = vec![1.0, 2.0, 3.0, 4.0];
        let normalized = l2_normalize(&vector);
        // Check ratios are preserved
        let original_ratio = vector[1] / vector[0];
        let normalized_ratio = normalized[1] / normalized[0];
        assert!((original_ratio - normalized_ratio).abs() < 0.0001);
    }

    /// Works at the production embedding dimensionality (1536).
    #[test]
    fn test_l2_normalize_high_dimensional() {
        let vector = create_random_vector(1536);
        let normalized = l2_normalize(&vector);
        assert_normalized(&normalized, 0.0001);
    }

    /// The zero vector has no direction; it must remain all zeros.
    #[test]
    fn test_l2_normalize_zero_vector() {
        let vector = vec![0.0; 10];
        let normalized = l2_normalize(&vector);
        // Zero vector should remain zero
        assert!(normalized.iter().all(|x| *x == 0.0));
    }

    /// Normalizing twice is a no-op (idempotence).
    #[test]
    fn test_l2_normalize_idempotent() {
        let vector = create_random_vector(1536);
        let normalized1 = l2_normalize(&vector);
        let normalized2 = l2_normalize(&normalized1);
        for (a, b) in normalized1.iter().zip(normalized2.iter()) {
            assert!((a - b).abs() < 0.0001);
        }
    }

    /// The fixture helper itself must hand back unit-norm vectors.
    #[test]
    fn test_normalized_vector_creation() {
        let vector = create_normalized_vector(1536);
        assert_normalized(&vector, 0.0001);
    }

    /// Normalizing a batch yields a unit vector for every element.
    #[test]
    fn test_batch_normalization() {
        let vectors: Vec<Vec<f32>> = (0..10).map(|i| create_deterministic_vector(1536, i)).collect();
        let normalized: Vec<Vec<f32>> = vectors.iter().map(|v| l2_normalize(v)).collect();
        // Fix: iterate directly instead of `enumerate()` — the index was
        // unused and triggered an unused-variable warning.
        for norm_vec in &normalized {
            assert_normalized(norm_vec, 0.0001);
        }
    }
}
// ============================================================================
// Quantization/Dequantization Tests
// ============================================================================
mod quantization {
use super::*;
/// Quantize an f32 vector to i8 (scalar quantization).
///
/// Maps `[min, max]` symmetrically onto `[-127, 127]` and returns the
/// quantized codes together with the `(scale, zero_point)` pair needed to
/// invert the mapping (`value ≈ code * scale + zero_point`).
///
/// An empty input returns an empty vector with a neutral `(0.0, 0.0)`
/// instead of the non-finite values a min/max fold over nothing would
/// otherwise produce (`scale = -inf`, `zero_point = NaN`).
fn quantize_i8(vector: &[f32]) -> (Vec<i8>, f32, f32) {
    // Guard: fold over an empty slice yields +inf/-inf extrema, which
    // would poison scale and zero_point with non-finite values.
    if vector.is_empty() {
        return (Vec::new(), 0.0, 0.0);
    }
    let min_val = vector.iter().cloned().fold(f32::INFINITY, f32::min);
    let max_val = vector.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    // Use symmetric quantization: map [min, max] to [-127, 127]
    let scale = (max_val - min_val) / 254.0; // 254 = 127 - (-127)
    let zero_point = (min_val + max_val) / 2.0; // Midpoint of input range
    let quantized: Vec<i8> = vector
        .iter()
        .map(|v| {
            if scale == 0.0 {
                // All inputs identical: everything maps to the zero point.
                0i8
            } else {
                let scaled = ((v - zero_point) / scale).round();
                // Clamp via i16 so out-of-range rounding cannot wrap.
                (scaled as i16).clamp(-127, 127) as i8
            }
        })
        .collect();
    (quantized, scale, zero_point)
}
/// Reconstruct an f32 vector from its i8 scalar-quantized form by
/// inverting the affine mapping: `value = code * scale + zero_point`.
fn dequantize_i8(quantized: &[i8], scale: f32, zero_point: f32) -> Vec<f32> {
    let mut restored = Vec::with_capacity(quantized.len());
    for &code in quantized {
        restored.push(code as f32 * scale + zero_point);
    }
    restored
}
/// Quantize -> dequantize must reconstruct the vector with small RMSE.
#[test]
fn test_quantization_roundtrip() {
    let original = create_random_vector(1536);
    let (quantized, scale, zero_point) = quantize_i8(&original);
    let dequantized = dequantize_i8(&quantized, scale, zero_point);
    assert_eq!(quantized.len(), original.len());
    assert_eq!(dequantized.len(), original.len());
    // Check reconstruction error is small (mean squared error -> RMSE).
    let mse: f32 = original
        .iter()
        .zip(dequantized.iter())
        .map(|(o, d)| (o - d).powi(2))
        .sum::<f32>()
        / original.len() as f32;
    let rmse = mse.sqrt();
    assert!(
        rmse < 0.1,
        "Reconstruction RMSE {} is too high",
        rmse
    );
}
#[test]
fn test_quantization_compression_ratio() {
let original = create_random_vector(1536);
let (quantized, _, _) = quantize_i8(&original);
let original_bytes = original.len() * std::mem::size_of::<f32>();
let quantized_bytes = quantized.len() * std::mem::size_of::<i8>();
let compression_ratio = original_bytes as f32 / quantized_bytes as f32;
assert!(
compression_ratio >= 3.9,
"i8 quantization should achieve ~4x compression, got {}x",
compression_ratio
);
}
#[test]
fn test_quantization_preserves_relative_order() {
let vector1 = vec![0.1, 0.5, 0.9];
let vector2 = vec![0.2, 0.4, 0.8];
let (q1, s1, z1) = quantize_i8(&vector1);
let (q2, s2, z2) = quantize_i8(&vector2);
// Verify relative ordering within vectors
assert!(q1[0] < q1[1] && q1[1] < q1[2]);
assert!(q2[0] < q2[1] && q2[1] < q2[2]);
}
#[test]
fn test_quantization_similarity_preservation() {
// Create two similar vectors
let v1 = create_deterministic_vector(1536, 0);
let v2: Vec<f32> = v1.iter().map(|x| x + 0.01).collect();
let v2 = l2_normalize(&v2);
let v1 = l2_normalize(&v1);
let original_similarity = 1.0 - cosine_distance(&v1, &v2);
// Quantize and compute similarity
let (q1, s1, z1) = quantize_i8(&v1);
let (q2, s2, z2) = quantize_i8(&v2);
let d1 = dequantize_i8(&q1, s1, z1);
let d2 = dequantize_i8(&q2, s2, z2);
let quantized_similarity = 1.0 - cosine_distance(&d1, &d2);
assert!(
(original_similarity - quantized_similarity).abs() < 0.1,
"Similarity should be preserved: original={}, quantized={}",
original_similarity,
quantized_similarity
);
}
#[test]
fn test_product_quantization_concept() {
// Simulate product quantization by splitting vector into subvectors
let vector = create_random_vector(1536);
let num_subvectors = 48;
let subvector_dim = 1536 / num_subvectors;
let subvectors: Vec<&[f32]> = vector.chunks(subvector_dim).collect();
assert_eq!(subvectors.len(), num_subvectors);
// Each subvector can be quantized independently
for subvec in &subvectors {
let (quantized, _, _) = quantize_i8(subvec);
assert_eq!(quantized.len(), subvector_dim);
}
}
}
// ============================================================================
// Batch Embedding Tests
// ============================================================================
mod batch_embedding {
    use super::*;
    /// A batch of five segments yields five normalized 1536-dim embeddings.
    #[test]
    fn test_batch_embed_multiple_segments() {
        let adapter = MockEmbeddingModelAdapter::new();
        // The batch position is irrelevant to the fixture, so discard it with
        // `|_|` (the previous `|i|` binding was unused and warned).
        let audio_batch: Vec<Vec<f32>> = (0..5)
            .map(|_| create_test_audio_samples(5000, 32000))
            .collect();
        let embeddings = adapter.embed_batch(&audio_batch).unwrap();
        assert_eq!(embeddings.len(), 5);
        for embedding in &embeddings {
            assert_eq!(embedding.len(), 1536);
            assert_normalized(embedding, 0.0001);
        }
    }
    /// Embedding the same clip singly and via a batch must agree exactly.
    #[test]
    fn test_batch_embedding_consistency() {
        let adapter = MockEmbeddingModelAdapter::new();
        let audio = create_test_audio_samples(5000, 32000);
        // Single embedding
        let single = adapter.embed(&audio).unwrap();
        // Batch embedding with same audio. Bind the batch to a local rather
        // than passing `&vec![..]` (clippy::useless_vec on the temporary).
        let batch_input = vec![audio];
        let batch = adapter.embed_batch(&batch_input).unwrap();
        assert_eq!(single, batch[0], "Single and batch should produce same result");
    }
    /// The adapter must return exactly one embedding per input at every batch size.
    #[test]
    fn test_batch_embedding_performance_scaling() {
        let adapter = MockEmbeddingModelAdapter::new();
        // Test with increasing batch sizes
        let batch_sizes = vec![1, 10, 50, 100];
        for batch_size in batch_sizes {
            let audio_batch: Vec<Vec<f32>> = (0..batch_size)
                .map(|_| create_test_audio_samples(5000, 32000))
                .collect();
            let embeddings = adapter.embed_batch(&audio_batch).unwrap();
            assert_eq!(embeddings.len(), batch_size);
        }
    }
    /// An empty batch is valid input and produces an empty result.
    #[test]
    fn test_batch_embedding_handles_empty_batch() {
        let adapter = MockEmbeddingModelAdapter::new();
        let empty_batch: Vec<Vec<f32>> = vec![];
        let embeddings = adapter.embed_batch(&empty_batch).unwrap();
        assert_eq!(embeddings.len(), 0);
    }
    /// The factory produces the requested number of 1536-dim embeddings.
    #[test]
    fn test_embedding_batch_factory() {
        let embeddings = create_embedding_batch(10);
        assert_eq!(embeddings.len(), 10);
        for embedding in &embeddings {
            assert_eq!(embedding.vector.len(), 1536);
        }
    }
    /// Embeddings generated as "similar" to a base stay within cosine
    /// distance 0.5 of it.
    #[test]
    fn test_similar_embeddings_factory() {
        let base = create_normalized_vector(1536);
        let similar = create_similar_embeddings(&base, 5, 0.1);
        assert_eq!(similar.len(), 5);
        // All should be similar to base
        for emb in &similar {
            let distance = cosine_distance(&base, &emb.vector);
            assert!(
                distance < 0.5,
                "Similar embedding should be close to base"
            );
        }
    }
}
// ============================================================================
// Embedding Repository Tests
// ============================================================================
mod repository {
    use super::*;
    /// Exercise the full create / read / delete lifecycle of the repository.
    #[test]
    fn test_embedding_repository_crud() {
        let repo = MockEmbeddingRepository::new();
        let record = create_test_embedding();
        let (record_id, seg_id) = (record.id, record.segment_id);
        // Create.
        repo.save(record).unwrap();
        assert_eq!(repo.count(), 1);
        // Read back by primary key.
        let by_id = repo.find_by_id(&record_id).unwrap().unwrap();
        assert_eq!(by_id.id, record_id);
        // Read back through the segment association.
        let by_seg = repo.find_by_segment(&seg_id).unwrap().unwrap();
        assert_eq!(by_seg.id, record_id);
        // Delete.
        repo.delete(&record_id).unwrap();
        assert_eq!(repo.count(), 0);
    }
    /// Batch save stores every record in one call.
    #[test]
    fn test_embedding_repository_batch_save() {
        let repo = MockEmbeddingRepository::new();
        repo.batch_save(create_embedding_batch(10)).unwrap();
        assert_eq!(repo.count(), 10);
    }
    /// Records are filterable by the model that produced them.
    #[test]
    fn test_embedding_repository_find_by_model() {
        let repo = MockEmbeddingRepository::new();
        // Alternate model names: even indices -> perch (3 of 5), odd -> birdnet (2 of 5).
        for i in 0..5 {
            let mut record = create_test_embedding();
            record.model_version.name =
                (if i % 2 == 0 { "perch" } else { "birdnet" }).to_string();
            repo.save(record).unwrap();
        }
        assert_eq!(repo.find_by_model("perch").unwrap().len(), 3);
        assert_eq!(repo.find_by_model("birdnet").unwrap().len(), 2);
    }
    /// `get_all_vectors` returns one 1536-dim vector per stored record.
    #[test]
    fn test_embedding_repository_get_all_vectors() {
        let repo = MockEmbeddingRepository::new();
        for record in create_embedding_batch(5) {
            repo.save(record).unwrap();
        }
        let stored = repo.get_all_vectors();
        assert_eq!(stored.len(), 5);
        for (_, vector) in &stored {
            assert_eq!(vector.len(), 1536);
        }
    }
}
// ============================================================================
// Model Error Handling Tests
// ============================================================================
mod error_handling {
    use super::*;
    /// With a 0% failure rate the mock adapter must always succeed.
    #[test]
    fn test_model_with_failure_rate() {
        let adapter = MockEmbeddingModelAdapter::new().with_failure_rate(0.0);
        let samples = create_test_audio_samples(5000, 32000);
        assert!(adapter.embed(&samples).is_ok());
    }
    /// A 768-dim embedding is detectably not a Perch (1536-dim) vector.
    #[test]
    fn test_invalid_vector_dimensions_detected() {
        let wrong_dims = create_test_embedding_with_vector(vec![1.0; 768]);
        assert_eq!(wrong_dims.vector.len(), 768);
        assert_ne!(wrong_dims.vector.len(), 1536);
    }
    /// Every factory-produced embedding passes validation at 1536 dims.
    #[test]
    fn test_embedding_validation() {
        assert_valid_embeddings(&create_embedding_batch(10), 1536);
    }
    /// A matching dimension assertion passes quietly.
    #[test]
    fn test_dimension_assertion() {
        assert_dimensions(&create_random_vector(1536), 1536);
    }
    /// A mismatched dimension assertion must panic.
    #[test]
    #[should_panic]
    fn test_dimension_assertion_fails_on_mismatch() {
        assert_dimensions(&create_random_vector(768), 1536);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// End-to-end smoke test: audio -> embedding -> repository.
    #[test]
    fn test_embedding_integration_smoke_test() {
        // Generate an embedding from synthetic audio.
        let adapter = MockEmbeddingModelAdapter::new();
        let samples = create_test_audio_samples(5000, 32000);
        let vector = adapter.embed(&samples).unwrap();
        // It must be a unit-norm Perch-sized vector.
        assert_eq!(vector.len(), 1536);
        assert_normalized(&vector, 0.0001);
        // It must round-trip into the repository.
        let repo = MockEmbeddingRepository::new();
        repo.save(create_test_embedding_with_vector(vector)).unwrap();
        assert_eq!(repo.count(), 1);
    }
}

View File

@@ -0,0 +1,664 @@
//! Integration tests for Interpretation Context
//!
//! Tests for evidence pack building, claim generation with citations,
//! and validation that all claims have evidence references.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::collections::HashSet;
// ============================================================================
// Evidence Pack Building Tests
// ============================================================================
mod evidence_pack_building {
    use super::*;
    // Baseline: the default fixture pack carries neighbors, exemplars, and a
    // positive SNR measurement.
    #[test]
    fn test_create_evidence_pack() {
        let pack = create_test_evidence_pack();
        assert!(!pack.neighbors.is_empty());
        assert!(!pack.exemplars.is_empty());
        assert!(pack.signal_quality.snr > 0.0);
    }
    // Requesting N neighbors yields exactly N, each with a non-negative
    // distance and a positive relevance weight.
    #[test]
    fn test_evidence_pack_with_neighbors() {
        let pack = create_test_evidence_pack_with_neighbors(10);
        assert_eq!(pack.neighbors.len(), 10);
        // Verify neighbor properties
        for neighbor in &pack.neighbors {
            assert!(neighbor.distance >= 0.0);
            assert!(neighbor.relevance > 0.0);
        }
    }
    // The builder honors its configured neighbor count exactly and its
    // exemplar count as an upper bound.
    #[test]
    fn test_evidence_pack_builder() {
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(15)
            .with_exemplar_count(3);
        let segment = create_test_segment();
        let search_results = create_search_results(20);
        let clusters = create_test_clusters(5);
        let pack = builder.build(&segment, &search_results, &clusters).unwrap();
        assert_eq!(pack.neighbors.len(), 15);
        assert!(pack.exemplars.len() <= 3);
    }
    // The segment's SNR is carried into the pack unchanged, and 25 dB maps to
    // the Excellent grade (per the mock builder's grading).
    #[test]
    fn test_evidence_pack_signal_quality() {
        let builder = MockEvidencePackBuilder::new();
        let segment = create_test_segment_with_snr(25.0);
        let search_results = create_search_results(10);
        let clusters = create_test_clusters(2);
        let pack = builder.build(&segment, &search_results, &clusters).unwrap();
        assert_eq!(pack.signal_quality.snr, 25.0);
        assert!(matches!(
            pack.signal_quality.quality_grade,
            Some(QualityGrade::Excellent)
        ));
    }
    #[test]
    fn test_evidence_pack_includes_cluster_ids() {
        let pack = create_test_evidence_pack_with_neighbors(10);
        // Some neighbors should have cluster IDs
        let has_cluster = pack.neighbors.iter().any(|n| n.cluster_id.is_some());
        assert!(
            has_cluster,
            "At least some neighbors should have cluster assignments"
        );
    }
    // Relevance follows the fixture's 1 / (1 + distance) formula exactly.
    #[test]
    fn test_evidence_pack_relevance_scoring() {
        let pack = create_test_evidence_pack_with_neighbors(10);
        for (i, neighbor) in pack.neighbors.iter().enumerate() {
            // Relevance should be inverse of distance
            let expected_relevance = 1.0 / (1.0 + neighbor.distance);
            assert!(
                (neighbor.relevance - expected_relevance).abs() < 0.001,
                "Neighbor {} has wrong relevance: {} vs expected {}",
                i,
                neighbor.relevance,
                expected_relevance
            );
        }
    }
    // No search hits means no neighbors — the builder must not invent any.
    #[test]
    fn test_evidence_pack_from_empty_search() {
        let builder = MockEvidencePackBuilder::new();
        let segment = create_test_segment();
        let empty_results: Vec<SearchResult> = vec![];
        let clusters = create_test_clusters(2);
        let pack = builder.build(&segment, &empty_results, &clusters).unwrap();
        assert_eq!(pack.neighbors.len(), 0);
    }
    // created_at is stamped at build time, so it must be within the last minute.
    #[test]
    fn test_evidence_pack_timestamp() {
        let pack = create_test_evidence_pack();
        let now = chrono::Utc::now();
        let age = now - pack.created_at;
        // Pack should have been created recently
        assert!(
            age.num_seconds() < 60,
            "Evidence pack should be recently created"
        );
    }
}
// ============================================================================
// Claim Generation Tests
// ============================================================================
mod claim_generation {
    use super::*;
    /// Any evidence pack yields at least one statement with positive confidence.
    #[test]
    fn test_generate_interpretation_from_evidence() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        assert!(!interp.statements.is_empty());
        assert!(interp.confidence > 0.0);
    }
    /// The generator must attach citations to its output.
    #[test]
    fn test_interpretation_includes_citations() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        assert!(
            !interp.citations.is_empty(),
            "Interpretation should have citations"
        );
    }
    /// The generator's own validator confirms every statement is cited.
    #[test]
    fn test_all_claims_have_citations() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        assert!(
            generator.validate_citations(&interp),
            "All claims should have corresponding citations"
        );
    }
    /// Neighbor evidence must appear among the cited evidence kinds.
    #[test]
    fn test_citation_evidence_types() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        let kinds: HashSet<_> = interp.citations.iter().map(|c| &c.evidence_type).collect();
        assert!(
            kinds.contains(&EvidenceType::Neighbor),
            "Should cite neighbors as evidence"
        );
    }
    /// Citation strength is a normalized weight.
    #[test]
    fn test_citation_strength_values() {
        for citation in &create_test_citations(10) {
            assert!(
                (0.0..=1.0).contains(&citation.strength),
                "Citation strength should be in [0, 1]"
            );
        }
    }
    /// Both dense (20-neighbor) and sparse (2-neighbor) evidence must still
    /// produce a strictly positive confidence.
    #[test]
    fn test_interpretation_confidence_from_citations() {
        let generator = MockInterpretationGenerator::new();
        let dense = generator
            .generate(&create_test_evidence_pack_with_neighbors(20))
            .unwrap();
        let sparse = generator
            .generate(&create_test_evidence_pack_with_neighbors(2))
            .unwrap();
        assert!(dense.confidence > 0.0);
        assert!(sparse.confidence > 0.0);
    }
    /// The interpretation factory links back to the pack it was built from.
    #[test]
    fn test_factory_interpretation() {
        let evidence_pack_id = EvidencePackId::new();
        let interp = create_test_interpretation(evidence_pack_id);
        assert_eq!(interp.evidence_pack_id, evidence_pack_id);
        assert!(!interp.statements.is_empty());
        assert!(!interp.citations.is_empty());
    }
}
// ============================================================================
// Citation Validation Tests
// ============================================================================
mod citation_validation {
    use super::*;
    /// Every citation, whatever its evidence kind, must carry a non-empty
    /// evidence reference.
    #[test]
    fn test_citation_links_to_valid_evidence() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        for citation in &interp.citations {
            // The exhaustive match keeps this test in sync if EvidenceType
            // ever grows a new variant.
            match &citation.evidence_type {
                EvidenceType::Neighbor
                | EvidenceType::Exemplar
                | EvidenceType::Cluster
                | EvidenceType::Motif => assert!(!citation.evidence_id.is_empty()),
            }
        }
    }
    /// Citations may only point at statements that actually exist.
    #[test]
    fn test_no_orphan_citations() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        for citation in &interp.citations {
            assert!(
                interp.statements.contains(&citation.claim),
                "Citation references non-existent claim: {}",
                citation.claim
            );
        }
    }
    /// Evidence IDs are UUID strings.
    #[test]
    fn test_citation_uuid_format() {
        for citation in &create_test_citations(5) {
            assert!(
                uuid::Uuid::parse_str(&citation.evidence_id).is_ok(),
                "Evidence ID should be valid UUID: {}",
                citation.evidence_id
            );
        }
    }
    /// Every statement must be backed by at least one citation.
    #[test]
    fn test_citation_claim_matching() {
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&create_test_evidence_pack()).unwrap();
        // Collect the set of claims that received a citation...
        let cited: HashSet<String> = interp
            .citations
            .iter()
            .map(|c| c.claim.clone())
            .collect();
        // ...then require every statement to be in that set.
        for statement in &interp.statements {
            assert!(
                cited.contains(statement),
                "Statement has no citation: {}",
                statement
            );
        }
    }
}
// ============================================================================
// RAB (Retrieval-Augmented Bioacoustics) Pattern Tests
// ============================================================================
mod rab_pattern {
    use super::*;
    // The builder must retrieve at least the configured neighbor depth when
    // enough search results are available (50 results, depth 10).
    #[test]
    fn test_rab_retrieval_depth() {
        // RAB should retrieve sufficient evidence
        let builder = MockEvidencePackBuilder::new().with_neighbor_count(10);
        let segment = create_test_segment();
        let search_results = create_search_results(50);
        let clusters = create_test_clusters(5);
        let pack = builder.build(&segment, &search_results, &clusters).unwrap();
        assert!(
            pack.neighbors.len() >= 10,
            "RAB should retrieve requested depth"
        );
    }
    // Evidence should span multiple kinds: both neighbors and exemplars.
    #[test]
    fn test_rab_evidence_diversity() {
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(10)
            .with_exemplar_count(5);
        let segment = create_test_segment();
        let search_results = create_search_results(20);
        let clusters = create_test_clusters(5);
        let pack = builder.build(&segment, &search_results, &clusters).unwrap();
        // Should include both neighbors and exemplars
        assert!(!pack.neighbors.is_empty(), "Should have neighbors");
        assert!(!pack.exemplars.is_empty(), "Should have exemplars");
    }
    // Generated statements must use objective, structural vocabulary rather
    // than speculation — the keyword list below mirrors the mock generator's
    // phrasing.
    #[test]
    fn test_rab_constrained_interpretation() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();
        let interpretation = generator.generate(&evidence_pack).unwrap();
        // Statements should be descriptive (constrained to evidence)
        for statement in &interpretation.statements {
            // Check for structural descriptors (objective language)
            let is_structural = statement.contains("similar")
                || statement.contains("distance")
                || statement.contains("cluster")
                || statement.contains("neighbor")
                || statement.contains("aligns");
            assert!(
                is_structural,
                "Statement should use structural descriptors: {}",
                statement
            );
        }
    }
    // Transparency: on average, each statement carries at least one citation.
    #[test]
    fn test_rab_transparency() {
        let generator = MockInterpretationGenerator::new();
        let evidence_pack = create_test_evidence_pack();
        let interpretation = generator.generate(&evidence_pack).unwrap();
        // Every interpretation should be traceable to evidence
        let citation_count = interpretation.citations.len();
        let statement_count = interpretation.statements.len();
        // Average citations per statement; max(1) guards against division by
        // zero when there are no statements.
        let avg_citations = citation_count as f32 / statement_count.max(1) as f32;
        assert!(
            avg_citations >= 1.0,
            "Each statement should have at least one citation on average"
        );
    }
    // NOTE(review): despite the name, this asserts on *citation counts*, not
    // the confidence field — consider also comparing
    // rich_interp.confidence >= sparse_interp.confidence.
    #[test]
    fn test_rab_confidence_reflects_evidence_quality() {
        let generator = MockInterpretationGenerator::new();
        // Rich evidence: many neighbors, several exemplars, excellent SNR.
        let rich_pack = EvidencePack {
            neighbors: create_test_neighbors(20),
            exemplars: (0..5).map(|_| EmbeddingId::new()).collect(),
            signal_quality: SignalQuality {
                snr: 25.0,
                quality_grade: Some(QualityGrade::Excellent),
                ..Default::default()
            },
            ..Default::default()
        };
        // Sparse evidence: two neighbors, no exemplars, poor SNR.
        let sparse_pack = EvidencePack {
            neighbors: create_test_neighbors(2),
            exemplars: vec![],
            signal_quality: SignalQuality {
                snr: 5.0,
                quality_grade: Some(QualityGrade::Fair),
                ..Default::default()
            },
            ..Default::default()
        };
        let rich_interp = generator.generate(&rich_pack).unwrap();
        let sparse_interp = generator.generate(&sparse_pack).unwrap();
        // Rich evidence should yield higher confidence
        assert!(
            rich_interp.citations.len() >= sparse_interp.citations.len(),
            "Rich evidence should produce more citations"
        );
    }
}
// ============================================================================
// Structural Descriptor Tests
// ============================================================================
mod structural_descriptors {
    /// Pitch-contour stats must be internally consistent (min <= mean <= max).
    #[test]
    fn test_pitch_contour_description() {
        // Mock pitch contour stats
        struct PitchContour {
            min_freq: f32,
            max_freq: f32,
            mean_freq: f32,
            contour_type: String,
        }
        let ascending = PitchContour {
            min_freq: 2000.0,
            max_freq: 4000.0,
            mean_freq: 3000.0,
            contour_type: "ascending".to_string(),
        };
        // Read every field: `contour_type` was previously never inspected and
        // produced a dead-code (field never read) warning.
        assert_eq!(ascending.contour_type, "ascending");
        assert!(ascending.max_freq > ascending.min_freq);
        assert!(ascending.mean_freq >= ascending.min_freq);
        assert!(ascending.mean_freq <= ascending.max_freq);
    }
    /// Spectral-texture metrics stay in their physical ranges.
    #[test]
    fn test_spectral_texture_metrics() {
        struct SpectralTexture {
            harmonicity: f32,
            spectral_centroid: f32,
            spectral_flatness: f32,
        }
        let texture = SpectralTexture {
            harmonicity: 0.8,
            spectral_centroid: 3500.0,
            spectral_flatness: 0.2,
        };
        // Harmonicity and flatness should be in [0, 1]
        assert!(texture.harmonicity >= 0.0 && texture.harmonicity <= 1.0);
        assert!(texture.spectral_flatness >= 0.0 && texture.spectral_flatness <= 1.0);
        // Centroid should be in audible range
        assert!(texture.spectral_centroid >= 20.0 && texture.spectral_centroid <= 20000.0);
    }
    /// A rhythm profile has one inter-syllable interval fewer than syllables,
    /// a regularity in [0, 1], and a duration covering all of its gaps.
    #[test]
    fn test_rhythm_profile() {
        struct RhythmProfile {
            duration_ms: u64,
            syllable_count: u32,
            inter_syllable_intervals: Vec<u64>,
            regularity: f32,
        }
        let profile = RhythmProfile {
            duration_ms: 2500,
            syllable_count: 4,
            inter_syllable_intervals: vec![200, 210, 205],
            regularity: 0.95,
        };
        assert_eq!(
            profile.inter_syllable_intervals.len(),
            profile.syllable_count as usize - 1
        );
        assert!(profile.regularity >= 0.0 && profile.regularity <= 1.0);
        // The total duration must at least cover the gaps between syllables
        // (also reads `duration_ms`, which previously triggered a dead-code
        // warning because it was never used).
        assert!(profile.duration_ms >= profile.inter_syllable_intervals.iter().sum::<u64>());
    }
}
// ============================================================================
// Hypothesis Generation Tests
// ============================================================================
mod hypothesis_generation {
    use super::*;
    /// A hypothesis is a non-empty, evidence-backed statement with a declared
    /// testability level.
    #[test]
    fn test_hypothesis_testability() {
        #[derive(Debug)]
        enum Testability {
            High,
            Medium,
            Low,
        }
        struct Hypothesis {
            statement: String,
            testability: Testability,
            supporting_evidence: Vec<String>,
        }
        let hypothesis = Hypothesis {
            statement: "Similar calls may indicate territorial behavior".to_string(),
            testability: Testability::Medium,
            supporting_evidence: vec![
                "neighbor_1".to_string(),
                "cluster_1".to_string(),
            ],
        };
        assert!(!hypothesis.statement.is_empty());
        assert!(!hypothesis.supporting_evidence.is_empty());
        // Read the testability field (it was previously never inspected) and
        // construct the remaining variants so nothing trips dead-code lints.
        assert!(matches!(hypothesis.testability, Testability::Medium));
        let _ = [Testability::High, Testability::Low];
    }
    /// A well-formed hypothesis references observable patterns from the pack.
    #[test]
    fn test_hypothesis_grounded_in_evidence() {
        let evidence_pack = create_test_evidence_pack();
        // A valid hypothesis should reference observable patterns
        let hypothesis = format!(
            "Based on {} similar neighbors with average distance {:.3}, this call type may be common in this habitat.",
            evidence_pack.neighbors.len(),
            evidence_pack.neighbors.iter().map(|n| n.distance).sum::<f32>() / evidence_pack.neighbors.len() as f32
        );
        assert!(hypothesis.contains("neighbor"));
    }
}
// ============================================================================
// Monitoring Summary Tests
// ============================================================================
mod monitoring_summary {
    /// Aggregate biodiversity statistics for a monitoring window.
    struct DiversityMetrics {
        species_richness: u32,
        shannon_index: f32,
        simpson_index: f32,
        evenness: f32,
    }
    /// Shannon diversity index H' = -sum(p_i * ln p_i) over species with
    /// non-zero counts. Returns 0.0 for an empty survey (total count of 0).
    fn compute_shannon_index(counts: &[u32]) -> f32 {
        let total: u32 = counts.iter().sum();
        if total == 0 {
            return 0.0;
        }
        let total_f = total as f32;
        counts
            .iter()
            .filter(|&&c| c > 0)
            .map(|&c| {
                let p = c as f32 / total_f;
                -p * p.ln()
            })
            .sum::<f32>()
    }
    /// A uniform distribution over k species maximizes entropy at ln(k).
    #[test]
    fn test_shannon_index_uniform() {
        let counts = vec![10, 10, 10, 10];
        let h = compute_shannon_index(&counts);
        let max_h = (counts.len() as f32).ln();
        assert!(
            (h - max_h).abs() < 0.001,
            "Uniform distribution should have max entropy"
        );
    }
    /// A single dominant species yields (near-)zero entropy.
    #[test]
    fn test_shannon_index_single_species() {
        let counts = vec![100, 0, 0, 0];
        let h = compute_shannon_index(&counts);
        assert!(h < 0.001, "Single species should have near-zero entropy");
    }
    /// All metrics must sit inside their mathematical ranges.
    #[test]
    fn test_diversity_metrics_valid_ranges() {
        let metrics = DiversityMetrics {
            species_richness: 15,
            shannon_index: 2.5,
            simpson_index: 0.85,
            evenness: 0.9,
        };
        // Also reads `species_richness`, which previously was never used and
        // triggered a dead-code (field never read) warning.
        assert!(metrics.species_richness > 0);
        assert!(metrics.shannon_index >= 0.0);
        assert!(metrics.simpson_index >= 0.0 && metrics.simpson_index <= 1.0);
        assert!(metrics.evenness >= 0.0 && metrics.evenness <= 1.0);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// End-to-end smoke test: evidence pack -> interpretation -> citation check.
    #[test]
    fn test_interpretation_integration_smoke_test() {
        // Assemble the evidence pack from fixtures.
        let segment = create_test_segment();
        let results = create_search_results(20);
        let clusters = create_test_clusters(5);
        let builder = MockEvidencePackBuilder::new()
            .with_neighbor_count(10)
            .with_exemplar_count(5);
        let pack = builder.build(&segment, &results, &clusters).unwrap();
        // Generate and structurally verify the interpretation.
        let generator = MockInterpretationGenerator::new();
        let interp = generator.generate(&pack).unwrap();
        assert!(!interp.statements.is_empty());
        assert!(!interp.citations.is_empty());
        assert!(interp.confidence > 0.0);
        // Every claim must be backed by a citation.
        assert!(generator.validate_citations(&interp));
    }
}

View File

@@ -0,0 +1,219 @@
//! Integration tests for 7sense bioacoustics platform
//!
//! This module organizes integration tests across all six bounded contexts:
//! - Audio Ingestion Context
//! - Embedding Context
//! - Vector Space Context
//! - Learning Context (via Analysis)
//! - Analysis Context
//! - Interpretation Context
//!
//! Tests are organized by context and follow the domain-driven design boundaries.
//!
//! Note: Individual test files (audio_test.rs, etc.) are compiled as separate
//! test binaries via [[test]] entries in Cargo.toml.
// Re-export commonly used test utilities
pub use crate::fixtures::*;
pub use crate::mocks::*;
/// Common test configuration
#[derive(Debug, Clone)]
pub struct TestConfig {
/// Sample rate for audio (32kHz for Perch 2.0)
pub sample_rate: u32,
/// Embedding dimensions (1536 for Perch 2.0)
pub embedding_dims: usize,
/// Default segment duration in ms
pub segment_duration_ms: u64,
/// HNSW M parameter
pub hnsw_m: usize,
/// HNSW ef_construction
pub hnsw_ef_construction: usize,
/// HNSW ef_search
pub hnsw_ef_search: usize,
/// Minimum cluster size for HDBSCAN
pub min_cluster_size: usize,
/// Target recall@10 for vector search
pub target_recall_at_10: f32,
}
impl Default for TestConfig {
    /// Balanced defaults: Perch 2.0 audio/embedding settings plus the
    /// standard HNSW/HDBSCAN parameters used across the test suite.
    fn default() -> Self {
        Self {
            sample_rate: 32000,          // Perch 2.0 native sample rate
            embedding_dims: 1536,        // Perch 2.0 embedding width
            segment_duration_ms: 5000,   // 5-second call segments
            hnsw_m: 16,
            hnsw_ef_construction: 200,
            hnsw_ef_search: 100,
            min_cluster_size: 5,
            target_recall_at_10: 0.95,
        }
    }
}
impl TestConfig {
    /// Config for fast tests: smaller HNSW graph, shallower search, looser
    /// clustering and recall targets. Everything else keeps the defaults.
    pub fn fast() -> Self {
        Self {
            target_recall_at_10: 0.90,
            min_cluster_size: 3,
            hnsw_m: 8,
            hnsw_ef_construction: 50,
            hnsw_ef_search: 20,
            ..Self::default()
        }
    }
    /// Config for high-quality tests: denser HNSW graph, deeper search,
    /// stricter clustering and recall targets, at the cost of runtime.
    pub fn high_quality() -> Self {
        Self {
            target_recall_at_10: 0.99,
            min_cluster_size: 10,
            hnsw_m: 32,
            hnsw_ef_construction: 400,
            hnsw_ef_search: 200,
            ..Self::default()
        }
    }
}
/// Shared test context that can be used across integration tests
pub struct IntegrationTestContext {
pub config: TestConfig,
pub recording_repo: MockRecordingRepository,
pub segment_repo: MockSegmentRepository,
pub embedding_repo: MockEmbeddingRepository,
pub vector_index: MockVectorIndex,
pub clustering_service: MockClusteringService,
pub evidence_builder: MockEvidencePackBuilder,
pub interpretation_generator: MockInterpretationGenerator,
}
impl Default for IntegrationTestContext {
    /// Equivalent to [`IntegrationTestContext::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl IntegrationTestContext {
    /// Build a context with the default [`TestConfig`].
    pub fn new() -> Self {
        // Delegate to `with_config` so the mock wiring lives in exactly one
        // place (previously this body duplicated all of `with_config`).
        Self::with_config(TestConfig::default())
    }
    /// Build a context whose vector index and clustering service are wired
    /// from `config`; all repositories start empty.
    pub fn with_config(config: TestConfig) -> Self {
        Self {
            config: config.clone(),
            recording_repo: MockRecordingRepository::new(),
            segment_repo: MockSegmentRepository::new(),
            embedding_repo: MockEmbeddingRepository::new(),
            vector_index: MockVectorIndex::with_config(HnswConfig {
                m: config.hnsw_m,
                ef_construction: config.hnsw_ef_construction,
                ef_search: config.hnsw_ef_search,
                max_layers: 6,
            }),
            clustering_service: MockClusteringService::with_params(config.min_cluster_size, 3),
            evidence_builder: MockEvidencePackBuilder::new(),
            interpretation_generator: MockInterpretationGenerator::new(),
        }
    }
    /// Populate context with test data: `num_recordings` recordings, each with
    /// `segments_per_recording` linked segments plus one embedding and one
    /// vector-index entry per segment. Returns the populated context.
    pub fn with_test_data(self, num_recordings: usize, segments_per_recording: usize) -> Self {
        for _ in 0..num_recordings {
            let recording = create_test_recording();
            let recording_id = recording.id;
            self.recording_repo.save(recording).unwrap();
            for i in 0..segments_per_recording {
                // Segments are laid out back-to-back: 5 s of audio every
                // 5.5 s, leaving a 500 ms gap between consecutive segments.
                let start_ms = i as u64 * 5500;
                let segment = CallSegment {
                    id: SegmentId::new(),
                    recording_id,
                    start_ms,
                    end_ms: start_ms + 5000,
                    ..Default::default()
                };
                let segment_id = segment.id;
                self.segment_repo.save(segment).unwrap();
                self.recording_repo
                    .add_segment_link(recording_id, segment_id);
                let embedding = Embedding {
                    segment_id,
                    ..Default::default()
                };
                let embedding_id = embedding.id;
                let vector = embedding.vector.clone();
                self.embedding_repo.save(embedding).unwrap();
                self.vector_index.insert(embedding_id, vector).unwrap();
            }
        }
        self
    }
}
/// Helper macro for integration-test setup (note: nothing here is async).
///
/// Variants:
/// - `setup_test!()` — context with the default configuration
/// - `setup_test!(fast)` — lower-quality but faster parameters
/// - `setup_test!(high_quality)` — slower, more accurate parameters
/// - `setup_test!(populated)` — default config pre-populated with
///   5 recordings × 10 segments each
#[macro_export]
macro_rules! setup_test {
    () => {
        IntegrationTestContext::new()
    };
    (fast) => {
        IntegrationTestContext::with_config(TestConfig::fast())
    };
    (high_quality) => {
        IntegrationTestContext::with_config(TestConfig::high_quality())
    };
    (populated) => {
        IntegrationTestContext::new().with_test_data(5, 10)
    };
}
#[cfg(test)]
mod tests {
    use super::*;
    /// A fresh context carries the Perch 2.0 defaults.
    #[test]
    fn test_integration_context_creation() {
        let ctx = IntegrationTestContext::new();
        assert_eq!(ctx.config.sample_rate, 32000);
        assert_eq!(ctx.config.embedding_dims, 1536);
    }
    /// 2 recordings x 5 segments -> 10 segments, embeddings, and index entries.
    #[test]
    fn test_context_with_test_data() {
        let ctx = IntegrationTestContext::new().with_test_data(2, 5);
        assert_eq!(ctx.recording_repo.count(), 2);
        assert_eq!(ctx.segment_repo.count(), 10);
        assert_eq!(ctx.embedding_repo.count(), 10);
        assert_eq!(ctx.vector_index.count(), 10);
    }
    /// The fast profile trades accuracy for speed relative to high quality.
    #[test]
    fn test_config_variants() {
        let (fast, hq) = (TestConfig::fast(), TestConfig::high_quality());
        assert!(fast.hnsw_m < hq.hnsw_m);
        assert!(fast.hnsw_ef_construction < hq.hnsw_ef_construction);
        assert!(fast.target_recall_at_10 < hq.target_recall_at_10);
    }
}

View File

@@ -0,0 +1,653 @@
//! Integration tests for Vector Space Context
//!
//! Tests for HNSW index creation, vector insertion, k-NN search accuracy,
//! index persistence, and batch insertion performance.
use vibecast_tests::fixtures::*;
use vibecast_tests::mocks::*;
use std::collections::HashSet;
use std::time::Instant;
// ============================================================================
// HNSW Index Creation Tests
// ============================================================================
mod index_creation {
    use super::*;
    /// A brand-new index holds nothing.
    #[test]
    fn test_create_empty_index() {
        assert_eq!(MockVectorIndex::new().count(), 0);
    }
    /// A custom HNSW configuration still yields an empty index.
    #[test]
    fn test_create_index_with_config() {
        let custom = HnswConfig {
            m: 32,
            ef_construction: 400,
            ef_search: 200,
            max_layers: 8,
        };
        assert_eq!(MockVectorIndex::with_config(custom).count(), 0);
    }
    /// The config defaults match the production HNSW parameters.
    #[test]
    fn test_default_hnsw_config() {
        let config = HnswConfig::default();
        assert_eq!(config.m, 16);
        assert_eq!(config.ef_construction, 200);
        assert_eq!(config.ef_search, 100);
        assert_eq!(config.max_layers, 6);
    }
    /// Each supported distance metric produces a valid, empty index.
    #[test]
    fn test_index_with_distance_metrics() {
        for metric in [
            DistanceMetric::Cosine,
            DistanceMetric::Euclidean,
            DistanceMetric::Poincare,
        ] {
            assert_eq!(MockVectorIndex::new().with_distance_metric(metric).count(), 0);
        }
    }
}
// ============================================================================
// Vector Insertion Tests
// ============================================================================
mod vector_insertion {
    use super::*;

    /// A single insert bumps the count and the stored entry keeps the
    /// embedding id it was inserted with.
    #[test]
    fn test_insert_single_vector() {
        let index = MockVectorIndex::new();
        let vector = create_normalized_vector(1536);
        let embedding_id = EmbeddingId::new();
        let vector_id = index.insert(embedding_id, vector).unwrap();
        assert_eq!(index.count(), 1);
        let retrieved = index.get(&vector_id).unwrap().unwrap();
        assert_eq!(retrieved.embedding_id, embedding_id);
    }

    /// `count()` tracks the total number of inserted vectors.
    #[test]
    fn test_insert_multiple_vectors() {
        let index = MockVectorIndex::new();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        assert_eq!(index.count(), 100);
    }

    /// L2-normalized vectors insert cleanly.
    #[test]
    fn test_insert_normalized_vectors() {
        let index = MockVectorIndex::new();
        // `_`: the loop variable was previously named `i` but never used,
        // producing an unused-variable warning.
        for _ in 0..10 {
            let vector = create_normalized_vector(1536);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        assert_eq!(index.count(), 10);
    }

    /// The index stores vector data verbatim, byte-for-byte.
    #[test]
    fn test_insert_preserves_vector_data() {
        let index = MockVectorIndex::new();
        let original_vector = create_deterministic_vector(1536, 42);
        let embedding_id = EmbeddingId::new();
        let vector_id = index.insert(embedding_id, original_vector.clone()).unwrap();
        let retrieved = index.get(&vector_id).unwrap().unwrap();
        assert_eq!(retrieved.vector, original_vector);
    }

    /// Every inserted vector is assigned to some HNSW layer.
    #[test]
    fn test_insert_assigns_layer() {
        let index = MockVectorIndex::new();
        let mut layers_seen = HashSet::new();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            let vector_id = index.insert(EmbeddingId::new(), vector).unwrap();
            let indexed = index.get(&vector_id).unwrap().unwrap();
            layers_seen.insert(indexed.layer);
        }
        // Real HNSW spreads vectors over multiple layers; the mock only
        // guarantees at least one layer was assigned.
        assert!(!layers_seen.is_empty(), "Should have at least one layer");
    }

    /// Removing a vector decrements the count and makes lookups return None.
    #[test]
    fn test_remove_vector() {
        let index = MockVectorIndex::new();
        let vector = create_normalized_vector(1536);
        let vector_id = index.insert(EmbeddingId::new(), vector).unwrap();
        assert_eq!(index.count(), 1);
        index.remove(&vector_id).unwrap();
        assert_eq!(index.count(), 0);
        assert!(index.get(&vector_id).unwrap().is_none());
    }
}
// ============================================================================
// k-NN Search Accuracy Tests
// ============================================================================
mod knn_search {
    use super::*;

    /// A populated index returns exactly k hits.
    #[test]
    fn test_search_returns_k_results() {
        let index = MockVectorIndex::new();
        // Insert 100 vectors
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        let query = create_deterministic_vector(1536, 50);
        let results = index.search(&query, 10).unwrap();
        assert_eq!(results.len(), 10);
    }

    /// Hits come back ordered by ascending distance.
    #[test]
    fn test_search_results_sorted_by_distance() {
        let index = MockVectorIndex::new();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        let query = create_deterministic_vector(1536, 50);
        let results = index.search(&query, 20).unwrap();
        // windows(2) replaces the old `0..len - 1` index loop, which would
        // underflow usize (and panic) on an empty result set.
        for pair in results.windows(2) {
            assert!(
                pair[0].distance <= pair[1].distance,
                "Results should be sorted by distance"
            );
        }
    }

    /// Searching with a vector already in the index returns that vector
    /// first, at near-zero distance.
    #[test]
    fn test_search_finds_exact_match() {
        let index = MockVectorIndex::new();
        let target_vector = create_deterministic_vector(1536, 42);
        let target_id = index.insert(EmbeddingId::new(), target_vector.clone()).unwrap();
        // Insert other vectors (skip seed 42 so the target stays unique)
        for i in 0..50 {
            if i != 42 {
                let vector = create_deterministic_vector(1536, i);
                index.insert(EmbeddingId::new(), vector).unwrap();
            }
        }
        let results = index.search(&target_vector, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].vector_id, target_id);
        assert!(results[0].distance < 0.0001, "Exact match should have near-zero distance");
    }

    /// Average recall@10 against a brute-force baseline must be >= 0.95.
    #[test]
    fn test_recall_at_10_meets_threshold() {
        let index = MockVectorIndex::new();
        // Insert vectors with known structure
        let num_vectors = 1000;
        let mut all_vectors: Vec<(VectorId, Vec<f32>)> = Vec::new();
        for i in 0..num_vectors {
            let vector = create_deterministic_vector(1536, i);
            let vector_id = index.insert(EmbeddingId::new(), vector.clone()).unwrap();
            all_vectors.push((vector_id, vector));
        }
        // Test recall with multiple queries (step 50 -> exactly 20 queries)
        let mut total_recall = 0.0;
        let num_queries = 20;
        for query_idx in (0..num_vectors).step_by(num_vectors / num_queries) {
            let query = &all_vectors[query_idx].1;
            // Ground truth: brute-force k-NN over all vectors.
            let mut true_distances: Vec<(VectorId, f32)> = all_vectors
                .iter()
                .map(|(id, v)| (*id, cosine_distance(query, v)))
                .collect();
            // total_cmp is a total order on f32, so it cannot panic on NaN
            // the way `partial_cmp().unwrap()` could.
            true_distances.sort_by(|a, b| a.1.total_cmp(&b.1));
            let true_top_10: HashSet<VectorId> =
                true_distances.iter().take(10).map(|(id, _)| *id).collect();
            // Get approximate k-NN
            let approx_results = index.search(query, 10).unwrap();
            let approx_top_10: HashSet<VectorId> =
                approx_results.iter().map(|r| r.vector_id).collect();
            // Recall = |true ∩ approx| / k
            let intersection_count = true_top_10.intersection(&approx_top_10).count();
            let recall = intersection_count as f32 / 10.0;
            total_recall += recall;
        }
        let avg_recall = total_recall / num_queries as f32;
        assert!(
            avg_recall >= 0.95,
            "Recall@10 should be >= 0.95, got {}",
            avg_recall
        );
    }

    /// Each requested k yields exactly k hits when the index is large enough.
    #[test]
    fn test_search_with_varying_k() {
        let index = MockVectorIndex::new();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        let query = create_normalized_vector(1536);
        for k in [1, 5, 10, 20, 50] {
            let results = index.search(&query, k).unwrap();
            assert_eq!(results.len(), k, "Should return exactly k={} results", k);
        }
    }

    /// Searching an empty index yields no hits (and does not error).
    #[test]
    fn test_search_empty_index() {
        let index = MockVectorIndex::new();
        let query = create_normalized_vector(1536);
        let results = index.search(&query, 10).unwrap();
        assert_eq!(results.len(), 0);
    }

    /// Requesting more hits than stored vectors returns everything.
    #[test]
    fn test_search_k_larger_than_index_size() {
        let index = MockVectorIndex::new();
        for i in 0..5 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        let query = create_normalized_vector(1536);
        let results = index.search(&query, 100).unwrap();
        assert_eq!(results.len(), 5, "Should return all vectors if k > index size");
    }
}
// ============================================================================
// Neighbor Graph Tests
// ============================================================================
mod neighbor_graph {
    use super::*;

    /// Neighbor lookup returns the requested number of results and never
    /// includes the query vector itself.
    #[test]
    fn test_get_neighbors() {
        let index = MockVectorIndex::new();
        let ids: Vec<_> = (0..20)
            .map(|seed| {
                index
                    .insert(EmbeddingId::new(), create_deterministic_vector(1536, seed))
                    .unwrap()
            })
            .collect();
        // Query the first vector's neighborhood.
        let anchor = ids[0];
        let neighbors = index.get_neighbors(&anchor, 5).unwrap();
        assert_eq!(neighbors.len(), 5);
        // Self-loops are excluded from the neighbor set.
        assert!(neighbors.iter().all(|n| n.vector_id != anchor));
    }

    /// A similarity edge carries its endpoints and distance unchanged.
    #[test]
    fn test_similarity_edge_creation() {
        let from = VectorId::new();
        let to = VectorId::new();
        let edge = SimilarityEdge {
            source_id: from,
            target_id: to,
            distance: 0.15,
            edge_type: "SIMILAR".to_string(),
        };
        assert_eq!(edge.source_id, from);
        assert_eq!(edge.target_id, to);
        assert!(edge.distance < 0.2);
    }

    /// Fixture search results are 1-indexed by rank and non-decreasing
    /// in distance.
    #[test]
    fn test_search_result_ranking() {
        let results = create_search_results(10);
        for (pos, result) in results.iter().enumerate() {
            assert_eq!(result.rank, pos + 1, "Rank should be 1-indexed");
        }
        for pair in results.windows(2) {
            assert!(
                pair[1].distance >= pair[0].distance,
                "Distance should be non-decreasing"
            );
        }
    }
}
// ============================================================================
// Index Persistence Tests
// ============================================================================
mod persistence {
    use super::*;

    /// Round-trips the index through its byte serialization. The mock
    /// loader does not restore entries, so only the save side is verified;
    /// the count comparison is left commented as a reminder for the real
    /// implementation.
    #[test]
    fn test_save_and_load_index() {
        let index = MockVectorIndex::new();
        // Insert vectors
        for i in 0..50 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        // Underscore-prefixed: only meaningful once the loader restores data
        // (previously an unused-variable warning).
        let _original_count = index.count();
        // Save to bytes
        let bytes = index.save_to_bytes().unwrap();
        assert!(!bytes.is_empty());
        // Load from bytes (mock - doesn't restore actual data)
        let _loaded = MockVectorIndex::load_from_bytes(&bytes).unwrap();
        // In real implementation, this would verify:
        // assert_eq!(_loaded.count(), _original_count);
    }

    /// The serialized form begins with the entry count as a little-endian u64.
    #[test]
    fn test_persistence_format() {
        let index = MockVectorIndex::new();
        for i in 0..10 {
            let vector = create_deterministic_vector(1536, i);
            index.insert(EmbeddingId::new(), vector).unwrap();
        }
        let bytes = index.save_to_bytes().unwrap();
        // Header is the vector count encoded as a little-endian u64.
        assert!(bytes.len() >= 8);
        // try_into() replaces the manual 8-element array construction.
        let count = u64::from_le_bytes(bytes[..8].try_into().unwrap());
        assert_eq!(count, 10);
    }
}
// ============================================================================
// Batch Insertion Performance Tests
// ============================================================================
mod batch_performance {
    use super::*;

    /// Batch insertion stores every vector and returns one id per input.
    #[test]
    fn test_batch_insert() {
        let index = MockVectorIndex::new();
        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..100)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();
        let vector_ids = index.insert_batch(embeddings).unwrap();
        assert_eq!(vector_ids.len(), 100);
        assert_eq!(index.count(), 100);
    }

    /// Batch insertion throughput should exceed 1000 vectors/second
    /// (trivially true for the mock, but pins a regression baseline).
    #[test]
    fn test_batch_insert_performance() {
        let index = MockVectorIndex::new();
        let batch_size = 1000;
        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..batch_size)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();
        let start = Instant::now();
        let vector_ids = index.insert_batch(embeddings).unwrap();
        let duration = start.elapsed();
        assert_eq!(vector_ids.len(), batch_size);
        // Should complete reasonably fast (mock implementation)
        let vectors_per_second = batch_size as f64 / duration.as_secs_f64();
        assert!(
            vectors_per_second > 1000.0,
            "Batch insertion should be fast, got {} vec/sec",
            vectors_per_second
        );
    }

    /// Incremental and batch insertion must agree on the final count.
    /// Timings are measured but deliberately not compared (wall-clock
    /// comparisons on a mock are flaky); the durations are
    /// underscore-prefixed to silence the previous unused-variable warnings.
    #[test]
    fn test_incremental_vs_batch_insert() {
        // Test incremental insertion
        let index1 = MockVectorIndex::new();
        let start1 = Instant::now();
        for i in 0..100 {
            let vector = create_deterministic_vector(1536, i);
            index1.insert(EmbeddingId::new(), vector).unwrap();
        }
        let _incremental_duration = start1.elapsed();
        // Test batch insertion
        let index2 = MockVectorIndex::new();
        let embeddings: Vec<(EmbeddingId, Vec<f32>)> = (0..100)
            .map(|i| (EmbeddingId::new(), create_deterministic_vector(1536, i)))
            .collect();
        let start2 = Instant::now();
        index2.insert_batch(embeddings).unwrap();
        let _batch_duration = start2.elapsed();
        assert_eq!(index1.count(), index2.count());
    }

    /// Search latency stays bounded as the same index grows through
    /// successively larger target sizes.
    #[test]
    fn test_scaling_with_index_size() {
        let index = MockVectorIndex::new();
        let sizes = vec![100, 500, 1000];
        let mut search_times = Vec::new();
        for size in &sizes {
            // Grow the index up to the next target size.
            while index.count() < *size {
                let vector = create_deterministic_vector(1536, index.count());
                index.insert(EmbeddingId::new(), vector).unwrap();
            }
            // Measure search time over 100 queries
            let query = create_normalized_vector(1536);
            let start = Instant::now();
            for _ in 0..100 {
                index.search(&query, 10).unwrap();
            }
            search_times.push(start.elapsed());
        }
        // Search time should scale sub-linearly with index size (HNSW property).
        // With the mock implementation, just verify search completes in time.
        for (i, time) in search_times.iter().enumerate() {
            assert!(
                time.as_millis() < 10000,
                "Search at size {} took too long",
                sizes[i]
            );
        }
    }
}
// ============================================================================
// Distance Metric Tests
// ============================================================================
mod distance_metrics {
    use super::*;

    /// A vector is at cosine distance ~0 from itself.
    #[test]
    fn test_cosine_distance_identical_vectors() {
        let vector = create_normalized_vector(1536);
        assert!(
            cosine_distance(&vector, &vector) < 0.0001,
            "Identical vectors should have distance ~0"
        );
    }

    /// Perpendicular unit vectors sit at cosine distance 1.
    #[test]
    fn test_cosine_distance_orthogonal_vectors() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];
        let dist = cosine_distance(&x_axis, &y_axis);
        assert!((dist - 1.0).abs() < 0.0001, "Orthogonal vectors should have distance 1");
    }

    /// Antiparallel vectors sit at the maximum cosine distance of 2.
    #[test]
    fn test_cosine_distance_opposite_vectors() {
        let forward = vec![1.0, 0.0];
        let backward = vec![-1.0, 0.0];
        let dist = cosine_distance(&forward, &backward);
        assert!((dist - 2.0).abs() < 0.0001, "Opposite vectors should have distance 2");
    }

    /// Euclidean distance from a vector to itself is 0.
    #[test]
    fn test_euclidean_distance_identical_vectors() {
        let vector = create_normalized_vector(1536);
        assert!(
            euclidean_distance(&vector, &vector) < 0.0001,
            "Identical vectors should have distance 0"
        );
    }

    /// Classic 3-4-5 right triangle: origin to (3, 4) is exactly 5.
    #[test]
    fn test_euclidean_distance_known_value() {
        let origin = vec![0.0, 0.0];
        let point = vec![3.0, 4.0];
        assert!(
            (euclidean_distance(&origin, &point) - 5.0).abs() < 0.0001,
            "3-4-5 triangle"
        );
    }

    /// d(a, b) == d(b, a) for both metrics.
    #[test]
    fn test_distance_metric_symmetry() {
        let a = create_normalized_vector(1536);
        let b = create_deterministic_vector(1536, 42);
        let cos_forward = cosine_distance(&a, &b);
        let cos_backward = cosine_distance(&b, &a);
        assert!(
            (cos_forward - cos_backward).abs() < 0.0001,
            "Cosine distance should be symmetric"
        );
        let euc_forward = euclidean_distance(&a, &b);
        let euc_backward = euclidean_distance(&b, &a);
        assert!(
            (euc_forward - euc_backward).abs() < 0.0001,
            "Euclidean distance should be symmetric"
        );
    }

    /// d(a, c) <= d(a, b) + d(b, c), allowing a small float tolerance.
    #[test]
    fn test_triangle_inequality() {
        let a = create_deterministic_vector(100, 0);
        let b = create_deterministic_vector(100, 1);
        let c = create_deterministic_vector(100, 2);
        let ab = euclidean_distance(&a, &b);
        let bc = euclidean_distance(&b, &c);
        let ac = euclidean_distance(&a, &c);
        assert!(
            ac <= ab + bc + 0.0001,
            "Triangle inequality should hold: {} <= {} + {}",
            ac,
            ab,
            bc
        );
    }
}
// ============================================================================
// Indexed Vector Tests
// ============================================================================
mod indexed_vector {
    use super::*;

    /// The default indexed vector is 1536-dimensional and unit-length.
    #[test]
    fn test_indexed_vector_creation() {
        let default_vec = IndexedVector::default();
        assert_eq!(default_vec.vector.len(), 1536);
        assert_normalized(&default_vec.vector, 0.0001);
    }

    /// The fixture produces the requested number of deterministic vectors.
    #[test]
    fn test_create_indexed_vectors() {
        let indexed = create_indexed_vectors(10);
        assert_eq!(indexed.len(), 10);
        for (seed, item) in indexed.iter().enumerate() {
            assert_eq!(item.vector.len(), 1536);
            // Generation is deterministic: seed i always reproduces the
            // same vector.
            assert_eq!(item.vector, create_deterministic_vector(1536, seed));
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end sanity check: populate, query, then verify result count,
    /// distance ordering, and the presence of a near-exact match.
    #[test]
    fn test_vector_space_integration_smoke_test() {
        let index = MockVectorIndex::new();
        // Populate with 100 deterministic vectors.
        for seed in 0..100 {
            index
                .insert(EmbeddingId::new(), create_deterministic_vector(1536, seed))
                .unwrap();
        }
        // Query with a vector known to be in the index.
        let query = create_deterministic_vector(1536, 50);
        let hits = index.search(&query, 10).unwrap();
        assert_eq!(hits.len(), 10);
        assert!(hits[0].distance < hits[9].distance);
        // The query equals an inserted vector, so a near-zero hit must exist.
        assert!(
            hits.iter().any(|hit| hit.distance < 0.01),
            "Should find near-exact match"
        );
    }
}