// NOTE(review): the lines below are file-browser metadata (path, line count,
// size, language) that was accidentally pasted above the module docs.
// Commented out so the module still compiles:
// wifi-densepose/vendor/ruvector/examples/vibecast-7sense/tests/fixtures/mod.rs
// 1170 lines
// 32 KiB
// Rust
//! Test fixtures and factories for 7sense bioacoustics platform
//!
//! This module provides reusable test data generators, builders, and fixtures
//! for testing the six bounded contexts of the 7sense system.
use std::collections::HashMap;
use uuid::Uuid;
use chrono::{DateTime, Utc};
// ============================================================================
// Shared Kernel Types (mirroring sevensense-core)
// ============================================================================
/// Recording identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct RecordingId(pub Uuid);

impl RecordingId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        RecordingId(Uuid::new_v4())
    }
}

impl Default for RecordingId {
    fn default() -> Self {
        RecordingId::new()
    }
}
/// Segment identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SegmentId(pub Uuid);

impl SegmentId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        SegmentId(Uuid::new_v4())
    }
}

impl Default for SegmentId {
    fn default() -> Self {
        SegmentId::new()
    }
}
/// Embedding identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct EmbeddingId(pub Uuid);

impl EmbeddingId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        EmbeddingId(Uuid::new_v4())
    }
}

impl Default for EmbeddingId {
    fn default() -> Self {
        EmbeddingId::new()
    }
}
/// Cluster identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ClusterId(pub Uuid);

impl ClusterId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        ClusterId(Uuid::new_v4())
    }
}

impl Default for ClusterId {
    fn default() -> Self {
        ClusterId::new()
    }
}
/// Vector identifier for HNSW index (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct VectorId(pub Uuid);

impl VectorId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        VectorId(Uuid::new_v4())
    }
}

impl Default for VectorId {
    fn default() -> Self {
        VectorId::new()
    }
}
/// Motif identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct MotifId(pub Uuid);

impl MotifId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        MotifId(Uuid::new_v4())
    }
}

impl Default for MotifId {
    fn default() -> Self {
        MotifId::new()
    }
}
/// Evidence pack identifier (UUID newtype).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct EvidencePackId(pub Uuid);

impl EvidencePackId {
    /// Mint a fresh, random (v4) identifier.
    pub fn new() -> Self {
        EvidencePackId(Uuid::new_v4())
    }
}

impl Default for EvidencePackId {
    fn default() -> Self {
        EvidencePackId::new()
    }
}
// ============================================================================
// Audio Ingestion Context Fixtures
// ============================================================================
/// Quality grade for audio segments, derived from signal-to-noise ratio.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum QualityGrade {
    Excellent,
    Good,
    Fair,
    Poor,
    Unusable,
}

impl QualityGrade {
    /// Map an SNR value onto a grade using fixed, strictly-greater-than
    /// thresholds: >20 Excellent, >10 Good, >5 Fair, >0 Poor, else Unusable.
    /// (A NaN SNR fails every guard and therefore grades as Unusable.)
    pub fn from_snr(snr: f32) -> Self {
        match snr {
            s if s > 20.0 => QualityGrade::Excellent,
            s if s > 10.0 => QualityGrade::Good,
            s if s > 5.0 => QualityGrade::Fair,
            s if s > 0.0 => QualityGrade::Poor,
            _ => QualityGrade::Unusable,
        }
    }
}
/// Geographic location of a sensor/recording.
///
/// Fixtures elsewhere in this file use decimal-degree lat/lon values
/// (e.g. 37.7749, -122.4194); altitude units are not stated in this file —
/// presumably meters, TODO confirm against producers of this type.
#[derive(Clone, Debug, Default)]
pub struct GeoLocation {
    pub latitude: f64,
    pub longitude: f64,
    // Optional because not every sensor reports altitude.
    pub altitude: Option<f32>,
}
/// Audio format specification for a recording.
#[derive(Clone, Debug)]
pub struct AudioFormat {
    pub sample_rate: u32,
    pub channels: u8,
    pub bit_depth: u8,
}

impl Default for AudioFormat {
    /// 32 kHz mono 16-bit — the format required by Perch 2.0.
    fn default() -> Self {
        AudioFormat {
            sample_rate: 32_000,
            channels: 1,
            bit_depth: 16,
        }
    }
}
/// Recording aggregate for testing.
#[derive(Clone, Debug)]
pub struct Recording {
    pub id: RecordingId,
    pub sensor_id: String,
    pub location: GeoLocation,
    pub start_timestamp: DateTime<Utc>,
    pub duration_ms: u64,
    pub format: AudioFormat,
    pub habitat: Option<String>,
    pub file_path: String,
}

impl Default for Recording {
    /// One-minute recording from a fixed sensor position (San Francisco
    /// coordinates) in the default Perch-compatible audio format.
    fn default() -> Self {
        let location = GeoLocation {
            latitude: 37.7749,
            longitude: -122.4194,
            altitude: Some(10.0),
        };
        Recording {
            id: RecordingId::new(),
            sensor_id: String::from("SENSOR_001"),
            location,
            start_timestamp: Utc::now(),
            duration_ms: 60_000, // one minute
            format: AudioFormat::default(),
            habitat: Some(String::from("wetland")),
            file_path: String::from("/data/recordings/test.wav"),
        }
    }
}
/// Call segment entity for testing.
#[derive(Clone, Debug)]
pub struct CallSegment {
    pub id: SegmentId,
    pub recording_id: RecordingId,
    pub start_ms: u64,
    pub end_ms: u64,
    pub snr: f32,
    pub energy: f32,
    pub clipping_score: f32,
    pub overlap_score: f32,
    pub quality_grade: QualityGrade,
}

impl Default for CallSegment {
    /// Clean 5-second segment (the Perch analysis window) starting at t=0,
    /// with mid-range SNR and no clipping/overlap.
    fn default() -> Self {
        CallSegment {
            id: SegmentId::new(),
            recording_id: RecordingId::new(),
            start_ms: 0,
            end_ms: 5000, // 5 s — Perch window length
            snr: 15.0,
            energy: 0.5,
            clipping_score: 0.0,
            overlap_score: 0.0,
            quality_grade: QualityGrade::Good,
        }
    }
}
/// Factory function to create a test recording
pub fn create_test_recording() -> Recording {
Recording::default()
}
/// Factory function to create a test recording with custom duration
pub fn create_test_recording_with_duration(duration_ms: u64) -> Recording {
Recording {
duration_ms,
..Default::default()
}
}
/// Factory function to create a test segment
pub fn create_test_segment() -> CallSegment {
CallSegment::default()
}
/// Factory function to create a segment with specific time range
pub fn create_test_segment_at(start_ms: u64, end_ms: u64) -> CallSegment {
CallSegment {
start_ms,
end_ms,
..Default::default()
}
}
/// Factory function to create a segment with specific SNR
pub fn create_test_segment_with_snr(snr: f32) -> CallSegment {
CallSegment {
snr,
quality_grade: QualityGrade::from_snr(snr),
..Default::default()
}
}
/// Build `count` consecutive segments on a single shared recording.
///
/// Each segment lasts 5 s (the Perch window) and is separated from the next
/// by `gap_ms`; SNR and energy rise slightly per segment so the sequence is
/// not uniform.
pub fn create_segment_sequence(count: usize, gap_ms: u64) -> Vec<CallSegment> {
    const SEGMENT_DURATION_MS: u64 = 5000;
    let recording_id = RecordingId::new();
    let mut segments = Vec::with_capacity(count);
    for i in 0..count {
        let start = i as u64 * (SEGMENT_DURATION_MS + gap_ms);
        segments.push(CallSegment {
            id: SegmentId::new(),
            recording_id,
            start_ms: start,
            end_ms: start + SEGMENT_DURATION_MS,
            snr: 15.0 + (i as f32 * 0.5),     // gently increasing SNR
            energy: 0.4 + (i as f32 * 0.05),  // gently increasing energy
            clipping_score: 0.0,
            overlap_score: 0.0,
            quality_grade: QualityGrade::Good,
        });
    }
    segments
}
// ============================================================================
// Embedding Context Fixtures
// ============================================================================
/// Embedding model identity: name, version string, and output dimensionality.
#[derive(Clone, Debug)]
pub struct ModelVersion {
    pub name: String,
    pub version: String,
    pub dimensions: usize,
}

impl Default for ModelVersion {
    /// Perch 2.0 with 1536-dimensional embeddings.
    fn default() -> Self {
        ModelVersion {
            name: String::from("perch"),
            version: String::from("2.0"),
            dimensions: 1536,
        }
    }
}
/// Embedding entity for testing.
#[derive(Clone, Debug)]
pub struct Embedding {
    pub id: EmbeddingId,
    pub segment_id: SegmentId,
    pub vector: Vec<f32>,
    pub model_version: ModelVersion,
    pub norm: f32,
    pub created_at: DateTime<Utc>,
}

impl Default for Embedding {
    /// Fresh embedding with a deterministic pseudo-random 1536-d vector;
    /// the vector's L2 norm is cached in `norm`.
    fn default() -> Self {
        let vector = create_random_vector(1536);
        let norm = vector.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
        Embedding {
            id: EmbeddingId::new(),
            segment_id: SegmentId::new(),
            vector,
            model_version: ModelVersion::default(),
            norm,
            created_at: Utc::now(),
        }
    }
}
/// Factory function to create a test embedding
pub fn create_test_embedding() -> Embedding {
Embedding::default()
}
/// Build a default embedding fixture around a caller-supplied vector,
/// recomputing the cached L2 norm to match it.
pub fn create_test_embedding_with_vector(vector: Vec<f32>) -> Embedding {
    let norm = vector.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    Embedding {
        vector,
        norm,
        ..Default::default()
    }
}
/// Build a pseudo-random vector of length `dims` with components in [-1, 1).
///
/// The "randomness" is seeded by hashing `dims`, so repeated calls with the
/// same dimensionality reproduce the same vector (important for test
/// reproducibility).
pub fn create_random_vector(dims: usize) -> Vec<f32> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::new();
    dims.hash(&mut hasher);
    let seed = hasher.finish();
    let mut out = Vec::with_capacity(dims);
    for i in 0..dims {
        let raw = (seed.wrapping_mul(i as u64 + 1)) % 10000;
        let unit = raw as f32 / 10000.0; // in [0, 1)
        out.push(unit * 2.0 - 1.0);      // shift to [-1, 1)
    }
    out
}
/// Build a deterministic vector whose components follow a sinusoid
/// parameterized by `index`, so different indices yield different
/// (reproducible) directions.
///
/// Components are `sin(index * 0.1 + i * 0.01)` for each dimension `i`,
/// hence always in [-1, 1]. (Cleanup: the original bound the value to a
/// redundant local before returning it.)
pub fn create_deterministic_vector(dims: usize, index: usize) -> Vec<f32> {
    (0..dims)
        .map(|i| (index as f32 * 0.1 + i as f32 * 0.01).sin())
        .collect()
}
/// Build a reproducible pseudo-random vector and scale it to unit L2 norm.
pub fn create_normalized_vector(dims: usize) -> Vec<f32> {
    l2_normalize(&create_random_vector(dims))
}
/// Return a copy of `vector` scaled to unit L2 norm.
///
/// Degenerate inputs (zero vector, or a norm that is not a positive finite
/// number) are returned unchanged rather than divided by zero.
pub fn l2_normalize(vector: &[f32]) -> Vec<f32> {
    let norm = vector.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    match norm > 0.0 {
        true => vector.iter().map(|x| x / norm).collect(),
        false => vector.to_vec(),
    }
}
/// Build `count` embeddings clustered around `base_vector`.
///
/// The per-component noise is divided by sqrt(dims): in high-dimensional
/// space an unscaled perturbation would cause a large angular displacement,
/// defeating the purpose of a "similar" cluster. Each noisy vector is
/// re-normalized before being wrapped in an Embedding.
pub fn create_similar_embeddings(base_vector: &[f32], count: usize, noise: f32) -> Vec<Embedding> {
    let dims = base_vector.len();
    let scaled_noise = noise / (dims as f32).sqrt();
    let mut embeddings = Vec::with_capacity(count);
    for i in 0..count {
        let mut noisy = Vec::with_capacity(dims);
        for (j, &v) in base_vector.iter().enumerate() {
            // (i + 1) so that even the first embedding gets perturbed.
            let delta = (((i + 1) * (j + 1)) as f32 * 0.01).sin() * scaled_noise;
            noisy.push(v + delta);
        }
        embeddings.push(create_test_embedding_with_vector(l2_normalize(&noisy)));
    }
    embeddings
}
/// Factory function for batch embeddings
pub fn create_embedding_batch(count: usize) -> Vec<Embedding> {
(0..count).map(|_| create_test_embedding()).collect()
}
// ============================================================================
// Vector Space Context Fixtures
// ============================================================================
/// HNSW index configuration used by tests.
#[derive(Clone, Debug)]
pub struct HnswConfig {
    /// Max connections per node per layer.
    pub m: usize,
    /// Build-time search width.
    pub ef_construction: usize,
    /// Query-time search width.
    pub ef_search: usize,
    pub max_layers: usize,
}

impl Default for HnswConfig {
    /// Conventional mid-range defaults: M=16, efConstruction=200, efSearch=100.
    fn default() -> Self {
        HnswConfig {
            m: 16,
            ef_construction: 200,
            ef_search: 100,
            max_layers: 6,
        }
    }
}
/// Distance metric types.
///
/// `Cosine` and `Euclidean` correspond to the `cosine_distance` and
/// `euclidean_distance` helpers in this module; `Poincare` has no helper
/// here (presumably computed elsewhere — verify against the index code).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DistanceMetric {
    Cosine,
    Euclidean,
    Poincare,
}
/// A vector as stored in the HNSW index, tagged with its insertion layer.
#[derive(Clone, Debug)]
pub struct IndexedVector {
    pub id: VectorId,
    pub embedding_id: EmbeddingId,
    pub vector: Vec<f32>,
    pub layer: usize,
}

impl Default for IndexedVector {
    /// Unit-norm 1536-d vector placed on the base layer (0).
    fn default() -> Self {
        IndexedVector {
            id: VectorId::new(),
            embedding_id: EmbeddingId::new(),
            vector: create_normalized_vector(1536),
            layer: 0,
        }
    }
}
/// Similarity edge between vectors.
#[derive(Clone, Debug)]
pub struct SimilarityEdge {
    pub source_id: VectorId,
    pub target_id: VectorId,
    // Distance under the index's metric — which metric is not recorded here.
    pub distance: f32,
    // Free-form label; a stringly-typed field, consider an enum if the set
    // of edge types is closed (cannot tell from this file).
    pub edge_type: String,
}
/// Search result from k-NN query.
#[derive(Clone, Debug)]
pub struct SearchResult {
    pub vector_id: VectorId,
    pub distance: f32,
    // Fixtures in this file produce 1-based ranks (see create_search_results).
    pub rank: usize,
}
/// Build `count` indexed vectors with deterministic contents, spread
/// round-robin across four layers.
pub fn create_indexed_vectors(count: usize) -> Vec<IndexedVector> {
    let mut vectors = Vec::with_capacity(count);
    for i in 0..count {
        vectors.push(IndexedVector {
            id: VectorId::new(),
            embedding_id: EmbeddingId::new(),
            vector: create_deterministic_vector(1536, i),
            layer: i % 4,
        });
    }
    vectors
}
/// Build `count` search results with monotonically increasing distances
/// and 1-based ranks.
pub fn create_search_results(count: usize) -> Vec<SearchResult> {
    let mut results = Vec::with_capacity(count);
    for i in 0..count {
        results.push(SearchResult {
            vector_id: VectorId::new(),
            distance: 0.1 + (i as f32 * 0.05),
            rank: i + 1,
        });
    }
    results
}
/// Cosine distance (1 − cosine similarity) between two equal-length vectors.
///
/// Returns 1.0 (maximal distance) when either vector has zero norm, since
/// similarity is undefined there.
///
/// # Panics
/// Panics if the slices have different lengths.
pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "Vectors must have same dimensions");
    let mut dot = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
    }
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a > 0.0 && norm_b > 0.0 {
        1.0 - (dot / (norm_a * norm_b))
    } else {
        1.0
    }
}
/// Euclidean (L2) distance between two equal-length vectors.
///
/// # Panics
/// Panics if the slices have different lengths.
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "Vectors must have same dimensions");
    let sum_of_squares: f32 = a
        .iter()
        .zip(b.iter())
        .map(|(x, y)| (x - y) * (x - y))
        .sum();
    sum_of_squares.sqrt()
}
// ============================================================================
// Analysis Context Fixtures
// ============================================================================
/// Clustering method enumeration.
///
/// Fixtures in this file default to `Hdbscan` (see `Cluster::default`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ClusteringMethod {
    Hdbscan,
    KMeans,
    Spectral,
}
/// Cluster aggregate for testing.
#[derive(Clone, Debug)]
pub struct Cluster {
    pub id: ClusterId,
    pub method: ClusteringMethod,
    pub member_ids: Vec<EmbeddingId>,
    pub centroid: Vec<f32>,
    pub cohesion: f32,
    pub separation: f32,
}

impl Default for Cluster {
    /// HDBSCAN cluster with ten members and a unit-norm 1536-d centroid.
    ///
    /// NOTE: `vec![EmbeddingId::new(); 10]` evaluates `new()` once and clones
    /// the result, so all ten member ids share the same UUID — intentional
    /// or not, this mirrors the original fixture behavior.
    fn default() -> Self {
        Cluster {
            id: ClusterId::new(),
            method: ClusteringMethod::Hdbscan,
            member_ids: vec![EmbeddingId::new(); 10],
            centroid: create_normalized_vector(1536),
            cohesion: 0.8,
            separation: 0.6,
        }
    }
}
/// Cluster assignment linking a segment to a cluster.
#[derive(Clone, Debug)]
pub struct ClusterAssignment {
    pub segment_id: SegmentId,
    pub cluster_id: ClusterId,
    // Assignment confidence — range not enforced here (presumably [0, 1]).
    pub confidence: f32,
    pub distance_to_centroid: f32,
}
/// A recurring sequence pattern over clusters.
#[derive(Clone, Debug)]
pub struct Motif {
    pub id: MotifId,
    pub pattern: Vec<ClusterId>,
    pub occurrence_count: usize,
    pub confidence: f32,
    pub avg_duration_ms: u64,
}

impl Default for Motif {
    /// Three-step motif observed five times.
    ///
    /// NOTE: `vec![ClusterId::new(); 3]` clones ONE freshly minted id, so the
    /// pattern repeats a single cluster id three times (matches the original
    /// fixture behavior).
    fn default() -> Self {
        Motif {
            id: MotifId::new(),
            pattern: vec![ClusterId::new(); 3],
            occurrence_count: 5,
            confidence: 0.85,
            avg_duration_ms: 15_000,
        }
    }
}
/// Transition matrix for sequence analysis.
///
/// Rows index the source cluster, columns the destination. Fixtures built by
/// `create_test_transition_matrix` make each probability row sum to 1.0
/// (row-stochastic); `observations` holds the corresponding raw counts.
#[derive(Clone, Debug)]
pub struct TransitionMatrix {
    pub cluster_ids: Vec<ClusterId>,
    pub probabilities: Vec<Vec<f32>>,
    pub observations: Vec<Vec<u32>>,
}
/// Factory function to create a test cluster
pub fn create_test_cluster() -> Cluster {
Cluster::default()
}
/// Build a cluster with `member_count` members whose centroid is the
/// L2-normalized mean of deterministic member vectors.
///
/// Fix: `member_count == 0` previously divided by zero when averaging,
/// producing an all-NaN centroid; that case now yields an empty member list
/// and an all-zero centroid (which `l2_normalize` returns unchanged).
pub fn create_test_cluster_with_members(member_count: usize) -> Cluster {
    const DIMS: usize = 1536;
    let members: Vec<EmbeddingId> = (0..member_count).map(|_| EmbeddingId::new()).collect();
    let centroid: Vec<f32> = if member_count == 0 {
        vec![0.0; DIMS] // no members: zero centroid, no 0/0 NaN
    } else {
        // Average the deterministic member vectors dimension by dimension.
        let vectors: Vec<Vec<f32>> = (0..member_count)
            .map(|i| create_deterministic_vector(DIMS, i))
            .collect();
        (0..DIMS)
            .map(|dim| {
                let sum: f32 = vectors.iter().map(|v| v[dim]).sum();
                sum / member_count as f32
            })
            .collect()
    };
    Cluster {
        member_ids: members,
        centroid: l2_normalize(&centroid),
        ..Default::default()
    }
}
/// Build `count` well-separated clusters: each centroid is derived from a
/// widely spaced deterministic seed (i * 100) so the clusters do not overlap,
/// with mildly varying cohesion/separation scores.
pub fn create_test_clusters(count: usize) -> Vec<Cluster> {
    let mut clusters = Vec::with_capacity(count);
    for i in 0..count {
        let base_vector = create_deterministic_vector(1536, i * 100);
        clusters.push(Cluster {
            id: ClusterId::new(),
            method: ClusteringMethod::Hdbscan,
            member_ids: (0..10).map(|_| EmbeddingId::new()).collect(),
            centroid: l2_normalize(&base_vector),
            cohesion: 0.7 + (i as f32 * 0.02),
            separation: 0.5 + (i as f32 * 0.03),
        });
    }
    clusters
}
/// Factory function to create a test motif
pub fn create_test_motif() -> Motif {
Motif::default()
}
/// Build a deterministic `cluster_count` × `cluster_count` transition matrix.
///
/// Probability rows are row-stochastic: positive raw weights (|sin| + 0.1,
/// so never zero) normalized to sum to 1.0. Observation counts cycle
/// deterministically through 1..=10.
pub fn create_test_transition_matrix(cluster_count: usize) -> TransitionMatrix {
    let cluster_ids: Vec<ClusterId> = (0..cluster_count).map(|_| ClusterId::new()).collect();
    let mut probabilities = Vec::with_capacity(cluster_count);
    for i in 0..cluster_count {
        let raw: Vec<f32> = (0..cluster_count)
            .map(|j| ((i + j) as f32 * 0.1).sin().abs() + 0.1)
            .collect();
        let sum: f32 = raw.iter().sum();
        probabilities.push(raw.iter().map(|p| p / sum).collect());
    }
    let observations: Vec<Vec<u32>> = (0..cluster_count)
        .map(|i| (0..cluster_count).map(|j| ((i + j) % 10 + 1) as u32).collect())
        .collect();
    TransitionMatrix {
        cluster_ids,
        probabilities,
        observations,
    }
}
/// Compute the entropy rate of a transition matrix, in bits.
///
/// H(X) = -Σᵢ πᵢ Σⱼ pᵢⱼ ln(pᵢⱼ), converted from nats to bits at the end.
/// The stationary distribution π is simplified to uniform (1/n per state)
/// rather than solved from the chain. Zero probabilities contribute nothing
/// (the 0·ln 0 → 0 convention). An empty matrix has entropy 0.
pub fn compute_entropy_rate(matrix: &TransitionMatrix) -> f32 {
    let n = matrix.probabilities.len();
    if n == 0 {
        return 0.0;
    }
    let pi = 1.0 / n as f32; // uniform stationary weight for every state
    let mut entropy = 0.0;
    for i in 0..n {
        let mut row_entropy = 0.0;
        for j in 0..n {
            let p = matrix.probabilities[i][j];
            if p > 0.0 {
                row_entropy -= p * p.ln();
            }
        }
        entropy += pi * row_entropy;
    }
    entropy / (2.0_f32).ln() // nats -> bits
}
// ============================================================================
// Interpretation Context Fixtures
// ============================================================================
/// Evidence type for citations.
///
/// The four kinds of artifacts an interpretation can cite.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum EvidenceType {
    Neighbor,
    Exemplar,
    Motif,
    Cluster,
}
/// Citation linking interpretation to evidence.
#[derive(Clone, Debug)]
pub struct Citation {
    pub claim: String,
    pub evidence_type: EvidenceType,
    // Stringified id of the cited artifact (fixtures use UUID strings).
    pub evidence_id: String,
    // Citation strength — fixtures generate values in [0.5, 1.0).
    pub strength: f32,
}
/// Retrieved neighbor for evidence pack.
#[derive(Clone, Debug)]
pub struct RetrievedNeighbor {
    pub segment_id: SegmentId,
    pub distance: f32,
    // None when the neighbor has no cluster assignment.
    pub cluster_id: Option<ClusterId>,
    // Fixtures derive this as 1 / (1 + distance), i.e. decreasing in distance.
    pub relevance: f32,
}
/// Signal quality assessment.
///
/// Mirrors the quality-related fields of `CallSegment`; `quality_grade` is
/// optional because an assessment may not have been graded yet.
#[derive(Clone, Debug, Default)]
pub struct SignalQuality {
    pub snr: f32,
    pub clipping_score: f32,
    pub overlap_score: f32,
    pub quality_grade: Option<QualityGrade>,
}
/// Evidence pack for RAB interpretation.
#[derive(Clone, Debug)]
pub struct EvidencePack {
    pub id: EvidencePackId,
    pub query_segment_id: SegmentId,
    pub neighbors: Vec<RetrievedNeighbor>,
    pub exemplars: Vec<EmbeddingId>,
    pub signal_quality: SignalQuality,
    pub created_at: DateTime<Utc>,
}

impl Default for EvidencePack {
    /// Pack with five retrieved neighbors, three exemplars, and a clean,
    /// Good-graded signal.
    fn default() -> Self {
        let signal_quality = SignalQuality {
            snr: 15.0,
            clipping_score: 0.02,
            overlap_score: 0.1,
            quality_grade: Some(QualityGrade::Good),
        };
        EvidencePack {
            id: EvidencePackId::new(),
            query_segment_id: SegmentId::new(),
            neighbors: create_test_neighbors(5),
            exemplars: (0..3).map(|_| EmbeddingId::new()).collect(),
            signal_quality,
            created_at: Utc::now(),
        }
    }
}
/// Interpretation with citations.
///
/// Natural-language statements about a query, each expected to be backed by
/// entries in `citations` referencing the originating evidence pack.
#[derive(Clone, Debug)]
pub struct Interpretation {
    pub id: Uuid,
    pub evidence_pack_id: EvidencePackId,
    pub statements: Vec<String>,
    pub citations: Vec<Citation>,
    // Overall confidence — range not enforced here (presumably [0, 1]).
    pub confidence: f32,
    pub created_at: DateTime<Utc>,
}
/// Build `count` retrieved neighbors with increasing distances.
///
/// Every other neighbor carries a cluster assignment; relevance decays as
/// 1 / (1 + distance).
pub fn create_test_neighbors(count: usize) -> Vec<RetrievedNeighbor> {
    let mut neighbors = Vec::with_capacity(count);
    for i in 0..count {
        neighbors.push(RetrievedNeighbor {
            segment_id: SegmentId::new(),
            distance: 0.1 + (i as f32 * 0.05),
            cluster_id: (i % 2 == 0).then(ClusterId::new),
            relevance: 1.0 / (1.0 + 0.1 + (i as f32 * 0.05)),
        });
    }
    neighbors
}
/// Build `count` citations cycling through the four evidence kinds, with
/// strengths spread evenly across [0.5, 1.0).
pub fn create_test_citations(count: usize) -> Vec<Citation> {
    let mut citations = Vec::with_capacity(count);
    for i in 0..count {
        let evidence_type = match i % 4 {
            0 => EvidenceType::Neighbor,
            1 => EvidenceType::Exemplar,
            2 => EvidenceType::Cluster,
            _ => EvidenceType::Motif,
        };
        citations.push(Citation {
            claim: format!("Test claim {}", i + 1),
            evidence_type,
            evidence_id: Uuid::new_v4().to_string(),
            // max(1) guards the count == 0 degenerate case.
            strength: 0.5 + (i as f32 / count.max(1) as f32) * 0.5,
        });
    }
    citations
}
/// Factory function to create a test evidence pack
pub fn create_test_evidence_pack() -> EvidencePack {
EvidencePack::default()
}
/// Factory function to create an evidence pack with specific neighbor count
pub fn create_test_evidence_pack_with_neighbors(neighbor_count: usize) -> EvidencePack {
EvidencePack {
neighbors: create_test_neighbors(neighbor_count),
..Default::default()
}
}
/// Build an interpretation fixture for the given evidence pack: three fixed
/// statements backed by three generated citations.
pub fn create_test_interpretation(evidence_pack_id: EvidencePackId) -> Interpretation {
    let statements = vec![
        "This vocalization exhibits a descending frequency contour.".to_string(),
        "Similar calls were detected in wetland habitat.".to_string(),
        "The call matches cluster A with high confidence.".to_string(),
    ];
    Interpretation {
        id: Uuid::new_v4(),
        evidence_pack_id,
        statements,
        citations: create_test_citations(3),
        confidence: 0.85,
        created_at: Utc::now(),
    }
}
// ============================================================================
// Audio Data Fixtures
// ============================================================================
/// Synthesize `duration_ms` of mono audio at `sample_rate` Hz.
///
/// The signal is a sine whose frequency sweeps between 2 kHz and 4 kHz,
/// shaped by a fast rectified-sine amplitude envelope — a crude stand-in
/// for a bird call. Output is deterministic.
pub fn create_test_audio_samples(duration_ms: u64, sample_rate: u32) -> Vec<f32> {
    let num_samples = (duration_ms as f64 * sample_rate as f64 / 1000.0) as usize;
    let mut samples = Vec::with_capacity(num_samples);
    for i in 0..num_samples {
        let t = i as f32 / sample_rate as f32;
        // Carrier frequency oscillates in [0 Hz offset .. +/-2 kHz] around 2 kHz.
        let freq = 2000.0 + 2000.0 * (t * 0.5).sin();
        // 200 Hz rectified-sine envelope shapes on/offsets.
        let envelope = (std::f32::consts::PI * t * 200.0).sin().abs();
        samples.push(envelope * (2.0 * std::f32::consts::PI * freq * t).sin());
    }
    samples
}
/// Synthesize a 500-frame × 128-bin mel spectrogram.
///
/// Each frame holds an exponential ridge centered on a slowly sweeping bin
/// (simulating a frequency sweep), scaled by a per-frame amplitude envelope
/// (simulating signal onset/offset). Output is deterministic.
pub fn create_test_spectrogram() -> Vec<Vec<f32>> {
    const FRAMES: usize = 500;
    const MEL_BINS: usize = 128;
    let mut spectrogram = Vec::with_capacity(FRAMES);
    for frame in 0..FRAMES {
        // Amplitude varies with frame index in [0.3, 1.0].
        let amplitude = 0.3 + 0.7 * ((frame as f32 / 50.0).sin().abs());
        let mut row = Vec::with_capacity(MEL_BINS);
        for bin in 0..MEL_BINS {
            // Ridge center sweeps +/-30 bins around bin 64.
            let center = 64.0 + 30.0 * (frame as f32 / 100.0).sin();
            let distance = (bin as f32 - center).abs();
            row.push(amplitude * (-distance / 20.0).exp());
        }
        spectrogram.push(row);
    }
    spectrogram
}
/// Serialize a synthetic test signal as a minimal mono 16-bit 32 kHz WAV file.
///
/// Layout: 12-byte RIFF header, 24-byte "fmt " chunk (PCM), then the "data"
/// chunk. The RIFF size field is `data_size + 36` because it excludes the
/// first 8 bytes of the file, per the RIFF convention.
pub fn create_test_wav_bytes(duration_ms: u64) -> Vec<u8> {
    const SAMPLE_RATE: u32 = 32000;
    // Quantize f32 samples to 16-bit PCM.
    let pcm: Vec<i16> = create_test_audio_samples(duration_ms, SAMPLE_RATE)
        .into_iter()
        .map(|s| (s * 32767.0).clamp(-32768.0, 32767.0) as i16)
        .collect();
    let data_size = (pcm.len() * 2) as u32;
    let file_size = data_size + 36;
    let mut bytes = Vec::with_capacity(file_size as usize + 8);
    // RIFF container header
    bytes.extend_from_slice(b"RIFF");
    bytes.extend_from_slice(&file_size.to_le_bytes());
    bytes.extend_from_slice(b"WAVE");
    // "fmt " chunk: 16-byte PCM description
    bytes.extend_from_slice(b"fmt ");
    bytes.extend_from_slice(&16u32.to_le_bytes());
    bytes.extend_from_slice(&1u16.to_le_bytes()); // format tag: PCM
    bytes.extend_from_slice(&1u16.to_le_bytes()); // channels: mono
    bytes.extend_from_slice(&SAMPLE_RATE.to_le_bytes());
    bytes.extend_from_slice(&(SAMPLE_RATE * 2).to_le_bytes()); // byte rate
    bytes.extend_from_slice(&2u16.to_le_bytes()); // block align
    bytes.extend_from_slice(&16u16.to_le_bytes()); // bits per sample
    // "data" chunk with the PCM payload
    bytes.extend_from_slice(b"data");
    bytes.extend_from_slice(&data_size.to_le_bytes());
    for sample in pcm {
        bytes.extend_from_slice(&sample.to_le_bytes());
    }
    bytes
}
// ============================================================================
// Test Context and Utilities
// ============================================================================
/// Test context for managing test state.
///
/// An in-memory registry of fixtures keyed by their ids; populate it with
/// the `with_*` builder methods or `fully_populated`.
pub struct TestContext {
    pub recordings: HashMap<RecordingId, Recording>,
    pub segments: HashMap<SegmentId, CallSegment>,
    pub embeddings: HashMap<EmbeddingId, Embedding>,
    pub clusters: HashMap<ClusterId, Cluster>,
}
impl Default for TestContext {
fn default() -> Self {
Self::new()
}
}
impl TestContext {
pub fn new() -> Self {
Self {
recordings: HashMap::new(),
segments: HashMap::new(),
embeddings: HashMap::new(),
clusters: HashMap::new(),
}
}
pub fn with_recording(mut self, recording: Recording) -> Self {
self.recordings.insert(recording.id, recording);
self
}
pub fn with_segment(mut self, segment: CallSegment) -> Self {
self.segments.insert(segment.id, segment);
self
}
pub fn with_embedding(mut self, embedding: Embedding) -> Self {
self.embeddings.insert(embedding.id, embedding);
self
}
pub fn with_cluster(mut self, cluster: Cluster) -> Self {
self.clusters.insert(cluster.id, cluster);
self
}
/// Create a fully populated test context
pub fn fully_populated(num_recordings: usize, segments_per_recording: usize) -> Self {
let mut ctx = Self::new();
for _ in 0..num_recordings {
let recording = create_test_recording();
let recording_id = recording.id;
ctx.recordings.insert(recording_id, recording);
for i in 0..segments_per_recording {
let start_ms = i as u64 * 5500; // 5s segment + 500ms gap
let segment = CallSegment {
id: SegmentId::new(),
recording_id,
start_ms,
end_ms: start_ms + 5000,
..Default::default()
};
let segment_id = segment.id;
ctx.segments.insert(segment_id, segment);
let embedding = Embedding {
segment_id,
..Default::default()
};
ctx.embeddings.insert(embedding.id, embedding);
}
}
// Create some clusters
for _ in 0..5 {
let cluster = create_test_cluster();
ctx.clusters.insert(cluster.id, cluster);
}
ctx
}
}
// ============================================================================
// Assertion Helpers
// ============================================================================
/// Assert that a vector's L2 norm is within `epsilon` of 1.0.
///
/// # Panics
/// Panics with a descriptive message when the vector is not unit-norm.
pub fn assert_normalized(vector: &[f32], epsilon: f32) {
    let norm = vector.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    let deviation = (norm - 1.0).abs();
    assert!(
        deviation < epsilon,
        "Vector norm {} is not within {} of 1.0",
        norm,
        epsilon
    );
}
/// Assert that a vector has exactly `expected` dimensions.
///
/// # Panics
/// Panics with a descriptive message on a length mismatch.
pub fn assert_dimensions(vector: &[f32], expected: usize) {
    let actual = vector.len();
    assert_eq!(
        actual,
        expected,
        "Vector has {} dimensions, expected {}",
        actual,
        expected
    );
}
/// Assert that every embedding in a batch has the expected dimensionality
/// and contains no NaN/Inf components.
///
/// # Panics
/// Panics identifying the first offending embedding by index.
pub fn assert_valid_embeddings(embeddings: &[Embedding], expected_dims: usize) {
    for (i, emb) in embeddings.iter().enumerate() {
        assert_dimensions(&emb.vector, expected_dims);
        let has_non_finite = emb.vector.iter().any(|x| x.is_nan() || x.is_infinite());
        assert!(
            !has_non_finite,
            "Embedding {} contains NaN or Inf values",
            i
        );
    }
}
/// Assert that recall@k meets a minimum threshold.
///
/// Recall@k = |top-k retrieved ∩ relevant| / min(|relevant|, k).
///
/// Fix: when `relevant` is empty or `k == 0` the denominator is zero; the
/// original computed 0/0 = NaN and failed the assertion with a confusing
/// message. That case is now treated as vacuously satisfied.
///
/// # Panics
/// Panics when the measured recall is below `min_recall`.
pub fn assert_recall_at_k(retrieved: &[VectorId], relevant: &[VectorId], k: usize, min_recall: f32) {
    let denom = relevant.len().min(k);
    if denom == 0 {
        return; // nothing to recall — vacuously true
    }
    let retrieved_set: std::collections::HashSet<_> = retrieved.iter().take(k).collect();
    let relevant_set: std::collections::HashSet<_> = relevant.iter().collect();
    let intersection_count = retrieved_set.intersection(&relevant_set).count();
    let recall = intersection_count as f32 / denom as f32;
    assert!(
        recall >= min_recall,
        "Recall@{} is {}, expected >= {}",
        k,
        recall,
        min_recall
    );
}
// Sanity tests covering the fixture helpers themselves.
#[cfg(test)]
mod tests {
    use super::*;

    // Default recording has a positive duration and the 32 kHz sample rate
    // required by the embedding model.
    #[test]
    fn test_fixture_creation() {
        let recording = create_test_recording();
        assert!(recording.duration_ms > 0);
        assert_eq!(recording.format.sample_rate, 32000);
    }

    // create_random_vector seeds from `dims`, so equal dims => equal output.
    #[test]
    fn test_random_vector_determinism() {
        let v1 = create_random_vector(1536);
        let v2 = create_random_vector(1536);
        assert_eq!(v1, v2, "Random vectors should be deterministic");
    }

    // Classic 3-4-5 triangle: normalized components are exactly 0.6 / 0.8.
    #[test]
    fn test_l2_normalization() {
        let vector = vec![3.0, 4.0];
        let normalized = l2_normalize(&vector);
        assert_normalized(&normalized, 0.0001);
        assert!((normalized[0] - 0.6).abs() < 0.0001);
        assert!((normalized[1] - 0.8).abs() < 0.0001);
    }

    // Orthogonal vectors -> distance 1, identical vectors -> distance 0.
    #[test]
    fn test_cosine_distance() {
        let a = vec![1.0, 0.0];
        let b = vec![0.0, 1.0];
        let dist = cosine_distance(&a, &b);
        assert!((dist - 1.0).abs() < 0.0001, "Perpendicular vectors should have distance 1.0");
        let c = vec![1.0, 0.0];
        let same_dist = cosine_distance(&a, &c);
        assert!(same_dist < 0.0001, "Same vectors should have distance 0.0");
    }

    // One representative SNR per grade bucket (thresholds: 20/10/5/0 dB).
    #[test]
    fn test_quality_grade_from_snr() {
        assert_eq!(QualityGrade::from_snr(25.0), QualityGrade::Excellent);
        assert_eq!(QualityGrade::from_snr(15.0), QualityGrade::Good);
        assert_eq!(QualityGrade::from_snr(7.0), QualityGrade::Fair);
        assert_eq!(QualityGrade::from_snr(2.0), QualityGrade::Poor);
        assert_eq!(QualityGrade::from_snr(-5.0), QualityGrade::Unusable);
    }

    // fully_populated(r, s) yields r recordings, r*s segments/embeddings,
    // and always 5 clusters.
    #[test]
    fn test_test_context_builder() {
        let ctx = TestContext::fully_populated(2, 3);
        assert_eq!(ctx.recordings.len(), 2);
        assert_eq!(ctx.segments.len(), 6);
        assert_eq!(ctx.embeddings.len(), 6);
        assert_eq!(ctx.clusters.len(), 5);
    }

    // Entropy rate of a valid row-stochastic matrix is a small non-negative
    // number of bits.
    #[test]
    fn test_entropy_rate_computation() {
        let matrix = create_test_transition_matrix(4);
        let entropy = compute_entropy_rate(&matrix);
        assert!(entropy >= 0.0, "Entropy should be non-negative");
        assert!(entropy < 10.0, "Entropy should be reasonable");
    }
}