Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,759 @@
//! API Benchmark Suite for 7sense
//!
//! Performance targets from ADR-004:
//! - Query latency: <100ms total (end-to-end)
//! - Neighbor search: <50ms p99
//! - Evidence pack generation: <200ms
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use std::collections::HashMap;
use std::time::Duration;
mod utils;
use utils::*;
// ============================================================================
// Simulated API Types
// ============================================================================
/// Neighbor search request
///
/// Payload for a simulated k-NN query against the embedding index.
#[derive(Clone, Debug)]
struct NeighborSearchRequest {
    /// Query embedding vector.
    embedding: Vec<f32>,
    /// Number of neighbors requested.
    k: usize,
    /// Optional metadata-based post-filter; `None` disables filtering.
    filter: Option<SearchFilter>,
    /// When true, each result is enriched with stored metadata.
    include_metadata: bool,
}

/// Search filter for neighbor queries
///
/// All present criteria must match (logical AND); absent ones are ignored.
#[derive(Clone, Debug)]
struct SearchFilter {
    /// Accept only results whose species is in this list.
    species: Option<Vec<String>>,
    /// Accept only results located inside this lat/lon box.
    location: Option<BoundingBox>,
    /// Accept only timestamps within this range (inclusive).
    time_range: Option<TimeRange>,
    /// Inclusive lower bound on the confidence score.
    min_confidence: Option<f32>,
}

/// Geographic bounding box in decimal degrees.
#[derive(Clone, Debug)]
struct BoundingBox {
    min_lat: f32,
    max_lat: f32,
    min_lon: f32,
    max_lon: f32,
}

/// Inclusive Unix-timestamp range (seconds).
#[derive(Clone, Debug)]
struct TimeRange {
    start: i64,
    end: i64,
}
/// Neighbor search response
#[derive(Clone, Debug)]
struct NeighborSearchResponse {
    /// Matches in the order produced by the index search.
    results: Vec<SearchResult>,
    /// Wall-clock time spent serving the request, in milliseconds.
    total_time_ms: u64,
    /// Whether the response came from a cache (this mock always reports false).
    cache_hit: bool,
}

/// A single nearest-neighbor match.
#[derive(Clone, Debug)]
struct SearchResult {
    /// Identifier of the matched embedding ("emb_<index>").
    id: String,
    /// Distance from the query embedding.
    distance: f32,
    /// Present only when the request asked for metadata.
    metadata: Option<EmbeddingMetadata>,
}

/// Embedding metadata
///
/// Per-embedding record attached to search results on request.
#[derive(Clone, Debug)]
struct EmbeddingMetadata {
    /// Source recording identifier.
    recording_id: String,
    /// Detected species name, if any.
    species: Option<String>,
    /// Vocalization type (e.g. "song", "call").
    call_type: Option<String>,
    /// Recording location, if known.
    location: Option<Location>,
    /// Unix timestamp (seconds) of the recording.
    timestamp: i64,
    /// Confidence score; the benchmark fixture generates values in [0.7, 1.0).
    confidence: f32,
    /// URL of the source audio clip, if available.
    audio_url: Option<String>,
}

/// Geographic point with an optional human-readable site name.
#[derive(Clone, Debug)]
struct Location {
    lat: f32,
    lon: f32,
    site_name: Option<String>,
}
/// Evidence pack for interpretability
///
/// Everything needed to explain a query result: the neighbors that drove
/// it, cluster context, optional visual artifacts, and a confidence split.
#[derive(Clone, Debug)]
struct EvidencePack {
    /// Copy of the query embedding this pack explains.
    query_embedding: Vec<f32>,
    /// Nearest neighbors with per-feature contribution breakdowns.
    neighbors: Vec<NeighborEvidence>,
    /// Nearest-cluster context for the query.
    cluster_info: ClusterInfo,
    /// Optional link to a rendered spectrogram.
    spectrogram_url: Option<String>,
    /// Optional attention heat map (rows of per-column scores in [0, 1)).
    attention_map: Option<Vec<Vec<f32>>>,
    /// Component scores making up the overall confidence.
    confidence_breakdown: ConfidenceBreakdown,
}

/// One neighbor plus the evidence of why it matched.
#[derive(Clone, Debug)]
struct NeighborEvidence {
    /// The underlying search hit.
    result: SearchResult,
    /// Similarity derived from distance as 1 / (1 + distance).
    similarity_score: f32,
    /// Mock per-feature contributions to the match.
    contributing_features: Vec<FeatureContribution>,
}

/// A named feature and its contribution to a match.
#[derive(Clone, Debug)]
struct FeatureContribution {
    feature_name: String,
    contribution: f32,
}

/// Context about the cluster nearest to a query embedding.
#[derive(Clone, Debug)]
struct ClusterInfo {
    /// Index of the nearest cluster centroid.
    cluster_id: i32,
    /// Number of indexed points assigned to that cluster.
    cluster_size: usize,
    /// Distance from the query to the nearest centroid.
    centroid_distance: f32,
    /// Species associated with the cluster (hard-coded placeholder data).
    typical_species: Vec<String>,
}

/// Component scores combined into an overall confidence.
#[derive(Clone, Debug)]
struct ConfidenceBreakdown {
    /// Mean similarity of the returned neighbors.
    neighbor_agreement: f32,
    /// Placeholder cluster-membership score (fixed at 0.85 in this mock).
    cluster_membership: f32,
    /// Placeholder embedding-quality score (fixed at 0.92 in this mock).
    embedding_quality: f32,
    /// Unweighted mean of the three components.
    overall: f32,
}
// ============================================================================
// Simulated API Service
// ============================================================================
/// Simulated API service for benchmarking
///
/// Wraps an HNSW index with fabricated metadata and cluster state so the
/// benchmarks can exercise realistic request/response paths without a
/// backing database.
struct ApiService {
    /// Vector index used for neighbor search.
    index: SimpleHnswIndex,
    /// Per-embedding metadata keyed by index position.
    metadata_store: HashMap<usize, EmbeddingMetadata>,
    /// Random centroids used for nearest-cluster lookups.
    cluster_centroids: Vec<Vec<f32>>,
    /// Round-robin cluster label per indexed embedding.
    cluster_assignments: Vec<i32>,
}
impl ApiService {
    /// Build a benchmark service around `index`.
    ///
    /// Fabricates deterministic metadata for every indexed embedding
    /// (species/call type cycle through fixed lists, locations drift
    /// linearly, timestamps step by 300 s) and creates `num_clusters`
    /// random centroids with round-robin cluster assignments.
    fn new(index: SimpleHnswIndex, num_clusters: usize) -> Self {
        let size = index.len();
        // Generate fake metadata
        let mut metadata_store = HashMap::new();
        let species = ["Robin", "Sparrow", "Blackbird", "Thrush", "Finch"];
        let call_types = ["song", "call", "alarm", "contact"];
        for i in 0..size {
            metadata_store.insert(
                i,
                EmbeddingMetadata {
                    recording_id: format!("rec_{}", i),
                    species: Some(species[i % species.len()].to_string()),
                    call_type: Some(call_types[i % call_types.len()].to_string()),
                    location: Some(Location {
                        lat: 51.5 + (i as f32 * 0.001),
                        lon: -0.1 + (i as f32 * 0.001),
                        site_name: Some(format!("Site {}", i % 10)),
                    }),
                    timestamp: 1700000000 + (i as i64 * 300),
                    // f32 remainder cycles confidence through [0.7, 1.0).
                    confidence: 0.7 + (i as f32 % 30) / 100.0,
                    audio_url: Some(format!("https://audio.example.com/{}.wav", i)),
                },
            );
        }
        // Generate cluster centroids and assignments. NOTE(review): labels
        // are round-robin, not derived from real clustering -- fixture only.
        let cluster_centroids = generate_random_vectors(num_clusters, PERCH_EMBEDDING_DIM);
        let cluster_assignments: Vec<i32> = (0..size).map(|i| (i % num_clusters) as i32).collect();
        Self {
            index,
            metadata_store,
            cluster_centroids,
            cluster_assignments,
        }
    }

    /// Execute neighbor search
    ///
    /// Over-fetches `2 * k` candidates from the index, drops any that fail
    /// the optional metadata filter, then truncates to `k`.
    /// NOTE(review): a highly selective filter can leave fewer than `k`
    /// results because only `2 * k` candidates are fetched.
    fn neighbor_search(&self, request: &NeighborSearchRequest) -> NeighborSearchResponse {
        let start = std::time::Instant::now();
        // Perform HNSW search
        let raw_results = self.index.search(&request.embedding, request.k * 2);
        // Apply filters
        let filtered_results: Vec<_> = raw_results
            .into_iter()
            .filter(|(idx, _)| self.apply_filter(*idx, &request.filter))
            .take(request.k)
            .collect();
        // Build response with optional metadata
        let results: Vec<SearchResult> = filtered_results
            .into_iter()
            .map(|(idx, distance)| SearchResult {
                id: format!("emb_{}", idx),
                distance,
                metadata: if request.include_metadata {
                    self.metadata_store.get(&idx).cloned()
                } else {
                    None
                },
            })
            .collect();
        NeighborSearchResponse {
            results,
            total_time_ms: start.elapsed().as_millis() as u64,
            // Caching is not simulated; always reported as a miss.
            cache_hit: false,
        }
    }

    /// Return true when the embedding at `idx` passes `filter`.
    ///
    /// `None` passes everything. All present criteria are ANDed. An index
    /// with no stored metadata fails any non-empty filter, and a record
    /// missing a filtered field (species/location) fails that criterion.
    fn apply_filter(&self, idx: usize, filter: &Option<SearchFilter>) -> bool {
        match filter {
            None => true,
            Some(f) => {
                if let Some(metadata) = self.metadata_store.get(&idx) {
                    // Species filter
                    if let Some(species_list) = &f.species {
                        if let Some(species) = &metadata.species {
                            if !species_list.contains(species) {
                                return false;
                            }
                        } else {
                            return false;
                        }
                    }
                    // Confidence filter
                    if let Some(min_conf) = f.min_confidence {
                        if metadata.confidence < min_conf {
                            return false;
                        }
                    }
                    // Time range filter (inclusive bounds)
                    if let Some(time_range) = &f.time_range {
                        if metadata.timestamp < time_range.start
                            || metadata.timestamp > time_range.end
                        {
                            return false;
                        }
                    }
                    // Location filter (inclusive bounding box)
                    if let Some(bbox) = &f.location {
                        if let Some(loc) = &metadata.location {
                            if loc.lat < bbox.min_lat
                                || loc.lat > bbox.max_lat
                                || loc.lon < bbox.min_lon
                                || loc.lon > bbox.max_lon
                            {
                                return false;
                            }
                        } else {
                            return false;
                        }
                    }
                    true
                } else {
                    false
                }
            }
        }
    }

    /// Generate evidence pack for interpretability
    ///
    /// Bundles the k nearest neighbors (with mock per-feature
    /// contributions), nearest-cluster info, a mock attention map, and a
    /// confidence breakdown for the query embedding.
    fn generate_evidence_pack(&self, embedding: &[f32], k: usize) -> EvidencePack {
        // Get neighbors
        let raw_results = self.index.search(embedding, k);
        let neighbors: Vec<NeighborEvidence> = raw_results
            .iter()
            .map(|(idx, distance)| {
                let metadata = self.metadata_store.get(idx).cloned();
                // Map distance to a (0, 1] similarity score.
                let similarity = 1.0 / (1.0 + distance);
                // Generate feature contributions (mock)
                let contributions: Vec<FeatureContribution> = (0..5)
                    .map(|i| FeatureContribution {
                        feature_name: format!("feature_{}", i),
                        contribution: similarity * (1.0 - i as f32 * 0.1),
                    })
                    .collect();
                NeighborEvidence {
                    result: SearchResult {
                        id: format!("emb_{}", idx),
                        distance: *distance,
                        metadata,
                    },
                    similarity_score: similarity,
                    contributing_features: contributions,
                }
            })
            .collect();
        // Compute cluster info
        let cluster_info = self.compute_cluster_info(embedding);
        // Compute confidence breakdown
        let confidence_breakdown = self.compute_confidence(embedding, &neighbors);
        EvidencePack {
            query_embedding: embedding.to_vec(),
            neighbors,
            cluster_info,
            spectrogram_url: Some("https://spectrograms.example.com/query.png".to_string()),
            attention_map: Some(self.generate_attention_map()),
            confidence_breakdown,
        }
    }

    /// Linear scan for the nearest centroid; also counts how many indexed
    /// points carry that (round-robin) cluster label.
    fn compute_cluster_info(&self, embedding: &[f32]) -> ClusterInfo {
        // Find nearest cluster
        let mut best_cluster = 0;
        let mut best_distance = f32::MAX;
        for (i, centroid) in self.cluster_centroids.iter().enumerate() {
            let dist = l2_distance(embedding, centroid);
            if dist < best_distance {
                best_distance = dist;
                best_cluster = i;
            }
        }
        // Count cluster members
        let cluster_size = self
            .cluster_assignments
            .iter()
            .filter(|&&c| c == best_cluster as i32)
            .count();
        ClusterInfo {
            cluster_id: best_cluster as i32,
            cluster_size,
            centroid_distance: best_distance,
            // Hard-coded placeholder species list (mock data).
            typical_species: vec!["Robin".to_string(), "Sparrow".to_string()],
        }
    }

    /// Blend neighbor agreement (mean similarity) with fixed placeholder
    /// scores for cluster membership and embedding quality.
    fn compute_confidence(&self, _embedding: &[f32], neighbors: &[NeighborEvidence]) -> ConfidenceBreakdown {
        // Compute neighbor agreement; 0.0 when there are no neighbors.
        let neighbor_agreement = if !neighbors.is_empty() {
            let avg_sim: f32 = neighbors.iter().map(|n| n.similarity_score).sum::<f32>()
                / neighbors.len() as f32;
            avg_sim
        } else {
            0.0
        };
        ConfidenceBreakdown {
            neighbor_agreement,
            // Placeholder constants; overall is their unweighted mean.
            cluster_membership: 0.85,
            embedding_quality: 0.92,
            overall: (neighbor_agreement + 0.85 + 0.92) / 3.0,
        }
    }

    /// Produce a fixed 32x128 mock attention map with values in [0, 1).
    fn generate_attention_map(&self) -> Vec<Vec<f32>> {
        // Generate a small mock attention map
        (0..32)
            .map(|i| (0..128).map(|j| ((i * j) % 100) as f32 / 100.0).collect())
            .collect()
    }
}
// ============================================================================
// Neighbor Search Benchmarks
// ============================================================================
/// Benchmark neighbor search endpoint
fn benchmark_neighbor_search_endpoint(c: &mut Criterion) {
let mut group = c.benchmark_group("neighbor_search_endpoint");
group.sample_size(50);
group.measurement_time(Duration::from_secs(15));
// Build test service
let index = setup_test_index(50_000);
let service = ApiService::new(index, 50);
let query = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
// Basic search without metadata
group.bench_function("basic_k10", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 10,
filter: None,
include_metadata: false,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// Search with metadata
group.bench_function("with_metadata_k10", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 10,
filter: None,
include_metadata: true,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// Search with filters
group.bench_function("filtered_k10", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 10,
filter: Some(SearchFilter {
species: Some(vec!["Robin".to_string(), "Sparrow".to_string()]),
location: None,
time_range: None,
min_confidence: Some(0.8),
}),
include_metadata: true,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// Different k values
for &k in &[10, 50, 100] {
group.bench_with_input(BenchmarkId::new("k", k), &k, |b, &k| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k,
filter: None,
include_metadata: true,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
}
group.finish();
}
/// Benchmark sequential throughput over a fixed batch of 100 queries.
fn benchmark_search_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("search_throughput");
    group.sample_size(20);
    group.measurement_time(Duration::from_secs(20));

    let service = ApiService::new(setup_test_index(50_000), 50);

    // Pre-build 100 requests so iteration measures search cost only.
    let requests: Vec<NeighborSearchRequest> = generate_random_vectors(100, PERCH_EMBEDDING_DIM)
        .into_iter()
        .map(|embedding| NeighborSearchRequest {
            embedding,
            k: 10,
            filter: None,
            include_metadata: true,
        })
        .collect();

    group.throughput(Throughput::Elements(requests.len() as u64));
    group.bench_function("batch_100_queries", |b| {
        b.iter(|| {
            requests.iter().for_each(|req| {
                black_box(service.neighbor_search(req));
            });
        });
    });
    group.finish();
}
// ============================================================================
// Evidence Pack Benchmarks
// ============================================================================
/// Benchmark evidence-pack generation at the default and swept neighbor counts.
fn benchmark_evidence_pack_generation(c: &mut Criterion) {
    let mut group = c.benchmark_group("evidence_pack_generation");
    group.sample_size(30);
    group.measurement_time(Duration::from_secs(15));

    let service = ApiService::new(setup_test_index(50_000), 50);
    let probe = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);

    // Default configuration: 10 neighbors.
    group.bench_function("basic", |b| {
        b.iter(|| black_box(service.generate_evidence_pack(&probe, 10)))
    });

    // Sweep the neighbor count.
    for &k in &[5, 10, 20, 50] {
        group.bench_with_input(BenchmarkId::new("neighbors", k), &k, |b, &k| {
            b.iter(|| black_box(service.generate_evidence_pack(&probe, k)))
        });
    }
    group.finish();
}
// ============================================================================
// Filter Performance Benchmarks
// ============================================================================
/// Benchmark filter application performance
fn benchmark_filter_performance(c: &mut Criterion) {
let mut group = c.benchmark_group("filter_performance");
group.sample_size(50);
group.measurement_time(Duration::from_secs(10));
let index = setup_test_index(50_000);
let service = ApiService::new(index, 50);
let query = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
// No filter
group.bench_function("no_filter", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 100,
filter: None,
include_metadata: false,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// Species filter only
group.bench_function("species_filter", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 100,
filter: Some(SearchFilter {
species: Some(vec!["Robin".to_string()]),
location: None,
time_range: None,
min_confidence: None,
}),
include_metadata: false,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// Confidence filter only
group.bench_function("confidence_filter", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 100,
filter: Some(SearchFilter {
species: None,
location: None,
time_range: None,
min_confidence: Some(0.9),
}),
include_metadata: false,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
// All filters combined
group.bench_function("all_filters", |b| {
let request = NeighborSearchRequest {
embedding: query.clone(),
k: 100,
filter: Some(SearchFilter {
species: Some(vec!["Robin".to_string(), "Sparrow".to_string()]),
location: Some(BoundingBox {
min_lat: 51.0,
max_lat: 52.0,
min_lon: -1.0,
max_lon: 1.0,
}),
time_range: Some(TimeRange {
start: 1700000000,
end: 1710000000,
}),
min_confidence: Some(0.8),
}),
include_metadata: false,
};
b.iter(|| black_box(service.neighbor_search(&request)));
});
group.finish();
}
// ============================================================================
// Latency Analysis
// ============================================================================
/// Analyze end-to-end latency against the ADR-004 targets and print a report.
///
/// Not a criterion benchmark: runs 1000 searches and 100 evidence packs,
/// then prints percentile stats with PASS/FAIL verdicts per budget.
fn analyze_api_latency() {
    use std::time::Instant;

    println!("\n=== API Latency Analysis ===\n");

    // Larger 100k-point fixture for a more production-like index.
    let service = ApiService::new(setup_test_index(100_000), 50);
    let queries = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);

    // Measure neighbor search (k=10, metadata on) over every query.
    let search_latencies: Vec<_> = queries
        .iter()
        .map(|q| {
            let request = NeighborSearchRequest {
                embedding: q.clone(),
                k: 10,
                filter: None,
                include_metadata: true,
            };
            let start = Instant::now();
            let _ = service.neighbor_search(&request);
            start.elapsed()
        })
        .collect();
    let search_stats = PerformanceStats::from_latencies(search_latencies);

    // PASS/FAIL verdict for a p99 against a millisecond budget.
    let verdict = |p99, budget_ms| {
        if p99 <= Duration::from_millis(budget_ms) {
            "PASS"
        } else {
            "FAIL"
        }
    };

    println!("Neighbor Search (k=10, with metadata):");
    println!("{}", search_stats.report());
    println!(
        " p99 target: {}ms ({})",
        targets::QUERY_LATENCY_P99_MS,
        verdict(search_stats.p99, targets::QUERY_LATENCY_P99_MS)
    );
    println!(
        " Total target: {}ms ({})",
        targets::TOTAL_QUERY_LATENCY_MS,
        verdict(search_stats.p99, targets::TOTAL_QUERY_LATENCY_MS)
    );
    println!();

    // Measure evidence-pack generation over the first 100 queries.
    let evidence_latencies: Vec<_> = queries
        .iter()
        .take(100)
        .map(|q| {
            let start = Instant::now();
            let _ = service.generate_evidence_pack(q, 10);
            start.elapsed()
        })
        .collect();
    let evidence_stats = PerformanceStats::from_latencies(evidence_latencies);

    println!("Evidence Pack Generation (10 neighbors):");
    println!("{}", evidence_stats.report());
    println!(
        " p99 target: 200ms ({})",
        verdict(evidence_stats.p99, 200)
    );
}
// ============================================================================
// Criterion Groups
// ============================================================================
// Search endpoint latency and batch throughput.
criterion_group!(
    name = search_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_neighbor_search_endpoint, benchmark_search_throughput
);
// Evidence pack (interpretability) generation.
criterion_group!(
    name = evidence_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_evidence_pack_generation
);
// Metadata filter application cost.
criterion_group!(
    name = filter_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_filter_performance
);
// Harness entry point; criterion_main! supplies `fn main`.
criterion_main!(search_benches, evidence_benches, filter_benches);
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain k=10 search returns exactly k hits, each carrying metadata.
    #[test]
    fn test_neighbor_search_basic() {
        let service = ApiService::new(setup_test_index(1000), 10);
        let embedding = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
        let request = NeighborSearchRequest {
            embedding,
            k: 10,
            filter: None,
            include_metadata: true,
        };
        let response = service.neighbor_search(&request);
        assert_eq!(response.results.len(), 10);
        assert!(response.results.iter().all(|r| r.metadata.is_some()));
    }

    /// Species + confidence criteria must hold for every returned result.
    #[test]
    fn test_neighbor_search_with_filter() {
        let service = ApiService::new(setup_test_index(1000), 10);
        let embedding = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
        let request = NeighborSearchRequest {
            embedding,
            k: 10,
            filter: Some(SearchFilter {
                species: Some(vec!["Robin".to_string()]),
                location: None,
                time_range: None,
                min_confidence: Some(0.7),
            }),
            include_metadata: true,
        };
        let response = service.neighbor_search(&request);
        for hit in &response.results {
            if let Some(meta) = &hit.metadata {
                assert_eq!(meta.species, Some("Robin".to_string()));
                assert!(meta.confidence >= 0.7);
            }
        }
    }

    /// An evidence pack carries k neighbors plus non-trivial diagnostics.
    #[test]
    fn test_evidence_pack_generation() {
        let service = ApiService::new(setup_test_index(1000), 10);
        let probe = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
        let pack = service.generate_evidence_pack(&probe, 10);
        assert_eq!(pack.neighbors.len(), 10);
        assert!(pack.confidence_breakdown.overall > 0.0);
        assert!(pack.cluster_info.cluster_size > 0);
    }

    /// Manual latency report; excluded from the default test run.
    #[test]
    #[ignore] // Run with: cargo test --release -- --ignored --nocapture
    fn run_api_latency_analysis() {
        analyze_api_latency();
    }
}

View File

@@ -0,0 +1,684 @@
//! Clustering Benchmark Suite for 7sense
//!
//! Benchmarks for clustering algorithms used in bird call analysis:
//! - HDBSCAN for species/call-type clustering
//! - Cluster assignment for new embeddings
//! - Motif detection in audio sequences
//! - Centroid computation and updates
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use std::collections::{HashMap, HashSet};
use std::time::Duration;
mod utils;
use utils::*;
/// Number of clusters for benchmark
const NUM_CLUSTERS: usize = 50;

// ============================================================================
// Simplified HDBSCAN Implementation for Benchmarking
// ============================================================================
/// Simplified HDBSCAN-like clustering for benchmarking
/// In production, this would use the actual HDBSCAN algorithm
///
/// Parameters follow DBSCAN/HDBSCAN naming conventions.
struct SimpleHdbscan {
    /// Smallest expanded group that is kept as a labeled cluster.
    min_cluster_size: usize,
    /// Neighbor count (query point included) for a point to be a core point.
    min_samples: usize,
    /// Neighborhood radius for the L2 region query.
    epsilon: f32,
}
impl SimpleHdbscan {
    /// Create a clusterer with the given density parameters.
    fn new(min_cluster_size: usize, min_samples: usize, epsilon: f32) -> Self {
        Self {
            min_cluster_size,
            min_samples,
            epsilon,
        }
    }

    /// Fit the clustering model on embeddings
    /// Returns cluster labels (-1 for noise)
    ///
    /// DBSCAN-style: each unvisited point whose epsilon-neighborhood holds
    /// at least `min_samples` points (itself included) seeds an expansion;
    /// the group is labeled only if it reaches `min_cluster_size` members.
    /// NOTE(review): members of a discarded undersized group remain marked
    /// visited and thus stay noise even if a later expansion could absorb
    /// them -- a deviation from real HDBSCAN, acceptable for a benchmark.
    fn fit(&self, embeddings: &[Vec<f32>]) -> Vec<i32> {
        let n = embeddings.len();
        let mut labels = vec![-1i32; n];
        let mut cluster_id = 0;
        let mut visited = vec![false; n];
        for i in 0..n {
            if visited[i] {
                continue;
            }
            // Find neighbors within epsilon
            let neighbors = self.region_query(embeddings, i);
            if neighbors.len() >= self.min_samples {
                // Expand cluster
                let cluster_members = self.expand_cluster(embeddings, i, &neighbors, &mut visited);
                if cluster_members.len() >= self.min_cluster_size {
                    for &member in &cluster_members {
                        labels[member] = cluster_id;
                    }
                    cluster_id += 1;
                }
            }
        }
        labels
    }

    /// All indices whose L2 distance to `point_idx` is <= epsilon.
    /// The query point itself is always included (distance 0). O(n) scan.
    fn region_query(&self, embeddings: &[Vec<f32>], point_idx: usize) -> Vec<usize> {
        let point = &embeddings[point_idx];
        embeddings
            .iter()
            .enumerate()
            .filter(|(_, other)| l2_distance(point, other) <= self.epsilon)
            .map(|(idx, _)| idx)
            .collect()
    }

    /// Iterative stack-based growth from `seed`: density-reachable points
    /// join the cluster, and core points (>= min_samples neighbors) also
    /// enqueue their own unvisited neighbors. Marks everything it touches
    /// as visited.
    fn expand_cluster(
        &self,
        embeddings: &[Vec<f32>],
        seed: usize,
        initial_neighbors: &[usize],
        visited: &mut [bool],
    ) -> Vec<usize> {
        let mut cluster = vec![seed];
        visited[seed] = true;
        let mut to_process: Vec<usize> = initial_neighbors.to_vec();
        while let Some(point_idx) = to_process.pop() {
            if visited[point_idx] {
                continue;
            }
            visited[point_idx] = true;
            cluster.push(point_idx);
            let neighbors = self.region_query(embeddings, point_idx);
            if neighbors.len() >= self.min_samples {
                to_process.extend(neighbors.iter().filter(|&&n| !visited[n]));
            }
        }
        cluster
    }
}
/// Cluster assignment for new embeddings
///
/// Holds centroids plus a parallel list of cluster ids (identity mapping
/// as constructed by `new`).
struct ClusterAssigner {
    centroids: Vec<Vec<f32>>,
    cluster_ids: Vec<usize>,
}

impl ClusterAssigner {
    /// Build an assigner whose cluster ids are simply `0..centroids.len()`.
    fn new(centroids: Vec<Vec<f32>>) -> Self {
        let cluster_ids = (0..centroids.len()).collect();
        Self {
            centroids,
            cluster_ids,
        }
    }

    /// Assign a single embedding to its nearest centroid.
    /// Returns `(cluster_id, distance_to_that_centroid)`.
    fn assign(&self, embedding: &[f32]) -> (usize, f32) {
        let mut winner = (0usize, f32::MAX);
        for (slot, centroid) in self.centroids.iter().enumerate() {
            let d = l2_distance(embedding, centroid);
            if d < winner.1 {
                winner = (self.cluster_ids[slot], d);
            }
        }
        winner
    }

    /// Assign every embedding in the slice; output order matches input.
    fn batch_assign(&self, embeddings: &[Vec<f32>]) -> Vec<(usize, f32)> {
        embeddings.iter().map(|e| self.assign(e)).collect()
    }
}
/// Compute cluster centroids from labeled embeddings
///
/// Averages all embeddings sharing each non-negative label; label -1
/// (noise) contributes to no centroid. Returns a map from label to
/// centroid vector.
///
/// Fix: the previous version read `embeddings[0]` up front to size the
/// accumulators, panicking on an empty slice. Dimensions are now taken
/// from each labeled embedding, so empty input yields an empty map.
fn compute_centroids(embeddings: &[Vec<f32>], labels: &[i32]) -> HashMap<i32, Vec<f32>> {
    // Per-label running (sum, count) pairs in a single map.
    let mut sums: HashMap<i32, (Vec<f32>, usize)> = HashMap::new();
    for (embedding, &label) in embeddings.iter().zip(labels.iter()) {
        // Negative labels mark noise points.
        if label < 0 {
            continue;
        }
        let (sum, count) = sums
            .entry(label)
            .or_insert_with(|| (vec![0.0; embedding.len()], 0));
        for (s, &e) in sum.iter_mut().zip(embedding.iter()) {
            *s += e;
        }
        *count += 1;
    }
    // Finalize: divide each accumulated sum by its member count.
    sums.into_iter()
        .map(|(label, (sum, count))| {
            let n = count as f32;
            (label, sum.into_iter().map(|s| s / n).collect())
        })
        .collect()
}
// ============================================================================
// Motif Detection
// ============================================================================
/// Simplified motif detector for recurring audio patterns
///
/// Compares every pair of fixed-length windows over an embedding sequence
/// and reports pairs whose mean cosine similarity clears a threshold.
struct MotifDetector {
    /// Window length (in embeddings) of a motif candidate.
    min_length: usize,
    /// NOTE(review): declared but never read by any method below.
    max_gap: usize,
    /// Mean-cosine-similarity cutoff for reporting a pair.
    similarity_threshold: f32,
}

impl MotifDetector {
    fn new(min_length: usize, max_gap: usize, similarity_threshold: f32) -> Self {
        Self {
            min_length,
            max_gap,
            similarity_threshold,
        }
    }

    /// Detect motifs in a sequence of embeddings
    ///
    /// O(n^2 * min_length): all-pairs comparison of windows whose starts
    /// are at least `min_length` apart. NOTE(review): the exclusive upper
    /// bound `n - min_length` skips the final valid window start -- an
    /// off-by-one that is harmless for benchmarking but would matter for
    /// exhaustive search.
    fn detect_motifs(&self, embeddings: &[Vec<f32>]) -> Vec<Motif> {
        let mut motifs = Vec::new();
        let n = embeddings.len();
        // Simplified matrix profile approach
        for i in 0..n.saturating_sub(self.min_length) {
            for j in (i + self.min_length)..n.saturating_sub(self.min_length) {
                // Check if subsequences are similar
                let sim = self.subsequence_similarity(embeddings, i, j, self.min_length);
                if sim >= self.similarity_threshold {
                    motifs.push(Motif {
                        start_a: i,
                        start_b: j,
                        length: self.min_length,
                        similarity: sim,
                    });
                }
            }
        }
        motifs
    }

    /// Mean element-wise cosine similarity of the two length-`length`
    /// windows starting at `start_a` and `start_b`.
    fn subsequence_similarity(
        &self,
        embeddings: &[Vec<f32>],
        start_a: usize,
        start_b: usize,
        length: usize,
    ) -> f32 {
        let mut total_sim = 0.0;
        for i in 0..length {
            let sim = cosine_similarity(&embeddings[start_a + i], &embeddings[start_b + i]);
            total_sim += sim;
        }
        total_sim / length as f32
    }
}

/// A detected pair of mutually similar subsequences.
#[derive(Debug, Clone)]
struct Motif {
    /// Start index of the first window.
    start_a: usize,
    /// Start index of the second window.
    start_b: usize,
    /// Window length (always the detector's `min_length`).
    length: usize,
    /// Mean cosine similarity of the window pair.
    similarity: f32,
}
// ============================================================================
// HDBSCAN Benchmarks
// ============================================================================
/// Benchmark HDBSCAN clustering at several dataset sizes.
fn benchmark_hdbscan(c: &mut Criterion) {
    let mut group = c.benchmark_group("hdbscan");
    group.sample_size(20);
    group.measurement_time(Duration::from_secs(30));

    for &size in &[500, 1000, 2000] {
        // Clustered (rather than uniform) data for a realistic workload.
        let data = generate_clustered_vectors(size, PERCH_EMBEDDING_DIM, NUM_CLUSTERS, 0.1);
        let model = SimpleHdbscan::new(5, 3, 0.5);
        group.throughput(Throughput::Elements(size as u64));
        group.bench_with_input(BenchmarkId::new("fit", size), &size, |b, _| {
            b.iter(|| black_box(model.fit(&data)))
        });
    }
    group.finish();
}
/// Benchmark HDBSCAN sensitivity to `min_cluster_size` and `epsilon`.
fn benchmark_hdbscan_params(c: &mut Criterion) {
    let mut group = c.benchmark_group("hdbscan_params");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(20));

    let size = 1000;
    let data = generate_clustered_vectors(size, PERCH_EMBEDDING_DIM, NUM_CLUSTERS, 0.1);

    // Sweep min_cluster_size with min_samples=3, epsilon=0.5 held fixed.
    for mcs in [5, 10, 20] {
        let model = SimpleHdbscan::new(mcs, 3, 0.5);
        group.bench_with_input(BenchmarkId::new("min_cluster_size", mcs), &mcs, |b, _| {
            b.iter(|| black_box(model.fit(&data)))
        });
    }

    // Sweep epsilon with min_cluster_size=5, min_samples=3 held fixed.
    for eps in [0.3, 0.5, 0.7] {
        let model = SimpleHdbscan::new(5, 3, eps);
        group.bench_with_input(
            BenchmarkId::new("epsilon", format!("{:.1}", eps)),
            &eps,
            |b, _| b.iter(|| black_box(model.fit(&data))),
        );
    }
    group.finish();
}
// ============================================================================
// Cluster Assignment Benchmarks
// ============================================================================
/// Benchmark single and batched cluster assignment for new embeddings.
fn benchmark_cluster_assignment(c: &mut Criterion) {
    let mut group = c.benchmark_group("cluster_assignment");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(10));

    // Fixture: NUM_CLUSTERS random centroids.
    let assigner = ClusterAssigner::new(generate_random_vectors(NUM_CLUSTERS, PERCH_EMBEDDING_DIM));

    // One-at-a-time assignment.
    let probe = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);
    group.bench_function("single", |b| b.iter(|| black_box(assigner.assign(&probe))));

    // Batched assignment at several sizes.
    for &batch_size in &[100, 1000, 10000] {
        let batch = generate_random_vectors(batch_size, PERCH_EMBEDDING_DIM);
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(BenchmarkId::new("batch", batch_size), &batch_size, |b, _| {
            b.iter(|| black_box(assigner.batch_assign(&batch)))
        });
    }
    group.finish();
}
/// Benchmark how assignment cost scales with the number of centroids.
fn benchmark_cluster_assignment_scalability(c: &mut Criterion) {
    let mut group = c.benchmark_group("cluster_assignment_scalability");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(10));

    // Fixed 1000-embedding batch; only the centroid count varies.
    let batch = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);
    for num_clusters in [10, 50, 100, 200, 500] {
        let assigner =
            ClusterAssigner::new(generate_random_vectors(num_clusters, PERCH_EMBEDDING_DIM));
        group.throughput(Throughput::Elements(batch.len() as u64));
        group.bench_with_input(
            BenchmarkId::new("num_clusters", num_clusters),
            &num_clusters,
            |b, _| b.iter(|| black_box(assigner.batch_assign(&batch))),
        );
    }
    group.finish();
}
// ============================================================================
// Centroid Computation Benchmarks
// ============================================================================
/// Benchmark full centroid recomputation at several dataset sizes.
fn benchmark_centroid_computation(c: &mut Criterion) {
    let mut group = c.benchmark_group("centroid_computation");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(10));

    for &size in &[1000, 5000, 10000] {
        let data = generate_clustered_vectors(size, PERCH_EMBEDDING_DIM, NUM_CLUSTERS, 0.1);
        // Round-robin synthetic labels, one per embedding.
        let labels: Vec<i32> = (0..size).map(|i| (i % NUM_CLUSTERS) as i32).collect();
        group.throughput(Throughput::Elements(size as u64));
        group.bench_with_input(BenchmarkId::new("size", size), &size, |b, _| {
            b.iter(|| black_box(compute_centroids(&data, &labels)))
        });
    }
    group.finish();
}
/// Benchmark the O(dim) incremental (running-mean) centroid update.
fn benchmark_centroid_update(c: &mut Criterion) {
    let mut group = c.benchmark_group("centroid_update");
    group.sample_size(100);

    // Fixture: dimension-wise mean of a 1000-member cluster.
    let cluster_size = 1000;
    let members = generate_random_vectors(cluster_size, PERCH_EMBEDDING_DIM);
    let centroid: Vec<f32> = (0..PERCH_EMBEDDING_DIM)
        .map(|d| members.iter().map(|e| e[d]).sum::<f32>() / cluster_size as f32)
        .collect();
    // The single new point folded in on each iteration.
    let incoming = generate_random_vectors(1, PERCH_EMBEDDING_DIM).remove(0);

    group.bench_function("incremental_update", |b| {
        b.iter(|| {
            // Running-mean update: c' = c + (x - c) / (n + 1)
            let n = cluster_size as f32;
            let updated: Vec<f32> = centroid
                .iter()
                .zip(incoming.iter())
                .map(|(&c, &x)| c + (x - c) / (n + 1.0))
                .collect();
            black_box(updated)
        });
    });
    group.finish();
}
// ============================================================================
// Motif Detection Benchmarks
// ============================================================================
/// Benchmark motif detection over sequences of increasing length.
fn benchmark_motif_detection(c: &mut Criterion) {
    let mut group = c.benchmark_group("motif_detection");
    group.sample_size(20);
    group.measurement_time(Duration::from_secs(20));

    let detector = MotifDetector::new(3, 10, 0.8);
    for &seq_length in &[50, 100, 200] {
        // Clustered vectors give the sequence repeated, motif-like patterns.
        let sequence = generate_clustered_vectors(seq_length, PERCH_EMBEDDING_DIM, 10, 0.05);
        group.throughput(Throughput::Elements(seq_length as u64));
        group.bench_with_input(
            BenchmarkId::new("seq_length", seq_length),
            &seq_length,
            |b, _| b.iter(|| black_box(detector.detect_motifs(&sequence))),
        );
    }
    group.finish();
}
// ============================================================================
// Silhouette Score Computation
// ============================================================================
/// Compute silhouette score for cluster quality assessment
///
/// Mean over all non-noise points of s(i) = (b(i) - a(i)) / max(a(i), b(i)),
/// where a(i) is the mean distance to the point's own cluster and b(i) is
/// the smallest mean distance to any other cluster. Returns 0.0 when there
/// are fewer than 2 points or fewer than 2 distinct non-noise labels.
/// O(n^2) distance evaluations -- intended for benchmark-scale inputs only.
fn compute_silhouette_score(embeddings: &[Vec<f32>], labels: &[i32]) -> f32 {
    let n = embeddings.len();
    if n < 2 {
        return 0.0;
    }
    // Distinct non-noise labels; noise (-1) is excluded throughout.
    let unique_labels: HashSet<i32> = labels.iter().filter(|&&l| l >= 0).copied().collect();
    if unique_labels.len() < 2 {
        return 0.0;
    }
    let mut total_score = 0.0;
    let mut count = 0;
    for i in 0..n {
        let label_i = labels[i];
        if label_i < 0 {
            continue;
        }
        // Compute a(i): mean intra-cluster distance
        let mut intra_sum = 0.0;
        let mut intra_count = 0;
        for j in 0..n {
            if i != j && labels[j] == label_i {
                intra_sum += l2_distance(&embeddings[i], &embeddings[j]);
                intra_count += 1;
            }
        }
        // NOTE(review): a singleton cluster yields a(i) = 0 and hence
        // s(i) = 1; the textbook convention sets s(i) = 0 in that case.
        let a_i = if intra_count > 0 {
            intra_sum / intra_count as f32
        } else {
            0.0
        };
        // Compute b(i): min mean inter-cluster distance
        let mut b_i = f32::MAX;
        for &other_label in &unique_labels {
            if other_label != label_i {
                let mut inter_sum = 0.0;
                let mut inter_count = 0;
                for j in 0..n {
                    if labels[j] == other_label {
                        inter_sum += l2_distance(&embeddings[i], &embeddings[j]);
                        inter_count += 1;
                    }
                }
                if inter_count > 0 {
                    let mean_inter = inter_sum / inter_count as f32;
                    b_i = b_i.min(mean_inter);
                }
            }
        }
        // Silhouette coefficient for point i
        if b_i.is_finite() {
            let s_i = (b_i - a_i) / a_i.max(b_i);
            total_score += s_i;
            count += 1;
        }
    }
    // Mean silhouette over contributing points; 0.0 if none contributed.
    if count > 0 {
        total_score / count as f32
    } else {
        0.0
    }
}
/// Benchmark the O(n^2) silhouette score at two small dataset sizes.
fn benchmark_silhouette_score(c: &mut Criterion) {
    let mut group = c.benchmark_group("silhouette_score");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));

    for &size in &[100, 500] {
        let data = generate_clustered_vectors(size, PERCH_EMBEDDING_DIM, 10, 0.1);
        // Round-robin labels over 10 clusters.
        let labels: Vec<i32> = (0..size).map(|i| (i % 10) as i32).collect();
        group.bench_with_input(BenchmarkId::new("size", size), &size, |b, _| {
            b.iter(|| black_box(compute_silhouette_score(&data, &labels)))
        });
    }
    group.finish();
}
// ============================================================================
// Criterion Groups
// ============================================================================
// Benchmark registration: each group shares the default colored-output
// config; `criterion_main!` below wires all groups into `cargo bench`.
criterion_group!(
    name = hdbscan_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_hdbscan, benchmark_hdbscan_params
);
criterion_group!(
    name = assignment_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_cluster_assignment, benchmark_cluster_assignment_scalability
);
criterion_group!(
    name = centroid_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_centroid_computation, benchmark_centroid_update
);
criterion_group!(
    name = motif_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_motif_detection
);
criterion_group!(
    name = quality_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_silhouette_score
);
criterion_main!(
    hdbscan_benches,
    assignment_benches,
    centroid_benches,
    motif_benches,
    quality_benches
);
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    /// HDBSCAN on clearly clustered data should label most points.
    #[test]
    fn test_hdbscan_clustering() {
        let embeddings = generate_clustered_vectors(100, 128, 5, 0.05);
        let hdbscan = SimpleHdbscan::new(5, 3, 0.5);
        let labels = hdbscan.fit(&embeddings);
        assert_eq!(labels.len(), 100);
        // Should have some non-noise labels
        let non_noise: Vec<_> = labels.iter().filter(|&&l| l >= 0).collect();
        assert!(!non_noise.is_empty());
    }
    /// A centroid queried against itself must map to its own cluster id.
    #[test]
    fn test_cluster_assignment() {
        let centroids = generate_random_vectors(10, 128);
        let assigner = ClusterAssigner::new(centroids.clone());
        // Assign a centroid to itself should return that cluster
        let (cluster, dist) = assigner.assign(&centroids[5]);
        assert_eq!(cluster, 5);
        assert!(dist < 1e-5);
    }
    /// Centroids are the per-cluster means of the member vectors.
    #[test]
    fn test_centroid_computation() {
        let embeddings = vec![
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
            vec![-1.0, -1.0],
        ];
        let labels = vec![0, 0, 0, 1];
        let centroids = compute_centroids(&embeddings, &labels);
        assert_eq!(centroids.len(), 2);
        // Cluster 0 centroid should be (2/3, 2/3)
        let c0 = &centroids[&0];
        assert!((c0[0] - 2.0 / 3.0).abs() < 1e-5);
        assert!((c0[1] - 2.0 / 3.0).abs() < 1e-5);
        // Cluster 1 centroid should be (-1, -1)
        let c1 = &centroids[&1];
        assert!((c1[0] - (-1.0)).abs() < 1e-5);
        assert!((c1[1] - (-1.0)).abs() < 1e-5);
    }
    /// Smoke test: a 3-frame pattern inserted twice should be detectable.
    /// NOTE(review): deliberately no assertion on the motif count — detection
    /// on noisy data is not deterministic, so this only exercises the path.
    #[test]
    fn test_motif_detection() {
        // Create sequence with a repeated pattern
        let mut embeddings = Vec::new();
        let pattern: Vec<Vec<f32>> = (0..3)
            .map(|i| {
                let mut v = vec![0.0f32; 128];
                v[i] = 1.0;
                v
            })
            .collect();
        // Insert pattern twice with gap
        embeddings.extend(pattern.clone());
        embeddings.extend(generate_random_vectors(5, 128));
        embeddings.extend(pattern);
        let detector = MotifDetector::new(3, 10, 0.9);
        let motifs = detector.detect_motifs(&embeddings);
        // Should detect at least one motif
        // Note: Due to noise, this may not always work perfectly
        println!("Found {} motifs", motifs.len());
    }
    /// Two tight, well-separated clusters should score near 1.
    #[test]
    fn test_silhouette_score() {
        // Perfect clustering: two well-separated clusters
        let embeddings = vec![
            vec![0.0, 0.0],
            vec![0.1, 0.0],
            vec![0.0, 0.1],
            vec![10.0, 10.0],
            vec![10.1, 10.0],
            vec![10.0, 10.1],
        ];
        let labels = vec![0, 0, 0, 1, 1, 1];
        let score = compute_silhouette_score(&embeddings, &labels);
        // Score should be close to 1 for well-separated clusters
        assert!(score > 0.5, "Silhouette score {} too low", score);
    }
}

View File

@@ -0,0 +1,563 @@
//! Embedding Benchmark Suite for 7sense
//!
//! Performance targets from ADR-004:
//! - Embedding inference: >100 segments/second
//! - Mel spectrogram compute: <20ms per segment
//! - Embedding normalization: <5ms per segment
//! - Batch ingestion: 1M vectors/minute
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use std::time::Duration;
mod utils;
use utils::*;
/// Audio segment parameters (5 seconds at 32kHz)
const AUDIO_SAMPLE_RATE: usize = 32_000;
const SEGMENT_DURATION_SECS: f32 = 5.0;
// 160,000 samples per 5-second segment at 32 kHz.
const SEGMENT_SAMPLES: usize = (AUDIO_SAMPLE_RATE as f32 * SEGMENT_DURATION_SECS) as usize;
/// Mel spectrogram parameters
const N_MELS: usize = 128;
const N_FFT: usize = 2048;
const HOP_LENGTH: usize = 512;
// Frames produced per segment at the hop length above.
// NOTE(review): appears unused in this file's visible code — confirm before removing.
const MEL_FRAMES: usize = (SEGMENT_SAMPLES / HOP_LENGTH) + 1;
// ============================================================================
// Simulated Audio Processing
// ============================================================================
/// Generate a synthetic 5-second audio segment for benchmarking.
///
/// The signal is a fixed mix of three sine tones (440/880/1320 Hz) plus a
/// small deterministic pseudo-random noise term from an LCG, so every call
/// produces identical samples.
fn generate_audio_segment() -> Vec<f32> {
    let mut samples = Vec::with_capacity(SEGMENT_SAMPLES);
    let mut seed = 12345u64;
    for i in 0..SEGMENT_SAMPLES {
        // Simple synthetic audio with multiple frequencies
        let t = i as f32 / AUDIO_SAMPLE_RATE as f32;
        let sample = (2.0 * std::f32::consts::PI * 440.0 * t).sin() * 0.3
            + (2.0 * std::f32::consts::PI * 880.0 * t).sin() * 0.2
            + (2.0 * std::f32::consts::PI * 1320.0 * t).sin() * 0.1;
        // LCG step; the top 31 bits feed the noise term.
        seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
        // (seed >> 33) is a 31-bit value, so divide by 2^31 to map it into
        // [0, 1). (The previous divisor, u32::MAX, only reached [0, 0.5),
        // which made the noise strictly negative instead of +/-0.05.)
        let unit = (seed >> 33) as f32 / (1u64 << 31) as f32;
        let noise = unit * 0.1 - 0.05;
        samples.push(sample + noise);
    }
    samples
}
/// Simulated mel spectrogram computation
/// In production, this would use actual FFT and mel filterbank
fn compute_mel_spectrogram(audio: &[f32]) -> Vec<Vec<f32>> {
    let total_frames = audio.len() / HOP_LENGTH + 1;
    (0..total_frames)
        .map(|frame| {
            let begin = frame * HOP_LENGTH;
            let stop = usize::min(begin + N_FFT, audio.len());
            // Stand-in for FFT + mel filterbank: fold the windowed samples
            // into N_MELS bins by absolute value.
            let mut bins = vec![0.0f32; N_MELS];
            for (offset, sample) in audio[begin..stop].iter().enumerate() {
                bins[offset % N_MELS] += sample.abs();
            }
            // Log-scale each bin; the epsilon avoids ln(0).
            for bin in bins.iter_mut() {
                *bin = (*bin + 1e-10).ln();
            }
            bins
        })
        .collect()
}
/// Simulated embedding inference (mock ONNX model)
/// In production, this would use the actual Perch 2.0 model
fn compute_embedding(spectrogram: &[Vec<f32>]) -> Vec<f32> {
    let mut out = vec![0.0f32; PERCH_EMBEDDING_DIM];
    // Stand-in for a forward pass: scatter-accumulate every mel value into
    // the embedding, wrapping around the embedding dimension.
    for (frame_idx, frame) in spectrogram.iter().enumerate() {
        for (mel_idx, &value) in frame.iter().enumerate() {
            let slot = (frame_idx * N_MELS + mel_idx) % PERCH_EMBEDDING_DIM;
            out[slot] += value * 0.01;
        }
    }
    // Project onto the unit sphere; an all-zero vector is left as-is.
    let magnitude = out.iter().map(|v| v * v).sum::<f32>().sqrt();
    if magnitude > 0.0 {
        for v in out.iter_mut() {
            *v /= magnitude;
        }
    }
    out
}
/// L2 normalize an embedding vector in place.
///
/// Scales the slice so its Euclidean norm becomes 1; a zero vector is left
/// untouched to avoid dividing by zero.
fn normalize_embedding(embedding: &mut [f32]) {
    let squared_sum: f32 = embedding.iter().map(|v| v * v).sum();
    let length = squared_sum.sqrt();
    if length > 0.0 {
        embedding.iter_mut().for_each(|v| *v /= length);
    }
}
// ============================================================================
// Spectrogram Generation Benchmarks
// ============================================================================
/// Benchmark mel spectrogram generation
///
/// Measures a single pre-generated 5-second segment, then batches of
/// 10/50/100 segments; throughput is reported in segments per second.
fn benchmark_spectrogram_generation(c: &mut Criterion) {
    let mut group = c.benchmark_group("spectrogram_generation");
    group.sample_size(100);
    group.measurement_time(Duration::from_secs(10));
    // Audio is generated once, outside the timed loop.
    let audio = generate_audio_segment();
    group.throughput(Throughput::Elements(1));
    group.bench_function("single_segment", |b| {
        b.iter(|| black_box(compute_mel_spectrogram(&audio)));
    });
    // Batch spectrogram computation
    let batch_sizes = [10, 50, 100];
    for &batch_size in &batch_sizes {
        let audio_batch: Vec<Vec<f32>> = (0..batch_size).map(|_| generate_audio_segment()).collect();
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::new("batch", batch_size),
            &batch_size,
            |b, _| {
                b.iter(|| {
                    for audio in &audio_batch {
                        black_box(compute_mel_spectrogram(audio));
                    }
                });
            },
        );
    }
    group.finish();
}
// ============================================================================
// Embedding Inference Benchmarks
// ============================================================================
/// Benchmark embedding inference (mock ONNX)
///
/// Spectrograms are prepared outside the timed loop so only the simulated
/// model forward pass is measured, for one segment and batches of
/// 10/32/64/128.
fn benchmark_embedding_inference(c: &mut Criterion) {
    let mut group = c.benchmark_group("embedding_inference");
    group.sample_size(100);
    group.measurement_time(Duration::from_secs(10));
    // Pre-compute spectrogram
    let audio = generate_audio_segment();
    let spectrogram = compute_mel_spectrogram(&audio);
    group.throughput(Throughput::Elements(1));
    group.bench_function("single_inference", |b| {
        b.iter(|| black_box(compute_embedding(&spectrogram)));
    });
    // Batch inference
    let batch_sizes = [10, 32, 64, 128];
    for &batch_size in &batch_sizes {
        let spectrograms: Vec<Vec<Vec<f32>>> = (0..batch_size)
            .map(|_| {
                let audio = generate_audio_segment();
                compute_mel_spectrogram(&audio)
            })
            .collect();
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::new("batch", batch_size),
            &batch_size,
            |b, _| {
                b.iter(|| {
                    for spec in &spectrograms {
                        black_box(compute_embedding(spec));
                    }
                });
            },
        );
    }
    group.finish();
}
/// Benchmark full pipeline: audio -> spectrogram -> embedding
///
/// Unlike the other groups, audio generation happens inside the timed
/// closure, so this measures end-to-end per-segment cost rather than a
/// single stage.
fn benchmark_full_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("full_embedding_pipeline");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(15));
    group.throughput(Throughput::Elements(1));
    group.bench_function("single_segment", |b| {
        b.iter(|| {
            let audio = generate_audio_segment();
            let spectrogram = compute_mel_spectrogram(&audio);
            let embedding = compute_embedding(&spectrogram);
            black_box(embedding)
        });
    });
    // Batch pipeline
    for &batch_size in &[10, 50, 100] {
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::new("batch", batch_size),
            &batch_size,
            |b, &size| {
                b.iter(|| {
                    for _ in 0..size {
                        let audio = generate_audio_segment();
                        let spectrogram = compute_mel_spectrogram(&audio);
                        let embedding = compute_embedding(&spectrogram);
                        black_box(embedding);
                    }
                });
            },
        );
    }
    group.finish();
}
// ============================================================================
// Normalization Benchmarks
// ============================================================================
/// Benchmark embedding normalization
fn benchmark_normalization(c: &mut Criterion) {
let mut group = c.benchmark_group("normalization");
group.sample_size(100);
group.measurement_time(Duration::from_secs(5));
// Generate random unnormalized embeddings
let embeddings: Vec<Vec<f32>> = (0..1000)
.map(|i| {
let mut vec = vec![0.0f32; PERCH_EMBEDDING_DIM];
let mut seed = (i as u64).wrapping_mul(6364136223846793005).wrapping_add(1);
for v in vec.iter_mut() {
seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
*v = ((seed >> 33) as f32 / u32::MAX as f32) * 2.0 - 1.0;
}
vec
})
.collect();
group.throughput(Throughput::Elements(1));
group.bench_function("single", |b| {
let mut embedding = embeddings[0].clone();
b.iter(|| {
normalize_embedding(&mut embedding);
black_box(&embedding);
});
});
group.throughput(Throughput::Elements(1000));
group.bench_function("batch_1000", |b| {
let mut batch = embeddings.clone();
b.iter(|| {
for embedding in batch.iter_mut() {
normalize_embedding(embedding);
}
black_box(&batch);
});
});
group.finish();
}
// ============================================================================
// Quantization Benchmarks
// ============================================================================
/// Benchmark scalar quantization (float32 -> int8)
///
/// Calibrates a ScalarQuantizer on 1000 random embeddings, then times the
/// quantize and dequantize directions both per-vector and over the batch.
fn benchmark_quantization(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization");
    group.sample_size(100);
    group.measurement_time(Duration::from_secs(10));
    // Generate embeddings and calibrate quantizer
    let embeddings = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);
    let mut quantizer = ScalarQuantizer::new(PERCH_EMBEDDING_DIM);
    quantizer.calibrate(&embeddings);
    // Benchmark quantization
    group.throughput(Throughput::Elements(1));
    group.bench_function("quantize_single", |b| {
        let embedding = &embeddings[0];
        b.iter(|| black_box(quantizer.quantize(embedding)));
    });
    // Batch quantization
    group.throughput(Throughput::Elements(1000));
    group.bench_function("quantize_batch_1000", |b| {
        b.iter(|| {
            for embedding in &embeddings {
                black_box(quantizer.quantize(embedding));
            }
        });
    });
    // Benchmark dequantization (inputs quantized once, outside the timing)
    let quantized: Vec<Vec<u8>> = embeddings.iter().map(|e| quantizer.quantize(e)).collect();
    group.throughput(Throughput::Elements(1));
    group.bench_function("dequantize_single", |b| {
        let q = &quantized[0];
        b.iter(|| black_box(quantizer.dequantize(q)));
    });
    group.throughput(Throughput::Elements(1000));
    group.bench_function("dequantize_batch_1000", |b| {
        b.iter(|| {
            for q in &quantized {
                black_box(quantizer.dequantize(q));
            }
        });
    });
    group.finish();
}
/// Benchmark quantization error measurement
///
/// Times a full round-trip error sweep: quantize + dequantize each of 100
/// embeddings and accumulate the mean and max L1 reconstruction error.
fn benchmark_quantization_error(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantization_error");
    group.sample_size(50);
    let embeddings = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
    let mut quantizer = ScalarQuantizer::new(PERCH_EMBEDDING_DIM);
    quantizer.calibrate(&embeddings);
    group.bench_function("measure_error", |b| {
        b.iter(|| {
            let mut total_error = 0.0f32;
            let mut max_error = 0.0f32;
            for embedding in &embeddings {
                let quantized = quantizer.quantize(embedding);
                let dequantized = quantizer.dequantize(&quantized);
                // L1 distance between the original and its reconstruction.
                let error: f32 = embedding
                    .iter()
                    .zip(dequantized.iter())
                    .map(|(a, b)| (a - b).abs())
                    .sum();
                total_error += error;
                max_error = max_error.max(error);
            }
            black_box((total_error / embeddings.len() as f32, max_error))
        });
    });
    group.finish();
}
// ============================================================================
// Throughput Analysis
// ============================================================================
/// Analyze embedding throughput against targets
///
/// Runs the full audio -> spectrogram -> embedding pipeline over a fixed
/// number of segments, then prints the measured throughput alongside the
/// ADR-004 target with a PASS/FAIL verdict.
fn analyze_embedding_throughput() {
    use std::time::Instant;
    println!("\n=== Embedding Throughput Analysis ===\n");
    // Target: 100 segments/second
    let target_segments_per_sec = targets::EMBEDDING_SEGMENTS_PER_SECOND;
    let num_segments = 100;
    let timer = Instant::now();
    for _ in 0..num_segments {
        let segment = generate_audio_segment();
        let mel = compute_mel_spectrogram(&segment);
        let _embedding = compute_embedding(&mel);
    }
    let elapsed = timer.elapsed();
    let throughput = num_segments as f64 / elapsed.as_secs_f64();
    let verdict = if throughput >= target_segments_per_sec as f64 {
        "PASS"
    } else {
        "FAIL"
    };
    println!("Processed {} segments in {:?}", num_segments, elapsed);
    println!("Throughput: {:.1} segments/sec", throughput);
    println!(
        "Target: {} segments/sec ({})",
        target_segments_per_sec, verdict
    );
}
// ============================================================================
// Half-Precision (float16) Simulation
// ============================================================================
/// Simulate float16 quantization for warm tier storage
///
/// Converts each f32 to IEEE 754 half precision and returns the raw bit
/// patterns, mirroring half-width embedding storage in the warm tier.
fn simulate_float16(embedding: &[f32]) -> Vec<u16> {
    let mut bits = Vec::with_capacity(embedding.len());
    for &value in embedding {
        bits.push(half::f16::from_f32(value).to_bits());
    }
    bits
}
/// Benchmark float16 conversion
///
/// Times the f32 -> f16 bit conversion and the reverse over 1000
/// embeddings — the warm-tier storage round trip.
fn benchmark_float16_conversion(c: &mut Criterion) {
    let mut group = c.benchmark_group("float16_conversion");
    group.sample_size(100);
    let embeddings = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);
    group.throughput(Throughput::Elements(1000));
    group.bench_function("to_float16", |b| {
        b.iter(|| {
            for embedding in &embeddings {
                black_box(simulate_float16(embedding));
            }
        });
    });
    // Benchmark float16 -> float32 conversion (inputs converted once, untimed)
    let float16_embeddings: Vec<Vec<u16>> = embeddings.iter().map(|e| simulate_float16(e)).collect();
    group.bench_function("from_float16", |b| {
        b.iter(|| {
            for embedding in &float16_embeddings {
                let restored: Vec<f32> = embedding
                    .iter()
                    .map(|&bits| half::f16::from_bits(bits).to_f32())
                    .collect();
                black_box(restored);
            }
        });
    });
    group.finish();
}
// ============================================================================
// Criterion Groups
// ============================================================================
// Benchmark registration: each group shares the default colored-output
// config; `criterion_main!` below wires all groups into `cargo bench`.
criterion_group!(
    name = spectrogram_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_spectrogram_generation
);
criterion_group!(
    name = inference_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_embedding_inference, benchmark_full_pipeline
);
criterion_group!(
    name = normalization_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_normalization
);
criterion_group!(
    name = quantization_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_quantization, benchmark_quantization_error, benchmark_float16_conversion
);
criterion_main!(
    spectrogram_benches,
    inference_benches,
    normalization_benches,
    quantization_benches
);
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    /// Generated audio has the expected length and bounded amplitude.
    #[test]
    fn test_audio_generation() {
        let audio = generate_audio_segment();
        assert_eq!(audio.len(), SEGMENT_SAMPLES);
        // Check samples are in reasonable range
        for &sample in &audio {
            assert!(sample.abs() < 2.0);
        }
    }
    /// Spectrogram frames are produced with N_MELS bins each.
    #[test]
    fn test_spectrogram_generation() {
        let audio = generate_audio_segment();
        let spectrogram = compute_mel_spectrogram(&audio);
        assert!(!spectrogram.is_empty());
        assert_eq!(spectrogram[0].len(), N_MELS);
    }
    /// Embeddings have the model dimension and unit L2 norm.
    #[test]
    fn test_embedding_computation() {
        let audio = generate_audio_segment();
        let spectrogram = compute_mel_spectrogram(&audio);
        let embedding = compute_embedding(&spectrogram);
        assert_eq!(embedding.len(), PERCH_EMBEDDING_DIM);
        // Check normalization
        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 1e-5);
    }
    /// Quantize -> dequantize preserves dimensions and bounds the error.
    #[test]
    fn test_quantization_roundtrip() {
        let embeddings = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
        let mut quantizer = ScalarQuantizer::new(PERCH_EMBEDDING_DIM);
        quantizer.calibrate(&embeddings);
        for embedding in &embeddings {
            let quantized = quantizer.quantize(embedding);
            let dequantized = quantizer.dequantize(&quantized);
            // Check dimensions preserved
            assert_eq!(dequantized.len(), embedding.len());
            // Check error is bounded
            let max_error: f32 = embedding
                .iter()
                .zip(dequantized.iter())
                .map(|(a, b)| (a - b).abs())
                .fold(0.0, f32::max);
            // Max error per dimension should be small
            assert!(max_error < 0.1, "Max error {} too large", max_error);
        }
    }
    /// Prints throughput vs the ADR-004 target; excluded from normal runs.
    #[test]
    #[ignore] // Run with: cargo test --release -- --ignored --nocapture
    fn run_throughput_analysis() {
        analyze_embedding_throughput();
    }
}

View File

@@ -0,0 +1,505 @@
//! HNSW Benchmark Suite for 7sense
//!
//! Performance targets from ADR-004:
//! - HNSW Search: 150x speedup vs brute force
//! - Query Latency p99: < 50ms
//! - Recall@10: >= 0.95
//! - Recall@100: >= 0.98
//! - Insert Throughput: >= 10,000 vectors/s
//! - Build Time: < 30 min for 1M vectors
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use std::time::{Duration, Instant};
mod utils;
use utils::*;
/// Index sizes to benchmark
const SMALL_INDEX: usize = 10_000;
const MEDIUM_INDEX: usize = 100_000;
// NOTE(review): LARGE_INDEX appears unreferenced by the benchmarks in this
// file — confirm before removing.
const LARGE_INDEX: usize = 500_000;
/// K values for search benchmarks
const K_VALUES: &[usize] = &[10, 50, 100];
// ============================================================================
// HNSW Search Benchmarks
// ============================================================================
/// Benchmark HNSW search performance with different index sizes and k values
///
/// Builds one index per size (10k and 100k vectors) and, for each k in
/// K_VALUES, times 100 queries per iteration.
fn benchmark_hnsw_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_search");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(10));
    // Generate query vectors once
    let queries = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
    for &size in &[SMALL_INDEX, MEDIUM_INDEX] {
        // Build index
        println!("Building index with {} vectors...", size);
        let index = setup_test_index(size);
        for &k in K_VALUES {
            group.throughput(Throughput::Elements(queries.len() as u64));
            group.bench_with_input(
                BenchmarkId::new(format!("size_{}_k_{}", size, k), k),
                &k,
                |b, &k| {
                    b.iter(|| {
                        for query in &queries {
                            black_box(index.search(query, k));
                        }
                    });
                },
            );
        }
    }
    group.finish();
}
/// Benchmark HNSW search with different ef_search values
///
/// Sweeps ef_search over 64..512 on a 100k-vector index at k=10; each
/// iteration times 50 queries.
fn benchmark_hnsw_search_ef(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_search_ef");
    group.sample_size(30);
    group.measurement_time(Duration::from_secs(8));
    let size = MEDIUM_INDEX;
    let mut index = setup_test_index(size);
    let queries = generate_random_vectors(50, PERCH_EMBEDDING_DIM);
    let k = 10;
    for ef in [64, 128, 256, 512] {
        // Reconfigure the same index rather than rebuilding it per ef.
        index.set_ef_search(ef);
        group.throughput(Throughput::Elements(queries.len() as u64));
        group.bench_with_input(BenchmarkId::new("ef", ef), &ef, |b, _| {
            b.iter(|| {
                for query in &queries {
                    black_box(index.search(query, k));
                }
            });
        });
    }
    group.finish();
}
/// Benchmark HNSW vs brute force to calculate speedup ratio
///
/// Uses a deliberately small (10k) index so the exhaustive scan completes
/// in reasonable time; both sides answer the same 20 queries at k=10.
fn benchmark_hnsw_vs_brute_force(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_vs_brute_force");
    group.sample_size(20);
    group.measurement_time(Duration::from_secs(15));
    // Use smaller index for brute force comparison
    let size = 10_000;
    let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
    let mut index = SimpleHnswIndex::new_default();
    for vec in &vectors {
        index.add(vec.clone());
    }
    let queries = generate_random_vectors(20, PERCH_EMBEDDING_DIM);
    let k = 10;
    // Benchmark brute force
    group.bench_function("brute_force", |b| {
        b.iter(|| {
            for query in &queries {
                black_box(brute_force_knn(query, &vectors, k));
            }
        });
    });
    // Benchmark HNSW
    group.bench_function("hnsw", |b| {
        b.iter(|| {
            for query in &queries {
                black_box(index.search(query, k));
            }
        });
    });
    group.finish();
}
// ============================================================================
// HNSW Insert Benchmarks
// ============================================================================
/// Benchmark single vector insertion
///
/// Each measured run starts from a freshly built index of `initial_size`
/// vectors (constructed in the `iter_batched` setup, outside the timing)
/// and then inserts 100 new vectors.
fn benchmark_hnsw_insert_single(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_insert_single");
    group.sample_size(50);
    group.measurement_time(Duration::from_secs(10));
    // Benchmark insertion into indices of different sizes
    for &initial_size in &[1000, 10_000, 50_000] {
        let vectors_to_insert = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
        group.bench_with_input(
            BenchmarkId::new("initial_size", initial_size),
            &initial_size,
            |b, &size| {
                b.iter_batched(
                    || {
                        // Setup: create index with initial vectors
                        setup_test_index(size)
                    },
                    |mut index| {
                        // Insert new vectors
                        for vec in &vectors_to_insert {
                            black_box(index.add(vec.clone()));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }
    group.finish();
}
/// Benchmark batch vector insertion
///
/// Each measured run starts from an empty index (via `iter_batched`, so
/// index creation is untimed) and inserts the whole batch through
/// `batch_add`; throughput is reported in vectors per second.
fn benchmark_hnsw_insert_batch(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_insert_batch");
    group.sample_size(20);
    group.measurement_time(Duration::from_secs(15));
    for &batch_size in &[100, 1000, 5000] {
        let vectors = generate_random_vectors(batch_size, PERCH_EMBEDDING_DIM);
        group.throughput(Throughput::Elements(batch_size as u64));
        group.bench_with_input(
            BenchmarkId::new("batch_size", batch_size),
            &batch_size,
            |b, _| {
                b.iter_batched(
                    || {
                        // Setup: create empty index
                        SimpleHnswIndex::new_default()
                    },
                    |mut index| {
                        // Insert batch
                        black_box(index.batch_add(vectors.clone()));
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }
    group.finish();
}
// ============================================================================
// HNSW Build Benchmarks
// ============================================================================
/// Benchmark index construction time
///
/// Builds a fresh index from scratch inside the timed closure for 1k/5k/10k
/// vectors; throughput is reported in vectors inserted per second.
fn benchmark_hnsw_build(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_build");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    for &size in &[1000, 5000, 10_000] {
        let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
        group.throughput(Throughput::Elements(size as u64));
        group.bench_with_input(BenchmarkId::new("vectors", size), &size, |b, _| {
            b.iter(|| {
                let mut index = SimpleHnswIndex::new_default();
                for vec in &vectors {
                    index.add(vec.clone());
                }
                black_box(index)
            });
        });
    }
    group.finish();
}
/// Benchmark index construction with different M parameters
///
/// Fixes the data set at 5000 vectors and sweeps the HNSW `M` parameter
/// (16..48) to show how graph connectivity affects build time.
fn benchmark_hnsw_build_m_param(c: &mut Criterion) {
    let mut group = c.benchmark_group("hnsw_build_m_param");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(20));
    let size = 5000;
    let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
    for m in [16, 24, 32, 48] {
        group.bench_with_input(BenchmarkId::new("M", m), &m, |b, &m| {
            b.iter(|| {
                let mut index =
                    SimpleHnswIndex::new(PERCH_EMBEDDING_DIM, m, DEFAULT_EF_CONSTRUCTION, DEFAULT_EF_SEARCH);
                for vec in &vectors {
                    index.add(vec.clone());
                }
                black_box(index)
            });
        });
    }
    group.finish();
}
// ============================================================================
// Recall Measurement
// ============================================================================
/// Measure and report recall metrics (not a benchmark, but a validation)
///
/// Times the full cost of computing Recall@10 / Recall@100 for 100 queries
/// on a 10k-vector index, including the brute-force ground-truth scan.
fn measure_recall(c: &mut Criterion) {
    let mut group = c.benchmark_group("recall_measurement");
    group.sample_size(10);
    let size = 10_000;
    let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
    let mut index = SimpleHnswIndex::new_default();
    for vec in &vectors {
        index.add(vec.clone());
    }
    let queries = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
    // This benchmark measures time to compute recall (including brute force)
    group.bench_function("recall_computation", |b| {
        b.iter(|| {
            let mut total_recall_10 = 0.0;
            let mut total_recall_100 = 0.0;
            for query in &queries {
                let hnsw_results = index.search(query, 100);
                let ground_truth = brute_force_knn(query, &vectors, 100);
                total_recall_10 += measure_recall_at_k(&hnsw_results, &ground_truth, 10);
                total_recall_100 += measure_recall_at_k(&hnsw_results, &ground_truth, 100);
            }
            let avg_recall_10 = total_recall_10 / queries.len() as f32;
            let avg_recall_100 = total_recall_100 / queries.len() as f32;
            black_box((avg_recall_10, avg_recall_100))
        });
    });
    group.finish();
}
// ============================================================================
// Speedup Ratio Calculation
// ============================================================================
/// Calculate and report the speedup ratio of HNSW vs brute force
/// This is run as a single iteration with detailed output
fn calculate_speedup_ratio() {
    println!("\n=== HNSW vs Brute Force Speedup Analysis ===\n");
    let k = 10;
    for &size in &[1_000, 5_000, 10_000, 50_000] {
        println!("Index size: {} vectors", size);
        println!("Dimension: {}", PERCH_EMBEDDING_DIM);
        let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
        let mut index = SimpleHnswIndex::new_default();
        for vec in &vectors {
            index.add(vec.clone());
        }
        let queries = generate_random_vectors(100, PERCH_EMBEDDING_DIM);
        // Wall-clock the exhaustive scan over all queries.
        let bf_start = Instant::now();
        for query in &queries {
            let _ = brute_force_knn(query, &vectors, k);
        }
        let bf_time = bf_start.elapsed();
        // Wall-clock the index on the same queries.
        let hnsw_start = Instant::now();
        for query in &queries {
            let _ = index.search(query, k);
        }
        let hnsw_time = hnsw_start.elapsed();
        let speedup = bf_time.as_secs_f64() / hnsw_time.as_secs_f64();
        // Recall of the approximate results against the exact ones.
        let mut total_recall = 0.0;
        for query in &queries {
            let approx = index.search(query, k);
            let exact = brute_force_knn(query, &vectors, k);
            total_recall += measure_recall_at_k(&approx, &exact, k);
        }
        let avg_recall = total_recall / queries.len() as f32;
        let verdict = if speedup >= targets::HNSW_SPEEDUP_VS_BRUTE_FORCE {
            "PASS"
        } else {
            "FAIL"
        };
        println!(" Brute Force: {:?} ({} queries)", bf_time, queries.len());
        println!(" HNSW: {:?} ({} queries)", hnsw_time, queries.len());
        println!(" Speedup: {:.1}x", speedup);
        println!(" Recall@{}: {:.3}", k, avg_recall);
        println!(
            " Target: {}x speedup ({})",
            targets::HNSW_SPEEDUP_VS_BRUTE_FORCE, verdict
        );
        println!();
    }
}
// ============================================================================
// Latency Distribution Analysis
// ============================================================================
/// Analyze query latency distribution
///
/// Runs 1000 k=10 queries against a 100k-vector index, records per-query
/// wall-clock latency, and prints percentile stats plus PASS/FAIL against
/// the ADR-004 p50/p99 targets.
fn analyze_latency_distribution() {
    println!("\n=== Query Latency Distribution Analysis ===\n");
    let size = MEDIUM_INDEX;
    println!("Building index with {} vectors...", size);
    let index = setup_test_index(size);
    let queries = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);
    let k = 10;
    // One latency sample per query.
    let mut latencies = Vec::with_capacity(queries.len());
    for query in &queries {
        let start = Instant::now();
        let _ = index.search(query, k);
        latencies.push(start.elapsed());
    }
    let stats = PerformanceStats::from_latencies(latencies);
    println!("Query latency statistics (k={}, {} queries):", k, queries.len());
    println!("{}", stats.report());
    println!();
    println!("Performance targets:");
    println!(
        " p50 target: {}ms ({})",
        targets::QUERY_LATENCY_P50_MS,
        if stats.p50 <= Duration::from_millis(targets::QUERY_LATENCY_P50_MS) {
            "PASS"
        } else {
            "FAIL"
        }
    );
    println!(
        " p99 target: {}ms ({})",
        targets::QUERY_LATENCY_P99_MS,
        if stats.p99 <= Duration::from_millis(targets::QUERY_LATENCY_P99_MS) {
            "PASS"
        } else {
            "FAIL"
        }
    );
}
// ============================================================================
// Criterion Groups
// ============================================================================
// Benchmark registration: each group shares the default colored-output
// config; `criterion_main!` below wires all groups into `cargo bench`.
criterion_group!(
    name = search_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_hnsw_search, benchmark_hnsw_search_ef, benchmark_hnsw_vs_brute_force
);
criterion_group!(
    name = insert_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_hnsw_insert_single, benchmark_hnsw_insert_batch
);
criterion_group!(
    name = build_benches;
    config = Criterion::default().with_output_color(true);
    targets = benchmark_hnsw_build, benchmark_hnsw_build_m_param
);
criterion_group!(
    name = recall_benches;
    config = Criterion::default().with_output_color(true);
    targets = measure_recall
);
criterion_main!(search_benches, insert_benches, build_benches, recall_benches);
// ============================================================================
// Additional Analysis Functions (run separately)
// ============================================================================
#[cfg(test)]
mod analysis {
    use super::*;
    /// Prints the HNSW-vs-brute-force speedup table; excluded by default.
    #[test]
    #[ignore] // Run with: cargo test --release -- --ignored --nocapture
    fn run_speedup_analysis() {
        calculate_speedup_ratio();
    }
    /// Prints the query latency distribution; excluded by default.
    #[test]
    #[ignore]
    fn run_latency_analysis() {
        analyze_latency_distribution();
    }
    /// Recall@10 on a 5k-vector index must meet the ADR-004 target.
    #[test]
    fn test_target_recall_at_10() {
        let size = 5_000;
        let vectors = generate_random_vectors(size, PERCH_EMBEDDING_DIM);
        let mut index = SimpleHnswIndex::new_default();
        for vec in &vectors {
            index.add(vec.clone());
        }
        let queries = generate_random_vectors(50, PERCH_EMBEDDING_DIM);
        let mut total_recall = 0.0;
        for query in &queries {
            let hnsw_results = index.search(query, 10);
            let ground_truth = brute_force_knn(query, &vectors, 10);
            total_recall += measure_recall_at_k(&hnsw_results, &ground_truth, 10);
        }
        let avg_recall = total_recall / queries.len() as f32;
        println!("Average Recall@10: {:.3}", avg_recall);
        assert!(
            avg_recall as f64 >= targets::RECALL_AT_10,
            "Recall@10 {} below target {}",
            avg_recall,
            targets::RECALL_AT_10
        );
    }
    /// Reports raw insert throughput. No assertion: the simplified index is
    /// not expected to match the production throughput target.
    #[test]
    fn test_insert_throughput() {
        let vectors = generate_random_vectors(1000, PERCH_EMBEDDING_DIM);
        let mut index = SimpleHnswIndex::new_default();
        let start = Instant::now();
        for vec in &vectors {
            index.add(vec.clone());
        }
        let elapsed = start.elapsed();
        let throughput = vectors.len() as f64 / elapsed.as_secs_f64();
        println!("Insert throughput: {:.0} vectors/sec", throughput);
        // Note: This is a simplified index, real HNSW should achieve higher throughput
    }
}

View File

@@ -0,0 +1,673 @@
//! Benchmark Utilities for 7sense Performance Testing
//!
//! This module provides common utilities for benchmarking:
//! - Random vector generation
//! - Test index setup
//! - Recall calculation
//! - Ground truth computation
//! - Performance metrics
use std::collections::HashSet;
use std::time::{Duration, Instant};
/// Embedding dimensions for Perch 2.0 model
pub const PERCH_EMBEDDING_DIM: usize = 1536;
/// Default HNSW parameters from ADR-004
///
/// `M`: maximum number of bidirectional links created per node at insert time.
pub const DEFAULT_M: usize = 32;
/// Candidate-list width while building the graph (higher = better graph, slower build).
pub const DEFAULT_EF_CONSTRUCTION: usize = 200;
/// Default candidate-list width at query time (higher = better recall, slower queries).
pub const DEFAULT_EF_SEARCH: usize = 128;
/// Wider query beam for recall-critical searches (trades latency for recall).
pub const HIGH_RECALL_EF_SEARCH: usize = 256;
/// Performance targets from ADR-004
pub mod targets {
    use std::time::Duration;
    /// HNSW Search Targets
    // Minimum speedup the HNSW index must show over linear scan.
    pub const HNSW_SPEEDUP_VS_BRUTE_FORCE: f64 = 150.0;
    // Median query latency budget, in milliseconds.
    pub const QUERY_LATENCY_P50_MS: u64 = 10;
    // Tail (p99) query latency budget, in milliseconds.
    pub const QUERY_LATENCY_P99_MS: u64 = 50;
    // Minimum acceptable recall for top-10 / top-100 neighbor queries.
    pub const RECALL_AT_10: f64 = 0.95;
    pub const RECALL_AT_100: f64 = 0.98;
    /// Embedding Inference Targets
    // Audio segments the embedding model must process per second.
    pub const EMBEDDING_SEGMENTS_PER_SECOND: u64 = 100;
    /// Batch Ingestion Targets
    // Sustained bulk-load rate (vectors per minute) and per-second insert rate.
    pub const BATCH_VECTORS_PER_MINUTE: u64 = 1_000_000;
    pub const INSERT_THROUGHPUT_PER_SECOND: u64 = 10_000;
    /// Query Latency Targets
    // End-to-end budget for a full query (API + search + assembly), in ms.
    pub const TOTAL_QUERY_LATENCY_MS: u64 = 100;
    /// Build Time Targets
    // Full index build over one million vectors must finish within 30 minutes.
    pub const BUILD_TIME_1M_VECTORS: Duration = Duration::from_secs(30 * 60);
    /// Quantization Targets
    // Maximum recall degradation tolerated after int8 scalar quantization.
    pub const MAX_RECALL_LOSS_INT8: f64 = 0.03;
}
/// Generate random f32 vectors for benchmarking
///
/// Vectors come from a small deterministic LCG so runs are reproducible
/// without an external RNG crate.
///
/// # Arguments
/// * `count` - Number of vectors to generate
/// * `dims` - Dimensionality of each vector
///
/// # Returns
/// A vector of random f32 vectors, normalized to unit length
pub fn generate_random_vectors(count: usize, dims: usize) -> Vec<Vec<f32>> {
    let mut vectors = Vec::with_capacity(count);
    for i in 0..count {
        let mut vec = Vec::with_capacity(dims);
        // Use a simple deterministic random generator (LCG) for reproducibility
        let mut seed = (i as u64).wrapping_mul(6364136223846793005).wrapping_add(1);
        for _ in 0..dims {
            seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
            // Take the high 32 bits (the best-mixed ones) and map to [-1.0, 1.0].
            // The previous `seed >> 33` produced only 31 bits, so after the
            // affine map every component fell in [-1.0, 0.0) — a heavily
            // biased, negative-orthant-only distribution.
            let val = ((seed >> 32) as u32 as f32) / (u32::MAX as f32) * 2.0 - 1.0;
            vec.push(val);
        }
        // Normalize to unit length so cosine and L2 rankings agree
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }
        vectors.push(vec);
    }
    vectors
}
/// Generate clustered random vectors for more realistic benchmarking
///
/// # Arguments
/// * `count` - Total number of vectors to generate
/// * `dims` - Dimensionality of each vector
/// * `num_clusters` - Number of clusters to create (must be > 0 when `count` > 0)
/// * `cluster_spread` - Standard deviation within clusters (0.0 to 1.0)
///
/// # Returns
/// A vector of random f32 vectors organized around cluster centers,
/// normalized to unit length. Vector `i` belongs to cluster `i % num_clusters`.
pub fn generate_clustered_vectors(
    count: usize,
    dims: usize,
    num_clusters: usize,
    cluster_spread: f32,
) -> Vec<Vec<f32>> {
    let mut vectors = Vec::with_capacity(count);
    // Generate cluster centers
    let centers = generate_random_vectors(num_clusters, dims);
    // Assign vectors to clusters round-robin
    for i in 0..count {
        let cluster_idx = i % num_clusters;
        let center = &centers[cluster_idx];
        let mut vec = Vec::with_capacity(dims);
        // Use deterministic random for offset
        let mut seed = (i as u64).wrapping_mul(2862933555777941757).wrapping_add(3);
        for d in 0..dims {
            seed = seed.wrapping_mul(2862933555777941757).wrapping_add(3);
            // High 32 bits mapped to [-1.0, 1.0]; see generate_random_vectors
            // for why the former `>> 33` form was biased.
            let noise = ((seed >> 32) as u32 as f32) / (u32::MAX as f32) * 2.0 - 1.0;
            vec.push(center[d] + noise * cluster_spread);
        }
        // Normalize to unit length
        let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            for x in vec.iter_mut() {
                *x /= norm;
            }
        }
        vectors.push(vec);
    }
    vectors
}
/// Compute L2 (Euclidean) distance between two vectors.
///
/// Both slices must have equal length (checked in debug builds only).
#[inline]
pub fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let mut acc = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        acc += d * d;
    }
    acc.sqrt()
}
/// Compute squared L2 distance — same ranking as [`l2_distance`] but
/// skips the final `sqrt`, which is cheaper for nearest-neighbor comparisons.
#[inline]
pub fn l2_distance_squared(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    a.iter().zip(b.iter()).fold(0.0, |acc, (x, y)| {
        let d = x - y;
        acc + d * d
    })
}
/// Compute cosine similarity between two vectors.
///
/// Returns 0.0 when either vector has zero norm (similarity is undefined
/// there, and 0.0 is a safe neutral value for ranking).
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let (mut dot, mut sq_a, mut sq_b) = (0.0f32, 0.0f32, 0.0f32);
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }
    let norm_a = sq_a.sqrt();
    let norm_b = sq_b.sqrt();
    if norm_a > 0.0 && norm_b > 0.0 {
        dot / (norm_a * norm_b)
    } else {
        0.0
    }
}
/// Compute brute-force k-nearest neighbors (ground truth)
///
/// # Arguments
/// * `query` - Query vector
/// * `dataset` - Dataset of vectors to search
/// * `k` - Number of neighbors to find
///
/// # Returns
/// Vector of (index, distance) pairs sorted by ascending distance
pub fn brute_force_knn(query: &[f32], dataset: &[Vec<f32>], k: usize) -> Vec<(usize, f32)> {
    let mut distances: Vec<(usize, f32)> = dataset
        .iter()
        .enumerate()
        .map(|(i, vec)| (i, l2_distance(query, vec)))
        .collect();
    // `total_cmp` is a total order over f32 (handles NaN deterministically,
    // unlike `partial_cmp(..).unwrap_or(Equal)` which silently treated NaN as
    // equal to everything), and `sort_unstable_by` avoids the stable sort's
    // scratch allocation; stability is irrelevant since each entry carries
    // its own index.
    distances.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
    distances.truncate(k);
    distances
}
/// Measure recall@k for approximate nearest neighbor results.
///
/// Recall is the fraction of the top-k ground-truth ids that also appear
/// among the top-k approximate results.
///
/// # Arguments
/// * `results` - Approximate results (index, distance) pairs
/// * `ground_truth` - Exact brute-force results (index, distance) pairs
/// * `k` - Number of top results to consider
///
/// # Returns
/// Recall value between 0.0 and 1.0 (0.0 when either input is empty)
pub fn measure_recall_at_k(
    results: &[(usize, f32)],
    ground_truth: &[(usize, f32)],
    k: usize,
) -> f32 {
    // Clamp k to what both lists can actually provide.
    let effective_k = k.min(results.len()).min(ground_truth.len());
    if effective_k == 0 {
        return 0.0;
    }
    let found: HashSet<usize> = results[..effective_k].iter().map(|&(i, _)| i).collect();
    let truth: HashSet<usize> = ground_truth[..effective_k].iter().map(|&(i, _)| i).collect();
    let hits = found.iter().filter(|i| truth.contains(i)).count();
    hits as f32 / effective_k as f32
}
/// Calculate percentile from a sorted slice of durations.
///
/// `p` is given in percent (e.g. 99.0 for p99). Uses nearest-rank rounding
/// and returns `Duration::ZERO` for an empty slice.
pub fn percentile(sorted_latencies: &[Duration], p: f64) -> Duration {
    match sorted_latencies.len() {
        0 => Duration::ZERO,
        n => {
            let idx = ((n as f64 - 1.0) * p / 100.0).round() as usize;
            sorted_latencies[idx.min(n - 1)]
        }
    }
}
/// Performance statistics from benchmark runs
#[derive(Debug, Clone)]
pub struct PerformanceStats {
    pub count: usize,
    pub total_time: Duration,
    pub min: Duration,
    pub max: Duration,
    pub mean: Duration,
    pub p50: Duration,
    pub p95: Duration,
    pub p99: Duration,
    pub p999: Duration,
    pub throughput_per_sec: f64,
}
impl PerformanceStats {
    /// Calculate statistics from a collection of latency measurements.
    ///
    /// The input does not need to be sorted; it is sorted in place here.
    /// An empty input produces all-zero statistics.
    pub fn from_latencies(mut latencies: Vec<Duration>) -> Self {
        if latencies.is_empty() {
            return Self {
                count: 0,
                total_time: Duration::ZERO,
                min: Duration::ZERO,
                max: Duration::ZERO,
                mean: Duration::ZERO,
                p50: Duration::ZERO,
                p95: Duration::ZERO,
                p99: Duration::ZERO,
                p999: Duration::ZERO,
                throughput_per_sec: 0.0,
            };
        }
        latencies.sort();
        let count = latencies.len();
        let total_time: Duration = latencies.iter().sum();
        Self {
            count,
            total_time,
            min: latencies[0],
            max: latencies[count - 1],
            mean: total_time / count as u32,
            p50: percentile(&latencies, 50.0),
            p95: percentile(&latencies, 95.0),
            p99: percentile(&latencies, 99.0),
            p999: percentile(&latencies, 99.9),
            throughput_per_sec: count as f64 / total_time.as_secs_f64(),
        }
    }
    /// Check if stats meet p99 latency target
    pub fn meets_p99_target(&self, target_ms: u64) -> bool {
        Duration::from_millis(target_ms) >= self.p99
    }
    /// Check if stats meet throughput target
    pub fn meets_throughput_target(&self, target_per_sec: u64) -> bool {
        self.throughput_per_sec >= target_per_sec as f64
    }
    /// Format as a readable report
    pub fn report(&self) -> String {
        format!(
            "Count: {}\n\
             Total Time: {:?}\n\
             Min: {:?}\n\
             Max: {:?}\n\
             Mean: {:?}\n\
             P50: {:?}\n\
             P95: {:?}\n\
             P99: {:?}\n\
             P99.9: {:?}\n\
             Throughput: {:.2} ops/sec",
            self.count,
            self.total_time,
            self.min,
            self.max,
            self.mean,
            self.p50,
            self.p95,
            self.p99,
            self.p999,
            self.throughput_per_sec
        )
    }
}
/// Simple HNSW-like index for benchmarking
/// This is a simplified implementation for benchmark purposes
pub struct SimpleHnswIndex {
vectors: Vec<Vec<f32>>,
dims: usize,
m: usize,
ef_construction: usize,
ef_search: usize,
// Simplified graph structure: each vector has a list of neighbor indices
graph: Vec<Vec<usize>>,
}
impl SimpleHnswIndex {
/// Create a new empty index
pub fn new(dims: usize, m: usize, ef_construction: usize, ef_search: usize) -> Self {
Self {
vectors: Vec::new(),
dims,
m,
ef_construction,
ef_search,
graph: Vec::new(),
}
}
/// Create an index with default parameters for Perch embeddings
pub fn new_default() -> Self {
Self::new(
PERCH_EMBEDDING_DIM,
DEFAULT_M,
DEFAULT_EF_CONSTRUCTION,
DEFAULT_EF_SEARCH,
)
}
/// Get the number of vectors in the index
pub fn len(&self) -> usize {
self.vectors.len()
}
/// Check if the index is empty
pub fn is_empty(&self) -> bool {
self.vectors.is_empty()
}
/// Add a single vector to the index
pub fn add(&mut self, vector: Vec<f32>) -> usize {
assert_eq!(vector.len(), self.dims);
let id = self.vectors.len();
// Find neighbors for the new vector
let neighbors = if self.vectors.is_empty() {
Vec::new()
} else {
self.search_internal(&vector, self.m.min(self.vectors.len()))
.into_iter()
.map(|(idx, _)| idx)
.collect()
};
self.vectors.push(vector);
self.graph.push(neighbors.clone());
// Update bidirectional connections
for &neighbor_id in &neighbors {
if self.graph[neighbor_id].len() < self.m * 2 {
self.graph[neighbor_id].push(id);
}
}
id
}
/// Batch add vectors to the index
pub fn batch_add(&mut self, vectors: Vec<Vec<f32>>) -> Vec<usize> {
vectors.into_iter().map(|v| self.add(v)).collect()
}
/// Search for k nearest neighbors
pub fn search(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
assert_eq!(query.len(), self.dims);
if self.vectors.is_empty() {
return Vec::new();
}
self.search_internal(query, k)
}
/// Internal search implementation with simplified HNSW-like traversal
fn search_internal(&self, query: &[f32], k: usize) -> Vec<(usize, f32)> {
use std::collections::{BinaryHeap, HashSet};
use std::cmp::Reverse;
let ef = self.ef_search.max(k);
// Start from a random entry point
let entry_point = 0;
let mut visited: HashSet<usize> = HashSet::new();
let mut candidates: BinaryHeap<Reverse<(ordered_float::OrderedFloat<f32>, usize)>> =
BinaryHeap::new();
let mut results: BinaryHeap<(ordered_float::OrderedFloat<f32>, usize)> = BinaryHeap::new();
let entry_dist = l2_distance(query, &self.vectors[entry_point]);
candidates.push(Reverse((ordered_float::OrderedFloat(entry_dist), entry_point)));
results.push((ordered_float::OrderedFloat(entry_dist), entry_point));
visited.insert(entry_point);
while let Some(Reverse((dist, current))) = candidates.pop() {
let worst_dist = if results.len() >= ef {
results.peek().map(|(d, _)| d.0).unwrap_or(f32::MAX)
} else {
f32::MAX
};
if dist.0 > worst_dist {
break;
}
// Explore neighbors
for &neighbor in &self.graph[current] {
if visited.insert(neighbor) {
let neighbor_dist = l2_distance(query, &self.vectors[neighbor]);
if results.len() < ef || neighbor_dist < worst_dist {
candidates.push(Reverse((
ordered_float::OrderedFloat(neighbor_dist),
neighbor,
)));
results.push((ordered_float::OrderedFloat(neighbor_dist), neighbor));
if results.len() > ef {
results.pop();
}
}
}
}
}
// Convert to output format and sort by distance
let mut output: Vec<(usize, f32)> =
results.into_iter().map(|(d, idx)| (idx, d.0)).collect();
output.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
output.truncate(k);
output
}
/// Set ef_search parameter for queries
pub fn set_ef_search(&mut self, ef: usize) {
self.ef_search = ef;
}
}
/// Setup a test index with the specified number of vectors.
///
/// Builds a default-parameter index and fills it with `size` deterministic
/// random Perch-dimension vectors.
pub fn setup_test_index(size: usize) -> SimpleHnswIndex {
    let mut index = SimpleHnswIndex::new_default();
    for vec in generate_random_vectors(size, PERCH_EMBEDDING_DIM) {
        index.add(vec);
    }
    index
}
/// Scalar quantizer for int8 compression
///
/// Maps each dimension's observed [min, max] range linearly onto [0, 255].
pub struct ScalarQuantizer {
    // Per-dimension minimum seen during calibration.
    mins: Vec<f32>,
    // Per-dimension maximum seen during calibration.
    maxs: Vec<f32>,
    // Per-dimension scale mapping (val - min) onto [0, 255].
    scales: Vec<f32>,
    dims: usize,
}
impl ScalarQuantizer {
    /// Create a new quantizer for the specified dimensions
    pub fn new(dims: usize) -> Self {
        Self {
            mins: vec![f32::MAX; dims],
            maxs: vec![f32::MIN; dims],
            scales: vec![1.0; dims],
            dims,
        }
    }
    /// Calibrate the quantizer from a sample of embeddings.
    ///
    /// Records the per-dimension min/max over the sample and derives the
    /// scale that maps that range onto [0, 255].
    pub fn calibrate(&mut self, embeddings: &[Vec<f32>]) {
        for embedding in embeddings {
            for (d, &val) in embedding.iter().enumerate() {
                self.mins[d] = self.mins[d].min(val);
                self.maxs[d] = self.maxs[d].max(val);
            }
        }
        for d in 0..self.dims {
            let range = self.maxs[d] - self.mins[d];
            // Degenerate (constant) dimensions keep a neutral scale of 1.0.
            self.scales[d] = if range > 0.0 { 255.0 / range } else { 1.0 };
        }
    }
    /// Quantize a float32 embedding to int8 (one byte per dimension).
    pub fn quantize(&self, embedding: &[f32]) -> Vec<u8> {
        let mut out = Vec::with_capacity(embedding.len());
        for (d, &val) in embedding.iter().enumerate() {
            let scaled = (val - self.mins[d]) * self.scales[d];
            // Out-of-range values (beyond the calibration sample) saturate.
            out.push(scaled.round().clamp(0.0, 255.0) as u8);
        }
        out
    }
    /// Dequantize an int8 embedding back to float32 (inverse of `quantize`).
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        let mut out = Vec::with_capacity(quantized.len());
        for (d, &q) in quantized.iter().enumerate() {
            out.push(f32::from(q) / self.scales[d] + self.mins[d]);
        }
        out
    }
}
/// Timer utility for measuring operations
pub struct Timer {
    // Wall-clock instant captured when the timer was created.
    start: Instant,
}
impl Timer {
    /// Start a new timer
    pub fn start() -> Self {
        Timer {
            start: Instant::now(),
        }
    }
    /// Get elapsed time since the timer was started
    pub fn elapsed(&self) -> Duration {
        self.start.elapsed()
    }
    /// Consume the timer and return the total elapsed time
    pub fn stop(self) -> Duration {
        self.elapsed()
    }
}
/// Measure execution time of a closure, returning its result and duration.
pub fn measure_time<F, R>(f: F) -> (R, Duration)
where
    F: FnOnce() -> R,
{
    let started = Instant::now();
    let result = f();
    (result, started.elapsed())
}
/// Measure average execution time over multiple iterations
///
/// Runs `f` exactly `iterations` times, timing each call individually, and
/// aggregates the samples into a `PerformanceStats` summary.
///
/// # Arguments
/// * `iterations` - number of timed invocations (0 yields empty stats)
/// * `f` - the operation to measure
pub fn measure_average<F>(iterations: usize, mut f: F) -> PerformanceStats
where
    // `FnMut()` — the explicit `-> ()` return was redundant (clippy::unused_unit).
    F: FnMut(),
{
    let latencies: Vec<Duration> = (0..iterations)
        .map(|_| {
            let start = Instant::now();
            f();
            start.elapsed()
        })
        .collect();
    PerformanceStats::from_latencies(latencies)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Generated vectors must have the requested shape and unit norm.
    #[test]
    fn test_generate_random_vectors() {
        let vectors = generate_random_vectors(100, 1536);
        assert_eq!(vectors.len(), 100);
        assert_eq!(vectors[0].len(), 1536);
        for vec in &vectors {
            let norm = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
            assert!((norm - 1.0).abs() < 1e-5);
        }
    }

    /// Distance between orthogonal unit vectors is sqrt(2).
    #[test]
    fn test_l2_distance() {
        let dist = l2_distance(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0]);
        assert!((dist - std::f32::consts::SQRT_2).abs() < 1e-5);
    }

    /// Recall counts the overlap of top-k index sets.
    #[test]
    fn test_recall_at_k() {
        let results: Vec<(usize, f32)> = vec![(0, 0.1), (1, 0.2), (2, 0.3), (3, 0.4), (5, 0.5)];
        let ground_truth: Vec<(usize, f32)> =
            vec![(0, 0.1), (1, 0.2), (2, 0.3), (4, 0.4), (5, 0.5)];
        // 4 of the 5 ground-truth ids appear in the results.
        let recall = measure_recall_at_k(&results, &ground_truth, 5);
        assert!((recall - 0.8).abs() < 1e-5);
    }

    /// Quantize/dequantize round trips must stay close to the original.
    #[test]
    fn test_scalar_quantizer() {
        let vectors = generate_random_vectors(100, 128);
        let mut quantizer = ScalarQuantizer::new(128);
        quantizer.calibrate(&vectors);
        for vec in &vectors {
            let roundtrip = quantizer.dequantize(&quantizer.quantize(vec));
            let total_err: f32 = vec
                .iter()
                .zip(roundtrip.iter())
                .map(|(a, b)| (a - b).abs())
                .sum();
            // Mean absolute round-trip error should be small.
            assert!(total_err / vec.len() as f32 < 0.1);
        }
    }

    /// Percentile summaries must be ordered between min and max.
    #[test]
    fn test_performance_stats() {
        let latencies: Vec<Duration> = (0..100)
            .map(|i| Duration::from_micros(100 + i * 10))
            .collect();
        let stats = PerformanceStats::from_latencies(latencies);
        assert_eq!(stats.count, 100);
        assert!(stats.min <= stats.p50);
        assert!(stats.p50 <= stats.p95);
        assert!(stats.p95 <= stats.p99);
        assert!(stats.p99 <= stats.max);
    }
}