Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/examples/dna/benches/biomarker_bench.rs
+++ b/vendor/ruvector/examples/dna/benches/biomarker_bench.rs
@@ -0,0 +1,181 @@
+//! Criterion benchmarks for Biomarker Analysis Engine
+//!
+//! Performance benchmarks covering ADR-014 targets:
+//! - Risk scoring (<50 μs)
+//! - Profile vector encoding (<100 μs)
+//! - Population generation (<500ms for 10k)
+//! - Streaming throughput (>100k readings/sec)
+//! - Z-score and classification (<5 μs)
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rvdna::biomarker::*;
+use rvdna::biomarker_stream::*;
+use std::collections::HashMap;
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/// Returns the ten-entry rsID -> genotype map used as the representative
+/// benchmark input.
+fn sample_genotypes() -> HashMap<String, String> {
+    const CALLS: [(&str, &str); 10] = [
+        ("rs429358", "TT"),
+        ("rs7412", "CC"),
+        ("rs4680", "AG"),
+        ("rs1799971", "AA"),
+        ("rs762551", "AA"),
+        ("rs1801133", "AG"),
+        ("rs1801131", "TT"),
+        ("rs1042522", "CG"),
+        ("rs80357906", "DD"),
+        ("rs4363657", "TT"),
+    ];
+
+    CALLS
+        .iter()
+        .map(|&(rsid, call)| (rsid.to_string(), call.to_string()))
+        .collect()
+}
+
+/// Returns the sample genotypes extended with seven further SNPs (17 entries in total).
+fn full_panel_genotypes() -> HashMap<String, String> {
+    // All 17 SNPs from health.rs
+    const EXTRA_CALLS: [(&str, &str); 7] = [
+        ("rs28897696", "GG"),
+        ("rs11571833", "AA"),
+        ("rs4988235", "AG"),
+        ("rs53576", "GG"),
+        ("rs6311", "CT"),
+        ("rs1800497", "AG"),
+        ("rs1800566", "CC"),
+    ];
+
+    let mut panel = sample_genotypes();
+    panel.extend(
+        EXTRA_CALLS
+            .iter()
+            .map(|&(rsid, call)| (rsid.to_string(), call.to_string())),
+    );
+    panel
+}
+
+// ============================================================================
+// Risk Scoring Benchmarks (target: <50 μs)
+// ============================================================================
+
+/// Times `compute_risk_scores` on the 10-SNP sample map and on the 17-SNP full panel.
+fn risk_scoring_benchmarks(c: &mut Criterion) {
+    let mut scoring = c.benchmark_group("biomarker_scoring");
+
+    // Representative genotype map, built once outside the timed loop.
+    let sample = sample_genotypes();
+    scoring.bench_function("compute_risk_scores", |bencher| {
+        bencher.iter(|| black_box(compute_risk_scores(&sample)));
+    });
+
+    scoring.bench_function("compute_risk_scores_full_panel", |bencher| {
+        // Built inside the benchmark closure, but before timing starts.
+        let panel = full_panel_genotypes();
+        bencher.iter(|| black_box(compute_risk_scores(&panel)));
+    });
+
+    scoring.finish();
+}
+
+// ============================================================================
+// Profile Vector Benchmarks (target: <100 μs)
+// ============================================================================
+
+/// Times `encode_profile_vector` on the profile computed from the sample genotypes.
+fn vector_encoding_benchmarks(c: &mut Criterion) {
+    let mut vectors = c.benchmark_group("biomarker_vector");
+
+    // The profile is computed once up front; only the encoding step is timed.
+    let genotypes = sample_genotypes();
+    let risk_profile = compute_risk_scores(&genotypes);
+
+    vectors.bench_function("encode_profile_vector", |bencher| {
+        bencher.iter(|| black_box(encode_profile_vector(&risk_profile)));
+    });
+
+    vectors.finish();
+}
+
+// ============================================================================
+// Population Generation Benchmarks (target: <500ms for 10k)
+// ============================================================================
+
+/// Times `generate_synthetic_population` for populations of 100 and 1000, seed 42.
+///
+/// The 10k population size named in the section header is not measured here.
+fn population_benchmarks(c: &mut Criterion) {
+    let mut population = c.benchmark_group("biomarker_population");
+
+    for &(label, size) in &[("generate_100", 100), ("generate_1000", 1000)] {
+        population.bench_function(label, |bencher| {
+            bencher.iter(|| black_box(generate_synthetic_population(size, 42)));
+        });
+    }
+
+    population.finish();
+}
+
+// ============================================================================
+// Streaming Benchmarks (target: >100k readings/sec)
+// ============================================================================
+
+/// Streaming benchmarks: reading generation, per-reading processing and ring-buffer pushes.
+///
+/// * `generate_1000_readings` times `generate_readings` for 1000 readings (seed 42).
+/// * `process_1000_readings` times feeding 1000 pre-generated readings through a
+///   fresh `StreamProcessor`. The processor is built in the `iter_batched` setup
+///   closure, so construction and the config clone are excluded from the
+///   measurement and only `process_reading` calls are timed.
+/// * `ring_buffer_1000_push` times 1000 pushes into a `RingBuffer` created with
+///   `RingBuffer::new(100)`; the buffer is returned from the routine so the
+///   pushes stay observable to the optimizer.
+fn streaming_benchmarks(c: &mut Criterion) {
+    let mut group = c.benchmark_group("biomarker_streaming");
+
+    group.bench_function("generate_1000_readings", |b| {
+        let config = StreamConfig::default();
+        b.iter(|| black_box(generate_readings(&config, 1000, 42)));
+    });
+
+    group.bench_function("process_1000_readings", |b| {
+        let config = StreamConfig::default();
+        let readings = generate_readings(&config, 1000, 42);
+        b.iter_batched(
+            // Setup (untimed): a fresh processor per measured run.
+            || StreamProcessor::new(config.clone()),
+            // Routine (timed): process every reading, then hand the processor
+            // back so its drop is not part of the measurement either.
+            |mut processor| {
+                for reading in &readings {
+                    black_box(processor.process_reading(reading));
+                }
+                processor
+            },
+            criterion::BatchSize::SmallInput,
+        );
+    });
+
+    group.bench_function("ring_buffer_1000_push", |b| {
+        b.iter(|| {
+            let mut rb: RingBuffer<f64> = RingBuffer::new(100);
+            for i in 0..1000 {
+                rb.push(black_box(i as f64));
+            }
+            // Returning the buffer keeps the pushes from being treated as dead code.
+            rb
+        });
+    });
+
+    group.finish();
+}
+
+// ============================================================================
+// Z-Score and Classification Benchmarks (target: <5 μs)
+// ============================================================================
+
+/// Times `z_score` and `classify_biomarker` against the `biomarker_references()` table.
+///
+/// Every input is routed through `black_box` as well as every output. The
+/// measured value (`180.0`) is a literal and the reference rows come from one
+/// call made before timing, so without this the optimizer would be free to
+/// fold these very small computations away and report near-zero timings.
+fn classification_benchmarks(c: &mut Criterion) {
+    let mut group = c.benchmark_group("biomarker_classification");
+    let refs = biomarker_references();
+
+    group.bench_function("z_score_single", |b| {
+        let r = &refs[0];
+        b.iter(|| black_box(z_score(black_box(180.0), black_box(r))));
+    });
+
+    group.bench_function("classify_single", |b| {
+        let r = &refs[0];
+        b.iter(|| black_box(classify_biomarker(black_box(180.0), black_box(r))));
+    });
+
+    group.bench_function("z_score_all_biomarkers", |b| {
+        b.iter(|| {
+            for r in black_box(refs) {
+                // Score the midpoint of each reference's normal range.
+                let mid = (r.normal_low + r.normal_high) / 2.0;
+                black_box(z_score(black_box(mid), r));
+            }
+        });
+    });
+
+    group.finish();
+}
+
+// ============================================================================
+// Criterion Configuration
+// ============================================================================
+
+// Collects the five biomarker benchmark functions into the Criterion group
+// `benches`; `criterion_main!` then expands to the entry point that runs it.
+criterion_group!(
+    benches,
+    risk_scoring_benchmarks,
+    vector_encoding_benchmarks,
+    population_benchmarks,
+    streaming_benchmarks,
+    classification_benchmarks,
+);
+criterion_main!(benches);
--- a/vendor/ruvector/examples/dna/benches/dna_bench.rs
+++ b/vendor/ruvector/examples/dna/benches/dna_bench.rs
@@ -0,0 +1,420 @@
+//! Criterion benchmarks for DNA Analyzer
+//!
+//! Comprehensive performance benchmarks covering:
+//! - K-mer encoding and HNSW indexing
+//! - Sequence alignment
+//! - Variant calling
+//! - Protein translation
+//! - Full pipeline integration
+
+use ::rvdna::prelude::*;
+use ::rvdna::types::KmerIndex as TypesKmerIndex;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+/// Builds a `DnaSequence` of `len` bases drawn from A/C/G/T with an RNG seeded from `seed`.
+fn random_dna(len: usize, seed: u64) -> DnaSequence {
+    const ALPHABET: [Nucleotide; 4] = [
+        Nucleotide::A,
+        Nucleotide::C,
+        Nucleotide::G,
+        Nucleotide::T,
+    ];
+
+    let mut rng = StdRng::seed_from_u64(seed);
+    let mut drawn: Vec<Nucleotide> = Vec::with_capacity(len);
+    for _ in 0..len {
+        drawn.push(ALPHABET[rng.gen_range(0..4)]);
+    }
+    DnaSequence::new(drawn)
+}
+
+/// Builds `count` random sequences of length `len`; sequence `i` uses seed `seed + i`.
+fn random_sequences(count: usize, len: usize, seed: u64) -> Vec<DnaSequence> {
+    let mut sequences = Vec::with_capacity(count);
+    for offset in 0..count {
+        sequences.push(random_dna(len, seed + offset as u64));
+    }
+    sequences
+}
+
+// ============================================================================
+// K-mer Benchmarks
+// ============================================================================
+
+/// Times k-mer vector encoding at three sequence lengths, insertion of 100
+/// sequences into a fresh `TypesKmerIndex`, and a top-10 search against a
+/// pre-populated index.
+fn kmer_benchmarks(c: &mut Criterion) {
+    /// Encodes each sequence with `to_kmer_vector(11, 512)` and inserts it into
+    /// the index's database under the id `seq{i}`.
+    fn insert_all(index: &TypesKmerIndex, sequences: &[DnaSequence]) {
+        for (i, seq) in sequences.iter().enumerate() {
+            let vector = seq.to_kmer_vector(11, 512).unwrap();
+            let entry = ruvector_core::VectorEntry {
+                id: Some(format!("seq{}", i)),
+                vector,
+                metadata: None,
+            };
+            index.db().insert(entry).unwrap();
+        }
+    }
+
+    let mut kmer = c.benchmark_group("kmer");
+
+    // Encoding at 1 kb, 10 kb and 100 kb.
+    for &(label, len) in &[
+        ("encode_1kb", 1_000usize),
+        ("encode_10kb", 10_000),
+        ("encode_100kb", 100_000),
+    ] {
+        kmer.bench_function(label, |bencher| {
+            let seq = random_dna(len, 42);
+            bencher.iter(|| black_box(seq.to_kmer_vector(11, 512).unwrap()));
+        });
+    }
+
+    // HNSW index insertion: every iteration creates a new temp dir and index,
+    // so that setup is part of the measured time.
+    kmer.bench_function("index_insert_100", |bencher| {
+        let sequences = random_sequences(100, 100, 42);
+        bencher.iter(|| {
+            let temp = tempfile::TempDir::new().unwrap();
+            let idx_path = temp.path().join("idx");
+            let index = TypesKmerIndex::new(11, 512, idx_path.to_str().unwrap()).unwrap();
+            insert_all(&index, &sequences);
+            black_box(index)
+        });
+    });
+
+    // HNSW search: the index is populated once, only the query is timed.
+    kmer.bench_function("search_top10", |bencher| {
+        let sequences = random_sequences(100, 100, 42);
+        let temp = tempfile::TempDir::new().unwrap();
+        let idx_path = temp.path().join("idx");
+        let index = TypesKmerIndex::new(11, 512, idx_path.to_str().unwrap()).unwrap();
+        insert_all(&index, &sequences);
+
+        let query = random_dna(100, 999);
+        let query_vec = query.to_kmer_vector(11, 512).unwrap();
+
+        bencher.iter(|| {
+            let request = ruvector_core::SearchQuery {
+                vector: query_vec.clone(),
+                k: 10,
+                filter: None,
+                ef_search: None,
+            };
+            black_box(index.db().search(request).unwrap())
+        });
+    });
+
+    kmer.finish();
+}
+
+// ============================================================================
+// Alignment Benchmarks
+// ============================================================================
+
+/// Times one-hot encoding, attention-based alignment and Smith-Waterman alignment.
+fn alignment_benchmarks(c: &mut Criterion) {
+    let mut alignment = c.benchmark_group("alignment");
+
+    alignment.bench_function("one_hot_encoding_1kb", |bencher| {
+        let sequence = random_dna(1_000, 42);
+        bencher.iter(|| black_box(sequence.encode_one_hot()));
+    });
+
+    // 100-base read against a 1000-base target.
+    alignment.bench_function("attention_align_100bp", |bencher| {
+        let read = random_dna(100, 42);
+        let target = random_dna(1_000, 43);
+        bencher.iter(|| black_box(read.align_with_attention(&target).unwrap()));
+    });
+
+    // 100-base read against a 500-base target, default alignment config.
+    alignment.bench_function("smith_waterman_100bp", |bencher| {
+        let read = random_dna(100, 42);
+        let target = random_dna(500, 43);
+        let sw = SmithWaterman::new(AlignmentConfig::default());
+        bencher.iter(|| black_box(sw.align(&read, &target).unwrap()));
+    });
+
+    alignment.finish();
+}
+
+// ============================================================================
+// Variant Calling Benchmarks
+// ============================================================================
+
+/// Times `VariantCaller::call_snp` on one hand-written pileup and on 1000 random pileups.
+fn variant_benchmarks(c: &mut Criterion) {
+    const ALPHABET: [u8; 4] = *b"ACGT";
+
+    let mut variants = c.benchmark_group("variant");
+
+    variants.bench_function("snp_calling_single", |bencher| {
+        let caller = VariantCaller::new(VariantCallerConfig::default());
+        // Two 'A' reads followed by eight 'G' reads, checked against reference 'A'.
+        let column = PileupColumn {
+            bases: vec![b'A', b'A', b'G', b'G', b'G', b'G', b'G', b'G', b'G', b'G'],
+            qualities: vec![35; 10],
+            position: 12345,
+            chromosome: 1,
+        };
+
+        bencher.iter(|| black_box(caller.call_snp(&column, b'A')));
+    });
+
+    variants.bench_function("snp_calling_1000_positions", |bencher| {
+        let caller = VariantCaller::new(VariantCallerConfig::default());
+        let mut rng = StdRng::seed_from_u64(42);
+
+        // Per position: 20 random bases are drawn first, then 20 random qualities
+        // from 20..41; the reference base cycles through A, C, G, T.
+        let mut pileups: Vec<(PileupColumn, u8)> = Vec::with_capacity(1000);
+        for i in 0..1000usize {
+            let mut bases: Vec<u8> = Vec::with_capacity(20);
+            for _ in 0..20 {
+                bases.push(ALPHABET[rng.gen_range(0..4)]);
+            }
+            let mut qualities: Vec<u8> = Vec::with_capacity(20);
+            for _ in 0..20 {
+                qualities.push(rng.gen_range(20..41));
+            }
+            let column = PileupColumn {
+                bases,
+                qualities,
+                position: i as u64,
+                chromosome: 1,
+            };
+            pileups.push((column, ALPHABET[i % 4]));
+        }
+
+        bencher.iter(|| {
+            // Count the positions at which a SNP is called.
+            let called = pileups
+                .iter()
+                .filter(|(column, ref_base)| caller.call_snp(column, *ref_base).is_some())
+                .count();
+            black_box(called)
+        });
+    });
+
+    variants.finish();
+}
+
+// ============================================================================
+// Protein Analysis Benchmarks
+// ============================================================================
+
+/// Times DNA translation, contact-graph construction and contact prediction.
+fn protein_benchmarks(c: &mut Criterion) {
+    let mut proteins = c.benchmark_group("protein");
+
+    // 1002 bases, i.e. a whole number of codons.
+    proteins.bench_function("translate_1kb", |bencher| {
+        let dna = random_dna(1_002, 42);
+        bencher.iter(|| black_box(dna.translate().unwrap()));
+    });
+
+    proteins.bench_function("contact_graph_100residues", |bencher| {
+        let chain = create_random_protein(100, 42);
+        bencher.iter(|| black_box(chain.build_contact_graph(8.0).unwrap()));
+    });
+
+    // The contact graph is built once; only the prediction is timed.
+    proteins.bench_function("contact_prediction_100residues", |bencher| {
+        let chain = create_random_protein(100, 42);
+        let contacts = chain.build_contact_graph(8.0).unwrap();
+        bencher.iter(|| black_box(chain.predict_contacts(&contacts).unwrap()));
+    });
+
+    proteins.finish();
+}
+
+// ============================================================================
+// RVDNA Format Benchmarks
+// ============================================================================
+
+/// Times 2-bit encoding at two sequence lengths and FASTA-text to RVDNA conversion.
+fn rvdna_benchmarks(c: &mut Criterion) {
+    let mut format = c.benchmark_group("rvdna");
+
+    for &(label, len) in &[("encode_2bit_1kb", 1_000usize), ("encode_2bit_100kb", 100_000)] {
+        format.bench_function(label, |bencher| {
+            let seq = random_dna(len, 42);
+            bencher.iter(|| black_box(rvdna::encode_2bit(seq.bases())));
+        });
+    }
+
+    format.bench_function("fasta_to_rvdna_1kb", |bencher| {
+        // Render the random sequence as text; anything outside A/C/G/T becomes 'N'.
+        let source = random_dna(1_000, 42);
+        let mut seq_str = String::new();
+        for base in source.bases().iter() {
+            seq_str.push(match base {
+                Nucleotide::A => 'A',
+                Nucleotide::C => 'C',
+                Nucleotide::G => 'G',
+                Nucleotide::T => 'T',
+                _ => 'N',
+            });
+        }
+        bencher.iter(|| black_box(rvdna::fasta_to_rvdna(&seq_str, 11, 256, 1000).unwrap()));
+    });
+
+    format.finish();
+}
+
+// ============================================================================
+// Epigenomics Benchmarks
+// ============================================================================
+
+/// Times cancer-signal detection and Horvath-clock age prediction on 1000-site profiles.
+fn epigenomics_benchmarks(c: &mut Criterion) {
+    let mut epi = c.benchmark_group("epigenomics");
+
+    epi.bench_function("cancer_signal_1000_sites", |bencher| {
+        // Sites (1, 0)..(1, 999) with beta values i / 1000.
+        let sites: Vec<(u8, u64)> = (0..1000).map(|i| (1u8, i as u64)).collect();
+        let beta_values: Vec<f32> = (0..1000).map(|i| i as f32 / 1000.0).collect();
+        let methylation = rvdna::MethylationProfile::from_beta_values(sites, beta_values);
+        let detector = rvdna::CancerSignalDetector::new();
+        bencher.iter(|| black_box(detector.detect(&methylation)));
+    });
+
+    epi.bench_function("horvath_clock_1000_sites", |bencher| {
+        // Same sites, with beta values i / 2000 + 0.25.
+        let sites: Vec<(u8, u64)> = (0..1000).map(|i| (1u8, i as u64)).collect();
+        let beta_values: Vec<f32> = (0..1000).map(|i| i as f32 / 2000.0 + 0.25).collect();
+        let methylation = rvdna::MethylationProfile::from_beta_values(sites, beta_values);
+        let clock = rvdna::HorvathClock::default_clock();
+        bencher.iter(|| black_box(clock.predict_age(&methylation)));
+    });
+
+    epi.finish();
+}
+
+// ============================================================================
+// Protein Analysis Benchmarks (extended)
+// ============================================================================
+
+/// Times `molecular_weight` and `isoelectric_point` on a protein translated from
+/// 900 random bases.
+fn protein_extended_benchmarks(c: &mut Criterion) {
+    /// Renders a random sequence as ASCII bytes; anything outside A/C/G/T becomes `N`.
+    fn random_dna_ascii(len: usize, seed: u64) -> Vec<u8> {
+        random_dna(len, seed)
+            .bases()
+            .iter()
+            .map(|n| match n {
+                Nucleotide::A => b'A',
+                Nucleotide::C => b'C',
+                Nucleotide::G => b'G',
+                Nucleotide::T => b'T',
+                _ => b'N',
+            })
+            .collect::<Vec<u8>>()
+    }
+
+    let mut analysis = c.benchmark_group("protein_analysis");
+
+    analysis.bench_function("molecular_weight_300aa", |bencher| {
+        let protein = rvdna::translate_dna(&random_dna_ascii(900, 42));
+        bencher.iter(|| black_box(rvdna::molecular_weight(&protein)));
+    });
+
+    analysis.bench_function("isoelectric_point_300aa", |bencher| {
+        let protein = rvdna::translate_dna(&random_dna_ascii(900, 42));
+        bencher.iter(|| black_box(rvdna::isoelectric_point(&protein)));
+    });
+
+    analysis.finish();
+}
+
+// ============================================================================
+// Full Pipeline Benchmarks
+// ============================================================================
+
+/// Times one pass of the combined pipeline: k-mer encoding of the reference,
+/// attention alignment of 20 reads, a single SNP call, and translation.
+fn pipeline_benchmarks(c: &mut Criterion) {
+    let mut pipeline = c.benchmark_group("pipeline");
+
+    pipeline.bench_function("full_pipeline_1kb", |bencher| {
+        let reference = random_dna(1_000, 42);
+        let reads = random_sequences(20, 150, 43);
+        let caller = VariantCaller::new(VariantCallerConfig::default());
+
+        bencher.iter(|| {
+            // Stage 1: k-mer encoding of the reference.
+            let ref_vec = reference.to_kmer_vector(11, 512).unwrap();
+
+            // Stage 2: align every read, keeping only the successful alignments.
+            let alignments: Vec<_> = reads
+                .iter()
+                .filter_map(|read| read.align_with_attention(&reference).ok())
+                .collect();
+
+            // Stage 3: call a variant on one fixed pileup column.
+            let column = PileupColumn {
+                bases: vec![b'A', b'G', b'G', b'G', b'A', b'G', b'G', b'A', b'G', b'G'],
+                qualities: vec![35; 10],
+                position: 0,
+                chromosome: 1,
+            };
+            let variants: Vec<_> = caller.call_snp(&column, b'A').into_iter().collect();
+
+            // Stage 4: translate the reference to protein.
+            let protein = reference.translate().unwrap();
+
+            black_box((ref_vec, alignments, variants, protein))
+        });
+    });
+
+    pipeline.finish();
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/// Builds a `ProteinSequence` of `len` residues drawn from the twelve residue
+/// kinds listed below, using an RNG seeded from `seed`.
+fn create_random_protein(len: usize, seed: u64) -> ProteinSequence {
+    const PALETTE: [ProteinResidue; 12] = [
+        ProteinResidue::A,
+        ProteinResidue::C,
+        ProteinResidue::D,
+        ProteinResidue::E,
+        ProteinResidue::F,
+        ProteinResidue::G,
+        ProteinResidue::H,
+        ProteinResidue::I,
+        ProteinResidue::K,
+        ProteinResidue::L,
+        ProteinResidue::M,
+        ProteinResidue::N,
+    ];
+
+    let mut rng = StdRng::seed_from_u64(seed);
+    let mut chain: Vec<ProteinResidue> = Vec::with_capacity(len);
+    for _ in 0..len {
+        chain.push(PALETTE[rng.gen_range(0..PALETTE.len())]);
+    }
+
+    ProteinSequence::new(chain)
+}
+
+// ============================================================================
+// Criterion Configuration
+// ============================================================================
+
+// Collects the eight DNA-analyzer benchmark functions into the Criterion group
+// `benches`; `criterion_main!` then expands to the entry point that runs it.
+criterion_group!(
+    benches,
+    kmer_benchmarks,
+    alignment_benchmarks,
+    variant_benchmarks,
+    protein_benchmarks,
+    rvdna_benchmarks,
+    epigenomics_benchmarks,
+    protein_extended_benchmarks,
+    pipeline_benchmarks
+);
+
+criterion_main!(benches);
--- a/vendor/ruvector/examples/dna/benches/solver_bench.rs
+++ b/vendor/ruvector/examples/dna/benches/solver_bench.rs
@@ -0,0 +1,313 @@
+//! DNA Solver Benchmarks -- ruvector-solver integration
+//!
+//! Three benchmark groups targeting real DNA analysis scenarios:
+//! A. Localized relevance via Forward Push PPR on k-mer graphs
+//! B. Laplacian solve for sequence denoising/consistency
+//! C. Cohort-scale label propagation
+//!
+//! Uses real human gene sequences from NCBI RefSeq (HBB, TP53, BRCA1, CYP2D6, INS).
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use ruvector_solver::cg::ConjugateGradientSolver;
+use ruvector_solver::forward_push::ForwardPushSolver;
+use ruvector_solver::neumann::NeumannSolver;
+use ruvector_solver::traits::SolverEngine;
+use ruvector_solver::types::{ComputeBudget, CsrMatrix};
+use rvdna::kmer_pagerank::KmerGraphRanker;
+use rvdna::real_data;
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/// Returns the five `real_data` gene sequences as byte slices, in the order
+/// HBB, TP53, BRCA1, CYP2D6, INS.
+fn real_gene_sequences() -> Vec<&'static [u8]> {
+    let mut genes: Vec<&'static [u8]> = Vec::with_capacity(5);
+    genes.push(real_data::HBB_CODING_SEQUENCE.as_bytes());
+    genes.push(real_data::TP53_EXONS_5_8.as_bytes());
+    genes.push(real_data::BRCA1_EXON11_FRAGMENT.as_bytes());
+    genes.push(real_data::CYP2D6_CODING.as_bytes());
+    genes.push(real_data::INS_CODING.as_bytes());
+    genes
+}
+
+/// Produces `count` copies of `template` in which each byte is, with probability
+/// `mutation_rate`, replaced by a base drawn uniformly from A/C/G/T.
+///
+/// The replacement may equal the original byte. All draws come from one RNG
+/// seeded from `seed`.
+fn mutated_sequences(template: &[u8], count: usize, mutation_rate: f64, seed: u64) -> Vec<Vec<u8>> {
+    const BASES: [u8; 4] = *b"ACGT";
+
+    let mut rng = StdRng::seed_from_u64(seed);
+    let mut cohort = Vec::with_capacity(count);
+    for _ in 0..count {
+        let mut copy = Vec::with_capacity(template.len());
+        for &original in template {
+            // One probability draw per byte; a second draw only when mutating.
+            let mutate = rng.gen::<f64>() < mutation_rate;
+            copy.push(if mutate {
+                BASES[rng.gen_range(0..4)]
+            } else {
+                original
+            });
+        }
+        cohort.push(copy);
+    }
+    cohort
+}
+
+/// Build a normalized k-mer fingerprint vector for a sequence using FNV-1a hashing.
+///
+/// Every length-`k` window of `seq` is hashed with [`fnv1a`] and counted in
+/// bucket `hash % dims`; the counts are then divided by their total so the
+/// returned vector sums to 1.
+///
+/// Degenerate inputs return an all-zero vector of length `dims` instead of
+/// panicking: `k == 0` (which would panic in `windows(0)`), `dims == 0`
+/// (which would panic on the modulo by zero), and sequences shorter than `k`.
+fn fingerprint(seq: &[u8], k: usize, dims: usize) -> Vec<f64> {
+    if k == 0 || dims == 0 || seq.len() < k {
+        return vec![0.0; dims];
+    }
+
+    let mut counts = vec![0u32; dims];
+    for window in seq.windows(k) {
+        counts[fnv1a(window) % dims] += 1;
+    }
+
+    let total: u32 = counts.iter().sum();
+    if total == 0 {
+        return vec![0.0; dims];
+    }
+    let inv = 1.0 / f64::from(total);
+    counts.iter().map(|&c| f64::from(c) * inv).collect()
+}
+
+/// 64-bit FNV-1a hash of `data`, truncated to `usize`.
+fn fnv1a(data: &[u8]) -> usize {
+    const OFFSET_BASIS: u64 = 14695981039346656037;
+    const PRIME: u64 = 1099511628211;
+
+    let hash = data.iter().fold(OFFSET_BASIS, |acc, &byte| {
+        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
+    });
+    hash as usize
+}
+
+/// Cosine similarity of `a` and `b` over their common prefix.
+///
+/// Returns `0.0` when either vector's Euclidean norm is below `1e-15`.
+fn cosine_sim(a: &[f64], b: &[f64]) -> f64 {
+    let norm = |v: &[f64]| v.iter().map(|x| x * x).sum::<f64>().sqrt();
+
+    let dot: f64 = a.iter().zip(b).map(|(x, y)| x * y).sum();
+    let na = norm(a);
+    let nb = norm(b);
+
+    // Guard against dividing by a (near-)zero norm.
+    if na < 1e-15 || nb < 1e-15 {
+        return 0.0;
+    }
+    dot / (na * nb)
+}
+
+/// Builds a column-stochastic transition matrix from sequence fingerprints.
+///
+/// For every ordered pair `i != j` whose cosine similarity exceeds `threshold`,
+/// an entry `(i, j)` is created; each entry is then divided by its column's
+/// similarity sum so the column sums to 1. Columns with no entries (sum below
+/// `1e-15`) receive a self-loop of weight 1.
+fn build_stochastic_matrix(fps: &[Vec<f64>], threshold: f64) -> CsrMatrix<f64> {
+    let n = fps.len();
+    let mut col_sums = vec![0.0f64; n];
+    let mut triplets: Vec<(usize, usize, f64)> = Vec::new();
+
+    for (i, fp_i) in fps.iter().enumerate() {
+        for (j, fp_j) in fps.iter().enumerate() {
+            if i != j {
+                let sim = cosine_sim(fp_i, fp_j);
+                if sim > threshold {
+                    triplets.push((i, j, sim));
+                    col_sums[j] += sim;
+                }
+            }
+        }
+    }
+
+    // Normalize each weight by its column sum.
+    let mut normalized: Vec<(usize, usize, f64)> = Vec::with_capacity(triplets.len());
+    for (i, j, w) in triplets {
+        normalized.push((i, j, w / col_sums[j].max(1e-15)));
+    }
+
+    // Self-loops for dangling nodes
+    for (j, &sum) in col_sums.iter().enumerate() {
+        if sum < 1e-15 {
+            normalized.push((j, j, 1.0));
+        }
+    }
+
+    CsrMatrix::<f64>::from_coo(n, n, normalized)
+}
+
+/// Builds a regularized graph Laplacian `L = D - A` from fingerprints.
+///
+/// Two nodes are connected when their cosine similarity exceeds `threshold`;
+/// the similarity is the edge weight. Each diagonal entry is the node's degree
+/// plus 0.01. The original comment gives the reason for the 0.01 term: it makes
+/// the matrix strictly positive definite, as the Neumann solver (diagonal
+/// dominance) and the CG solver (SPD requirement) need.
+fn build_laplacian(fps: &[Vec<f64>], threshold: f64) -> CsrMatrix<f64> {
+    let n = fps.len();
+    let mut degree = vec![0.0f64; n];
+    let mut triplets: Vec<(usize, usize, f64)> = Vec::new();
+
+    // Visit each unordered pair once and emit both symmetric off-diagonal entries.
+    for (i, fp_i) in fps.iter().enumerate() {
+        for (j, fp_j) in fps.iter().enumerate().skip(i + 1) {
+            let sim = cosine_sim(fp_i, fp_j);
+            if sim > threshold {
+                triplets.push((i, j, -sim));
+                triplets.push((j, i, -sim));
+                degree[i] += sim;
+                degree[j] += sim;
+            }
+        }
+    }
+
+    // Diagonal: degree + regularization for positive-definiteness
+    triplets.extend(degree.iter().enumerate().map(|(i, &d)| (i, i, d + 0.01)));
+
+    CsrMatrix::<f64>::from_coo(n, n, triplets)
+}
+
+// ============================================================================
+// Group A: Localized Relevance on K-mer Graphs (Forward Push PPR)
+// ============================================================================
+
+/// Group A benchmarks (sample size 30).
+///
+/// Ranks the five real gene sequences with `KmerGraphRanker`, then times
+/// `ForwardPushSolver::ppr_from_source` from node 0 on similarity graphs built
+/// from 50, 100 and 500 mutated copies of the HBB coding sequence.
+fn localized_relevance_benchmarks(c: &mut Criterion) {
+    let mut ppr = c.benchmark_group("solver_ppr");
+    ppr.sample_size(30);
+
+    // Benchmark with real genes using KmerGraphRanker
+    {
+        let real_genes = real_gene_sequences();
+        let ranker = KmerGraphRanker::new(11, 128);
+
+        ppr.bench_function("real_genes_5seq", |bencher| {
+            bencher.iter(|| black_box(ranker.rank_sequences(&real_genes, 0.15, 1e-4, 0.05)));
+        });
+    }
+
+    // Scale with mutated cohorts using ForwardPushSolver directly
+    for &cohort_size in &[50usize, 100, 500] {
+        let hbb = real_data::HBB_CODING_SEQUENCE.as_bytes();
+        let cohort = mutated_sequences(hbb, cohort_size, 0.05, 42);
+
+        let mut fps: Vec<Vec<f64>> = Vec::with_capacity(cohort.len());
+        for seq in &cohort {
+            fps.push(fingerprint(seq, 11, 128));
+        }
+        let transition = build_stochastic_matrix(&fps, 0.05);
+
+        let push_solver = ForwardPushSolver::new(0.15, 1e-4);
+
+        let id = BenchmarkId::new("ppr_single_source", cohort_size);
+        ppr.bench_with_input(id, &cohort_size, |bencher, _| {
+            bencher.iter(|| black_box(push_solver.ppr_from_source(&transition, 0)));
+        });
+    }
+
+    ppr.finish();
+}
+
+// ============================================================================
+// Group B: Laplacian Solve for Denoising / Consistency
+// ============================================================================
+
+/// Group B benchmarks (sample size 20).
+///
+/// For cohorts of 50, 100 and 500 mutated TP53 sequences, builds the regularized
+/// Laplacian and times a Neumann solve and a conjugate-gradient solve against
+/// the same right-hand side.
+fn laplacian_solve_benchmarks(c: &mut Criterion) {
+    let mut laplacian_group = c.benchmark_group("solver_laplacian");
+    laplacian_group.sample_size(20);
+
+    for &cohort_size in &[50usize, 100, 500] {
+        let tp53 = real_data::TP53_EXONS_5_8.as_bytes();
+        let cohort = mutated_sequences(tp53, cohort_size, 0.03, 42);
+        let fps: Vec<Vec<f64>> = cohort.iter().map(|s| fingerprint(s, 11, 128)).collect();
+        let laplacian = build_laplacian(&fps, 0.1);
+
+        // RHS: noisy signal (first 10% = 1.0, rest = small noise)
+        let mut rng = StdRng::seed_from_u64(42);
+        let signal_len = cohort_size / 10;
+        let rhs: Vec<f64> = (0..cohort_size)
+            .map(|i| {
+                if i < signal_len {
+                    1.0
+                } else {
+                    rng.gen::<f64>() * 0.1
+                }
+            })
+            .collect();
+
+        let budget = ComputeBudget::default();
+
+        // Neumann solver (via SolverEngine trait, f64 -> f32 conversion)
+        let neumann = NeumannSolver::new(1e-6, 200);
+
+        let neumann_id = BenchmarkId::new("neumann_denoise", cohort_size);
+        laplacian_group.bench_with_input(neumann_id, &cohort_size, |bencher, _| {
+            bencher.iter(|| {
+                // Neumann may fail on non-diag-dominant Laplacians;
+                // the benchmark measures attempt latency regardless.
+                let _ = black_box(SolverEngine::solve(&neumann, &laplacian, &rhs, &budget));
+            });
+        });
+
+        // CG solver (preconditioned, well-suited for SPD Laplacians)
+        let cg = ConjugateGradientSolver::new(1e-6, 500, true);
+
+        let cg_id = BenchmarkId::new("cg_denoise", cohort_size);
+        laplacian_group.bench_with_input(cg_id, &cohort_size, |bencher, _| {
+            bencher.iter(|| black_box(SolverEngine::solve(&cg, &laplacian, &rhs, &budget)));
+        });
+    }
+
+    laplacian_group.finish();
+}
+
+// ============================================================================
+// Group C: Cohort-Scale Label Propagation
+// ============================================================================
+
+/// Group C benchmarks (sample size 10).
+///
+/// For cohorts of 100, 500 and 1000 sequences mixed from HBB, TP53 and BRCA1
+/// variants, times a conjugate-gradient solve of the regularized Laplacian
+/// against a sparse label vector.
+fn cohort_propagation_benchmarks(c: &mut Criterion) {
+    let mut cohort_group = c.benchmark_group("solver_cohort");
+    cohort_group.sample_size(10);
+
+    for &cohort_size in &[100usize, 500, 1000] {
+        // Build mixed cohort: HBB variants + TP53 variants + BRCA1 variants
+        let genes: [&[u8]; 3] = [
+            real_data::HBB_CODING_SEQUENCE.as_bytes(),
+            real_data::TP53_EXONS_5_8.as_bytes(),
+            real_data::BRCA1_EXON11_FRAGMENT.as_bytes(),
+        ];
+        let per_gene = cohort_size / 3;
+
+        let mut cohort: Vec<Vec<u8>> = Vec::new();
+        for (gene_idx, gene) in genes.iter().enumerate() {
+            cohort.extend(mutated_sequences(gene, per_gene, 0.04, 42 + gene_idx as u64));
+        }
+        // Fill remainder with HBB variants
+        while cohort.len() < cohort_size {
+            let filler_seed = 99 + cohort.len() as u64;
+            cohort.extend(mutated_sequences(genes[0], 1, 0.05, filler_seed));
+        }
+        cohort.truncate(cohort_size);
+
+        let fps: Vec<Vec<f64>> = cohort.iter().map(|s| fingerprint(s, 11, 128)).collect();
+        let laplacian = build_laplacian(&fps, 0.05);
+
+        // Label propagation: known labels for first 10% of each gene group.
+        // Group g (0-based) starts at g * per_gene and is labelled g + 1:
+        // 1.0 = HBB, 2.0 = TP53, 3.0 = BRCA1.
+        let mut labels = vec![0.0f64; cohort_size];
+        let labeled_count = (per_gene / 10).max(1);
+        for (group_idx, &label) in [1.0f64, 2.0, 3.0].iter().enumerate() {
+            let start = group_idx * per_gene;
+            let end = (start + labeled_count).min(cohort_size);
+            for slot in &mut labels[start..end] {
+                *slot = label;
+            }
+        }
+
+        let cg = ConjugateGradientSolver::new(1e-6, 1000, true);
+        let budget = ComputeBudget::default();
+
+        let id = BenchmarkId::new("label_propagation", cohort_size);
+        cohort_group.bench_with_input(id, &cohort_size, |bencher, _| {
+            bencher.iter(|| black_box(SolverEngine::solve(&cg, &laplacian, &labels, &budget)));
+        });
+    }
+
+    cohort_group.finish();
+}
+
+// ============================================================================
+// Configuration
+// ============================================================================
+
+// Collects the three solver benchmark functions into the Criterion group
+// `benches`; `criterion_main!` then expands to the entry point that runs it.
+criterion_group!(
+    benches,
+    localized_relevance_benchmarks,
+    laplacian_solve_benchmarks,
+    cohort_propagation_benchmarks,
+);
+
+criterion_main!(benches);