Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
420
examples/dna/benches/dna_bench.rs
Normal file
420
examples/dna/benches/dna_bench.rs
Normal file
@@ -0,0 +1,420 @@
|
||||
//! Criterion benchmarks for DNA Analyzer
|
||||
//!
|
||||
//! Comprehensive performance benchmarks covering:
|
||||
//! - K-mer encoding and HNSW indexing
|
||||
//! - Sequence alignment
|
||||
//! - Variant calling
|
||||
//! - Protein translation
|
||||
//! - Full pipeline integration
|
||||
|
||||
use ::rvdna::prelude::*;
|
||||
use ::rvdna::types::KmerIndex as TypesKmerIndex;
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
/// Generate random DNA sequence of specified length
|
||||
fn random_dna(len: usize, seed: u64) -> DnaSequence {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let bases = [Nucleotide::A, Nucleotide::C, Nucleotide::G, Nucleotide::T];
|
||||
let sequence: Vec<Nucleotide> = (0..len).map(|_| bases[rng.gen_range(0..4)]).collect();
|
||||
DnaSequence::new(sequence)
|
||||
}
|
||||
|
||||
/// Generate multiple random sequences
|
||||
fn random_sequences(count: usize, len: usize, seed: u64) -> Vec<DnaSequence> {
|
||||
(0..count)
|
||||
.map(|i| random_dna(len, seed + i as u64))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// K-mer Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn kmer_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("kmer");
|
||||
|
||||
group.bench_function("encode_1kb", |b| {
|
||||
let seq = random_dna(1_000, 42);
|
||||
b.iter(|| black_box(seq.to_kmer_vector(11, 512).unwrap()));
|
||||
});
|
||||
|
||||
group.bench_function("encode_10kb", |b| {
|
||||
let seq = random_dna(10_000, 42);
|
||||
b.iter(|| black_box(seq.to_kmer_vector(11, 512).unwrap()));
|
||||
});
|
||||
|
||||
group.bench_function("encode_100kb", |b| {
|
||||
let seq = random_dna(100_000, 42);
|
||||
b.iter(|| black_box(seq.to_kmer_vector(11, 512).unwrap()));
|
||||
});
|
||||
|
||||
// HNSW index insertion
|
||||
group.bench_function("index_insert_100", |b| {
|
||||
let sequences = random_sequences(100, 100, 42);
|
||||
b.iter(|| {
|
||||
let temp = tempfile::TempDir::new().unwrap();
|
||||
let index =
|
||||
TypesKmerIndex::new(11, 512, temp.path().join("idx").to_str().unwrap()).unwrap();
|
||||
for (i, seq) in sequences.iter().enumerate() {
|
||||
let vec = seq.to_kmer_vector(11, 512).unwrap();
|
||||
index
|
||||
.db()
|
||||
.insert(ruvector_core::VectorEntry {
|
||||
id: Some(format!("seq{}", i)),
|
||||
vector: vec,
|
||||
metadata: None,
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
black_box(index)
|
||||
});
|
||||
});
|
||||
|
||||
// HNSW search
|
||||
group.bench_function("search_top10", |b| {
|
||||
let sequences = random_sequences(100, 100, 42);
|
||||
let temp = tempfile::TempDir::new().unwrap();
|
||||
let index =
|
||||
TypesKmerIndex::new(11, 512, temp.path().join("idx").to_str().unwrap()).unwrap();
|
||||
|
||||
for (i, seq) in sequences.iter().enumerate() {
|
||||
let vec = seq.to_kmer_vector(11, 512).unwrap();
|
||||
index
|
||||
.db()
|
||||
.insert(ruvector_core::VectorEntry {
|
||||
id: Some(format!("seq{}", i)),
|
||||
vector: vec,
|
||||
metadata: None,
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let query = random_dna(100, 999);
|
||||
let query_vec = query.to_kmer_vector(11, 512).unwrap();
|
||||
|
||||
b.iter(|| {
|
||||
black_box(
|
||||
index
|
||||
.db()
|
||||
.search(ruvector_core::SearchQuery {
|
||||
vector: query_vec.clone(),
|
||||
k: 10,
|
||||
filter: None,
|
||||
ef_search: None,
|
||||
})
|
||||
.unwrap(),
|
||||
)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Alignment Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn alignment_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("alignment");
|
||||
|
||||
group.bench_function("one_hot_encoding_1kb", |b| {
|
||||
let seq = random_dna(1_000, 42);
|
||||
b.iter(|| black_box(seq.encode_one_hot()));
|
||||
});
|
||||
|
||||
group.bench_function("attention_align_100bp", |b| {
|
||||
let query = random_dna(100, 42);
|
||||
let reference = random_dna(1_000, 43);
|
||||
b.iter(|| black_box(query.align_with_attention(&reference).unwrap()));
|
||||
});
|
||||
|
||||
group.bench_function("smith_waterman_100bp", |b| {
|
||||
let query = random_dna(100, 42);
|
||||
let reference = random_dna(500, 43);
|
||||
let aligner = SmithWaterman::new(AlignmentConfig::default());
|
||||
b.iter(|| black_box(aligner.align(&query, &reference).unwrap()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Variant Calling Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn variant_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("variant");
|
||||
|
||||
group.bench_function("snp_calling_single", |b| {
|
||||
let caller = VariantCaller::new(VariantCallerConfig::default());
|
||||
let pileup = PileupColumn {
|
||||
bases: vec![b'A', b'A', b'G', b'G', b'G', b'G', b'G', b'G', b'G', b'G'],
|
||||
qualities: vec![35; 10],
|
||||
position: 12345,
|
||||
chromosome: 1,
|
||||
};
|
||||
|
||||
b.iter(|| black_box(caller.call_snp(&pileup, b'A')));
|
||||
});
|
||||
|
||||
group.bench_function("snp_calling_1000_positions", |b| {
|
||||
let caller = VariantCaller::new(VariantCallerConfig::default());
|
||||
let mut rng = StdRng::seed_from_u64(42);
|
||||
|
||||
let pileups: Vec<(PileupColumn, u8)> = (0..1000)
|
||||
.map(|i| {
|
||||
let bases: Vec<u8> = (0..20)
|
||||
.map(|_| [b'A', b'C', b'G', b'T'][rng.gen_range(0..4)])
|
||||
.collect();
|
||||
let quals: Vec<u8> = (0..20).map(|_| rng.gen_range(20..41)).collect();
|
||||
let ref_base = [b'A', b'C', b'G', b'T'][i % 4];
|
||||
(
|
||||
PileupColumn {
|
||||
bases,
|
||||
qualities: quals,
|
||||
position: i as u64,
|
||||
chromosome: 1,
|
||||
},
|
||||
ref_base,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
b.iter(|| {
|
||||
let mut count = 0;
|
||||
for (pileup, ref_base) in &pileups {
|
||||
if caller.call_snp(pileup, *ref_base).is_some() {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
black_box(count)
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Protein Analysis Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn protein_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("protein");
|
||||
|
||||
group.bench_function("translate_1kb", |b| {
|
||||
let seq = random_dna(1_002, 42);
|
||||
b.iter(|| black_box(seq.translate().unwrap()));
|
||||
});
|
||||
|
||||
group.bench_function("contact_graph_100residues", |b| {
|
||||
let protein = create_random_protein(100, 42);
|
||||
b.iter(|| black_box(protein.build_contact_graph(8.0).unwrap()));
|
||||
});
|
||||
|
||||
group.bench_function("contact_prediction_100residues", |b| {
|
||||
let protein = create_random_protein(100, 42);
|
||||
let graph = protein.build_contact_graph(8.0).unwrap();
|
||||
b.iter(|| black_box(protein.predict_contacts(&graph).unwrap()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// RVDNA Format Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn rvdna_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("rvdna");
|
||||
|
||||
group.bench_function("encode_2bit_1kb", |b| {
|
||||
let seq = random_dna(1_000, 42);
|
||||
b.iter(|| black_box(rvdna::encode_2bit(seq.bases())));
|
||||
});
|
||||
|
||||
group.bench_function("encode_2bit_100kb", |b| {
|
||||
let seq = random_dna(100_000, 42);
|
||||
b.iter(|| black_box(rvdna::encode_2bit(seq.bases())));
|
||||
});
|
||||
|
||||
group.bench_function("fasta_to_rvdna_1kb", |b| {
|
||||
let seq_str: String = random_dna(1_000, 42)
|
||||
.bases()
|
||||
.iter()
|
||||
.map(|n| match n {
|
||||
Nucleotide::A => 'A',
|
||||
Nucleotide::C => 'C',
|
||||
Nucleotide::G => 'G',
|
||||
Nucleotide::T => 'T',
|
||||
_ => 'N',
|
||||
})
|
||||
.collect();
|
||||
b.iter(|| black_box(rvdna::fasta_to_rvdna(&seq_str, 11, 256, 1000).unwrap()));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Epigenomics Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn epigenomics_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("epigenomics");
|
||||
|
||||
group.bench_function("cancer_signal_1000_sites", |b| {
|
||||
let positions: Vec<(u8, u64)> = (0..1000).map(|i| (1u8, i as u64)).collect();
|
||||
let betas: Vec<f32> = (0..1000).map(|i| (i as f32 / 1000.0)).collect();
|
||||
let profile = rvdna::MethylationProfile::from_beta_values(positions, betas);
|
||||
let detector = rvdna::CancerSignalDetector::new();
|
||||
b.iter(|| black_box(detector.detect(&profile)));
|
||||
});
|
||||
|
||||
group.bench_function("horvath_clock_1000_sites", |b| {
|
||||
let positions: Vec<(u8, u64)> = (0..1000).map(|i| (1u8, i as u64)).collect();
|
||||
let betas: Vec<f32> = (0..1000).map(|i| (i as f32 / 2000.0 + 0.25)).collect();
|
||||
let profile = rvdna::MethylationProfile::from_beta_values(positions, betas);
|
||||
let clock = rvdna::HorvathClock::default_clock();
|
||||
b.iter(|| black_box(clock.predict_age(&profile)));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Protein Analysis Benchmarks (extended)
|
||||
// ============================================================================
|
||||
|
||||
fn protein_extended_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("protein_analysis");
|
||||
|
||||
group.bench_function("molecular_weight_300aa", |b| {
|
||||
let protein = rvdna::translate_dna(
|
||||
&random_dna(900, 42)
|
||||
.bases()
|
||||
.iter()
|
||||
.map(|n| match n {
|
||||
Nucleotide::A => b'A',
|
||||
Nucleotide::C => b'C',
|
||||
Nucleotide::G => b'G',
|
||||
Nucleotide::T => b'T',
|
||||
_ => b'N',
|
||||
})
|
||||
.collect::<Vec<u8>>(),
|
||||
);
|
||||
b.iter(|| black_box(rvdna::molecular_weight(&protein)));
|
||||
});
|
||||
|
||||
group.bench_function("isoelectric_point_300aa", |b| {
|
||||
let protein = rvdna::translate_dna(
|
||||
&random_dna(900, 42)
|
||||
.bases()
|
||||
.iter()
|
||||
.map(|n| match n {
|
||||
Nucleotide::A => b'A',
|
||||
Nucleotide::C => b'C',
|
||||
Nucleotide::G => b'G',
|
||||
Nucleotide::T => b'T',
|
||||
_ => b'N',
|
||||
})
|
||||
.collect::<Vec<u8>>(),
|
||||
);
|
||||
b.iter(|| black_box(rvdna::isoelectric_point(&protein)));
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Full Pipeline Benchmarks
|
||||
// ============================================================================
|
||||
|
||||
fn pipeline_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("pipeline");
|
||||
|
||||
group.bench_function("full_pipeline_1kb", |b| {
|
||||
let reference = random_dna(1_000, 42);
|
||||
let reads = random_sequences(20, 150, 43);
|
||||
let caller = VariantCaller::new(VariantCallerConfig::default());
|
||||
|
||||
b.iter(|| {
|
||||
// K-mer encoding
|
||||
let ref_vec = reference.to_kmer_vector(11, 512).unwrap();
|
||||
|
||||
// Align reads
|
||||
let mut alignments = Vec::new();
|
||||
for read in &reads {
|
||||
if let Ok(alignment) = read.align_with_attention(&reference) {
|
||||
alignments.push(alignment);
|
||||
}
|
||||
}
|
||||
|
||||
// Call variants at a few positions
|
||||
let mut variants = Vec::new();
|
||||
let pileup = PileupColumn {
|
||||
bases: vec![b'A', b'G', b'G', b'G', b'A', b'G', b'G', b'A', b'G', b'G'],
|
||||
qualities: vec![35; 10],
|
||||
position: 0,
|
||||
chromosome: 1,
|
||||
};
|
||||
if let Some(v) = caller.call_snp(&pileup, b'A') {
|
||||
variants.push(v);
|
||||
}
|
||||
|
||||
// Translate to protein
|
||||
let protein = reference.translate().unwrap();
|
||||
|
||||
black_box((ref_vec, alignments, variants, protein))
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Helpers
|
||||
// ============================================================================
|
||||
|
||||
fn create_random_protein(len: usize, seed: u64) -> ProteinSequence {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
let residues = [
|
||||
ProteinResidue::A,
|
||||
ProteinResidue::C,
|
||||
ProteinResidue::D,
|
||||
ProteinResidue::E,
|
||||
ProteinResidue::F,
|
||||
ProteinResidue::G,
|
||||
ProteinResidue::H,
|
||||
ProteinResidue::I,
|
||||
ProteinResidue::K,
|
||||
ProteinResidue::L,
|
||||
ProteinResidue::M,
|
||||
ProteinResidue::N,
|
||||
];
|
||||
|
||||
let sequence: Vec<ProteinResidue> = (0..len)
|
||||
.map(|_| residues[rng.gen_range(0..residues.len())])
|
||||
.collect();
|
||||
|
||||
ProteinSequence::new(sequence)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Criterion Configuration
|
||||
// ============================================================================
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
kmer_benchmarks,
|
||||
alignment_benchmarks,
|
||||
variant_benchmarks,
|
||||
protein_benchmarks,
|
||||
rvdna_benchmarks,
|
||||
epigenomics_benchmarks,
|
||||
protein_extended_benchmarks,
|
||||
pipeline_benchmarks
|
||||
);
|
||||
|
||||
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user