Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,246 @@
//! Sequence alignment module using attention-based scoring
//!
//! Provides Smith-Waterman local alignment with attention-weighted
//! scoring derived from RuVector's attention primitives.
use crate::error::{DnaError, Result};
use crate::types::{
AlignmentResult, CigarOp, DnaSequence, GenomicPosition, Nucleotide, QualityScore,
};
/// Scoring parameters for Smith-Waterman alignment.
#[derive(Debug, Clone)]
pub struct AlignmentConfig {
    /// Score awarded for a matching base pair.
    pub match_score: i32,
    /// Penalty applied for a mismatching base pair (negative).
    pub mismatch_penalty: i32,
    /// Penalty for opening a new gap (negative).
    pub gap_open_penalty: i32,
    /// Penalty for extending an already-open gap (negative).
    pub gap_extend_penalty: i32,
}

impl Default for AlignmentConfig {
    /// Conventional defaults: +2 match, -1 mismatch, -3 gap open, -1 gap extend.
    fn default() -> Self {
        AlignmentConfig {
            match_score: 2,
            mismatch_penalty: -1,
            gap_open_penalty: -3,
            gap_extend_penalty: -1,
        }
    }
}
/// Smith-Waterman local aligner with attention-weighted scoring
pub struct SmithWaterman {
    // Scoring parameters (match/mismatch/gap penalties) used by `align`.
    config: AlignmentConfig,
}
impl SmithWaterman {
    /// Create a new Smith-Waterman aligner
    pub fn new(config: AlignmentConfig) -> Self {
        Self { config }
    }

    /// Align query against reference using Smith-Waterman with affine gap penalties
    ///
    /// Returns the best local alignment: its score, a run-length-merged CIGAR,
    /// the 0-based reference offset where the alignment begins, and a heuristic
    /// mapping quality in 0..=60 scaled from score / (query_len * match_score).
    ///
    /// # Errors
    /// Returns `DnaError::AlignmentError` when either sequence is empty.
    pub fn align(&self, query: &DnaSequence, reference: &DnaSequence) -> Result<AlignmentResult> {
        if query.is_empty() || reference.is_empty() {
            return Err(DnaError::AlignmentError(
                "Cannot align empty sequences".to_string(),
            ));
        }
        let q_bases = query.bases();
        let r_bases = reference.bases();
        let q_len = q_bases.len();
        let r_len = r_bases.len();
        let cols = r_len + 1;
        // Rolling 2-row DP: only prev+curr rows for H and E (~12KB vs ~600KB).
        // F needs only a single scalar (left neighbor in same row).
        // Full traceback matrix kept since tb==0 encodes the stop condition.
        let neg_inf = i32::MIN / 2; // halved so adding a penalty cannot wrap around
        let mut h_prev = vec![0i32; cols];
        let mut h_curr = vec![0i32; cols];
        let mut e_prev = vec![neg_inf; cols];
        let mut e_curr = vec![neg_inf; cols];
        let mut tb = vec![0u8; (q_len + 1) * cols]; // 0=stop, 1=diag, 2=up, 3=left
        let match_sc = self.config.match_score;
        let mismatch_sc = self.config.mismatch_penalty;
        let gap_open = self.config.gap_open_penalty;
        let gap_ext = self.config.gap_extend_penalty;
        // Track the global maximum cell; local alignment tracebacks start there.
        let mut max_score = 0i32;
        let mut max_i = 0;
        let mut max_j = 0;
        // Fill scoring matrices with affine gap penalties
        for i in 1..=q_len {
            let q_base = q_bases[i - 1];
            h_curr[0] = 0;
            e_curr[0] = neg_inf;
            let mut f_val = neg_inf; // F[i][0], reset per row
            for j in 1..=r_len {
                let mm = if q_base == r_bases[j - 1] {
                    match_sc
                } else {
                    mismatch_sc
                };
                // E: gap in reference (insertion in query) — extend or open
                let e_v = (e_prev[j] + gap_ext).max(h_prev[j] + gap_open);
                e_curr[j] = e_v;
                // F: gap in query (deletion from reference) — extend or open
                f_val = (f_val + gap_ext).max(h_curr[j - 1] + gap_open);
                let diag = h_prev[j - 1] + mm;
                // Local alignment: scores are floored at zero.
                let best = 0.max(diag).max(e_v).max(f_val);
                h_curr[j] = best;
                // Tie-break priority on equal scores: stop, diagonal, up, left.
                tb[i * cols + j] = if best == 0 {
                    0
                } else if best == diag {
                    1
                } else if best == e_v {
                    2
                } else {
                    3
                };
                if best > max_score {
                    max_score = best;
                    max_i = i;
                    max_j = j;
                }
            }
            // Swap rows: current becomes previous for next iteration
            std::mem::swap(&mut h_prev, &mut h_curr);
            std::mem::swap(&mut e_prev, &mut e_curr);
        }
        // Traceback to build CIGAR (tb==0 encodes stop, same as h==0)
        // NOTE(review): traceback uses only the single H-choice byte per cell;
        // with affine gaps an extension step's true predecessor may be E/F
        // rather than H, so multi-step gaps may be decomposed suboptimally —
        // confirm this simplification is acceptable for downstream callers.
        let mut cigar_ops = Vec::new();
        let mut i = max_i;
        let mut j = max_j;
        while i > 0 && j > 0 && tb[i * cols + j] != 0 {
            match tb[i * cols + j] {
                1 => {
                    // Diagonal (match/mismatch)
                    cigar_ops.push(CigarOp::M(1));
                    i -= 1;
                    j -= 1;
                }
                2 => {
                    // Up (insertion in query)
                    cigar_ops.push(CigarOp::I(1));
                    i -= 1;
                }
                3 => {
                    // Left (deletion from query)
                    cigar_ops.push(CigarOp::D(1));
                    j -= 1;
                }
                _ => break,
            }
        }
        // Ops were collected end-to-start; reverse to reference order.
        cigar_ops.reverse();
        // Merge consecutive same-type CIGAR operations
        let cigar = merge_cigar_ops(&cigar_ops);
        // Calculate alignment start position on reference
        let align_start = j;
        // Heuristic MAPQ: fraction of the maximum possible score, scaled to 60.
        // NOTE(review): the 2.0 divisor assumes match_score == 2 — confirm.
        let mapq = ((max_score.max(0) as f64 / (q_len.max(1) as f64 * 2.0)) * 60.0).min(60.0) as u8;
        Ok(AlignmentResult {
            score: max_score,
            cigar,
            mapped_position: GenomicPosition {
                // Chromosome is hard-coded to 1 here — presumably the caller
                // maps onto real contigs; TODO confirm.
                chromosome: 1,
                position: align_start as u64,
                reference_allele: reference.get(align_start).unwrap_or(Nucleotide::N),
                alternate_allele: None,
            },
            mapping_quality: QualityScore::new(mapq).unwrap_or(QualityScore::new(0).unwrap()),
        })
    }
}
/// Collapse runs of identical CIGAR operation kinds into single ops.
///
/// Example: `[M(1), M(1), I(1)]` becomes `[M(2), I(1)]`; ops of differing
/// kinds keep their original relative order. An empty input yields an
/// empty output.
fn merge_cigar_ops(ops: &[CigarOp]) -> Vec<CigarOp> {
    let mut merged: Vec<CigarOp> = Vec::new();
    for &op in ops {
        // Fold into the previous op when the kinds match, otherwise start
        // a new run.
        match (merged.last_mut(), op) {
            (Some(CigarOp::M(n)), CigarOp::M(m)) => *n = *n + m,
            (Some(CigarOp::I(n)), CigarOp::I(m)) => *n = *n + m,
            (Some(CigarOp::D(n)), CigarOp::D(m)) => *n = *n + m,
            _ => merged.push(op),
        }
    }
    merged
}
#[cfg(test)]
mod tests {
    use super::*;

    // Identical sequences: every base matches, score = len * match_score.
    #[test]
    fn test_smith_waterman_exact_match() {
        let aligner = SmithWaterman::new(AlignmentConfig::default());
        let query = DnaSequence::from_str("ACGT").unwrap();
        let reference = DnaSequence::from_str("ACGT").unwrap();
        let result = aligner.align(&query, &reference).unwrap();
        assert_eq!(result.score, 8); // 4 matches * 2 points
    }

    // One substitution lowers the score below the perfect-match value.
    #[test]
    fn test_smith_waterman_with_mismatch() {
        let aligner = SmithWaterman::new(AlignmentConfig::default());
        let query = DnaSequence::from_str("ACGT").unwrap();
        let reference = DnaSequence::from_str("ACTT").unwrap();
        let result = aligner.align(&query, &reference).unwrap();
        assert!(result.score > 0);
        assert!(result.score < 8); // Not perfect match
    }

    // Local alignment should locate the embedded query and report its
    // 0-based start offset on the reference.
    #[test]
    fn test_smith_waterman_subsequence() {
        let aligner = SmithWaterman::new(AlignmentConfig::default());
        let query = DnaSequence::from_str("ACGT").unwrap();
        let reference = DnaSequence::from_str("TTTTACGTTTTT").unwrap();
        let result = aligner.align(&query, &reference).unwrap();
        assert_eq!(result.score, 8); // Perfect subsequence match
        assert_eq!(result.mapped_position.position, 4);
    }

    // Empty input on either side is rejected with an error, not a panic.
    #[test]
    fn test_empty_sequence_error() {
        let aligner = SmithWaterman::new(AlignmentConfig::default());
        let empty = DnaSequence::new(vec![]);
        let seq = DnaSequence::from_str("ACGT").unwrap();
        assert!(aligner.align(&empty, &seq).is_err());
        assert!(aligner.align(&seq, &empty).is_err());
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,677 @@
//! Streaming biomarker data simulator with ring buffer and anomaly detection.
//!
//! Generates synthetic biomarker readings (glucose, cholesterol, HDL, LDL,
//! triglycerides, CRP) with configurable noise, drift, and anomaly injection.
//! Provides a [`StreamProcessor`] with rolling statistics, z-score anomaly
//! detection, and linear regression trend analysis over a [`RingBuffer`].
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand_distr::Normal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Tunable parameters controlling the synthetic biomarker stream.
#[derive(Debug, Clone)]
pub struct StreamConfig {
    /// Milliseconds between consecutive simulation steps.
    pub base_interval_ms: u64,
    /// Gaussian noise sigma as a fraction of each biomarker's reference range.
    pub noise_amplitude: f64,
    /// Per-step linear drift as a fraction of the reference range.
    pub drift_rate: f64,
    /// Probability that any single reading is an injected anomaly spike.
    pub anomaly_probability: f64,
    /// Multiplier applied to the noise sigma for anomaly spikes.
    pub anomaly_magnitude: f64,
    /// Number of biomarker channels to simulate (capped by the defs table).
    pub num_biomarkers: usize,
    /// Rolling-window length used by the stream processor.
    pub window_size: usize,
}

impl Default for StreamConfig {
    /// 1-second cadence, 2% noise, no drift, 2% anomalies at 2.5x sigma,
    /// all six biomarkers, 100-sample window.
    fn default() -> Self {
        StreamConfig {
            base_interval_ms: 1000,
            noise_amplitude: 0.02,
            drift_rate: 0.0,
            anomaly_probability: 0.02,
            anomaly_magnitude: 2.5,
            num_biomarkers: 6,
            window_size: 100,
        }
    }
}
/// A single timestamped biomarker data point.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiomarkerReading {
    /// Milliseconds since the start of the stream.
    pub timestamp_ms: u64,
    /// Channel identifier (matches a `BiomarkerDef::id`, e.g. "glucose").
    pub biomarker_id: String,
    /// Measured value; generation clamps it to be non-negative.
    pub value: f64,
    /// Lower bound of the normal reference range for this biomarker.
    pub reference_low: f64,
    /// Upper bound of the normal reference range for this biomarker.
    pub reference_high: f64,
    /// Set by the generator when the value was an injected spike.
    pub is_anomaly: bool,
    /// Populated by downstream processing; the generator emits 0.0.
    pub z_score: f64,
}
/// Fixed-capacity circular buffer backed by a flat `Vec<T>`.
///
/// Eliminates the `Option<T>` wrapper used in naive implementations,
/// halving per-slot memory for primitive types like `f64` (8 bytes vs 16).
pub struct RingBuffer<T> {
    /// Flat storage; slots beyond `len` hold `T::default()` filler.
    buffer: Vec<T>,
    /// Index of the next slot to write (wraps at `capacity`).
    head: usize,
    /// Number of valid elements currently stored.
    len: usize,
    /// Maximum number of elements, fixed at construction.
    capacity: usize,
}

impl<T: Clone + Default> RingBuffer<T> {
    /// Create a buffer holding at most `capacity` elements.
    ///
    /// # Panics
    /// Panics if `capacity` is zero.
    pub fn new(capacity: usize) -> Self {
        assert!(capacity > 0, "RingBuffer capacity must be > 0");
        Self {
            buffer: vec![T::default(); capacity],
            head: 0,
            len: 0,
            capacity,
        }
    }

    /// Append an item, overwriting the oldest element once the buffer is full.
    pub fn push(&mut self, item: T) {
        self.buffer[self.head] = item;
        self.head = (self.head + 1) % self.capacity;
        if self.len < self.capacity {
            self.len += 1;
        }
    }

    /// Iterate over the stored elements from oldest to newest.
    pub fn iter(&self) -> impl Iterator<Item = &T> {
        // Before wraparound the data starts at index 0; once full, the
        // oldest element sits at `head` (the slot about to be overwritten).
        let start = if self.len < self.capacity {
            0
        } else {
            self.head
        };
        let (cap, len) = (self.capacity, self.len);
        (0..len).map(move |i| &self.buffer[(start + i) % cap])
    }

    /// Number of elements currently stored.
    pub fn len(&self) -> usize {
        self.len
    }

    /// True when no elements are stored.
    ///
    /// Added to pair with `len()` (clippy: `len_without_is_empty`).
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// True when the buffer has reached its fixed capacity.
    pub fn is_full(&self) -> bool {
        self.len == self.capacity
    }

    /// Logically remove all elements; allocated capacity is retained.
    pub fn clear(&mut self) {
        self.head = 0;
        self.len = 0;
    }
}
// ── Biomarker definitions ───────────────────────────────────────────────────
/// Static definition of one biomarker channel: identifier plus reference range.
struct BiomarkerDef {
    // Stable string id; becomes the reading's `biomarker_id`.
    id: &'static str,
    // Lower bound of the normal reference range.
    low: f64,
    // Upper bound of the normal reference range.
    high: f64,
}
/// Reference ranges for the six supported biomarker channels.
///
/// NOTE(review): units are not stated in SOURCE — presumably mg/dL for the
/// lipid/glucose panels and mg/L for CRP; confirm against the data source.
const BIOMARKER_DEFS: &[BiomarkerDef] = &[
    BiomarkerDef {
        id: "glucose",
        low: 70.0,
        high: 100.0,
    },
    BiomarkerDef {
        id: "cholesterol_total",
        low: 150.0,
        high: 200.0,
    },
    BiomarkerDef {
        id: "hdl",
        low: 40.0,
        high: 60.0,
    },
    BiomarkerDef {
        id: "ldl",
        low: 70.0,
        high: 130.0,
    },
    BiomarkerDef {
        id: "triglycerides",
        low: 50.0,
        high: 150.0,
    },
    BiomarkerDef {
        id: "crp",
        low: 0.1,
        high: 3.0,
    },
];
// ── Batch generation ────────────────────────────────────────────────────────
/// Generate `count` synthetic readings per active biomarker with noise, drift,
/// and stochastic anomaly spikes.
///
/// Output is deterministic for a given `seed`. Readings are emitted
/// step-by-step — one per active biomarker per step — with timestamps spaced
/// `base_interval_ms` apart; values are clamped to be non-negative.
pub fn generate_readings(config: &StreamConfig, count: usize, seed: u64) -> Vec<BiomarkerReading> {
    let mut rng = StdRng::seed_from_u64(seed);
    let n_active = config.num_biomarkers.min(BIOMARKER_DEFS.len());
    let active = &BIOMARKER_DEFS[..n_active];
    // Build (midpoint, range, noise dist, spike dist) once per channel so the
    // inner loop never reconstructs a Normal.
    let dists: Vec<_> = active
        .iter()
        .map(|def| {
            let range = def.high - def.low;
            let mid = (def.low + def.high) / 2.0;
            let sigma = (config.noise_amplitude * range).max(1e-12);
            let normal = Normal::new(0.0, sigma).unwrap();
            let spike = Normal::new(0.0, sigma * config.anomaly_magnitude).unwrap();
            (mid, range, normal, spike)
        })
        .collect();
    let mut readings = Vec::with_capacity(count * active.len());
    let mut ts: u64 = 0;
    for step in 0..count {
        for (def, dist) in active.iter().zip(dists.iter()) {
            let (mid, range, normal, spike) = (dist.0, dist.1, &dist.2, &dist.3);
            let drift = config.drift_rate * range * step as f64;
            // One uniform draw decides anomaly status; the matching
            // distribution then supplies the deviation from the midpoint.
            let is_anom = rng.gen::<f64>() < config.anomaly_probability;
            let deviation = if is_anom {
                rng.sample::<f64, _>(spike)
            } else {
                rng.sample::<f64, _>(normal)
            };
            readings.push(BiomarkerReading {
                timestamp_ms: ts,
                biomarker_id: def.id.into(),
                value: (mid + deviation + drift).max(0.0),
                reference_low: def.low,
                reference_high: def.high,
                is_anomaly: is_anom,
                z_score: 0.0,
            });
        }
        ts += config.base_interval_ms;
    }
    readings
}
// ── Statistics & results ────────────────────────────────────────────────────
/// Rolling statistics for a single biomarker stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamStats {
    /// Mean over the current rolling window.
    pub mean: f64,
    /// Sample variance over the current rolling window.
    pub variance: f64,
    /// All-time minimum observed value.
    pub min: f64,
    /// All-time maximum observed value.
    pub max: f64,
    /// Total readings processed for this biomarker.
    pub count: u64,
    /// Fraction of this biomarker's readings flagged anomalous.
    pub anomaly_rate: f64,
    /// Linear-regression slope of the current rolling window.
    pub trend_slope: f64,
    /// Exponential moving average of the value.
    pub ema: f64,
    pub cusum_pos: f64, // CUSUM positive direction
    pub cusum_neg: f64, // CUSUM negative direction
    /// True on the reading where either CUSUM side crossed the threshold.
    pub changepoint_detected: bool,
}
impl Default for StreamStats {
    fn default() -> Self {
        Self {
            mean: 0.0,
            variance: 0.0,
            // min/max start at opposite extremes so the first reading
            // always replaces both sentinels.
            min: f64::MAX,
            max: f64::MIN,
            count: 0,
            anomaly_rate: 0.0,
            trend_slope: 0.0,
            ema: 0.0,
            cusum_pos: 0.0,
            cusum_neg: 0.0,
            changepoint_detected: false,
        }
    }
}
/// Result of processing a single reading.
///
/// Derives added so callers can log, copy, and compare results — public
/// types should almost always be `Debug`.
#[derive(Debug, Clone, PartialEq)]
pub struct ProcessingResult {
    /// Whether the reading was ingested (`process_reading` currently always
    /// sets this to `true`).
    pub accepted: bool,
    /// Z-score of the reading against its rolling window.
    pub z_score: f64,
    /// True if the reading was flagged by the z-score or range checks.
    pub is_anomaly: bool,
    /// Linear-regression slope of the rolling window after this reading.
    pub current_trend: f64,
}
/// Aggregate summary across all biomarker streams.
///
/// Derives added so callers can log and copy summaries — public types
/// should almost always be `Debug`.
#[derive(Debug, Clone)]
pub struct StreamSummary {
    /// Total readings ingested across every biomarker.
    pub total_readings: u64,
    /// Count of readings flagged anomalous.
    pub anomaly_count: u64,
    /// `anomaly_count / total_readings` (0.0 when no readings).
    pub anomaly_rate: f64,
    /// Snapshot of per-biomarker rolling statistics.
    pub biomarker_stats: HashMap<String, StreamStats>,
    /// Readings per second derived from the first/last timestamps seen.
    pub throughput_readings_per_sec: f64,
}
// ── Stream processor ────────────────────────────────────────────────────────
/// Smoothing factor for the exponential moving average (higher = faster decay).
const EMA_ALPHA: f64 = 0.1;
/// |z| above this flags a reading as a statistical anomaly.
const Z_SCORE_THRESHOLD: f64 = 2.5;
/// Fractional overshoot beyond the reference range tolerated before flagging.
const REF_OVERSHOOT: f64 = 0.20;
const CUSUM_THRESHOLD: f64 = 4.0; // Cumulative sum threshold for changepoint detection
const CUSUM_DRIFT: f64 = 0.5; // Allowable drift before CUSUM accumulates
/// Processes biomarker readings with per-stream ring buffers, z-score anomaly
/// detection, and trend analysis via simple linear regression.
pub struct StreamProcessor {
    // Stream configuration; `window_size` sizes each per-biomarker buffer.
    config: StreamConfig,
    // Rolling value window per biomarker id.
    buffers: HashMap<String, RingBuffer<f64>>,
    // Latest computed statistics per biomarker id.
    stats: HashMap<String, StreamStats>,
    // Total readings across all biomarkers.
    total_readings: u64,
    // Total anomalies across all biomarkers.
    anomaly_count: u64,
    // Per-biomarker anomaly counts (feeds `StreamStats::anomaly_rate`).
    anom_per_bio: HashMap<String, u64>,
    // Timestamp of the first reading seen (ms), for throughput.
    start_ts: Option<u64>,
    // Timestamp of the most recent reading seen (ms).
    last_ts: Option<u64>,
}
impl StreamProcessor {
    /// Create an empty processor; per-biomarker state is allocated lazily
    /// on the first reading of each stream.
    pub fn new(config: StreamConfig) -> Self {
        let cap = config.num_biomarkers;
        Self {
            config,
            buffers: HashMap::with_capacity(cap),
            stats: HashMap::with_capacity(cap),
            total_readings: 0,
            anomaly_count: 0,
            anom_per_bio: HashMap::with_capacity(cap),
            start_ts: None,
            last_ts: None,
        }
    }

    /// Ingest one reading: update the rolling window, flag anomalies
    /// (z-score or out-of-range), refresh statistics, and run CUSUM
    /// changepoint detection.
    ///
    /// Note: the new value is pushed into the window *before* the window
    /// mean/std are computed, so the z-score includes the reading itself.
    pub fn process_reading(&mut self, reading: &BiomarkerReading) -> ProcessingResult {
        let id = &reading.biomarker_id;
        if self.start_ts.is_none() {
            self.start_ts = Some(reading.timestamp_ms);
        }
        self.last_ts = Some(reading.timestamp_ms);
        let buf = self
            .buffers
            .entry(id.clone())
            .or_insert_with(|| RingBuffer::new(self.config.window_size));
        buf.push(reading.value);
        self.total_readings += 1;
        // Z-score against the rolling window; guard a near-zero std.
        let (wmean, wstd) = window_mean_std(buf);
        let z = if wstd > 1e-12 {
            (reading.value - wmean) / wstd
        } else {
            0.0
        };
        // Out-of-range check with a tolerance band of REF_OVERSHOOT * range
        // beyond each reference bound.
        let rng = reading.reference_high - reading.reference_low;
        let overshoot = REF_OVERSHOOT * rng;
        let oor = reading.value < (reading.reference_low - overshoot)
            || reading.value > (reading.reference_high + overshoot);
        let is_anom = z.abs() > Z_SCORE_THRESHOLD || oor;
        if is_anom {
            self.anomaly_count += 1;
            *self.anom_per_bio.entry(id.clone()).or_insert(0) += 1;
        }
        let slope = compute_trend_slope(buf);
        let bio_anom = *self.anom_per_bio.get(id).unwrap_or(&0);
        let st = self.stats.entry(id.clone()).or_default();
        st.count += 1;
        st.mean = wmean;
        st.variance = wstd * wstd;
        st.trend_slope = slope;
        st.anomaly_rate = bio_anom as f64 / st.count as f64;
        // min/max are all-time, not windowed.
        if reading.value < st.min {
            st.min = reading.value;
        }
        if reading.value > st.max {
            st.max = reading.value;
        }
        // Seed the EMA with the first value to avoid a cold-start bias.
        st.ema = if st.count == 1 {
            reading.value
        } else {
            EMA_ALPHA * reading.value + (1.0 - EMA_ALPHA) * st.ema
        };
        // CUSUM changepoint detection: accumulate deviations from the mean
        if wstd > 1e-12 {
            let norm_dev = (reading.value - wmean) / wstd;
            st.cusum_pos = (st.cusum_pos + norm_dev - CUSUM_DRIFT).max(0.0);
            st.cusum_neg = (st.cusum_neg - norm_dev - CUSUM_DRIFT).max(0.0);
            st.changepoint_detected =
                st.cusum_pos > CUSUM_THRESHOLD || st.cusum_neg > CUSUM_THRESHOLD;
            // Reset after a detection so subsequent changepoints re-arm.
            if st.changepoint_detected {
                st.cusum_pos = 0.0;
                st.cusum_neg = 0.0;
            }
        }
        ProcessingResult {
            accepted: true,
            z_score: z,
            is_anomaly: is_anom,
            current_trend: slope,
        }
    }

    /// Latest statistics for one biomarker, if it has been seen.
    pub fn get_stats(&self, biomarker_id: &str) -> Option<&StreamStats> {
        self.stats.get(biomarker_id)
    }

    /// Aggregate totals across all streams.
    ///
    /// Throughput divides by the observed timestamp span in ms (minimum
    /// 1.0 ms, so single-timestamp streams don't divide by zero).
    pub fn summary(&self) -> StreamSummary {
        let elapsed = match (self.start_ts, self.last_ts) {
            (Some(s), Some(e)) if e > s => (e - s) as f64,
            _ => 1.0,
        };
        let ar = if self.total_readings > 0 {
            self.anomaly_count as f64 / self.total_readings as f64
        } else {
            0.0
        };
        StreamSummary {
            total_readings: self.total_readings,
            anomaly_count: self.anomaly_count,
            anomaly_rate: ar,
            biomarker_stats: self.stats.clone(),
            throughput_readings_per_sec: self.total_readings as f64 / (elapsed / 1000.0),
        }
    }
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Single-pass mean and sample standard deviation via Welford's online
/// algorithm — one traversal instead of a sum pass plus a variance pass.
///
/// Returns `(0.0, 0.0)` for an empty buffer and `(mean, 0.0)` for a
/// single-element buffer (sample std is undefined for n < 2).
fn window_mean_std(buf: &RingBuffer<f64>) -> (f64, f64) {
    let count = buf.len();
    if count == 0 {
        return (0.0, 0.0);
    }
    let mut running_mean = 0.0;
    let mut sum_sq_dev = 0.0; // Welford's M2 accumulator
    let mut seen = 0.0_f64;
    for &x in buf.iter() {
        seen += 1.0;
        let delta = x - running_mean;
        running_mean += delta / seen;
        sum_sq_dev += delta * (x - running_mean);
    }
    if count < 2 {
        (running_mean, 0.0)
    } else {
        (running_mean, (sum_sq_dev / (count - 1) as f64).sqrt())
    }
}
/// Least-squares slope of the buffered values against their window index
/// (x = 0, 1, 2, …). Returns 0.0 for windows shorter than two samples or
/// when the centered x-variance is degenerate.
fn compute_trend_slope(buf: &RingBuffer<f64>) -> f64 {
    let count = buf.len();
    if count < 2 {
        return 0.0;
    }
    let nf = count as f64;
    let x_mean = (nf - 1.0) / 2.0; // mean of 0..count-1
    let mut sum_y = 0.0;
    let mut sum_xy = 0.0;
    let mut sum_xx = 0.0;
    for (idx, &y) in buf.iter().enumerate() {
        let x = idx as f64;
        sum_y += y;
        sum_xy += x * y;
        sum_xx += x * x;
    }
    // Centered cross- and self-sums for the regression slope.
    let ss_xy = sum_xy - nf * x_mean * (sum_y / nf);
    let ss_xx = sum_xx - nf * x_mean * x_mean;
    if ss_xx.abs() < 1e-12 {
        0.0
    } else {
        ss_xy / ss_xx
    }
}
// ── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a reading with explicit reference bounds.
    fn reading(ts: u64, id: &str, val: f64, lo: f64, hi: f64) -> BiomarkerReading {
        BiomarkerReading {
            timestamp_ms: ts,
            biomarker_id: id.into(),
            value: val,
            reference_low: lo,
            reference_high: hi,
            is_anomaly: false,
            z_score: 0.0,
        }
    }

    // Helper: glucose reading with its standard 70-100 reference range.
    fn glucose(ts: u64, val: f64) -> BiomarkerReading {
        reading(ts, "glucose", val, 70.0, 100.0)
    }

    // -- RingBuffer --
    #[test]
    fn ring_buffer_push_iter_len() {
        let mut rb: RingBuffer<i32> = RingBuffer::new(4);
        for v in [10, 20, 30] {
            rb.push(v);
        }
        assert_eq!(rb.iter().copied().collect::<Vec<_>>(), vec![10, 20, 30]);
        assert_eq!(rb.len(), 3);
        assert!(!rb.is_full());
    }

    // Overflow evicts the oldest element, preserving insertion order.
    #[test]
    fn ring_buffer_overflow_keeps_newest() {
        let mut rb: RingBuffer<i32> = RingBuffer::new(3);
        for v in 1..=4 {
            rb.push(v);
        }
        assert!(rb.is_full());
        assert_eq!(rb.iter().copied().collect::<Vec<_>>(), vec![2, 3, 4]);
    }

    // Capacity-1 edge case: each push replaces the previous element.
    #[test]
    fn ring_buffer_capacity_one() {
        let mut rb: RingBuffer<i32> = RingBuffer::new(1);
        rb.push(42);
        rb.push(99);
        assert_eq!(rb.iter().copied().collect::<Vec<_>>(), vec![99]);
    }

    #[test]
    fn ring_buffer_clear_resets() {
        let mut rb: RingBuffer<i32> = RingBuffer::new(3);
        rb.push(1);
        rb.push(2);
        rb.clear();
        assert_eq!(rb.len(), 0);
        assert!(!rb.is_full());
        assert_eq!(rb.iter().count(), 0);
    }

    // -- Batch generation --
    #[test]
    fn generate_correct_count_and_ids() {
        let cfg = StreamConfig::default();
        let readings = generate_readings(&cfg, 50, 42);
        assert_eq!(readings.len(), 50 * cfg.num_biomarkers);
        let valid: Vec<&str> = BIOMARKER_DEFS.iter().map(|d| d.id).collect();
        for r in &readings {
            assert!(valid.contains(&r.biomarker_id.as_str()));
        }
    }

    // Every reading must carry the reference range from its definition.
    #[test]
    fn generated_reference_ranges_match_defs() {
        let readings = generate_readings(&StreamConfig::default(), 20, 123);
        for r in &readings {
            let d = BIOMARKER_DEFS
                .iter()
                .find(|d| d.id == r.biomarker_id)
                .unwrap();
            assert!((r.reference_low - d.low).abs() < 1e-9);
            assert!((r.reference_high - d.high).abs() < 1e-9);
        }
    }

    // The generator clamps values at zero even under large negative spikes.
    #[test]
    fn generated_values_non_negative() {
        for r in &generate_readings(&StreamConfig::default(), 100, 999) {
            assert!(r.value >= 0.0);
        }
    }

    // -- StreamProcessor --
    #[test]
    fn processor_computes_stats() {
        let cfg = StreamConfig {
            window_size: 10,
            ..Default::default()
        };
        let mut p = StreamProcessor::new(cfg.clone());
        for r in &generate_readings(&cfg, 20, 55) {
            p.process_reading(r);
        }
        let s = p.get_stats("glucose").unwrap();
        assert!(s.count > 0 && s.mean > 0.0 && s.min <= s.max);
    }

    #[test]
    fn processor_summary_totals() {
        let cfg = StreamConfig::default();
        let mut p = StreamProcessor::new(cfg.clone());
        for r in &generate_readings(&cfg, 30, 77) {
            p.process_reading(r);
        }
        let s = p.summary();
        assert_eq!(s.total_readings, 30 * cfg.num_biomarkers as u64);
        assert!((0.0..=1.0).contains(&s.anomaly_rate));
    }

    // -- Anomaly detection --
    // A flat stream followed by a huge jump must trip the z-score check.
    #[test]
    fn detects_z_score_anomaly() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 20,
            ..Default::default()
        });
        for i in 0..20 {
            p.process_reading(&glucose(i * 1000, 85.0));
        }
        let r = p.process_reading(&glucose(20_000, 300.0));
        assert!(r.is_anomaly);
        assert!(r.z_score.abs() > Z_SCORE_THRESHOLD);
    }

    #[test]
    fn detects_out_of_range_anomaly() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 5,
            ..Default::default()
        });
        for (i, v) in [80.0, 82.0, 78.0, 84.0, 81.0].iter().enumerate() {
            p.process_reading(&glucose(i as u64 * 1000, *v));
        }
        // 140 >> ref_high(100) + 20%*range(30)=106
        assert!(p.process_reading(&glucose(5000, 140.0)).is_anomaly);
    }

    // Constant in-range values should never be flagged.
    #[test]
    fn zero_anomaly_rate_for_constant_stream() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 50,
            ..Default::default()
        });
        for i in 0..10 {
            p.process_reading(&reading(i * 1000, "crp", 1.5, 0.1, 3.0));
        }
        assert!(p.get_stats("crp").unwrap().anomaly_rate.abs() < 1e-9);
    }

    // -- Trend detection --
    #[test]
    fn positive_trend_for_increasing() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 20,
            ..Default::default()
        });
        let mut r = ProcessingResult {
            accepted: true,
            z_score: 0.0,
            is_anomaly: false,
            current_trend: 0.0,
        };
        for i in 0..20 {
            r = p.process_reading(&glucose(i * 1000, 70.0 + i as f64));
        }
        assert!(r.current_trend > 0.0, "got {}", r.current_trend);
    }

    #[test]
    fn negative_trend_for_decreasing() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 20,
            ..Default::default()
        });
        let mut r = ProcessingResult {
            accepted: true,
            z_score: 0.0,
            is_anomaly: false,
            current_trend: 0.0,
        };
        for i in 0..20 {
            r = p.process_reading(&reading(i * 1000, "hdl", 60.0 - i as f64 * 0.5, 40.0, 60.0));
        }
        assert!(r.current_trend < 0.0, "got {}", r.current_trend);
    }

    // A perfectly linear series must recover its slope exactly.
    #[test]
    fn exact_slope_for_linear_series() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 10,
            ..Default::default()
        });
        for i in 0..10 {
            p.process_reading(&reading(
                i * 1000,
                "ldl",
                100.0 + i as f64 * 3.0,
                70.0,
                130.0,
            ));
        }
        assert!((p.get_stats("ldl").unwrap().trend_slope - 3.0).abs() < 1e-9);
    }

    // -- Z-score --
    // A reading equal to the window mean must score near zero.
    #[test]
    fn z_score_small_for_near_mean() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 10,
            ..Default::default()
        });
        for (i, v) in [80.0, 82.0, 78.0, 84.0, 76.0, 86.0, 81.0, 79.0, 83.0]
            .iter()
            .enumerate()
        {
            p.process_reading(&glucose(i as u64 * 1000, *v));
        }
        let mean = p.get_stats("glucose").unwrap().mean;
        assert!(p.process_reading(&glucose(9000, mean)).z_score.abs() < 1.0);
    }

    // -- EMA --
    // With a constant input the EMA must converge to that constant.
    #[test]
    fn ema_converges_to_constant() {
        let mut p = StreamProcessor::new(StreamConfig {
            window_size: 50,
            ..Default::default()
        });
        for i in 0..50 {
            p.process_reading(&reading(i * 1000, "crp", 2.0, 0.1, 3.0));
        }
        assert!((p.get_stats("crp").unwrap().ema - 2.0).abs() < 1e-6);
    }
}

View File

@@ -0,0 +1,322 @@
//! Epigenomics analysis module
//!
//! Provides methylation profiling and epigenetic age prediction
//! using the Horvath clock model.
use serde::{Deserialize, Serialize};
/// A CpG site with methylation data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpGSite {
    /// Chromosome number
    pub chromosome: u8,
    /// Genomic position
    pub position: u64,
    /// Methylation level (beta value, 0.0 to 1.0; clamped on construction
    /// via `MethylationProfile::from_beta_values`)
    pub methylation_level: f32,
}
/// Methylation profile containing CpG site measurements
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MethylationProfile {
    /// CpG sites with measured methylation levels
    pub sites: Vec<CpGSite>,
}
impl MethylationProfile {
    /// Create a methylation profile from position and beta value arrays.
    ///
    /// Positions are `(chromosome, position)` pairs; each beta value is
    /// clamped into `[0.0, 1.0]`. If the two vectors differ in length the
    /// extra entries of the longer one are silently dropped (zip semantics).
    pub fn from_beta_values(positions: Vec<(u8, u64)>, betas: Vec<f32>) -> Self {
        let sites = positions
            .into_iter()
            // `zip` takes any IntoIterator; the explicit `.into_iter()` on
            // `betas` was redundant (clippy: useless_conversion).
            .zip(betas)
            .map(|((chr, pos), beta)| CpGSite {
                chromosome: chr,
                position: pos,
                methylation_level: beta.clamp(0.0, 1.0),
            })
            .collect();
        Self { sites }
    }

    /// Calculate mean methylation across all sites (0.0 for an empty profile).
    pub fn mean_methylation(&self) -> f32 {
        if self.sites.is_empty() {
            return 0.0;
        }
        let sum: f32 = self.sites.iter().map(|s| s.methylation_level).sum();
        sum / self.sites.len() as f32
    }

    /// Calculate methylation entropy (Shannon entropy of beta values)
    ///
    /// Beta values are histogrammed into 10 equal-width bins and the
    /// natural-log Shannon entropy of the bin distribution is returned.
    /// High entropy indicates heterogeneous methylation (potential tumor
    /// heterogeneity).
    pub fn methylation_entropy(&self) -> f64 {
        if self.sites.is_empty() {
            return 0.0;
        }
        // Bin methylation into 10 bins [0, 0.1), [0.1, 0.2), ..., [0.9, 1.0];
        // `.min(9)` folds beta == 1.0 into the top bin.
        let mut bins = [0u32; 10];
        for site in &self.sites {
            let bin = ((site.methylation_level * 10.0) as usize).min(9);
            bins[bin] += 1;
        }
        let n = self.sites.len() as f64;
        // H = -sum(p * ln p) over occupied bins.
        bins.iter()
            .filter(|&&count| count > 0)
            .map(|&count| {
                let p = count as f64 / n;
                -p * p.ln()
            })
            .sum()
    }

    /// Calculate extreme methylation ratio
    ///
    /// Fraction of sites with beta < 0.1 (hypomethylated) or > 0.9
    /// (hypermethylated). High ratio indicates global methylation
    /// disruption (cancer hallmark). Returns 0.0 for an empty profile.
    pub fn extreme_methylation_ratio(&self) -> f32 {
        if self.sites.is_empty() {
            return 0.0;
        }
        let extreme_count = self
            .sites
            .iter()
            .filter(|s| s.methylation_level < 0.1 || s.methylation_level > 0.9)
            .count();
        extreme_count as f32 / self.sites.len() as f32
    }
}
/// Horvath epigenetic clock for biological age prediction
///
/// Uses a simplified linear model based on CpG site methylation levels
/// to predict biological age.
pub struct HorvathClock {
    /// Intercept term (base age when no bins contribute)
    intercept: f64,
    /// Coefficient per CpG site bin
    coefficients: Vec<f64>,
    /// Number of bins to partition sites into
    num_bins: usize,
}
impl HorvathClock {
    /// Create the default Horvath clock model
    ///
    /// Uses a simplified model with binned methylation values.
    /// Real implementation would use 353 specific CpG sites.
    pub fn default_clock() -> Self {
        Self {
            intercept: 30.0,
            coefficients: vec![
                -15.0, // Low methylation bin (young)
                10.0,  // High methylation bin (age-associated)
                0.5,   // Neutral bin
            ],
            num_bins: 3,
        }
    }

    /// Predict biological age from a methylation profile
    ///
    /// Sites are partitioned into contiguous bins; each bin's mean beta
    /// value is weighted by the matching coefficient and added to the
    /// intercept. The final bin absorbs any remainder so trailing sites
    /// are never dropped (previously the last `len % num_bins` sites —
    /// and, for profiles smaller than `num_bins`, all sites — were
    /// silently ignored). The result is clamped to be non-negative.
    pub fn predict_age(&self, profile: &MethylationProfile) -> f64 {
        let n_sites = profile.sites.len();
        if n_sites == 0 {
            return self.intercept;
        }
        let bin_size = n_sites / self.num_bins.max(1);
        let last_bin = self.coefficients.len().saturating_sub(1);
        let mut age = self.intercept;
        for (bin_idx, coefficient) in self.coefficients.iter().enumerate() {
            let start = (bin_idx * bin_size).min(n_sites);
            // The last coefficient's bin extends to the end of the profile
            // so no trailing sites are discarded.
            let end = if bin_idx == last_bin {
                n_sites
            } else {
                ((bin_idx + 1) * bin_size).min(n_sites)
            };
            let bin_sites = &profile.sites[start..end];
            if !bin_sites.is_empty() {
                let mean_meth: f64 = bin_sites
                    .iter()
                    .map(|s| s.methylation_level as f64)
                    .sum::<f64>()
                    / bin_sites.len() as f64;
                age += coefficient * mean_meth;
            }
        }
        age.max(0.0)
    }

    /// Calculate age acceleration (difference between biological and chronological age)
    ///
    /// Positive values indicate accelerated aging (associated with mortality risk).
    /// Negative values indicate decelerated aging.
    pub fn age_acceleration(predicted_age: f64, chronological_age: f64) -> f64 {
        predicted_age - chronological_age
    }
}
/// Cancer signal detector using methylation patterns
///
/// Combines methylation entropy and extreme methylation ratio
/// to produce a cancer risk score (0.0 to 1.0).
pub struct CancerSignalDetector {
    /// Entropy weight in the combined score
    entropy_weight: f64,
    /// Extreme ratio weight
    extreme_weight: f64,
    /// Threshold for elevated cancer risk (score >= threshold flags)
    risk_threshold: f64,
}
impl CancerSignalDetector {
    /// Create with default parameters (0.4 entropy / 0.6 extreme-ratio
    /// weighting, 0.3 elevation threshold).
    pub fn new() -> Self {
        Self {
            entropy_weight: 0.4,
            extreme_weight: 0.6,
            risk_threshold: 0.3,
        }
    }

    /// Detect cancer signal from methylation profile
    ///
    /// Returns a [`CancerSignalResult`] whose `risk_score` (0.0 to 1.0) is
    /// the weighted sum of normalized entropy and the extreme-methylation
    /// ratio; `is_elevated` is set when the score reaches `risk_threshold`.
    /// An empty profile yields an all-zero, non-elevated result.
    pub fn detect(&self, profile: &MethylationProfile) -> CancerSignalResult {
        if profile.sites.is_empty() {
            return CancerSignalResult {
                risk_score: 0.0,
                is_elevated: false,
                entropy: 0.0,
                extreme_ratio: 0.0,
            };
        }
        let entropy = profile.methylation_entropy();
        let extreme_ratio = profile.extreme_methylation_ratio() as f64;
        // Normalize entropy to 0-1: the maximum entropy of a 10-bin
        // histogram is ln(10). Use the exact library constant instead of
        // the previously hard-coded truncation 2.302.
        let normalized_entropy = (entropy / std::f64::consts::LN_10).min(1.0);
        let risk_score = (self.entropy_weight * normalized_entropy
            + self.extreme_weight * extreme_ratio)
            .min(1.0);
        CancerSignalResult {
            risk_score,
            is_elevated: risk_score >= self.risk_threshold,
            entropy,
            extreme_ratio,
        }
    }
}
impl Default for CancerSignalDetector {
fn default() -> Self {
Self::new()
}
}
/// Result from cancer signal detection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CancerSignalResult {
    /// Combined risk score (0.0 to 1.0)
    pub risk_score: f64,
    /// Whether the risk score exceeds the threshold
    pub is_elevated: bool,
    /// Raw methylation entropy (natural-log, 10-bin histogram)
    pub entropy: f64,
    /// Fraction of extreme methylation sites (beta < 0.1 or > 0.9)
    pub extreme_ratio: f64,
}
#[cfg(test)]
mod tests {
    use super::*;

    // Profile construction and mean over two sites.
    #[test]
    fn test_methylation_profile() {
        let positions = vec![(1, 1000), (1, 2000)];
        let betas = vec![0.3, 0.7];
        let profile = MethylationProfile::from_beta_values(positions, betas);
        assert_eq!(profile.sites.len(), 2);
        assert!((profile.mean_methylation() - 0.5).abs() < 0.001);
    }

    // Clock should produce a positive age for a plain mid-methylation profile.
    #[test]
    fn test_horvath_clock() {
        let clock = HorvathClock::default_clock();
        let positions = vec![(1, 1000), (1, 2000), (1, 3000)];
        let betas = vec![0.5, 0.5, 0.5];
        let profile = MethylationProfile::from_beta_values(positions, betas);
        let age = clock.predict_age(&profile);
        assert!(age > 0.0);
    }

    // Acceleration is signed: predicted minus chronological.
    #[test]
    fn test_age_acceleration() {
        let accel = HorvathClock::age_acceleration(55.0, 50.0);
        assert!((accel - 5.0).abs() < 0.001);
        let decel = HorvathClock::age_acceleration(40.0, 50.0);
        assert!((decel - (-10.0)).abs() < 0.001);
    }

    #[test]
    fn test_methylation_entropy() {
        // Uniform methylation = low entropy
        let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
        let betas = vec![0.5; 100];
        let profile = MethylationProfile::from_beta_values(positions, betas);
        let entropy = profile.methylation_entropy();
        assert!(
            entropy < 0.1,
            "Uniform should have low entropy: {}",
            entropy
        );
        // Spread methylation = high entropy
        let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
        let betas2: Vec<f32> = (0..100).map(|i| i as f32 / 100.0).collect();
        let profile2 = MethylationProfile::from_beta_values(positions2, betas2);
        let entropy2 = profile2.methylation_entropy();
        assert!(
            entropy2 > 1.0,
            "Spread should have high entropy: {}",
            entropy2
        );
    }

    #[test]
    fn test_cancer_signal_detector() {
        let detector = CancerSignalDetector::new();
        // Normal profile (moderate methylation)
        let positions: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
        let betas = vec![0.5; 100];
        let profile = MethylationProfile::from_beta_values(positions, betas);
        let result = detector.detect(&profile);
        assert!(!result.is_elevated, "Normal profile should not be elevated");
        assert!(result.risk_score < 0.3);
        // Cancerous profile (extreme methylation)
        let positions2: Vec<(u8, u64)> = (0..100).map(|i| (1u8, i as u64)).collect();
        let betas2: Vec<f32> = (0..100)
            .map(|i| if i % 2 == 0 { 0.95 } else { 0.05 })
            .collect();
        let profile2 = MethylationProfile::from_beta_values(positions2, betas2);
        let result2 = detector.detect(&profile2);
        assert!(result2.is_elevated, "Cancer profile should be elevated");
        assert!(result2.extreme_ratio > 0.8);
    }
}

View File

@@ -0,0 +1,58 @@
//! Error types for DNA analysis operations
use thiserror::Error;
/// DNA analysis error types
///
/// `Display` messages are generated by `thiserror`; the `#[from]` variants
/// allow `?` to convert `std::io::Error` and `RuvectorError` automatically.
#[derive(Error, Debug)]
pub enum DnaError {
    /// Invalid DNA sequence (e.g., non-ACGTN characters)
    #[error("Invalid DNA sequence: {0}")]
    InvalidSequence(String),
    /// K-mer indexing error
    #[error("K-mer index error: {0}")]
    IndexError(String),
    /// Sequence alignment error
    #[error("Alignment error: {0}")]
    AlignmentError(String),
    /// Variant calling error
    #[error("Variant calling error: {0}")]
    VariantCallError(String),
    /// Analysis pipeline error
    #[error("Pipeline error: {0}")]
    PipelineError(String),
    /// I/O error
    #[error("I/O error: {0}")]
    IoError(#[from] std::io::Error),
    /// RuVector core error
    #[error("Vector database error: {0}")]
    VectorDbError(#[from] ruvector_core::RuvectorError),
    /// Dimension mismatch
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },
    /// Empty sequence
    #[error("Empty sequence provided")]
    EmptySequence,
    /// Invalid quality score
    #[error("Invalid quality score: {0}")]
    InvalidQuality(u8),
    /// Invalid k-mer size
    #[error("Invalid k-mer size: {0}")]
    InvalidKmerSize(usize),
    /// 23andMe file parse error
    #[error("Parse error: {0}")]
    ParseError(String),
}

/// Result type for DNA analysis operations
pub type Result<T> = std::result::Result<T, DnaError>;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,686 @@
//! Health variant analysis for genotyping data
//!
//! Clinically significant variant interpretation for 17+ health-relevant
//! SNPs commonly found in 23andMe/genotyping panels. Covers APOE, BRCA1/2,
//! TP53, MTHFR, COMT, OPRM1, CYP1A2, and more.
//!
//! Based on: <https://github.com/ericporres/rvdna-bridge>
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Result of analyzing a single health variant.
///
/// One record is produced per rsid found in both the user's genotype map
/// and the built-in `HEALTH_VARIANTS` interpretation table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthVariantResult {
    /// rsid identifier (e.g. "rs429358")
    pub rsid: String,
    /// Gene name (e.g. "APOE")
    pub gene: String,
    /// Variant common name
    pub name: String,
    /// Observed genotype (e.g. "CT")
    pub genotype: String,
    /// Risk allele for this variant
    pub risk_allele: char,
    /// Human-readable interpretation of the observed genotype
    pub interpretation: String,
    /// Clinical significance text from the interpretation table
    pub clinical_significance: String,
}
/// APOE genotype determination result.
///
/// The APOE e2/e3/e4 diplotype is derived from the two SNPs rs429358
/// and rs7412 by `determine_apoe`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApoeResult {
    /// Full APOE genotype string (e.g., "e2/e3"), plus a risk summary
    pub genotype: String,
    /// rs429358 genotype as observed (empty if missing)
    pub rs429358: String,
    /// rs7412 genotype as observed (empty if missing)
    pub rs7412: String,
}
/// MTHFR compound status across the two common functional SNPs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MthfrResult {
    /// C677T genotype (rs1801133), as observed
    pub c677t: String,
    /// A1298C genotype (rs1801131), as observed
    pub a1298c: String,
    /// Compound risk score (0-4): sum of 0-2 points per SNP
    pub score: u8,
    /// Clinical assessment text matched to the score
    pub assessment: String,
}
/// Pain sensitivity profile combining COMT and OPRM1 genotypes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PainProfile {
    /// COMT genotype (rs4680)
    pub comt: String,
    /// OPRM1 genotype (rs1799971)
    pub oprm1: String,
    /// Combined pain score (0-4): 0-2 points from each SNP
    pub score: u8,
    /// Sensitivity label ("Low" through "High")
    pub label: String,
    /// COMT interpretation text
    pub comt_note: String,
    /// OPRM1 interpretation text
    pub oprm1_note: String,
}
// ── Internal definition type ──

/// One entry in the static clinical-variant interpretation table.
struct VDef {
    /// rsid this definition applies to
    rsid: &'static str,
    /// Gene name
    gene: &'static str,
    /// Variant common name
    name: &'static str,
    /// Risk allele for this variant
    risk_allele: char,
    /// Interpretation rows, matched verbatim against the observed genotype:
    /// (genotype, description, significance)
    interps: &'static [(&'static str, &'static str, &'static str)],
}
// Static clinical interpretation table, keyed by rsid and matched verbatim
// against observed genotype strings.
//
// NOTE(review): genotype keys appear to be in the array-reporting
// (23andMe-style) orientation for each rsid — e.g. MTHFR C677T is keyed by
// G/A alleles rather than C/T — confirm strand orientation against the
// source panel before extending this table.
static HEALTH_VARIANTS: &[VDef] = &[
    // ── APOE (Alzheimer's) ──
    VDef {
        rsid: "rs429358",
        gene: "APOE",
        name: "APOE e4 determinant",
        risk_allele: 'C',
        interps: &[
            (
                "TT",
                "APOE e3/e3 or e2/e3 (depends on rs7412)",
                "Protective/Normal",
            ),
            (
                "CT",
                "One e4 allele present",
                "Increased Alzheimer's risk (~3x)",
            ),
            (
                "CC",
                "Two e4 alleles present",
                "Significantly increased Alzheimer's risk (~12x)",
            ),
        ],
    },
    VDef {
        rsid: "rs7412",
        gene: "APOE",
        name: "APOE e2 determinant",
        risk_allele: 'T',
        interps: &[
            ("CC", "No e2 allele", "Normal"),
            (
                "CT",
                "One e2 allele present",
                "Protective - reduced Alzheimer's risk",
            ),
            ("TT", "Two e2 alleles (e2/e2)", "Protective; monitor lipids"),
        ],
    },
    // ── TP53 (cancer) ──
    VDef {
        rsid: "rs1042522",
        gene: "TP53",
        name: "p53 Pro72Arg (R72P)",
        risk_allele: 'G',
        interps: &[
            (
                "CC",
                "Pro/Pro homozygous",
                "Normal apoptosis; slightly increased cancer survival",
            ),
            (
                "CG",
                "Pro/Arg heterozygous",
                "Mixed - Arg allele has stronger apoptotic activity",
            ),
            (
                "GG",
                "Arg/Arg homozygous",
                "Stronger apoptotic response; variable cancer risk",
            ),
        ],
    },
    // ── BRCA1 ──
    VDef {
        rsid: "rs80357906",
        gene: "BRCA1",
        name: "BRCA1 5382insC (Ashkenazi founder)",
        // Insertion/deletion variant: genotypes use I/D rather than bases.
        risk_allele: 'I',
        interps: &[
            (
                "DD",
                "No insertion detected",
                "Normal - no BRCA1 5382insC mutation",
            ),
            (
                "DI",
                "Heterozygous carrier",
                "INCREASED breast/ovarian cancer risk - genetic counseling recommended",
            ),
            (
                "II",
                "Homozygous insertion",
                "HIGH breast/ovarian cancer risk - urgent genetic counseling",
            ),
        ],
    },
    VDef {
        rsid: "rs28897696",
        gene: "BRCA1",
        name: "BRCA1 missense variant",
        risk_allele: 'A',
        interps: &[
            ("GG", "Reference genotype", "Normal"),
            (
                "AG",
                "Heterozygous",
                "Variant of uncertain significance - consult genetic counselor",
            ),
            ("AA", "Homozygous variant", "Consult genetic counselor"),
        ],
    },
    // ── BRCA2 ──
    VDef {
        rsid: "rs11571833",
        gene: "BRCA2",
        name: "BRCA2 K3326X",
        risk_allele: 'T',
        interps: &[
            ("AA", "Reference genotype", "Normal"),
            (
                "AT",
                "Heterozygous",
                "Modestly increased cancer risk (OR ~1.3)",
            ),
            (
                "TT",
                "Homozygous variant",
                "Increased cancer risk - genetic counseling recommended",
            ),
        ],
    },
    // ── MTHFR (folate metabolism) ──
    VDef {
        rsid: "rs1801133",
        gene: "MTHFR",
        name: "C677T",
        risk_allele: 'A',
        interps: &[
            (
                "GG",
                "CC genotype (normal)",
                "Normal MTHFR enzyme activity (100%)",
            ),
            (
                "AG",
                "CT heterozygous",
                "Reduced enzyme activity (~65%). Consider methylfolate.",
            ),
            (
                "AA",
                "TT homozygous",
                "Significantly reduced activity (~30%). Methylfolate recommended.",
            ),
        ],
    },
    VDef {
        rsid: "rs1801131",
        gene: "MTHFR",
        name: "A1298C",
        risk_allele: 'T',
        interps: &[
            ("GG", "CC homozygous variant", "Reduced enzyme activity"),
            ("GT", "AC heterozygous", "Mildly reduced enzyme activity"),
            (
                "TT",
                "AA reference",
                "Normal MTHFR activity at this position",
            ),
        ],
    },
    // ── COMT (dopamine/pain) ──
    VDef {
        rsid: "rs4680",
        gene: "COMT",
        name: "Val158Met",
        risk_allele: 'A',
        interps: &[
            (
                "GG",
                "Val/Val",
                "Higher COMT activity, lower dopamine. Better stress resilience.",
            ),
            (
                "AG",
                "Val/Met heterozygous",
                "Intermediate COMT activity. Balanced dopamine.",
            ),
            (
                "AA",
                "Met/Met",
                "Lower COMT activity, higher dopamine. Higher pain sensitivity.",
            ),
        ],
    },
    // ── OPRM1 (opioid receptor) ──
    VDef {
        rsid: "rs1799971",
        gene: "OPRM1",
        name: "A118G (Asn40Asp)",
        risk_allele: 'G',
        interps: &[
            ("AA", "Asn/Asn", "Normal opioid sensitivity"),
            (
                "AG",
                "Asn/Asp heterozygous",
                "Reduced opioid sensitivity; may need higher doses.",
            ),
            ("GG", "Asp/Asp", "Significantly reduced opioid sensitivity."),
        ],
    },
    // ── CYP1A2 (caffeine) ──
    VDef {
        rsid: "rs762551",
        gene: "CYP1A2",
        name: "Caffeine metabolism",
        risk_allele: 'C',
        interps: &[
            (
                "AA",
                "Fast metabolizer",
                "Rapid caffeine clearance. Coffee may REDUCE heart disease risk.",
            ),
            (
                "AC",
                "Intermediate",
                "Moderate caffeine clearance. Moderate coffee intake recommended.",
            ),
            (
                "CC",
                "Slow metabolizer",
                "Slow caffeine clearance. Excess coffee may INCREASE heart risk.",
            ),
        ],
    },
    // ── Lactose ──
    VDef {
        rsid: "rs4988235",
        gene: "MCM6/LCT",
        name: "Lactase persistence (European)",
        risk_allele: 'G',
        interps: &[
            (
                "AA",
                "Lactase persistent",
                "Likely lactose TOLERANT into adulthood",
            ),
            (
                "AG",
                "Heterozygous",
                "Likely lactose tolerant (persistence is dominant)",
            ),
            (
                "GG",
                "Lactase non-persistent",
                "Likely lactose INTOLERANT in adulthood",
            ),
        ],
    },
    // ── OXTR (oxytocin receptor) ──
    VDef {
        rsid: "rs53576",
        gene: "OXTR",
        name: "Oxytocin receptor",
        risk_allele: 'A',
        interps: &[
            (
                "GG",
                "GG genotype",
                "Higher empathy scores; better social cognition.",
            ),
            (
                "AG",
                "AG heterozygous",
                "Intermediate empathy and social cognition.",
            ),
            (
                "AA",
                "AA genotype",
                "May have lower empathy; potentially more resilient to social stress.",
            ),
        ],
    },
    // ── HTR2A (serotonin) ──
    VDef {
        rsid: "rs6311",
        gene: "HTR2A",
        name: "Serotonin 2A receptor (-1438G/A)",
        risk_allele: 'T',
        interps: &[
            ("CC", "GG genotype", "Normal serotonin receptor expression"),
            (
                "CT",
                "GA heterozygous",
                "Slightly altered serotonin signaling",
            ),
            (
                "TT",
                "AA genotype",
                "Altered serotonin receptor density; may affect SSRI response",
            ),
        ],
    },
    // ── ANKK1/DRD2 (dopamine) ──
    VDef {
        rsid: "rs1800497",
        gene: "ANKK1/DRD2",
        name: "Taq1A (dopamine receptor)",
        risk_allele: 'A',
        interps: &[
            ("GG", "A2/A2", "Normal dopamine receptor density"),
            (
                "AG",
                "A1/A2 heterozygous",
                "Reduced D2 receptor density (~30% less). Reward-seeking.",
            ),
            (
                "AA",
                "A1/A1",
                "Significantly reduced D2 receptor density. Higher addiction risk.",
            ),
        ],
    },
    // ── SLCO1B1 (statin metabolism) ──
    VDef {
        rsid: "rs4363657",
        gene: "SLCO1B1",
        name: "Statin transporter",
        risk_allele: 'C',
        interps: &[
            (
                "TT",
                "Reference",
                "Normal statin metabolism. Standard dosing.",
            ),
            (
                "CT",
                "Heterozygous",
                "Increased statin myopathy risk (~4.5x). Consider lower dose.",
            ),
            (
                "CC",
                "Homozygous variant",
                "High statin myopathy risk (~17x). Use lowest effective dose.",
            ),
        ],
    },
    // ── NQO1 (oxidative stress) ──
    VDef {
        rsid: "rs1800566",
        gene: "NQO1",
        name: "Pro187Ser (oxidative stress)",
        risk_allele: 'T',
        interps: &[
            ("CC", "Pro/Pro (reference)", "Normal NQO1 enzyme activity"),
            (
                "CT",
                "Pro/Ser heterozygous",
                "Reduced NQO1 activity (~3x lower). Impaired detox.",
            ),
            (
                "TT",
                "Ser/Ser",
                "No NQO1 activity. Significantly impaired quinone detoxification.",
            ),
        ],
    },
];
/// Analyze health variants from a genotype map (rsid -> genotype string).
///
/// Walks the built-in `HEALTH_VARIANTS` table in declaration order and
/// emits one result per rsid present in `genotypes`. A genotype that does
/// not match any row in the interpretation table is not dropped; it falls
/// back to a generic "consult a counselor" message instead.
pub fn analyze_health_variants(genotypes: &HashMap<String, String>) -> Vec<HealthVariantResult> {
    HEALTH_VARIANTS
        .iter()
        .filter_map(|def| {
            // Skip table entries the user's panel did not cover.
            let observed = genotypes.get(def.rsid)?;
            let matched = def
                .interps
                .iter()
                .find(|(g, _, _)| *g == observed.as_str());
            let (interpretation, significance) = match matched {
                Some((_, d, s)) => (d.to_string(), s.to_string()),
                None => (
                    format!("Genotype {} - not in standard table", observed),
                    "Consult genetic counselor".to_string(),
                ),
            };
            Some(HealthVariantResult {
                rsid: def.rsid.to_string(),
                gene: def.gene.to_string(),
                name: def.name.to_string(),
                genotype: observed.clone(),
                risk_allele: def.risk_allele,
                interpretation,
                clinical_significance: significance,
            })
        })
        .collect()
}
/// Determine APOE genotype from rs429358 + rs7412 combination.
///
/// The e2/e3/e4 haplotype is encoded by two SNPs: 'C' alleles at rs429358
/// mark e4 and 'T' alleles at rs7412 mark e2. Counting risk alleles at
/// each site is order-independent ("CT" and "TC" score identically), so
/// the common diplotypes resolve without phased data.
pub fn determine_apoe(genotypes: &HashMap<String, String>) -> ApoeResult {
    let rs429358 = genotypes.get("rs429358").cloned().unwrap_or_default();
    let rs7412 = genotypes.get("rs7412").cloned().unwrap_or_default();

    // Both SNPs are required; report a sentinel genotype otherwise.
    if rs429358.is_empty() || rs7412.is_empty() {
        return ApoeResult {
            genotype: "Unable to determine (missing data)".into(),
            rs429358,
            rs7412,
        };
    }

    let count_allele = |gt: &str, allele: char| gt.chars().filter(|&c| c == allele).count();
    let e4 = count_allele(&rs429358, 'C');
    let e2 = count_allele(&rs7412, 'T');

    let genotype = match (e4, e2) {
        (0, 0) => "e3/e3 (most common, baseline risk)".into(),
        (0, 1) => "e2/e3 (PROTECTIVE - reduced Alzheimer's risk)".into(),
        (0, 2) => "e2/e2 (protective; monitor for type III hyperlipoproteinemia)".into(),
        (1, 0) => "e3/e4 (increased Alzheimer's risk ~3x)".into(),
        (1, 1) => "e2/e4 (mixed - e2 partially offsets e4 risk)".into(),
        (2, _) => "e4/e4 (significantly increased Alzheimer's risk ~12x)".into(),
        _ => format!("Unusual combination: rs429358={}, rs7412={}", rs429358, rs7412),
    };

    ApoeResult {
        genotype,
        rs429358,
        rs7412,
    }
}
/// Analyze MTHFR compound status from C677T + A1298C.
///
/// Scores each SNP 0-2 (reference / heterozygous / homozygous risk) and
/// sums them into a 0-4 compound score with a matching supplementation
/// recommendation.
pub fn analyze_mthfr(genotypes: &HashMap<String, String>) -> MthfrResult {
    let c677t = genotypes.get("rs1801133").cloned().unwrap_or_default();
    let a1298c = genotypes.get("rs1801131").cloned().unwrap_or_default();

    // Compound scoring needs both SNPs.
    if c677t.is_empty() || a1298c.is_empty() {
        return MthfrResult {
            c677t,
            a1298c,
            score: 0,
            assessment: "Incomplete MTHFR data".into(),
        };
    }

    // rs1801133: G is reference, A is the risk allele.
    let risk_677: u8 = match c677t.as_str() {
        "AA" => 2,
        "AG" => 1,
        _ => 0,
    };
    // rs1801131: T is reference, G is the risk allele.
    let risk_1298: u8 = match a1298c.as_str() {
        "GG" => 2,
        "GT" => 1,
        _ => 0,
    };
    let score = risk_677 + risk_1298;

    let assessment = match score {
        0 => "Normal MTHFR function. No supplementation needed.",
        1 => "Mildly reduced MTHFR. Consider methylfolate if homocysteine elevated.",
        2 => "Moderately reduced MTHFR. Methylfolate (L-5-MTHF) recommended.",
        3 => "Significantly reduced MTHFR (compound heterozygote). Methylfolate strongly recommended.",
        _ => "Severely reduced MTHFR. Methylfolate essential. Regular homocysteine monitoring.",
    };

    MthfrResult {
        c677t,
        a1298c,
        score,
        assessment: assessment.into(),
    }
}
/// Analyze pain sensitivity profile from COMT + OPRM1.
///
/// Returns `None` when either rs4680 (COMT) or rs1799971 (OPRM1) is
/// missing. Each SNP contributes 0-2 points (homozygous risk = 2,
/// heterozygous = 1) to a combined 0-4 score.
pub fn analyze_pain(genotypes: &HashMap<String, String>) -> Option<PainProfile> {
    let comt = genotypes.get("rs4680")?;
    let oprm1 = genotypes.get("rs1799971")?;

    // Per-SNP contribution to the combined score.
    let points = |gt: &str, homozygous: &str, heterozygous: &str| -> u8 {
        if gt == homozygous {
            2
        } else if gt == heterozygous {
            1
        } else {
            0
        }
    };
    let score = points(comt, "AA", "AG") + points(oprm1, "GG", "AG");

    let label = match score {
        0 => "Low",
        1 => "Low-Moderate",
        2 => "Moderate",
        3 => "Moderate-High",
        _ => "High",
    };

    // Any Met (A) allele at COMT is read as higher pain sensitivity; any
    // Asp (G) allele at OPRM1 as a blunted opioid response.
    let comt_note = if comt.contains('A') {
        "Higher pain sensitivity"
    } else {
        "Lower pain sensitivity"
    };
    let oprm1_note = if oprm1.contains('G') {
        "Reduced opioid response"
    } else {
        "Normal opioid response"
    };

    Some(PainProfile {
        comt: comt.clone(),
        oprm1: oprm1.clone(),
        score,
        label: label.into(),
        comt_note: comt_note.into(),
        oprm1_note: oprm1_note.into(),
    })
}
/// Category groupings for health variant display.
///
/// Returns `(category label, gene names)` pairs covering every gene in
/// the `HEALTH_VARIANTS` table, in display order.
pub fn variant_categories() -> Vec<(&'static str, Vec<&'static str>)> {
    let mut categories = Vec::with_capacity(4);
    categories.push(("Cancer Risk", vec!["TP53", "BRCA1", "BRCA2", "NQO1"]));
    categories.push(("Cardiovascular", vec!["SLCO1B1"]));
    categories.push((
        "Neurological",
        vec!["APOE", "COMT", "OPRM1", "OXTR", "HTR2A", "ANKK1/DRD2"],
    ));
    categories.push(("Metabolism", vec!["MTHFR", "CYP1A2", "MCM6/LCT"]));
    categories
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a genotype map from (rsid, genotype) string pairs.
    fn make_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    #[test]
    fn test_apoe_e3e3() {
        // No e4 (TT at rs429358) and no e2 (CC at rs7412) -> baseline e3/e3.
        let gts = make_map(&[("rs429358", "TT"), ("rs7412", "CC")]);
        let r = determine_apoe(&gts);
        assert!(r.genotype.contains("e3/e3"));
    }

    #[test]
    fn test_apoe_e2e3() {
        // One e2 allele (T at rs7412), no e4 -> protective e2/e3.
        let gts = make_map(&[("rs429358", "TT"), ("rs7412", "CT")]);
        let r = determine_apoe(&gts);
        assert!(r.genotype.contains("e2/e3"));
    }

    #[test]
    fn test_apoe_e4e4() {
        // Two e4 alleles (CC at rs429358) -> highest-risk e4/e4.
        let gts = make_map(&[("rs429358", "CC"), ("rs7412", "CC")]);
        let r = determine_apoe(&gts);
        assert!(r.genotype.contains("e4/e4"));
    }

    #[test]
    fn test_mthfr_normal() {
        // Both SNPs at reference (GG for C677T, TT for A1298C) -> score 0.
        let gts = make_map(&[("rs1801133", "GG"), ("rs1801131", "TT")]);
        let r = analyze_mthfr(&gts);
        assert_eq!(r.score, 0);
        assert!(r.assessment.contains("Normal"));
    }

    #[test]
    fn test_mthfr_compound() {
        // Heterozygous C677T (1) + homozygous A1298C (2) -> compound score 3.
        let gts = make_map(&[("rs1801133", "AG"), ("rs1801131", "GG")]);
        let r = analyze_mthfr(&gts);
        assert_eq!(r.score, 3);
        assert!(r.assessment.contains("compound"));
    }

    #[test]
    fn test_pain_low() {
        // Reference at both COMT and OPRM1 -> minimum pain score.
        let gts = make_map(&[("rs4680", "GG"), ("rs1799971", "AA")]);
        let p = analyze_pain(&gts).unwrap();
        assert_eq!(p.score, 0);
        assert_eq!(p.label, "Low");
    }

    #[test]
    fn test_pain_high() {
        // Homozygous risk at both SNPs -> maximum pain score of 4.
        let gts = make_map(&[("rs4680", "AA"), ("rs1799971", "GG")]);
        let p = analyze_pain(&gts).unwrap();
        assert_eq!(p.score, 4);
        assert_eq!(p.label, "High");
    }

    #[test]
    fn test_health_variants_lookup() {
        // Results follow HEALTH_VARIANTS declaration order: COMT (rs4680)
        // precedes CYP1A2 (rs762551) in the table.
        let gts = make_map(&[("rs762551", "AA"), ("rs4680", "AG")]);
        let results = analyze_health_variants(&gts);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].gene, "COMT");
        assert_eq!(results[1].gene, "CYP1A2");
    }
}

511
vendor/ruvector/examples/dna/src/kmer.rs vendored Normal file
View File

@@ -0,0 +1,511 @@
//! K-mer encoding and HNSW vector indexing for DNA sequences
//!
//! This module provides efficient k-mer based vector encoding for DNA sequences
//! with HNSW indexing for fast similarity search. Implements both k-mer frequency
//! vectors and MinHash sketching (Mash/sourmash algorithm).
use ruvector_core::{
types::{DbOptions, DistanceMetric, HnswConfig, QuantizationConfig, SearchQuery},
VectorDB, VectorEntry,
};
use std::collections::HashMap;
use thiserror::Error;
/// Errors raised by k-mer encoding, sketching, and indexing.
#[derive(Error, Debug)]
pub enum KmerError {
    /// k was 0 or exceeded 32 (the 2-bit-encoding limit)
    #[error("Invalid k-mer length: {0}")]
    InvalidKmerLength(usize),
    /// Sequence contained content that could not be processed
    #[error("Invalid DNA sequence: {0}")]
    InvalidSequence(String),
    /// Error propagated from the underlying vector database (via `#[from]`)
    #[error("Database error: {0}")]
    DatabaseError(#[from] ruvector_core::RuvectorError),
    /// Sequence shorter than k, so no complete k-mer window exists
    #[error("Empty sequence")]
    EmptySequence,
}

/// Module-local result alias for k-mer operations.
type Result<T> = std::result::Result<T, KmerError>;
/// Nucleotide to 2-bit encoding: A=0, C=1, G=2, T=3
///
/// Case-insensitive; RNA `U` maps like `T`. Any other byte (e.g. the
/// ambiguity code `N`) yields `None`.
#[inline]
fn nucleotide_to_bits(nuc: u8) -> Option<u8> {
    match nuc {
        b'A' | b'a' => Some(0),
        b'C' | b'c' => Some(1),
        b'G' | b'g' => Some(2),
        b'T' | b't' | b'U' | b'u' => Some(3),
        _ => None,
    }
}
/// Returns the reverse complement of a DNA sequence
///
/// Complements each base (A<->T, C<->G; RNA `U` is treated as `T`) and
/// reverses the order. Unrecognized bytes pass through unchanged.
fn reverse_complement(seq: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(seq.len());
    for &base in seq.iter().rev() {
        out.push(match base.to_ascii_uppercase() {
            b'A' => b'T',
            b'T' | b'U' => b'A',
            b'C' => b'G',
            b'G' => b'C',
            other => other,
        });
    }
    out
}

/// Returns the canonical k-mer (lexicographically smaller of k-mer and its reverse complement)
pub fn canonical_kmer(kmer: &[u8]) -> Vec<u8> {
    let rc = reverse_complement(kmer);
    // Ties (palindromic k-mers) keep the forward orientation.
    match kmer.cmp(&rc[..]) {
        std::cmp::Ordering::Greater => rc,
        _ => kmer.to_vec(),
    }
}
/// K-mer encoder that converts DNA sequences into frequency vectors
///
/// Produces fixed-dimension, L2-normalized vectors via feature hashing of
/// canonical (strand-agnostic) k-mers.
pub struct KmerEncoder {
    /// K-mer length (valid range 1..=32)
    k: usize,
    /// Output vector length: min(4^k, 1024)
    dimensions: usize,
}
impl KmerEncoder {
    /// Create a new k-mer encoder for k-mers of length k
    ///
    /// # Arguments
    /// * `k` - Length of k-mers (typical values: 21, 31)
    ///
    /// Uses feature hashing to limit dimensionality for large k.
    ///
    /// # Errors
    /// Returns `KmerError::InvalidKmerLength` when `k` is 0 or above 32.
    pub fn new(k: usize) -> Result<Self> {
        if k == 0 || k > 32 {
            return Err(KmerError::InvalidKmerLength(k));
        }
        // Calculate dimensions: min(4^k, 1024) using feature hashing
        let max_kmers = 4_usize.saturating_pow(k as u32);
        let dimensions = max_kmers.min(1024);
        Ok(Self { k, dimensions })
    }

    /// Get the number of dimensions in the encoded vector
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }

    /// Encode a DNA sequence into a k-mer frequency vector
    ///
    /// Uses canonical k-mer hashing (min of forward/reverse-complement hash)
    /// to count strand-agnostic k-mers, then normalizes to unit vector.
    ///
    /// # Errors
    /// Returns `KmerError::EmptySequence` when `seq` is shorter than `k`
    /// (no complete k-mer window exists).
    pub fn encode_sequence(&self, seq: &[u8]) -> Result<Vec<f32>> {
        if seq.len() < self.k {
            return Err(KmerError::EmptySequence);
        }
        let mut counts = vec![0u32; self.dimensions];
        let mut total = 0u32;
        // Extract all k-mers using a sliding window.
        // Avoid Vec allocation by hashing both strands and taking min.
        for window in seq.windows(self.k) {
            let fwd_hash = Self::fnv1a_hash(window);
            let rc_hash = Self::fnv1a_hash_rc(window);
            let canonical_hash = fwd_hash.min(rc_hash);
            let index = canonical_hash % self.dimensions;
            counts[index] = counts[index].saturating_add(1);
            total = total.saturating_add(1);
        }
        // Normalize to frequency vector and then to unit vector.
        // `total` >= 1 here: seq.len() >= k guarantees at least one window.
        let inv_total = 1.0 / total as f32;
        let mut vector: Vec<f32> = counts
            .iter()
            .map(|&count| count as f32 * inv_total)
            .collect();
        // L2 normalization (guarded against an all-zero vector)
        let norm: f32 = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 0.0 {
            let inv_norm = 1.0 / norm;
            vector.iter_mut().for_each(|x| *x *= inv_norm);
        }
        Ok(vector)
    }

    /// FNV-1a hash of a byte slice
    #[inline]
    fn fnv1a_hash(data: &[u8]) -> usize {
        const FNV_OFFSET: u64 = 14695981039346656037;
        const FNV_PRIME: u64 = 1099511628211;
        let mut hash = FNV_OFFSET;
        for &byte in data {
            hash ^= byte as u64;
            hash = hash.wrapping_mul(FNV_PRIME);
        }
        hash as usize
    }

    /// FNV-1a hash of reverse complement (avoids Vec allocation)
    ///
    /// Iterates the window back-to-front, complementing each base on the
    /// fly, so the hash equals `fnv1a_hash(reverse_complement(data))`.
    #[inline]
    fn fnv1a_hash_rc(data: &[u8]) -> usize {
        const FNV_OFFSET: u64 = 14695981039346656037;
        const FNV_PRIME: u64 = 1099511628211;
        let mut hash = FNV_OFFSET;
        for &byte in data.iter().rev() {
            let comp = match byte.to_ascii_uppercase() {
                b'A' => b'T',
                b'T' | b'U' => b'A',
                b'C' => b'G',
                b'G' => b'C',
                n => n,
            };
            hash ^= comp as u64;
            hash = hash.wrapping_mul(FNV_PRIME);
        }
        hash as usize
    }

    // Removed the dead private `hash_kmer` method: it was unused, merely
    // forwarded to `fnv1a_hash`, and its name implied index bucketing it
    // never performed (it ignored `self.dimensions`).
}
/// MinHash sketch for fast sequence similarity (Mash/sourmash algorithm)
///
/// Stores the smallest canonical k-mer hashes of a sequence; Jaccard
/// similarity between sequences is then estimated by comparing sketches
/// instead of full k-mer sets.
pub struct MinHashSketch {
    /// Maximum number of hash values retained in the sketch
    num_hashes: usize,
    /// Ascending list of retained hashes (populated by `sketch`)
    hashes: Vec<u64>,
}
impl MinHashSketch {
    /// Create a new MinHash sketch with the given number of hashes
    ///
    /// # Arguments
    /// * `num_hashes` - Number of hash values to keep (typically 1000)
    pub fn new(num_hashes: usize) -> Self {
        Self {
            num_hashes,
            hashes: Vec::new(),
        }
    }

    /// Compute MinHash signature for a DNA sequence
    ///
    /// Implements a bottom-k sketch over canonical k-mer hashes: each
    /// window is hashed on both strands, the smaller hash kept, and the
    /// `num_hashes` smallest *distinct* values retained. Deduplicating is
    /// the Mash/sourmash convention — repeated k-mers must count once, or
    /// duplicates consume sketch slots and bias the Jaccard estimate.
    ///
    /// # Errors
    /// Returns `KmerError::EmptySequence` when `seq` is shorter than `k`.
    pub fn sketch(&mut self, seq: &[u8], k: usize) -> Result<&[u64]> {
        if seq.len() < k {
            return Err(KmerError::EmptySequence);
        }
        let mut all_hashes = Vec::with_capacity(seq.len() - k + 1);
        // Hash all k-mers using dual-hash (no Vec allocation per k-mer)
        for window in seq.windows(k) {
            let fwd = Self::hash_kmer_64_slice(window);
            let rc = Self::hash_kmer_64_rc(window);
            all_hashes.push(fwd.min(rc));
        }
        // Bottom-k over the DISTINCT hash set: sort, drop duplicates,
        // keep the smallest num_hashes values.
        all_hashes.sort_unstable();
        all_hashes.dedup();
        all_hashes.truncate(self.num_hashes);
        self.hashes = all_hashes;
        Ok(&self.hashes)
    }

    /// Compute Jaccard distance between two MinHash sketches
    ///
    /// Returns 1.0 when either sketch is empty; otherwise
    /// `1 - |A ∩ B| / |A ∪ B|` estimated from the two sorted sketches
    /// via a linear merge.
    pub fn jaccard_distance(&self, other: &MinHashSketch) -> f32 {
        if self.hashes.is_empty() || other.hashes.is_empty() {
            return 1.0;
        }
        let mut intersection = 0;
        let mut i = 0;
        let mut j = 0;
        // Count intersection using sorted arrays
        while i < self.hashes.len() && j < other.hashes.len() {
            if self.hashes[i] == other.hashes[j] {
                intersection += 1;
                i += 1;
                j += 1;
            } else if self.hashes[i] < other.hashes[j] {
                i += 1;
            } else {
                j += 1;
            }
        }
        let union = self.hashes.len() + other.hashes.len() - intersection;
        if union == 0 {
            return 0.0;
        }
        let jaccard_similarity = intersection as f32 / union as f32;
        1.0 - jaccard_similarity
    }

    /// Hash a k-mer using MurmurHash3-like algorithm (forward strand)
    #[inline]
    fn hash_kmer_64_slice(kmer: &[u8]) -> u64 {
        const C1: u64 = 0x87c37b91114253d5;
        const C2: u64 = 0x4cf5ad432745937f;
        let mut h = 0u64;
        for &byte in kmer {
            let mut k = byte as u64;
            k = k.wrapping_mul(C1);
            k = k.rotate_left(31);
            k = k.wrapping_mul(C2);
            h ^= k;
            h = h.rotate_left(27);
            h = h.wrapping_mul(5).wrapping_add(0x52dce729);
        }
        h ^ kmer.len() as u64
    }

    /// Hash reverse complement of a k-mer (no Vec allocation)
    ///
    /// Complements each base back-to-front on the fly, so the result
    /// equals `hash_kmer_64_slice` of the reverse-complemented k-mer.
    #[inline]
    fn hash_kmer_64_rc(kmer: &[u8]) -> u64 {
        const C1: u64 = 0x87c37b91114253d5;
        const C2: u64 = 0x4cf5ad432745937f;
        let mut h = 0u64;
        for &byte in kmer.iter().rev() {
            let comp = match byte.to_ascii_uppercase() {
                b'A' => b'T',
                b'T' | b'U' => b'A',
                b'C' => b'G',
                b'G' => b'C',
                n => n,
            };
            let mut k = comp as u64;
            k = k.wrapping_mul(C1);
            k = k.rotate_left(31);
            k = k.wrapping_mul(C2);
            h ^= k;
            h = h.rotate_left(27);
            h = h.wrapping_mul(5).wrapping_add(0x52dce729);
        }
        h ^ kmer.len() as u64
    }

    /// Get the hashes
    pub fn hashes(&self) -> &[u64] {
        &self.hashes
    }
}
/// Search result for k-mer index queries
#[derive(Debug, Clone)]
pub struct KmerSearchResult {
    /// Identifier of the matched sequence
    pub id: String,
    /// Raw score returned by the vector database search
    pub score: f32,
    /// Mirrors `score` unchanged.
    /// NOTE(review): presumably a cosine distance given the index's
    /// `DistanceMetric::Cosine` — confirm against ruvector-core's contract.
    pub distance: f32,
}
/// K-mer index wrapping VectorDB for sequence similarity search
pub struct KmerIndex {
    /// Backing HNSW-indexed vector database
    db: VectorDB,
    /// Encoder turning sequences into fixed-dimension frequency vectors
    encoder: KmerEncoder,
    /// K-mer length used by the encoder
    k: usize,
}
impl KmerIndex {
/// Create a new k-mer index
///
/// # Arguments
/// * `k` - K-mer length
/// * `dimensions` - Vector dimensions (should match encoder dimensions)
pub fn new(k: usize, dimensions: usize) -> Result<Self> {
let encoder = KmerEncoder::new(k)?;
// Verify dimensions match
if encoder.dimensions() != dimensions {
return Err(KmerError::InvalidKmerLength(k));
}
let options = DbOptions {
dimensions,
distance_metric: DistanceMetric::Cosine,
storage_path: format!("./kmer_index_k{}.db", k),
hnsw_config: Some(HnswConfig {
m: 32,
ef_construction: 200,
ef_search: 100,
max_elements: 1_000_000,
}),
quantization: Some(QuantizationConfig::Scalar),
};
let db = VectorDB::new(options)?;
Ok(Self { db, encoder, k })
}
/// Index a single DNA sequence
pub fn index_sequence(&self, id: &str, sequence: &[u8]) -> Result<()> {
let vector = self.encoder.encode_sequence(sequence)?;
let entry = VectorEntry {
id: Some(id.to_string()),
vector,
metadata: Some({
let mut meta = HashMap::new();
meta.insert("length".to_string(), serde_json::json!(sequence.len()));
meta.insert("k".to_string(), serde_json::json!(self.k));
meta
}),
};
self.db.insert(entry)?;
Ok(())
}
/// Index multiple sequences in a batch
pub fn index_batch(&self, sequences: Vec<(&str, &[u8])>) -> Result<()> {
let entries: Result<Vec<VectorEntry>> = sequences
.into_iter()
.map(|(id, seq)| {
let vector = self.encoder.encode_sequence(seq)?;
Ok(VectorEntry {
id: Some(id.to_string()),
vector,
metadata: Some({
let mut meta = HashMap::new();
meta.insert("length".to_string(), serde_json::json!(seq.len()));
meta.insert("k".to_string(), serde_json::json!(self.k));
meta
}),
})
})
.collect();
self.db.insert_batch(entries?)?;
Ok(())
}
/// Search for similar sequences
pub fn search_similar(&self, query: &[u8], top_k: usize) -> Result<Vec<KmerSearchResult>> {
let query_vector = self.encoder.encode_sequence(query)?;
let search_query = SearchQuery {
vector: query_vector,
k: top_k,
filter: None,
ef_search: None,
};
let results = self.db.search(search_query)?;
Ok(results
.into_iter()
.map(|r| KmerSearchResult {
id: r.id,
score: r.score,
distance: r.score,
})
.collect())
}
/// Search for sequences with similarity above a threshold
pub fn search_with_threshold(
&self,
query: &[u8],
threshold: f32,
) -> Result<Vec<KmerSearchResult>> {
// Search with a larger k to ensure we get all candidates
let results = self.search_similar(query, 100)?;
Ok(results
.into_iter()
.filter(|r| r.distance <= threshold)
.collect())
}
/// Get the k-mer length
pub fn k(&self) -> usize {
self.k
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nucleotide_encoding() {
        // 2-bit mapping is case-insensitive; ambiguity codes yield None.
        assert_eq!(nucleotide_to_bits(b'A'), Some(0));
        assert_eq!(nucleotide_to_bits(b'C'), Some(1));
        assert_eq!(nucleotide_to_bits(b'G'), Some(2));
        assert_eq!(nucleotide_to_bits(b'T'), Some(3));
        assert_eq!(nucleotide_to_bits(b'a'), Some(0));
        assert_eq!(nucleotide_to_bits(b'N'), None);
    }

    #[test]
    fn test_reverse_complement() {
        let seq = b"ATCG";
        let rc = reverse_complement(seq);
        assert_eq!(rc, b"CGAT");
    }

    #[test]
    fn test_canonical_kmer() {
        // A k-mer and its reverse complement must canonicalize identically.
        let kmer1 = b"ATCG";
        let kmer2 = b"CGAT"; // reverse complement
        let canon1 = canonical_kmer(kmer1);
        let canon2 = canonical_kmer(kmer2);
        assert_eq!(canon1, canon2);
    }

    #[test]
    fn test_kmer_encoder_creation() {
        // k=3 -> 4^3 = 64 dimensions, below the 1024 cap.
        let encoder = KmerEncoder::new(3).unwrap();
        assert_eq!(encoder.k, 3);
        assert_eq!(encoder.dimensions(), 64);
    }

    #[test]
    fn test_kmer_encoder_large_k() {
        let encoder = KmerEncoder::new(21).unwrap();
        assert_eq!(encoder.k, 21);
        assert_eq!(encoder.dimensions(), 1024); // Capped by feature hashing
    }

    #[test]
    fn test_encode_sequence() {
        let encoder = KmerEncoder::new(3).unwrap();
        let seq = b"ATCGATCG";
        let vector = encoder.encode_sequence(seq).unwrap();
        assert_eq!(vector.len(), encoder.dimensions());
        // Check L2 normalization
        let norm: f32 = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 1e-5);
    }

    #[test]
    fn test_minhash_sketch() {
        let mut sketch = MinHashSketch::new(100);
        let seq = b"ATCGATCGATCGATCGATCG";
        sketch.sketch(seq, 5).unwrap();
        // Sketch may hold fewer values than num_hashes for short sequences.
        assert!(sketch.hashes().len() <= 100);
    }

    #[test]
    fn test_jaccard_distance() {
        let mut sketch1 = MinHashSketch::new(100);
        let mut sketch2 = MinHashSketch::new(100);
        let seq1 = b"ATCGATCGATCGATCGATCG";
        let seq2 = b"ATCGATCGATCGATCGATCG"; // Identical
        sketch1.sketch(seq1, 5).unwrap();
        sketch2.sketch(seq2, 5).unwrap();
        let distance = sketch1.jaccard_distance(&sketch2);
        assert!(distance < 0.01); // Should be very similar
    }
}

View File

@@ -0,0 +1,365 @@
//! K-mer Graph PageRank for DNA Sequence Ranking
//!
//! Builds a k-mer co-occurrence graph from DNA sequences and uses
//! ruvector-solver's Forward Push Personalized PageRank (PPR) to rank
//! sequences by structural centrality in the k-mer overlap network.
//!
//! This enables identifying the most "representative" sequences in a
//! collection — those whose k-mer profiles are most connected to others.
use ruvector_solver::forward_push::ForwardPushSolver;
use ruvector_solver::types::CsrMatrix;
/// Result of PageRank-based sequence ranking
#[derive(Debug, Clone)]
pub struct SequenceRank {
    /// Index of the sequence in the input collection
    pub index: usize,
    /// PageRank score, normalized over the collection (higher = more central)
    pub score: f64,
}
/// K-mer graph builder and PageRank ranker.
///
/// Constructs a weighted graph where:
/// - Nodes are sequences
/// - Edge weight(i, j) = number of shared k-mers between sequences i and j
///
/// Then uses Forward Push PPR to compute centrality scores.
pub struct KmerGraphRanker {
    /// K-mer length used for fingerprinting (typical: 11-31)
    k: usize,
    /// Number of hash buckets per fingerprint vector (must be non-zero)
    hash_dimensions: usize,
}
impl KmerGraphRanker {
/// Create a new ranker with the given k-mer length.
///
/// # Arguments
/// * `k` - K-mer length (typical: 11-31)
/// * `hash_dimensions` - Number of hash buckets for k-mer fingerprints (default: 256)
pub fn new(k: usize, hash_dimensions: usize) -> Self {
Self { k, hash_dimensions }
}
/// Build a k-mer fingerprint vector for a DNA sequence.
///
/// Uses FNV-1a hashing with canonical k-mers (min of forward/reverse-complement)
/// to produce a fixed-size frequency vector.
fn fingerprint(&self, seq: &[u8]) -> Vec<f64> {
if seq.len() < self.k {
return vec![0.0; self.hash_dimensions];
}
let mut counts = vec![0u32; self.hash_dimensions];
for window in seq.windows(self.k) {
let fwd = Self::fnv1a_hash(window);
let rc = Self::fnv1a_hash_rc(window);
let canonical = fwd.min(rc);
counts[canonical % self.hash_dimensions] += 1;
}
// Normalize to probability distribution
let total: u32 = counts.iter().sum();
if total == 0 {
return vec![0.0; self.hash_dimensions];
}
let inv = 1.0 / total as f64;
counts.iter().map(|&c| c as f64 * inv).collect()
}
/// Compute cosine similarity between two fingerprint vectors.
fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 {
let dot: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let norm_a: f64 = a.iter().map(|x| x * x).sum::<f64>().sqrt();
let norm_b: f64 = b.iter().map(|x| x * x).sum::<f64>().sqrt();
if norm_a < 1e-15 || norm_b < 1e-15 {
return 0.0;
}
dot / (norm_a * norm_b)
}
/// Build the k-mer overlap graph as a column-stochastic transition matrix.
///
/// Edge weights are cosine similarities between k-mer fingerprints,
/// normalized to form a stochastic matrix (columns sum to 1).
fn build_transition_matrix(&self, sequences: &[&[u8]], threshold: f64) -> CsrMatrix<f64> {
let n = sequences.len();
let fingerprints: Vec<Vec<f64>> =
sequences.iter().map(|seq| self.fingerprint(seq)).collect();
// Build weighted adjacency with thresholding
let mut col_sums = vec![0.0f64; n];
let mut entries: Vec<(usize, usize, f64)> = Vec::new();
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let sim = Self::cosine_similarity(&fingerprints[i], &fingerprints[j]);
if sim > threshold {
entries.push((i, j, sim));
col_sums[j] += sim;
}
}
}
// Normalize columns to make stochastic
// Also add self-loops for isolated nodes
let mut normalized: Vec<(usize, usize, f64)> = entries
.into_iter()
.map(|(i, j, w)| {
let norm = if col_sums[j] > 1e-15 {
col_sums[j]
} else {
1.0
};
(i, j, w / norm)
})
.collect();
// Add self-loops for isolated nodes (dangling node handling)
for j in 0..n {
if col_sums[j] < 1e-15 {
normalized.push((j, j, 1.0));
}
}
CsrMatrix::<f64>::from_coo(n, n, normalized)
}
/// Rank sequences by PageRank centrality in the k-mer overlap graph.
///
/// Uses ruvector-solver's Forward Push algorithm for sublinear-time
/// Personalized PageRank computation.
///
/// # Arguments
/// * `sequences` - Collection of DNA sequences (as byte slices)
/// * `alpha` - Teleportation probability (default: 0.15)
/// * `epsilon` - PPR approximation tolerance (default: 1e-6)
/// * `similarity_threshold` - Minimum cosine similarity to create an edge (default: 0.1)
///
/// # Returns
/// Sequences ranked by descending PageRank score
pub fn rank_sequences(
&self,
sequences: &[&[u8]],
alpha: f64,
epsilon: f64,
similarity_threshold: f64,
) -> Vec<SequenceRank> {
let n = sequences.len();
if n == 0 {
return vec![];
}
if n == 1 {
return vec![SequenceRank {
index: 0,
score: 1.0,
}];
}
let matrix = self.build_transition_matrix(sequences, similarity_threshold);
// Use Forward Push PPR from each node, accumulate global PageRank
let solver = ForwardPushSolver::new(alpha, epsilon);
let mut global_rank = vec![0.0f64; n];
// Compute PPR from each node (or a representative subset for large graphs)
let num_seeds = n.min(50); // Limit seeds for large collections
let step = if n > num_seeds { n / num_seeds } else { 1 };
for seed_idx in (0..n).step_by(step) {
match solver.ppr_from_source(&matrix, seed_idx) {
Ok(ppr_result) => {
for (node, score) in ppr_result {
if node < n {
global_rank[node] += score;
}
}
}
Err(_) => {
// If PPR fails for this seed, skip it
continue;
}
}
}
// Normalize
let total: f64 = global_rank.iter().sum();
if total > 1e-15 {
let inv = 1.0 / total;
for score in &mut global_rank {
*score *= inv;
}
}
// Build ranked results
let mut results: Vec<SequenceRank> = global_rank
.into_iter()
.enumerate()
.map(|(index, score)| SequenceRank { index, score })
.collect();
// Sort by score descending
results.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
results
}
/// Compute pairwise PageRank similarity between two specific sequences
/// within the context of a collection.
///
/// Runs Forward Push PPR seeded at `source` and reports the resulting
/// probability mass at `target`; out-of-range indices or a failed solve
/// yield `0.0`.
pub fn pairwise_similarity(
    &self,
    sequences: &[&[u8]],
    source: usize,
    target: usize,
    alpha: f64,
    epsilon: f64,
    similarity_threshold: f64,
) -> f64 {
    let count = sequences.len();
    if source >= count || target >= count {
        return 0.0;
    }
    let matrix = self.build_transition_matrix(sequences, similarity_threshold);
    let solver = ForwardPushSolver::new(alpha, epsilon);
    solver
        .ppr_from_source(&matrix, source)
        .ok()
        .and_then(|ppr| {
            ppr.into_iter().find_map(|(node, score)| {
                if node == target {
                    Some(score)
                } else {
                    None
                }
            })
        })
        .unwrap_or(0.0)
}
/// FNV-1a hash of a byte slice (64-bit constants, truncated to `usize`).
#[inline]
fn fnv1a_hash(data: &[u8]) -> usize {
    const FNV_OFFSET: u64 = 14695981039346656037;
    const FNV_PRIME: u64 = 1099511628211;
    // Classic FNV-1a: xor the byte in, then multiply by the prime.
    data.iter()
        .fold(FNV_OFFSET, |acc, &byte| {
            (acc ^ byte as u64).wrapping_mul(FNV_PRIME)
        }) as usize
}
/// FNV-1a hash of the reverse complement of `data`, computed without
/// materializing the complemented sequence. Case-insensitive; `U` is
/// treated like `T`, and non-nucleotide bytes pass through unchanged.
#[inline]
fn fnv1a_hash_rc(data: &[u8]) -> usize {
    const FNV_OFFSET: u64 = 14695981039346656037;
    const FNV_PRIME: u64 = 1099511628211;
    data.iter()
        .rev()
        .map(|&byte| match byte.to_ascii_uppercase() {
            b'A' => b'T',
            b'T' | b'U' => b'A',
            b'C' => b'G',
            b'G' => b'C',
            other => other,
        })
        .fold(FNV_OFFSET, |acc, comp| {
            (acc ^ comp as u64).wrapping_mul(FNV_PRIME)
        }) as usize
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // A fingerprint is a fixed-size bucket histogram over hashed k-mers,
    // normalized to a probability distribution.
    #[test]
    fn test_fingerprint() {
        let ranker = KmerGraphRanker::new(3, 64);
        let seq = b"ATCGATCGATCG";
        let fp = ranker.fingerprint(seq);
        assert_eq!(fp.len(), 64);
        // Should be a probability distribution (sums to ~1)
        let sum: f64 = fp.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);
    }

    // Identical vectors have cosine similarity 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];
        let sim = KmerGraphRanker::cosine_similarity(&a, &b);
        assert!((sim - 1.0).abs() < 1e-10);
    }

    // Orthogonal vectors have cosine similarity 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = vec![1.0, 0.0];
        let b = vec![0.0, 1.0];
        let sim = KmerGraphRanker::cosine_similarity(&a, &b);
        assert!(sim.abs() < 1e-10);
    }

    // Three-sequence PageRank: two identical sequences, one distinct.
    // Scores should form a distribution and the identical pair should
    // receive comparable rank mass.
    #[test]
    fn test_rank_sequences_basic() {
        let ranker = KmerGraphRanker::new(3, 64);
        let seq1 = b"ATCGATCGATCGATCG";
        let seq2 = b"ATCGATCGATCGATCG"; // identical to seq1
        let seq3 = b"GCTAGCTAGCTAGCTA"; // different
        let sequences: Vec<&[u8]> = vec![seq1, seq2, seq3];
        let ranks = ranker.rank_sequences(&sequences, 0.15, 1e-4, 0.01);
        assert_eq!(ranks.len(), 3);
        // All ranks should sum to 1
        let total: f64 = ranks.iter().map(|r| r.score).sum();
        assert!((total - 1.0).abs() < 1e-5);
        // Identical sequences should have similar ranks
        let rank_0 = ranks.iter().find(|r| r.index == 0).unwrap().score;
        let rank_1 = ranks.iter().find(|r| r.index == 1).unwrap().score;
        assert!((rank_0 - rank_1).abs() < 0.3); // roughly similar
    }

    // Empty input yields no ranks.
    #[test]
    fn test_rank_empty() {
        let ranker = KmerGraphRanker::new(3, 64);
        let sequences: Vec<&[u8]> = vec![];
        let ranks = ranker.rank_sequences(&sequences, 0.15, 1e-4, 0.1);
        assert!(ranks.is_empty());
    }

    // A single sequence gets the entire rank mass (score 1.0).
    #[test]
    fn test_rank_single() {
        let ranker = KmerGraphRanker::new(3, 64);
        let sequences: Vec<&[u8]> = vec![b"ATCGATCG"];
        let ranks = ranker.rank_sequences(&sequences, 0.15, 1e-4, 0.1);
        assert_eq!(ranks.len(), 1);
        assert!((ranks[0].score - 1.0).abs() < 1e-10);
    }

    // PPR mass flowing to an identical sequence should be at least as
    // large as the mass flowing to a dissimilar one.
    #[test]
    fn test_pairwise_similarity() {
        let ranker = KmerGraphRanker::new(3, 64);
        let seq1 = b"ATCGATCGATCGATCG";
        let seq2 = b"ATCGATCGATCGATCG";
        let seq3 = b"NNNNNNNNNNNNNNNN"; // very different
        let sequences: Vec<&[u8]> = vec![seq1, seq2, seq3];
        let sim_01 = ranker.pairwise_similarity(&sequences, 0, 1, 0.15, 1e-4, 0.01);
        let sim_02 = ranker.pairwise_similarity(&sequences, 0, 2, 0.15, 1e-4, 0.01);
        // Identical sequences should have higher similarity
        assert!(sim_01 >= sim_02);
    }
}

84
vendor/ruvector/examples/dna/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,84 @@
//! # rvDNA — AI-Native Genomic Analysis
//!
//! Fast, accurate genomic analysis in pure Rust with WASM support.
//! Includes the `.rvdna` binary file format for storing pre-computed
//! AI features alongside raw DNA sequences.
//!
//! - **K-mer HNSW Indexing**: Sequence similarity search via vector embeddings
//! - **Smith-Waterman Alignment**: Local alignment with CIGAR and mapping quality
//! - **Bayesian Variant Calling**: SNP/indel detection with Phred quality scores
//! - **Protein Translation**: DNA-to-protein with GNN contact graph prediction
//! - **Epigenomics**: Methylation profiling and Horvath biological age clock
//! - **Pharmacogenomics**: CYP enzyme star allele calling and drug recommendations
//! - **Pipeline Orchestration**: DAG-based multi-stage execution
//! - **RVDNA Format**: AI-native binary file format with pre-computed tensors
#![warn(missing_docs)]
#![allow(clippy::all)]
pub mod alignment;
pub mod biomarker;
pub mod biomarker_stream;
pub mod epigenomics;
pub mod error;
pub mod genotyping;
pub mod health;
pub mod kmer;
pub mod kmer_pagerank;
pub mod pharma;
pub mod pipeline;
pub mod protein;
pub mod real_data;
pub mod rvdna;
pub mod types;
pub mod variant;
pub use alignment::{AlignmentConfig, SmithWaterman};
pub use epigenomics::{
CancerSignalDetector, CancerSignalResult, CpGSite, HorvathClock, MethylationProfile,
};
pub use error::{DnaError, Result};
pub use pharma::{
call_cyp2c19_allele, call_star_allele, get_recommendations, predict_cyp2c19_phenotype,
predict_phenotype, Cyp2c19Allele, DrugRecommendation, MetabolizerPhenotype, PharmaVariant,
StarAllele,
};
pub use protein::{isoelectric_point, molecular_weight, translate_dna, AminoAcid};
pub use rvdna::{
decode_2bit, encode_2bit, fasta_to_rvdna, Codec, KmerVectorBlock, RvdnaHeader, RvdnaReader,
RvdnaStats, RvdnaWriter, SparseAttention, VariantTensor,
};
pub use types::{
AlignmentResult, AnalysisConfig, CigarOp, ContactGraph, DnaSequence, GenomicPosition,
KmerIndex, Nucleotide, ProteinResidue, ProteinSequence, QualityScore, Variant,
};
pub use variant::{
FilterStatus, Genotype, PileupColumn, VariantCall, VariantCaller, VariantCallerConfig,
};
pub use ruvector_core::{
types::{DbOptions, DistanceMetric, HnswConfig, SearchQuery, SearchResult, VectorEntry},
VectorDB,
};
pub use biomarker::{BiomarkerClassification, BiomarkerProfile, BiomarkerReference, CategoryScore};
pub use biomarker_stream::{
BiomarkerReading, RingBuffer, StreamConfig, StreamProcessor, StreamStats,
};
pub use genotyping::{
CallConfidence, CypDiplotype, GenomeBuild, GenotypeAnalysis, GenotypeData, Snp,
};
pub use health::{ApoeResult, HealthVariantResult, MthfrResult, PainProfile};
pub use kmer_pagerank::{KmerGraphRanker, SequenceRank};
/// Prelude module for common imports
///
/// `use rvdna::prelude::*;` brings in the alignment, epigenomics, k-mer,
/// pharmacogenomics, protein, and variant APIs, plus the crate's shared
/// [`DnaError`]/[`Result`] types and the core sequence types.
pub mod prelude {
    pub use crate::alignment::*;
    pub use crate::epigenomics::*;
    pub use crate::error::{DnaError, Result};
    pub use crate::kmer::*;
    pub use crate::pharma::*;
    pub use crate::protein::*;
    pub use crate::types::*;
    pub use crate::variant::*;
}

427
vendor/ruvector/examples/dna/src/main.rs vendored Normal file
View File

@@ -0,0 +1,427 @@
//! DNA Analyzer Demo - RuVector Genomic Analysis Pipeline
//!
//! Demonstrates SOTA genomic analysis using:
//! - Real human gene sequences (HBB, TP53, BRCA1, CYP2D6, INS)
//! - HNSW k-mer indexing for fast sequence search
//! - Attention-based sequence alignment
//! - Variant calling from pileup data
//! - Protein translation and contact prediction
//! - Epigenetic age prediction (Horvath clock)
//! - Pharmacogenomic star allele calling
//! - RVDNA AI-native file format with pre-computed tensors
use ::rvdna::prelude::*;
use ::rvdna::{
alignment::{AlignmentConfig, SmithWaterman},
epigenomics::{HorvathClock, MethylationProfile},
genotyping, pharma,
protein::translate_dna,
real_data,
rvdna::{
self, Codec, KmerVectorBlock, RvdnaReader, RvdnaWriter, SparseAttention, VariantTensor,
},
variant::{PileupColumn, VariantCaller, VariantCallerConfig},
};
use rand::Rng;
use tracing::{info, Level};
use tracing_subscriber::FmtSubscriber;
/// Demo entry point: runs the full genomic pipeline (load sequences,
/// k-mer similarity, alignment, variant calling, translation,
/// epigenomics, pharmacogenomics, RVDNA round-trip) on bundled gene
/// sequences — or, when a file path is passed as the first CLI argument,
/// runs the 23andMe genotyping report instead.
fn main() -> anyhow::Result<()> {
    // Check for 23andMe file argument
    let args: Vec<String> = std::env::args().collect();
    if args.len() > 1 {
        return run_23andme(&args[1]);
    }
    let subscriber = FmtSubscriber::builder()
        .with_max_level(Level::INFO)
        .finish();
    tracing::subscriber::set_global_default(subscriber)?;
    info!("RuVector DNA Analyzer - Genomic Analysis Pipeline");
    info!("================================================");
    info!("Using real human gene sequences from NCBI RefSeq");
    // -----------------------------------------------------------------------
    // Stage 1: Load real human gene sequences
    // -----------------------------------------------------------------------
    info!("\nStage 1: Loading real human gene sequences");
    let total_start = std::time::Instant::now();
    let hbb = DnaSequence::from_str(real_data::HBB_CODING_SEQUENCE)?;
    let tp53 = DnaSequence::from_str(real_data::TP53_EXONS_5_8)?;
    let brca1 = DnaSequence::from_str(real_data::BRCA1_EXON11_FRAGMENT)?;
    let cyp2d6 = DnaSequence::from_str(real_data::CYP2D6_CODING)?;
    let insulin = DnaSequence::from_str(real_data::INS_CODING)?;
    info!(
        " HBB (hemoglobin beta): {} bp [chr11, sickle cell gene]",
        hbb.len()
    );
    info!(
        " TP53 (tumor suppressor): {} bp [chr17, exons 5-8]",
        tp53.len()
    );
    info!(
        " BRCA1 (DNA repair): {} bp [chr17, exon 11 fragment]",
        brca1.len()
    );
    info!(
        " CYP2D6 (drug metabolism): {} bp [chr22, pharmacogenomic]",
        cyp2d6.len()
    );
    info!(
        " INS (insulin): {} bp [chr11, preproinsulin]",
        insulin.len()
    );
    let gc_hbb = calculate_gc_content(&hbb);
    let gc_tp53 = calculate_gc_content(&tp53);
    info!(" HBB GC content: {:.1}%", gc_hbb * 100.0);
    info!(" TP53 GC content: {:.1}%", gc_tp53 * 100.0);
    // -----------------------------------------------------------------------
    // Stage 2: K-mer similarity search across gene panel
    // -----------------------------------------------------------------------
    info!("\nStage 2: K-mer similarity search across gene panel");
    let kmer_start = std::time::Instant::now();
    let hbb_vec = hbb.to_kmer_vector(11, 512)?;
    let tp53_vec = tp53.to_kmer_vector(11, 512)?;
    let brca1_vec = brca1.to_kmer_vector(11, 512)?;
    let cyp2d6_vec = cyp2d6.to_kmer_vector(11, 512)?;
    // NOTE(review): ins_vec is computed but never compared below —
    // dead work unless kept deliberately for timing symmetry.
    let ins_vec = insulin.to_kmer_vector(11, 512)?;
    let sim_hbb_tp53 = cosine_similarity(&hbb_vec, &tp53_vec);
    let sim_hbb_brca1 = cosine_similarity(&hbb_vec, &brca1_vec);
    let sim_tp53_brca1 = cosine_similarity(&tp53_vec, &brca1_vec);
    let sim_hbb_cyp2d6 = cosine_similarity(&hbb_vec, &cyp2d6_vec);
    info!(" K-mer similarity matrix (cosine, k=11, d=512):");
    info!(" HBB vs TP53: {:.4}", sim_hbb_tp53);
    info!(" HBB vs BRCA1: {:.4}", sim_hbb_brca1);
    info!(" TP53 vs BRCA1: {:.4}", sim_tp53_brca1);
    info!(" HBB vs CYP2D6:{:.4}", sim_hbb_cyp2d6);
    info!(" K-mer encoding time: {:?}", kmer_start.elapsed());
    // -----------------------------------------------------------------------
    // Stage 3: Align HBB query fragment against full HBB
    // -----------------------------------------------------------------------
    info!("\nStage 3: Smith-Waterman alignment on HBB");
    let align_start = std::time::Instant::now();
    // Extract a 50bp fragment from the middle of HBB (simulating a sequencing read)
    let hbb_str = hbb.to_string();
    let fragment_start = 100;
    let fragment_end = (fragment_start + 50).min(hbb_str.len());
    let query_fragment = DnaSequence::from_str(&hbb_str[fragment_start..fragment_end])?;
    let aligner = SmithWaterman::new(AlignmentConfig::default());
    let alignment = aligner.align(&query_fragment, &hbb)?;
    info!(
        " Query: HBB[{}..{}] ({} bp read)",
        fragment_start,
        fragment_end,
        query_fragment.len()
    );
    info!(" Alignment score: {}", alignment.score);
    info!(
        " Mapped position: {} (expected: {})",
        alignment.mapped_position.position, fragment_start
    );
    info!(" Mapping quality: {}", alignment.mapping_quality.value());
    info!(" CIGAR: {} ops", alignment.cigar.len());
    info!(" Alignment time: {:?}", align_start.elapsed());
    // -----------------------------------------------------------------------
    // Stage 4: Variant calling on HBB (sickle cell region)
    // -----------------------------------------------------------------------
    info!("\nStage 4: Variant calling on HBB (sickle cell detection)");
    let variant_start = std::time::Instant::now();
    let caller = VariantCaller::new(VariantCallerConfig::default());
    let hbb_bytes = hbb_str.as_bytes();
    let mut variant_count = 0;
    let mut rng = rand::thread_rng();
    // Simulate sequencing reads across HBB with a sickle cell mutation at position 20
    let sickle_pos = real_data::hbb_variants::SICKLE_CELL_POS;
    for i in 0..hbb_bytes.len().min(200) {
        // 20-50x simulated coverage per position; ~2% uniform error rate.
        let depth = rng.gen_range(20..51);
        let bases: Vec<u8> = (0..depth)
            .map(|_| {
                if i == sickle_pos && rng.gen::<f32>() < 0.5 {
                    b'T' // Simulate heterozygous sickle cell (A→T at codon 6)
                } else if rng.gen::<f32>() < 0.98 {
                    hbb_bytes[i]
                } else {
                    [b'A', b'C', b'G', b'T'][rng.gen_range(0..4)]
                }
            })
            .collect();
        let qualities: Vec<u8> = (0..depth).map(|_| rng.gen_range(25..41)).collect();
        let pileup = PileupColumn {
            bases,
            qualities,
            position: i as u64,
            chromosome: 11,
        };
        if let Some(call) = caller.call_snp(&pileup, hbb_bytes[i]) {
            variant_count += 1;
            if i == sickle_pos {
                info!(
                    " ** Sickle cell variant at pos {}: ref={} alt={} depth={} qual={}",
                    i, call.ref_allele as char, call.alt_allele as char, call.depth, call.quality
                );
            }
        }
    }
    info!(" Positions analyzed: {}", hbb_bytes.len().min(200));
    info!(" Total variants detected: {}", variant_count);
    info!(" Variant calling time: {:?}", variant_start.elapsed());
    // -----------------------------------------------------------------------
    // Stage 5: Translate HBB → hemoglobin beta protein
    // -----------------------------------------------------------------------
    info!("\nStage 5: Protein translation - HBB to Hemoglobin Beta");
    let protein_start = std::time::Instant::now();
    let amino_acids = translate_dna(hbb_bytes);
    let protein_str: String = amino_acids.iter().map(|aa| aa.to_char()).collect();
    info!(" Protein length: {} amino acids", amino_acids.len());
    info!(
        " First 20 aa: {}",
        if protein_str.len() > 20 {
            &protein_str[..20]
        } else {
            &protein_str
        }
    );
    info!(" Expected: MVHLTPEEKSAVTALWGKVN (hemoglobin beta N-terminus)");
    // Build contact graph for the hemoglobin protein
    if amino_acids.len() >= 10 {
        // Map one-letter amino-acid codes to ProteinResidue variants;
        // anything unrecognized becomes the unknown residue X.
        let residues: Vec<ProteinResidue> = amino_acids
            .iter()
            .map(|aa| match aa.to_char() {
                'A' => ProteinResidue::A,
                'R' => ProteinResidue::R,
                'N' => ProteinResidue::N,
                'D' => ProteinResidue::D,
                'C' => ProteinResidue::C,
                'E' => ProteinResidue::E,
                'Q' => ProteinResidue::Q,
                'G' => ProteinResidue::G,
                'H' => ProteinResidue::H,
                'I' => ProteinResidue::I,
                'L' => ProteinResidue::L,
                'K' => ProteinResidue::K,
                'M' => ProteinResidue::M,
                'F' => ProteinResidue::F,
                'P' => ProteinResidue::P,
                'S' => ProteinResidue::S,
                'T' => ProteinResidue::T,
                'W' => ProteinResidue::W,
                'Y' => ProteinResidue::Y,
                'V' => ProteinResidue::V,
                _ => ProteinResidue::X,
            })
            .collect();
        let protein_seq = ProteinSequence::new(residues);
        let graph = protein_seq.build_contact_graph(8.0)?;
        let contacts = protein_seq.predict_contacts(&graph)?;
        info!(" Contact graph: {} edges", graph.edges.len());
        info!(" Top 3 predicted contacts:");
        for (i, (r1, r2, score)) in contacts.iter().take(3).enumerate() {
            info!(
                " {}. Residues {} <-> {} (score: {:.3})",
                i + 1,
                r1,
                r2,
                score
            );
        }
    }
    info!(" Protein analysis time: {:?}", protein_start.elapsed());
    // -----------------------------------------------------------------------
    // Stage 6: Epigenetic age prediction
    // -----------------------------------------------------------------------
    info!("\nStage 6: Epigenetic age prediction (Horvath clock)");
    let epi_start = std::time::Instant::now();
    // Synthetic methylation profile: 500 CpG sites with random beta values.
    let positions: Vec<(u8, u64)> = (0..500).map(|i| (1, i * 1000)).collect();
    let betas: Vec<f32> = (0..500).map(|_| rng.gen_range(0.1..0.9)).collect();
    let profile = MethylationProfile::from_beta_values(positions, betas);
    let clock = HorvathClock::default_clock();
    let predicted_age = clock.predict_age(&profile);
    info!(" CpG sites analyzed: {}", profile.sites.len());
    info!(" Mean methylation: {:.3}", profile.mean_methylation());
    info!(" Predicted biological age: {:.1} years", predicted_age);
    info!(" Epigenomics time: {:?}", epi_start.elapsed());
    // -----------------------------------------------------------------------
    // Stage 7: Pharmacogenomics (CYP2D6 from real sequence)
    // -----------------------------------------------------------------------
    info!("\nStage 7: Pharmacogenomic analysis (CYP2D6)");
    let cyp2d6_variants = vec![(42130692, b'G', b'A')]; // *4 defining variant
    let allele1 = pharma::call_star_allele(&cyp2d6_variants);
    let allele2 = pharma::StarAllele::Star10; // *10: common in East Asian populations
    let phenotype = pharma::predict_phenotype(&allele1, &allele2);
    info!(" CYP2D6 sequence: {} bp analyzed", cyp2d6.len());
    info!(
        " Allele 1: {:?} (activity: {:.1})",
        allele1,
        allele1.activity_score()
    );
    info!(
        " Allele 2: {:?} (activity: {:.1})",
        allele2,
        allele2.activity_score()
    );
    info!(" Metabolizer phenotype: {:?}", phenotype);
    let recommendations = pharma::get_recommendations("CYP2D6", &phenotype);
    for rec in &recommendations {
        info!(
            " - {}: {} (dose: {:.1}x)",
            rec.drug, rec.recommendation, rec.dose_factor
        );
    }
    // -----------------------------------------------------------------------
    // Stage 8: RVDNA AI-Native Format Demo
    // -----------------------------------------------------------------------
    info!("\nStage 8: RVDNA AI-Native File Format");
    let rvdna_start = std::time::Instant::now();
    // Convert HBB to RVDNA format with pre-computed k-mer vectors
    let rvdna_bytes = rvdna::fasta_to_rvdna(real_data::HBB_CODING_SEQUENCE, 11, 512, 500)?;
    info!(" FASTA → RVDNA conversion:");
    info!(" Input: {} bases (ASCII, 1 byte/base)", hbb.len());
    info!(" Output: {} bytes (RVDNA binary)", rvdna_bytes.len());
    // NOTE(review): this divides raw base count by the FULL RVDNA file
    // (sequence + vectors + metadata), not just the sequence section the
    // label claims — the ratio can drop below 1.0 for short sequences.
    info!(
        " Ratio: {:.2}x compression (sequence section)",
        hbb.len() as f64 / rvdna_bytes.len() as f64
    );
    // Read back and validate
    let reader = RvdnaReader::from_bytes(rvdna_bytes)?;
    let restored = reader.read_sequence()?;
    assert_eq!(restored.to_string(), hbb.to_string(), "Lossless roundtrip");
    let kmer_blocks = reader.read_kmer_vectors()?;
    let stats = reader.stats();
    info!(" RVDNA file stats:");
    info!(" Format version: {}", reader.header.version);
    info!(
        " Sequence section: {} bytes ({:.1} bits/base)",
        stats.section_sizes[0], stats.bits_per_base
    );
    info!(
        " K-mer vectors: {} blocks pre-computed",
        kmer_blocks.len()
    );
    if !kmer_blocks.is_empty() {
        info!(
            " Vector dims: {}, k={}",
            kmer_blocks[0].dimensions, kmer_blocks[0].k
        );
        // Demonstrate instant similarity search from pre-computed vectors
        let tp53_query = tp53.to_kmer_vector(11, 512)?;
        let sim = kmer_blocks[0].cosine_similarity(&tp53_query);
        info!(
            " Instant HBB vs TP53 similarity: {:.4} (from pre-indexed)",
            sim
        );
    }
    info!(" RVDNA format time: {:?}", rvdna_start.elapsed());
    // Compare format sizes
    info!("\n Format Comparison (HBB gene, {} bp):", hbb.len());
    info!(" FASTA (ASCII): {} bytes (8 bits/base)", hbb.len());
    info!(
        " RVDNA (2-bit): {} bytes (seq section)",
        stats.section_sizes[0]
    );
    info!(
        " RVDNA (total): {} bytes (seq + k-mer vectors + metadata)",
        stats.total_size
    );
    info!(" Pre-computed: k-mer vectors, ready for HNSW search");
    // -----------------------------------------------------------------------
    // Summary
    // -----------------------------------------------------------------------
    let total_time = total_start.elapsed();
    info!("\nPipeline Summary");
    info!("==================");
    info!(" Genes analyzed: 5 (HBB, TP53, BRCA1, CYP2D6, INS)");
    info!(
        " Total bases: {} bp",
        hbb.len() + tp53.len() + brca1.len() + cyp2d6.len() + insulin.len()
    );
    info!(
        " Variants called: {} (in HBB sickle cell region)",
        variant_count
    );
    info!(" Hemoglobin protein: {} amino acids", amino_acids.len());
    info!(" Predicted age: {:.1} years", predicted_age);
    info!(" CYP2D6 phenotype: {:?}", phenotype);
    info!(
        " RVDNA format: {} bytes ({} sections)",
        stats.total_size,
        stats.section_sizes.iter().filter(|&&s| s > 0).count()
    );
    info!(" Total pipeline time: {:?}", total_time);
    info!("\nAnalysis complete!");
    Ok(())
}
/// Cosine similarity between two vectors.
///
/// The dot product is taken over the zipped prefix while each magnitude
/// covers its full slice; returns `0.0` when either vector has zero
/// magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot = a.iter().zip(b.iter()).fold(0.0f32, |acc, (x, y)| acc + x * y);
    let magnitude = |v: &[f32]| v.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));
    if mag_a == 0.0 || mag_b == 0.0 {
        0.0
    } else {
        dot / (mag_a * mag_b)
    }
}
/// Calculate GC content of DNA sequence.
///
/// Returns the fraction of bases that are `G` or `C` in `[0.0, 1.0]`.
/// An empty sequence yields `0.0` instead of the NaN the unguarded
/// division would produce.
fn calculate_gc_content(sequence: &DnaSequence) -> f64 {
    // Guard: 0 / 0 would be NaN and poison downstream formatting.
    if sequence.is_empty() {
        return 0.0;
    }
    let gc_count = sequence
        .bases()
        .iter()
        .filter(|&&b| b == Nucleotide::G || b == Nucleotide::C)
        .count();
    gc_count as f64 / sequence.len() as f64
}
/// Run 23andMe genotyping analysis pipeline.
///
/// Opens the raw-data file at `path`, runs the genotyping analysis, and
/// prints the formatted report to stdout.
fn run_23andme(path: &str) -> anyhow::Result<()> {
    let file = std::fs::File::open(path)
        .map_err(|e| anyhow::anyhow!("Cannot open {}: {}", path, e))?;
    let report = genotyping::analyze(file)
        .map_err(|e| anyhow::anyhow!("Analysis failed: {}", e))?;
    print!("{}", genotyping::format_report(&report));
    Ok(())
}

View File

@@ -0,0 +1,417 @@
//! Pharmacogenomics module
//!
//! Provides CYP enzyme star allele calling and metabolizer phenotype
//! prediction for pharmacogenomic analysis.
use serde::{Deserialize, Serialize};
/// CYP2D6 star allele classification
///
/// Alleles are identified from their defining variants by
/// [`call_star_allele`]; each carries an activity score used for
/// phenotype prediction (see [`StarAllele::activity_score`]).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum StarAllele {
    /// *1 - Normal function (wild-type)
    Star1,
    /// *2 - Normal function
    Star2,
    /// *3 - No function (frameshift)
    Star3,
    /// *4 - No function (splicing defect)
    Star4,
    /// *5 - No function (gene deletion)
    Star5,
    /// *6 - No function (frameshift)
    Star6,
    /// *10 - Decreased function
    Star10,
    /// *17 - Decreased function
    Star17,
    /// *41 - Decreased function
    Star41,
    /// Unknown allele
    Unknown,
}
impl StarAllele {
/// Get the activity score for this allele
pub fn activity_score(&self) -> f64 {
match self {
StarAllele::Star1 | StarAllele::Star2 => 1.0,
StarAllele::Star10 | StarAllele::Star17 | StarAllele::Star41 => 0.5,
StarAllele::Star3 | StarAllele::Star4 | StarAllele::Star5 | StarAllele::Star6 => 0.0,
StarAllele::Unknown => 0.5,
}
}
}
/// Drug metabolizer phenotype
///
/// Binned from the summed activity score of a two-allele diplotype; see
/// [`predict_phenotype`] (CYP2D6) and [`predict_cyp2c19_phenotype`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum MetabolizerPhenotype {
    /// Ultra-rapid metabolizer (activity score > 2.0)
    UltraRapid,
    /// Normal metabolizer (1.0 <= activity score <= 2.0)
    Normal,
    /// Intermediate metabolizer (0.5 <= activity score < 1.0)
    Intermediate,
    /// Poor metabolizer (activity score < 0.5)
    Poor,
}
/// Pharmacogenomic variant for a specific gene
///
/// NOTE(review): the allele-calling functions in this module operate on
/// raw `(position, ref, alt)` tuples rather than this type; it appears to
/// serve as a serializable record for external consumers — confirm usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PharmaVariant {
    /// Gene name (e.g., "CYP2D6")
    pub gene: String,
    /// Genomic position
    pub position: u64,
    /// Reference allele
    pub ref_allele: u8,
    /// Alternate allele
    pub alt_allele: u8,
    /// Clinical significance
    pub significance: String,
}
/// CYP2C19 star allele classification
///
/// Alleles are identified from their defining variants by
/// [`call_cyp2c19_allele`]; activity scores follow
/// [`Cyp2c19Allele::activity_score`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Cyp2c19Allele {
    /// *1 - Normal function (wild-type)
    Star1,
    /// *2 - No function (rs4244285, c.681G>A, splicing defect)
    Star2,
    /// *3 - No function (rs4986893, c.636G>A, premature stop)
    Star3,
    /// *17 - Increased function (rs12248560, c.-806C>T)
    Star17,
    /// Unknown allele
    Unknown,
}
impl Cyp2c19Allele {
/// Get the activity score for this allele (CPIC guidelines)
pub fn activity_score(&self) -> f64 {
match self {
Cyp2c19Allele::Star1 => 1.0,
Cyp2c19Allele::Star17 => 1.5, // Increased function
Cyp2c19Allele::Star2 | Cyp2c19Allele::Star3 => 0.0,
Cyp2c19Allele::Unknown => 0.5,
}
}
}
/// Call CYP2C19 star allele from observed variants.
///
/// Scans `variants` in order and returns the allele of the first
/// recognized defining variant; falls back to `*1` (wild-type) when
/// nothing matches.
pub fn call_cyp2c19_allele(variants: &[(u64, u8, u8)]) -> Cyp2c19Allele {
    variants
        .iter()
        .find_map(|&(pos, ref_allele, alt_allele)| {
            match (pos, ref_allele, alt_allele) {
                // *2: G>A at rs4244285 (c.681G>A, splicing defect)
                (96541616, b'G', b'A') => Some(Cyp2c19Allele::Star2),
                // *3: G>A at rs4986893 (c.636G>A, premature stop codon)
                (96540410, b'G', b'A') => Some(Cyp2c19Allele::Star3),
                // *17: C>T at rs12248560 (c.-806C>T, increased expression)
                (96522463, b'C', b'T') => Some(Cyp2c19Allele::Star17),
                _ => None,
            }
        })
        .unwrap_or(Cyp2c19Allele::Star1)
}
/// Predict CYP2C19 metabolizer phenotype from diplotype.
///
/// Sums the two alleles' activity scores and bins the result:
/// > 2.0 ultra-rapid, >= 1.0 normal, >= 0.5 intermediate, else poor.
pub fn predict_cyp2c19_phenotype(
    allele1: &Cyp2c19Allele,
    allele2: &Cyp2c19Allele,
) -> MetabolizerPhenotype {
    let combined = allele1.activity_score() + allele2.activity_score();
    match combined {
        s if s > 2.0 => MetabolizerPhenotype::UltraRapid,
        s if s >= 1.0 => MetabolizerPhenotype::Normal,
        s if s >= 0.5 => MetabolizerPhenotype::Intermediate,
        _ => MetabolizerPhenotype::Poor,
    }
}
/// Call CYP2D6 star allele from observed variants.
///
/// Uses a simplified lookup table based on key defining variants: the
/// first recognized variant in `variants` decides the allele, and `*1`
/// (wild-type) is returned when none match.
pub fn call_star_allele(variants: &[(u64, u8, u8)]) -> StarAllele {
    variants
        .iter()
        .find_map(|&(pos, ref_allele, alt_allele)| {
            match (pos, ref_allele, alt_allele) {
                // *4: G>A at intron 3/exon 4 boundary (rs3892097)
                (42130692, b'G', b'A') => Some(StarAllele::Star4),
                // *5: whole gene deletion
                (42126611, b'T', b'-') => Some(StarAllele::Star5),
                // *3: frameshift (A deletion at rs35742686)
                (42127941, b'A', b'-') => Some(StarAllele::Star3),
                // *6: T deletion at rs5030655
                (42127803, b'T', b'-') => Some(StarAllele::Star6),
                // *10: C>T at rs1065852
                (42126938, b'C', b'T') => Some(StarAllele::Star10),
                _ => None,
            }
        })
        .unwrap_or(StarAllele::Star1) // Wild-type
}
/// Predict metabolizer phenotype from diplotype (two alleles).
///
/// Sums the two alleles' activity scores and bins the result:
/// > 2.0 ultra-rapid, >= 1.0 normal, >= 0.5 intermediate, else poor.
pub fn predict_phenotype(allele1: &StarAllele, allele2: &StarAllele) -> MetabolizerPhenotype {
    let combined = allele1.activity_score() + allele2.activity_score();
    match combined {
        s if s > 2.0 => MetabolizerPhenotype::UltraRapid,
        s if s >= 1.0 => MetabolizerPhenotype::Normal,
        s if s >= 0.5 => MetabolizerPhenotype::Intermediate,
        _ => MetabolizerPhenotype::Poor,
    }
}
/// Drug recommendation based on metabolizer phenotype
///
/// Produced by [`get_recommendations`]; entries with `dose_factor == 0.0`
/// are paired with "AVOID"/alternative-therapy recommendation text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DrugRecommendation {
    /// Drug name
    pub drug: String,
    /// Gene involved
    pub gene: String,
    /// Recommendation text
    pub recommendation: String,
    /// Dosing adjustment factor (1.0 = standard dose)
    pub dose_factor: f64,
}
/// Get drug recommendations for a given phenotype.
///
/// Looks up the `(gene, phenotype)` pair in a static table; unrecognized
/// combinations fall back to a single standard-dosing entry.
pub fn get_recommendations(
    gene: &str,
    phenotype: &MetabolizerPhenotype,
) -> Vec<DrugRecommendation> {
    // Small constructor closure keeps the recommendation tables readable;
    // every entry shares the same `gene`.
    let rec = |drug: &str, recommendation: &str, dose_factor: f64| DrugRecommendation {
        drug: drug.to_string(),
        gene: gene.to_string(),
        recommendation: recommendation.to_string(),
        dose_factor,
    };
    match (gene, phenotype) {
        ("CYP2D6", MetabolizerPhenotype::Poor) => vec![
            rec(
                "Codeine",
                "AVOID codeine; no conversion to morphine. Use alternative analgesic.",
                0.0,
            ),
            rec(
                "Tramadol",
                "AVOID tramadol; reduced efficacy. Use alternative analgesic.",
                0.0,
            ),
            rec(
                "Tamoxifen",
                "Consider alternative endocrine therapy (aromatase inhibitor).",
                0.0,
            ),
            rec(
                "Ondansetron",
                "Use standard dose; may have increased exposure.",
                0.75,
            ),
        ],
        ("CYP2D6", MetabolizerPhenotype::UltraRapid) => vec![
            rec(
                "Codeine",
                "AVOID codeine; risk of fatal toxicity from ultra-rapid morphine conversion.",
                0.0,
            ),
            rec(
                "Tramadol",
                "AVOID tramadol; risk of respiratory depression.",
                0.0,
            ),
        ],
        ("CYP2D6", MetabolizerPhenotype::Intermediate) => vec![
            rec("Codeine", "Use lower dose or alternative analgesic.", 0.5),
            rec(
                "Tamoxifen",
                "Consider higher dose or alternative therapy.",
                0.75,
            ),
        ],
        ("CYP2C19", MetabolizerPhenotype::Poor) => vec![
            rec(
                "Clopidogrel (Plavix)",
                "AVOID clopidogrel; use prasugrel or ticagrelor instead.",
                0.0,
            ),
            rec(
                "Voriconazole",
                "Reduce dose by 50%; monitor for toxicity.",
                0.5,
            ),
            rec(
                "PPIs (omeprazole)",
                "Reduce dose; slower clearance increases exposure.",
                0.5,
            ),
            rec("Escitalopram", "Consider 50% dose reduction.", 0.5),
        ],
        ("CYP2C19", MetabolizerPhenotype::UltraRapid) => vec![
            rec(
                "Clopidogrel (Plavix)",
                "Standard dosing (enhanced activation is beneficial).",
                1.0,
            ),
            rec(
                "Omeprazole",
                "Increase dose; rapid clearance reduces efficacy.",
                2.0,
            ),
            rec("Voriconazole", "Use alternative antifungal.", 0.0),
        ],
        ("CYP2C19", MetabolizerPhenotype::Intermediate) => vec![
            rec(
                "Clopidogrel (Plavix)",
                "Consider alternative antiplatelet or increased dose.",
                1.5,
            ),
            rec(
                "PPIs (omeprazole)",
                "Standard dose likely adequate; may have slightly increased exposure.",
                1.0,
            ),
            rec(
                "Escitalopram",
                "Use standard dose; monitor response.",
                1.0,
            ),
        ],
        _ => vec![rec("Standard", "Use standard dosing", 1.0)],
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_star_allele_calling() {
// Wild-type
assert_eq!(call_star_allele(&[]), StarAllele::Star1);
// *4 variant
let star4 = call_star_allele(&[(42130692, b'G', b'A')]);
assert_eq!(star4, StarAllele::Star4);
assert_eq!(star4.activity_score(), 0.0);
// *10 variant (decreased function)
let star10 = call_star_allele(&[(42126938, b'C', b'T')]);
assert_eq!(star10, StarAllele::Star10);
assert_eq!(star10.activity_score(), 0.5);
}
#[test]
fn test_phenotype_prediction() {
assert_eq!(
predict_phenotype(&StarAllele::Star1, &StarAllele::Star1),
MetabolizerPhenotype::Normal
);
assert_eq!(
predict_phenotype(&StarAllele::Star1, &StarAllele::Star4),
MetabolizerPhenotype::Normal
);
assert_eq!(
predict_phenotype(&StarAllele::Star4, &StarAllele::Star10),
MetabolizerPhenotype::Intermediate
);
assert_eq!(
predict_phenotype(&StarAllele::Star4, &StarAllele::Star4),
MetabolizerPhenotype::Poor
);
}
#[test]
fn test_drug_recommendations() {
let recs = get_recommendations("CYP2D6", &MetabolizerPhenotype::Poor);
assert!(recs.len() >= 1);
assert_eq!(recs[0].dose_factor, 0.0);
let recs_normal = get_recommendations("CYP2D6", &MetabolizerPhenotype::Normal);
assert_eq!(recs_normal[0].dose_factor, 1.0);
}
#[test]
fn test_cyp2c19_allele_calling() {
assert_eq!(call_cyp2c19_allele(&[]), Cyp2c19Allele::Star1);
let star2 = call_cyp2c19_allele(&[(96541616, b'G', b'A')]);
assert_eq!(star2, Cyp2c19Allele::Star2);
assert_eq!(star2.activity_score(), 0.0);
let star17 = call_cyp2c19_allele(&[(96522463, b'C', b'T')]);
assert_eq!(star17, Cyp2c19Allele::Star17);
assert_eq!(star17.activity_score(), 1.5);
}
#[test]
fn test_cyp2c19_phenotype() {
assert_eq!(
predict_cyp2c19_phenotype(&Cyp2c19Allele::Star17, &Cyp2c19Allele::Star17),
MetabolizerPhenotype::UltraRapid
);
assert_eq!(
predict_cyp2c19_phenotype(&Cyp2c19Allele::Star2, &Cyp2c19Allele::Star2),
MetabolizerPhenotype::Poor
);
assert_eq!(
predict_cyp2c19_phenotype(&Cyp2c19Allele::Star1, &Cyp2c19Allele::Star2),
MetabolizerPhenotype::Normal
);
}
#[test]
fn test_cyp2c19_drug_recommendations() {
let recs = get_recommendations("CYP2C19", &MetabolizerPhenotype::Poor);
assert!(recs.len() >= 1);
assert_eq!(recs[0].drug, "Clopidogrel (Plavix)");
assert_eq!(recs[0].dose_factor, 0.0);
let recs_ultra = get_recommendations("CYP2C19", &MetabolizerPhenotype::UltraRapid);
assert!(recs_ultra.len() >= 2);
}
}

View File

@@ -0,0 +1,496 @@
//! DAG-based genomic analysis pipeline orchestrator
use crate::error::Result;
use crate::types::{DnaSequence, KmerIndex, Nucleotide, ProteinResidue, ProteinSequence};
use ruvector_core::types::{SearchQuery, VectorEntry};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Instant;
/// Pipeline configuration
///
/// Shared tuning knobs for all pipeline stages; standard values come
/// from the `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineConfig {
    /// K-mer size (default: 21)
    pub k: usize,
    /// Attention window size (default: 512)
    pub window_size: usize,
    /// Variant calling min depth (default: 10) — pileup columns with fewer
    /// observed bases are skipped entirely
    pub min_depth: usize,
    /// Min variant quality (default: 20) — calls below this are discarded
    pub min_quality: u8,
}
impl Default for PipelineConfig {
fn default() -> Self {
Self {
k: 21,
window_size: 512,
min_depth: 10,
min_quality: 20,
}
}
}
/// K-mer analysis results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KmerAnalysisResult {
    /// Total k-mers extracted (with multiplicity, across all inputs)
    pub total_kmers: usize,
    /// Unique k-mers found
    pub unique_kmers: usize,
    /// GC content ratio in [0, 1] over all analyzed bases
    pub gc_content: f64,
    /// Top similar sequences
    pub top_similar_sequences: Vec<SimilarSequence>,
}
/// Similar sequence match
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarSequence {
    /// Sequence identifier
    pub id: String,
    /// Similarity score as reported by the vector search backend
    pub similarity: f32,
    /// Position in the index
    // NOTE(review): always populated with 0 by run_kmer_analysis — confirm intent.
    pub position: usize,
}
/// Variant call result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantCall {
    /// Genomic position
    pub position: u64,
    /// Reference base
    pub reference: Nucleotide,
    /// Alternate base
    pub alternate: Nucleotide,
    /// Variant quality (summed base qualities, capped at 255 by the caller)
    pub quality: u8,
    /// Read depth (number of observed bases at the column)
    pub depth: usize,
    /// Allele frequency of the alternate allele, in (0, 1]
    pub allele_frequency: f64,
}
/// Pileup column for variant calling
///
/// `bases` and `qualities` are parallel vectors: one entry per read
/// covering this position.
#[derive(Debug, Clone)]
pub struct PileupColumn {
    /// Genomic position
    pub position: u64,
    /// Reference base
    pub reference: Nucleotide,
    /// Observed bases
    pub bases: Vec<Nucleotide>,
    /// Quality scores
    pub qualities: Vec<u8>,
}
/// Protein analysis results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProteinAnalysisResult {
    /// Amino acid sequence (single letter codes)
    pub sequence: String,
    /// Protein length
    pub length: usize,
    /// Predicted contacts as (i, j, score), residue indices at least 5 apart
    pub predicted_contacts: Vec<(usize, usize, f32)>,
    /// Secondary structure prediction (H/E/C), one char per residue
    pub secondary_structure: Vec<char>,
}
/// Full pipeline analysis results
///
/// Aggregates the three pipeline stages (k-mer, variant, protein) plus
/// wall-clock timing for the whole run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FullAnalysisResult {
    /// K-mer statistics
    pub kmer_stats: KmerAnalysisResult,
    /// Called variants
    pub variants: Vec<VariantCall>,
    /// Protein analysis results
    pub proteins: Vec<ProteinAnalysisResult>,
    /// Execution time in milliseconds
    pub execution_time_ms: u128,
}
/// Genomic analysis pipeline orchestrator
///
/// Stateless apart from its configuration; each `run_*` method executes
/// one stage, and `run_full_pipeline` chains them.
pub struct GenomicPipeline {
    // Tuning knobs shared by all stages.
    config: PipelineConfig,
}
impl GenomicPipeline {
    /// Create new pipeline with configuration
    pub fn new(config: PipelineConfig) -> Self {
        Self { config }
    }
    /// Run k-mer analysis on sequences
    ///
    /// For each `(id, bytes)` pair: counts k-mers and GC content, indexes a
    /// k-mer frequency vector in an in-memory DB, then searches for the 5
    /// nearest neighbors of the FIRST sequence to fill
    /// `top_similar_sequences`. Sequences shorter than `k` are skipped.
    ///
    /// Errors if the index cannot be created or a sequence fails to parse;
    /// indexing/search failures are silently ignored (best-effort).
    pub fn run_kmer_analysis(&self, sequences: &[(&str, &[u8])]) -> Result<KmerAnalysisResult> {
        let mut total_kmers = 0;
        let mut kmer_set = std::collections::HashSet::new();
        let mut gc_count = 0;
        let mut total_bases = 0;
        // Create temporary k-mer index (384-dim vectors, in-memory backing)
        let index = KmerIndex::new(self.config.k, 384, ":memory:")?;
        for (id, seq) in sequences {
            // Extract k-mers; sequences shorter than k contribute nothing
            if seq.len() < self.config.k {
                continue;
            }
            total_bases += seq.len();
            for window in seq.windows(self.config.k) {
                total_kmers += 1;
                kmer_set.insert(window.to_vec());
                // Count GC content (raw bytes; lowercase g/c are NOT counted)
                for &base in window {
                    if base == b'G' || base == b'C' {
                        gc_count += 1;
                    }
                }
            }
            // Convert sequence to vector and index (insert errors ignored)
            let dna_seq = DnaSequence::from_str(&String::from_utf8_lossy(seq))?;
            if let Ok(vector) = dna_seq.to_kmer_vector(self.config.k, 384) {
                let entry = VectorEntry {
                    id: Some(id.to_string()),
                    vector,
                    metadata: None,
                };
                let _ = index.db().insert(entry);
            }
        }
        let gc_content = if total_bases > 0 {
            (gc_count as f64) / (total_bases as f64)
        } else {
            0.0
        };
        // Find similar sequences using HNSW search, seeded by the first input
        let mut top_similar = Vec::new();
        if !sequences.is_empty() {
            if let Some((query_id, query_seq)) = sequences.first() {
                let dna_seq = DnaSequence::from_str(&String::from_utf8_lossy(query_seq))?;
                if let Ok(query_vector) = dna_seq.to_kmer_vector(self.config.k, 384) {
                    let search_query = SearchQuery {
                        vector: query_vector,
                        k: 5,
                        filter: None,
                        ef_search: None,
                    };
                    if let Ok(results) = index.db().search(search_query) {
                        for result in results {
                            // Exclude the query sequence itself from its own results
                            if result.id != *query_id {
                                top_similar.push(SimilarSequence {
                                    id: result.id.clone(),
                                    similarity: result.score,
                                    position: 0,
                                });
                            }
                        }
                    }
                }
            }
        }
        Ok(KmerAnalysisResult {
            total_kmers,
            unique_kmers: kmer_set.len(),
            gc_content,
            top_similar_sequences: top_similar,
        })
    }
    /// Run variant calling against reference
    ///
    /// Emits a `VariantCall` for every non-reference, non-N allele that has
    /// frequency > 0.2 AND at least 3 supporting reads AND passes the
    /// configured depth/quality thresholds. `_reference` is currently unused
    /// (the per-column reference base lives in each `PileupColumn`).
    pub fn run_variant_calling(
        &self,
        pileups: &[PileupColumn],
        _reference: &[u8],
    ) -> Result<Vec<VariantCall>> {
        let mut variants = Vec::new();
        for pileup in pileups {
            // Hard depth gate: shallow columns are not callable
            if pileup.bases.len() < self.config.min_depth {
                continue;
            }
            // Count allele frequencies
            let mut allele_counts: HashMap<Nucleotide, usize> = HashMap::new();
            for &base in &pileup.bases {
                *allele_counts.entry(base).or_insert(0) += 1;
            }
            // Find most common alternate allele
            let _ref_count = allele_counts.get(&pileup.reference).copied().unwrap_or(0);
            for (&allele, &count) in &allele_counts {
                if allele == pileup.reference || allele == Nucleotide::N {
                    continue;
                }
                let allele_freq = count as f64 / pileup.bases.len() as f64;
                // Call variant if alternate allele frequency is significant
                if allele_freq > 0.2 && count >= 3 {
                    // Calculate quality score from supporting reads
                    // NOTE(review): takes the FIRST `count` quality scores, not
                    // specifically those of reads carrying this allele — and a
                    // u16 sum can overflow for very deep pileups. Confirm intent.
                    let quality = pileup
                        .qualities
                        .iter()
                        .take(count)
                        .map(|&q| q as u16)
                        .sum::<u16>()
                        .min(255) as u8;
                    if quality >= self.config.min_quality {
                        variants.push(VariantCall {
                            position: pileup.position,
                            reference: pileup.reference,
                            alternate: allele,
                            quality,
                            depth: pileup.bases.len(),
                            allele_frequency: allele_freq,
                        });
                    }
                }
            }
        }
        Ok(variants)
    }
    /// Translate DNA to protein and analyze structure
    ///
    /// Translation starts at byte 0 of `dna` (no ORF scanning here) and stops
    /// at the first stop/unknown codon; see `translate_dna`.
    pub fn run_protein_analysis(&self, dna: &[u8]) -> Result<ProteinAnalysisResult> {
        // Translate DNA to protein using standard genetic code
        let protein = self.translate_dna(dna)?;
        // Predict contacts using heuristic scoring
        let contacts = self.predict_protein_contacts(&protein)?;
        // Simple secondary structure prediction
        let secondary_structure = self.predict_secondary_structure(&protein);
        Ok(ProteinAnalysisResult {
            sequence: protein.residues().iter().map(|r| r.to_char()).collect(),
            length: protein.len(),
            predicted_contacts: contacts,
            secondary_structure,
        })
    }
    /// Run full analysis pipeline
    ///
    /// Stages: (1) k-mer analysis of query + reference, (2) pileup generation
    /// and variant calling, (3) ORF discovery and protein analysis. Timing is
    /// wall-clock for the whole run.
    pub fn run_full_pipeline(
        &self,
        sequence: &[u8],
        reference: &[u8],
    ) -> Result<FullAnalysisResult> {
        let start = Instant::now();
        // Stage 1: K-mer analysis
        let kmer_stats =
            self.run_kmer_analysis(&[("query", sequence), ("reference", reference)])?;
        // Stage 2: Variant calling - generate pileups from sequence
        let pileups = self.generate_pileups(sequence, reference)?;
        let variants = self.run_variant_calling(&pileups, reference)?;
        // Stage 3: Protein analysis - find ORFs and translate
        let proteins = self.find_orfs_and_translate(sequence)?;
        let execution_time_ms = start.elapsed().as_millis();
        Ok(FullAnalysisResult {
            kmer_stats,
            variants,
            proteins,
            execution_time_ms,
        })
    }
    // Helper methods
    /// Translate DNA to protein
    ///
    /// Reads codons from byte 0; stops at the first stop codon OR any codon
    /// that does not match the table (both map to `ProteinResidue::X`).
    /// A trailing partial codon is ignored.
    fn translate_dna(&self, dna: &[u8]) -> Result<ProteinSequence> {
        let mut residues = Vec::new();
        for codon in dna.chunks(3) {
            if codon.len() < 3 {
                break;
            }
            let aa = self.codon_to_amino_acid(codon);
            if aa == ProteinResidue::X {
                break; // Stop codon (or unrecognized codon, e.g. containing N)
            }
            residues.push(aa);
        }
        Ok(ProteinSequence::new(residues))
    }
    /// Map codon to amino acid (simplified genetic code)
    ///
    /// Expects an uppercase 3-byte codon; anything not in the table
    /// (stop codons, ambiguous bases, lowercase input) yields `X`.
    fn codon_to_amino_acid(&self, codon: &[u8]) -> ProteinResidue {
        match codon {
            b"ATG" => ProteinResidue::M,
            b"TGG" => ProteinResidue::W,
            b"TTT" | b"TTC" => ProteinResidue::F,
            b"TTA" | b"TTG" | b"CTT" | b"CTC" | b"CTA" | b"CTG" => ProteinResidue::L,
            b"ATT" | b"ATC" | b"ATA" => ProteinResidue::I,
            b"GTT" | b"GTC" | b"GTA" | b"GTG" => ProteinResidue::V,
            b"TCT" | b"TCC" | b"TCA" | b"TCG" | b"AGT" | b"AGC" => ProteinResidue::S,
            b"CCT" | b"CCC" | b"CCA" | b"CCG" => ProteinResidue::P,
            b"ACT" | b"ACC" | b"ACA" | b"ACG" => ProteinResidue::T,
            b"GCT" | b"GCC" | b"GCA" | b"GCG" => ProteinResidue::A,
            b"TAT" | b"TAC" => ProteinResidue::Y,
            b"CAT" | b"CAC" => ProteinResidue::H,
            b"CAA" | b"CAG" => ProteinResidue::Q,
            b"AAT" | b"AAC" => ProteinResidue::N,
            b"AAA" | b"AAG" => ProteinResidue::K,
            b"GAT" | b"GAC" => ProteinResidue::D,
            b"GAA" | b"GAG" => ProteinResidue::E,
            b"TGT" | b"TGC" => ProteinResidue::C,
            b"CGT" | b"CGC" | b"CGA" | b"CGG" | b"AGA" | b"AGG" => ProteinResidue::R,
            b"GGT" | b"GGC" | b"GGA" | b"GGG" => ProteinResidue::G,
            _ => ProteinResidue::X, // Stop or unknown
        }
    }
    /// Predict protein contacts using residue property heuristics
    ///
    /// Scores residue pairs at least 5 apart and keeps the top 10 with
    /// score > 0.5. Returns empty for proteins shorter than 5 residues.
    fn predict_protein_contacts(
        &self,
        protein: &ProteinSequence,
    ) -> Result<Vec<(usize, usize, f32)>> {
        let residues = protein.residues();
        let n = residues.len();
        if n < 5 {
            return Ok(Vec::new());
        }
        // Compute residue feature scores (ASCII code scaled to [0, 1])
        let features: Vec<f32> = residues
            .iter()
            .map(|r| r.to_char() as u8 as f32 / 255.0)
            .collect();
        // Predict contacts: pairs of residues >4 apart with similar features
        // NOTE(review): uppercase letters are ASCII 65-90, so every feature is
        // <= ~0.35 and the mean can never exceed the 0.5 threshold — this loop
        // appears to always produce an empty list. Confirm the threshold.
        let mut contacts = Vec::new();
        for i in 0..n {
            for j in (i + 5)..n {
                let score = (features[i] + features[j]) / 2.0;
                if score > 0.5 {
                    contacts.push((i, j, score));
                }
            }
        }
        // Scores are finite (derived from ASCII codes), so partial_cmp is safe
        contacts.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
        contacts.truncate(10);
        Ok(contacts)
    }
    /// Simple secondary structure prediction
    ///
    /// Per-residue heuristic: helix formers -> 'H', sheet formers -> 'E',
    /// everything else coil 'C'. No windowing or smoothing is applied.
    fn predict_secondary_structure(&self, protein: &ProteinSequence) -> Vec<char> {
        protein
            .residues()
            .iter()
            .map(|r| match r {
                ProteinResidue::A | ProteinResidue::E | ProteinResidue::L | ProteinResidue::M => {
                    'H'
                }
                ProteinResidue::V | ProteinResidue::I | ProteinResidue::Y | ProteinResidue::F => {
                    'E'
                }
                _ => 'C',
            })
            .collect()
    }
    /// Generate pileups from sequence alignment
    ///
    /// SYNTHETIC: assumes query and reference are already position-aligned
    /// (no real alignment is performed), and fabricates a coverage depth of
    /// 15-24 identical observations per column with quality 30.
    fn generate_pileups(&self, sequence: &[u8], reference: &[u8]) -> Result<Vec<PileupColumn>> {
        let mut pileups = Vec::new();
        let min_len = sequence.len().min(reference.len());
        for i in 0..min_len {
            let ref_base = match reference[i] {
                b'A' => Nucleotide::A,
                b'C' => Nucleotide::C,
                b'G' => Nucleotide::G,
                b'T' => Nucleotide::T,
                _ => Nucleotide::N,
            };
            let seq_base = match sequence[i] {
                b'A' => Nucleotide::A,
                b'C' => Nucleotide::C,
                b'G' => Nucleotide::G,
                b'T' => Nucleotide::T,
                _ => Nucleotide::N,
            };
            // Simulate coverage depth (deterministic, position-dependent)
            let depth = 15 + (i % 10);
            let bases = vec![seq_base; depth];
            let qualities = vec![30; depth];
            pileups.push(PileupColumn {
                position: i as u64,
                reference: ref_base,
                bases,
                qualities,
            });
        }
        Ok(pileups)
    }
    /// Find ORFs and translate to proteins
    ///
    /// Scans for ATG start codons (any frame), translates from each, and
    /// keeps up to 3 proteins of at least 10 residues. Overlapping ORFs are
    /// not deduplicated.
    fn find_orfs_and_translate(&self, sequence: &[u8]) -> Result<Vec<ProteinAnalysisResult>> {
        let mut proteins = Vec::new();
        // Look for ATG start codons (need >= 30 bp remaining for a 10-aa ORF)
        for i in 0..sequence.len().saturating_sub(30) {
            if sequence[i..].starts_with(b"ATG") {
                let orf = &sequence[i..];
                if let Ok(protein_result) = self.run_protein_analysis(orf) {
                    if protein_result.length >= 10 {
                        proteins.push(protein_result);
                        if proteins.len() >= 3 {
                            break;
                        }
                    }
                }
            }
        }
        Ok(proteins)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_pipeline_creation() {
        // Pipeline must retain the configuration it was constructed with.
        let config = PipelineConfig::default();
        let pipeline = GenomicPipeline::new(config);
        assert_eq!(pipeline.config.k, 21);
    }
    #[test]
    fn test_kmer_analysis() {
        // A short 24-bp sequence (longer than the default k of 21) should
        // run through k-mer analysis without error.
        let config = PipelineConfig::default();
        let pipeline = GenomicPipeline::new(config);
        let sequences = vec![("seq1", b"ACGTACGTACGTACGTACGTACGT".as_ref())];
        let result = pipeline.run_kmer_analysis(&sequences);
        assert!(result.is_ok());
    }
}

View File

@@ -0,0 +1,338 @@
//! Protein translation and amino acid analysis module
//!
//! Provides DNA to protein translation using the standard genetic code,
//! and amino acid property calculations.
use serde::{Deserialize, Serialize};
/// Amino acid representation with full names
///
/// The 20 proteinogenic amino acids plus a `Stop` marker for stop codons.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AminoAcid {
    /// Alanine
    Ala,
    /// Arginine
    Arg,
    /// Asparagine
    Asn,
    /// Aspartic acid
    Asp,
    /// Cysteine
    Cys,
    /// Glutamic acid
    Glu,
    /// Glutamine
    Gln,
    /// Glycine
    Gly,
    /// Histidine
    His,
    /// Isoleucine
    Ile,
    /// Leucine
    Leu,
    /// Lysine
    Lys,
    /// Methionine (start codon)
    Met,
    /// Phenylalanine
    Phe,
    /// Proline
    Pro,
    /// Serine
    Ser,
    /// Threonine
    Thr,
    /// Tryptophan
    Trp,
    /// Tyrosine
    Tyr,
    /// Valine
    Val,
    /// Stop codon
    Stop,
}
impl AminoAcid {
    /// Get single-letter code (`*` for the stop marker)
    pub fn to_char(&self) -> char {
        match self {
            AminoAcid::Ala => 'A',
            AminoAcid::Arg => 'R',
            AminoAcid::Asn => 'N',
            AminoAcid::Asp => 'D',
            AminoAcid::Cys => 'C',
            AminoAcid::Glu => 'E',
            AminoAcid::Gln => 'Q',
            AminoAcid::Gly => 'G',
            AminoAcid::His => 'H',
            AminoAcid::Ile => 'I',
            AminoAcid::Leu => 'L',
            AminoAcid::Lys => 'K',
            AminoAcid::Met => 'M',
            AminoAcid::Phe => 'F',
            AminoAcid::Pro => 'P',
            AminoAcid::Ser => 'S',
            AminoAcid::Thr => 'T',
            AminoAcid::Trp => 'W',
            AminoAcid::Tyr => 'Y',
            AminoAcid::Val => 'V',
            AminoAcid::Stop => '*',
        }
    }
    /// Get Kyte-Doolittle hydrophobicity value
    ///
    /// Positive values are hydrophobic, negative hydrophilic; the stop
    /// marker is assigned a neutral 0.0.
    pub fn hydrophobicity(&self) -> f32 {
        match self {
            AminoAcid::Ile => 4.5,
            AminoAcid::Val => 4.2,
            AminoAcid::Leu => 3.8,
            AminoAcid::Phe => 2.8,
            AminoAcid::Cys => 2.5,
            AminoAcid::Met => 1.9,
            AminoAcid::Ala => 1.8,
            AminoAcid::Gly => -0.4,
            AminoAcid::Thr => -0.7,
            AminoAcid::Ser => -0.8,
            AminoAcid::Trp => -0.9,
            AminoAcid::Tyr => -1.3,
            AminoAcid::Pro => -1.6,
            AminoAcid::His => -3.2,
            AminoAcid::Glu => -3.5,
            AminoAcid::Gln => -3.5,
            AminoAcid::Asp => -3.5,
            AminoAcid::Asn => -3.5,
            AminoAcid::Lys => -3.9,
            AminoAcid::Arg => -4.5,
            AminoAcid::Stop => 0.0,
        }
    }
    /// Get the monoisotopic residue mass in Daltons
    ///
    /// These are residue (in-chain) masses, i.e. already minus one water
    /// per peptide bond; `molecular_weight(&[...])` adds the terminal H2O.
    /// The stop marker contributes 0.0.
    pub fn molecular_weight(&self) -> f64 {
        match self {
            AminoAcid::Ala => 71.03711,
            AminoAcid::Arg => 156.10111,
            AminoAcid::Asn => 114.04293,
            AminoAcid::Asp => 115.02694,
            AminoAcid::Cys => 103.00919,
            AminoAcid::Glu => 129.04259,
            AminoAcid::Gln => 128.05858,
            AminoAcid::Gly => 57.02146,
            AminoAcid::His => 137.05891,
            AminoAcid::Ile => 113.08406,
            AminoAcid::Leu => 113.08406,
            AminoAcid::Lys => 128.09496,
            AminoAcid::Met => 131.04049,
            AminoAcid::Phe => 147.06841,
            AminoAcid::Pro => 97.05276,
            AminoAcid::Ser => 87.03203,
            AminoAcid::Thr => 101.04768,
            AminoAcid::Trp => 186.07931,
            AminoAcid::Tyr => 163.06333,
            AminoAcid::Val => 99.06841,
            AminoAcid::Stop => 0.0,
        }
    }
    /// Side-chain pKa for Henderson-Hasselbalch isoelectric point calculation
    ///
    /// Returns `Some(pKa)` for the seven ionizable side chains, or `None`
    /// for residues without one. (Terminal pKa values are constants in
    /// `isoelectric_point`.)
    pub fn pka_sidechain(&self) -> Option<f64> {
        match self {
            AminoAcid::Asp => Some(3.65),
            AminoAcid::Glu => Some(4.25),
            AminoAcid::His => Some(6.00),
            AminoAcid::Cys => Some(8.18),
            AminoAcid::Tyr => Some(10.07),
            AminoAcid::Lys => Some(10.53),
            AminoAcid::Arg => Some(12.48),
            _ => None,
        }
    }
}
/// Calculate total molecular weight of a protein in Daltons.
///
/// Sums the per-residue masses (which are residue masses, i.e. already
/// net of the water lost at each peptide bond) and adds a single water
/// molecule (18.01056 Da) for the free N-terminal H and C-terminal OH.
///
/// Returns 0.0 for an empty chain.
pub fn molecular_weight(protein: &[AminoAcid]) -> f64 {
    if protein.is_empty() {
        return 0.0;
    }
    let residue_sum: f64 = protein.iter().map(|aa| aa.molecular_weight()).sum();
    // Residue masses are already dehydrated, so one terminal H2O suffices;
    // the previous per-bond correction term multiplied by 0.0 was dead code.
    residue_sum + 18.01056
}
/// Estimate the isoelectric point (pI) of a peptide via bisection.
///
/// The pI is the pH at which the net charge is zero. Charges for the two
/// termini and the ionizable side chains are modeled with the
/// Henderson-Hasselbalch equation using standard pKa values. An empty
/// peptide returns a neutral 7.0.
pub fn isoelectric_point(protein: &[AminoAcid]) -> f64 {
    if protein.is_empty() {
        return 7.0;
    }
    const PKA_NH2: f64 = 9.69; // N-terminal amino group
    const PKA_COOH: f64 = 2.34; // C-terminal carboxyl group
    // Net charge of the whole peptide at a given pH.
    let net_charge = |ph: f64| -> f64 {
        // Termini: protonated amine (+) and deprotonated carboxyl (-).
        let mut q = 1.0 / (1.0 + 10_f64.powf(ph - PKA_NH2));
        q -= 1.0 / (1.0 + 10_f64.powf(PKA_COOH - ph));
        for residue in protein {
            if let Some(pka) = residue.pka_sidechain() {
                match residue {
                    // Basic side chains carry positive charge below their pKa.
                    AminoAcid::His | AminoAcid::Lys | AminoAcid::Arg => {
                        q += 1.0 / (1.0 + 10_f64.powf(ph - pka));
                    }
                    // Acidic side chains (Asp, Glu, Cys, Tyr) go negative above it.
                    _ => {
                        q -= 1.0 / (1.0 + 10_f64.powf(pka - ph));
                    }
                }
            }
        }
        q
    };
    // Net charge decreases monotonically with pH, so bisection converges.
    let mut lo = 0.0_f64;
    let mut hi = 14.0_f64;
    for _ in 0..100 {
        let mid = (lo + hi) / 2.0;
        if net_charge(mid) > 0.0 {
            lo = mid;
        } else {
            hi = mid;
        }
    }
    (lo + hi) / 2.0
}
/// Translate a DNA sequence into amino acids using the standard genetic code.
///
/// The sequence is read codon-by-codon from position 0 (no ORF search and
/// no frame shifting). Matching is case-insensitive. Translation halts at
/// the first stop codon (TAA, TAG, TGA); unrecognized codons are skipped,
/// and a trailing partial codon is discarded.
pub fn translate_dna(dna: &[u8]) -> Vec<AminoAcid> {
    let mut amino_acids = Vec::new();
    // chunks_exact drops the incomplete trailing codon automatically.
    for triplet in dna.chunks_exact(3) {
        let codon = [
            triplet[0].to_ascii_uppercase(),
            triplet[1].to_ascii_uppercase(),
            triplet[2].to_ascii_uppercase(),
        ];
        let aa = match &codon {
            b"ATG" => AminoAcid::Met,
            b"TGG" => AminoAcid::Trp,
            b"TTT" | b"TTC" => AminoAcid::Phe,
            b"TTA" | b"TTG" | b"CTT" | b"CTC" | b"CTA" | b"CTG" => AminoAcid::Leu,
            b"ATT" | b"ATC" | b"ATA" => AminoAcid::Ile,
            b"GTT" | b"GTC" | b"GTA" | b"GTG" => AminoAcid::Val,
            b"TCT" | b"TCC" | b"TCA" | b"TCG" | b"AGT" | b"AGC" => AminoAcid::Ser,
            b"CCT" | b"CCC" | b"CCA" | b"CCG" => AminoAcid::Pro,
            b"ACT" | b"ACC" | b"ACA" | b"ACG" => AminoAcid::Thr,
            b"GCT" | b"GCC" | b"GCA" | b"GCG" => AminoAcid::Ala,
            b"TAT" | b"TAC" => AminoAcid::Tyr,
            b"CAT" | b"CAC" => AminoAcid::His,
            b"CAA" | b"CAG" => AminoAcid::Gln,
            b"AAT" | b"AAC" => AminoAcid::Asn,
            b"AAA" | b"AAG" => AminoAcid::Lys,
            b"GAT" | b"GAC" => AminoAcid::Asp,
            b"GAA" | b"GAG" => AminoAcid::Glu,
            b"TGT" | b"TGC" => AminoAcid::Cys,
            b"CGT" | b"CGC" | b"CGA" | b"CGG" | b"AGA" | b"AGG" => AminoAcid::Arg,
            b"GGT" | b"GGC" | b"GGA" | b"GGG" => AminoAcid::Gly,
            b"TAA" | b"TAG" | b"TGA" => break, // Stop codons end translation
            _ => continue,                     // Unknown codon (e.g. with N): skip
        };
        amino_acids.push(aa);
    }
    amino_acids
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_translate_basic() {
        // Three complete codons translate to three residues.
        let dna = b"ATGGCAGGT";
        let result = translate_dna(dna);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0], AminoAcid::Met);
        assert_eq!(result[1], AminoAcid::Ala);
        assert_eq!(result[2], AminoAcid::Gly);
    }
    #[test]
    fn test_translate_stop_codon() {
        // Translation stops at TAA; only Met and Ala are emitted.
        let dna = b"ATGGCATAA"; // Met-Ala-Stop
        let result = translate_dna(dna);
        assert_eq!(result.len(), 2);
    }
    #[test]
    fn test_hydrophobicity() {
        // Kyte-Doolittle extremes: Ile most hydrophobic, Arg least.
        assert_eq!(AminoAcid::Ile.hydrophobicity(), 4.5);
        assert_eq!(AminoAcid::Arg.hydrophobicity(), -4.5);
    }
    #[test]
    fn test_molecular_weight() {
        let protein = vec![AminoAcid::Met, AminoAcid::Ala, AminoAcid::Gly];
        let mw = molecular_weight(&protein);
        // Met (131.04) + Ala (71.04) + Gly (57.02) + H2O (18.01) = ~277.11
        assert!(mw > 270.0 && mw < 290.0, "MW should be ~277: got {}", mw);
    }
    #[test]
    fn test_isoelectric_point() {
        // Hemoglobin beta N-terminus MVHLTPEEK has pI around 6.7
        let hbb_start = translate_dna(b"ATGGTGCATCTGACTCCTGAGGAGAAG");
        let pi = isoelectric_point(&hbb_start);
        assert!(pi > 4.0 && pi < 10.0, "pI should be reasonable: got {}", pi);
        // Lysine-rich peptide should have high pI
        let basic = vec![
            AminoAcid::Lys,
            AminoAcid::Lys,
            AminoAcid::Lys,
            AminoAcid::Arg,
        ];
        let pi_basic = isoelectric_point(&basic);
        assert!(
            pi_basic > 9.0,
            "Basic peptide pI should be >9: got {}",
            pi_basic
        );
        // Aspartate-rich peptide should have low pI
        let acidic = vec![
            AminoAcid::Asp,
            AminoAcid::Asp,
            AminoAcid::Glu,
            AminoAcid::Glu,
        ];
        let pi_acidic = isoelectric_point(&acidic);
        assert!(
            pi_acidic < 5.0,
            "Acidic peptide pI should be <5: got {}",
            pi_acidic
        );
    }
}

View File

@@ -0,0 +1,253 @@
//! Real DNA Reference Sequences from Public Databases
//!
//! Contains actual human gene sequences from NCBI GenBank / RefSeq.
//! All sequences are public domain reference data from the human genome (GRCh38).
/// Human Hemoglobin Subunit Beta (HBB) - Coding Sequence
///
/// Gene: HBB (hemoglobin subunit beta)
/// Accession: NM_000518.5 (RefSeq mRNA)
/// Organism: Homo sapiens
/// Location: Chromosome 11p15.4
/// CDS: 51..494 (444 bp coding for 147 amino acids + stop)
/// Protein: Hemoglobin beta chain (P68871)
///
/// This is the gene mutated in sickle cell disease (rs334, GAG→GTG at codon 6)
/// and beta-thalassemia. One of the most studied human genes.
///
/// NOTE(review): bases below should be re-verified against NM_000518.5
/// before any use beyond demos/benchmarks.
pub const HBB_CODING_SEQUENCE: &str = concat!(
    // Exon 1 (codons 1-30)
    "ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG",
    // Exon 1 continued + Exon 2 (codons 31-104)
    "AACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGCTGCTGGTGGTCTACCCTTGG",
    "ACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCACTCCTGATGCTGTTATGGGCA",
    "ACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGCTCGGTGCCTTTAGTGATGGCCTGGC",
    "TCACCTGGACAACCTCAAGGGCACCTTTGCTCACTGCAGTGCCATGGGTGGACCCTTC",
    // Exon 3 (codons 105-146 + stop)
    "CTGGTGGCCTTGGACACCTTGGGCACCCTGCTCAATGACACCCTGGCAAACGCTGTCC",
    "TGGCTCACTTTAAAGCCACTGGCGATGCCACTCAGCTCAATGTGAAACTGGACTGTGT",
    "CCTCAAGGGCCTCTGATAAGAGCTAA",
);
/// Known variant positions in HBB coding sequence
///
/// Positions are 0-based offsets into `HBB_CODING_SEQUENCE`.
pub mod hbb_variants {
    /// Sickle cell variant: GAG→GTG at codon 6 (position 20 in CDS)
    /// rs334, pathogenic, causes HbS
    pub const SICKLE_CELL_POS: usize = 20;
    /// HbC variant: GAG→AAG at codon 6 (position 19 in CDS)
    pub const HBC_POS: usize = 19;
    /// Beta-thalassemia IVS-I-110: G→A (common Mediterranean mutation)
    // NOTE(review): IVS-I-110 is intronic; offset 110 into this CDS-only
    // constant is an approximation — confirm intended coordinate system.
    pub const THAL_IVS1_110: usize = 110;
}
/// Human TP53 (Tumor Protein p53) - Coding Sequence (partial, exons 5-8)
///
/// Gene: TP53 (tumor protein p53)
/// Accession: NM_000546.6 (RefSeq mRNA)
/// Organism: Homo sapiens
/// Location: Chromosome 17p13.1
/// Function: Tumor suppressor, "guardian of the genome"
///
/// Exons 5-8 contain the DNA-binding domain where >80% of cancer
/// mutations cluster (hotspot codons: 175, 245, 248, 249, 273, 282).
///
/// NOTE(review): bases below should be re-verified against NM_000546.6
/// before any use beyond demos/benchmarks.
pub const TP53_EXONS_5_8: &str = concat!(
    // Exon 5 (codons 126-186)
    "TACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGC",
    "TGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAA",
    "GCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCA",
    // Exon 6 (codons 187-224)
    "GATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTG",
    "TGGAGTATTTGGATGACAGAAACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCC",
    // Exon 7 (codons 225-261)
    "GCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCT",
    "GCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAG",
    // Exon 8 (codons 262-305)
    "TGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGA",
    "GACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGC",
    "CCCCAGGGAGCACTAAGCGAGCACTG",
);
/// Known TP53 hotspot mutation positions (relative to exon 5 start)
///
/// Positions are 0-based offsets into `TP53_EXONS_5_8`.
pub mod tp53_variants {
    /// R175H: Most common p53 mutation in cancer (CGC→CAC)
    pub const R175H_POS: usize = 147;
    /// R248W: DNA contact mutation (CGG→TGG)
    pub const R248W_POS: usize = 366;
    /// R273H: DNA contact mutation (CGT→CAT)
    pub const R273H_POS: usize = 441;
}
/// Human BRCA1 - Exon 11 Fragment (ring domain)
///
/// Gene: BRCA1 (BRCA1 DNA repair associated)
/// Accession: NM_007294.4 (RefSeq mRNA)
/// Organism: Homo sapiens
/// Location: Chromosome 17q21.31
/// Function: DNA repair, tumor suppressor
///
/// Exon 11 is the largest exon (~3.4kb) encoding most of the protein.
/// This fragment covers the RING finger domain interaction region.
///
/// NOTE(review): bases below should be re-verified against NM_007294.4
/// before any use beyond demos/benchmarks.
pub const BRCA1_EXON11_FRAGMENT: &str = concat!(
    "GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAA",
    "TCTTAGAGTGTCCCATCTGTCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGA",
    "CCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCA",
    "CAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGAT",
    "TTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGG",
    "ATTTGGAAACTCAAAGAAACATCAATCCAAGAATATTGGAGAAAACAGAGGGAACTCAA",
    "TGATAAATGTTCAGTCTCCTGAAGATCTCCTGTGTTTCCAGCAGAAGAAGAAGCCATT",
    "AAGTATCTTACCTCTTCTAATGAAACTGGCTATCTGCATGAGGATATTGGATTCAGAG",
    "GAAACCCATTCTGGCTGCATTTTGCAGATCTTTTTCCCTTCTGTTAATATCCTGCTAC",
);
/// Human CYP2D6 - Coding Sequence
///
/// Gene: CYP2D6 (cytochrome P450 family 2 subfamily D member 6)
/// Accession: NM_000106.6 (RefSeq mRNA)
/// Organism: Homo sapiens
/// Location: Chromosome 22q13.2
/// Function: Drug metabolism enzyme
///
/// Key pharmacogenomic variants:
/// - *4 (rs3892097): G→A at splice site, abolishes enzyme function
/// - *10 (rs1065852): C→T (P34S), reduced activity (common in East Asian)
/// - *3 (rs35742686): Frameshift deletion
///
/// NOTE(review): bases below should be re-verified against NM_000106.6
/// before any use beyond demos/benchmarks.
pub const CYP2D6_CODING: &str = concat!(
    "ATGGGGCTAGAAGCACTGGTGCCCCTGGCCGTGATAGCCGCACTCCTCTGCCTCGCTC",
    "TGTCCACCTTGGCAACCGTGATACCCTCTGTCACTTTGATACTGATGTCCAAGAAGAGG",
    "CGCTTCTCCGTGTCCACCTTGCGCCCCTTCGGGGACGTGTTCAGCCTGCAGCTGGCCT",
    "GGAGCCCAGTGAAGGATGAGACCACAGGATTCCCAAGGCCCTGCTCAGTTCCAATGGA",
    "GAACTGAGCACATCCTCAGACTTTGACAAGTGGATCAAAGACTGCAAGGACAAGCCCG",
    "GGGCCCAGCTCACAAGCACAATCCCCAGGATGTACTTCGGGGCCACGGATCCCCACTC",
    "CTCCATCGCCCAGCAGGATGTAGAAACGGGCCAGGCCACCAAAGGTCCTGACTTCATT",
    "GACCCTTACGGGATGGGGCCTCATCCCCAGCGCAGCCTTCATCCTTACGCTGCCTGGC",
    "CTCCTGCTCATGATCTACCTGGCCGTCCCCATCTATGGCC",
);
/// Insulin (INS) gene coding sequence
///
/// Gene: INS (insulin)
/// Accession: NM_000207.3 (RefSeq mRNA)
/// Organism: Homo sapiens
/// Location: Chromosome 11p15.5
/// CDS: 60..392 (333 bp → 110 amino acids preproinsulin)
///
/// The insulin gene is critical for glucose metabolism.
/// Mutations cause neonatal diabetes.
///
/// NOTE(review): bases below should be re-verified against NM_000207.3
/// before any use beyond demos/benchmarks.
pub const INS_CODING: &str = concat!(
    "ATGGCCCTGTGGATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTG",
    "ACCCAGCCGCAGCCTTTGTGAACCAACACCTGTGCGGCTCACACCTGGTGGAAGCTCT",
    "CTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACACACCCAAGACCCGCCGGGAGGCA",
    "GAGGACCTGCAGGTGGGGCAGGTGGAGCTGGGCGGGGGCCCTGGTGCAGGCAGCCTGC",
    "AGCCCTTGGCCCTGGAGGGGTCCCTGCAGAAGCGTGGCATTGTGGAACAATGCTGTAC",
    "CAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAG",
);
/// Reference sequences for benchmarking (longer, more realistic)
pub mod benchmark {
    /// 1000bp synthetic reference from chr1:10000-11000 pattern
    /// This mimics a typical GC-balanced human genomic region
    pub fn chr1_reference_1kb() -> String {
        // Deterministic repeating motif; GC content ~42% typical of human genome
        let pattern = "ACGTGCATGCTAGCATGCATGCTAGCTAGCTAG\
                       GATCGATCGATCGATCGATCGATCGATCGATCG\
                       ATCGATCGATCGATCATGCATGCATGCATGCAT\
                       GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAG";
        tile_to_length(pattern, 1000)
    }
    /// 10kb reference for larger benchmarks (ten exact copies of the 1kb one)
    pub fn reference_10kb() -> String {
        tile_to_length(&chr1_reference_1kb(), 10_000)
    }
    /// Tile `pattern` until at least `len` bytes, then trim to exactly `len`.
    ///
    /// `str::repeat` reserves the full size up front, so this allocates once
    /// (the previous push-until-long-enough loop overshot its capacity).
    fn tile_to_length(pattern: &str, len: usize) -> String {
        let copies = len / pattern.len() + 1;
        let mut tiled = pattern.repeat(copies);
        tiled.truncate(len);
        tiled
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::DnaSequence;
    #[test]
    fn test_hbb_sequence_valid() {
        // The constant must parse as valid DNA and have CDS-scale length.
        let seq = DnaSequence::from_str(HBB_CODING_SEQUENCE).unwrap();
        assert!(
            seq.len() > 400,
            "HBB CDS should be >400bp, got {}",
            seq.len()
        );
        // Should start with ATG (start codon)
        assert_eq!(seq.get(0), Some(crate::types::Nucleotide::A));
        assert_eq!(seq.get(1), Some(crate::types::Nucleotide::T));
        assert_eq!(seq.get(2), Some(crate::types::Nucleotide::G));
    }
    #[test]
    fn test_tp53_sequence_valid() {
        let seq = DnaSequence::from_str(TP53_EXONS_5_8).unwrap();
        assert!(
            seq.len() > 400,
            "TP53 exons 5-8 should be >400bp, got {}",
            seq.len()
        );
    }
    #[test]
    fn test_brca1_fragment_valid() {
        let seq = DnaSequence::from_str(BRCA1_EXON11_FRAGMENT).unwrap();
        assert!(
            seq.len() > 400,
            "BRCA1 fragment should be >400bp, got {}",
            seq.len()
        );
    }
    #[test]
    fn test_cyp2d6_valid() {
        let seq = DnaSequence::from_str(CYP2D6_CODING).unwrap();
        assert!(
            seq.len() > 400,
            "CYP2D6 should be >400bp, got {}",
            seq.len()
        );
        // Should start with ATG
        assert_eq!(seq.get(0), Some(crate::types::Nucleotide::A));
        assert_eq!(seq.get(1), Some(crate::types::Nucleotide::T));
        assert_eq!(seq.get(2), Some(crate::types::Nucleotide::G));
    }
    #[test]
    fn test_insulin_valid() {
        let seq = DnaSequence::from_str(INS_CODING).unwrap();
        assert!(seq.len() > 300, "INS should be >300bp, got {}", seq.len());
    }
    #[test]
    fn test_hbb_translates_to_hemoglobin() {
        // Round-trip through translation and spot-check the N-terminus.
        let seq = DnaSequence::from_str(HBB_CODING_SEQUENCE).unwrap();
        let protein = crate::protein::translate_dna(seq.to_string().as_bytes());
        // HBB protein starts with Met-Val-His-Leu-Thr-Pro-Glu-Glu-Lys
        assert_eq!(protein[0].to_char(), 'M'); // Methionine (start)
        assert_eq!(protein[1].to_char(), 'V'); // Valine
        assert_eq!(protein[2].to_char(), 'H'); // Histidine
        assert_eq!(protein[3].to_char(), 'L'); // Leucine
        assert!(protein.len() >= 100, "Should produce 100+ amino acids");
    }
    #[test]
    fn test_benchmark_reference_length() {
        // Both helpers must return exactly-sized strings.
        let ref1k = benchmark::chr1_reference_1kb();
        assert_eq!(ref1k.len(), 1000);
        let ref10k = benchmark::reference_10kb();
        assert_eq!(ref10k.len(), 10_000);
    }
}

1469
vendor/ruvector/examples/dna/src/rvdna.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,736 @@
//! Core types for DNA analysis
use crate::error::{DnaError, Result};
use ruvector_core::{
types::{DbOptions, DistanceMetric, HnswConfig},
VectorDB,
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
/// DNA nucleotide base
///
/// Closed alphabet A/C/G/T plus `N` for unknown or ambiguous calls.
/// Declaration order fixes the `to_u8`/`from_u8` encoding (A=0 .. N=4).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Nucleotide {
    /// Adenine
    A,
    /// Cytosine
    C,
    /// Guanine
    G,
    /// Thymine
    T,
    /// Unknown/ambiguous base
    N,
}
impl Nucleotide {
    /// Watson-Crick complement: A<->T and C<->G pair; `N` maps to itself.
    pub fn complement(&self) -> Self {
        match self {
            Nucleotide::A => Nucleotide::T,
            Nucleotide::T => Nucleotide::A,
            Nucleotide::C => Nucleotide::G,
            Nucleotide::G => Nucleotide::C,
            Nucleotide::N => Nucleotide::N,
        }
    }
    /// Compact encoding in 0..=4 (A=0, C=1, G=2, T=3, N=4).
    pub fn to_u8(&self) -> u8 {
        // Variants carry no payload, so the default discriminants
        // (declaration order) are exactly the 0..=4 encoding.
        *self as u8
    }
    /// Decode the value produced by `to_u8`; anything above 4 is rejected.
    pub fn from_u8(val: u8) -> Result<Self> {
        let base = match val {
            0 => Nucleotide::A,
            1 => Nucleotide::C,
            2 => Nucleotide::G,
            3 => Nucleotide::T,
            4 => Nucleotide::N,
            other => {
                return Err(DnaError::InvalidSequence(format!(
                    "Invalid nucleotide encoding: {}",
                    other
                )))
            }
        };
        Ok(base)
    }
}
impl fmt::Display for Nucleotide {
    /// Renders the base as its single-letter code (A, C, G, T, or N).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = match self {
            Nucleotide::A => "A",
            Nucleotide::C => "C",
            Nucleotide::G => "G",
            Nucleotide::T => "T",
            Nucleotide::N => "N",
        };
        f.write_str(symbol)
    }
}
/// DNA sequence
///
/// Owned vector of `Nucleotide` bases in input order; construct via
/// `new` or parse text with `from_str`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DnaSequence {
    // Private so all construction paths go through the validated APIs.
    bases: Vec<Nucleotide>,
}
impl DnaSequence {
    /// Create new DNA sequence from nucleotides
    ///
    /// Takes ownership of `bases`; no validation is needed because
    /// `Nucleotide` is already a closed alphabet. An empty vector is
    /// accepted here (unlike `from_str`).
    pub fn new(bases: Vec<Nucleotide>) -> Self {
        Self { bases }
    }
/// Create from string (ACGTN)
pub fn from_str(s: &str) -> Result<Self> {
let bases: Result<Vec<_>> = s
.chars()
.map(|c| match c.to_ascii_uppercase() {
'A' => Ok(Nucleotide::A),
'C' => Ok(Nucleotide::C),
'G' => Ok(Nucleotide::G),
'T' => Ok(Nucleotide::T),
'N' => Ok(Nucleotide::N),
_ => Err(DnaError::InvalidSequence(format!(
"Invalid character: {}",
c
))),
})
.collect();
let bases = bases?;
if bases.is_empty() {
return Err(DnaError::EmptySequence);
}
Ok(Self { bases })
}
/// Get complement sequence
pub fn complement(&self) -> Self {
Self {
bases: self.bases.iter().map(|b| b.complement()).collect(),
}
}
/// Get reverse complement
pub fn reverse_complement(&self) -> Self {
Self {
bases: self.bases.iter().rev().map(|b| b.complement()).collect(),
}
}
    /// Convert to k-mer frequency vector for indexing
    ///
    /// Uses rolling polynomial hash: O(1) per k-mer instead of O(k).
    ///
    /// Each k-mer is interpreted as a base-5 number (one digit per
    /// nucleotide via `to_u8`) and its count accumulated at
    /// `vector[hash % dims]`; the final vector is L2-normalized.
    /// Collisions are expected and acceptable for similarity indexing.
    ///
    /// Errors if `k` is 0 or > 15 (5^15 is the largest power fitting
    /// comfortably in u64) or if the sequence is shorter than `k`.
    pub fn to_kmer_vector(&self, k: usize, dims: usize) -> Result<Vec<f32>> {
        if k == 0 || k > 15 {
            return Err(DnaError::InvalidKmerSize(k));
        }
        if self.bases.len() < k {
            return Err(DnaError::InvalidSequence(
                "Sequence shorter than k-mer size".to_string(),
            ));
        }
        let mut vector = vec![0.0f32; dims];
        // Precompute 5^(k-1): the positional weight of the leading digit,
        // needed to subtract it out when the window slides.
        let base: u64 = 5;
        let pow_k = base.pow(k as u32 - 1);
        // Compute initial hash for first k-mer (Horner's method)
        let mut hash = self.bases[..k].iter().fold(0u64, |acc, &b| {
            acc.wrapping_mul(5).wrapping_add(b.to_u8() as u64)
        });
        vector[(hash as usize) % dims] += 1.0;
        // Rolling hash: remove leading nucleotide, add trailing
        for i in 1..=(self.bases.len() - k) {
            let old = self.bases[i - 1].to_u8() as u64;
            let new = self.bases[i + k - 1].to_u8() as u64;
            hash = hash
                .wrapping_sub(old.wrapping_mul(pow_k))
                .wrapping_mul(5)
                .wrapping_add(new);
            vector[(hash as usize) % dims] += 1.0;
        }
        // Normalize to unit vector (skip if all-zero to avoid div by zero)
        let magnitude: f32 = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        if magnitude > 0.0 {
            let inv = 1.0 / magnitude;
            for v in &mut vector {
                *v *= inv;
            }
        }
        Ok(vector)
    }
/// Get length
pub fn len(&self) -> usize {
self.bases.len()
}
/// Check if empty
pub fn is_empty(&self) -> bool {
self.bases.is_empty()
}
/// Get a nucleotide at a specific index
pub fn get(&self, index: usize) -> Option<Nucleotide> {
self.bases.get(index).copied()
}
/// Get bases
pub fn bases(&self) -> &[Nucleotide] {
&self.bases
}
/// Encode as one-hot vectors (4 floats per nucleotide: A, C, G, T)
pub fn encode_one_hot(&self) -> Vec<f32> {
let mut result = vec![0.0f32; self.bases.len() * 4];
for (i, base) in self.bases.iter().enumerate() {
let offset = i * 4;
match base {
Nucleotide::A => result[offset] = 1.0,
Nucleotide::C => result[offset + 1] = 1.0,
Nucleotide::G => result[offset + 2] = 1.0,
Nucleotide::T => result[offset + 3] = 1.0,
Nucleotide::N => {} // all zeros for N
}
}
result
}
    /// Translate DNA sequence to protein using standard genetic code
    ///
    /// Reads in-frame from the first base, three bases per codon. A trailing
    /// partial codon is ignored, translation stops at the first stop codon
    /// (TAA/TAG/TGA), and any codon not in the table — including codons
    /// containing `N` — maps to `ProteinResidue::X`.
    ///
    /// # Errors
    /// Returns `InvalidSequence` when fewer than 3 bases are available.
    pub fn translate(&self) -> Result<ProteinSequence> {
        if self.bases.len() < 3 {
            return Err(DnaError::InvalidSequence(
                "Sequence too short for translation".to_string(),
            ));
        }
        let mut residues = Vec::new();
        for chunk in self.bases.chunks(3) {
            // Drop the trailing partial codon, if any.
            if chunk.len() < 3 {
                break;
            }
            let codon = (chunk[0], chunk[1], chunk[2]);
            // Standard genetic code lookup; `_` wildcards cover the
            // four-fold degenerate third positions.
            let aa = match codon {
                (Nucleotide::A, Nucleotide::T, Nucleotide::G) => ProteinResidue::M, // Met (start)
                (Nucleotide::T, Nucleotide::G, Nucleotide::G) => ProteinResidue::W, // Trp
                (Nucleotide::T, Nucleotide::T, Nucleotide::T)
                | (Nucleotide::T, Nucleotide::T, Nucleotide::C) => ProteinResidue::F, // Phe
                (Nucleotide::T, Nucleotide::T, Nucleotide::A)
                | (Nucleotide::T, Nucleotide::T, Nucleotide::G)
                | (Nucleotide::C, Nucleotide::T, _) => ProteinResidue::L, // Leu
                (Nucleotide::A, Nucleotide::T, Nucleotide::T)
                | (Nucleotide::A, Nucleotide::T, Nucleotide::C)
                | (Nucleotide::A, Nucleotide::T, Nucleotide::A) => ProteinResidue::I, // Ile
                (Nucleotide::G, Nucleotide::T, _) => ProteinResidue::V, // Val
                (Nucleotide::T, Nucleotide::C, _)
                | (Nucleotide::A, Nucleotide::G, Nucleotide::T)
                | (Nucleotide::A, Nucleotide::G, Nucleotide::C) => ProteinResidue::S, // Ser
                (Nucleotide::C, Nucleotide::C, _) => ProteinResidue::P, // Pro
                (Nucleotide::A, Nucleotide::C, _) => ProteinResidue::T, // Thr
                (Nucleotide::G, Nucleotide::C, _) => ProteinResidue::A, // Ala
                (Nucleotide::T, Nucleotide::A, Nucleotide::T)
                | (Nucleotide::T, Nucleotide::A, Nucleotide::C) => ProteinResidue::Y, // Tyr
                (Nucleotide::C, Nucleotide::A, Nucleotide::T)
                | (Nucleotide::C, Nucleotide::A, Nucleotide::C) => ProteinResidue::H, // His
                (Nucleotide::C, Nucleotide::A, Nucleotide::A)
                | (Nucleotide::C, Nucleotide::A, Nucleotide::G) => ProteinResidue::Q, // Gln
                (Nucleotide::A, Nucleotide::A, Nucleotide::T)
                | (Nucleotide::A, Nucleotide::A, Nucleotide::C) => ProteinResidue::N, // Asn
                (Nucleotide::A, Nucleotide::A, Nucleotide::A)
                | (Nucleotide::A, Nucleotide::A, Nucleotide::G) => ProteinResidue::K, // Lys
                (Nucleotide::G, Nucleotide::A, Nucleotide::T)
                | (Nucleotide::G, Nucleotide::A, Nucleotide::C) => ProteinResidue::D, // Asp
                (Nucleotide::G, Nucleotide::A, Nucleotide::A)
                | (Nucleotide::G, Nucleotide::A, Nucleotide::G) => ProteinResidue::E, // Glu
                (Nucleotide::T, Nucleotide::G, Nucleotide::T)
                | (Nucleotide::T, Nucleotide::G, Nucleotide::C) => ProteinResidue::C, // Cys
                (Nucleotide::C, Nucleotide::G, _)
                | (Nucleotide::A, Nucleotide::G, Nucleotide::A)
                | (Nucleotide::A, Nucleotide::G, Nucleotide::G) => ProteinResidue::R, // Arg
                (Nucleotide::G, Nucleotide::G, _) => ProteinResidue::G, // Gly
                // Stop codons: `break` leaves the chunk loop, ending
                // translation here (remaining codons are discarded).
                (Nucleotide::T, Nucleotide::A, Nucleotide::A)
                | (Nucleotide::T, Nucleotide::A, Nucleotide::G)
                | (Nucleotide::T, Nucleotide::G, Nucleotide::A) => break,
                _ => ProteinResidue::X, // Unknown
            };
            residues.push(aa);
        }
        Ok(ProteinSequence::new(residues))
    }
/// Simple attention-based alignment against a reference sequence
///
/// Uses dot-product attention between one-hot encodings to find
/// the best alignment position.
pub fn align_with_attention(&self, reference: &DnaSequence) -> Result<AlignmentResult> {
if self.is_empty() || reference.is_empty() {
return Err(DnaError::AlignmentError(
"Cannot align empty sequences".to_string(),
));
}
let query_len = self.len();
let ref_len = reference.len();
// Compute dot-product attention scores at each offset
let mut best_score = i32::MIN;
let mut best_offset = 0;
for offset in 0..ref_len.saturating_sub(query_len / 2) {
let mut score: i32 = 0;
let overlap = query_len.min(ref_len - offset);
for i in 0..overlap {
if self.bases[i] == reference.bases[offset + i] {
score += 2; // match
} else {
score -= 1; // mismatch
}
}
if score > best_score {
best_score = score;
best_offset = offset;
}
}
// Build CIGAR string
let overlap = query_len.min(ref_len.saturating_sub(best_offset));
let mut cigar = Vec::new();
let mut match_run = 0;
for i in 0..overlap {
if self.bases[i] == reference.bases[best_offset + i] {
match_run += 1;
} else {
if match_run > 0 {
cigar.push(CigarOp::M(match_run));
match_run = 0;
}
cigar.push(CigarOp::M(1)); // mismatch also represented as M
}
}
if match_run > 0 {
cigar.push(CigarOp::M(match_run));
}
Ok(AlignmentResult {
score: best_score,
cigar,
mapped_position: GenomicPosition {
chromosome: 1,
position: best_offset as u64,
reference_allele: reference
.bases
.get(best_offset)
.copied()
.unwrap_or(Nucleotide::N),
alternate_allele: None,
},
mapping_quality: QualityScore::new(
((best_score.max(0) as f64 / overlap.max(1) as f64) * 60.0).min(60.0) as u8,
)
.unwrap_or(QualityScore(0)),
})
}
}
impl fmt::Display for DnaSequence {
    /// Write the sequence as an unseparated run of base letters.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.bases.iter().try_for_each(|base| write!(f, "{}", base))
    }
}
/// Genomic position with variant information
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GenomicPosition {
    /// Chromosome number (1-22, X=23, Y=24, M=25)
    pub chromosome: u8,
    /// Position on chromosome (0-based)
    pub position: u64,
    /// Reference allele
    pub reference_allele: Nucleotide,
    /// Alternate allele (if variant; `None` for a reference call)
    pub alternate_allele: Option<Nucleotide>,
}
/// Quality score (Phred scale)
///
/// The inner value is kept in the 0-93 range (the printable Phred+33 range)
/// by the validating `QualityScore::new` constructor.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct QualityScore(u8);
impl QualityScore {
    /// Create new quality score (0-93, Phred+33)
    ///
    /// # Errors
    /// Returns `InvalidQuality` for scores above 93.
    pub fn new(score: u8) -> Result<Self> {
        match score {
            0..=93 => Ok(Self(score)),
            _ => Err(DnaError::InvalidQuality(score)),
        }
    }
    /// Get raw score
    pub fn value(&self) -> u8 {
        self.0
    }
    /// Convert to probability of error: P = 10^(-Q/10)
    pub fn to_error_probability(&self) -> f64 {
        let q = f64::from(self.0);
        10_f64.powf(-q / 10.0)
    }
}
/// Variant type
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Variant {
    /// Single nucleotide polymorphism
    Snp {
        /// Location plus ref/alt alleles of the substitution
        position: GenomicPosition,
        /// Call confidence (Phred scale)
        quality: QualityScore,
    },
    /// Insertion
    Insertion {
        /// Location where the bases are inserted
        position: GenomicPosition,
        /// The inserted sequence
        inserted_bases: DnaSequence,
        /// Call confidence (Phred scale)
        quality: QualityScore,
    },
    /// Deletion
    Deletion {
        /// Location where the deletion begins
        position: GenomicPosition,
        /// Number of reference bases removed
        deleted_length: usize,
        /// Call confidence (Phred scale)
        quality: QualityScore,
    },
    /// Structural variant (large rearrangement)
    StructuralVariant {
        /// Chromosome number (same encoding as `GenomicPosition`)
        chromosome: u8,
        /// Start coordinate of the affected region
        start: u64,
        /// End coordinate of the affected region
        end: u64,
        /// Free-form variant class label (e.g. from the SV caller)
        variant_type: String,
        /// Call confidence (Phred scale)
        quality: QualityScore,
    },
}
/// CIGAR operation for alignment
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CigarOp {
    /// Match/mismatch
    M(usize),
    /// Insertion to reference
    I(usize),
    /// Deletion from reference
    D(usize),
    /// Soft clipping (clipped sequence present in SEQ)
    S(usize),
    /// Hard clipping (clipped sequence NOT present in SEQ)
    H(usize),
}
/// Alignment result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentResult {
    /// Alignment score
    pub score: i32,
    /// CIGAR string
    pub cigar: Vec<CigarOp>,
    /// Mapped position
    pub mapped_position: GenomicPosition,
    /// Mapping quality
    pub mapping_quality: QualityScore,
}
/// Protein residue (amino acid)
///
/// Variants use the standard single-letter amino-acid codes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProteinResidue {
    /// Alanine
    A,
    /// Cysteine
    C,
    /// Aspartate
    D,
    /// Glutamate
    E,
    /// Phenylalanine
    F,
    /// Glycine
    G,
    /// Histidine
    H,
    /// Isoleucine
    I,
    /// Lysine
    K,
    /// Leucine
    L,
    /// Methionine
    M,
    /// Asparagine
    N,
    /// Proline
    P,
    /// Glutamine
    Q,
    /// Arginine
    R,
    /// Serine
    S,
    /// Threonine
    T,
    /// Valine
    V,
    /// Tryptophan
    W,
    /// Tyrosine
    Y,
    /// Stop codon or unknown
    X,
}
impl ProteinResidue {
/// Get single-letter code
pub fn to_char(&self) -> char {
match self {
ProteinResidue::A => 'A',
ProteinResidue::C => 'C',
ProteinResidue::D => 'D',
ProteinResidue::E => 'E',
ProteinResidue::F => 'F',
ProteinResidue::G => 'G',
ProteinResidue::H => 'H',
ProteinResidue::I => 'I',
ProteinResidue::K => 'K',
ProteinResidue::L => 'L',
ProteinResidue::M => 'M',
ProteinResidue::N => 'N',
ProteinResidue::P => 'P',
ProteinResidue::Q => 'Q',
ProteinResidue::R => 'R',
ProteinResidue::S => 'S',
ProteinResidue::T => 'T',
ProteinResidue::V => 'V',
ProteinResidue::W => 'W',
ProteinResidue::Y => 'Y',
ProteinResidue::X => 'X',
}
}
}
/// Protein sequence
///
/// Ordered list of amino-acid residues, typically produced by
/// `DnaSequence::translate`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProteinSequence {
    /// Residues in sequence order; private — read via `residues()`.
    residues: Vec<ProteinResidue>,
}
impl ProteinSequence {
/// Create new protein sequence
pub fn new(residues: Vec<ProteinResidue>) -> Self {
Self { residues }
}
/// Get residues
pub fn residues(&self) -> &[ProteinResidue] {
&self.residues
}
/// Get length
pub fn len(&self) -> usize {
self.residues.len()
}
/// Check if empty
pub fn is_empty(&self) -> bool {
self.residues.is_empty()
}
/// Build a simplified contact graph based on sequence distance
///
/// Residues within `distance_threshold` positions of each other
/// are considered potential contacts (simplified from 3D distance).
pub fn build_contact_graph(&self, distance_threshold: f32) -> Result<ContactGraph> {
if self.residues.is_empty() {
return Err(DnaError::InvalidSequence(
"Cannot build contact graph for empty protein".to_string(),
));
}
let n = self.residues.len();
let threshold = distance_threshold as usize;
let mut edges = Vec::new();
for i in 0..n {
for j in (i + 4)..n {
// Simplified: sequence separation as proxy for spatial distance
// In real structure prediction, this would use 3D coordinates
let seq_dist = j - i;
if seq_dist <= threshold {
// Closer in sequence = higher contact probability
let contact_prob = 1.0 / (1.0 + (seq_dist as f32 - 4.0) / threshold as f32);
edges.push((i, j, contact_prob));
}
}
}
Ok(ContactGraph {
num_residues: n,
distance_threshold,
edges,
})
}
/// Predict contacts from a contact graph using residue properties
///
/// Returns (residue_i, residue_j, confidence_score) tuples
pub fn predict_contacts(&self, graph: &ContactGraph) -> Result<Vec<(usize, usize, f32)>> {
let mut predictions: Vec<(usize, usize, f32)> = graph
.edges
.iter()
.map(|&(i, j, base_score)| {
// Boost score for hydrophobic-hydrophobic contacts (protein core)
let boost = if i < self.residues.len() && j < self.residues.len() {
let ri = &self.residues[i];
let rj = &self.residues[j];
// Hydrophobic residues tend to be in protein core
let hydrophobic = |r: &ProteinResidue| {
matches!(
r,
ProteinResidue::A
| ProteinResidue::V
| ProteinResidue::L
| ProteinResidue::I
| ProteinResidue::F
| ProteinResidue::W
| ProteinResidue::M
)
};
if hydrophobic(ri) && hydrophobic(rj) {
1.5
} else {
1.0
}
} else {
1.0
};
(i, j, (base_score * boost).min(1.0))
})
.collect();
// Sort by confidence descending
predictions.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
Ok(predictions)
}
}
/// Contact graph for protein structure analysis
#[derive(Debug, Clone)]
pub struct ContactGraph {
    /// Number of residues
    pub num_residues: usize,
    /// Distance threshold used
    pub distance_threshold: f32,
    /// Edges: (residue_i, residue_j, contact_probability)
    ///
    /// The third element is the contact probability assigned by
    /// `build_contact_graph` (in (0, 1]), not a physical distance.
    pub edges: Vec<(usize, usize, f32)>,
}
/// K-mer index using RuVector HNSW
pub struct KmerIndex {
    /// Backing vector database holding the k-mer vectors.
    db: VectorDB,
    /// K-mer length the index was built for.
    k: usize,
    /// Dimensionality of the indexed vectors.
    dims: usize,
}
impl KmerIndex {
/// Create new k-mer index
pub fn new(k: usize, dims: usize, storage_path: &str) -> Result<Self> {
let options = DbOptions {
dimensions: dims,
distance_metric: DistanceMetric::Cosine,
storage_path: storage_path.to_string(),
hnsw_config: Some(HnswConfig {
m: 16,
ef_construction: 200,
ef_search: 100,
max_elements: 1_000_000,
}),
quantization: None,
};
let db = VectorDB::new(options)?;
Ok(Self { db, k, dims })
}
/// Get underlying VectorDB
pub fn db(&self) -> &VectorDB {
&self.db
}
/// Get k-mer size
pub fn k(&self) -> usize {
self.k
}
/// Get dimensions
pub fn dims(&self) -> usize {
self.dims
}
}
/// Analysis configuration
///
/// Bundles the tunables for the k-mer index, variant filtering, and the
/// aligner's scoring scheme.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisConfig {
    /// K-mer size for indexing
    pub kmer_size: usize,
    /// Vector dimensions
    pub vector_dims: usize,
    /// Minimum quality score for variants (Phred scale)
    pub min_quality: u8,
    /// Alignment match score
    pub match_score: i32,
    /// Alignment mismatch penalty (negative value penalizes)
    pub mismatch_penalty: i32,
    /// Alignment gap open penalty (negative value penalizes)
    pub gap_open_penalty: i32,
    /// Alignment gap extend penalty (negative value penalizes)
    pub gap_extend_penalty: i32,
    /// Additional pipeline parameters (free-form, JSON-valued)
    pub parameters: HashMap<String, serde_json::Value>,
}
impl Default for AnalysisConfig {
fn default() -> Self {
Self {
kmer_size: 11,
vector_dims: 512,
min_quality: 20,
match_score: 2,
mismatch_penalty: -1,
gap_open_penalty: -3,
gap_extend_penalty: -1,
parameters: HashMap::new(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_nucleotide_complement() {
        assert_eq!(Nucleotide::A.complement(), Nucleotide::T);
        assert_eq!(Nucleotide::G.complement(), Nucleotide::C);
    }
    #[test]
    fn test_dna_sequence() {
        let seq = DnaSequence::from_str("ACGT").unwrap();
        assert_eq!(seq.len(), 4);
        assert_eq!(seq.to_string(), "ACGT");
        // Lower-case input is accepted and normalized.
        let seq = DnaSequence::from_str("acgt").unwrap();
        assert_eq!(seq.to_string(), "ACGT");
        // Invalid characters and empty input are rejected.
        assert!(DnaSequence::from_str("ACGX").is_err());
        assert!(DnaSequence::from_str("").is_err());
    }
    #[test]
    fn test_reverse_complement() {
        // "ACGT" is its own reverse complement, so it alone cannot catch a
        // reverse-only or complement-only bug; keep it but add a
        // non-palindromic case too.
        let seq = DnaSequence::from_str("ACGT").unwrap();
        assert_eq!(seq.reverse_complement().to_string(), "ACGT");
        let seq = DnaSequence::from_str("AACG").unwrap();
        assert_eq!(seq.reverse_complement().to_string(), "CGTT");
    }
}

View File

@@ -0,0 +1,319 @@
//! Variant calling module for DNA analysis
//!
//! Provides SNP and indel calling from pileup data.
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Pileup column representing reads aligned at a single position
#[derive(Debug, Clone)]
pub struct PileupColumn {
    /// Observed bases from aligned reads; may also contain the indel
    /// markers `b'-'`/`b'*'` (deletion) and `b'+'` (insertion) consumed by
    /// `VariantCaller::call_indel`.
    pub bases: Vec<u8>,
    /// Quality scores for each base (parallel to `bases`)
    pub qualities: Vec<u8>,
    /// Genomic position
    pub position: u64,
    /// Chromosome number
    pub chromosome: u8,
}
/// Genotype classification
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Genotype {
    /// Homozygous reference (0/0)
    HomRef,
    /// Heterozygous (0/1)
    Het,
    /// Homozygous alternate (1/1)
    HomAlt,
}
/// Variant filter status
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FilterStatus {
    /// Passed all filters
    Pass,
    /// Failed quality filter (quality below the configured minimum)
    LowQuality,
    /// Failed depth filter (read depth below the configured minimum)
    LowDepth,
}
/// Called variant
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantCall {
    /// Chromosome number
    pub chromosome: u8,
    /// Genomic position
    pub position: u64,
    /// Reference allele (upper-cased base)
    pub ref_allele: u8,
    /// Alternate allele; `b'-'` marks a deletion call and `b'+'` an
    /// insertion call (see `VariantCaller::call_indel`)
    pub alt_allele: u8,
    /// Variant quality (Phred-scaled)
    pub quality: f64,
    /// Genotype call
    pub genotype: Genotype,
    /// Total read depth
    pub depth: usize,
    /// Alternate allele depth (reads supporting the alt allele)
    pub allele_depth: usize,
    /// Filter status
    pub filter_status: FilterStatus,
}
/// Variant caller configuration
#[derive(Debug, Clone)]
pub struct VariantCallerConfig {
    /// Minimum base quality to consider (bases below this are ignored)
    pub min_quality: u8,
    /// Minimum read depth required to emit a call
    pub min_depth: usize,
    /// Minimum alternate allele frequency for heterozygous call
    pub het_threshold: f64,
    /// Minimum alternate allele frequency for homozygous alt call
    pub hom_alt_threshold: f64,
}
impl Default for VariantCallerConfig {
fn default() -> Self {
Self {
min_quality: 20,
min_depth: 5,
het_threshold: 0.2,
hom_alt_threshold: 0.8,
}
}
}
/// Variant caller that processes pileup data to call SNPs
pub struct VariantCaller {
    /// Thresholds governing quality filtering and genotyping.
    config: VariantCallerConfig,
}
impl VariantCaller {
/// Create a new variant caller with the given configuration
pub fn new(config: VariantCallerConfig) -> Self {
Self { config }
}
/// Call a SNP at a single pileup position
///
/// Returns `Some(VariantCall)` if a variant is detected, `None` if all reads
/// match the reference or depth is insufficient.
pub fn call_snp(&self, pileup: &PileupColumn, reference_base: u8) -> Option<VariantCall> {
let ref_base = reference_base.to_ascii_uppercase();
// Count alleles (only high-quality bases)
let mut allele_counts: HashMap<u8, usize> = HashMap::new();
for (i, &base) in pileup.bases.iter().enumerate() {
let qual = pileup.qualities.get(i).copied().unwrap_or(0);
if qual >= self.config.min_quality {
*allele_counts.entry(base.to_ascii_uppercase()).or_insert(0) += 1;
}
}
let total_depth: usize = allele_counts.values().sum();
if total_depth < self.config.min_depth {
return None;
}
// Find the most common non-reference allele
let mut best_alt: Option<(u8, usize)> = None;
for (&allele, &count) in &allele_counts {
if allele != ref_base {
if best_alt.map_or(true, |(_, best_count)| count > best_count) {
best_alt = Some((allele, count));
}
}
}
let (alt_allele, alt_count) = best_alt?;
let alt_freq = alt_count as f64 / total_depth as f64;
if alt_freq < self.config.het_threshold {
return None;
}
let genotype = if alt_freq >= self.config.hom_alt_threshold {
Genotype::HomAlt
} else {
Genotype::Het
};
// Phred-scaled quality estimate
let quality = -10.0 * (1.0 - alt_freq).max(1e-10).log10() * (alt_count as f64);
Some(VariantCall {
chromosome: pileup.chromosome,
position: pileup.position,
ref_allele: ref_base,
alt_allele,
quality,
genotype,
depth: total_depth,
allele_depth: alt_count,
filter_status: FilterStatus::Pass,
})
}
/// Detect insertions/deletions from pileup data
///
/// Looks for gaps (represented as b'-') in the pileup bases that indicate
/// indels relative to the reference.
pub fn call_indel(
&self,
pileup: &PileupColumn,
reference_base: u8,
next_ref_bases: &[u8],
) -> Option<VariantCall> {
let ref_base = reference_base.to_ascii_uppercase();
let mut del_count = 0usize;
let mut ins_count = 0usize;
for (i, &base) in pileup.bases.iter().enumerate() {
let qual = pileup.qualities.get(i).copied().unwrap_or(0);
if qual < self.config.min_quality {
continue;
}
if base == b'-' || base == b'*' {
del_count += 1;
} else if base == b'+' {
ins_count += 1;
}
}
let total = pileup.bases.len();
if total < self.config.min_depth {
return None;
}
// Check for deletion
if del_count > 0 {
let del_freq = del_count as f64 / total as f64;
if del_freq >= self.config.het_threshold {
let genotype = if del_freq >= self.config.hom_alt_threshold {
Genotype::HomAlt
} else {
Genotype::Het
};
let quality = -10.0 * (1.0 - del_freq).max(1e-10).log10() * (del_count as f64);
return Some(VariantCall {
chromosome: pileup.chromosome,
position: pileup.position,
ref_allele: ref_base,
alt_allele: b'-',
quality,
genotype,
depth: total,
allele_depth: del_count,
filter_status: FilterStatus::Pass,
});
}
}
// Check for insertion
if ins_count > 0 {
let ins_freq = ins_count as f64 / total as f64;
if ins_freq >= self.config.het_threshold {
let genotype = if ins_freq >= self.config.hom_alt_threshold {
Genotype::HomAlt
} else {
Genotype::Het
};
let quality = -10.0 * (1.0 - ins_freq).max(1e-10).log10() * (ins_count as f64);
return Some(VariantCall {
chromosome: pileup.chromosome,
position: pileup.position,
ref_allele: ref_base,
alt_allele: b'+',
quality,
genotype,
depth: total,
allele_depth: ins_count,
filter_status: FilterStatus::Pass,
});
}
}
None
}
/// Apply quality and depth filters to a list of variant calls
pub fn filter_variants(&self, calls: &mut [VariantCall]) {
for call in calls.iter_mut() {
if call.quality < self.config.min_quality as f64 {
call.filter_status = FilterStatus::LowQuality;
} else if call.depth < self.config.min_depth {
call.filter_status = FilterStatus::LowDepth;
}
}
}
/// Generate VCF-formatted output for variant calls
pub fn to_vcf(&self, calls: &[VariantCall], sample_name: &str) -> String {
let mut vcf = String::new();
vcf.push_str("##fileformat=VCFv4.3\n");
vcf.push_str(&format!("##source=RuVectorDNA\n"));
vcf.push_str("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t");
vcf.push_str(sample_name);
vcf.push('\n');
for call in calls {
let filter = match call.filter_status {
FilterStatus::Pass => "PASS",
FilterStatus::LowQuality => "LowQual",
FilterStatus::LowDepth => "LowDepth",
};
let gt = match call.genotype {
Genotype::HomRef => "0/0",
Genotype::Het => "0/1",
Genotype::HomAlt => "1/1",
};
vcf.push_str(&format!(
"chr{}\t{}\t.\t{}\t{}\t{:.1}\t{}\tDP={};AF={:.3}\tGT:DP:AD\t{}:{}:{}\n",
call.chromosome,
call.position,
call.ref_allele as char,
call.alt_allele as char,
call.quality,
filter,
call.depth,
call.allele_depth as f64 / call.depth as f64,
gt,
call.depth,
call.allele_depth,
));
}
vcf
}
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_variant_caller_creation() {
        let config = VariantCallerConfig::default();
        let _caller = VariantCaller::new(config);
    }
    #[test]
    fn test_snp_calling() {
        let caller = VariantCaller::new(VariantCallerConfig::default());
        // 15 high-quality 'G' reads over an 'A' reference: unambiguous
        // homozygous-alt SNP.
        let pileup = PileupColumn {
            bases: vec![b'G'; 15],
            qualities: vec![40; 15],
            position: 1000,
            chromosome: 1,
        };
        let call = caller.call_snp(&pileup, b'A');
        assert!(call.is_some());
        let call = call.unwrap();
        assert_eq!(call.genotype, Genotype::HomAlt);
        assert_eq!(call.ref_allele, b'A');
        assert_eq!(call.alt_allele, b'G');
        assert_eq!(call.depth, 15);
        assert_eq!(call.allele_depth, 15);
        assert_eq!(call.filter_status, FilterStatus::Pass);
    }
    #[test]
    fn test_snp_none_when_all_reads_match_reference() {
        let caller = VariantCaller::new(VariantCallerConfig::default());
        let pileup = PileupColumn {
            bases: vec![b'A'; 10],
            qualities: vec![40; 10],
            position: 5,
            chromosome: 2,
        };
        assert!(caller.call_snp(&pileup, b'A').is_none());
    }
}