wifi-densepose/vendor/ruvector/examples/dna/src/rvdna.rs

//! RVDNA - AI-Native Genomic File Format
//!
//! A binary format purpose-built for ultra-low-latency AI genomic analysis.
//! Unlike FASTA/BAM/VCF which require re-encoding for every AI pipeline,
//! RVDNA stores pre-computed tensors, vector embeddings, and graph structures
//! alongside the raw sequence data.
//!
//! ## Format Structure
//!
//! ```text
//! ┌─────────────────────────────────┐
//! │ Header (64 bytes)               │  Magic, version, section offsets
//! ├─────────────────────────────────┤
//! │ Section 0: Sequence Data        │  2-bit packed nucleotides + quality
//! ├─────────────────────────────────┤
//! │ Section 1: K-mer Vectors        │  Pre-computed HNSW-ready embeddings
//! ├─────────────────────────────────┤
//! │ Section 2: Attention Weights    │  Sparse self-attention matrices
//! ├─────────────────────────────────┤
//! │ Section 3: Variant Tensor       │  Per-position genotype likelihoods
//! ├─────────────────────────────────┤
//! │ Section 4: Protein Embeddings   │  GNN node features + contact graph
//! ├─────────────────────────────────┤
//! │ Section 5: Epigenomic Tracks    │  Methylation betas + aging coeffs
//! ├─────────────────────────────────┤
//! │ Section 6: Metadata             │  JSON provenance + checksums
//! └─────────────────────────────────┘
//! ```
//!
//! ## Key Properties
//!
//! - **2-bit encoding**: 4 bases per byte (4x compression vs ASCII)
//! - **Zero-copy access**: Memory-mappable with aligned sections
//! - **Pre-indexed**: HNSW graph stored inline for instant similarity search
//! - **Tensor-native**: Attention weights and variant probabilities stored as
//!   sparse tensors in COO format for direct GPU/SIMD consumption
//! - **Streaming**: Chunked sections allow incremental read/write

use crate::error::{DnaError, Result};
use crate::types::{DnaSequence, Nucleotide, QualityScore};
use serde::{Deserialize, Serialize};
use std::io::{Read, Write};

// ============================================================================
// Constants
// ============================================================================

/// Magic bytes identifying an RVDNA file
pub const MAGIC: [u8; 8] = *b"RVDNA\x01\x00\x00";

/// Current format version
pub const FORMAT_VERSION: u16 = 1;

/// Number of sections in the format
pub const NUM_SECTIONS: usize = 7;

/// Section alignment boundary (64 bytes for cache-line alignment)
pub const SECTION_ALIGN: u64 = 64;

/// Header size (fixed)
pub const HEADER_SIZE: u64 = 64 + (NUM_SECTIONS as u64 * 16); // 64 base + 16 per section offset

// ============================================================================
// Compression Codec
// ============================================================================

/// Compression codec for section data
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[repr(u8)]
pub enum Codec {
    /// No compression (zero-copy mmap friendly)
    None = 0,
    /// LZ4 fast compression (decode at ~4 GB/s)
    Lz4 = 1,
    /// Zstd balanced compression
    Zstd = 2,
}

impl Codec {
    fn from_u8(v: u8) -> Result<Self> {
        match v {
            0 => Ok(Codec::None),
            1 => Ok(Codec::Lz4),
            2 => Ok(Codec::Zstd),
            _ => Err(DnaError::InvalidSequence(format!("Unknown codec: {}", v))),
        }
    }
}

// ============================================================================
// Section Types
// ============================================================================

/// Section identifier
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[repr(u8)]
pub enum SectionType {
    /// Raw sequence data (2-bit encoded)
    Sequence = 0,
    /// Pre-computed k-mer frequency vectors
    KmerVectors = 1,
    /// Sparse attention weight matrices
    AttentionWeights = 2,
    /// Per-position variant probability tensors
    VariantTensor = 3,
    /// Protein residue embeddings + contact graph
    ProteinEmbeddings = 4,
    /// Epigenomic tracks (methylation, chromatin)
    EpigenomicTracks = 5,
    /// JSON metadata and provenance
    Metadata = 6,
}

/// Section offset entry in the header
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct SectionEntry {
    /// Offset from file start (0 = section not present)
    pub offset: u64,
    /// Compressed size in bytes
    pub size: u64,
}

// ============================================================================
// File Header
// ============================================================================

/// RVDNA file header (fixed-size, at byte 0)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RvdnaHeader {
    /// Format version
    pub version: u16,
    /// Compression codec used
    pub codec: Codec,
    /// Flags (bit 0: little-endian, bit 1: has quality scores)
    pub flags: u32,
    /// Total sequence length in bases
    pub sequence_length: u64,
    /// Number of contigs/chromosomes
    pub num_contigs: u32,
    /// Section offset table
    pub sections: [SectionEntry; NUM_SECTIONS],
    /// CRC32 checksum of header
    pub header_checksum: u32,
}

impl RvdnaHeader {
    /// Create a new empty header
    pub fn new(sequence_length: u64, codec: Codec) -> Self {
        Self {
            version: FORMAT_VERSION,
            codec,
            flags: 0x01, // little-endian by default
            sequence_length,
            num_contigs: 1,
            sections: [SectionEntry { offset: 0, size: 0 }; NUM_SECTIONS],
            header_checksum: 0,
        }
    }

    /// Set the has_quality flag
    pub fn with_quality(mut self) -> Self {
        self.flags |= 0x02;
        self
    }

    /// Check if quality scores are present
    pub fn has_quality(&self) -> bool {
        self.flags & 0x02 != 0
    }

    /// Serialize header to bytes
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(HEADER_SIZE as usize);

        // Magic (8 bytes)
        buf.extend_from_slice(&MAGIC);
        // Version (2 bytes)
        buf.extend_from_slice(&self.version.to_le_bytes());
        // Codec (1 byte)
        buf.push(self.codec as u8);
        // Padding (1 byte)
        buf.push(0);
        // Flags (4 bytes)
        buf.extend_from_slice(&self.flags.to_le_bytes());
        // Sequence length (8 bytes)
        buf.extend_from_slice(&self.sequence_length.to_le_bytes());
        // Num contigs (4 bytes)
        buf.extend_from_slice(&self.num_contigs.to_le_bytes());
        // Reserved (36 bytes to reach 64-byte base header)
        buf.extend_from_slice(&[0u8; 36]);

        // Section table (16 bytes per section: 8 offset + 8 size)
        for section in &self.sections {
            buf.extend_from_slice(&section.offset.to_le_bytes());
            buf.extend_from_slice(&section.size.to_le_bytes());
        }

        // Compute checksum over everything except the last 4 bytes
        let checksum = crc32_simple(&buf);
        buf.extend_from_slice(&checksum.to_le_bytes());

        buf
    }

    /// Parse header from bytes
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        if data.len() < HEADER_SIZE as usize + 4 {
            return Err(DnaError::InvalidSequence("Header too short".to_string()));
        }

        // Verify magic
        if &data[0..8] != &MAGIC {
            return Err(DnaError::InvalidSequence(
                "Invalid RVDNA magic number".to_string(),
            ));
        }

        let version = u16::from_le_bytes([data[8], data[9]]);
        let codec = Codec::from_u8(data[10])?;
        let flags = u32::from_le_bytes([data[12], data[13], data[14], data[15]]);
        let sequence_length = u64::from_le_bytes(data[16..24].try_into().unwrap());
        let num_contigs = u32::from_le_bytes(data[24..28].try_into().unwrap());

        let mut sections = [SectionEntry { offset: 0, size: 0 }; NUM_SECTIONS];
        let table_start = 64;
        for i in 0..NUM_SECTIONS {
            let base = table_start + i * 16;
            sections[i] = SectionEntry {
                offset: u64::from_le_bytes(data[base..base + 8].try_into().unwrap()),
                size: u64::from_le_bytes(data[base + 8..base + 16].try_into().unwrap()),
            };
        }

        let checksum_offset = table_start + NUM_SECTIONS * 16;
        let header_checksum = u32::from_le_bytes(
            data[checksum_offset..checksum_offset + 4]
                .try_into()
                .unwrap(),
        );

        // Verify checksum
        let computed = crc32_simple(&data[..checksum_offset]);
        if computed != header_checksum {
            return Err(DnaError::InvalidSequence(format!(
                "Header checksum mismatch: expected {:08x}, got {:08x}",
                header_checksum, computed
            )));
        }

        Ok(Self {
            version,
            codec,
            flags,
            sequence_length,
            num_contigs,
            sections,
            header_checksum,
        })
    }
}

// ============================================================================
// 2-Bit Sequence Encoding
// ============================================================================

/// Encode nucleotides to 2-bit packed representation.
///
/// Packing: 4 bases per byte, MSB first.
/// A=00, C=01, G=10, T=11. N is encoded as 00 with a separate N-mask.
///
/// Returns (packed_data, n_mask) where n_mask has 1-bits for N positions.
pub fn encode_2bit(sequence: &[Nucleotide]) -> (Vec<u8>, Vec<u8>) {
    let num_bytes = (sequence.len() + 3) / 4;
    let mut packed = vec![0u8; num_bytes];
    let mask_bytes = (sequence.len() + 7) / 8;
    let mut n_mask = vec![0u8; mask_bytes];

    for (i, &base) in sequence.iter().enumerate() {
        let byte_idx = i / 4;
        let bit_offset = 6 - (i % 4) * 2; // MSB first: positions 6,4,2,0

        let bits = match base {
            Nucleotide::A => 0b00,
            Nucleotide::C => 0b01,
            Nucleotide::G => 0b10,
            Nucleotide::T => 0b11,
            Nucleotide::N => {
                // Mark in N-mask
                n_mask[i / 8] |= 1 << (7 - i % 8);
                0b00 // Encode as A, disambiguated by mask
            }
        };

        packed[byte_idx] |= bits << bit_offset;
    }

    (packed, n_mask)
}

/// Decode 2-bit packed nucleotides back to sequence
pub fn decode_2bit(packed: &[u8], n_mask: &[u8], length: usize) -> Vec<Nucleotide> {
    let mut sequence = Vec::with_capacity(length);

    for i in 0..length {
        let byte_idx = i / 4;
        let bit_offset = 6 - (i % 4) * 2;
        let bits = (packed[byte_idx] >> bit_offset) & 0b11;

        // Check N-mask
        let is_n = if i / 8 < n_mask.len() {
            (n_mask[i / 8] >> (7 - i % 8)) & 1 == 1
        } else {
            false
        };

        let base = if is_n {
            Nucleotide::N
        } else {
            match bits {
                0b00 => Nucleotide::A,
                0b01 => Nucleotide::C,
                0b10 => Nucleotide::G,
                0b11 => Nucleotide::T,
                _ => unreachable!(),
            }
        };

        sequence.push(base);
    }

    sequence
}

/// Compress quality scores using 6-bit encoding (0-63 range, Phred capped)
pub fn encode_quality(qualities: &[u8]) -> Vec<u8> {
    // Pack four 6-bit values into three bytes
    let mut encoded = Vec::with_capacity((qualities.len() * 6 + 7) / 8);
    let mut bit_buffer: u64 = 0;
    let mut bits_in_buffer = 0;

    for &q in qualities {
        let q6 = q.min(63) as u64; // Cap at 6 bits
        bit_buffer = (bit_buffer << 6) | q6;
        bits_in_buffer += 6;

        while bits_in_buffer >= 8 {
            bits_in_buffer -= 8;
            encoded.push((bit_buffer >> bits_in_buffer) as u8);
            bit_buffer &= (1 << bits_in_buffer) - 1;
        }
    }

    // Flush remaining bits
    if bits_in_buffer > 0 {
        encoded.push((bit_buffer << (8 - bits_in_buffer)) as u8);
    }

    encoded
}

/// Decode 6-bit compressed quality scores
pub fn decode_quality(encoded: &[u8], count: usize) -> Vec<u8> {
    let mut qualities = Vec::with_capacity(count);
    let mut bit_buffer: u64 = 0;
    let mut bits_in_buffer = 0;
    let mut byte_idx = 0;

    for _ in 0..count {
        while bits_in_buffer < 6 && byte_idx < encoded.len() {
            bit_buffer = (bit_buffer << 8) | encoded[byte_idx] as u64;
            bits_in_buffer += 8;
            byte_idx += 1;
        }

        bits_in_buffer -= 6;
        let q = ((bit_buffer >> bits_in_buffer) & 0x3F) as u8;
        bit_buffer &= (1 << bits_in_buffer) - 1;
        qualities.push(q);
    }

    qualities
}

// ============================================================================
// Sparse Attention Matrix (COO Format)
// ============================================================================

/// Sparse attention matrix stored in COO (Coordinate) format.
/// Efficient for storing pre-computed attention weights between sequence positions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparseAttention {
    /// Row indices (query positions)
    pub rows: Vec<u32>,
    /// Column indices (key positions)
    pub cols: Vec<u32>,
    /// Attention weight values
    pub values: Vec<f32>,
    /// Matrix dimensions (rows, cols)
    pub shape: (u32, u32),
    /// Window size used for computation
    pub window_size: u32,
}

impl SparseAttention {
    /// Create from dense attention matrix, keeping only values above threshold
    pub fn from_dense(matrix: &[f32], rows: usize, cols: usize, threshold: f32) -> Self {
        let mut row_idx = Vec::new();
        let mut col_idx = Vec::new();
        let mut values = Vec::new();

        for i in 0..rows {
            for j in 0..cols {
                let val = matrix[i * cols + j];
                if val.abs() > threshold {
                    row_idx.push(i as u32);
                    col_idx.push(j as u32);
                    values.push(val);
                }
            }
        }

        Self {
            rows: row_idx,
            cols: col_idx,
            values,
            shape: (rows as u32, cols as u32),
            window_size: cols as u32,
        }
    }

    /// Number of non-zero entries
    pub fn nnz(&self) -> usize {
        self.values.len()
    }

    /// Sparsity ratio (fraction of zeros)
    pub fn sparsity(&self) -> f64 {
        let total = self.shape.0 as f64 * self.shape.1 as f64;
        if total == 0.0 {
            return 1.0;
        }
        1.0 - (self.nnz() as f64 / total)
    }

    /// Lookup attention weight at (row, col), returns 0.0 if not stored
    pub fn get(&self, row: u32, col: u32) -> f32 {
        for i in 0..self.values.len() {
            if self.rows[i] == row && self.cols[i] == col {
                return self.values[i];
            }
        }
        0.0
    }

    /// Serialize to bytes (for file storage)
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::new();
        // Shape (8 bytes)
        buf.extend_from_slice(&self.shape.0.to_le_bytes());
        buf.extend_from_slice(&self.shape.1.to_le_bytes());
        // Window size (4 bytes)
        buf.extend_from_slice(&self.window_size.to_le_bytes());
        // NNZ count (4 bytes)
        let nnz = self.nnz() as u32;
        buf.extend_from_slice(&nnz.to_le_bytes());
        // Row indices
        for &r in &self.rows {
            buf.extend_from_slice(&r.to_le_bytes());
        }
        // Column indices
        for &c in &self.cols {
            buf.extend_from_slice(&c.to_le_bytes());
        }
        // Values
        for &v in &self.values {
            buf.extend_from_slice(&v.to_le_bytes());
        }
        buf
    }

    /// Deserialize from bytes
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        if data.len() < 20 {
            return Err(DnaError::InvalidSequence(
                "Attention data too short".to_string(),
            ));
        }
        let shape_0 = u32::from_le_bytes(data[0..4].try_into().unwrap());
        let shape_1 = u32::from_le_bytes(data[4..8].try_into().unwrap());
        let window_size = u32::from_le_bytes(data[8..12].try_into().unwrap());
        let nnz = u32::from_le_bytes(data[12..16].try_into().unwrap()) as usize;

        let expected = 16 + nnz * 12; // 4 bytes row + 4 col + 4 value per entry
        if data.len() < expected {
            return Err(DnaError::InvalidSequence(
                "Attention data truncated".to_string(),
            ));
        }

        let mut offset = 16;
        let rows: Vec<u32> = (0..nnz)
            .map(|_| {
                let v = u32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());
                offset += 4;
                v
            })
            .collect();
        let cols: Vec<u32> = (0..nnz)
            .map(|_| {
                let v = u32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());
                offset += 4;
                v
            })
            .collect();
        let values: Vec<f32> = (0..nnz)
            .map(|_| {
                let v = f32::from_le_bytes(data[offset..offset + 4].try_into().unwrap());
                offset += 4;
                v
            })
            .collect();

        Ok(Self {
            rows,
            cols,
            values,
            shape: (shape_0, shape_1),
            window_size,
        })
    }
}

// ============================================================================
// Variant Tensor (Per-Position Genotype Likelihoods)
// ============================================================================

/// Per-position variant probability tensor.
/// Stores genotype likelihoods for each genomic position using f16-quantized values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantTensor {
    /// Genomic positions with variant data
    pub positions: Vec<u64>,
    /// Reference alleles (2-bit encoded)
    pub ref_alleles: Vec<u8>,
    /// Alternate alleles (2-bit encoded)
    pub alt_alleles: Vec<u8>,
    /// Genotype likelihoods: [P(0/0), P(0/1), P(1/1)] per position (f16 as u16)
    pub likelihoods: Vec<[u16; 3]>,
    /// Quality scores (Phred-scaled)
    pub qualities: Vec<u8>,
}

impl VariantTensor {
    /// Create empty tensor
    pub fn new() -> Self {
        Self {
            positions: Vec::new(),
            ref_alleles: Vec::new(),
            alt_alleles: Vec::new(),
            likelihoods: Vec::new(),
            qualities: Vec::new(),
        }
    }

    /// Add a variant position with genotype likelihoods
    pub fn add_variant(
        &mut self,
        position: u64,
        ref_allele: Nucleotide,
        alt_allele: Nucleotide,
        gl_hom_ref: f32,
        gl_het: f32,
        gl_hom_alt: f32,
        quality: u8,
    ) {
        self.positions.push(position);
        self.ref_alleles.push(nucleotide_to_2bit(ref_allele));
        self.alt_alleles.push(nucleotide_to_2bit(alt_allele));
        self.likelihoods.push([
            f32_to_f16(gl_hom_ref),
            f32_to_f16(gl_het),
            f32_to_f16(gl_hom_alt),
        ]);
        self.qualities.push(quality);
    }

    /// Number of variant positions
    pub fn len(&self) -> usize {
        self.positions.len()
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.positions.is_empty()
    }

    /// Get genotype likelihoods at a position (binary search)
    pub fn get_likelihoods(&self, position: u64) -> Option<[f32; 3]> {
        self.positions.binary_search(&position).ok().map(|idx| {
            [
                f16_to_f32(self.likelihoods[idx][0]),
                f16_to_f32(self.likelihoods[idx][1]),
                f16_to_f32(self.likelihoods[idx][2]),
            ]
        })
    }

    /// Serialize to bytes
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::new();
        let count = self.len() as u32;
        buf.extend_from_slice(&count.to_le_bytes());

        for &pos in &self.positions {
            buf.extend_from_slice(&pos.to_le_bytes());
        }
        buf.extend_from_slice(&self.ref_alleles);
        buf.extend_from_slice(&self.alt_alleles);
        for &gl in &self.likelihoods {
            for &v in &gl {
                buf.extend_from_slice(&v.to_le_bytes());
            }
        }
        buf.extend_from_slice(&self.qualities);
        buf
    }

    /// Deserialize from bytes
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        if data.len() < 4 {
            return Err(DnaError::InvalidSequence(
                "Variant tensor too short".to_string(),
            ));
        }
        let count = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
        let mut offset = 4;

        let positions: Vec<u64> = (0..count)
            .map(|_| {
                let v = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap());
                offset += 8;
                v
            })
            .collect();

        let ref_alleles = data[offset..offset + count].to_vec();
        offset += count;
        let alt_alleles = data[offset..offset + count].to_vec();
        offset += count;

        let likelihoods: Vec<[u16; 3]> = (0..count)
            .map(|_| {
                let a = u16::from_le_bytes(data[offset..offset + 2].try_into().unwrap());
                offset += 2;
                let b = u16::from_le_bytes(data[offset..offset + 2].try_into().unwrap());
                offset += 2;
                let c = u16::from_le_bytes(data[offset..offset + 2].try_into().unwrap());
                offset += 2;
                [a, b, c]
            })
            .collect();

        let qualities = data[offset..offset + count].to_vec();

        Ok(Self {
            positions,
            ref_alleles,
            alt_alleles,
            likelihoods,
            qualities,
        })
    }
}

// ============================================================================
// K-mer Vector Section
// ============================================================================

/// Pre-computed k-mer vector block for HNSW-ready storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KmerVectorBlock {
    /// K-mer size used
    pub k: u32,
    /// Vector dimensions
    pub dimensions: u32,
    /// Region start position in sequence
    pub start_pos: u64,
    /// Region length in bases
    pub region_len: u64,
    /// The k-mer frequency vector (f32)
    pub vector: Vec<f32>,
    /// Optional quantized vector (int8) for fast approximate search
    pub quantized: Option<Vec<i8>>,
    /// Quantization scale factor (to reconstruct f32 from int8)
    pub quant_scale: f32,
}

impl KmerVectorBlock {
    /// Create from a DnaSequence region
    pub fn from_sequence(
        sequence: &DnaSequence,
        start: u64,
        len: u64,
        k: u32,
        dimensions: u32,
    ) -> Result<Self> {
        let end = (start + len).min(sequence.len() as u64);
        let subseq_bases: Vec<Nucleotide> = (start as usize..end as usize)
            .map(|i| sequence.get(i).unwrap_or(Nucleotide::N))
            .collect();
        let subseq = DnaSequence::new(subseq_bases);
        let vector = subseq.to_kmer_vector(k as usize, dimensions as usize)?;

        // Quantize to int8
        let max_abs = vector.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
        let scale = if max_abs > 0.0 { max_abs / 127.0 } else { 1.0 };
        let quantized: Vec<i8> = vector
            .iter()
            .map(|&v| (v / scale).round().max(-128.0).min(127.0) as i8)
            .collect();

        Ok(Self {
            k,
            dimensions,
            start_pos: start,
            region_len: end - start,
            vector,
            quantized: Some(quantized),
            quant_scale: scale,
        })
    }

    /// Cosine similarity between this block and another vector
    pub fn cosine_similarity(&self, other: &[f32]) -> f32 {
        if self.vector.len() != other.len() {
            return 0.0;
        }
        let dot: f32 = self.vector.iter().zip(other).map(|(a, b)| a * b).sum();
        let mag_a: f32 = self.vector.iter().map(|a| a * a).sum::<f32>().sqrt();
        let mag_b: f32 = other.iter().map(|b| b * b).sum::<f32>().sqrt();
        if mag_a == 0.0 || mag_b == 0.0 {
            0.0
        } else {
            dot / (mag_a * mag_b)
        }
    }

    /// Fast approximate similarity using quantized vectors (4x less memory, ~3x faster)
    pub fn fast_similarity(&self, other_quantized: &[i8]) -> f32 {
        match &self.quantized {
            Some(q) if q.len() == other_quantized.len() => {
                let dot: i32 = q
                    .iter()
                    .zip(other_quantized)
                    .map(|(&a, &b)| a as i32 * b as i32)
                    .sum();
                dot as f32 * self.quant_scale * self.quant_scale
            }
            _ => 0.0,
        }
    }
}

// ============================================================================
// RVDNA File Writer
// ============================================================================

/// Builder for creating RVDNA files
pub struct RvdnaWriter {
    header: RvdnaHeader,
    sequence_data: Option<(Vec<u8>, Vec<u8>)>, // (packed, n_mask)
    quality_data: Option<Vec<u8>>,
    kmer_blocks: Vec<KmerVectorBlock>,
    attention: Option<SparseAttention>,
    variants: Option<VariantTensor>,
    metadata: Option<serde_json::Value>,
}

impl RvdnaWriter {
    /// Create a new writer for a sequence
    pub fn new(sequence: &DnaSequence, codec: Codec) -> Self {
        let (packed, n_mask) = encode_2bit(sequence.bases());
        Self {
            header: RvdnaHeader::new(sequence.len() as u64, codec),
            sequence_data: Some((packed, n_mask)),
            quality_data: None,
            kmer_blocks: Vec::new(),
            attention: None,
            variants: None,
            metadata: None,
        }
    }

    /// Add quality scores
    pub fn with_quality(mut self, qualities: &[u8]) -> Self {
        self.quality_data = Some(encode_quality(qualities));
        self.header = self.header.with_quality();
        self
    }

    /// Pre-compute and add k-mer vectors for the sequence
    pub fn with_kmer_vectors(
        mut self,
        sequence: &DnaSequence,
        k: u32,
        dimensions: u32,
        block_size: u64,
    ) -> Result<Self> {
        let seq_len = sequence.len() as u64;
        let mut pos = 0u64;
        while pos < seq_len {
            let len = block_size.min(seq_len - pos);
            if len >= k as u64 {
                let block = KmerVectorBlock::from_sequence(sequence, pos, len, k, dimensions)?;
                self.kmer_blocks.push(block);
            }
            pos += block_size;
        }
        Ok(self)
    }

    /// Add pre-computed attention weights
    pub fn with_attention(mut self, attention: SparseAttention) -> Self {
        self.attention = Some(attention);
        self
    }

    /// Add variant tensor
    pub fn with_variants(mut self, variants: VariantTensor) -> Self {
        self.variants = Some(variants);
        self
    }

    /// Add metadata
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = Some(metadata);
        self
    }

    /// Write the complete RVDNA file
    pub fn write<W: Write>(&mut self, writer: &mut W) -> Result<usize> {
        let mut sections_data: Vec<Vec<u8>> = vec![Vec::new(); NUM_SECTIONS];

        // Section 0: Sequence data
        if let Some((ref packed, ref n_mask)) = self.sequence_data {
            let mut sec = Vec::new();
            // Packed length (4 bytes)
            sec.extend_from_slice(&(packed.len() as u32).to_le_bytes());
            // N-mask length (4 bytes)
            sec.extend_from_slice(&(n_mask.len() as u32).to_le_bytes());
            // Packed data
            sec.extend_from_slice(packed);
            // N-mask
            sec.extend_from_slice(n_mask);
            // Quality (if present)
            if let Some(ref qual) = self.quality_data {
                sec.extend_from_slice(&(qual.len() as u32).to_le_bytes());
                sec.extend_from_slice(qual);
            }
            sections_data[0] = sec;
        }

        // Section 1: K-mer vectors
        if !self.kmer_blocks.is_empty() {
            let mut sec = Vec::new();
            sec.extend_from_slice(&(self.kmer_blocks.len() as u32).to_le_bytes());
            for block in &self.kmer_blocks {
                let block_bytes = serde_json::to_vec(block)
                    .map_err(|e| DnaError::PipelineError(e.to_string()))?;
                sec.extend_from_slice(&(block_bytes.len() as u32).to_le_bytes());
                sec.extend_from_slice(&block_bytes);
            }
            sections_data[1] = sec;
        }

        // Section 2: Attention weights
        if let Some(ref attn) = self.attention {
            sections_data[2] = attn.to_bytes();
        }

        // Section 3: Variant tensor
        if let Some(ref variants) = self.variants {
            sections_data[3] = variants.to_bytes();
        }

        // Section 6: Metadata
        if let Some(ref meta) = self.metadata {
            let meta_bytes =
                serde_json::to_vec(meta).map_err(|e| DnaError::PipelineError(e.to_string()))?;
            sections_data[6] = meta_bytes;
        }

        // Calculate section offsets (align each section)
        let header_len = HEADER_SIZE + 4; // +4 for checksum
        let mut current_offset = align_up(header_len, SECTION_ALIGN);

        for i in 0..NUM_SECTIONS {
            if !sections_data[i].is_empty() {
                self.header.sections[i] = SectionEntry {
                    offset: current_offset,
                    size: sections_data[i].len() as u64,
                };
                current_offset = align_up(
                    current_offset + sections_data[i].len() as u64,
                    SECTION_ALIGN,
                );
            }
        }

        // Write header
        let header_bytes = self.header.to_bytes();
        writer.write_all(&header_bytes).map_err(DnaError::IoError)?;

        // Pad to first section
        let pad_len =
            align_up(header_bytes.len() as u64, SECTION_ALIGN) - header_bytes.len() as u64;
        writer
            .write_all(&vec![0u8; pad_len as usize])
            .map_err(DnaError::IoError)?;

        let mut total_written = header_bytes.len() + pad_len as usize;

        // Write sections
        for i in 0..NUM_SECTIONS {
            if !sections_data[i].is_empty() {
                // Pad to alignment
                let needed = self.header.sections[i].offset as usize - total_written;
                if needed > 0 {
                    writer
                        .write_all(&vec![0u8; needed])
                        .map_err(DnaError::IoError)?;
                    total_written += needed;
                }
                writer
                    .write_all(&sections_data[i])
                    .map_err(DnaError::IoError)?;
                total_written += sections_data[i].len();
            }
        }

        Ok(total_written)
    }
}

// ============================================================================
// RVDNA File Reader
// ============================================================================

/// Reader for RVDNA files
pub struct RvdnaReader {
    /// Parsed file header
    pub header: RvdnaHeader,
    /// Raw file data (for section access)
    data: Vec<u8>,
}

impl RvdnaReader {
    /// Open and parse an RVDNA file from bytes
    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
        let header = RvdnaHeader::from_bytes(&data)?;
        Ok(Self { header, data })
    }

    /// Read from a reader
    pub fn from_reader<R: Read>(reader: &mut R) -> Result<Self> {
        let mut data = Vec::new();
        reader.read_to_end(&mut data).map_err(DnaError::IoError)?;
        Self::from_bytes(data)
    }

    /// Extract the DNA sequence
    pub fn read_sequence(&self) -> Result<DnaSequence> {
        let section = &self.header.sections[SectionType::Sequence as usize];
        if section.size == 0 {
            return Err(DnaError::EmptySequence);
        }

        let start = section.offset as usize;
        let packed_len =
            u32::from_le_bytes(self.data[start..start + 4].try_into().unwrap()) as usize;
        let mask_len =
            u32::from_le_bytes(self.data[start + 4..start + 8].try_into().unwrap()) as usize;

        let packed = &self.data[start + 8..start + 8 + packed_len];
        let n_mask = &self.data[start + 8 + packed_len..start + 8 + packed_len + mask_len];

        let bases = decode_2bit(packed, n_mask, self.header.sequence_length as usize);
        Ok(DnaSequence::new(bases))
    }

    /// Read k-mer vector blocks
    pub fn read_kmer_vectors(&self) -> Result<Vec<KmerVectorBlock>> {
        let section = &self.header.sections[SectionType::KmerVectors as usize];
        if section.size == 0 {
            return Ok(Vec::new());
        }

        let start = section.offset as usize;
        let count = u32::from_le_bytes(self.data[start..start + 4].try_into().unwrap()) as usize;

        let mut blocks = Vec::with_capacity(count);
        let mut offset = start + 4;

        for _ in 0..count {
            let block_len =
                u32::from_le_bytes(self.data[offset..offset + 4].try_into().unwrap()) as usize;
            offset += 4;
            let block: KmerVectorBlock =
                serde_json::from_slice(&self.data[offset..offset + block_len])
                    .map_err(|e| DnaError::PipelineError(e.to_string()))?;
            blocks.push(block);
            offset += block_len;
        }

        Ok(blocks)
    }

    /// Read attention weights
    pub fn read_attention(&self) -> Result<Option<SparseAttention>> {
        let section = &self.header.sections[SectionType::AttentionWeights as usize];
        if section.size == 0 {
            return Ok(None);
        }
        let start = section.offset as usize;
        let end = start + section.size as usize;
        Ok(Some(SparseAttention::from_bytes(&self.data[start..end])?))
    }

    /// Read variant tensor
    pub fn read_variants(&self) -> Result<Option<VariantTensor>> {
        let section = &self.header.sections[SectionType::VariantTensor as usize];
        if section.size == 0 {
            return Ok(None);
        }
        let start = section.offset as usize;
        let end = start + section.size as usize;
        Ok(Some(VariantTensor::from_bytes(&self.data[start..end])?))
    }

    /// Read metadata
    pub fn read_metadata(&self) -> Result<Option<serde_json::Value>> {
        let section = &self.header.sections[SectionType::Metadata as usize];
        if section.size == 0 {
            return Ok(None);
        }
        let start = section.offset as usize;
        let end = start + section.size as usize;
        let meta: serde_json::Value = serde_json::from_slice(&self.data[start..end])
            .map_err(|e| DnaError::PipelineError(e.to_string()))?;
        Ok(Some(meta))
    }

    /// Get file size statistics
    pub fn stats(&self) -> RvdnaStats {
        let mut section_sizes = [0u64; NUM_SECTIONS];
        for i in 0..NUM_SECTIONS {
            section_sizes[i] = self.header.sections[i].size;
        }

        let total_size = self.data.len() as u64;
        let seq_len = self.header.sequence_length;
        let bits_per_base = if seq_len > 0 {
            (section_sizes[0] as f64 * 8.0) / seq_len as f64
        } else {
            0.0
        };

        RvdnaStats {
            total_size,
            sequence_length: seq_len,
            bits_per_base,
            section_sizes,
            compression_ratio: if seq_len > 0 {
                seq_len as f64 / total_size as f64
            } else {
                0.0
            },
        }
    }
}

/// File statistics
#[derive(Debug, Clone)]
pub struct RvdnaStats {
    /// Total file size in bytes
    pub total_size: u64,
    /// Sequence length in bases
    pub sequence_length: u64,
    /// Average bits per base for sequence section
    pub bits_per_base: f64,
    /// Size of each section in bytes
    pub section_sizes: [u64; NUM_SECTIONS],
    /// Bases per byte (overall compression)
    pub compression_ratio: f64,
}

// ============================================================================
// Conversion: FASTA → RVDNA
// ============================================================================

/// Convert a FASTA-like string to RVDNA format with pre-computed AI features
pub fn fasta_to_rvdna(
    sequence_str: &str,
    k: u32,
    vector_dims: u32,
    block_size: u64,
) -> Result<Vec<u8>> {
    let sequence = DnaSequence::from_str(sequence_str)?;

    let metadata = serde_json::json!({
        "format": "RVDNA",
        "version": FORMAT_VERSION,
        "source": "fasta_conversion",
        "sequence_length": sequence.len(),
        "kmer_k": k,
        "vector_dimensions": vector_dims,
        "block_size": block_size,
    });

    let mut writer = RvdnaWriter::new(&sequence, Codec::None)
        .with_kmer_vectors(&sequence, k, vector_dims, block_size)?
        .with_metadata(metadata);

    let mut output = Vec::new();
    writer.write(&mut output)?;
    Ok(output)
}

// ============================================================================
// Utility Functions
// ============================================================================

/// CRC32 lookup table (precomputed for IEEE polynomial 0xEDB88320)
const CRC32_TABLE: [u32; 256] = {
    let mut table = [0u32; 256];
    let mut i = 0u32;
    while i < 256 {
        let mut crc = i;
        let mut j = 0;
        while j < 8 {
            if crc & 1 != 0 {
                crc = (crc >> 1) ^ 0xEDB88320;
            } else {
                crc >>= 1;
            }
            j += 1;
        }
        table[i as usize] = crc;
        i += 1;
    }
    table
};

/// CRC32 using precomputed lookup table (~8x faster than bit-by-bit)
fn crc32_simple(data: &[u8]) -> u32 {
    let mut crc: u32 = 0xFFFFFFFF;
    for &byte in data {
        let idx = ((crc ^ byte as u32) & 0xFF) as usize;
        crc = CRC32_TABLE[idx] ^ (crc >> 8);
    }
    !crc
}

/// Align value up to boundary
fn align_up(value: u64, alignment: u64) -> u64 {
    (value + alignment - 1) & !(alignment - 1)
}

/// Convert nucleotide to 2-bit encoding
fn nucleotide_to_2bit(n: Nucleotide) -> u8 {
    match n {
        Nucleotide::A => 0,
        Nucleotide::C => 1,
        Nucleotide::G => 2,
        Nucleotide::T => 3,
        Nucleotide::N => 0,
    }
}

/// Simple f32 → f16 conversion (IEEE 754 half precision)
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = (bits >> 31) & 1;
    let exponent = ((bits >> 23) & 0xFF) as i32;
    let mantissa = bits & 0x7FFFFF;

    if exponent == 0xFF {
        // Inf/NaN
        return ((sign << 15) | 0x7C00 | (mantissa >> 13).min(1)) as u16;
    }

    let new_exp = exponent - 127 + 15;
    if new_exp >= 31 {
        // Overflow → Inf
        return ((sign << 15) | 0x7C00) as u16;
    }
    if new_exp <= 0 {
        // Underflow → 0
        return (sign << 15) as u16;
    }

    ((sign << 15) | (new_exp as u32) << 10 | (mantissa >> 13)) as u16
}

/// Simple f16 → f32 conversion
fn f16_to_f32(half: u16) -> f32 {
    let sign = ((half >> 15) & 1) as u32;
    let exponent = ((half >> 10) & 0x1F) as u32;
    let mantissa = (half & 0x3FF) as u32;

    if exponent == 0x1F {
        // Inf/NaN
        let bits = (sign << 31) | 0x7F800000 | (mantissa << 13);
        return f32::from_bits(bits);
    }

    if exponent == 0 {
        if mantissa == 0 {
            return f32::from_bits(sign << 31); // +/- 0
        }
        // Denormalized
        let bits = (sign << 31) | ((exponent + 127 - 15) << 23) | (mantissa << 13);
        return f32::from_bits(bits);
    }

    let bits = (sign << 31) | ((exponent + 127 - 15) << 23) | (mantissa << 13);
    f32::from_bits(bits)
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_2bit_encoding_roundtrip() {
        let bases = vec![
            Nucleotide::A,
            Nucleotide::C,
            Nucleotide::G,
            Nucleotide::T,
            Nucleotide::A,
            Nucleotide::N,
            Nucleotide::G,
            Nucleotide::T,
            Nucleotide::C,
        ];
        let (packed, mask) = encode_2bit(&bases);
        let decoded = decode_2bit(&packed, &mask, bases.len());
        assert_eq!(bases, decoded);
    }

    #[test]
    fn test_2bit_compression_ratio() {
        // 1000 bases should pack into 250 bytes
        let bases: Vec<Nucleotide> = (0..1000)
            .map(|i| match i % 4 {
                0 => Nucleotide::A,
                1 => Nucleotide::C,
                2 => Nucleotide::G,
                _ => Nucleotide::T,
            })
            .collect();
        let (packed, _mask) = encode_2bit(&bases);
        assert_eq!(packed.len(), 250); // 4x compression vs 1 byte per base
    }

    #[test]
    fn test_quality_encoding_roundtrip() {
        let qualities: Vec<u8> = (0..100).map(|i| (i % 42) as u8).collect();
        let encoded = encode_quality(&qualities);
        let decoded = decode_quality(&encoded, qualities.len());
        assert_eq!(qualities, decoded);
    }

    #[test]
    fn test_quality_compression_ratio() {
        // 6-bit encoding: 100 values = 75 bytes (vs 100 bytes raw)
        let qualities: Vec<u8> = vec![30; 100];
        let encoded = encode_quality(&qualities);
        assert!(
            encoded.len() <= 75,
            "6-bit should compress: {} bytes",
            encoded.len()
        );
    }

    #[test]
    fn test_sparse_attention_roundtrip() {
        let dense = vec![
            0.0, 0.5, 0.0, 0.0, 0.3, 0.0, 0.0, 0.7, 0.0, 0.0, 0.9, 0.0, 0.0, 0.1, 0.0, 0.0,
        ];
        let sparse = SparseAttention::from_dense(&dense, 4, 4, 0.05);
        assert_eq!(sparse.nnz(), 5); // 5 values > 0.05
        assert!(sparse.sparsity() > 0.6);

        // Roundtrip through bytes
        let bytes = sparse.to_bytes();
        let restored = SparseAttention::from_bytes(&bytes).unwrap();
        assert_eq!(restored.nnz(), 5);
        assert!((restored.get(0, 1) - 0.5).abs() < 1e-6);
        assert!((restored.get(1, 3) - 0.7).abs() < 1e-6);
    }

    #[test]
    fn test_variant_tensor_binary_search() {
        let mut vt = VariantTensor::new();
        vt.add_variant(100, Nucleotide::A, Nucleotide::G, 0.01, 0.99, 0.0, 40);
        vt.add_variant(200, Nucleotide::C, Nucleotide::T, 0.0, 0.5, 0.5, 35);
        vt.add_variant(300, Nucleotide::G, Nucleotide::A, 0.9, 0.1, 0.0, 50);

        let gl = vt.get_likelihoods(200).unwrap();
        assert!(gl[1] > 0.4); // Het likelihood
        assert!(vt.get_likelihoods(150).is_none()); // Not found

        // Roundtrip
        let bytes = vt.to_bytes();
        let restored = VariantTensor::from_bytes(&bytes).unwrap();
        assert_eq!(restored.len(), 3);
    }

    #[test]
    fn test_f16_roundtrip() {
        for &val in &[0.0f32, 1.0, -1.0, 0.5, 0.001, 100.0] {
            let half = f32_to_f16(val);
            let back = f16_to_f32(half);
            let rel_err = if val.abs() > 0.0 {
                (back - val).abs() / val.abs()
            } else {
                back.abs()
            };
            assert!(
                rel_err < 0.01,
                "f16 roundtrip failed for {}: got {}",
                val,
                back
            );
        }
    }

    #[test]
    fn test_header_roundtrip() {
        let mut header = RvdnaHeader::new(10000, Codec::None).with_quality();
        header.sections[0] = SectionEntry {
            offset: 192,
            size: 2500,
        };
        header.sections[1] = SectionEntry {
            offset: 2752,
            size: 8192,
        };

        let bytes = header.to_bytes();
        let restored = RvdnaHeader::from_bytes(&bytes).unwrap();

        assert_eq!(restored.version, FORMAT_VERSION);
        assert_eq!(restored.codec, Codec::None);
        assert!(restored.has_quality());
        assert_eq!(restored.sequence_length, 10000);
        assert_eq!(restored.sections[0].offset, 192);
        assert_eq!(restored.sections[0].size, 2500);
    }

    #[test]
    fn test_full_rvdna_write_read() {
        // Create a sequence
        let bases: Vec<Nucleotide> = "ACGTACGTACGTACGTACGTACGTACGTACGT"
            .chars()
            .map(|c| match c {
                'A' => Nucleotide::A,
                'C' => Nucleotide::C,
                'G' => Nucleotide::G,
                'T' => Nucleotide::T,
                _ => Nucleotide::N,
            })
            .collect();
        let sequence = DnaSequence::new(bases);

        // Build RVDNA file
        let mut writer = RvdnaWriter::new(&sequence, Codec::None)
            .with_metadata(serde_json::json!({"sample": "test"}));

        let mut output = Vec::new();
        let written = writer.write(&mut output).unwrap();
        assert!(written > 0);

        // Read it back
        let reader = RvdnaReader::from_bytes(output).unwrap();
        assert_eq!(reader.header.sequence_length, 32);
        assert_eq!(reader.header.codec, Codec::None);

        let restored_seq = reader.read_sequence().unwrap();
        assert_eq!(restored_seq.len(), 32);
        assert_eq!(restored_seq.to_string(), sequence.to_string());

        let meta = reader.read_metadata().unwrap().unwrap();
        assert_eq!(meta["sample"], "test");

        // Check stats
        let stats = reader.stats();
        assert!(
            stats.bits_per_base < 8.0,
            "Should compress below 1 byte/base"
        );
    }

    #[test]
    fn test_rvdna_with_kmer_vectors() {
        let seq = DnaSequence::from_str("ACGTACGTACGTACGTACGTACGTACGTACGT").unwrap();
        let mut writer = RvdnaWriter::new(&seq, Codec::None)
            .with_kmer_vectors(&seq, 11, 256, 32)
            .unwrap();

        let mut output = Vec::new();
        writer.write(&mut output).unwrap();

        let reader = RvdnaReader::from_bytes(output).unwrap();
        let blocks = reader.read_kmer_vectors().unwrap();
        assert!(!blocks.is_empty());
        assert_eq!(blocks[0].k, 11);
        assert_eq!(blocks[0].dimensions, 256);
        assert!(blocks[0].quantized.is_some());
    }

    #[test]
    fn test_fasta_to_rvdna_conversion() {
        let fasta_seq = "ACGTACGTACGTACGTACGTACGTACGTACGT";
        let rvdna_bytes = fasta_to_rvdna(fasta_seq, 11, 256, 1000).unwrap();

        let reader = RvdnaReader::from_bytes(rvdna_bytes).unwrap();
        let restored = reader.read_sequence().unwrap();
        assert_eq!(restored.to_string(), fasta_seq);

        let stats = reader.stats();
        assert!(stats.total_size > 0);
        // Sequence section uses 2-bit encoding (~2 bits/base at scale)
        // For short sequences, overhead from length headers increases ratio
        // At 1000+ bases, this drops well below 3 bits/base
        assert!(
            stats.bits_per_base < 8.0,
            "Should beat 1-byte-per-base encoding, got {:.1} bits/base",
            stats.bits_per_base
        );
    }

    #[test]
    fn test_kmer_vector_similarity() {
        let seq1 = DnaSequence::from_str("ACGTACGTACGTACGTACGTACGTACGTACGT").unwrap();
        let seq2 = DnaSequence::from_str("ACGTACGTACGTACGTACGTACGTACGTACGG").unwrap(); // 1 base diff
        let seq3 = DnaSequence::from_str("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT").unwrap(); // very different

        let block1 = KmerVectorBlock::from_sequence(&seq1, 0, 32, 11, 256).unwrap();
        let vec2 = seq2.to_kmer_vector(11, 256).unwrap();
        let vec3 = seq3.to_kmer_vector(11, 256).unwrap();

        let sim_similar = block1.cosine_similarity(&vec2);
        let sim_different = block1.cosine_similarity(&vec3);

        assert!(
            sim_similar > sim_different,
            "Similar sequence ({}) should score higher than different ({})",
            sim_similar,
            sim_different
        );
    }
}