//! RuvLTRA model registry with pre-configured models
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
|
|
/// Model size category.
///
/// Buckets models by parameter count so callers can filter the registry
/// (e.g. `RuvLtraRegistry::list_by_size`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ModelSize {
    /// Tiny models (< 0.5B parameters; used for LoRA adapters here)
    Tiny,
    /// Small models (0.5B - 1B parameters)
    Small,
    /// Medium models (1B - 5B parameters)
    Medium,
    /// Large models (5B - 10B parameters)
    Large,
}
|
|
|
|
/// Quantization level.
///
/// Determines the trade-off between file size / memory footprint and model
/// quality; see `size_multiplier` and `memory_reduction` for the numbers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuantizationLevel {
    /// 4-bit quantization (smallest, ~662MB for 0.5B model)
    Q4,
    /// 5-bit quantization (balanced)
    Q5,
    /// 8-bit quantization (highest quality)
    Q8,
    /// FP16 (no quantization)
    FP16,
}
|
|
|
|
impl QuantizationLevel {
|
|
/// Get file size multiplier relative to FP16
|
|
pub fn size_multiplier(&self) -> f32 {
|
|
match self {
|
|
Self::Q4 => 0.25,
|
|
Self::Q5 => 0.3125,
|
|
Self::Q8 => 0.5,
|
|
Self::FP16 => 1.0,
|
|
}
|
|
}
|
|
|
|
/// Get expected memory reduction
|
|
pub fn memory_reduction(&self) -> f32 {
|
|
match self {
|
|
Self::Q4 => 0.75, // 75% reduction
|
|
Self::Q5 => 0.69, // 69% reduction
|
|
Self::Q8 => 0.50, // 50% reduction
|
|
Self::FP16 => 0.0, // No reduction
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Hardware requirements for model execution.
///
/// Carried by every `ModelInfo` entry; `ModelInfo::fits_in_ram` compares
/// available memory against `min_ram_gb`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRequirements {
    /// Minimum RAM in GB required to load and run the model
    pub min_ram_gb: f32,
    /// Recommended RAM in GB for comfortable operation
    pub recommended_ram_gb: f32,
    /// Supports Apple Neural Engine
    pub supports_ane: bool,
    /// Supports Metal GPU acceleration
    pub supports_metal: bool,
    /// Supports CUDA
    pub supports_cuda: bool,
    /// Minimum GPU VRAM in GB (if using GPU); `None` when GPU is not
    /// required/applicable (e.g. small LoRA adapters)
    pub min_vram_gb: Option<f32>,
}
|
|
|
|
/// Model information in the registry.
///
/// One entry per downloadable artifact — either a full base model (GGUF)
/// or a LoRA adapter (`is_adapter == true`, with `base_model` set).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
    /// Model identifier (e.g., "ruvltra-small"); also the registry key
    pub id: String,
    /// Human-readable display name
    pub name: String,
    /// HuggingFace repository (e.g., "ruvnet/ruvltra-small")
    pub repo: String,
    /// Model filename on HF Hub (joined with `repo` in `download_url`)
    pub filename: String,
    /// Model size category
    pub size: ModelSize,
    /// Quantization level
    pub quantization: QuantizationLevel,
    /// File size in bytes (used for download-time estimation)
    pub size_bytes: u64,
    /// SHA256 checksum; `None` until the artifact is published
    pub checksum: Option<String>,
    /// Number of parameters (in billions)
    pub params_b: f32,
    /// Context window size in tokens
    pub context_length: usize,
    /// Hardware requirements
    pub hardware: HardwareRequirements,
    /// Model description
    pub description: String,
    /// Whether this is a LoRA adapter (requires a base model to run)
    pub is_adapter: bool,
    /// Base model registry ID required (for adapters); `None` for base models
    pub base_model: Option<String>,
    /// Includes SONA pre-trained weights
    pub has_sona_weights: bool,
}
|
|
|
|
impl ModelInfo {
|
|
/// Get download URL for this model
|
|
pub fn download_url(&self) -> String {
|
|
format!(
|
|
"https://huggingface.co/{}/resolve/main/{}",
|
|
self.repo, self.filename
|
|
)
|
|
}
|
|
|
|
/// Get HuggingFace Hub page URL
|
|
pub fn hub_url(&self) -> String {
|
|
format!("https://huggingface.co/{}", self.repo)
|
|
}
|
|
|
|
/// Estimate download time in seconds at given speed (MB/s)
|
|
pub fn estimate_download_time(&self, speed_mbps: f32) -> f32 {
|
|
let size_mb = self.size_bytes as f32 / (1024.0 * 1024.0);
|
|
size_mb / speed_mbps
|
|
}
|
|
|
|
/// Check if model fits in available RAM
|
|
pub fn fits_in_ram(&self, available_gb: f32) -> bool {
|
|
available_gb >= self.hardware.min_ram_gb
|
|
}
|
|
}
|
|
|
|
/// RuvLTRA model registry
|
|
pub struct RuvLtraRegistry {
|
|
models: HashMap<String, ModelInfo>,
|
|
}
|
|
|
|
impl RuvLtraRegistry {
|
|
/// Create a new registry with pre-configured models
|
|
pub fn new() -> Self {
|
|
let mut models = HashMap::new();
|
|
|
|
// RuvLTRA-Small (0.5B) - Q4 quantization
|
|
models.insert(
|
|
"ruvltra-small".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small".to_string(),
|
|
name: "RuvLTRA Small (0.5B Q4)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-0.5b-q4_k_m.gguf".to_string(),
|
|
size: ModelSize::Small,
|
|
quantization: QuantizationLevel::Q4,
|
|
size_bytes: 662_000_000, // ~662MB
|
|
checksum: None, // Set after publishing
|
|
params_b: 0.5,
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 1.0,
|
|
recommended_ram_gb: 2.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(1.0),
|
|
},
|
|
description: "Compact RuvLTRA model optimized for edge devices. \
|
|
Includes SONA pre-trained weights for adaptive learning."
|
|
.to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Small (0.5B) - Q8 quantization
|
|
models.insert(
|
|
"ruvltra-small-q8".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small-q8".to_string(),
|
|
name: "RuvLTRA Small (0.5B Q8)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-0.5b-q8_0.gguf".to_string(),
|
|
size: ModelSize::Small,
|
|
quantization: QuantizationLevel::Q8,
|
|
size_bytes: 1_324_000_000, // ~1.3GB
|
|
checksum: None,
|
|
params_b: 0.5,
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 2.0,
|
|
recommended_ram_gb: 4.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(2.0),
|
|
},
|
|
description: "High-quality Q8 quantization for better accuracy.".to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Medium (3B) - Q4 quantization
|
|
models.insert(
|
|
"ruvltra-medium".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-medium".to_string(),
|
|
name: "RuvLTRA Medium (3B Q4)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-medium-1.1b-q4_k_m.gguf".to_string(),
|
|
size: ModelSize::Medium,
|
|
quantization: QuantizationLevel::Q4,
|
|
size_bytes: 2_100_000_000, // ~2.1GB
|
|
checksum: None,
|
|
params_b: 3.0,
|
|
context_length: 8192,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 4.0,
|
|
recommended_ram_gb: 8.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(4.0),
|
|
},
|
|
description: "Balanced RuvLTRA model for general-purpose tasks. \
|
|
Extended context window with SONA learning."
|
|
.to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Medium (3B) - Q8 quantization
|
|
models.insert(
|
|
"ruvltra-medium-q8".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-medium-q8".to_string(),
|
|
name: "RuvLTRA Medium (3B Q8)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-medium-1.1b-q8_0.gguf".to_string(),
|
|
size: ModelSize::Medium,
|
|
quantization: QuantizationLevel::Q8,
|
|
size_bytes: 4_200_000_000, // ~4.2GB
|
|
checksum: None,
|
|
params_b: 3.0,
|
|
context_length: 8192,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 6.0,
|
|
recommended_ram_gb: 12.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(6.0),
|
|
},
|
|
description: "High-quality Medium model with Q8 quantization.".to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Small-Coder (LoRA adapter)
|
|
models.insert(
|
|
"ruvltra-small-coder".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small-coder".to_string(),
|
|
name: "RuvLTRA Small Coder (LoRA)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-coder-lora.safetensors".to_string(),
|
|
size: ModelSize::Tiny,
|
|
quantization: QuantizationLevel::FP16,
|
|
size_bytes: 50_000_000, // ~50MB (LoRA is small)
|
|
checksum: None,
|
|
params_b: 0.05, // Adapter parameters
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 0.1,
|
|
recommended_ram_gb: 0.5,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: None,
|
|
},
|
|
description: "LoRA adapter for code completion. \
|
|
Requires ruvltra-small or ruvltra-small-q8 base model."
|
|
.to_string(),
|
|
is_adapter: true,
|
|
base_model: Some("ruvltra-small".to_string()),
|
|
has_sona_weights: false,
|
|
},
|
|
);
|
|
|
|
Self { models }
|
|
}
|
|
|
|
/// Get model info by ID
|
|
pub fn get(&self, id: &str) -> Option<&ModelInfo> {
|
|
self.models.get(id)
|
|
}
|
|
|
|
/// Get all available models
|
|
pub fn list_all(&self) -> Vec<&ModelInfo> {
|
|
self.models.values().collect()
|
|
}
|
|
|
|
/// Get models by size
|
|
pub fn list_by_size(&self, size: ModelSize) -> Vec<&ModelInfo> {
|
|
self.models.values().filter(|m| m.size == size).collect()
|
|
}
|
|
|
|
/// Get base models (exclude adapters)
|
|
pub fn list_base_models(&self) -> Vec<&ModelInfo> {
|
|
self.models.values().filter(|m| !m.is_adapter).collect()
|
|
}
|
|
|
|
/// Get adapters for a specific base model
|
|
pub fn list_adapters(&self, base_model: &str) -> Vec<&ModelInfo> {
|
|
self.models
|
|
.values()
|
|
.filter(|m| {
|
|
m.is_adapter
|
|
&& m.base_model
|
|
.as_ref()
|
|
.map(|b| b == base_model)
|
|
.unwrap_or(false)
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Recommend model based on available RAM
|
|
pub fn recommend_for_ram(&self, available_gb: f32) -> Option<&ModelInfo> {
|
|
let mut candidates: Vec<_> = self
|
|
.models
|
|
.values()
|
|
.filter(|m| !m.is_adapter && m.fits_in_ram(available_gb))
|
|
.collect();
|
|
|
|
// Sort by parameters (largest that fits)
|
|
candidates.sort_by(|a, b| b.params_b.partial_cmp(&a.params_b).unwrap());
|
|
|
|
candidates.first().copied()
|
|
}
|
|
|
|
/// Get model IDs
|
|
pub fn model_ids(&self) -> Vec<String> {
|
|
self.models.keys().cloned().collect()
|
|
}
|
|
}
|
|
|
|
impl Default for RuvLtraRegistry {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// Get model info by ID (convenience function)
|
|
pub fn get_model_info(id: &str) -> Option<ModelInfo> {
|
|
RuvLtraRegistry::new().get(id).cloned()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Known IDs resolve; unknown IDs return None.
    #[test]
    fn test_registry_initialization() {
        let registry = RuvLtraRegistry::new();
        assert!(registry.get("ruvltra-small").is_some());
        assert!(registry.get("ruvltra-medium").is_some());
        assert!(registry.get("nonexistent").is_none());
    }

    // Spot-check the fields of the small Q4 entry.
    #[test]
    fn test_model_info() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        assert_eq!(model.params_b, 0.5);
        assert_eq!(model.quantization, QuantizationLevel::Q4);
        assert!(model.has_sona_weights);
        assert!(!model.is_adapter);
    }

    // Size filtering returns at least the two Small entries.
    #[test]
    fn test_list_by_size() {
        let registry = RuvLtraRegistry::new();
        let small_models = registry.list_by_size(ModelSize::Small);
        assert!(!small_models.is_empty());
    }

    // The coder LoRA is registered as an adapter for ruvltra-small.
    #[test]
    fn test_adapters() {
        let registry = RuvLtraRegistry::new();
        let adapters = registry.list_adapters("ruvltra-small");
        assert!(!adapters.is_empty());
        assert!(adapters[0].is_adapter);
    }

    // Recommendation picks the largest base model that fits the RAM budget.
    #[test]
    fn test_ram_recommendation() {
        let registry = RuvLtraRegistry::new();

        // Should recommend small model for 2GB
        let model = registry.recommend_for_ram(2.0);
        assert!(model.is_some());
        assert!(model.unwrap().params_b <= 1.0);

        // Should recommend medium model for 8GB
        let model = registry.recommend_for_ram(8.0);
        assert!(model.is_some());
    }

    // size_multiplier values match the documented bit-width fractions.
    #[test]
    fn test_quantization_multipliers() {
        assert_eq!(QuantizationLevel::Q4.size_multiplier(), 0.25);
        assert_eq!(QuantizationLevel::Q8.size_multiplier(), 0.5);
        assert_eq!(QuantizationLevel::FP16.size_multiplier(), 1.0);
    }

    // URLs are built from the repo and filename fields.
    #[test]
    fn test_model_urls() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        let url = model.download_url();
        assert!(url.contains("huggingface.co"));
        assert!(url.contains("ruv/ruvltra"));
        assert!(url.contains(".gguf"));

        let hub_url = model.hub_url();
        assert_eq!(hub_url, "https://huggingface.co/ruv/ruvltra");
    }

    // 662MB at 10 MB/s is roughly 63 seconds (662e6 / 1024^2 / 10).
    #[test]
    fn test_download_time_estimation() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        // At 10 MB/s, should take ~66 seconds
        let time = model.estimate_download_time(10.0);
        assert!(time > 60.0 && time < 70.0);
    }
}
|