//! RuvLTRA model registry with pre-configured models
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
|
|
/// Model size category.
///
/// Buckets models by parameter count so callers can filter the registry
/// (e.g. `RuvLtraRegistry::list_by_size`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ModelSize {
    /// Tiny models (< 0.5B parameters; used for LoRA adapters here)
    Tiny,
    /// Small models (0.5B - 1B parameters)
    Small,
    /// Medium models (1B - 5B parameters)
    Medium,
    /// Large models (5B - 10B parameters)
    Large,
}
|
|
|
|
/// Quantization level.
///
/// Determines the trade-off between file size / memory footprint and model
/// quality; see `size_multiplier` and `memory_reduction` for the numbers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuantizationLevel {
    /// 4-bit quantization (smallest, ~662MB for 0.5B model)
    Q4,
    /// 5-bit quantization (balanced)
    Q5,
    /// 8-bit quantization (highest quality)
    Q8,
    /// FP16 (no quantization)
    FP16,
}
|
|
|
|
impl QuantizationLevel {
|
|
/// Get file size multiplier relative to FP16
|
|
pub fn size_multiplier(&self) -> f32 {
|
|
match self {
|
|
Self::Q4 => 0.25,
|
|
Self::Q5 => 0.3125,
|
|
Self::Q8 => 0.5,
|
|
Self::FP16 => 1.0,
|
|
}
|
|
}
|
|
|
|
/// Get expected memory reduction
|
|
pub fn memory_reduction(&self) -> f32 {
|
|
match self {
|
|
Self::Q4 => 0.75, // 75% reduction
|
|
Self::Q5 => 0.69, // 69% reduction
|
|
Self::Q8 => 0.50, // 50% reduction
|
|
Self::FP16 => 0.0, // No reduction
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Hardware requirements for model execution.
///
/// Carried by every `ModelInfo` entry; `ModelInfo::fits_in_ram` compares
/// available memory against `min_ram_gb`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRequirements {
    /// Minimum RAM in GB required to load and run the model
    pub min_ram_gb: f32,
    /// Recommended RAM in GB for comfortable operation
    pub recommended_ram_gb: f32,
    /// Supports Apple Neural Engine
    pub supports_ane: bool,
    /// Supports Metal GPU acceleration
    pub supports_metal: bool,
    /// Supports CUDA
    pub supports_cuda: bool,
    /// Minimum GPU VRAM in GB (if using GPU); `None` when GPU is not
    /// required/applicable (e.g. small LoRA adapters)
    pub min_vram_gb: Option<f32>,
}
|
|
|
|
/// Model information in the registry.
///
/// One entry per downloadable artifact — either a full base model (GGUF)
/// or a LoRA adapter (`is_adapter == true`, with `base_model` set).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
    /// Model identifier (e.g., "ruvltra-small"); also the registry key
    pub id: String,
    /// Human-readable display name
    pub name: String,
    /// HuggingFace repository (e.g., "ruvnet/ruvltra-small")
    pub repo: String,
    /// Model filename on HF Hub (joined with `repo` in `download_url`)
    pub filename: String,
    /// Model size category
    pub size: ModelSize,
    /// Quantization level
    pub quantization: QuantizationLevel,
    /// File size in bytes (used for download-time estimation)
    pub size_bytes: u64,
    /// SHA256 checksum; `None` until the artifact is published
    pub checksum: Option<String>,
    /// Number of parameters (in billions)
    pub params_b: f32,
    /// Context window size in tokens
    pub context_length: usize,
    /// Hardware requirements
    pub hardware: HardwareRequirements,
    /// Model description
    pub description: String,
    /// Whether this is a LoRA adapter (requires a base model to run)
    pub is_adapter: bool,
    /// Base model registry ID required (for adapters); `None` for base models
    pub base_model: Option<String>,
    /// Includes SONA pre-trained weights
    pub has_sona_weights: bool,
}
|
|
|
|
impl ModelInfo {
|
|
/// Get download URL for this model
|
|
pub fn download_url(&self) -> String {
|
|
format!(
|
|
"https://huggingface.co/{}/resolve/main/{}",
|
|
self.repo, self.filename
|
|
)
|
|
}
|
|
|
|
/// Get HuggingFace Hub page URL
|
|
pub fn hub_url(&self) -> String {
|
|
format!("https://huggingface.co/{}", self.repo)
|
|
}
|
|
|
|
/// Estimate download time in seconds at given speed (MB/s)
|
|
pub fn estimate_download_time(&self, speed_mbps: f32) -> f32 {
|
|
let size_mb = self.size_bytes as f32 / (1024.0 * 1024.0);
|
|
size_mb / speed_mbps
|
|
}
|
|
|
|
/// Check if model fits in available RAM
|
|
pub fn fits_in_ram(&self, available_gb: f32) -> bool {
|
|
available_gb >= self.hardware.min_ram_gb
|
|
}
|
|
}
|
|
|
|
/// RuvLTRA model registry
|
|
pub struct RuvLtraRegistry {
|
|
models: HashMap<String, ModelInfo>,
|
|
}
|
|
|
|
impl RuvLtraRegistry {
|
|
/// Create a new registry with pre-configured models
|
|
pub fn new() -> Self {
|
|
let mut models = HashMap::new();
|
|
|
|
// RuvLTRA-Small (0.5B) - Q4 quantization
|
|
models.insert(
|
|
"ruvltra-small".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small".to_string(),
|
|
name: "RuvLTRA Small (0.5B Q4)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-0.5b-q4_k_m.gguf".to_string(),
|
|
size: ModelSize::Small,
|
|
quantization: QuantizationLevel::Q4,
|
|
size_bytes: 662_000_000, // ~662MB
|
|
checksum: None, // Set after publishing
|
|
params_b: 0.5,
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 1.0,
|
|
recommended_ram_gb: 2.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(1.0),
|
|
},
|
|
description: "Compact RuvLTRA model optimized for edge devices. \
|
|
Includes SONA pre-trained weights for adaptive learning."
|
|
.to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Small (0.5B) - Q8 quantization
|
|
models.insert(
|
|
"ruvltra-small-q8".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small-q8".to_string(),
|
|
name: "RuvLTRA Small (0.5B Q8)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-0.5b-q8_0.gguf".to_string(),
|
|
size: ModelSize::Small,
|
|
quantization: QuantizationLevel::Q8,
|
|
size_bytes: 1_324_000_000, // ~1.3GB
|
|
checksum: None,
|
|
params_b: 0.5,
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 2.0,
|
|
recommended_ram_gb: 4.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(2.0),
|
|
},
|
|
description: "High-quality Q8 quantization for better accuracy.".to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Medium (3B) - Q4 quantization
|
|
models.insert(
|
|
"ruvltra-medium".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-medium".to_string(),
|
|
name: "RuvLTRA Medium (3B Q4)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-medium-1.1b-q4_k_m.gguf".to_string(),
|
|
size: ModelSize::Medium,
|
|
quantization: QuantizationLevel::Q4,
|
|
size_bytes: 2_100_000_000, // ~2.1GB
|
|
checksum: None,
|
|
params_b: 3.0,
|
|
context_length: 8192,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 4.0,
|
|
recommended_ram_gb: 8.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(4.0),
|
|
},
|
|
description: "Balanced RuvLTRA model for general-purpose tasks. \
|
|
Extended context window with SONA learning."
|
|
.to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Medium (3B) - Q8 quantization
|
|
models.insert(
|
|
"ruvltra-medium-q8".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-medium-q8".to_string(),
|
|
name: "RuvLTRA Medium (3B Q8)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-medium-1.1b-q8_0.gguf".to_string(),
|
|
size: ModelSize::Medium,
|
|
quantization: QuantizationLevel::Q8,
|
|
size_bytes: 4_200_000_000, // ~4.2GB
|
|
checksum: None,
|
|
params_b: 3.0,
|
|
context_length: 8192,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 6.0,
|
|
recommended_ram_gb: 12.0,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: Some(6.0),
|
|
},
|
|
description: "High-quality Medium model with Q8 quantization.".to_string(),
|
|
is_adapter: false,
|
|
base_model: None,
|
|
has_sona_weights: true,
|
|
},
|
|
);
|
|
|
|
// RuvLTRA-Small-Coder (LoRA adapter)
|
|
models.insert(
|
|
"ruvltra-small-coder".to_string(),
|
|
ModelInfo {
|
|
id: "ruvltra-small-coder".to_string(),
|
|
name: "RuvLTRA Small Coder (LoRA)".to_string(),
|
|
repo: "ruv/ruvltra".to_string(),
|
|
filename: "ruvltra-small-coder-lora.safetensors".to_string(),
|
|
size: ModelSize::Tiny,
|
|
quantization: QuantizationLevel::FP16,
|
|
size_bytes: 50_000_000, // ~50MB (LoRA is small)
|
|
checksum: None,
|
|
params_b: 0.05, // Adapter parameters
|
|
context_length: 4096,
|
|
hardware: HardwareRequirements {
|
|
min_ram_gb: 0.1,
|
|
recommended_ram_gb: 0.5,
|
|
supports_ane: true,
|
|
supports_metal: true,
|
|
supports_cuda: true,
|
|
min_vram_gb: None,
|
|
},
|
|
description: "LoRA adapter for code completion. \
|
|
Requires ruvltra-small or ruvltra-small-q8 base model."
|
|
.to_string(),
|
|
is_adapter: true,
|
|
base_model: Some("ruvltra-small".to_string()),
|
|
has_sona_weights: false,
|
|
},
|
|
);
|
|
|
|
Self { models }
|
|
}
|
|
|
|
/// Get model info by ID
|
|
pub fn get(&self, id: &str) -> Option<&ModelInfo> {
|
|
self.models.get(id)
|
|
}
|
|
|
|
/// Get all available models
|
|
pub fn list_all(&self) -> Vec<&ModelInfo> {
|
|
self.models.values().collect()
|
|
}
|
|
|
|
/// Get models by size
|
|
pub fn list_by_size(&self, size: ModelSize) -> Vec<&ModelInfo> {
|
|
self.models.values().filter(|m| m.size == size).collect()
|
|
}
|
|
|
|
/// Get base models (exclude adapters)
|
|
pub fn list_base_models(&self) -> Vec<&ModelInfo> {
|
|
self.models.values().filter(|m| !m.is_adapter).collect()
|
|
}
|
|
|
|
/// Get adapters for a specific base model
|
|
pub fn list_adapters(&self, base_model: &str) -> Vec<&ModelInfo> {
|
|
self.models
|
|
.values()
|
|
.filter(|m| {
|
|
m.is_adapter
|
|
&& m.base_model
|
|
.as_ref()
|
|
.map(|b| b == base_model)
|
|
.unwrap_or(false)
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Recommend model based on available RAM
|
|
pub fn recommend_for_ram(&self, available_gb: f32) -> Option<&ModelInfo> {
|
|
let mut candidates: Vec<_> = self
|
|
.models
|
|
.values()
|
|
.filter(|m| !m.is_adapter && m.fits_in_ram(available_gb))
|
|
.collect();
|
|
|
|
// Sort by parameters (largest that fits)
|
|
candidates.sort_by(|a, b| b.params_b.partial_cmp(&a.params_b).unwrap());
|
|
|
|
candidates.first().copied()
|
|
}
|
|
|
|
/// Get model IDs
|
|
pub fn model_ids(&self) -> Vec<String> {
|
|
self.models.keys().cloned().collect()
|
|
}
|
|
}
|
|
|
|
impl Default for RuvLtraRegistry {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// Get model info by ID (convenience function)
|
|
pub fn get_model_info(id: &str) -> Option<ModelInfo> {
|
|
RuvLtraRegistry::new().get(id).cloned()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Known IDs resolve; unknown IDs return None.
    #[test]
    fn test_registry_initialization() {
        let registry = RuvLtraRegistry::new();
        assert!(registry.get("ruvltra-small").is_some());
        assert!(registry.get("ruvltra-medium").is_some());
        assert!(registry.get("nonexistent").is_none());
    }

    // Spot-check the fields of the small Q4 entry.
    #[test]
    fn test_model_info() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        assert_eq!(model.params_b, 0.5);
        assert_eq!(model.quantization, QuantizationLevel::Q4);
        assert!(model.has_sona_weights);
        assert!(!model.is_adapter);
    }

    // Size filtering returns at least the two Small entries.
    #[test]
    fn test_list_by_size() {
        let registry = RuvLtraRegistry::new();
        let small_models = registry.list_by_size(ModelSize::Small);
        assert!(!small_models.is_empty());
    }

    // The coder LoRA is registered as an adapter for ruvltra-small.
    #[test]
    fn test_adapters() {
        let registry = RuvLtraRegistry::new();
        let adapters = registry.list_adapters("ruvltra-small");
        assert!(!adapters.is_empty());
        assert!(adapters[0].is_adapter);
    }

    // Recommendation picks the largest base model that fits the RAM budget.
    #[test]
    fn test_ram_recommendation() {
        let registry = RuvLtraRegistry::new();

        // Should recommend small model for 2GB
        let model = registry.recommend_for_ram(2.0);
        assert!(model.is_some());
        assert!(model.unwrap().params_b <= 1.0);

        // Should recommend medium model for 8GB
        let model = registry.recommend_for_ram(8.0);
        assert!(model.is_some());
    }

    // size_multiplier values match the documented bit-width fractions.
    #[test]
    fn test_quantization_multipliers() {
        assert_eq!(QuantizationLevel::Q4.size_multiplier(), 0.25);
        assert_eq!(QuantizationLevel::Q8.size_multiplier(), 0.5);
        assert_eq!(QuantizationLevel::FP16.size_multiplier(), 1.0);
    }

    // URLs are built from the repo and filename fields.
    #[test]
    fn test_model_urls() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        let url = model.download_url();
        assert!(url.contains("huggingface.co"));
        assert!(url.contains("ruv/ruvltra"));
        assert!(url.contains(".gguf"));

        let hub_url = model.hub_url();
        assert_eq!(hub_url, "https://huggingface.co/ruv/ruvltra");
    }

    // 662MB at 10 MB/s is roughly 63 seconds (662e6 / 1024^2 / 10).
    #[test]
    fn test_download_time_estimation() {
        let registry = RuvLtraRegistry::new();
        let model = registry.get("ruvltra-small").unwrap();

        // At 10 MB/s, should take ~66 seconds
        let time = model.estimate_download_time(10.0);
        assert!(time > 60.0 && time < 70.0);
    }
}
|