wifi-densepose/vendor/ruvector/crates/ruvllm/tests/autodetect_integration.rs

#![allow(
    clippy::all,
    unused_imports,
    unused_variables,
    dead_code,
    unused_mut,
    unused_assignments,
    non_camel_case_types,
    clippy::approx_constant,
    unexpected_cfgs,
    unused_must_use,
    unused_parens
)]
//! Auto-Detection Integration Tests
//!
//! Tests the system capabilities detection, optimal configuration generation,
//! and intelligent hardware-aware settings for LLM inference using the
//! actual autodetect module.

use ruvllm::autodetect::{
    Architecture, ComputeBackend, CoreInfo, CpuFeatures, GpuBackend, GpuCapabilities,
    InferenceConfig, Platform, SystemCapabilities,
};
use ruvllm::backends::Quantization;
use std::collections::HashSet;

// ============================================================================
// System Detection Tests
// ============================================================================

/// Checks that `SystemCapabilities::detect` reports the platform and
/// architecture this binary was compiled for, and that memory and core
/// counts are populated.
#[test]
fn test_system_capabilities_detection() {
    let detected = SystemCapabilities::detect();

    // Each `cfg` compiles in only the assertion matching the build target,
    // so targets not listed here skip the platform/architecture checks.
    #[cfg(target_os = "macos")]
    assert_eq!(detected.platform, Platform::MacOS);
    #[cfg(target_os = "linux")]
    assert_eq!(detected.platform, Platform::Linux);
    #[cfg(target_os = "windows")]
    assert_eq!(detected.platform, Platform::Windows);

    #[cfg(target_arch = "aarch64")]
    assert_eq!(detected.arch, Architecture::Aarch64);
    #[cfg(target_arch = "x86_64")]
    assert_eq!(detected.arch, Architecture::X86_64);
    #[cfg(target_arch = "wasm32")]
    assert_eq!(detected.arch, Architecture::Wasm32);

    // aarch64 builds expect the NEON flag to be set.
    #[cfg(target_arch = "aarch64")]
    assert!(
        detected.cpu_features.neon,
        "NEON should be available on aarch64"
    );

    // Total memory must be non-zero.
    let total_memory_mb = detected.memory_mb;
    assert!(total_memory_mb > 0, "Memory should be detected");

    // Core counts must be non-zero, and logical cores can never be fewer
    // than physical cores.
    let physical = detected.cores.physical_cores;
    let logical = detected.cores.logical_cores;
    assert!(physical > 0, "Physical cores should be detected");
    assert!(logical > 0, "Logical cores should be detected");
    assert!(
        logical >= physical,
        "Logical cores should be >= physical cores"
    );
}

/// Sanity-checks the configuration returned by `optimal_config`: minimum
/// batch size, thread count and block size, and a thread count bounded by
/// the detected logical core count.
#[test]
fn test_optimal_config_generation() {
    let system = SystemCapabilities::detect();
    let recommended = system.optimal_config();

    // Lower bounds on the generated defaults.
    assert!(
        recommended.batch_size >= 1,
        "Batch size should be at least 1"
    );
    assert!(
        recommended.thread_count >= 1,
        "Thread count should be at least 1"
    );
    assert!(
        recommended.block_size >= 16,
        "Block size should be at least 16"
    );

    // Upper bound: never ask for more threads than there are logical cores.
    let logical_cores = system.cores.logical_cores;
    assert!(
        recommended.thread_count <= logical_cores,
        "Thread count {} should not exceed logical cores {}",
        recommended.thread_count,
        logical_cores
    );
}

/// On hosts with at least 16GB of RAM, the quantization recommended for a
/// small (3GB) model must be F16 or Q8. Smaller hosts are not constrained.
#[test]
fn test_quantization_recommendation_small_model() {
    let system = SystemCapabilities::detect();

    // Always request the recommendation, even if it is not asserted on.
    let recommended = system.optimal_quantization(3.0);

    // Below 16GB (16384 MB) this test makes no claim about the result.
    if system.memory_mb < 16384 {
        return;
    }

    assert!(
        matches!(recommended, Quantization::F16 | Quantization::Q8),
        "Small model with 16GB+ RAM should use F16 or Q8, got {:?}",
        recommended
    );
}

/// On hosts with less than 256GB of RAM, the quantization recommended for a
/// large (70GB) model must be one of the aggressive schemes: Q4K, Q4 or Q2K.
#[test]
fn test_quantization_recommendation_large_model() {
    let system = SystemCapabilities::detect();

    // Always request the recommendation, even if it is not asserted on.
    let recommended = system.optimal_quantization(70.0);

    // Hosts with 256GB+ RAM may legitimately get a lighter quantization.
    if system.memory_mb >= 256 * 1024 {
        return;
    }

    let is_aggressive = matches!(
        recommended,
        Quantization::Q4K | Quantization::Q4 | Quantization::Q2K
    );
    assert!(
        is_aggressive,
        "Large model should use aggressive quantization, got {:?}",
        recommended
    );
}

/// `InferenceConfig::auto()` must agree with running detection by hand and
/// calling `optimal_config()` on the result.
#[test]
fn test_auto_config_matches_manual() {
    let automatic = InferenceConfig::auto();
    let by_hand = SystemCapabilities::detect().optimal_config();

    // Compare field by field so a failure names the field that diverged.
    assert_eq!(
        automatic.batch_size, by_hand.batch_size,
        "Auto batch size should match manual"
    );
    assert_eq!(
        automatic.thread_count, by_hand.thread_count,
        "Auto thread count should match manual"
    );
    assert_eq!(
        automatic.block_size, by_hand.block_size,
        "Auto block size should match manual"
    );
    assert_eq!(
        automatic.compute_backend, by_hand.compute_backend,
        "Auto compute backend should match manual"
    );
}

/// On macOS builds (Apple Silicon and Intel), detection must report a GPU
/// whose backend is Metal. On every other target this test only runs
/// detection and asserts nothing.
#[test]
fn test_platform_specific_gpu_detection() {
    let caps = SystemCapabilities::detect();

    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    {
        // `expect` both checks presence and yields the GPU, so a missing GPU
        // fails with a descriptive message instead of a bare `unwrap` panic.
        let gpu = caps.gpu.as_ref().expect("Apple Silicon should have GPU");
        assert_eq!(
            gpu.backend,
            GpuBackend::Metal,
            "Apple Silicon GPU backend should be Metal"
        );
    }

    #[cfg(all(target_os = "macos", target_arch = "x86_64"))]
    {
        let gpu = caps.gpu.as_ref().expect("Intel Mac should have GPU");
        assert_eq!(
            gpu.backend,
            GpuBackend::Metal,
            "Intel Mac GPU backend should be Metal"
        );
    }
}

/// On aarch64 builds, `CpuFeatures::detect` must report NEON. The test body
/// is empty on every other architecture.
#[test]
fn test_cpu_feature_detection_aarch64() {
    #[cfg(target_arch = "aarch64")]
    {
        // NEON is mandatory on aarch64.
        let neon_available = CpuFeatures::detect().neon;
        assert!(neon_available, "NEON must be available on aarch64");
    }
}

/// On x86_64 builds, runs CPU feature detection and prints the SSE4.2, AVX2
/// and AVX-512 flags. Nothing is asserted, because which flags are set
/// depends on the host (compile-time detection or a runtime check).
#[test]
fn test_cpu_feature_detection_x86_64() {
    #[cfg(target_arch = "x86_64")]
    {
        let detected = CpuFeatures::detect();
        let (sse42, avx2, avx512) = (detected.sse42, detected.avx2, detected.avx512);

        println!("SSE4.2: {}, AVX2: {}, AVX-512: {}", sse42, avx2, avx512);
    }
}

/// Detected memory must fall within a plausible range: at least 256MB and at
/// most 1TB (1024 * 1024 MB). The detected value is printed for reference.
#[test]
fn test_memory_detection() {
    let memory_mb = SystemCapabilities::detect().memory_mb;

    // Lower and upper plausibility bounds, checked separately so the failure
    // message says which side was violated.
    assert!(memory_mb >= 256, "Memory should be at least 256MB");
    assert!(memory_mb <= 1024 * 1024, "Memory should be at most 1TB");

    let memory_gb = memory_mb as f64 / 1024.0;
    println!("Detected memory: {} MB ({:.1} GB)", memory_mb, memory_gb);
}

/// `CoreInfo::detect` must report between 1 and 256 physical cores and at
/// least as many logical cores as physical ones. On Apple Silicon builds the
/// performance/efficiency split is printed when both counts are known.
#[test]
fn test_core_count_detection() {
    let info = CoreInfo::detect();
    let physical = info.physical_cores;
    let logical = info.logical_cores;

    // Plausibility bounds on the physical core count.
    assert!(physical >= 1, "Should have at least 1 physical core");
    assert!(physical <= 256, "Should have at most 256 physical cores");

    // Logical cores can never be fewer than physical cores.
    assert!(
        logical >= physical,
        "Logical cores {} should >= physical cores {}",
        logical,
        physical
    );

    println!("Detected cores: {} physical, {} logical", physical, logical);

    // Heterogeneous core layout is only reported on Apple Silicon builds.
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    {
        if let Some((perf, eff)) = info.performance_cores.zip(info.efficiency_cores) {
            println!("  Performance cores: {}, Efficiency cores: {}", perf, eff);
        }
    }
}

/// The recommended batch size must not grow as the sequence length grows:
/// 512 tokens >= 4096 tokens >= 16384 tokens.
#[test]
fn test_recommended_batch_size_scaling() {
    let system = SystemCapabilities::detect();

    // Sample a short, a medium and a long sequence length.
    let short_batch = system.recommended_batch_size(512);
    let medium_batch = system.recommended_batch_size(4096);
    let long_batch = system.recommended_batch_size(16384);

    assert!(
        short_batch >= medium_batch,
        "Shorter sequences should allow larger batches"
    );
    assert!(
        medium_batch >= long_batch,
        "Medium sequences should allow larger batches than long ones"
    );
}

/// Checks the invariants of the four `InferenceConfig` presets: the
/// low-memory preset uses an aggressive quantization and batch size 1, the
/// low-latency preset uses batch size 1, and every preset enables flash
/// attention.
#[test]
fn test_inference_config_presets() {
    let auto = InferenceConfig::auto();
    let low_mem = InferenceConfig::low_memory();
    let high_throughput = InferenceConfig::high_throughput();
    let low_latency = InferenceConfig::low_latency();

    // Low-memory preset: Q4, Q4K or Q2K, one request at a time.
    let low_mem_is_aggressive = matches!(
        low_mem.quantization,
        Quantization::Q4 | Quantization::Q4K | Quantization::Q2K
    );
    assert!(
        low_mem_is_aggressive,
        "Low memory config should use aggressive quantization"
    );
    assert_eq!(low_mem.batch_size, 1, "Low memory should use batch size 1");

    // Low-latency preset: one request at a time.
    assert_eq!(
        low_latency.batch_size, 1,
        "Low latency should use batch size 1"
    );

    // Flash attention is expected on every preset.
    assert!(auto.use_flash_attention);
    assert!(low_mem.use_flash_attention);
    assert!(high_throughput.use_flash_attention);
    assert!(low_latency.use_flash_attention);
}

/// Verifies the compute backend chosen by `optimal_config` on the targets
/// this test knows about, then checks that `is_gpu` classifies GPU and CPU
/// backends correctly.
#[test]
fn test_compute_backend_selection() {
    let system = SystemCapabilities::detect();
    let selected = system.optimal_config().compute_backend;
    let has_gpu = system.gpu.is_some();

    // macOS with a detected GPU must pick Metal.
    #[cfg(target_os = "macos")]
    {
        if has_gpu {
            assert_eq!(
                selected,
                ComputeBackend::Metal,
                "Should select Metal on macOS with GPU"
            );
        }
    }

    // aarch64 with no detected GPU must fall back to NEON.
    #[cfg(target_arch = "aarch64")]
    {
        if !has_gpu {
            assert_eq!(
                selected,
                ComputeBackend::CpuNeon,
                "Should select NEON on aarch64 without GPU"
            );
        }
    }

    // GPU backends report `is_gpu() == true` ...
    assert!(ComputeBackend::Metal.is_gpu());
    assert!(ComputeBackend::Cuda.is_gpu());
    assert!(ComputeBackend::WebGPU.is_gpu());
    // ... and CPU backends report `false`.
    assert!(!ComputeBackend::CpuNeon.is_gpu());
    assert!(!ComputeBackend::CpuAvx2.is_gpu());
    assert!(!ComputeBackend::CpuScalar.is_gpu());
}

/// The human-readable summary must be non-empty and mention either "cores"
/// or "RAM". The summary is printed for reference.
#[test]
fn test_system_summary() {
    let summary = SystemCapabilities::detect().summary();

    println!("System Summary: {}", summary);

    assert!(!summary.is_empty(), "Summary should not be empty");

    // At least one of the expected keywords has to appear.
    let mentions_hardware = ["cores", "RAM"]
        .iter()
        .any(|keyword| summary.contains(*keyword));
    assert!(
        mentions_hardware,
        "Summary should contain cores or RAM info"
    );
}

/// Exercises `can_run_model` with a tiny model, a 1TB model, and a model
/// sized at 80% of the largest one this host is expected to fit.
///
/// The expected maximum is derived by reversing the formula this test
/// assumes `can_run_model` applies (model size * 0.4 + 2GB of headroom,
/// compared against available memory) — NOTE(review): confirm against the
/// implementation if that formula changes.
#[test]
fn test_can_run_model() {
    let caps = SystemCapabilities::detect();

    // Should be able to run a tiny model
    assert!(caps.can_run_model(0.1), "Should be able to run 100MB model");

    // Note: can_run_model uses available_memory_mb which defaults to memory_mb / 2
    let available_gb = caps.available_memory_mb.unwrap_or(caps.memory_mb / 2) as f32 / 1024.0;
    let max_model = (available_gb - 2.0) / 0.4; // Reverse the formula from can_run_model

    // A 1TB model must be rejected, unless this host genuinely has enough
    // memory for it. Large-memory servers would otherwise fail this test
    // even though detection is working correctly.
    if max_model < 1000.0 {
        assert!(
            !caps.can_run_model(1000.0),
            "Should not be able to run 1TB model"
        );
    }

    // Boundary condition: a model comfortably below the computed maximum
    // must be accepted. Skipped when the host has no headroom at all.
    if max_model > 0.0 {
        assert!(
            caps.can_run_model(max_model * 0.8),
            "Should be able to run model at 80% of max"
        );
    }
}

/// The throughput estimate must be positive for the auto-detected config,
/// and swapping the compute backend to Metal must yield a higher estimate
/// than swapping it to the scalar CPU backend.
#[test]
fn test_estimated_tokens_per_second() {
    let auto = InferenceConfig::auto();

    let baseline_tps = auto.estimated_tokens_per_second();
    assert!(
        baseline_tps > 0.0,
        "Estimated tokens per second should be positive"
    );

    // Estimate for a copy of the auto config with only the backend replaced.
    let tps_on = |backend: ComputeBackend| {
        let mut variant = auto.clone();
        variant.compute_backend = backend;
        variant.estimated_tokens_per_second()
    };

    let metal_tps = tps_on(ComputeBackend::Metal);
    let cpu_tps = tps_on(ComputeBackend::CpuScalar);

    assert!(
        metal_tps > cpu_tps,
        "Metal should have higher estimated TPS than CPU scalar"
    );
}

// ============================================================================
// Hardware Fingerprinting Tests
// ============================================================================

/// Runs detection twice and checks that the results agree: platform,
/// architecture, logical core count and the NEON flag must be identical, and
/// the reported memory must differ by less than 100 MB.
#[test]
fn test_hardware_fingerprint_stability() {
    // Run detection multiple times and verify consistency
    let cap1 = SystemCapabilities::detect();
    let cap2 = SystemCapabilities::detect();

    assert_eq!(cap1.platform, cap2.platform);
    assert_eq!(cap1.arch, cap2.arch);
    assert_eq!(cap1.cores.logical_cores, cap2.cores.logical_cores);
    assert_eq!(cap1.cpu_features.neon, cap2.cpu_features.neon);

    // Memory may vary slightly due to system activity, but should be close.
    // `abs_diff` computes the distance directly on the integer type, so no
    // lossy `as i64` casts are needed.
    let mem_diff = cap1.memory_mb.abs_diff(cap2.memory_mb);
    assert!(mem_diff < 100, "Memory detection should be stable");
}

/// Every `Platform` variant listed here must hash and compare as a distinct
/// value.
#[test]
fn test_all_supported_platforms() {
    let platforms = [
        Platform::MacOS,
        Platform::Linux,
        Platform::Windows,
        Platform::Wasm,
        Platform::IOS,
        Platform::Android,
        Platform::Unknown,
    ];

    // Collapsing into a set drops duplicates; the size must be unchanged.
    let distinct = platforms.iter().collect::<HashSet<_>>();
    assert_eq!(
        distinct.len(),
        platforms.len(),
        "All platform variants should be distinct"
    );
}

/// Every `Architecture` variant listed here must hash and compare as a
/// distinct value.
#[test]
fn test_all_architecture_variants() {
    let architectures = [
        Architecture::Aarch64,
        Architecture::X86_64,
        Architecture::Wasm32,
        Architecture::Unknown,
    ];

    // Collapsing into a set drops duplicates; the size must be unchanged.
    let distinct = architectures.iter().collect::<HashSet<_>>();
    assert_eq!(
        distinct.len(),
        architectures.len(),
        "All architecture variants should be distinct"
    );
}

/// Every `GpuBackend` variant listed here must hash and compare as a
/// distinct value.
#[test]
fn test_all_gpu_backend_variants() {
    let gpu_backends = [
        GpuBackend::Metal,
        GpuBackend::Cuda,
        GpuBackend::WebGPU,
        GpuBackend::Vulkan,
        GpuBackend::OpenCL,
    ];

    // Collapsing into a set drops duplicates; the size must be unchanged.
    let distinct = gpu_backends.iter().collect::<HashSet<_>>();
    assert_eq!(
        distinct.len(),
        gpu_backends.len(),
        "All GPU backend variants should be distinct"
    );
}

/// Every `ComputeBackend` variant listed here must be distinct, and
/// `relative_performance` must rank them
/// Cuda > Metal > CpuAvx512 > CpuAvx2 >= CpuNeon > CpuScalar.
#[test]
fn test_all_compute_backend_variants() {
    let compute_backends = [
        ComputeBackend::Metal,
        ComputeBackend::Cuda,
        ComputeBackend::WebGPU,
        ComputeBackend::CpuAvx512,
        ComputeBackend::CpuAvx2,
        ComputeBackend::CpuNeon,
        ComputeBackend::CpuScalar,
    ];

    // Collapsing into a set drops duplicates; the size must be unchanged.
    let distinct = compute_backends.iter().collect::<HashSet<_>>();
    assert_eq!(
        distinct.len(),
        compute_backends.len(),
        "All compute backend variants should be distinct"
    );

    // Pairwise checks walk the expected ranking from fastest to slowest.
    // AVX2 and NEON are allowed to tie; every other step is strict.
    assert!(
        ComputeBackend::Cuda.relative_performance() > ComputeBackend::Metal.relative_performance()
    );
    assert!(
        ComputeBackend::Metal.relative_performance()
            > ComputeBackend::CpuAvx512.relative_performance()
    );
    assert!(
        ComputeBackend::CpuAvx512.relative_performance()
            > ComputeBackend::CpuAvx2.relative_performance()
    );
    assert!(
        ComputeBackend::CpuAvx2.relative_performance()
            >= ComputeBackend::CpuNeon.relative_performance()
    );
    assert!(
        ComputeBackend::CpuNeon.relative_performance()
            > ComputeBackend::CpuScalar.relative_performance()
    );
}

/// Exercises `GpuCapabilities::can_fit_model` on hand-built GPUs: a 16GB
/// device must accept a 7B model and reject a 70B model, and a device with
/// unknown VRAM must accept optimistically.
#[test]
fn test_gpu_can_fit_model() {
    // Synthetic 16GB Metal GPU with every optional field populated.
    let sixteen_gb_gpu = GpuCapabilities {
        backend: GpuBackend::Metal,
        vram_mb: Some(16 * 1024),
        compute_units: Some(128),
        name: Some(String::from("Test GPU")),
        supports_fp16: true,
        supports_int8: true,
        has_tensor_cores: true,
        max_shared_memory: Some(32 * 1024),
    };

    // 7B needs roughly 10GB with overhead, so it fits in 16GB.
    assert!(
        sixteen_gb_gpu.can_fit_model(7.0),
        "16GB VRAM should fit 7B model"
    );

    // 70B needs roughly 100GB, so it does not.
    assert!(
        !sixteen_gb_gpu.can_fit_model(70.0),
        "16GB VRAM should not fit 70B model"
    );

    // Edge case: a GPU whose VRAM (and other optional fields) is unknown.
    let unknown_vram_gpu = GpuCapabilities {
        backend: GpuBackend::Metal,
        vram_mb: None,
        compute_units: None,
        name: Some(String::from("Unknown GPU")),
        supports_fp16: true,
        supports_int8: true,
        has_tensor_cores: false,
        max_shared_memory: None,
    };

    // With no VRAM figure to compare against, the answer is optimistic.
    assert!(
        unknown_vram_gpu.can_fit_model(7.0),
        "Unknown VRAM should optimistically assume model fits"
    );
}

// ============================================================================
// System Capabilities Display Test
// ============================================================================

/// Smoke test: prints the detected capabilities, the optimal configuration
/// derived from them, and the summary string.
///
/// Nothing is asserted; the test passes as long as detection, the accessors
/// and the formatting calls complete without panicking.
#[test]
fn test_system_capabilities_display() {
    let caps = SystemCapabilities::detect();

    println!("\n=== System Capabilities ===");
    println!("Platform: {:?}", caps.platform);
    println!("Architecture: {:?}", caps.arch);
    println!(
        "Memory: {} MB ({:.1} GB)",
        caps.memory_mb,
        caps.memory_mb as f64 / 1024.0
    );
    println!(
        "Cores: {} physical, {} logical",
        caps.cores.physical_cores, caps.cores.logical_cores
    );

    // GPU details are optional; VRAM is printed only when it is known.
    if let Some(gpu) = &caps.gpu {
        println!("GPU: {:?} - {:?}", gpu.backend, gpu.name);
        if let Some(vram) = gpu.vram_mb {
            println!("     VRAM: {} MB", vram);
        }
        println!(
            "     FP16: {}, INT8: {}, Tensor Cores: {}",
            gpu.supports_fp16, gpu.supports_int8, gpu.has_tensor_cores
        );
    } else {
        println!("GPU: None");
    }

    // Only the feature flags relevant to the build architecture are printed.
    println!("\nCPU Features:");
    #[cfg(target_arch = "aarch64")]
    println!("  NEON: {}", caps.cpu_features.neon);

    #[cfg(target_arch = "x86_64")]
    {
        println!("  SSE4.2: {}", caps.cpu_features.sse42);
        println!("  AVX2: {}", caps.cpu_features.avx2);
        println!("  AVX-512: {}", caps.cpu_features.avx512);
    }

    println!(
        "  Best SIMD width: {} bits",
        caps.cpu_features.best_simd_width()
    );
    println!(
        "  SIMD float lanes: {}",
        caps.cpu_features.simd_float_lanes()
    );

    let config = caps.optimal_config();
    println!("\n=== Optimal Configuration ===");
    println!("Compute Backend: {:?}", config.compute_backend);
    println!("Quantization: {:?}", config.quantization);
    println!("Batch Size: {}", config.batch_size);
    println!("Thread Count: {}", config.thread_count);
    println!("Block Size: {}", config.block_size);
    println!("Flash Attention: {}", config.use_flash_attention);
    println!("Device Type: {:?}", config.device_type);
    println!("DType: {:?}", config.dtype);
    println!("Estimated TPS: {:.1}", config.estimated_tokens_per_second());

    println!("\n=== Summary ===");
    println!("{}", caps.summary());

    // Reaching this point without a panic is the success criterion; no
    // explicit assertion is needed.
}

// ============================================================================
// Attention Config Integration
// ============================================================================

/// Sanity-checks the attention configuration returned by
/// `optimal_attention_config`: positive head counts and head dimension, at
/// least a 1K context, a valid grouped-query ratio, and a scale strictly
/// between 0 and 1. The configuration is printed for reference.
#[test]
fn test_optimal_attention_config() {
    let attention = SystemCapabilities::detect().optimal_attention_config();

    // Basic shape of the configuration.
    assert!(attention.num_heads > 0, "Should have at least 1 head");
    assert!(
        attention.num_kv_heads > 0,
        "Should have at least 1 KV head"
    );
    assert!(attention.head_dim > 0, "Should have positive head dim");
    assert!(
        attention.max_seq_len >= 1024,
        "Should support at least 1K context"
    );

    // Query heads must split evenly across KV heads.
    let gqa_ratio = attention.gqa_ratio();
    assert!(gqa_ratio >= 1, "GQA ratio should be at least 1");
    let heads_divide_evenly = attention.num_heads % attention.num_kv_heads == 0;
    assert!(
        heads_divide_evenly,
        "num_heads should be divisible by num_kv_heads"
    );

    // The effective scale must lie strictly inside (0, 1).
    let scale = attention.effective_scale();
    assert!(
        0.0 < scale && scale < 1.0,
        "Scale should be between 0 and 1"
    );

    println!(
        "Attention Config: {} heads, {} KV heads, {} head_dim, {} max_seq_len, GQA {}:1",
        attention.num_heads,
        attention.num_kv_heads,
        attention.head_dim,
        attention.max_seq_len,
        gqa_ratio
    );
}