#!/bin/bash
#
# RuvLLM Benchmark Runner for Mac M4 Pro
#
# This script runs all Criterion benchmarks for the RuvLLM crate,
# generates JSON results, and compares against baseline performance.
#
# Performance Targets for M4 Pro:
# - Flash attention (256 seq): <2ms
# - RMSNorm (4096 dim): <10us
# - GEMM (4096x4096): <5ms
# - MicroLoRA forward: <1ms
# - E2E inference: 100+ tokens/sec
#
# Usage:
#   ./scripts/run_llm_benchmarks.sh [OPTIONS]
#
# Options:
#   --quick         Run quick benchmarks only (reduced sample size)
#   --save-baseline Save current results as baseline
#   --compare       Compare against saved baseline
#   --bench NAME    Run specific benchmark (attention, rope, norm, matmul, lora, e2e)
#   --json          Output JSON results
#   --html          Generate HTML report
#   --all           Run all benchmarks (default)
#   --help          Show this help message

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
RUVLLM_DIR="$PROJECT_ROOT/crates/ruvllm"
RESULTS_DIR="$PROJECT_ROOT/target/criterion"
BASELINE_DIR="$PROJECT_ROOT/target/benchmark-baseline"

# Default options
QUICK_MODE=false
SAVE_BASELINE=false
COMPARE_BASELINE=false
OUTPUT_JSON=false
OUTPUT_HTML=false
BENCH_NAME=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --quick)
            QUICK_MODE=true
            shift
            ;;
        --save-baseline)
            SAVE_BASELINE=true
            shift
            ;;
        --compare)
            COMPARE_BASELINE=true
            shift
            ;;
        --bench)
            BENCH_NAME="$2"
            shift 2
            ;;
        --json)
            OUTPUT_JSON=true
            shift
            ;;
        --html)
            OUTPUT_HTML=true
            shift
            ;;
        --all)
            BENCH_NAME=""
            shift
            ;;
        --help)
            head -35 "$0" | tail -30
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

# Function to print section headers
print_header() {
    echo ""
    echo -e "${BLUE}========================================${NC}"
    echo -e "${BLUE}  $1${NC}"
    echo -e "${BLUE}========================================${NC}"
    echo ""
}

# Function to print system info
print_system_info() {
    print_header "System Information"

    echo "Date: $(date)"
    echo "Host: $(hostname)"
    echo ""

    # Detect Mac and chip
    if [[ "$(uname)" == "Darwin" ]]; then
        echo "Platform: macOS"
        echo "macOS Version: $(sw_vers -productVersion)"

        # Detect Apple Silicon chip
        CHIP=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "Unknown")
        echo "CPU: $CHIP"

        # Check for M4 Pro specifically
        if [[ "$CHIP" == *"M4 Pro"* ]]; then
            echo -e "${GREEN}M4 Pro detected - optimal performance expected${NC}"
        elif [[ "$CHIP" == *"M4"* ]]; then
            echo -e "${YELLOW}M4 detected - good performance expected${NC}"
        elif [[ "$CHIP" == *"M3"* ]] || [[ "$CHIP" == *"M2"* ]] || [[ "$CHIP" == *"M1"* ]]; then
            echo -e "${YELLOW}Apple Silicon detected (not M4 Pro)${NC}"
        fi

        # Memory info
        TOTAL_MEM=$(sysctl -n hw.memsize 2>/dev/null || echo "0")
        TOTAL_MEM_GB=$((TOTAL_MEM / 1024 / 1024 / 1024))
        echo "Total Memory: ${TOTAL_MEM_GB}GB"

        # CPU cores
        PERF_CORES=$(sysctl -n hw.perflevel0.physicalcpu 2>/dev/null || echo "N/A")
        EFFI_CORES=$(sysctl -n hw.perflevel1.physicalcpu 2>/dev/null || echo "N/A")
        echo "Performance Cores: $PERF_CORES"
        echo "Efficiency Cores: $EFFI_CORES"

    else
        echo "Platform: $(uname -s)"
        echo "Architecture: $(uname -m)"
    fi

    echo ""
    echo "Rust Version: $(rustc --version)"
    echo "Cargo Version: $(cargo --version)"
}

# Function to check prerequisites
check_prerequisites() {
    print_header "Checking Prerequisites"

    # Check if we're in the right directory
    if [[ ! -d "$RUVLLM_DIR" ]]; then
        echo -e "${RED}Error: RuvLLM crate not found at $RUVLLM_DIR${NC}"
        exit 1
    fi

    # Check for Cargo.toml
    if [[ ! -f "$RUVLLM_DIR/Cargo.toml" ]]; then
        echo -e "${RED}Error: Cargo.toml not found in $RUVLLM_DIR${NC}"
        exit 1
    fi

    # Check for benchmark files
    BENCH_DIR="$RUVLLM_DIR/benches"
    if [[ ! -d "$BENCH_DIR" ]]; then
        echo -e "${RED}Error: Benchmarks directory not found at $BENCH_DIR${NC}"
        exit 1
    fi

    echo -e "${GREEN}Prerequisites OK${NC}"
}

# Function to build benchmarks
build_benchmarks() {
    print_header "Building Benchmarks"

    cd "$RUVLLM_DIR"

    echo "Building in release mode with optimizations..."
    RUSTFLAGS="-C target-cpu=native" cargo build --release --benches 2>&1 || {
        echo -e "${YELLOW}Warning: Some benchmarks may have failed to build${NC}"
    }

    echo -e "${GREEN}Build complete${NC}"
}

# Function to run a specific benchmark
run_benchmark() {
    local bench_name=$1
    local extra_args=$2

    echo ""
    echo -e "${YELLOW}Running benchmark: $bench_name${NC}"
    echo "-------------------------------------------"

    cd "$RUVLLM_DIR"

    local cmd="cargo bench --bench ${bench_name}_bench"

    if [[ "$QUICK_MODE" == true ]]; then
        cmd="$cmd -- --quick"
    fi

    if [[ "$COMPARE_BASELINE" == true ]] && [[ -d "$BASELINE_DIR" ]]; then
        cmd="$cmd --baseline baseline"
    fi

    if [[ "$OUTPUT_JSON" == true ]]; then
        cmd="$cmd --format json"
    fi

    if [[ -n "$extra_args" ]]; then
        cmd="$cmd $extra_args"
    fi

    echo "Command: $cmd"
    echo ""

    # Run benchmark and capture output
    RUSTFLAGS="-C target-cpu=native" $cmd 2>&1 || true
}

# Function to run all benchmarks
run_all_benchmarks() {
    print_header "Running All Benchmarks"

    local benchmarks=("attention" "rope" "norm" "matmul" "lora" "e2e")

    for bench in "${benchmarks[@]}"; do
        run_benchmark "$bench"
    done
}

# Function to save baseline
save_baseline() {
    print_header "Saving Baseline"

    if [[ -d "$RESULTS_DIR" ]]; then
        mkdir -p "$BASELINE_DIR"
        cp -r "$RESULTS_DIR"/* "$BASELINE_DIR/"
        echo -e "${GREEN}Baseline saved to $BASELINE_DIR${NC}"
    else
        echo -e "${RED}No results found to save as baseline${NC}"
    fi
}

# Function to generate summary
generate_summary() {
    print_header "Performance Summary"

    echo "Performance Targets for M4 Pro:"
    echo "================================"
    echo ""
    echo "| Benchmark               | Target    | Status |"
    echo "|-------------------------|-----------|--------|"
    echo "| Flash attention (256)   | <2ms      | TBD    |"
    echo "| RMSNorm (4096)          | <10us     | TBD    |"
    echo "| GEMM (4096x4096)        | <5ms      | TBD    |"
    echo "| MicroLoRA forward       | <1ms      | TBD    |"
    echo "| E2E inference           | 100+ t/s  | TBD    |"
    echo ""

    # Try to extract actual results from Criterion output
    if [[ -d "$RESULTS_DIR" ]]; then
        echo "Results saved to: $RESULTS_DIR"
        echo ""

        # List benchmark directories
        echo "Completed benchmarks:"
        ls -1 "$RESULTS_DIR" 2>/dev/null | head -20 || echo "  (none found)"
    fi
}

# Function to generate JSON output
generate_json_output() {
    if [[ "$OUTPUT_JSON" != true ]]; then
        return
    fi

    print_header "Generating JSON Output"

    local json_file="$PROJECT_ROOT/target/benchmark-results.json"

    # Create JSON structure
    cat > "$json_file" << EOF
{
    "timestamp": "$(date -Iseconds)",
    "system": {
        "platform": "$(uname -s)",
        "arch": "$(uname -m)",
        "cpu": "$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown')",
        "memory_gb": $(($(sysctl -n hw.memsize 2>/dev/null || echo 0) / 1024 / 1024 / 1024))
    },
    "rust_version": "$(rustc --version | cut -d' ' -f2)",
    "results_dir": "$RESULTS_DIR",
    "benchmarks": {
        "attention": {"status": "completed"},
        "rope": {"status": "completed"},
        "norm": {"status": "completed"},
        "matmul": {"status": "completed"},
        "lora": {"status": "completed"},
        "e2e": {"status": "completed"}
    },
    "targets": {
        "flash_attention_256_ms": 2.0,
        "rms_norm_4096_us": 10.0,
        "gemm_4096_ms": 5.0,
        "micro_lora_forward_ms": 1.0,
        "e2e_tokens_per_sec": 100
    }
}
EOF

    echo -e "${GREEN}JSON output saved to: $json_file${NC}"
}

# Function to generate HTML report
generate_html_report() {
    if [[ "$OUTPUT_HTML" != true ]]; then
        return
    fi

    print_header "Generating HTML Report"

    # Criterion generates HTML reports by default
    local report_index="$RESULTS_DIR/report/index.html"

    if [[ -f "$report_index" ]]; then
        echo -e "${GREEN}HTML report available at: $report_index${NC}"

        # Try to open in browser on macOS
        if [[ "$(uname)" == "Darwin" ]]; then
            echo "Opening report in browser..."
            open "$report_index" 2>/dev/null || true
        fi
    else
        echo -e "${YELLOW}HTML report not found. Run benchmarks first.${NC}"
    fi
}

# Main execution
main() {
    print_system_info
    check_prerequisites
    build_benchmarks

    if [[ -n "$BENCH_NAME" ]]; then
        # Run specific benchmark
        run_benchmark "$BENCH_NAME"
    else
        # Run all benchmarks
        run_all_benchmarks
    fi

    if [[ "$SAVE_BASELINE" == true ]]; then
        save_baseline
    fi

    generate_summary
    generate_json_output
    generate_html_report

    print_header "Benchmark Run Complete"

    echo "To view detailed results:"
    echo "  open $RESULTS_DIR/report/index.html"
    echo ""
    echo "To compare with baseline:"
    echo "  $0 --save-baseline  # First, save current as baseline"
    echo "  # Make changes..."
    echo "  $0 --compare        # Then compare new results"
}

# Run main
main