Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/scripts/benchmark/run_llm_benchmarks.sh
+++ b/vendor/ruvector/scripts/benchmark/run_llm_benchmarks.sh
@@ -0,0 +1,378 @@
+#!/bin/bash
+#
+# RuvLLM Benchmark Runner for Mac M4 Pro
+#
+# This script runs all Criterion benchmarks for the RuvLLM crate,
+# generates JSON results, and compares against baseline performance.
+#
+# Performance Targets for M4 Pro:
+# - Flash attention (256 seq): <2ms
+# - RMSNorm (4096 dim): <10us
+# - GEMM (4096x4096): <5ms
+# - MicroLoRA forward: <1ms
+# - E2E inference: 100+ tokens/sec
+#
+# Usage:
+#   ./scripts/benchmark/run_llm_benchmarks.sh [OPTIONS]
+#
+# Options:
+#   --quick         Run quick benchmarks only (reduced sample size)
+#   --save-baseline Save current results as baseline
+#   --compare       Compare against saved baseline
+#   --bench NAME    Run specific benchmark (attention, rope, norm, matmul, lora, e2e)
+#   --json          Output JSON results
+#   --html          Generate HTML report
+#   --all           Run all benchmarks (default)
+#   --help          Show this help message
+
+# Abort as soon as a command exits non-zero without being handled.
+set -o errexit
+
+# ANSI escape sequences used to colorize terminal output. They are stored
+# unexpanded and rendered by `echo -e` at print time.
+RED="\033[0;31m"
+GREEN="\033[0;32m"
+YELLOW="\033[1;33m"
+BLUE="\033[0;34m"
+NC="\033[0m" # Reset (no color)
+
+# Script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+RUVLLM_DIR="$PROJECT_ROOT/crates/ruvllm"
+RESULTS_DIR="$PROJECT_ROOT/target/criterion"
+BASELINE_DIR="$PROJECT_ROOT/target/benchmark-baseline"
+
+# Option defaults; each one can be overridden by a command-line flag.
+QUICK_MODE="false"          # --quick
+SAVE_BASELINE="false"       # --save-baseline
+COMPARE_BASELINE="false"    # --compare
+OUTPUT_JSON="false"         # --json
+OUTPUT_HTML="false"         # --html
+BENCH_NAME=""               # --bench NAME (empty means: run every benchmark)
+
+# Parse command-line arguments into the option variables.
+# Exits 0 after printing help, and 1 on an unknown option or a --bench flag
+# that is missing its value.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --quick)
+            QUICK_MODE=true
+            shift
+            ;;
+        --save-baseline)
+            SAVE_BASELINE=true
+            shift
+            ;;
+        --compare)
+            COMPARE_BASELINE=true
+            shift
+            ;;
+        --bench)
+            # Validate the value explicitly: a bare `--bench` would otherwise
+            # make `shift 2` fail and, under `set -e`, end the script without
+            # any message. A following flag is not a benchmark name either.
+            if [[ $# -lt 2 || -z "$2" || "$2" == --* ]]; then
+                echo "Error: --bench requires a benchmark name" >&2
+                exit 1
+            fi
+            BENCH_NAME="$2"
+            shift 2
+            ;;
+        --json)
+            OUTPUT_JSON=true
+            shift
+            ;;
+        --html)
+            OUTPUT_HTML=true
+            shift
+            ;;
+        --all)
+            BENCH_NAME=""
+            shift
+            ;;
+        --help)
+            # Print the comment block at the top of this script: skip the
+            # shebang, stop at the first line that is not a comment, and strip
+            # the leading "# ". This does not depend on fixed line counts.
+            awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            echo "Run '$0 --help' for usage." >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Function to print section headers
+print_header() {
+    echo ""
+    echo -e "${BLUE}========================================${NC}"
+    echo -e "${BLUE}  $1${NC}"
+    echo -e "${BLUE}========================================${NC}"
+    echo ""
+}
+
+# Print a report about the host: date, hostname, platform details and the
+# Rust toolchain versions. On macOS it additionally prints the OS version,
+# CPU brand string, total memory and performance/efficiency core counts as
+# reported by sysctl. Takes no arguments; writes to stdout only.
+print_system_info() {
+    # Keep every intermediate value local so nothing leaks into the
+    # script's global namespace.
+    local chip total_mem total_mem_gb perf_cores effi_cores
+
+    print_header "System Information"
+
+    echo "Date: $(date)"
+    echo "Host: $(hostname)"
+    echo ""
+
+    # Detect Mac and chip
+    if [[ "$(uname)" == "Darwin" ]]; then
+        echo "Platform: macOS"
+        echo "macOS Version: $(sw_vers -productVersion)"
+
+        # Detect Apple Silicon chip; fall back to "Unknown" if the sysctl
+        # key cannot be read.
+        chip=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "Unknown")
+        echo "CPU: $chip"
+
+        # Check for M4 Pro specifically. Order matters: "M4 Pro" must be
+        # tested before the broader "M4" pattern.
+        if [[ "$chip" == *"M4 Pro"* ]]; then
+            echo -e "${GREEN}M4 Pro detected - optimal performance expected${NC}"
+        elif [[ "$chip" == *"M4"* ]]; then
+            echo -e "${YELLOW}M4 detected - good performance expected${NC}"
+        elif [[ "$chip" == *"M3"* ]] || [[ "$chip" == *"M2"* ]] || [[ "$chip" == *"M1"* ]]; then
+            echo -e "${YELLOW}Apple Silicon detected (not M4 Pro)${NC}"
+        fi
+
+        # Memory info: hw.memsize is divided by 1024 three times to get GB
+        # (integer division).
+        total_mem=$(sysctl -n hw.memsize 2>/dev/null || echo "0")
+        total_mem_gb=$((total_mem / 1024 / 1024 / 1024))
+        echo "Total Memory: ${total_mem_gb}GB"
+
+        # CPU cores; "N/A" when the per-level sysctl keys are unavailable.
+        perf_cores=$(sysctl -n hw.perflevel0.physicalcpu 2>/dev/null || echo "N/A")
+        effi_cores=$(sysctl -n hw.perflevel1.physicalcpu 2>/dev/null || echo "N/A")
+        echo "Performance Cores: $perf_cores"
+        echo "Efficiency Cores: $effi_cores"
+
+    else
+        echo "Platform: $(uname -s)"
+        echo "Architecture: $(uname -m)"
+    fi
+
+    echo ""
+    echo "Rust Version: $(rustc --version)"
+    echo "Cargo Version: $(cargo --version)"
+}
+
+# Function to check prerequisites
+check_prerequisites() {
+    print_header "Checking Prerequisites"
+
+    # Check if we're in the right directory
+    if [[ ! -d "$RUVLLM_DIR" ]]; then
+        echo -e "${RED}Error: RuvLLM crate not found at $RUVLLM_DIR${NC}"
+        exit 1
+    fi
+
+    # Check for Cargo.toml
+    if [[ ! -f "$RUVLLM_DIR/Cargo.toml" ]]; then
+        echo -e "${RED}Error: Cargo.toml not found in $RUVLLM_DIR${NC}"
+        exit 1
+    fi
+
+    # Check for benchmark files
+    BENCH_DIR="$RUVLLM_DIR/benches"
+    if [[ ! -d "$BENCH_DIR" ]]; then
+        echo -e "${RED}Error: Benchmarks directory not found at $BENCH_DIR${NC}"
+        exit 1
+    fi
+
+    echo -e "${GREEN}Prerequisites OK${NC}"
+}
+
+# Function to build benchmarks
+build_benchmarks() {
+    print_header "Building Benchmarks"
+
+    cd "$RUVLLM_DIR"
+
+    echo "Building in release mode with optimizations..."
+    RUSTFLAGS="-C target-cpu=native" cargo build --release --benches 2>&1 || {
+        echo -e "${YELLOW}Warning: Some benchmarks may have failed to build${NC}"
+    }
+
+    echo -e "${GREEN}Build complete${NC}"
+}
+
+# Function to run a specific benchmark
+run_benchmark() {
+    local bench_name=$1
+    local extra_args=$2
+
+    echo ""
+    echo -e "${YELLOW}Running benchmark: $bench_name${NC}"
+    echo "-------------------------------------------"
+
+    cd "$RUVLLM_DIR"
+
+    local cmd="cargo bench --bench ${bench_name}_bench"
+
+    if [[ "$QUICK_MODE" == true ]]; then
+        cmd="$cmd -- --quick"
+    fi
+
+    if [[ "$COMPARE_BASELINE" == true ]] && [[ -d "$BASELINE_DIR" ]]; then
+        cmd="$cmd --baseline baseline"
+    fi
+
+    if [[ "$OUTPUT_JSON" == true ]]; then
+        cmd="$cmd --format json"
+    fi
+
+    if [[ -n "$extra_args" ]]; then
+        cmd="$cmd $extra_args"
+    fi
+
+    echo "Command: $cmd"
+    echo ""
+
+    # Run benchmark and capture output
+    RUSTFLAGS="-C target-cpu=native" $cmd 2>&1 || true
+}
+
+# Function to run all benchmarks
+run_all_benchmarks() {
+    print_header "Running All Benchmarks"
+
+    local benchmarks=("attention" "rope" "norm" "matmul" "lora" "e2e")
+
+    for bench in "${benchmarks[@]}"; do
+        run_benchmark "$bench"
+    done
+}
+
+# Function to save baseline
+save_baseline() {
+    print_header "Saving Baseline"
+
+    if [[ -d "$RESULTS_DIR" ]]; then
+        mkdir -p "$BASELINE_DIR"
+        cp -r "$RESULTS_DIR"/* "$BASELINE_DIR/"
+        echo -e "${GREEN}Baseline saved to $BASELINE_DIR${NC}"
+    else
+        echo -e "${RED}No results found to save as baseline${NC}"
+    fi
+}
+
+# Print the table of performance targets and, when a Criterion results
+# directory exists, the names of up to 20 entries found in it. The Status
+# column is a static placeholder; measured values are not parsed here.
+generate_summary() {
+    print_header "Performance Summary"
+
+    echo "Performance Targets for M4 Pro:"
+    echo "================================"
+    echo ""
+    echo "| Benchmark               | Target    | Status |"
+    echo "|-------------------------|-----------|--------|"
+    echo "| Flash attention (256)   | <2ms      | TBD    |"
+    echo "| RMSNorm (4096)          | <10us     | TBD    |"
+    echo "| GEMM (4096x4096)        | <5ms      | TBD    |"
+    echo "| MicroLoRA forward       | <1ms      | TBD    |"
+    echo "| E2E inference           | 100+ t/s  | TBD    |"
+    echo ""
+
+    # List what Criterion left in its output directory
+    if [[ -d "$RESULTS_DIR" ]]; then
+        echo "Results saved to: $RESULTS_DIR"
+        echo ""
+
+        echo "Completed benchmarks:"
+
+        # Collect the entry names with a glob instead of piping ls into head:
+        # the status of such a pipeline is head's, so a "|| echo" fallback
+        # attached to it would never run for an empty directory.
+        local -a entries=()
+        local entry
+        for entry in "$RESULTS_DIR"/*; do
+            # An unmatched glob stays literal; skip it.
+            [[ -e "$entry" ]] || continue
+            entries+=("${entry##*/}")
+        done
+
+        if (( ${#entries[@]} == 0 )); then
+            echo "  (none found)"
+        else
+            printf '%s\n' "${entries[@]:0:20}"
+        fi
+    fi
+}
+
+# Write a JSON run summary to $PROJECT_ROOT/target/benchmark-results.json.
+# Does nothing unless OUTPUT_JSON is true.
+#
+# The file records a timestamp, basic system information, the Rust version,
+# the Criterion results directory, a status per benchmark and the static
+# performance targets. Status is "completed" for every benchmark selected for
+# this run and "skipped" for the others; it records that the benchmark was
+# invoked, not that it passed.
+generate_json_output() {
+    if [[ "$OUTPUT_JSON" != true ]]; then
+        return
+    fi
+
+    print_header "Generating JSON Output"
+
+    local json_file="$PROJECT_ROOT/target/benchmark-results.json"
+    # The target directory may not exist yet (for example after a failed build).
+    mkdir -p "$(dirname "$json_file")"
+
+    # Prefer ISO 8601 with offset; fall back to a UTC timestamp where `date`
+    # has no -I option.
+    local timestamp
+    timestamp=$(date -Iseconds 2>/dev/null) || timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+    local cpu mem_bytes
+    cpu=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'Unknown')
+    mem_bytes=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
+
+    # Build the "benchmarks" object from what was actually selected instead
+    # of reporting every benchmark as completed.
+    local bench status
+    local bench_entries="" separator="" selected_known=false
+    for bench in attention rope norm matmul lora e2e; do
+        status="completed"
+        if [[ -n "$BENCH_NAME" ]]; then
+            if [[ "$BENCH_NAME" == "$bench" ]]; then
+                selected_known=true
+            else
+                status="skipped"
+            fi
+        fi
+        bench_entries+="${separator}        \"${bench}\": {\"status\": \"${status}\"}"
+        separator=$',\n'
+    done
+    # A benchmark selected with --bench that is not part of the standard list
+    # still gets an entry.
+    if [[ -n "$BENCH_NAME" && "$selected_known" != true ]]; then
+        bench_entries+="${separator}        \"${BENCH_NAME}\": {\"status\": \"completed\"}"
+    fi
+
+    # Create JSON structure
+    cat > "$json_file" << EOF
+{
+    "timestamp": "${timestamp}",
+    "system": {
+        "platform": "$(uname -s)",
+        "arch": "$(uname -m)",
+        "cpu": "${cpu}",
+        "memory_gb": $((mem_bytes / 1024 / 1024 / 1024))
+    },
+    "rust_version": "$(rustc --version | cut -d' ' -f2)",
+    "results_dir": "$RESULTS_DIR",
+    "benchmarks": {
+${bench_entries}
+    },
+    "targets": {
+        "flash_attention_256_ms": 2.0,
+        "rms_norm_4096_us": 10.0,
+        "gemm_4096_ms": 5.0,
+        "micro_lora_forward_ms": 1.0,
+        "e2e_tokens_per_sec": 100
+    }
+}
+EOF
+
+    echo -e "${GREEN}JSON output saved to: $json_file${NC}"
+}
+
+# Point the user at the HTML report under RESULTS_DIR and, on macOS, try to
+# open it. Does nothing unless OUTPUT_HTML is true.
+generate_html_report() {
+    # Explicit status 0: a bare `return` here would propagate the failed test.
+    [[ "$OUTPUT_HTML" == true ]] || return 0
+
+    print_header "Generating HTML Report"
+
+    # Location where the report index is expected
+    local index_page="$RESULTS_DIR/report/index.html"
+
+    if [[ ! -f "$index_page" ]]; then
+        echo -e "${YELLOW}HTML report not found. Run benchmarks first.${NC}"
+        return 0
+    fi
+
+    echo -e "${GREEN}HTML report available at: $index_page${NC}"
+
+    # Opening the report is best effort and only attempted on macOS.
+    if [[ "$(uname)" == "Darwin" ]]; then
+        echo "Opening report in browser..."
+        open "$index_page" 2>/dev/null || true
+    fi
+}
+
+# Main execution
+main() {
+    print_system_info
+    check_prerequisites
+    build_benchmarks
+
+    if [[ -n "$BENCH_NAME" ]]; then
+        # Run specific benchmark
+        run_benchmark "$BENCH_NAME"
+    else
+        # Run all benchmarks
+        run_all_benchmarks
+    fi
+
+    if [[ "$SAVE_BASELINE" == true ]]; then
+        save_baseline
+    fi
+
+    generate_summary
+    generate_json_output
+    generate_html_report
+
+    print_header "Benchmark Run Complete"
+
+    echo "To view detailed results:"
+    echo "  open $RESULTS_DIR/report/index.html"
+    echo ""
+    echo "To compare with baseline:"
+    echo "  $0 --save-baseline  # First, save current as baseline"
+    echo "  # Make changes..."
+    echo "  $0 --compare        # Then compare new results"
+}
+
+# Run main
+main