Files

199 lines
6.1 KiB
Bash
Executable File

#!/bin/bash
set -e

# ANSI escape sequences used to colorize messages; they are stored as literal
# backslash sequences and rendered by `echo -e`.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

echo -e "${BLUE}Downloading RuVector Mathpix ONNX Models${NC}"
echo ""

# Where the models are stored (relative to the current working directory)
# and which GitHub release they are fetched from.
MODELS_DIR="models"
GITHUB_REPO="ruvnet/ruvector"
RELEASE_TAG="scipix-models-v1.0.0"

# MODELS maps each model file name to its download URL on the GitHub release.
# Every URL shares the same release prefix, so it is assembled once.
release_base="https://github.com/${GITHUB_REPO}/releases/download/${RELEASE_TAG}"
declare -A MODELS=()
for model_file in scipix_encoder.onnx scipix_decoder.onnx scipix_tokenizer.onnx; do
  MODELS["${model_file}"]="${release_base}/${model_file}"
done

# CHECKSUMS maps each model file name to its expected SHA256 digest.
# The values below are placeholders and should be replaced with the actual
# checksums of the published files.
declare -A CHECKSUMS=()
CHECKSUMS["scipix_encoder.onnx"]="SHA256_PLACEHOLDER"
CHECKSUMS["scipix_decoder.onnx"]="SHA256_PLACEHOLDER"
CHECKSUMS["scipix_tokenizer.onnx"]="SHA256_PLACEHOLDER"

# Make sure the destination directory exists before anything is written to it.
mkdir -p "${MODELS_DIR}"
#######################################
# Download a URL to a local file, showing a progress indicator.
# Uses curl when it is installed, otherwise wget.
# Globals:   RED, NC (read) - color codes for the error message
# Arguments: $1 - source URL
#            $2 - destination file path
# Outputs:   the tool's progress display; an error message if neither tool
#            is installed
# Returns:   0 on success, otherwise the exit status of curl/wget.
#            Exits the script with status 1 when neither curl nor wget is
#            available.
#######################################
download_file() {
  local url=$1
  local output=$2
  local status=0

  if command -v curl &> /dev/null; then
    # --fail makes curl return non-zero on HTTP 4xx/5xx responses. Without it
    # a 404 page is saved as the "model" and the call reports success, so the
    # caller never falls back to another source.
    curl --fail -L --progress-bar -o "${output}" "${url}" || status=$?
  elif command -v wget &> /dev/null; then
    # wget already exits non-zero when the server returns an error response.
    wget --show-progress -O "${output}" "${url}" || status=$?
  else
    echo -e "${RED}Error: Neither curl nor wget is available. Please install one.${NC}"
    exit 1
  fi

  if [ "${status}" -ne 0 ]; then
    # Do not leave a truncated or empty file behind: a later existence check
    # would mistake it for a downloaded model.
    rm -f -- "${output}"
  fi
  return "${status}"
}
#######################################
# Verify a file against an expected SHA256 digest.
# Globals:   RED, GREEN, YELLOW, NC (read) - color codes for messages
# Arguments: $1 - path of the file to check
#            $2 - expected SHA256 hex digest, or the literal
#                 "SHA256_PLACEHOLDER" to skip verification
# Outputs:   a status message describing the outcome
# Returns:   0 if the digest matches, or if verification is skipped because
#            the expected value is the placeholder or no SHA256 tool is
#            installed; 1 on mismatch or if no digest could be computed
#######################################
verify_checksum() {
  local file=$1
  local expected=$2
  # Declared separately from its assignment so `local` does not mask the
  # status of the command substitution.
  local actual=""

  if [ "${expected}" = "SHA256_PLACEHOLDER" ]; then
    echo -e "${YELLOW}Warning: No checksum available for ${file}. Skipping verification.${NC}"
    return 0
  fi

  # The file is fed on stdin so the tool never has to echo (and possibly
  # escape) the file name in its output line.
  if command -v sha256sum &> /dev/null; then
    actual=$(sha256sum < "${file}" | cut -d' ' -f1)
  elif command -v shasum &> /dev/null; then
    actual=$(shasum -a 256 < "${file}" | cut -d' ' -f1)
  else
    echo -e "${YELLOW}Warning: No SHA256 tool available. Skipping verification.${NC}"
    return 0
  fi

  if [ -z "${actual}" ]; then
    # An unreadable file yields no digest; never let that compare equal to an
    # empty expected value.
    echo -e "${RED}Could not compute checksum for ${file}!${NC}"
    return 1
  fi

  # Hex digests are case-insensitive; some tools print them in upper case.
  if [ "${actual,,}" = "${expected,,}" ]; then
    echo -e "${GREEN}Checksum verified for ${file}${NC}"
    return 0
  else
    echo -e "${RED}Checksum mismatch for ${file}!${NC}"
    echo -e "${RED}Expected: ${expected}${NC}"
    echo -e "${RED}Got: ${actual}${NC}"
    return 1
  fi
}
# Download each model listed in MODELS into MODELS_DIR.
# For every model: reuse an existing file if it verifies, otherwise download
# from the GitHub release, then from Hugging Face as a fallback. If both
# sources fail, write a <model>.README with manual download instructions.
for model in "${!MODELS[@]}"; do
  output_path="${MODELS_DIR}/${model}"

  # Check if model already exists
  if [ -f "${output_path}" ]; then
    echo -e "${YELLOW}${model} already exists. Verifying...${NC}"
    if verify_checksum "${output_path}" "${CHECKSUMS[$model]}"; then
      echo -e "${GREEN}${model} is valid. Skipping download.${NC}"
      continue
    else
      echo -e "${YELLOW}${model} verification failed. Re-downloading...${NC}"
      rm -f "${output_path}"
    fi
  fi

  echo -e "${BLUE}Downloading ${model}...${NC}"

  # Try to download from GitHub releases
  if download_file "${MODELS[$model]}" "${output_path}"; then
    echo -e "${GREEN}Downloaded ${model}${NC}"
    # Verify checksum
    if ! verify_checksum "${output_path}" "${CHECKSUMS[$model]}"; then
      echo -e "${RED}Failed to verify ${model}. Removing file.${NC}"
      rm -f "${output_path}"
      exit 1
    fi
  else
    # A failed transfer may leave a partial file; drop it so it is never
    # mistaken for a complete model.
    rm -f "${output_path}"
    echo -e "${YELLOW}Failed to download from releases. Trying alternative sources...${NC}"

    # Alternative: Download from Hugging Face (if available)
    HF_URL="https://huggingface.co/ruvnet/scipix-models/resolve/main/${model}"
    if download_file "${HF_URL}" "${output_path}"; then
      echo -e "${GREEN}Downloaded ${model} from Hugging Face${NC}"
      # The fallback source gets the same integrity check as the primary one:
      # a file that fails verification must not be kept.
      if ! verify_checksum "${output_path}" "${CHECKSUMS[$model]}"; then
        echo -e "${RED}Failed to verify ${model}. Removing file.${NC}"
        rm -f "${output_path}"
        exit 1
      fi
    else
      # Remove any leftover partial file so the final presence check reports
      # this model as missing.
      rm -f "${output_path}"
      echo -e "${RED}Failed to download ${model} from all sources${NC}"
      # Create a placeholder file with instructions
      cat > "${output_path}.README" << EOF
Model: ${model}
This model file could not be downloaded automatically.
Please download it manually from one of these sources:
1. GitHub Releases: ${MODELS[$model]}
2. Hugging Face: https://huggingface.co/ruvnet/scipix-models
After downloading, place the file at:
${output_path}
Expected SHA256 checksum: ${CHECKSUMS[$model]}
EOF
      echo -e "${YELLOW}Created instructions at ${output_path}.README${NC}"
    fi
  fi
done
# Create model configuration file
# Writes ${MODELS_DIR}/config.json describing the three model files together
# with a UTC creation timestamp.
echo -e "${BLUE}Creating model configuration...${NC}"
config_file="${MODELS_DIR}/config.json"
generated_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
cat << EOF > "${config_file}"
{
"models": {
"encoder": {
"path": "scipix_encoder.onnx",
"type": "image_encoder",
"input_shape": [1, 3, 224, 224],
"output_dim": 768
},
"decoder": {
"path": "scipix_decoder.onnx",
"type": "sequence_decoder",
"vocab_size": 50000,
"max_length": 512
},
"tokenizer": {
"path": "scipix_tokenizer.onnx",
"type": "tokenizer",
"vocab_size": 50000
}
},
"version": "1.0.0",
"created_at": "${generated_at}"
}
EOF
# Verify all models are present
# Lists every model in MODELS with its on-disk size, or marks it as missing,
# and counts the missing ones in missing_models.
echo ""
echo -e "${BLUE}Verifying model files...${NC}"
missing_models=0
for model in "${!MODELS[@]}"; do
  if [ -f "${MODELS_DIR}/${model}" ]; then
    size=$(du -h "${MODELS_DIR}/${model}" | cut -f1)
    echo -e "${GREEN}${model} (${size})${NC}"
  else
    echo -e "${RED}${model} (missing)${NC}"
    # Use an arithmetic assignment rather than ((missing_models++)): the
    # post-increment evaluates to 0 on the first miss, which gives (( )) a
    # status of 1 and makes `set -e` abort the script before the summary.
    missing_models=$((missing_models + 1))
  fi
done
# Final report: print a warning banner and exit 1 when any model is missing,
# otherwise print a success banner with the output locations and exit 0.
echo ""
if ! [ "${missing_models}" -eq 0 ]; then
  rule="${YELLOW}====================================${NC}"
  echo -e "${rule}"
  echo -e "${YELLOW}Warning: ${missing_models} model(s) missing${NC}"
  echo -e "${rule}"
  echo ""
  echo -e "${YELLOW}Please check the .README files in ${MODELS_DIR}/ for manual download instructions.${NC}"
  exit 1
fi

rule="${GREEN}====================================${NC}"
echo -e "${rule}"
echo -e "${GREEN}All models downloaded successfully!${NC}"
echo -e "${rule}"
echo ""
echo -e "${BLUE}Models are located in: ${MODELS_DIR}/${NC}"
echo -e "${BLUE}Configuration file: ${MODELS_DIR}/config.json${NC}"
exit 0