Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
198
vendor/ruvector/examples/scipix/scripts/download_models.sh
vendored
Executable file
198
vendor/ruvector/examples/scipix/scripts/download_models.sh
vendored
Executable file
@@ -0,0 +1,198 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[0;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${BLUE}Downloading RuVector Mathpix ONNX Models${NC}"
|
||||
echo ""
|
||||
|
||||
# Configuration
|
||||
MODELS_DIR="models"
|
||||
GITHUB_REPO="ruvnet/ruvector"
|
||||
RELEASE_TAG="scipix-models-v1.0.0"
|
||||
|
||||
# Model configurations
|
||||
declare -A MODELS=(
|
||||
["scipix_encoder.onnx"]="https://github.com/${GITHUB_REPO}/releases/download/${RELEASE_TAG}/scipix_encoder.onnx"
|
||||
["scipix_decoder.onnx"]="https://github.com/${GITHUB_REPO}/releases/download/${RELEASE_TAG}/scipix_decoder.onnx"
|
||||
["scipix_tokenizer.onnx"]="https://github.com/${GITHUB_REPO}/releases/download/${RELEASE_TAG}/scipix_tokenizer.onnx"
|
||||
)
|
||||
|
||||
# SHA256 checksums (these should be updated with actual checksums)
|
||||
declare -A CHECKSUMS=(
|
||||
["scipix_encoder.onnx"]="SHA256_PLACEHOLDER"
|
||||
["scipix_decoder.onnx"]="SHA256_PLACEHOLDER"
|
||||
["scipix_tokenizer.onnx"]="SHA256_PLACEHOLDER"
|
||||
)
|
||||
|
||||
# Create models directory
|
||||
mkdir -p "${MODELS_DIR}"
|
||||
|
||||
# Function to download a file with progress
|
||||
download_file() {
|
||||
local url=$1
|
||||
local output=$2
|
||||
|
||||
if command -v curl &> /dev/null; then
|
||||
curl -L --progress-bar -o "${output}" "${url}"
|
||||
elif command -v wget &> /dev/null; then
|
||||
wget --show-progress -O "${output}" "${url}"
|
||||
else
|
||||
echo -e "${RED}Error: Neither curl nor wget is available. Please install one.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to verify checksum
|
||||
verify_checksum() {
|
||||
local file=$1
|
||||
local expected=$2
|
||||
|
||||
if [ "${expected}" = "SHA256_PLACEHOLDER" ]; then
|
||||
echo -e "${YELLOW}Warning: No checksum available for ${file}. Skipping verification.${NC}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
if command -v sha256sum &> /dev/null; then
|
||||
local actual=$(sha256sum "${file}" | cut -d' ' -f1)
|
||||
elif command -v shasum &> /dev/null; then
|
||||
local actual=$(shasum -a 256 "${file}" | cut -d' ' -f1)
|
||||
else
|
||||
echo -e "${YELLOW}Warning: No SHA256 tool available. Skipping verification.${NC}"
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ "${actual}" = "${expected}" ]; then
|
||||
echo -e "${GREEN}Checksum verified for ${file}${NC}"
|
||||
return 0
|
||||
else
|
||||
echo -e "${RED}Checksum mismatch for ${file}!${NC}"
|
||||
echo -e "${RED}Expected: ${expected}${NC}"
|
||||
echo -e "${RED}Got: ${actual}${NC}"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Download each model
|
||||
for model in "${!MODELS[@]}"; do
|
||||
output_path="${MODELS_DIR}/${model}"
|
||||
|
||||
# Check if model already exists
|
||||
if [ -f "${output_path}" ]; then
|
||||
echo -e "${YELLOW}${model} already exists. Verifying...${NC}"
|
||||
if verify_checksum "${output_path}" "${CHECKSUMS[$model]}"; then
|
||||
echo -e "${GREEN}${model} is valid. Skipping download.${NC}"
|
||||
continue
|
||||
else
|
||||
echo -e "${YELLOW}${model} verification failed. Re-downloading...${NC}"
|
||||
rm -f "${output_path}"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo -e "${BLUE}Downloading ${model}...${NC}"
|
||||
|
||||
# Try to download from GitHub releases
|
||||
if download_file "${MODELS[$model]}" "${output_path}"; then
|
||||
echo -e "${GREEN}Downloaded ${model}${NC}"
|
||||
|
||||
# Verify checksum
|
||||
if ! verify_checksum "${output_path}" "${CHECKSUMS[$model]}"; then
|
||||
echo -e "${RED}Failed to verify ${model}. Removing file.${NC}"
|
||||
rm -f "${output_path}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}Failed to download from releases. Trying alternative sources...${NC}"
|
||||
|
||||
# Alternative: Download from Hugging Face (if available)
|
||||
HF_URL="https://huggingface.co/ruvnet/scipix-models/resolve/main/${model}"
|
||||
if download_file "${HF_URL}" "${output_path}"; then
|
||||
echo -e "${GREEN}Downloaded ${model} from Hugging Face${NC}"
|
||||
verify_checksum "${output_path}" "${CHECKSUMS[$model]}" || true
|
||||
else
|
||||
echo -e "${RED}Failed to download ${model} from all sources${NC}"
|
||||
|
||||
# Create a placeholder file with instructions
|
||||
cat > "${output_path}.README" << EOF
|
||||
Model: ${model}
|
||||
|
||||
This model file could not be downloaded automatically.
|
||||
|
||||
Please download it manually from one of these sources:
|
||||
1. GitHub Releases: ${MODELS[$model]}
|
||||
2. Hugging Face: https://huggingface.co/ruvnet/scipix-models
|
||||
|
||||
After downloading, place the file at:
|
||||
${output_path}
|
||||
|
||||
Expected SHA256 checksum: ${CHECKSUMS[$model]}
|
||||
EOF
|
||||
echo -e "${YELLOW}Created instructions at ${output_path}.README${NC}"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Create model configuration file
|
||||
echo -e "${BLUE}Creating model configuration...${NC}"
|
||||
cat > "${MODELS_DIR}/config.json" << EOF
|
||||
{
|
||||
"models": {
|
||||
"encoder": {
|
||||
"path": "scipix_encoder.onnx",
|
||||
"type": "image_encoder",
|
||||
"input_shape": [1, 3, 224, 224],
|
||||
"output_dim": 768
|
||||
},
|
||||
"decoder": {
|
||||
"path": "scipix_decoder.onnx",
|
||||
"type": "sequence_decoder",
|
||||
"vocab_size": 50000,
|
||||
"max_length": 512
|
||||
},
|
||||
"tokenizer": {
|
||||
"path": "scipix_tokenizer.onnx",
|
||||
"type": "tokenizer",
|
||||
"vocab_size": 50000
|
||||
}
|
||||
},
|
||||
"version": "1.0.0",
|
||||
"created_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
}
|
||||
EOF
|
||||
|
||||
# Verify all models are present
|
||||
echo ""
|
||||
echo -e "${BLUE}Verifying model files...${NC}"
|
||||
missing_models=0
|
||||
for model in "${!MODELS[@]}"; do
|
||||
if [ -f "${MODELS_DIR}/${model}" ]; then
|
||||
size=$(du -h "${MODELS_DIR}/${model}" | cut -f1)
|
||||
echo -e "${GREEN}✓ ${model} (${size})${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ ${model} (missing)${NC}"
|
||||
((missing_models++))
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
if [ ${missing_models} -eq 0 ]; then
|
||||
echo -e "${GREEN}====================================${NC}"
|
||||
echo -e "${GREEN}All models downloaded successfully!${NC}"
|
||||
echo -e "${GREEN}====================================${NC}"
|
||||
echo ""
|
||||
echo -e "${BLUE}Models are located in: ${MODELS_DIR}/${NC}"
|
||||
echo -e "${BLUE}Configuration file: ${MODELS_DIR}/config.json${NC}"
|
||||
exit 0
|
||||
else
|
||||
echo -e "${YELLOW}====================================${NC}"
|
||||
echo -e "${YELLOW}Warning: ${missing_models} model(s) missing${NC}"
|
||||
echo -e "${YELLOW}====================================${NC}"
|
||||
echo ""
|
||||
echo -e "${YELLOW}Please check the .README files in ${MODELS_DIR}/ for manual download instructions.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user