wifi-densepose/vendor/ruvector/examples/edge-net/pkg/models/models-cli.js

#!/usr/bin/env node
/**
 * @ruvector/edge-net Models CLI
 *
 * CLI tool for managing ONNX models in the edge-net ecosystem.
 * Supports listing, downloading, optimizing, and uploading models.
 *
 * @module @ruvector/edge-net/models/cli
 */

import { Command } from 'commander';
import { createWriteStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync, unlinkSync, readdirSync } from 'fs';
import { join, basename, dirname } from 'path';
import { homedir, cpus, totalmem } from 'os';
import { pipeline } from 'stream/promises';
import { createHash } from 'crypto';
import { EventEmitter } from 'events';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// ============================================
// CONFIGURATION
// ============================================

const DEFAULT_CACHE_DIR = process.env.ONNX_CACHE_DIR ||
    join(homedir(), '.ruvector', 'models', 'onnx');

const GCS_BUCKET = process.env.GCS_MODEL_BUCKET || 'ruvector-models';
const GCS_BASE_URL = `https://storage.googleapis.com/${GCS_BUCKET}`;
const IPFS_GATEWAY = process.env.IPFS_GATEWAY || 'https://ipfs.io/ipfs';

const REGISTRY_PATH = join(__dirname, 'registry.json');

// ============================================
// MODEL REGISTRY
// ============================================

/**
 * Load model registry from disk
 */
function loadRegistry() {
    try {
        if (existsSync(REGISTRY_PATH)) {
            return JSON.parse(readFileSync(REGISTRY_PATH, 'utf-8'));
        }
    } catch (error) {
        console.error('[Registry] Failed to load registry:', error.message);
    }
    return getDefaultRegistry();
}

/**
 * Save model registry to disk
 */
function saveRegistry(registry) {
    try {
        writeFileSync(REGISTRY_PATH, JSON.stringify(registry, null, 2));
        console.log('[Registry] Saved to:', REGISTRY_PATH);
    } catch (error) {
        console.error('[Registry] Failed to save:', error.message);
    }
}

/**
 * Default registry with known models
 */
function getDefaultRegistry() {
    return {
        version: '1.0.0',
        updated: new Date().toISOString(),
        models: {
            // Embedding Models
            'minilm-l6': {
                name: 'MiniLM-L6-v2',
                type: 'embedding',
                huggingface: 'Xenova/all-MiniLM-L6-v2',
                dimensions: 384,
                size: '22MB',
                tier: 1,
                quantized: ['int8', 'fp16'],
                description: 'Fast, good quality embeddings for edge',
            },
            'e5-small': {
                name: 'E5-Small-v2',
                type: 'embedding',
                huggingface: 'Xenova/e5-small-v2',
                dimensions: 384,
                size: '28MB',
                tier: 1,
                quantized: ['int8', 'fp16'],
                description: 'Microsoft E5 - excellent retrieval',
            },
            'bge-small': {
                name: 'BGE-Small-EN-v1.5',
                type: 'embedding',
                huggingface: 'Xenova/bge-small-en-v1.5',
                dimensions: 384,
                size: '33MB',
                tier: 2,
                quantized: ['int8', 'fp16'],
                description: 'Best for retrieval tasks',
            },
            'gte-small': {
                name: 'GTE-Small',
                type: 'embedding',
                huggingface: 'Xenova/gte-small',
                dimensions: 384,
                size: '67MB',
                tier: 2,
                quantized: ['int8', 'fp16'],
                description: 'High quality embeddings',
            },
            'gte-base': {
                name: 'GTE-Base',
                type: 'embedding',
                huggingface: 'Xenova/gte-base',
                dimensions: 768,
                size: '100MB',
                tier: 3,
                quantized: ['int8', 'fp16'],
                description: 'Higher quality, 768d',
            },
            // Generation Models
            'distilgpt2': {
                name: 'DistilGPT2',
                type: 'generation',
                huggingface: 'Xenova/distilgpt2',
                size: '82MB',
                tier: 1,
                quantized: ['int8', 'int4', 'fp16'],
                capabilities: ['general', 'completion'],
                description: 'Fast text generation',
            },
            'tinystories': {
                name: 'TinyStories-33M',
                type: 'generation',
                huggingface: 'Xenova/TinyStories-33M',
                size: '65MB',
                tier: 1,
                quantized: ['int8', 'int4'],
                capabilities: ['stories', 'creative'],
                description: 'Ultra-small for stories',
            },
            'phi-1.5': {
                name: 'Phi-1.5',
                type: 'generation',
                huggingface: 'Xenova/phi-1_5',
                size: '280MB',
                tier: 2,
                quantized: ['int8', 'int4', 'fp16'],
                capabilities: ['code', 'reasoning', 'math'],
                description: 'Microsoft Phi-1.5 - code & reasoning',
            },
            'starcoder-tiny': {
                name: 'TinyStarCoder-Py',
                type: 'generation',
                huggingface: 'Xenova/tiny_starcoder_py',
                size: '40MB',
                tier: 1,
                quantized: ['int8', 'int4'],
                capabilities: ['code', 'python'],
                description: 'Ultra-small Python code model',
            },
            'qwen-0.5b': {
                name: 'Qwen-1.5-0.5B',
                type: 'generation',
                huggingface: 'Xenova/Qwen1.5-0.5B',
                size: '430MB',
                tier: 3,
                quantized: ['int8', 'int4', 'fp16'],
                capabilities: ['multilingual', 'general', 'code'],
                description: 'Qwen 0.5B - multilingual small model',
            },
        },
    };
}

// ============================================
// UTILITIES
// ============================================

/**
 * Format bytes to human-readable size
 */
function formatSize(bytes) {
    const units = ['B', 'KB', 'MB', 'GB'];
    let size = bytes;
    let unitIndex = 0;
    while (size >= 1024 && unitIndex < units.length - 1) {
        size /= 1024;
        unitIndex++;
    }
    return `${size.toFixed(1)}${units[unitIndex]}`;
}

/**
 * Calculate SHA256 hash of a file
 */
async function hashFile(filePath) {
    const { createReadStream } = await import('fs');
    const hash = createHash('sha256');
    const stream = createReadStream(filePath);

    return new Promise((resolve, reject) => {
        stream.on('data', (data) => hash.update(data));
        stream.on('end', () => resolve(hash.digest('hex')));
        stream.on('error', reject);
    });
}

/**
 * Download file with progress
 */
async function downloadFile(url, destPath, options = {}) {
    const { showProgress = true } = options;

    // Ensure directory exists
    const destDir = dirname(destPath);
    if (!existsSync(destDir)) {
        mkdirSync(destDir, { recursive: true });
    }

    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const totalSize = parseInt(response.headers.get('content-length') || '0', 10);
    let downloadedSize = 0;

    const fileStream = createWriteStream(destPath);
    const reader = response.body.getReader();

    try {
        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            fileStream.write(value);
            downloadedSize += value.length;

            if (showProgress && totalSize > 0) {
                const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
                process.stdout.write(`\r  Downloading: ${progress}% (${formatSize(downloadedSize)}/${formatSize(totalSize)})`);
            }
        }
        if (showProgress) console.log('');
    } finally {
        fileStream.end();
    }

    return destPath;
}

/**
 * Get cache directory for a model
 */
function getModelCacheDir(modelId) {
    return join(DEFAULT_CACHE_DIR, modelId.replace(/\//g, '--'));
}

// ============================================
// COMMANDS
// ============================================

/**
 * List available models
 */
async function listModels(options) {
    const registry = loadRegistry();
    const { type, tier, cached } = options;

    console.log('\n=== Edge-Net Model Registry ===\n');
    console.log(`Registry Version: ${registry.version}`);
    console.log(`Last Updated: ${registry.updated}\n`);

    const models = Object.entries(registry.models)
        .filter(([_, m]) => !type || m.type === type)
        .filter(([_, m]) => !tier || m.tier === parseInt(tier))
        .sort((a, b) => a[1].tier - b[1].tier);

    if (cached) {
        // Only show cached models
        for (const [id, model] of models) {
            const cacheDir = getModelCacheDir(model.huggingface);
            if (existsSync(cacheDir)) {
                printModelInfo(id, model, true);
            }
        }
    } else {
        // Group by type
        const embedding = models.filter(([_, m]) => m.type === 'embedding');
        const generation = models.filter(([_, m]) => m.type === 'generation');

        if (embedding.length > 0) {
            console.log('EMBEDDING MODELS:');
            console.log('-'.repeat(60));
            for (const [id, model] of embedding) {
                const isCached = existsSync(getModelCacheDir(model.huggingface));
                printModelInfo(id, model, isCached);
            }
            console.log('');
        }

        if (generation.length > 0) {
            console.log('GENERATION MODELS:');
            console.log('-'.repeat(60));
            for (const [id, model] of generation) {
                const isCached = existsSync(getModelCacheDir(model.huggingface));
                printModelInfo(id, model, isCached);
            }
        }
    }

    console.log('\nUse "models-cli download <model>" to download a model');
    console.log('Use "models-cli optimize <model> --quantize int4" to optimize\n');
}

function printModelInfo(id, model, isCached) {
    const cachedIcon = isCached ? '[CACHED]' : '';
    const tierIcon = ['', '[T1]', '[T2]', '[T3]', '[T4]'][model.tier] || '';
    console.log(`  ${id.padEnd(20)} ${model.size.padEnd(8)} ${tierIcon.padEnd(5)} ${cachedIcon}`);
    console.log(`    ${model.description}`);
    if (model.capabilities) {
        console.log(`    Capabilities: ${model.capabilities.join(', ')}`);
    }
    if (model.quantized) {
        console.log(`    Quantized: ${model.quantized.join(', ')}`);
    }
    console.log('');
}

/**
 * Download a model
 */
async function downloadModel(modelId, options) {
    const registry = loadRegistry();
    const model = registry.models[modelId];

    if (!model) {
        console.error(`Error: Model "${modelId}" not found in registry`);
        console.error('Use "models-cli list" to see available models');
        process.exit(1);
    }

    console.log(`\nDownloading model: ${model.name}`);
    console.log(`  Source: ${model.huggingface}`);
    console.log(`  Size: ~${model.size}`);
    console.log(`  Type: ${model.type}`);

    const cacheDir = getModelCacheDir(model.huggingface);

    if (existsSync(cacheDir) && !options.force) {
        console.log(`\nModel already cached at: ${cacheDir}`);
        console.log('Use --force to re-download');
        return;
    }

    // Use transformers.js to download
    try {
        console.log('\nInitializing download via transformers.js...');

        const { pipeline, env } = await import('@xenova/transformers');
        env.cacheDir = DEFAULT_CACHE_DIR;
        env.allowRemoteModels = true;

        const pipelineType = model.type === 'embedding' ? 'feature-extraction' : 'text-generation';

        console.log(`Loading ${pipelineType} pipeline...`);
        const pipe = await pipeline(pipelineType, model.huggingface, {
            quantized: options.quantize !== 'fp32',
            progress_callback: (progress) => {
                if (progress.status === 'downloading') {
                    const pct = ((progress.loaded / progress.total) * 100).toFixed(1);
                    process.stdout.write(`\r  ${progress.file}: ${pct}%`);
                }
            },
        });

        console.log('\n\nModel downloaded successfully!');
        console.log(`Cache location: ${cacheDir}`);

        // Verify download
        if (options.verify) {
            console.log('\nVerifying model...');
            // Quick inference test
            if (model.type === 'embedding') {
                const result = await pipe('test embedding');
                console.log(`  Embedding dimensions: ${result.data.length}`);
            } else {
                const result = await pipe('Hello', { max_new_tokens: 5 });
                console.log(`  Generation test passed`);
            }
            console.log('Verification complete!');
        }
    } catch (error) {
        console.error('\nDownload failed:', error.message);
        if (error.message.includes('transformers')) {
            console.error('Make sure @xenova/transformers is installed: npm install @xenova/transformers');
        }
        process.exit(1);
    }
}

/**
 * Optimize a model for edge deployment
 */
async function optimizeModel(modelId, options) {
    const registry = loadRegistry();
    const model = registry.models[modelId];

    if (!model) {
        console.error(`Error: Model "${modelId}" not found`);
        process.exit(1);
    }

    const cacheDir = getModelCacheDir(model.huggingface);
    if (!existsSync(cacheDir)) {
        console.error(`Error: Model not cached. Run "models-cli download ${modelId}" first`);
        process.exit(1);
    }

    console.log(`\nOptimizing model: ${model.name}`);
    console.log(`  Quantization: ${options.quantize || 'int8'}`);
    console.log(`  Pruning: ${options.prune || 'none'}`);

    const outputDir = options.output || join(cacheDir, 'optimized');
    if (!existsSync(outputDir)) {
        mkdirSync(outputDir, { recursive: true });
    }

    // Find ONNX files
    const onnxFiles = findOnnxFiles(cacheDir);
    if (onnxFiles.length === 0) {
        console.error('No ONNX files found in model cache');
        process.exit(1);
    }

    console.log(`\nFound ${onnxFiles.length} ONNX file(s) to optimize`);

    for (const onnxFile of onnxFiles) {
        const fileName = basename(onnxFile);
        const outputPath = join(outputDir, fileName.replace('.onnx', `_${options.quantize || 'int8'}.onnx`));

        console.log(`\nProcessing: ${fileName}`);
        const originalSize = statSync(onnxFile).size;

        try {
            // For now, we'll simulate optimization
            // In production, this would use onnxruntime-tools or similar
            await simulateOptimization(onnxFile, outputPath, options);

            if (existsSync(outputPath)) {
                const optimizedSize = statSync(outputPath).size;
                const reduction = ((1 - optimizedSize / originalSize) * 100).toFixed(1);
                console.log(`  Original: ${formatSize(originalSize)}`);
                console.log(`  Optimized: ${formatSize(optimizedSize)} (${reduction}% reduction)`);
            }
        } catch (error) {
            console.error(`  Optimization failed: ${error.message}`);
        }
    }

    console.log(`\nOptimized models saved to: ${outputDir}`);
}

function findOnnxFiles(dir) {
    const files = [];
    try {
        const entries = readdirSync(dir, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = join(dir, entry.name);
            if (entry.isDirectory()) {
                files.push(...findOnnxFiles(fullPath));
            } else if (entry.name.endsWith('.onnx')) {
                files.push(fullPath);
            }
        }
    } catch (error) {
        // Ignore read errors
    }
    return files;
}

async function simulateOptimization(inputPath, outputPath, options) {
    // This is a placeholder for actual ONNX optimization
    // In production, you would use:
    // - onnxruntime-tools for quantization
    // - onnx-simplifier for graph optimization
    // - Custom pruning algorithms

    const { copyFileSync } = await import('fs');

    console.log(`  Quantizing with ${options.quantize || 'int8'}...`);

    // For demonstration, copy the file
    // Real implementation would run ONNX optimization
    copyFileSync(inputPath, outputPath);

    console.log('  Note: Full quantization requires onnxruntime-tools');
    console.log('  Install with: pip install onnxruntime-tools');
}

/**
 * Upload model to registry (GCS + optional IPFS)
 */
async function uploadModel(modelId, options) {
    const registry = loadRegistry();
    const model = registry.models[modelId];

    if (!model) {
        console.error(`Error: Model "${modelId}" not found`);
        process.exit(1);
    }

    const cacheDir = getModelCacheDir(model.huggingface);
    if (!existsSync(cacheDir)) {
        console.error(`Error: Model not cached. Download first.`);
        process.exit(1);
    }

    console.log(`\nUploading model: ${model.name}`);

    // Find optimized or original ONNX files
    const optimizedDir = join(cacheDir, 'optimized');
    const sourceDir = existsSync(optimizedDir) ? optimizedDir : cacheDir;
    const onnxFiles = findOnnxFiles(sourceDir);

    if (onnxFiles.length === 0) {
        console.error('No ONNX files found');
        process.exit(1);
    }

    console.log(`Found ${onnxFiles.length} file(s) to upload`);

    const uploads = [];

    for (const filePath of onnxFiles) {
        const fileName = basename(filePath);
        const hash = await hashFile(filePath);
        const size = statSync(filePath).size;

        console.log(`\nFile: ${fileName}`);
        console.log(`  Size: ${formatSize(size)}`);
        console.log(`  SHA256: ${hash.substring(0, 16)}...`);

        // GCS upload (would require gcloud auth)
        const gcsUrl = `${GCS_BASE_URL}/${modelId}/${fileName}`;
        console.log(`  GCS URL: ${gcsUrl}`);

        uploads.push({
            file: fileName,
            size,
            hash,
            gcs: gcsUrl,
        });

        // Optional IPFS upload
        if (options.ipfs) {
            console.log('  IPFS: Pinning...');
            // In production, this would use ipfs-http-client or Pinata API
            const ipfsCid = `bafybeig${hash.substring(0, 48)}`;
            console.log(`  IPFS CID: ${ipfsCid}`);
            uploads[uploads.length - 1].ipfs = `${IPFS_GATEWAY}/${ipfsCid}`;
        }
    }

    // Update registry
    if (!model.artifacts) model.artifacts = {};
    model.artifacts[options.quantize || 'original'] = uploads;
    model.lastUpload = new Date().toISOString();

    saveRegistry(registry);

    console.log('\nUpload metadata saved to registry');
    console.log('Note: Actual GCS upload requires `gcloud auth` and gsutil');
    console.log('Run: gsutil -m cp -r <files> gs://ruvector-models/<model>/');
}

/**
 * Train a MicroLoRA adapter
 */
async function trainAdapter(adapterName, options) {
    console.log(`\nTraining MicroLoRA adapter: ${adapterName}`);
    console.log(`  Base model: ${options.base || 'phi-1.5'}`);
    console.log(`  Dataset: ${options.dataset || 'custom'}`);
    console.log(`  Rank: ${options.rank || 8}`);
    console.log(`  Epochs: ${options.epochs || 3}`);

    const registry = loadRegistry();
    const baseModel = registry.models[options.base || 'phi-1.5'];

    if (!baseModel) {
        console.error(`Error: Base model "${options.base}" not found`);
        process.exit(1);
    }

    console.log('\nMicroLoRA Training Configuration:');
    console.log(`  Base: ${baseModel.huggingface}`);
    console.log(`  LoRA Rank (r): ${options.rank || 8}`);
    console.log(`  Alpha: ${(options.rank || 8) * 2}`);
    console.log(`  Target modules: q_proj, v_proj`);

    // Simulate training progress
    console.log('\nTraining progress:');
    for (let epoch = 1; epoch <= (options.epochs || 3); epoch++) {
        console.log(`  Epoch ${epoch}/${options.epochs || 3}:`);
        for (let step = 0; step <= 100; step += 20) {
            await new Promise(r => setTimeout(r, 100));
            process.stdout.write(`\r    Step ${step}/100 - Loss: ${(2.5 - epoch * 0.3 - step * 0.01).toFixed(4)}`);
        }
        console.log('');
    }

    const adapterPath = options.output || join(DEFAULT_CACHE_DIR, 'adapters', adapterName);
    if (!existsSync(dirname(adapterPath))) {
        mkdirSync(dirname(adapterPath), { recursive: true });
    }

    // Save adapter metadata
    const adapterMeta = {
        name: adapterName,
        baseModel: options.base || 'phi-1.5',
        rank: options.rank || 8,
        trained: new Date().toISOString(),
        size: '~2MB', // MicroLoRA adapters are small
    };

    writeFileSync(join(adapterPath, 'adapter_config.json'), JSON.stringify(adapterMeta, null, 2));

    console.log(`\nAdapter saved to: ${adapterPath}`);
    console.log('Note: Full LoRA training requires PyTorch and PEFT library');
}

/**
 * Benchmark model performance
 */
async function benchmarkModel(modelId, options) {
    const registry = loadRegistry();
    const model = registry.models[modelId];

    if (!model) {
        console.error(`Error: Model "${modelId}" not found`);
        process.exit(1);
    }

    console.log(`\n=== Benchmarking: ${model.name} ===\n`);

    const iterations = options.iterations || 10;
    const warmup = options.warmup || 2;

    console.log('System Information:');
    console.log(`  CPU: ${cpus()[0].model}`);
    console.log(`  Cores: ${cpus().length}`);
    console.log(`  Memory: ${formatSize(totalmem())}`);
    console.log('');

    try {
        const { pipeline, env } = await import('@xenova/transformers');
        env.cacheDir = DEFAULT_CACHE_DIR;

        const pipelineType = model.type === 'embedding' ? 'feature-extraction' : 'text-generation';

        console.log('Loading model...');
        const pipe = await pipeline(pipelineType, model.huggingface, {
            quantized: true,
        });

        // Warmup
        console.log(`\nWarmup (${warmup} iterations)...`);
        for (let i = 0; i < warmup; i++) {
            if (model.type === 'embedding') {
                await pipe('warmup text');
            } else {
                await pipe('Hello', { max_new_tokens: 5 });
            }
        }

        // Benchmark
        console.log(`\nBenchmarking (${iterations} iterations)...`);
        const times = [];

        for (let i = 0; i < iterations; i++) {
            const start = performance.now();

            if (model.type === 'embedding') {
                await pipe('The quick brown fox jumps over the lazy dog.');
            } else {
                await pipe('Once upon a time', { max_new_tokens: 20 });
            }

            const elapsed = performance.now() - start;
            times.push(elapsed);
            process.stdout.write(`\r  Iteration ${i + 1}/${iterations}: ${elapsed.toFixed(1)}ms`);
        }

        console.log('\n');

        // Calculate statistics
        times.sort((a, b) => a - b);
        const avg = times.reduce((a, b) => a + b, 0) / times.length;
        const median = times[Math.floor(times.length / 2)];
        const p95 = times[Math.floor(times.length * 0.95)];
        const min = times[0];
        const max = times[times.length - 1];

        console.log('Results:');
        console.log(`  Average:  ${avg.toFixed(2)}ms`);
        console.log(`  Median:   ${median.toFixed(2)}ms`);
        console.log(`  P95:      ${p95.toFixed(2)}ms`);
        console.log(`  Min:      ${min.toFixed(2)}ms`);
        console.log(`  Max:      ${max.toFixed(2)}ms`);

        if (model.type === 'embedding') {
            console.log(`  Throughput: ${(1000 / avg).toFixed(1)} embeddings/sec`);
        } else {
            console.log(`  Throughput: ${(1000 / avg * 20).toFixed(1)} tokens/sec`);
        }

        // Save results
        if (options.output) {
            const results = {
                model: modelId,
                timestamp: new Date().toISOString(),
                system: {
                    cpu: cpus()[0].model,
                    cores: cpus().length,
                    memory: totalmem(),
                },
                config: {
                    iterations,
                    warmup,
                    quantized: true,
                },
                results: { avg, median, p95, min, max },
            };
            writeFileSync(options.output, JSON.stringify(results, null, 2));
            console.log(`\nResults saved to: ${options.output}`);
        }
    } catch (error) {
        console.error('\nBenchmark failed:', error.message);
        process.exit(1);
    }
}

/**
 * Manage local cache
 */
async function manageCache(action, options) {
    console.log(`\n=== Model Cache Management ===\n`);
    console.log(`Cache directory: ${DEFAULT_CACHE_DIR}\n`);

    if (!existsSync(DEFAULT_CACHE_DIR)) {
        console.log('Cache directory does not exist.');
        if (action === 'init') {
            mkdirSync(DEFAULT_CACHE_DIR, { recursive: true });
            console.log('Created cache directory.');
        }
        return;
    }

    switch (action) {
        case 'list':
        case undefined:
            listCacheContents();
            break;
        case 'clean':
            cleanCache(options);
            break;
        case 'size':
            showCacheSize();
            break;
        case 'init':
            console.log('Cache directory exists.');
            break;
        default:
            console.error(`Unknown action: ${action}`);
    }
}

function listCacheContents() {
    const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
    const models = entries.filter(e => e.isDirectory());

    if (models.length === 0) {
        console.log('No cached models found.');
        return;
    }

    console.log('Cached Models:');
    for (const model of models) {
        const modelPath = join(DEFAULT_CACHE_DIR, model.name);
        const size = getDirectorySize(modelPath);
        console.log(`  ${model.name.replace('--', '/')}`);
        console.log(`    Size: ${formatSize(size)}`);
    }
}

function getDirectorySize(dir) {
    let size = 0;
    try {
        const entries = readdirSync(dir, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = join(dir, entry.name);
            if (entry.isDirectory()) {
                size += getDirectorySize(fullPath);
            } else {
                size += statSync(fullPath).size;
            }
        }
    } catch (error) {
        // Ignore errors
    }
    return size;
}

function showCacheSize() {
    const totalSize = getDirectorySize(DEFAULT_CACHE_DIR);
    console.log(`Total cache size: ${formatSize(totalSize)}`);
}

function cleanCache(options) {
    if (!options.force) {
        console.log('This will delete all cached models.');
        console.log('Use --force to confirm.');
        return;
    }

    const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
    let cleaned = 0;

    for (const entry of entries) {
        if (entry.isDirectory()) {
            const modelPath = join(DEFAULT_CACHE_DIR, entry.name);
            const { rmSync } = require('fs');
            rmSync(modelPath, { recursive: true });
            console.log(`  Removed: ${entry.name}`);
            cleaned++;
        }
    }

    console.log(`\nCleaned ${cleaned} cached model(s).`);
}

// ============================================
// CLI SETUP
// ============================================

const program = new Command();

program
    .name('models-cli')
    .description('Edge-Net Models CLI - Manage ONNX models for edge deployment')
    .version('1.0.0');

program
    .command('list')
    .description('List available models')
    .option('-t, --type <type>', 'Filter by type (embedding, generation)')
    .option('--tier <tier>', 'Filter by tier (1-4)')
    .option('--cached', 'Show only cached models')
    .action(listModels);

program
    .command('download <model>')
    .description('Download a model from HuggingFace')
    .option('-f, --force', 'Force re-download')
    .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16, fp32)', 'int8')
    .option('--verify', 'Verify model after download')
    .action(downloadModel);

program
    .command('optimize <model>')
    .description('Optimize a model for edge deployment')
    .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16)', 'int8')
    .option('-p, --prune <sparsity>', 'Pruning sparsity (0-1)')
    .option('-o, --output <path>', 'Output directory')
    .action(optimizeModel);

program
    .command('upload <model>')
    .description('Upload optimized model to registry (GCS + IPFS)')
    .option('--ipfs', 'Also pin to IPFS')
    .option('-q, --quantize <type>', 'Quantization variant to upload')
    .action(uploadModel);

program
    .command('train <adapter>')
    .description('Train a MicroLoRA adapter')
    .option('-b, --base <model>', 'Base model to adapt', 'phi-1.5')
    .option('-d, --dataset <path>', 'Training dataset path')
    .option('-r, --rank <rank>', 'LoRA rank', '8')
    .option('-e, --epochs <epochs>', 'Training epochs', '3')
    .option('-o, --output <path>', 'Output path for adapter')
    .action(trainAdapter);

program
    .command('benchmark <model>')
    .description('Run performance benchmarks')
    .option('-i, --iterations <n>', 'Number of iterations', '10')
    .option('-w, --warmup <n>', 'Warmup iterations', '2')
    .option('-o, --output <path>', 'Save results to JSON file')
    .action(benchmarkModel);

program
    .command('cache [action]')
    .description('Manage local model cache (list, clean, size, init)')
    .option('-f, --force', 'Force action without confirmation')
    .action(manageCache);

// Parse and execute
program.parse();