Files

915 lines
30 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* @ruvector/edge-net Models CLI
*
* CLI tool for managing ONNX models in the edge-net ecosystem.
* Supports listing, downloading, optimizing, and uploading models.
*
* @module @ruvector/edge-net/models/cli
*/
import { Command } from 'commander';
import { createWriteStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync, unlinkSync, readdirSync, rmSync } from 'fs';
import { join, basename, dirname } from 'path';
import { homedir, cpus, totalmem } from 'os';
import { pipeline } from 'stream/promises';
import { createHash } from 'crypto';
import { EventEmitter } from 'events';
import { fileURLToPath } from 'url';
// ES modules do not provide __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// ============================================
// CONFIGURATION
// ============================================
// Directory holding cached ONNX models; ONNX_CACHE_DIR overrides the per-user default.
const DEFAULT_CACHE_DIR = process.env.ONNX_CACHE_DIR
  ? process.env.ONNX_CACHE_DIR
  : join(homedir(), '.ruvector', 'models', 'onnx');
// Google Cloud Storage bucket name (GCS_MODEL_BUCKET overrides) and the URL prefix built from it.
const GCS_BUCKET = process.env.GCS_MODEL_BUCKET
  ? process.env.GCS_MODEL_BUCKET
  : 'ruvector-models';
const GCS_BASE_URL = `https://storage.googleapis.com/${GCS_BUCKET}`;
// IPFS gateway prefix used when recording IPFS links; IPFS_GATEWAY overrides.
const IPFS_GATEWAY = process.env.IPFS_GATEWAY
  ? process.env.IPFS_GATEWAY
  : 'https://ipfs.io/ipfs';
// The registry file sits next to this module.
const REGISTRY_PATH = join(__dirname, 'registry.json');
// ============================================
// MODEL REGISTRY
// ============================================
/**
 * Read the model registry from REGISTRY_PATH.
 *
 * Falls back to the built-in default registry when the file is absent, and
 * also when reading or parsing it fails (the failure is logged to stderr).
 *
 * @returns {object} The parsed registry file, or the default registry.
 */
function loadRegistry() {
  let stored;
  let wasLoaded = false;
  try {
    if (existsSync(REGISTRY_PATH)) {
      const raw = readFileSync(REGISTRY_PATH, 'utf-8');
      stored = JSON.parse(raw);
      wasLoaded = true;
    }
  } catch (error) {
    console.error('[Registry] Failed to load registry:', error.message);
  }
  return wasLoaded ? stored : getDefaultRegistry();
}
/**
 * Write the registry to REGISTRY_PATH as pretty-printed JSON.
 *
 * Failures (serialization or file write) are logged to stderr and not
 * rethrown.
 *
 * @param {object} registry - Registry object to persist.
 */
function saveRegistry(registry) {
  try {
    const serialized = JSON.stringify(registry, null, 2);
    writeFileSync(REGISTRY_PATH, serialized);
    console.log('[Registry] Saved to:', REGISTRY_PATH);
  } catch (error) {
    console.error('[Registry] Failed to save:', error.message);
  }
}
/**
 * Build the built-in registry of known models.
 *
 * Every call returns freshly created objects and arrays, and `updated` is
 * stamped with the current time.
 *
 * @returns {{version: string, updated: string, models: Object<string, object>}}
 */
function getDefaultRegistry() {
  // Embedding entries all share one shape and the same quantization list.
  const embedding = (name, huggingface, dimensions, size, tier, description) => ({
    name,
    type: 'embedding',
    huggingface,
    dimensions,
    size,
    tier,
    quantized: ['int8', 'fp16'],
    description,
  });
  // Generation entries have no `dimensions` but carry a `capabilities` list.
  const generation = (name, huggingface, size, tier, quantized, capabilities, description) => ({
    name,
    type: 'generation',
    huggingface,
    size,
    tier,
    quantized,
    capabilities,
    description,
  });

  const models = {
    // Embedding Models
    'minilm-l6': embedding('MiniLM-L6-v2', 'Xenova/all-MiniLM-L6-v2', 384, '22MB', 1,
      'Fast, good quality embeddings for edge'),
    'e5-small': embedding('E5-Small-v2', 'Xenova/e5-small-v2', 384, '28MB', 1,
      'Microsoft E5 - excellent retrieval'),
    'bge-small': embedding('BGE-Small-EN-v1.5', 'Xenova/bge-small-en-v1.5', 384, '33MB', 2,
      'Best for retrieval tasks'),
    'gte-small': embedding('GTE-Small', 'Xenova/gte-small', 384, '67MB', 2,
      'High quality embeddings'),
    'gte-base': embedding('GTE-Base', 'Xenova/gte-base', 768, '100MB', 3,
      'Higher quality, 768d'),
    // Generation Models
    'distilgpt2': generation('DistilGPT2', 'Xenova/distilgpt2', '82MB', 1,
      ['int8', 'int4', 'fp16'], ['general', 'completion'],
      'Fast text generation'),
    'tinystories': generation('TinyStories-33M', 'Xenova/TinyStories-33M', '65MB', 1,
      ['int8', 'int4'], ['stories', 'creative'],
      'Ultra-small for stories'),
    'phi-1.5': generation('Phi-1.5', 'Xenova/phi-1_5', '280MB', 2,
      ['int8', 'int4', 'fp16'], ['code', 'reasoning', 'math'],
      'Microsoft Phi-1.5 - code & reasoning'),
    'starcoder-tiny': generation('TinyStarCoder-Py', 'Xenova/tiny_starcoder_py', '40MB', 1,
      ['int8', 'int4'], ['code', 'python'],
      'Ultra-small Python code model'),
    'qwen-0.5b': generation('Qwen-1.5-0.5B', 'Xenova/Qwen1.5-0.5B', '430MB', 3,
      ['int8', 'int4', 'fp16'], ['multilingual', 'general', 'code'],
      'Qwen 0.5B - multilingual small model'),
  };

  return {
    version: '1.0.0',
    updated: new Date().toISOString(),
    models,
  };
}
// ============================================
// UTILITIES
// ============================================
/**
 * Render a byte count using binary units (B, KB, MB, GB) with one decimal.
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * unit (GB) is reached, so very large values stay expressed in GB.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "1.5KB".
 */
function formatSize(bytes) {
  const units = ['B', 'KB', 'MB', 'GB'];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let unit = 0;
  for (; unit < lastUnit && scaled >= 1024; unit += 1) {
    scaled /= 1024;
  }
  return `${scaled.toFixed(1)}${units[unit]}`;
}
/**
 * Compute the SHA-256 digest of a file by streaming its contents.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Lowercase hex digest; rejects if the file cannot be read.
 */
async function hashFile(filePath) {
  const { createReadStream } = await import('fs');
  const digest = createHash('sha256');
  // Readable streams are async-iterable; a read error surfaces as a rejection.
  for await (const chunk of createReadStream(filePath)) {
    digest.update(chunk);
  }
  return digest.digest('hex');
}
/**
 * Download a URL to a local file, optionally printing progress.
 *
 * The returned promise resolves only after the file has been completely
 * flushed to disk. On any failure (HTTP error, network error, disk error)
 * the partially written file is removed and the error is rethrown.
 *
 * @param {string} url - URL to fetch.
 * @param {string} destPath - Destination file path; parent directories are created.
 * @param {{showProgress?: boolean}} [options] - Set showProgress to false to silence output.
 * @returns {Promise<string>} The destination path.
 * @throws {Error} On a non-2xx response, a missing body, or a read/write failure.
 */
async function downloadFile(url, destPath, options = {}) {
  const { showProgress = true } = options;

  // Ensure directory exists
  const destDir = dirname(destPath);
  if (!existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }
  if (!response.body) {
    throw new Error(`Empty response body for ${url}`);
  }

  // A missing or malformed content-length simply disables the percentage display.
  const totalSize = Number.parseInt(response.headers.get('content-length') ?? '0', 10) || 0;
  let downloadedSize = 0;

  const fileStream = createWriteStream(destPath);
  // Remember the first stream error; without a listener an 'error' event would crash the process.
  let streamError = null;
  fileStream.on('error', (error) => {
    streamError = error;
  });

  // Resolve when `eventName` fires on the file stream, reject if the stream fails first.
  const waitForEvent = (eventName) => new Promise((resolve, reject) => {
    if (streamError) {
      reject(streamError);
      return;
    }
    const onEvent = () => {
      fileStream.off('error', onError);
      resolve();
    };
    const onError = (error) => {
      fileStream.off(eventName, onEvent);
      reject(error);
    };
    fileStream.once(eventName, onEvent);
    fileStream.once('error', onError);
  });

  const reader = response.body.getReader();
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (streamError) throw streamError;
      // Respect backpressure so large downloads are not buffered entirely in memory.
      if (!fileStream.write(value)) {
        await waitForEvent('drain');
      }
      downloadedSize += value.length;
      if (showProgress && totalSize > 0) {
        const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
        process.stdout.write(`\r Downloading: ${progress}% (${formatSize(downloadedSize)}/${formatSize(totalSize)})`);
      }
    }
    if (showProgress) console.log('');
    // Wait for the data to reach disk before reporting success.
    const flushed = waitForEvent('finish');
    fileStream.end();
    await flushed;
  } catch (error) {
    await reader.cancel().catch(() => {});
    // Let the stream release its file descriptor before removing the partial file.
    if (!fileStream.closed) {
      await new Promise((resolve) => {
        fileStream.once('close', resolve);
        fileStream.destroy();
      });
    }
    try {
      unlinkSync(destPath);
    } catch {
      // Nothing was written, so there is nothing to remove.
    }
    throw error;
  }
  return destPath;
}
/**
 * Map a model identifier to its directory under DEFAULT_CACHE_DIR.
 *
 * Each "/" in the identifier is replaced by "--" so the identifier becomes a
 * single path segment.
 *
 * @param {string} modelId - Model identifier, e.g. "Xenova/gte-small".
 * @returns {string} Cache directory path for that model.
 */
function getModelCacheDir(modelId) {
  const directoryName = modelId.split('/').join('--');
  return join(DEFAULT_CACHE_DIR, directoryName);
}
// ============================================
// COMMANDS
// ============================================
/**
 * Print the models in the registry, sorted by tier.
 *
 * With `cached` set, only models whose cache directory exists are printed as
 * a flat list; otherwise models are grouped under embedding and generation
 * headings and each is flagged when its cache directory exists.
 *
 * @param {{type?: string, tier?: string, cached?: boolean}} options - Optional filters.
 */
async function listModels(options) {
  const registry = loadRegistry();
  const { type, tier, cached } = options;

  console.log('\n=== Edge-Net Model Registry ===\n');
  console.log(`Registry Version: ${registry.version}`);
  console.log(`Last Updated: ${registry.updated}\n`);

  const wantedTier = tier ? parseInt(tier) : undefined;
  const passesFilters = ([, model]) =>
    (!type || model.type === type) && (!tier || model.tier === wantedTier);
  const byAscendingTier = ([, left], [, right]) => left.tier - right.tier;
  const selected = Object.entries(registry.models)
    .filter(passesFilters)
    .sort(byAscendingTier);
  const hasLocalCopy = (model) => existsSync(getModelCacheDir(model.huggingface));

  if (cached) {
    // Only show cached models
    for (const [id, model] of selected) {
      if (!hasLocalCopy(model)) continue;
      printModelInfo(id, model, true);
    }
  } else {
    // Group by type; only the embedding section is followed by a blank line.
    const sections = [
      { heading: 'EMBEDDING MODELS:', kind: 'embedding', blankAfter: true },
      { heading: 'GENERATION MODELS:', kind: 'generation', blankAfter: false },
    ];
    for (const { heading, kind, blankAfter } of sections) {
      const members = selected.filter(([, model]) => model.type === kind);
      if (members.length === 0) continue;
      console.log(heading);
      console.log('-'.repeat(60));
      for (const [id, model] of members) {
        printModelInfo(id, model, hasLocalCopy(model));
      }
      if (blankAfter) console.log('');
    }
  }

  console.log('\nUse "models-cli download <model>" to download a model');
  console.log('Use "models-cli optimize <model> --quantize int4" to optimize\n');
}
/**
 * Print one registry entry: a padded summary row, the description, and the
 * capability / quantization lists when the model defines them, followed by
 * a blank line.
 *
 * @param {string} id - Registry key of the model.
 * @param {object} model - Registry entry (reads size, tier, description, capabilities, quantized).
 * @param {boolean} isCached - Whether to show the [CACHED] marker.
 */
function printModelInfo(id, model, isCached) {
  const tierBadges = ['', '[T1]', '[T2]', '[T3]', '[T4]'];
  const tierBadge = tierBadges[model.tier] || '';
  const cacheBadge = isCached ? '[CACHED]' : '';
  const columns = [id.padEnd(20), model.size.padEnd(8), tierBadge.padEnd(5), cacheBadge];

  console.log(` ${columns.join(' ')}`);
  console.log(` ${model.description}`);

  const optionalLists = [
    ['Capabilities', model.capabilities],
    ['Quantized', model.quantized],
  ];
  for (const [label, values] of optionalLists) {
    if (values) {
      console.log(` ${label}: ${values.join(', ')}`);
    }
  }
  console.log('');
}
/**
 * Download a registry model by loading it through a transformers.js pipeline.
 *
 * Exits the process with code 1 when the model id is unknown or the download
 * fails. Skips the download when the model's cache directory already exists,
 * unless `options.force` is set.
 *
 * @param {string} modelId - Registry key of the model.
 * @param {{force?: boolean, quantize?: string, verify?: boolean}} options
 *   force: re-download even if cached; quantize: 'fp32' requests the
 *   unquantized weights, any other value the quantized ones; verify: run a
 *   small inference after loading.
 */
async function downloadModel(modelId, options) {
  const registry = loadRegistry();
  const model = registry.models[modelId];
  if (!model) {
    console.error(`Error: Model "${modelId}" not found in registry`);
    console.error('Use "models-cli list" to see available models');
    process.exit(1);
  }

  console.log(`\nDownloading model: ${model.name}`);
  console.log(` Source: ${model.huggingface}`);
  console.log(` Size: ~${model.size}`);
  console.log(` Type: ${model.type}`);

  // NOTE(review): this directory comes from getModelCacheDir; confirm it matches
  // the layout transformers.js actually uses beneath env.cacheDir.
  const cacheDir = getModelCacheDir(model.huggingface);
  if (existsSync(cacheDir) && !options.force) {
    console.log(`\nModel already cached at: ${cacheDir}`);
    console.log('Use --force to re-download');
    return;
  }

  // transformers.js reports per-file download progress with status 'progress';
  // 'downloading' is still accepted for compatibility with the previous check.
  const reportProgress = (progress) => {
    if (progress.status !== 'progress' && progress.status !== 'downloading') return;
    const percent = progress.total > 0
      ? (progress.loaded / progress.total) * 100
      : progress.progress;
    // Skip the update when the size is unknown instead of printing "NaN%".
    if (!Number.isFinite(percent)) return;
    process.stdout.write(`\r ${progress.file}: ${percent.toFixed(1)}%`);
  };

  // Use transformers.js to download
  try {
    console.log('\nInitializing download via transformers.js...');
    const { pipeline, env } = await import('@xenova/transformers');
    env.cacheDir = DEFAULT_CACHE_DIR;
    env.allowRemoteModels = true;

    const pipelineType = model.type === 'embedding' ? 'feature-extraction' : 'text-generation';
    console.log(`Loading ${pipelineType} pipeline...`);
    const pipe = await pipeline(pipelineType, model.huggingface, {
      quantized: options.quantize !== 'fp32',
      progress_callback: reportProgress,
    });

    console.log('\n\nModel downloaded successfully!');
    console.log(`Cache location: ${cacheDir}`);

    // Verify download
    if (options.verify) {
      console.log('\nVerifying model...');
      // Quick inference test
      if (model.type === 'embedding') {
        const result = await pipe('test embedding');
        console.log(` Embedding dimensions: ${result.data.length}`);
      } else {
        await pipe('Hello', { max_new_tokens: 5 });
        console.log(` Generation test passed`);
      }
      console.log('Verification complete!');
    }
  } catch (error) {
    console.error('\nDownload failed:', error.message);
    if (error.code === 'ERR_MODULE_NOT_FOUND' || String(error.message).includes('transformers')) {
      console.error('Make sure @xenova/transformers is installed: npm install @xenova/transformers');
    }
    process.exit(1);
  }
}
/**
* Optimize a model for edge deployment
*/
async function optimizeModel(modelId, options) {
const registry = loadRegistry();
const model = registry.models[modelId];
if (!model) {
console.error(`Error: Model "${modelId}" not found`);
process.exit(1);
}
const cacheDir = getModelCacheDir(model.huggingface);
if (!existsSync(cacheDir)) {
console.error(`Error: Model not cached. Run "models-cli download ${modelId}" first`);
process.exit(1);
}
console.log(`\nOptimizing model: ${model.name}`);
console.log(` Quantization: ${options.quantize || 'int8'}`);
console.log(` Pruning: ${options.prune || 'none'}`);
const outputDir = options.output || join(cacheDir, 'optimized');
if (!existsSync(outputDir)) {
mkdirSync(outputDir, { recursive: true });
}
// Find ONNX files
const onnxFiles = findOnnxFiles(cacheDir);
if (onnxFiles.length === 0) {
console.error('No ONNX files found in model cache');
process.exit(1);
}
console.log(`\nFound ${onnxFiles.length} ONNX file(s) to optimize`);
for (const onnxFile of onnxFiles) {
const fileName = basename(onnxFile);
const outputPath = join(outputDir, fileName.replace('.onnx', `_${options.quantize || 'int8'}.onnx`));
console.log(`\nProcessing: ${fileName}`);
const originalSize = statSync(onnxFile).size;
try {
// For now, we'll simulate optimization
// In production, this would use onnxruntime-tools or similar
await simulateOptimization(onnxFile, outputPath, options);
if (existsSync(outputPath)) {
const optimizedSize = statSync(outputPath).size;
const reduction = ((1 - optimizedSize / originalSize) * 100).toFixed(1);
console.log(` Original: ${formatSize(originalSize)}`);
console.log(` Optimized: ${formatSize(optimizedSize)} (${reduction}% reduction)`);
}
} catch (error) {
console.error(` Optimization failed: ${error.message}`);
}
}
console.log(`\nOptimized models saved to: ${outputDir}`);
}
/**
 * Recursively collect the paths of all files ending in ".onnx" below a directory.
 *
 * A directory that cannot be read contributes no files instead of raising.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of the ONNX files found.
 */
function findOnnxFiles(dir) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    // Ignore read errors
    return [];
  }
  return entries.flatMap((entry) => {
    const entryPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      return findOnnxFiles(entryPath);
    }
    return entry.name.endsWith('.onnx') ? [entryPath] : [];
  });
}
/**
 * Placeholder for real ONNX optimization: copies the input file to the output
 * path unchanged and prints a note about the tooling a real implementation
 * would need (quantization, graph simplification, pruning).
 *
 * @param {string} inputPath - Source ONNX file.
 * @param {string} outputPath - Destination path for the "optimized" file.
 * @param {{quantize?: string}} options - Only used for the log message.
 */
async function simulateOptimization(inputPath, outputPath, options) {
  const fs = await import('fs');
  const scheme = options.quantize || 'int8';
  console.log(` Quantizing with ${scheme}...`);
  // For demonstration the file is copied as-is; no optimization is performed.
  fs.copyFileSync(inputPath, outputPath);
  console.log(' Note: Full quantization requires onnxruntime-tools');
  console.log(' Install with: pip install onnxruntime-tools');
}
/**
 * Record upload metadata for a cached model's ONNX files in the registry.
 *
 * No bytes are transferred: for each file the size, SHA-256 hash and intended
 * GCS URL are computed and stored under `model.artifacts[<variant>]`, then the
 * registry is saved. Files are taken from "<cache dir>/optimized" when that
 * directory exists, otherwise from the cache directory itself. With
 * `options.ipfs` a placeholder CID derived from the file hash is recorded as
 * well; nothing is actually pinned.
 * Exits the process with code 1 when the model is unknown, not cached, or has
 * no ONNX files.
 *
 * @param {string} modelId - Registry key of the model.
 * @param {{ipfs?: boolean, quantize?: string}} options - quantize names the
 *   artifact variant (default 'original').
 */
async function uploadModel(modelId, options) {
  const registry = loadRegistry();
  const model = registry.models[modelId];
  if (!model) {
    console.error(`Error: Model "${modelId}" not found`);
    process.exit(1);
  }

  const cacheDir = getModelCacheDir(model.huggingface);
  if (!existsSync(cacheDir)) {
    console.error(`Error: Model not cached. Download first.`);
    process.exit(1);
  }

  console.log(`\nUploading model: ${model.name}`);

  // Find optimized or original ONNX files
  const optimizedDir = join(cacheDir, 'optimized');
  const sourceDir = existsSync(optimizedDir) ? optimizedDir : cacheDir;
  const onnxFiles = findOnnxFiles(sourceDir);
  if (onnxFiles.length === 0) {
    console.error('No ONNX files found');
    process.exit(1);
  }
  console.log(`Found ${onnxFiles.length} file(s) to upload`);

  const uploads = [];
  for (const filePath of onnxFiles) {
    const fileName = basename(filePath);
    const hash = await hashFile(filePath);
    const size = statSync(filePath).size;
    console.log(`\nFile: ${fileName}`);
    console.log(` Size: ${formatSize(size)}`);
    console.log(` SHA256: ${hash.substring(0, 16)}...`);

    // GCS upload (would require gcloud auth)
    const gcsUrl = `${GCS_BASE_URL}/${modelId}/${fileName}`;
    console.log(` GCS URL: ${gcsUrl}`);
    const record = {
      file: fileName,
      size,
      hash,
      gcs: gcsUrl,
    };

    // Optional IPFS upload
    if (options.ipfs) {
      console.log(' IPFS: Pinning...');
      // Placeholder CID built from the hash; a real upload would use
      // ipfs-http-client or the Pinata API and record the CID it returns.
      const ipfsCid = `bafybeig${hash.substring(0, 48)}`;
      console.log(` IPFS CID: ${ipfsCid}`);
      record.ipfs = `${IPFS_GATEWAY}/${ipfsCid}`;
    }
    uploads.push(record);
  }

  // Update registry
  if (!model.artifacts) model.artifacts = {};
  model.artifacts[options.quantize || 'original'] = uploads;
  model.lastUpload = new Date().toISOString();
  saveRegistry(registry);

  console.log('\nUpload metadata saved to registry');
  console.log('Note: Actual GCS upload requires `gcloud auth` and gsutil');
  // Point at the configured bucket so the hint matches the URLs recorded above.
  console.log(`Run: gsutil -m cp -r <files> gs://${GCS_BUCKET}/<model>/`);
}
/**
 * Simulate training a MicroLoRA adapter and write its metadata file.
 *
 * No training takes place: a synthetic loss curve is printed and
 * "adapter_config.json" is written into the adapter directory
 * (default: "<cache dir>/adapters/<adapterName>"), which is created if needed.
 * Exits the process with code 1 when the base model is not in the registry.
 *
 * @param {string} adapterName - Name of the adapter.
 * @param {{base?: string, dataset?: string, rank?: string|number, epochs?: string|number, output?: string}} options
 *   rank and epochs may arrive as strings from the CLI; values that are not
 *   positive integers fall back to the defaults (8 and 3).
 */
async function trainAdapter(adapterName, options) {
  // CLI option values are strings, so normalize the numeric ones once up front.
  const toPositiveInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };
  const baseModelId = options.base || 'phi-1.5';
  const rank = toPositiveInt(options.rank, 8);
  const epochs = toPositiveInt(options.epochs, 3);

  console.log(`\nTraining MicroLoRA adapter: ${adapterName}`);
  console.log(` Base model: ${baseModelId}`);
  console.log(` Dataset: ${options.dataset || 'custom'}`);
  console.log(` Rank: ${rank}`);
  console.log(` Epochs: ${epochs}`);

  const registry = loadRegistry();
  const baseModel = registry.models[baseModelId];
  if (!baseModel) {
    console.error(`Error: Base model "${baseModelId}" not found`);
    process.exit(1);
  }

  console.log('\nMicroLoRA Training Configuration:');
  console.log(` Base: ${baseModel.huggingface}`);
  console.log(` LoRA Rank (r): ${rank}`);
  console.log(` Alpha: ${rank * 2}`);
  console.log(` Target modules: q_proj, v_proj`);

  // Simulate training progress
  console.log('\nTraining progress:');
  for (let epoch = 1; epoch <= epochs; epoch++) {
    console.log(` Epoch ${epoch}/${epochs}:`);
    for (let step = 0; step <= 100; step += 20) {
      await new Promise(r => setTimeout(r, 100));
      process.stdout.write(`\r Step ${step}/100 - Loss: ${(2.5 - epoch * 0.3 - step * 0.01).toFixed(4)}`);
    }
    console.log('');
  }

  const adapterPath = options.output || join(DEFAULT_CACHE_DIR, 'adapters', adapterName);
  // The config file is written inside adapterPath, so that directory itself must exist.
  mkdirSync(adapterPath, { recursive: true });

  // Save adapter metadata
  const adapterMeta = {
    name: adapterName,
    baseModel: baseModelId,
    rank,
    trained: new Date().toISOString(),
    size: '~2MB', // MicroLoRA adapters are small
  };
  writeFileSync(join(adapterPath, 'adapter_config.json'), JSON.stringify(adapterMeta, null, 2));

  console.log(`\nAdapter saved to: ${adapterPath}`);
  console.log('Note: Full LoRA training requires PyTorch and PEFT library');
}
/**
 * Benchmark inference latency of a registry model using transformers.js.
 *
 * Runs warmup iterations followed by timed iterations, prints latency
 * statistics and an approximate throughput, and optionally writes the results
 * to a JSON file. Exits the process with code 1 when the model is unknown or
 * the benchmark fails.
 *
 * @param {string} modelId - Registry key of the model.
 * @param {{iterations?: string|number, warmup?: string|number, output?: string}} options
 *   iterations (default 10, minimum 1) and warmup (default 2, minimum 0) may
 *   arrive as strings from the CLI; invalid values fall back to the defaults.
 */
async function benchmarkModel(modelId, options) {
  const registry = loadRegistry();
  const model = registry.models[modelId];
  if (!model) {
    console.error(`Error: Model "${modelId}" not found`);
    process.exit(1);
  }

  console.log(`\n=== Benchmarking: ${model.name} ===\n`);

  // CLI option values are strings; parse them and reject values below the minimum.
  const toCount = (value, fallback, minimum) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed >= minimum ? parsed : fallback;
  };
  const iterations = toCount(options.iterations, 10, 1);
  const warmup = toCount(options.warmup, 2, 0);
  // Tokens requested per timed generation call; also used for the throughput estimate.
  const generatedTokens = 20;
  const isEmbedding = model.type === 'embedding';
  // The CPU list can be empty on some platforms.
  const cpuModel = cpus()[0]?.model ?? 'unknown';

  console.log('System Information:');
  console.log(` CPU: ${cpuModel}`);
  console.log(` Cores: ${cpus().length}`);
  console.log(` Memory: ${formatSize(totalmem())}`);
  console.log('');

  try {
    const { pipeline, env } = await import('@xenova/transformers');
    env.cacheDir = DEFAULT_CACHE_DIR;
    const pipelineType = isEmbedding ? 'feature-extraction' : 'text-generation';
    console.log('Loading model...');
    const pipe = await pipeline(pipelineType, model.huggingface, {
      quantized: true,
    });

    // Warmup
    console.log(`\nWarmup (${warmup} iterations)...`);
    for (let i = 0; i < warmup; i++) {
      if (isEmbedding) {
        await pipe('warmup text');
      } else {
        await pipe('Hello', { max_new_tokens: 5 });
      }
    }

    // Benchmark
    console.log(`\nBenchmarking (${iterations} iterations)...`);
    const times = [];
    for (let i = 0; i < iterations; i++) {
      const start = performance.now();
      if (isEmbedding) {
        await pipe('The quick brown fox jumps over the lazy dog.');
      } else {
        await pipe('Once upon a time', { max_new_tokens: generatedTokens });
      }
      const elapsed = performance.now() - start;
      times.push(elapsed);
      process.stdout.write(`\r Iteration ${i + 1}/${iterations}: ${elapsed.toFixed(1)}ms`);
    }
    console.log('\n');

    // Calculate statistics
    const sorted = [...times].sort((a, b) => a - b);
    const count = sorted.length;
    const avg = sorted.reduce((a, b) => a + b, 0) / count;
    const middle = Math.floor(count / 2);
    // For an even number of samples the median is the mean of the two middle values.
    const median = count % 2 === 0
      ? (sorted[middle - 1] + sorted[middle]) / 2
      : sorted[middle];
    // Nearest-rank 95th percentile.
    const p95 = sorted[Math.max(0, Math.ceil(count * 0.95) - 1)];
    const min = sorted[0];
    const max = sorted[count - 1];

    console.log('Results:');
    console.log(` Average: ${avg.toFixed(2)}ms`);
    console.log(` Median: ${median.toFixed(2)}ms`);
    console.log(` P95: ${p95.toFixed(2)}ms`);
    console.log(` Min: ${min.toFixed(2)}ms`);
    console.log(` Max: ${max.toFixed(2)}ms`);
    if (isEmbedding) {
      console.log(` Throughput: ${(1000 / avg).toFixed(1)} embeddings/sec`);
    } else {
      // Assumes each call produced the full requested token count.
      console.log(` Throughput: ${(1000 / avg * generatedTokens).toFixed(1)} tokens/sec`);
    }

    // Save results
    if (options.output) {
      const results = {
        model: modelId,
        timestamp: new Date().toISOString(),
        system: {
          cpu: cpuModel,
          cores: cpus().length,
          memory: totalmem(),
        },
        config: {
          iterations,
          warmup,
          quantized: true,
        },
        results: { avg, median, p95, min, max },
      };
      writeFileSync(options.output, JSON.stringify(results, null, 2));
      console.log(`\nResults saved to: ${options.output}`);
    }
  } catch (error) {
    console.error('\nBenchmark failed:', error.message);
    process.exit(1);
  }
}
/**
 * Entry point for the "cache" command: inspect or clean the local model cache.
 *
 * When the cache directory does not exist, only the "init" action does
 * anything (it creates the directory); every other action just reports that
 * the directory is missing.
 *
 * @param {string} [action] - One of list (the default), clean, size, init.
 * @param {object} options - Command options, forwarded to cleanCache.
 */
async function manageCache(action, options) {
  console.log(`\n=== Model Cache Management ===\n`);
  console.log(`Cache directory: ${DEFAULT_CACHE_DIR}\n`);

  const cacheExists = existsSync(DEFAULT_CACHE_DIR);
  if (!cacheExists) {
    console.log('Cache directory does not exist.');
    if (action === 'init') {
      mkdirSync(DEFAULT_CACHE_DIR, { recursive: true });
      console.log('Created cache directory.');
    }
    return;
  }

  // Action dispatch table; an omitted action behaves like "list".
  const handlers = new Map([
    ['list', () => listCacheContents()],
    [undefined, () => listCacheContents()],
    ['clean', () => cleanCache(options)],
    ['size', () => showCacheSize()],
    ['init', () => console.log('Cache directory exists.')],
  ]);
  const handler = handlers.get(action);
  if (handler) {
    handler();
  } else {
    console.error(`Unknown action: ${action}`);
  }
}
/**
 * Print every subdirectory of the cache directory together with its total size.
 *
 * Directory names are shown as model identifiers by turning each "--" back
 * into "/", mirroring how getModelCacheDir encodes every "/" as "--".
 */
function listCacheContents() {
  const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
  const models = entries.filter(e => e.isDirectory());
  if (models.length === 0) {
    console.log('No cached models found.');
    return;
  }
  console.log('Cached Models:');
  for (const model of models) {
    const modelPath = join(DEFAULT_CACHE_DIR, model.name);
    const size = getDirectorySize(modelPath);
    // Reverse all separators, not just the first one.
    console.log(` ${model.name.split('--').join('/')}`);
    console.log(` Size: ${formatSize(size)}`);
  }
}
/**
 * Sum the sizes of all files below a directory, recursively.
 *
 * Best effort: a directory that cannot be read counts as 0 bytes, and an
 * entry that cannot be stat'ed (for example a dangling symlink) is skipped
 * without discarding the sizes of its siblings.
 *
 * @param {string} dir - Directory to measure.
 * @returns {number} Total size in bytes of the readable files.
 */
function getDirectorySize(dir) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    // Unreadable or missing directory: nothing to count.
    return 0;
  }
  let size = 0;
  for (const entry of entries) {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      size += getDirectorySize(fullPath);
      continue;
    }
    try {
      size += statSync(fullPath).size;
    } catch (error) {
      // Skip only this entry; keep counting the rest of the directory.
    }
  }
  return size;
}
/**
 * Print the combined size of everything under the cache directory.
 */
function showCacheSize() {
  const cacheBytes = getDirectorySize(DEFAULT_CACHE_DIR);
  const readable = formatSize(cacheBytes);
  console.log(`Total cache size: ${readable}`);
}
/**
 * Delete every subdirectory of the cache directory.
 *
 * Nothing is deleted unless `options.force` is set; without it only a
 * confirmation hint is printed. Plain files directly inside the cache
 * directory are left untouched.
 *
 * @param {{force?: boolean}} options - Set force to actually delete.
 */
function cleanCache(options) {
  if (!options.force) {
    console.log('This will delete all cached models.');
    console.log('Use --force to confirm.');
    return;
  }
  const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
  let cleaned = 0;
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const modelPath = join(DEFAULT_CACHE_DIR, entry.name);
    // rmSync comes from the module-level 'fs' import; `require` is not defined in ES modules.
    rmSync(modelPath, { recursive: true, force: true });
    console.log(` Removed: ${entry.name}`);
    cleaned++;
  }
  console.log(`\nCleaned ${cleaned} cached model(s).`);
}
// ============================================
// CLI SETUP
// ============================================
// Command-line interface definition: one subcommand per handler function above.
const program = new Command();
program
  .name('models-cli')
  .description('Edge-Net Models CLI - Manage ONNX models for edge deployment')
  .version('1.0.0');

// list: show registry models, optionally filtered
program
  .command('list')
  .description('List available models')
  .option('-t, --type <type>', 'Filter by type (embedding, generation)')
  .option('--tier <tier>', 'Filter by tier (1-4)')
  .option('--cached', 'Show only cached models')
  .action(listModels);

// download: fetch a model into the local cache
program
  .command('download <model>')
  .description('Download a model from HuggingFace')
  .option('-f, --force', 'Force re-download')
  .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16, fp32)', 'int8')
  .option('--verify', 'Verify model after download')
  .action(downloadModel);

// optimize: produce optimized copies of a cached model's ONNX files
program
  .command('optimize <model>')
  .description('Optimize a model for edge deployment')
  .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16)', 'int8')
  .option('-p, --prune <sparsity>', 'Pruning sparsity (0-1)')
  .option('-o, --output <path>', 'Output directory')
  .action(optimizeModel);

// upload: record upload metadata for a cached model
program
  .command('upload <model>')
  .description('Upload optimized model to registry (GCS + IPFS)')
  .option('--ipfs', 'Also pin to IPFS')
  .option('-q, --quantize <type>', 'Quantization variant to upload')
  .action(uploadModel);

// train: create a MicroLoRA adapter
program
  .command('train <adapter>')
  .description('Train a MicroLoRA adapter')
  .option('-b, --base <model>', 'Base model to adapt', 'phi-1.5')
  .option('-d, --dataset <path>', 'Training dataset path')
  .option('-r, --rank <rank>', 'LoRA rank', '8')
  .option('-e, --epochs <epochs>', 'Training epochs', '3')
  .option('-o, --output <path>', 'Output path for adapter')
  .action(trainAdapter);

// benchmark: measure inference latency
program
  .command('benchmark <model>')
  .description('Run performance benchmarks')
  .option('-i, --iterations <n>', 'Number of iterations', '10')
  .option('-w, --warmup <n>', 'Warmup iterations', '2')
  .option('-o, --output <path>', 'Save results to JSON file')
  .action(benchmarkModel);

// cache: inspect or clean the local cache
program
  .command('cache [action]')
  .description('Manage local model cache (list, clean, size, init)')
  .option('-f, --force', 'Force action without confirmation')
  .action(manageCache);

// Parse and execute. The action handlers are async, so parseAsync is used to
// wait for them and to report a rejected handler instead of leaving an
// unhandled promise rejection.
program.parseAsync().catch((error) => {
  console.error(error?.message ?? error);
  process.exit(1);
});