Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
223
vendor/ruvector/npm/packages/ruvllm-cli/README.md
vendored
Normal file
223
vendor/ruvector/npm/packages/ruvllm-cli/README.md
vendored
Normal file
@@ -0,0 +1,223 @@
|
||||
# @ruvector/ruvllm-cli
|
||||
|
||||
[](https://www.npmjs.com/package/@ruvector/ruvllm-cli)
|
||||
[](https://www.npmjs.com/package/@ruvector/ruvllm-cli)
|
||||
[](https://www.npmjs.com/package/@ruvector/ruvllm-cli)
|
||||
[](https://github.com/ruvnet/ruvector/blob/main/LICENSE)
|
||||
[](https://www.typescriptlang.org/)
|
||||
|
||||
**Command-line interface for local LLM inference and benchmarking** - run AI models on your machine with Metal, CUDA, and CPU acceleration.
|
||||
|
||||
## Features
|
||||
|
||||
- **Hardware Acceleration** - Metal (macOS), CUDA (NVIDIA), Vulkan, Apple Neural Engine
|
||||
- **GGUF Support** - Load quantized models (Q4, Q5, Q6, Q8) for efficient inference
|
||||
- **Interactive Chat** - Terminal-based chat sessions with conversation history
|
||||
- **Benchmarking** - Measure tokens/second, memory usage, time-to-first-token
|
||||
- **HTTP Server** - OpenAI-compatible API server for integration
|
||||
- **Model Management** - Download, list, and manage models from HuggingFace
|
||||
- **Streaming Output** - Real-time token streaming for responsive UX
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# Install globally
|
||||
npm install -g @ruvector/ruvllm-cli
|
||||
|
||||
# Or run directly with npx
|
||||
npx @ruvector/ruvllm-cli --help
|
||||
```
|
||||
|
||||
For full native performance, install the Rust binary:
|
||||
|
||||
```bash
|
||||
cargo install ruvllm-cli
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Run Inference
|
||||
|
||||
```bash
|
||||
# Basic inference
|
||||
ruvllm run --model ./llama-7b-q4.gguf --prompt "Explain quantum computing"
|
||||
|
||||
# With options
|
||||
ruvllm run \
|
||||
--model ./model.gguf \
|
||||
--prompt "Write a haiku about Rust" \
|
||||
--temperature 0.8 \
|
||||
--max-tokens 100 \
|
||||
--backend metal
|
||||
```
|
||||
|
||||
### Interactive Chat
|
||||
|
||||
```bash
|
||||
# Start chat session
|
||||
ruvllm chat --model ./model.gguf
|
||||
|
||||
# With system prompt
|
||||
ruvllm chat --model ./model.gguf --system "You are a helpful coding assistant"
|
||||
```
|
||||
|
||||
### Benchmark Performance
|
||||
|
||||
```bash
|
||||
# Run benchmark
|
||||
ruvllm bench --model ./model.gguf --iterations 20
|
||||
|
||||
# Compare backends
|
||||
ruvllm bench --model ./model.gguf --backend metal
|
||||
ruvllm bench --model ./model.gguf --backend cpu
|
||||
```
|
||||
|
||||
### Start Server
|
||||
|
||||
```bash
|
||||
# OpenAI-compatible API server
|
||||
ruvllm serve --model ./model.gguf --port 8080
|
||||
|
||||
# Then use with any OpenAI client
|
||||
curl http://localhost:8080/v1/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"prompt": "Hello", "max_tokens": 50}'
|
||||
```
|
||||
|
||||
### Model Management
|
||||
|
||||
```bash
|
||||
# List available models
|
||||
ruvllm list
|
||||
|
||||
# Download from HuggingFace
|
||||
ruvllm download TheBloke/Llama-2-7B-GGUF
|
||||
|
||||
# Download specific quantization
|
||||
ruvllm download TheBloke/Llama-2-7B-GGUF --quant q4_k_m
|
||||
```
|
||||
|
||||
## CLI Reference
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `run` | Run inference on a prompt |
|
||||
| `chat` | Interactive chat session |
|
||||
| `bench` | Benchmark model performance |
|
||||
| `serve` | Start HTTP server |
|
||||
| `list` | List downloaded models |
|
||||
| `download` | Download model from HuggingFace |
|
||||
|
||||
### Global Options
|
||||
|
||||
| Option | Description | Default |
|
||||
|--------|-------------|---------|
|
||||
| `--model, -m` | Path to GGUF model file | - |
|
||||
| `--backend, -b` | Acceleration backend (metal, cuda, cpu) | auto |
|
||||
| `--threads, -t` | Number of CPU threads | auto |
|
||||
| `--gpu-layers` | Layers to offload to GPU | all |
|
||||
| `--context-size` | Context window size | 2048 |
|
||||
| `--verbose, -v` | Enable verbose logging | false |
|
||||
|
||||
### Generation Options
|
||||
|
||||
| Option | Description | Default |
|
||||
|--------|-------------|---------|
|
||||
| `--temperature` | Sampling temperature (0-2) | 0.7 |
|
||||
| `--top-p` | Nucleus sampling threshold | 0.9 |
|
||||
| `--top-k` | Top-k sampling | 40 |
|
||||
| `--max-tokens` | Maximum tokens to generate | 256 |
|
||||
| `--repeat-penalty` | Repetition penalty | 1.1 |
|
||||
|
||||
## Programmatic Usage
|
||||
|
||||
```typescript
|
||||
import {
|
||||
parseArgs,
|
||||
formatBenchmarkTable,
|
||||
getAvailableBackends,
|
||||
ModelConfig,
|
||||
BenchmarkResult,
|
||||
} from '@ruvector/ruvllm-cli';
|
||||
|
||||
// Parse CLI arguments
|
||||
const args = parseArgs(['--model', './model.gguf', '--temperature', '0.8']);
|
||||
console.log(args); // { model: './model.gguf', temperature: '0.8' }
|
||||
|
||||
// Check available backends
|
||||
const backends = getAvailableBackends();
|
||||
console.log('Available:', backends); // ['cpu', 'metal'] on macOS
|
||||
|
||||
// Format benchmark results
|
||||
const results: BenchmarkResult[] = [
|
||||
{
|
||||
model: 'llama-7b',
|
||||
backend: 'metal',
|
||||
promptTokens: 50,
|
||||
generatedTokens: 100,
|
||||
promptTime: 120,
|
||||
generationTime: 2500,
|
||||
promptTPS: 416.7,
|
||||
generationTPS: 40.0,
|
||||
memoryUsage: 4200,
|
||||
peakMemory: 4800,
|
||||
},
|
||||
];
|
||||
|
||||
console.log(formatBenchmarkTable(results));
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
Benchmarks on Apple M2 Pro with Q4_K_M quantization:
|
||||
|
||||
| Model | Prompt TPS | Gen TPS | Memory |
|
||||
|-------|------------|---------|--------|
|
||||
| Llama-2-7B | 450 | 42 | 4.2 GB |
|
||||
| Mistral-7B | 480 | 45 | 4.1 GB |
|
||||
| Phi-2 | 820 | 85 | 1.8 GB |
|
||||
| TinyLlama-1.1B | 1200 | 120 | 0.8 GB |
|
||||
|
||||
## Configuration
|
||||
|
||||
Create `~/.ruvllm/config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"defaultBackend": "metal",
|
||||
"modelsDir": "~/.ruvllm/models",
|
||||
"cacheDir": "~/.ruvllm/cache",
|
||||
"streaming": true,
|
||||
"logLevel": "info"
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `RUVLLM_MODELS_DIR` | Models directory |
|
||||
| `RUVLLM_CACHE_DIR` | Cache directory |
|
||||
| `RUVLLM_BACKEND` | Default backend |
|
||||
| `RUVLLM_THREADS` | CPU threads |
|
||||
| `HF_TOKEN` | HuggingFace token for gated models |
|
||||
|
||||
## Related Packages
|
||||
|
||||
- [@ruvector/ruvllm](https://www.npmjs.com/package/@ruvector/ruvllm) - LLM orchestration library
|
||||
- [@ruvector/ruvllm-wasm](https://www.npmjs.com/package/@ruvector/ruvllm-wasm) - Browser LLM inference
|
||||
- [ruvector](https://www.npmjs.com/package/ruvector) - All-in-one vector database
|
||||
|
||||
## Documentation
|
||||
|
||||
- [RuvLLM Documentation](https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm)
|
||||
- [CLI Crate](https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-cli)
|
||||
- [API Reference](https://docs.rs/ruvllm-cli)
|
||||
|
||||
## License
|
||||
|
||||
MIT OR Apache-2.0
|
||||
|
||||
---
|
||||
|
||||
**Part of the [RuVector](https://github.com/ruvnet/ruvector) ecosystem** - High-performance vector database with self-learning capabilities.
|
||||
121
vendor/ruvector/npm/packages/ruvllm-cli/bin/ruvllm.js
vendored
Normal file
121
vendor/ruvector/npm/packages/ruvllm-cli/bin/ruvllm.js
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env node

/**
 * RuvLLM CLI Entry Point
 *
 * Usage:
 *   ruvllm run --model <path> --prompt <text>
 *   ruvllm bench --model <path> [--iterations <n>]
 *   ruvllm serve --model <path> [--port <n>]
 *   ruvllm list
 *   ruvllm download <model-id>
 *   ruvllm chat --model <path>
 */

import { parseArgs, VERSION, DEFAULT_CONFIG } from '../dist/index.js';

// argv minus the node binary and this script's path.
const args = process.argv.slice(2);
// First token is the subcommand (run/bench/serve/list/download/chat).
const command = args[0];
// Remaining tokens parsed into a flat { flag: value | true } map.
// NOTE(review): flags placed before any command (e.g. `ruvllm --help`)
// end up in `command` above, not in this map.
const options = parseArgs(args.slice(1));
|
||||
|
||||
/**
 * Print usage, command, and option help to stdout.
 * The version shown comes from the library's VERSION export.
 */
function printHelp() {
  console.log(`
RuvLLM CLI v${VERSION}

Usage:
  ruvllm <command> [options]

Commands:
  run         Run inference on a prompt
  bench       Benchmark model performance
  serve       Start HTTP server for inference
  list        List available models
  download    Download a model from HuggingFace
  chat        Interactive chat session

Options:
  --model, -m      Path to model file (GGUF)
  --prompt, -p     Input prompt text
  --backend, -b    Acceleration backend (metal, cuda, cpu)
  --port           Server port (default: 8080)
  --iterations     Benchmark iterations (default: 10)
  --temperature    Sampling temperature (default: 0.7)
  --max-tokens     Maximum tokens to generate (default: 256)
  --help, -h       Show this help message
  --version, -v    Show version

Examples:
  ruvllm run --model ./model.gguf --prompt "Hello, world"
  ruvllm bench --model ./model.gguf --iterations 20
  ruvllm serve --model ./model.gguf --port 3000
  ruvllm chat --model ./model.gguf
`);
}
|
||||
|
||||
/** Print the CLI version banner to stdout. */
function printVersion() {
  const banner = `ruvllm v${VERSION}`;
  console.log(banner);
}
|
||||
|
||||
/**
 * Dispatch the parsed CLI command.
 *
 * This npm wrapper prints guidance only; actual inference, benchmarking,
 * serving, and model management are delegated to the native Rust binary
 * (`cargo install ruvllm-cli`).
 */
async function main() {
  // BUGFIX: `ruvllm --help` / `ruvllm --version` place the flag in the
  // command slot (args[0]), so `options` — parsed from args.slice(1) —
  // never saw it and the flag fell through to "Unknown command".
  // Re-parse the full argv when the first token is itself a flag.
  const leadingFlag = typeof command === 'string' && command.startsWith('-');
  const flags = leadingFlag ? parseArgs(args) : options;

  if (flags.help || flags.h || !command) {
    printHelp();
    process.exit(0);
  }

  if (flags.version || flags.v) {
    printVersion();
    process.exit(0);
  }

  if (leadingFlag) {
    // A leading flag we don't recognize is not a valid command.
    console.error(`Unknown command: ${command}`);
    printHelp();
    process.exit(1);
  }

  switch (command) {
    case 'run':
      console.log('Running inference...');
      console.log('Model:', options.model || 'Not specified');
      console.log('Prompt:', options.prompt || 'Not specified');
      console.log('\nNote: Full inference requires the native ruvllm binary.');
      console.log('Install with: cargo install ruvllm-cli');
      break;

    case 'bench':
      console.log('Running benchmark...');
      console.log('Model:', options.model || 'Not specified');
      console.log('Iterations:', options.iterations || 10);
      console.log('\nNote: Full benchmarking requires the native ruvllm binary.');
      console.log('Install with: cargo install ruvllm-cli');
      break;

    case 'serve':
      console.log('Starting server...');
      console.log('Model:', options.model || 'Not specified');
      console.log('Port:', options.port || 8080);
      console.log('\nNote: Server mode requires the native ruvllm binary.');
      console.log('Install with: cargo install ruvllm-cli');
      break;

    case 'list':
      console.log('Available models in', DEFAULT_CONFIG.modelsDir);
      console.log('\nNote: Model listing requires the native ruvllm binary.');
      break;

    case 'download':
      console.log('Downloading model:', args[1] || 'Not specified');
      console.log('\nNote: Model download requires the native ruvllm binary.');
      break;

    case 'chat':
      console.log('Starting chat session...');
      console.log('Model:', options.model || 'Not specified');
      console.log('\nNote: Chat mode requires the native ruvllm binary.');
      console.log('Install with: cargo install ruvllm-cli');
      break;

    default:
      console.error(`Unknown command: ${command}`);
      printHelp();
      process.exit(1);
  }
}
|
||||
|
||||
// Entry point: surface any unhandled failure on stderr and exit non-zero
// so shells and CI pipelines can detect it.
main().catch(err => {
  console.error('Error:', err.message);
  process.exit(1);
});
|
||||
71
vendor/ruvector/npm/packages/ruvllm-cli/package.json
vendored
Normal file
71
vendor/ruvector/npm/packages/ruvllm-cli/package.json
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
{
|
||||
"name": "@ruvector/ruvllm-cli",
|
||||
"version": "0.1.0",
|
||||
"description": "CLI for LLM inference, benchmarking, and model management - run local LLMs with Metal/CUDA acceleration",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"bin": {
|
||||
"ruvllm": "./bin/ruvllm.js"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"import": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"default": "./dist/index.js"
|
||||
},
|
||||
"require": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"default": "./dist/index.js"
|
||||
}
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"prepublishOnly": "npm run build",
|
||||
"test": "node --test test/*.test.js",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.19.30",
|
||||
"typescript": "^5.9.3"
|
||||
},
|
||||
"keywords": [
|
||||
"llm",
|
||||
"cli",
|
||||
"inference",
|
||||
"benchmarking",
|
||||
"gguf",
|
||||
"metal",
|
||||
"cuda",
|
||||
"local-llm",
|
||||
"ai",
|
||||
"machine-learning",
|
||||
"ruvector",
|
||||
"ruvllm",
|
||||
"model-serving"
|
||||
],
|
||||
"author": "rUv Team <team@ruv.io>",
|
||||
"license": "MIT OR Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ruvnet/ruvector.git",
|
||||
"directory": "npm/packages/ruvllm-cli"
|
||||
},
|
||||
"homepage": "https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-cli",
|
||||
"bugs": {
|
||||
"url": "https://github.com/ruvnet/ruvector/issues"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"publishConfig": {
|
||||
"registry": "https://registry.npmjs.org/",
|
||||
"access": "public"
|
||||
},
|
||||
"files": [
|
||||
"dist",
|
||||
"bin",
|
||||
"README.md"
|
||||
]
|
||||
}
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EACL,WAAW,EACX,mBAAmB,EACnB,gBAAgB,EAChB,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,eAAe,EACf,SAAS,EACT,WAAW,EACX,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAEpB,kBAAkB;AAClB,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B,gCAAgC;AAChC,eAAO,MAAM,cAAc,EAAE,OAAO,YAAY,EAAE,SAMjD,CAAC;AAEF;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAqB1E;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,YAAY,EAAE,eAAe,EAAE,GAAG,MAAM,CAqB5F;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,YAAY,EAAE,mBAAmB,EAAE,CAOjF"}
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AA8BH,8BAqBC;AAKD,oDAqBC;AAKD,oDAOC;AAvFD,uCAWoB;AAVlB,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AACnB,4GAAA,gBAAgB,OAAA;AAUlB,kBAAkB;AACL,QAAA,OAAO,GAAG,OAAO,CAAC;AAE/B,gCAAgC;AACnB,QAAA,cAAc,GAAmC;IAC5D,cAAc,EAAE,KAAiD;IACjE,SAAS,EAAE,kBAAkB;IAC7B,QAAQ,EAAE,iBAAiB;IAC3B,QAAQ,EAAE,MAAM;IAChB,SAAS,EAAE,IAAI;CAChB,CAAC;AAEF;;GAEG;AACH,SAAgB,SAAS,CAAC,IAAc;IACtC,MAAM,MAAM,GAAqC,EAAE,CAAC;IAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;gBACnB,CAAC,EAAE,CAAC;YACN,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAAC,OAA+C;IAClF,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;IAC7E,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5B,CAAC,CAAC,KAAK;QACP,CAAC,CAAC,OAAO;QACT,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtB,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;KACzB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC1D,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAA
E,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAC9B,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CACjE,CAAC;IAEF,OAAO,CAAC,SAAS,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxD,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB;IAClC,MAAM,QAAQ,GAA+C,CAAC,KAAiD,CAAC,CAAC;IAEjH,mCAAmC;IACnC,0CAA0C;IAE1C,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
||||
109
vendor/ruvector/npm/packages/ruvllm-cli/src/index.ts
vendored
Normal file
109
vendor/ruvector/npm/packages/ruvllm-cli/src/index.ts
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* @ruvector/ruvllm-cli - CLI for LLM Inference and Benchmarking
|
||||
*
|
||||
* A command-line interface for running local LLM inference with
|
||||
* Metal/CUDA acceleration, model benchmarking, and serving.
|
||||
*
|
||||
* @example
|
||||
* ```bash
|
||||
* # Run inference
|
||||
* npx @ruvector/ruvllm-cli run --model ./model.gguf --prompt "Hello"
|
||||
*
|
||||
* # Benchmark a model
|
||||
* npx @ruvector/ruvllm-cli bench --model ./model.gguf --iterations 10
|
||||
*
|
||||
* # Start server
|
||||
* npx @ruvector/ruvllm-cli serve --model ./model.gguf --port 8080
|
||||
* ```
|
||||
*
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
export {
|
||||
ModelFormat,
|
||||
AccelerationBackend,
|
||||
QuantizationType,
|
||||
ModelConfig,
|
||||
GenerationParams,
|
||||
InferenceResult,
|
||||
BenchmarkResult,
|
||||
CLIConfig,
|
||||
ChatMessage,
|
||||
ChatCompletionOptions,
|
||||
} from './types.js';
|
||||
|
||||
/** CLI version */
|
||||
export const VERSION = '0.1.0';
|
||||
|
||||
/** Default CLI configuration */
|
||||
export const DEFAULT_CONFIG: import('./types.js').CLIConfig = {
|
||||
defaultBackend: 'cpu' as import('./types.js').AccelerationBackend,
|
||||
modelsDir: '~/.ruvllm/models',
|
||||
cacheDir: '~/.ruvllm/cache',
|
||||
logLevel: 'info',
|
||||
streaming: true,
|
||||
};
|
||||
|
||||
/**
|
||||
* Parse CLI arguments
|
||||
*/
|
||||
export function parseArgs(args: string[]): Record<string, string | boolean> {
|
||||
const result: Record<string, string | boolean> = {};
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
const arg = args[i];
|
||||
if (arg.startsWith('--')) {
|
||||
const key = arg.slice(2);
|
||||
const next = args[i + 1];
|
||||
if (next && !next.startsWith('--')) {
|
||||
result[key] = next;
|
||||
i++;
|
||||
} else {
|
||||
result[key] = true;
|
||||
}
|
||||
} else if (arg.startsWith('-')) {
|
||||
const key = arg.slice(1);
|
||||
result[key] = true;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format benchmark results as table
|
||||
*/
|
||||
export function formatBenchmarkTable(results: import('./types.js').BenchmarkResult[]): string {
|
||||
const headers = ['Model', 'Backend', 'Prompt TPS', 'Gen TPS', 'Memory (MB)'];
|
||||
const rows = results.map(r => [
|
||||
r.model,
|
||||
r.backend,
|
||||
r.promptTPS.toFixed(2),
|
||||
r.generationTPS.toFixed(2),
|
||||
r.memoryUsage.toFixed(0),
|
||||
]);
|
||||
|
||||
const widths = headers.map((h, i) =>
|
||||
Math.max(h.length, ...rows.map(r => String(r[i]).length))
|
||||
);
|
||||
|
||||
const separator = widths.map(w => '-'.repeat(w)).join(' | ');
|
||||
const headerRow = headers.map((h, i) => h.padEnd(widths[i])).join(' | ');
|
||||
const dataRows = rows.map(row =>
|
||||
row.map((cell, i) => String(cell).padEnd(widths[i])).join(' | ')
|
||||
);
|
||||
|
||||
return [headerRow, separator, ...dataRows].join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get available backends for current system
|
||||
*/
|
||||
export function getAvailableBackends(): import('./types.js').AccelerationBackend[] {
|
||||
const backends: import('./types.js').AccelerationBackend[] = ['cpu' as import('./types.js').AccelerationBackend];
|
||||
|
||||
// Platform detection would go here
|
||||
// For now, return CPU as always available
|
||||
|
||||
return backends;
|
||||
}
|
||||
136
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts
vendored
Normal file
136
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
// NOTE(review): this .d.ts appears to be compiler-generated output checked
// into src/ (see the sourceMappingURL footer). Regenerate from types.ts
// via `npm run build` rather than hand-editing.
/**
 * RuvLLM CLI Types
 * Types for CLI configuration and inference options
 */
/** Supported model formats */
export declare enum ModelFormat {
    GGUF = "gguf",
    SafeTensors = "safetensors",
    ONNX = "onnx"
}
/** Hardware acceleration backends */
export declare enum AccelerationBackend {
    /** Apple Metal (macOS) */
    Metal = "metal",
    /** NVIDIA CUDA */
    CUDA = "cuda",
    /** CPU only */
    CPU = "cpu",
    /** Apple Neural Engine */
    ANE = "ane",
    /** Vulkan (cross-platform GPU) */
    Vulkan = "vulkan"
}
/** Quantization levels */
export declare enum QuantizationType {
    F32 = "f32",
    F16 = "f16",
    Q8_0 = "q8_0",
    Q4_K_M = "q4_k_m",
    Q4_K_S = "q4_k_s",
    Q5_K_M = "q5_k_m",
    Q5_K_S = "q5_k_s",
    Q6_K = "q6_k",
    Q2_K = "q2_k",
    Q3_K_M = "q3_k_m"
}
/** Model configuration */
export interface ModelConfig {
    /** Path to model file */
    modelPath: string;
    /** Model format */
    format?: ModelFormat;
    /** Quantization type */
    quantization?: QuantizationType;
    /** Context window size */
    contextSize?: number;
    /** Number of GPU layers to offload */
    gpuLayers?: number;
    /** Batch size for inference */
    batchSize?: number;
    /** Number of threads for CPU inference */
    threads?: number;
}
/** Generation parameters */
export interface GenerationParams {
    /** Maximum tokens to generate */
    maxTokens?: number;
    /** Temperature for sampling */
    temperature?: number;
    /** Top-p (nucleus) sampling */
    topP?: number;
    /** Top-k sampling */
    topK?: number;
    /** Repetition penalty */
    repetitionPenalty?: number;
    /** Stop sequences */
    stopSequences?: string[];
    /** Seed for reproducibility */
    seed?: number;
}
/** Inference result */
export interface InferenceResult {
    /** Generated text */
    text: string;
    /** Number of tokens generated */
    tokensGenerated: number;
    /** Time to first token (ms) */
    timeToFirstToken: number;
    /** Total generation time (ms) */
    totalTime: number;
    /** Tokens per second */
    tokensPerSecond: number;
    /** Finish reason */
    finishReason: 'stop' | 'length' | 'error';
}
/** Benchmark result */
export interface BenchmarkResult {
    /** Model name */
    model: string;
    /** Backend used */
    backend: AccelerationBackend;
    /** Prompt tokens */
    promptTokens: number;
    /** Generated tokens */
    generatedTokens: number;
    /** Prompt processing time (ms) */
    promptTime: number;
    /** Generation time (ms) */
    generationTime: number;
    /** Tokens per second (prompt) */
    promptTPS: number;
    /** Tokens per second (generation) */
    generationTPS: number;
    /** Memory usage (MB) */
    memoryUsage: number;
    /** Peak memory (MB) */
    peakMemory: number;
}
/** CLI configuration */
export interface CLIConfig {
    /** Default model path */
    defaultModel?: string;
    /** Default backend */
    defaultBackend?: AccelerationBackend;
    /** Models directory */
    modelsDir?: string;
    /** Cache directory */
    cacheDir?: string;
    /** Log level */
    logLevel?: 'debug' | 'info' | 'warn' | 'error';
    /** Enable streaming output */
    streaming?: boolean;
}
/** Chat message */
export interface ChatMessage {
    role: 'system' | 'user' | 'assistant';
    content: string;
}
/** Chat completion options */
export interface ChatCompletionOptions extends GenerationParams {
    /** System prompt */
    systemPrompt?: string;
    /** Chat history */
    messages?: ChatMessage[];
}
//# sourceMappingURL=types.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,8BAA8B;AAC9B,oBAAY,WAAW;IACrB,IAAI,SAAS;IACb,WAAW,gBAAgB;IAC3B,IAAI,SAAS;CACd;AAED,qCAAqC;AACrC,oBAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,KAAK,UAAU;IACf,kBAAkB;IAClB,IAAI,SAAS;IACb,eAAe;IACf,GAAG,QAAQ;IACX,0BAA0B;IAC1B,GAAG,QAAQ;IACX,kCAAkC;IAClC,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,IAAI,SAAS;IACb,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,wBAAwB;IACxB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,4BAA4B;AAC5B,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,OAAO,EAAE,mBAAmB,CAAC;IAC7B,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,wBAAwB;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAwB;AACxB,MAAM
,WAAW,SAAS;IACxB,yBAAyB;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sBAAsB;IACtB,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,uBAAuB;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB;IAChB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC/C,8BAA8B;IAC9B,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,mBAAmB;AACnB,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,8BAA8B;AAC9B,MAAM,WAAW,qBAAsB,SAAQ,gBAAgB;IAC7D,oBAAoB;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mBAAmB;IACnB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B"}
|
||||
43
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js
vendored
Normal file
43
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
"use strict";
/**
 * RuvLLM CLI Types
 * Types for CLI configuration and inference options
 */
// NOTE(review): compiled CommonJS output checked into src/ (see the
// sourceMappingURL footer). Regenerate from types.ts via `npm run build`
// rather than hand-editing.
Object.defineProperty(exports, "__esModule", { value: true });
exports.QuantizationType = exports.AccelerationBackend = exports.ModelFormat = void 0;
/** Supported model formats */
var ModelFormat;
(function (ModelFormat) {
    ModelFormat["GGUF"] = "gguf";
    ModelFormat["SafeTensors"] = "safetensors";
    ModelFormat["ONNX"] = "onnx";
})(ModelFormat || (exports.ModelFormat = ModelFormat = {}));
/** Hardware acceleration backends */
var AccelerationBackend;
(function (AccelerationBackend) {
    /** Apple Metal (macOS) */
    AccelerationBackend["Metal"] = "metal";
    /** NVIDIA CUDA */
    AccelerationBackend["CUDA"] = "cuda";
    /** CPU only */
    AccelerationBackend["CPU"] = "cpu";
    /** Apple Neural Engine */
    AccelerationBackend["ANE"] = "ane";
    /** Vulkan (cross-platform GPU) */
    AccelerationBackend["Vulkan"] = "vulkan";
})(AccelerationBackend || (exports.AccelerationBackend = AccelerationBackend = {}));
/** Quantization levels */
var QuantizationType;
(function (QuantizationType) {
    QuantizationType["F32"] = "f32";
    QuantizationType["F16"] = "f16";
    QuantizationType["Q8_0"] = "q8_0";
    QuantizationType["Q4_K_M"] = "q4_k_m";
    QuantizationType["Q4_K_S"] = "q4_k_s";
    QuantizationType["Q5_K_M"] = "q5_k_m";
    QuantizationType["Q5_K_S"] = "q5_k_s";
    QuantizationType["Q6_K"] = "q6_k";
    QuantizationType["Q2_K"] = "q2_k";
    QuantizationType["Q3_K_M"] = "q3_k_m";
})(QuantizationType || (exports.QuantizationType = QuantizationType = {}));
//# sourceMappingURL=types.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.js","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,8BAA8B;AAC9B,IAAY,WAIX;AAJD,WAAY,WAAW;IACrB,4BAAa,CAAA;IACb,0CAA2B,CAAA;IAC3B,4BAAa,CAAA;AACf,CAAC,EAJW,WAAW,2BAAX,WAAW,QAItB;AAED,qCAAqC;AACrC,IAAY,mBAWX;AAXD,WAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,sCAAe,CAAA;IACf,kBAAkB;IAClB,oCAAa,CAAA;IACb,eAAe;IACf,kCAAW,CAAA;IACX,0BAA0B;IAC1B,kCAAW,CAAA;IACX,kCAAkC;IAClC,wCAAiB,CAAA;AACnB,CAAC,EAXW,mBAAmB,mCAAnB,mBAAmB,QAW9B;AAED,0BAA0B;AAC1B,IAAY,gBAWX;AAXD,WAAY,gBAAgB;IAC1B,+BAAW,CAAA;IACX,+BAAW,CAAA;IACX,iCAAa,CAAA;IACb,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,iCAAa,CAAA;IACb,iCAAa,CAAA;IACb,qCAAiB,CAAA;AACnB,CAAC,EAXW,gBAAgB,gCAAhB,gBAAgB,QAW3B"}
|
||||
145
vendor/ruvector/npm/packages/ruvllm-cli/src/types.ts
vendored
Normal file
145
vendor/ruvector/npm/packages/ruvllm-cli/src/types.ts
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
/**
 * RuvLLM CLI Types
 * Types for CLI configuration and inference options
 *
 * This is the hand-written source; the sibling types.d.ts / types.js files
 * are compiled from it.
 */

/** Supported model formats */
export enum ModelFormat {
  GGUF = 'gguf',
  SafeTensors = 'safetensors',
  ONNX = 'onnx',
}

/** Hardware acceleration backends */
export enum AccelerationBackend {
  /** Apple Metal (macOS) */
  Metal = 'metal',
  /** NVIDIA CUDA */
  CUDA = 'cuda',
  /** CPU only */
  CPU = 'cpu',
  /** Apple Neural Engine */
  ANE = 'ane',
  /** Vulkan (cross-platform GPU) */
  Vulkan = 'vulkan',
}

/**
 * Quantization levels.
 * NOTE(review): values appear to follow llama.cpp/GGUF quant naming
 * (q4_k_m, q8_0, ...) — confirm against the native ruvllm crate.
 */
export enum QuantizationType {
  F32 = 'f32',
  F16 = 'f16',
  Q8_0 = 'q8_0',
  Q4_K_M = 'q4_k_m',
  Q4_K_S = 'q4_k_s',
  Q5_K_M = 'q5_k_m',
  Q5_K_S = 'q5_k_s',
  Q6_K = 'q6_k',
  Q2_K = 'q2_k',
  Q3_K_M = 'q3_k_m',
}

/** Model configuration */
export interface ModelConfig {
  /** Path to model file */
  modelPath: string;
  /** Model format */
  format?: ModelFormat;
  /** Quantization type */
  quantization?: QuantizationType;
  /** Context window size */
  contextSize?: number;
  /** Number of GPU layers to offload */
  gpuLayers?: number;
  /** Batch size for inference */
  batchSize?: number;
  /** Number of threads for CPU inference */
  threads?: number;
}

/** Generation parameters */
export interface GenerationParams {
  /** Maximum tokens to generate */
  maxTokens?: number;
  /** Temperature for sampling */
  temperature?: number;
  /** Top-p (nucleus) sampling */
  topP?: number;
  /** Top-k sampling */
  topK?: number;
  /** Repetition penalty */
  repetitionPenalty?: number;
  /** Stop sequences */
  stopSequences?: string[];
  /** Seed for reproducibility */
  seed?: number;
}

/** Inference result */
export interface InferenceResult {
  /** Generated text */
  text: string;
  /** Number of tokens generated */
  tokensGenerated: number;
  /** Time to first token (ms) */
  timeToFirstToken: number;
  /** Total generation time (ms) */
  totalTime: number;
  /** Tokens per second */
  tokensPerSecond: number;
  /** Finish reason */
  finishReason: 'stop' | 'length' | 'error';
}

/** Benchmark result */
export interface BenchmarkResult {
  /** Model name */
  model: string;
  /** Backend used */
  backend: AccelerationBackend;
  /** Prompt tokens */
  promptTokens: number;
  /** Generated tokens */
  generatedTokens: number;
  /** Prompt processing time (ms) */
  promptTime: number;
  /** Generation time (ms) */
  generationTime: number;
  /** Tokens per second (prompt) */
  promptTPS: number;
  /** Tokens per second (generation) */
  generationTPS: number;
  /** Memory usage (MB) */
  memoryUsage: number;
  /** Peak memory (MB) */
  peakMemory: number;
}

/**
 * CLI configuration.
 * Default values for these fields live in DEFAULT_CONFIG (src/index.ts).
 */
export interface CLIConfig {
  /** Default model path */
  defaultModel?: string;
  /** Default backend */
  defaultBackend?: AccelerationBackend;
  /** Models directory */
  modelsDir?: string;
  /** Cache directory */
  cacheDir?: string;
  /** Log level */
  logLevel?: 'debug' | 'info' | 'warn' | 'error';
  /** Enable streaming output */
  streaming?: boolean;
}

/** Chat message */
export interface ChatMessage {
  // Role follows the OpenAI-style chat convention.
  role: 'system' | 'user' | 'assistant';
  content: string;
}

/** Chat completion options */
export interface ChatCompletionOptions extends GenerationParams {
  /** System prompt */
  systemPrompt?: string;
  /** Chat history */
  messages?: ChatMessage[];
}
|
||||
19
vendor/ruvector/npm/packages/ruvllm-cli/tsconfig.json
vendored
Normal file
19
vendor/ruvector/npm/packages/ruvllm-cli/tsconfig.json
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "test"]
|
||||
}
|
||||
Reference in New Issue
Block a user