Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EACL,WAAW,EACX,mBAAmB,EACnB,gBAAgB,EAChB,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,eAAe,EACf,SAAS,EACT,WAAW,EACX,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAEpB,kBAAkB;AAClB,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B,gCAAgC;AAChC,eAAO,MAAM,cAAc,EAAE,OAAO,YAAY,EAAE,SAMjD,CAAC;AAEF;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAqB1E;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,YAAY,EAAE,eAAe,EAAE,GAAG,MAAM,CAqB5F;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,YAAY,EAAE,mBAAmB,EAAE,CAOjF"}

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AA8BH,8BAqBC;AAKD,oDAqBC;AAKD,oDAOC;AAvFD,uCAWoB;AAVlB,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AACnB,4GAAA,gBAAgB,OAAA;AAUlB,kBAAkB;AACL,QAAA,OAAO,GAAG,OAAO,CAAC;AAE/B,gCAAgC;AACnB,QAAA,cAAc,GAAmC;IAC5D,cAAc,EAAE,KAAiD;IACjE,SAAS,EAAE,kBAAkB;IAC7B,QAAQ,EAAE,iBAAiB;IAC3B,QAAQ,EAAE,MAAM;IAChB,SAAS,EAAE,IAAI;CAChB,CAAC;AAEF;;GAEG;AACH,SAAgB,SAAS,CAAC,IAAc;IACtC,MAAM,MAAM,GAAqC,EAAE,CAAC;IAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;gBACnB,CAAC,EAAE,CAAC;YACN,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAAC,OAA+C;IAClF,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;IAC7E,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5B,CAAC,CAAC,KAAK;QACP,CAAC,CAAC,OAAO;QACT,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtB,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;KACzB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC1D,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAA
E,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAC9B,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CACjE,CAAC;IAEF,OAAO,CAAC,SAAS,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxD,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB;IAClC,MAAM,QAAQ,GAA+C,CAAC,KAAiD,CAAC,CAAC;IAEjH,mCAAmC;IACnC,0CAA0C;IAE1C,OAAO,QAAQ,CAAC;AAClB,CAAC"}

View File

@@ -0,0 +1,109 @@
/**
* @ruvector/ruvllm-cli - CLI for LLM Inference and Benchmarking
*
* A command-line interface for running local LLM inference with
* Metal/CUDA acceleration, model benchmarking, and serving.
*
* @example
* ```bash
* # Run inference
* npx @ruvector/ruvllm-cli run --model ./model.gguf --prompt "Hello"
*
* # Benchmark a model
* npx @ruvector/ruvllm-cli bench --model ./model.gguf --iterations 10
*
* # Start server
* npx @ruvector/ruvllm-cli serve --model ./model.gguf --port 8080
* ```
*
* @packageDocumentation
*/
export {
ModelFormat,
AccelerationBackend,
QuantizationType,
ModelConfig,
GenerationParams,
InferenceResult,
BenchmarkResult,
CLIConfig,
ChatMessage,
ChatCompletionOptions,
} from './types.js';
/**
* CLI version string exported for consumers of this module.
* NOTE(review): this is a hard-coded literal; presumably it is meant to mirror
* the package.json version — confirm it is kept in sync on release.
*/
export const VERSION = '0.1.0';
/**
* Default CLI configuration.
*
* Selects the CPU backend, the `info` log level and streaming output, and
* places the models and cache directories under `~/.ruvllm`.
* `defaultModel` is intentionally left unset.
*
* NOTE(review): the `~` prefix is stored verbatim and nothing in this module
* expands it; presumably callers resolve it to the user's home directory
* before touching the filesystem — confirm.
*/
export const DEFAULT_CONFIG: import('./types.js').CLIConfig = {
// String literal asserted to the enum type instead of importing the enum value.
defaultBackend: 'cpu' as import('./types.js').AccelerationBackend,
modelsDir: '~/.ruvllm/models',
cacheDir: '~/.ruvllm/cache',
logLevel: 'info',
streaming: true,
};
/**
 * Parse CLI arguments into a flat option map.
 *
 * Recognised forms:
 * - `--key value` → `{ key: 'value' }` (the value may be an empty string)
 * - `--key=value` → `{ key: 'value' }` (split on the first `=`)
 * - `--key`       → `{ key: true }` when it is the last token or is followed
 *                   by another `--option`
 * - `-k`          → `{ k: true }`; single-dash options never consume a value
 *
 * Any other token that is not consumed as a value is ignored. When a key is
 * repeated, the last occurrence wins.
 *
 * @param args - Raw argument tokens, e.g. `process.argv.slice(2)`.
 * @returns Map from option name (leading dashes removed) to its string value,
 *          or `true` for value-less flags.
 */
export function parseArgs(args: string[]): Record<string, string | boolean> {
  const result: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg.startsWith('--')) {
      const body = arg.slice(2);

      // Inline form `--key=value`; requires a non-empty key before the `=`.
      const eq = body.indexOf('=');
      if (eq > 0) {
        result[body.slice(0, eq)] = body.slice(eq + 1);
        continue;
      }

      const next = args[i + 1];
      // Compare against `undefined` rather than relying on truthiness so an
      // explicit empty-string value (`--prompt ""`) is kept, not turned into `true`.
      if (next !== undefined && !next.startsWith('--')) {
        result[body] = next;
        i++; // the value token has been consumed
      } else {
        result[body] = true;
      }
    } else if (arg.startsWith('-')) {
      result[arg.slice(1)] = true;
    }
  }

  return result;
}
/**
 * Render benchmark results as a plain-text table.
 *
 * The table has five columns (model, backend, prompt TPS, generation TPS and
 * memory in MB). TPS values are printed with two decimals and memory with
 * none. Every column is right-padded to the width of its longest cell or
 * header, cells are joined with `' | '`, and a dashed rule follows the header.
 *
 * @param results - Benchmark results to render; may be empty, in which case
 *                  only the header and the rule are returned.
 * @returns The table as a single newline-separated string, with no trailing newline.
 */
export function formatBenchmarkTable(results: import('./types.js').BenchmarkResult[]): string {
  const columnGap = ' | ';
  const titles = ['Model', 'Backend', 'Prompt TPS', 'Gen TPS', 'Memory (MB)'];

  // One array of already-formatted cells per result.
  const body: string[][] = [];
  for (const entry of results) {
    body.push([
      entry.model,
      entry.backend,
      entry.promptTPS.toFixed(2),
      entry.generationTPS.toFixed(2),
      entry.memoryUsage.toFixed(0),
    ]);
  }

  // Width of a column is the longest of its header and all of its cells.
  const columnWidths = titles.map((title, col) => {
    let widest = title.length;
    for (const line of body) {
      widest = Math.max(widest, String(line[col]).length);
    }
    return widest;
  });

  const renderLine = (cells: string[]): string =>
    cells.map((cell, col) => String(cell).padEnd(columnWidths[col])).join(columnGap);

  const rule = columnWidths.map(width => '-'.repeat(width)).join(columnGap);
  const renderedBody = body.map(cells => renderLine(cells));

  return [renderLine(titles), rule, ...renderedBody].join('\n');
}
/**
 * List the acceleration backends usable on the current system.
 *
 * No platform detection is performed yet: the function always returns a
 * freshly allocated single-element array containing the CPU backend.
 *
 * @returns A new array holding the `'cpu'` backend.
 */
export function getAvailableBackends(): import('./types.js').AccelerationBackend[] {
  type Backend = import('./types.js').AccelerationBackend;

  // Placeholder until real platform probing exists; CPU is treated as always available.
  const cpuBackend = 'cpu' as Backend;
  return [cpuBackend];
}

View File

@@ -0,0 +1,136 @@
/**
* RuvLLM CLI Types
* Types for CLI configuration and inference options
*/
/**
* Supported model formats.
* String enum: each member's runtime value is the lowercase format name.
*/
export declare enum ModelFormat {
GGUF = "gguf",
SafeTensors = "safetensors",
ONNX = "onnx"
}
/**
* Hardware acceleration backends.
* String enum: each member's runtime value is the lowercase backend name.
*/
export declare enum AccelerationBackend {
/** Apple Metal (macOS) */
Metal = "metal",
/** NVIDIA CUDA */
CUDA = "cuda",
/** CPU only */
CPU = "cpu",
/** Apple Neural Engine */
ANE = "ane",
/** Vulkan (cross-platform GPU) */
Vulkan = "vulkan"
}
/**
* Quantization levels.
* String enum: each member's runtime value is the member name in lowercase.
*/
export declare enum QuantizationType {
F32 = "f32",
F16 = "f16",
Q8_0 = "q8_0",
Q4_K_M = "q4_k_m",
Q4_K_S = "q4_k_s",
Q5_K_M = "q5_k_m",
Q5_K_S = "q5_k_s",
Q6_K = "q6_k",
Q2_K = "q2_k",
Q3_K_M = "q3_k_m"
}
/**
* Model configuration.
* Only `modelPath` is required; every other field is optional.
* NOTE(review): the values used when an optional field is omitted are not
* defined in this file — confirm against the consumer.
*/
export interface ModelConfig {
/** Path to model file */
modelPath: string;
/** Model format */
format?: ModelFormat;
/** Quantization type */
quantization?: QuantizationType;
/** Context window size */
contextSize?: number;
/** Number of GPU layers to offload */
gpuLayers?: number;
/** Batch size for inference */
batchSize?: number;
/** Number of threads for CPU inference */
threads?: number;
}
/**
* Generation parameters.
* Every field is optional.
* NOTE(review): defaults and valid ranges are not defined in this file —
* confirm against the consumer.
*/
export interface GenerationParams {
/** Maximum tokens to generate */
maxTokens?: number;
/** Temperature for sampling */
temperature?: number;
/** Top-p (nucleus) sampling */
topP?: number;
/** Top-k sampling */
topK?: number;
/** Repetition penalty */
repetitionPenalty?: number;
/** Stop sequences */
stopSequences?: string[];
/** Seed for reproducibility */
seed?: number;
}
/**
* Inference result.
* Every field is required.
*/
export interface InferenceResult {
/** Generated text */
text: string;
/** Number of tokens generated */
tokensGenerated: number;
/** Time to first token (ms) */
timeToFirstToken: number;
/** Total generation time (ms) */
totalTime: number;
/** Tokens per second */
tokensPerSecond: number;
/** Finish reason: one of the literals 'stop', 'length' or 'error' */
finishReason: 'stop' | 'length' | 'error';
}
/**
* Benchmark result for a single model/backend pair.
* Every field is required; times are in milliseconds and memory figures in MB,
* as noted on the individual fields.
*/
export interface BenchmarkResult {
/** Model name */
model: string;
/** Backend used */
backend: AccelerationBackend;
/** Prompt tokens */
promptTokens: number;
/** Generated tokens */
generatedTokens: number;
/** Prompt processing time (ms) */
promptTime: number;
/** Generation time (ms) */
generationTime: number;
/** Tokens per second (prompt) */
promptTPS: number;
/** Tokens per second (generation) */
generationTPS: number;
/** Memory usage (MB) */
memoryUsage: number;
/** Peak memory (MB) */
peakMemory: number;
}
/**
* CLI configuration.
* Every field is optional.
*/
export interface CLIConfig {
/** Default model path */
defaultModel?: string;
/** Default backend */
defaultBackend?: AccelerationBackend;
/** Models directory */
modelsDir?: string;
/** Cache directory */
cacheDir?: string;
/** Log level: one of 'debug', 'info', 'warn' or 'error' */
logLevel?: 'debug' | 'info' | 'warn' | 'error';
/** Enable streaming output */
streaming?: boolean;
}
/** A single chat message: a role tag plus its content string. */
export interface ChatMessage {
/** Role attached to the message: 'system', 'user' or 'assistant' */
role: 'system' | 'user' | 'assistant';
/** Message content */
content: string;
}
/**
* Chat completion options.
* Inherits every field of GenerationParams and adds two optional chat fields.
* NOTE(review): how `systemPrompt` interacts with a 'system' entry in
* `messages` is not defined in this file — confirm against the consumer.
*/
export interface ChatCompletionOptions extends GenerationParams {
/** System prompt */
systemPrompt?: string;
/** Chat history */
messages?: ChatMessage[];
}
//# sourceMappingURL=types.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,8BAA8B;AAC9B,oBAAY,WAAW;IACrB,IAAI,SAAS;IACb,WAAW,gBAAgB;IAC3B,IAAI,SAAS;CACd;AAED,qCAAqC;AACrC,oBAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,KAAK,UAAU;IACf,kBAAkB;IAClB,IAAI,SAAS;IACb,eAAe;IACf,GAAG,QAAQ;IACX,0BAA0B;IAC1B,GAAG,QAAQ;IACX,kCAAkC;IAClC,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,IAAI,SAAS;IACb,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,wBAAwB;IACxB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,4BAA4B;AAC5B,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,OAAO,EAAE,mBAAmB,CAAC;IAC7B,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,wBAAwB;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAwB;AACxB,MAAM
,WAAW,SAAS;IACxB,yBAAyB;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sBAAsB;IACtB,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,uBAAuB;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB;IAChB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC/C,8BAA8B;IAC9B,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,mBAAmB;AACnB,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,8BAA8B;AAC9B,MAAM,WAAW,qBAAsB,SAAQ,gBAAgB;IAC7D,oBAAoB;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mBAAmB;IACnB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B"}

View File

@@ -0,0 +1,43 @@
"use strict";
/**
* RuvLLM CLI Types
* Types for CLI configuration and inference options
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.QuantizationType = exports.AccelerationBackend = exports.ModelFormat = void 0;
/**
* Supported model formats.
* Compiled string enum: the function below fills the object with
* name -> value entries, and the same object is published as
* `exports.ModelFormat`.
*/
var ModelFormat;
(function (ModelFormat) {
ModelFormat["GGUF"] = "gguf";
ModelFormat["SafeTensors"] = "safetensors";
ModelFormat["ONNX"] = "onnx";
})(ModelFormat || (exports.ModelFormat = ModelFormat = {}));
/**
* Hardware acceleration backends.
* Compiled string enum: the function below fills the object with
* name -> value entries, and the same object is published as
* `exports.AccelerationBackend`.
*/
var AccelerationBackend;
(function (AccelerationBackend) {
/** Apple Metal (macOS) */
AccelerationBackend["Metal"] = "metal";
/** NVIDIA CUDA */
AccelerationBackend["CUDA"] = "cuda";
/** CPU only */
AccelerationBackend["CPU"] = "cpu";
/** Apple Neural Engine */
AccelerationBackend["ANE"] = "ane";
/** Vulkan (cross-platform GPU) */
AccelerationBackend["Vulkan"] = "vulkan";
})(AccelerationBackend || (exports.AccelerationBackend = AccelerationBackend = {}));
/**
* Quantization levels.
* Compiled string enum: the function below fills the object with
* name -> value entries (each value is the member name in lowercase), and the
* same object is published as `exports.QuantizationType`.
*/
var QuantizationType;
(function (QuantizationType) {
QuantizationType["F32"] = "f32";
QuantizationType["F16"] = "f16";
QuantizationType["Q8_0"] = "q8_0";
QuantizationType["Q4_K_M"] = "q4_k_m";
QuantizationType["Q4_K_S"] = "q4_k_s";
QuantizationType["Q5_K_M"] = "q5_k_m";
QuantizationType["Q5_K_S"] = "q5_k_s";
QuantizationType["Q6_K"] = "q6_k";
QuantizationType["Q2_K"] = "q2_k";
QuantizationType["Q3_K_M"] = "q3_k_m";
})(QuantizationType || (exports.QuantizationType = QuantizationType = {}));
//# sourceMappingURL=types.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"types.js","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,8BAA8B;AAC9B,IAAY,WAIX;AAJD,WAAY,WAAW;IACrB,4BAAa,CAAA;IACb,0CAA2B,CAAA;IAC3B,4BAAa,CAAA;AACf,CAAC,EAJW,WAAW,2BAAX,WAAW,QAItB;AAED,qCAAqC;AACrC,IAAY,mBAWX;AAXD,WAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,sCAAe,CAAA;IACf,kBAAkB;IAClB,oCAAa,CAAA;IACb,eAAe;IACf,kCAAW,CAAA;IACX,0BAA0B;IAC1B,kCAAW,CAAA;IACX,kCAAkC;IAClC,wCAAiB,CAAA;AACnB,CAAC,EAXW,mBAAmB,mCAAnB,mBAAmB,QAW9B;AAED,0BAA0B;AAC1B,IAAY,gBAWX;AAXD,WAAY,gBAAgB;IAC1B,+BAAW,CAAA;IACX,+BAAW,CAAA;IACX,iCAAa,CAAA;IACb,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,iCAAa,CAAA;IACb,iCAAa,CAAA;IACb,qCAAiB,CAAA;AACnB,CAAC,EAXW,gBAAgB,gCAAhB,gBAAgB,QAW3B"}

View File

@@ -0,0 +1,145 @@
/**
* RuvLLM CLI Types
* Types for CLI configuration and inference options
*/
/**
* Supported model formats.
* String enum: each member's runtime value is the lowercase format name.
*/
export enum ModelFormat {
GGUF = 'gguf',
SafeTensors = 'safetensors',
ONNX = 'onnx',
}
/**
* Hardware acceleration backends.
* String enum: each member's runtime value is the lowercase backend name.
*/
export enum AccelerationBackend {
/** Apple Metal (macOS) */
Metal = 'metal',
/** NVIDIA CUDA */
CUDA = 'cuda',
/** CPU only */
CPU = 'cpu',
/** Apple Neural Engine */
ANE = 'ane',
/** Vulkan (cross-platform GPU) */
Vulkan = 'vulkan',
}
/**
* Quantization levels.
* String enum: each member's runtime value is the member name in lowercase.
*/
export enum QuantizationType {
F32 = 'f32',
F16 = 'f16',
Q8_0 = 'q8_0',
Q4_K_M = 'q4_k_m',
Q4_K_S = 'q4_k_s',
Q5_K_M = 'q5_k_m',
Q5_K_S = 'q5_k_s',
Q6_K = 'q6_k',
Q2_K = 'q2_k',
Q3_K_M = 'q3_k_m',
}
/**
* Model configuration.
* Only `modelPath` is required; every other field is optional.
* NOTE(review): the values used when an optional field is omitted are not
* defined in this file — confirm against the consumer.
*/
export interface ModelConfig {
/** Path to model file */
modelPath: string;
/** Model format */
format?: ModelFormat;
/** Quantization type */
quantization?: QuantizationType;
/** Context window size */
contextSize?: number;
/** Number of GPU layers to offload */
gpuLayers?: number;
/** Batch size for inference */
batchSize?: number;
/** Number of threads for CPU inference */
threads?: number;
}
/**
* Generation parameters.
* Every field is optional.
* NOTE(review): defaults and valid ranges are not defined in this file —
* confirm against the consumer.
*/
export interface GenerationParams {
/** Maximum tokens to generate */
maxTokens?: number;
/** Temperature for sampling */
temperature?: number;
/** Top-p (nucleus) sampling */
topP?: number;
/** Top-k sampling */
topK?: number;
/** Repetition penalty */
repetitionPenalty?: number;
/** Stop sequences */
stopSequences?: string[];
/** Seed for reproducibility */
seed?: number;
}
/**
* Inference result.
* Every field is required.
*/
export interface InferenceResult {
/** Generated text */
text: string;
/** Number of tokens generated */
tokensGenerated: number;
/** Time to first token (ms) */
timeToFirstToken: number;
/** Total generation time (ms) */
totalTime: number;
/** Tokens per second */
tokensPerSecond: number;
/** Finish reason: one of the literals 'stop', 'length' or 'error' */
finishReason: 'stop' | 'length' | 'error';
}
/**
* Benchmark result for a single model/backend pair.
* Every field is required; times are in milliseconds and memory figures in MB,
* as noted on the individual fields.
*/
export interface BenchmarkResult {
/** Model name */
model: string;
/** Backend used */
backend: AccelerationBackend;
/** Prompt tokens */
promptTokens: number;
/** Generated tokens */
generatedTokens: number;
/** Prompt processing time (ms) */
promptTime: number;
/** Generation time (ms) */
generationTime: number;
/** Tokens per second (prompt) */
promptTPS: number;
/** Tokens per second (generation) */
generationTPS: number;
/** Memory usage (MB) */
memoryUsage: number;
/** Peak memory (MB) */
peakMemory: number;
}
/**
* CLI configuration.
* Every field is optional.
*/
export interface CLIConfig {
/** Default model path */
defaultModel?: string;
/** Default backend */
defaultBackend?: AccelerationBackend;
/** Models directory */
modelsDir?: string;
/** Cache directory */
cacheDir?: string;
/** Log level: one of 'debug', 'info', 'warn' or 'error' */
logLevel?: 'debug' | 'info' | 'warn' | 'error';
/** Enable streaming output */
streaming?: boolean;
}
/** A single chat message: a role tag plus its content string. */
export interface ChatMessage {
/** Role attached to the message: 'system', 'user' or 'assistant' */
role: 'system' | 'user' | 'assistant';
/** Message content */
content: string;
}
/**
* Chat completion options.
* Inherits every field of GenerationParams and adds two optional chat fields.
* NOTE(review): how `systemPrompt` interacts with a 'system' entry in
* `messages` is not defined in this file — confirm against the consumer.
*/
export interface ChatCompletionOptions extends GenerationParams {
/** System prompt */
systemPrompt?: string;
/** Chat history */
messages?: ChatMessage[];
}