Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EACL,WAAW,EACX,mBAAmB,EACnB,gBAAgB,EAChB,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,eAAe,EACf,SAAS,EACT,WAAW,EACX,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAEpB,kBAAkB;AAClB,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B,gCAAgC;AAChC,eAAO,MAAM,cAAc,EAAE,OAAO,YAAY,EAAE,SAMjD,CAAC;AAEF;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAqB1E;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,YAAY,EAAE,eAAe,EAAE,GAAG,MAAM,CAqB5F;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,OAAO,YAAY,EAAE,mBAAmB,EAAE,CAOjF"}
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AA8BH,8BAqBC;AAKD,oDAqBC;AAKD,oDAOC;AAvFD,uCAWoB;AAVlB,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AACnB,4GAAA,gBAAgB,OAAA;AAUlB,kBAAkB;AACL,QAAA,OAAO,GAAG,OAAO,CAAC;AAE/B,gCAAgC;AACnB,QAAA,cAAc,GAAmC;IAC5D,cAAc,EAAE,KAAiD;IACjE,SAAS,EAAE,kBAAkB;IAC7B,QAAQ,EAAE,iBAAiB;IAC3B,QAAQ,EAAE,MAAM;IAChB,SAAS,EAAE,IAAI;CAChB,CAAC;AAEF;;GAEG;AACH,SAAgB,SAAS,CAAC,IAAc;IACtC,MAAM,MAAM,GAAqC,EAAE,CAAC;IAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;gBACnB,CAAC,EAAE,CAAC;YACN,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAAC,OAA+C;IAClF,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;IAC7E,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5B,CAAC,CAAC,KAAK;QACP,CAAC,CAAC,OAAO;QACT,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtB,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;KACzB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC1D,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAA
E,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAC9B,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CACjE,CAAC;IAEF,OAAO,CAAC,SAAS,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxD,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB;IAClC,MAAM,QAAQ,GAA+C,CAAC,KAAiD,CAAC,CAAC;IAEjH,mCAAmC;IACnC,0CAA0C;IAE1C,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
||||
109
vendor/ruvector/npm/packages/ruvllm-cli/src/index.ts
vendored
Normal file
109
vendor/ruvector/npm/packages/ruvllm-cli/src/index.ts
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* @ruvector/ruvllm-cli - CLI for LLM Inference and Benchmarking
|
||||
*
|
||||
* A command-line interface for running local LLM inference with
|
||||
* Metal/CUDA acceleration, model benchmarking, and serving.
|
||||
*
|
||||
* @example
|
||||
* ```bash
|
||||
* # Run inference
|
||||
* npx @ruvector/ruvllm-cli run --model ./model.gguf --prompt "Hello"
|
||||
*
|
||||
* # Benchmark a model
|
||||
* npx @ruvector/ruvllm-cli bench --model ./model.gguf --iterations 10
|
||||
*
|
||||
* # Start server
|
||||
* npx @ruvector/ruvllm-cli serve --model ./model.gguf --port 8080
|
||||
* ```
|
||||
*
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
export {
|
||||
ModelFormat,
|
||||
AccelerationBackend,
|
||||
QuantizationType,
|
||||
ModelConfig,
|
||||
GenerationParams,
|
||||
InferenceResult,
|
||||
BenchmarkResult,
|
||||
CLIConfig,
|
||||
ChatMessage,
|
||||
ChatCompletionOptions,
|
||||
} from './types.js';
|
||||
|
||||
/** Version string exported by this CLI package. */
export const VERSION = '0.1.0';
|
||||
|
||||
/**
 * Default CLI configuration.
 *
 * The directory values are stored verbatim with a leading `~`; nothing in this
 * module expands it, so consumers presumably have to resolve the home
 * directory themselves (TODO confirm against the code that reads these paths).
 */
export const DEFAULT_CONFIG: import('./types.js').CLIConfig = {
  // String literal cast to the enum type so this module needs no runtime
  // import of the enum object.
  defaultBackend: 'cpu' as import('./types.js').AccelerationBackend,
  modelsDir: '~/.ruvllm/models',
  cacheDir: '~/.ruvllm/cache',
  logLevel: 'info',
  streaming: true,
};
|
||||
|
||||
/**
 * Parse CLI arguments into a flat option map.
 *
 * Recognised forms:
 * - `--key value` -> `{ key: 'value' }`. The value may be an empty string or
 *   start with a single dash (e.g. a negative number).
 * - `--key=value` -> `{ key: 'value' }`. Only the first `=` splits, so the
 *   value itself may contain `=`.
 * - `--key`       -> `{ key: true }` when it is the last token or the next
 *   token is another `--long` option.
 * - `-k`          -> `{ k: true }`. Everything after the dash is the key and
 *   short options never consume a value.
 *
 * Tokens that do not start with `-` and were not consumed as a value are
 * ignored. A later occurrence of a key overwrites an earlier one.
 *
 * @param args - Raw argument tokens.
 * @returns Map from option name (leading dashes removed) to its string value,
 *   or `true` for value-less flags.
 */
export function parseArgs(args: string[]): Record<string, string | boolean> {
  const result: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) {
      // Hole in a sparse array: nothing to parse.
      continue;
    }

    if (arg.startsWith('--')) {
      const name = arg.slice(2);

      // Inline `--key=value` form. `eq > 0` requires a non-empty key.
      const eq = name.indexOf('=');
      if (eq > 0) {
        result[name.slice(0, eq)] = name.slice(eq + 1);
        continue;
      }

      const next = args[i + 1];
      // Compare against `undefined` rather than truthiness so an explicit
      // empty-string value (`--prompt ""`) is kept instead of becoming `true`.
      if (next !== undefined && !next.startsWith('--')) {
        result[name] = next;
        i++; // the value token has been consumed
      } else {
        result[name] = true;
      }
    } else if (arg.startsWith('-')) {
      result[arg.slice(1)] = true;
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Format benchmark results as a plain-text table.
 *
 * Columns are Model, Backend, Prompt TPS, Gen TPS and Memory (MB). Both TPS
 * values are printed with two decimals and memory with none. Every cell is
 * left-aligned and padded with spaces to the widest entry in its column;
 * columns are joined with `' | '` and lines with `'\n'`. A dashed separator
 * line follows the header.
 *
 * @param results - Benchmark results, one table row each. An empty array
 *   yields just the header and the separator line.
 * @returns The table as a single string without a trailing newline.
 */
export function formatBenchmarkTable(results: import('./types.js').BenchmarkResult[]): string {
  const headers = ['Model', 'Backend', 'Prompt TPS', 'Gen TPS', 'Memory (MB)'];

  const rows: string[][] = results.map((r) => [
    String(r.model),
    String(r.backend),
    r.promptTPS.toFixed(2),
    r.generationTPS.toFixed(2),
    r.memoryUsage.toFixed(0),
  ]);

  // Fold over the rows instead of `Math.max(...rows.map(...))`: spreading one
  // argument per row throws a RangeError once the result list is very large.
  const widths = headers.map((header, col) =>
    rows.reduce((widest, row) => Math.max(widest, (row[col] ?? '').length), header.length),
  );

  const formatRow = (cells: readonly string[]): string =>
    cells.map((cell, col) => cell.padEnd(widths[col] ?? 0)).join(' | ');

  const separator = widths.map((w) => '-'.repeat(w)).join(' | ');

  return [formatRow(headers), separator, ...rows.map(formatRow)].join('\n');
}
|
||||
|
||||
/**
 * Get the acceleration backends available on the current system.
 *
 * Platform detection is not implemented yet: the function always reports the
 * CPU backend and nothing else. A fresh array is returned on every call.
 *
 * @returns A one-element array containing the `'cpu'` backend.
 */
export function getAvailableBackends(): import('./types.js').AccelerationBackend[] {
  // Platform detection would go here. For now, CPU is treated as always available.
  return ['cpu' as import('./types.js').AccelerationBackend];
}
|
||||
136
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts
vendored
Normal file
136
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* RuvLLM CLI Types
|
||||
* Types for CLI configuration and inference options
|
||||
*/
|
||||
/** Supported model formats (string enum; each member's value is its lower-case name). */
export declare enum ModelFormat {
    GGUF = "gguf",
    SafeTensors = "safetensors",
    ONNX = "onnx"
}
|
||||
/** Hardware acceleration backends (string enum). */
export declare enum AccelerationBackend {
    /** Apple Metal (macOS) */
    Metal = "metal",
    /** NVIDIA CUDA */
    CUDA = "cuda",
    /** CPU only */
    CPU = "cpu",
    /** Apple Neural Engine */
    ANE = "ane",
    /** Vulkan (cross-platform GPU) */
    Vulkan = "vulkan"
}
|
||||
/**
 * Quantization levels (string enum; each member's value is its lower-case name).
 *
 * NOTE(review): the member names look like GGUF-style quantization labels —
 * confirm against the inference backend before relying on that.
 */
export declare enum QuantizationType {
    F32 = "f32",
    F16 = "f16",
    Q8_0 = "q8_0",
    Q4_K_M = "q4_k_m",
    Q4_K_S = "q4_k_s",
    Q5_K_M = "q5_k_m",
    Q5_K_S = "q5_k_s",
    Q6_K = "q6_k",
    Q2_K = "q2_k",
    Q3_K_M = "q3_k_m"
}
|
||||
/** Model configuration. Only `modelPath` is required. */
export interface ModelConfig {
    /** Path to model file */
    modelPath: string;
    /** Model format */
    format?: ModelFormat;
    /** Quantization type */
    quantization?: QuantizationType;
    /** Context window size */
    contextSize?: number;
    /** Number of GPU layers to offload */
    gpuLayers?: number;
    /** Batch size for inference */
    batchSize?: number;
    /** Number of threads for CPU inference */
    threads?: number;
}
|
||||
/** Generation parameters. Every field is optional. */
export interface GenerationParams {
    /** Maximum tokens to generate */
    maxTokens?: number;
    /** Temperature for sampling */
    temperature?: number;
    /** Top-p (nucleus) sampling */
    topP?: number;
    /** Top-k sampling */
    topK?: number;
    /** Repetition penalty */
    repetitionPenalty?: number;
    /** Stop sequences */
    stopSequences?: string[];
    /** Seed for reproducibility */
    seed?: number;
}
|
||||
/** Inference result: the generated text plus timing figures for the run. */
export interface InferenceResult {
    /** Generated text */
    text: string;
    /** Number of tokens generated */
    tokensGenerated: number;
    /** Time to first token (ms) */
    timeToFirstToken: number;
    /** Total generation time (ms) */
    totalTime: number;
    /** Tokens per second */
    tokensPerSecond: number;
    /** Finish reason: one of `'stop'`, `'length'` or `'error'` */
    finishReason: 'stop' | 'length' | 'error';
}
|
||||
/** Benchmark result for one model/backend run. All fields are required. */
export interface BenchmarkResult {
    /** Model name */
    model: string;
    /** Backend used */
    backend: AccelerationBackend;
    /** Prompt tokens */
    promptTokens: number;
    /** Generated tokens */
    generatedTokens: number;
    /** Prompt processing time (ms) */
    promptTime: number;
    /** Generation time (ms) */
    generationTime: number;
    /** Tokens per second (prompt) */
    promptTPS: number;
    /** Tokens per second (generation) */
    generationTPS: number;
    /** Memory usage (MB) */
    memoryUsage: number;
    /** Peak memory (MB) */
    peakMemory: number;
}
|
||||
/** CLI configuration. Every field is optional. */
export interface CLIConfig {
    /** Default model path */
    defaultModel?: string;
    /** Default backend */
    defaultBackend?: AccelerationBackend;
    /** Models directory */
    modelsDir?: string;
    /** Cache directory */
    cacheDir?: string;
    /** Log level */
    logLevel?: 'debug' | 'info' | 'warn' | 'error';
    /** Enable streaming output */
    streaming?: boolean;
}
|
||||
/** Chat message: a text `content` tagged with the `role` that authored it. */
export interface ChatMessage {
    role: 'system' | 'user' | 'assistant';
    content: string;
}
|
||||
/** Chat completion options: all `GenerationParams` fields plus optional chat context. */
export interface ChatCompletionOptions extends GenerationParams {
    /** System prompt */
    systemPrompt?: string;
    /** Chat history */
    messages?: ChatMessage[];
}
|
||||
//# sourceMappingURL=types.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,8BAA8B;AAC9B,oBAAY,WAAW;IACrB,IAAI,SAAS;IACb,WAAW,gBAAgB;IAC3B,IAAI,SAAS;CACd;AAED,qCAAqC;AACrC,oBAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,KAAK,UAAU;IACf,kBAAkB;IAClB,IAAI,SAAS;IACb,eAAe;IACf,GAAG,QAAQ;IACX,0BAA0B;IAC1B,GAAG,QAAQ;IACX,kCAAkC;IAClC,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,IAAI,SAAS;IACb,MAAM,WAAW;CAClB;AAED,0BAA0B;AAC1B,MAAM,WAAW,WAAW;IAC1B,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,wBAAwB;IACxB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,4BAA4B;AAC5B,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,uBAAuB;AACvB,MAAM,WAAW,eAAe;IAC9B,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,OAAO,EAAE,mBAAmB,CAAC;IAC7B,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,wBAAwB;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAwB;AACxB,MAAM
,WAAW,SAAS;IACxB,yBAAyB;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sBAAsB;IACtB,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,uBAAuB;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB;IAChB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC/C,8BAA8B;IAC9B,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,mBAAmB;AACnB,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,8BAA8B;AAC9B,MAAM,WAAW,qBAAsB,SAAQ,gBAAgB;IAC7D,oBAAoB;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mBAAmB;IACnB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B"}
|
||||
43
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js
vendored
Normal file
43
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
"use strict";
|
||||
/**
|
||||
* RuvLLM CLI Types
|
||||
* Types for CLI configuration and inference options
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.QuantizationType = exports.AccelerationBackend = exports.ModelFormat = void 0;
|
||||
/**
 * Supported model formats.
 *
 * Runtime object for the string enum: maps each member name to its string
 * value and publishes the same object as `exports.ModelFormat`.
 */
var ModelFormat;
(function (ModelFormat) {
    ModelFormat["GGUF"] = "gguf";
    ModelFormat["SafeTensors"] = "safetensors";
    ModelFormat["ONNX"] = "onnx";
})(ModelFormat || (exports.ModelFormat = ModelFormat = {}));
|
||||
/**
 * Hardware acceleration backends.
 *
 * Runtime object for the string enum: maps each member name to its string
 * value and publishes the same object as `exports.AccelerationBackend`.
 */
var AccelerationBackend;
(function (AccelerationBackend) {
    /** Apple Metal (macOS) */
    AccelerationBackend["Metal"] = "metal";
    /** NVIDIA CUDA */
    AccelerationBackend["CUDA"] = "cuda";
    /** CPU only */
    AccelerationBackend["CPU"] = "cpu";
    /** Apple Neural Engine */
    AccelerationBackend["ANE"] = "ane";
    /** Vulkan (cross-platform GPU) */
    AccelerationBackend["Vulkan"] = "vulkan";
})(AccelerationBackend || (exports.AccelerationBackend = AccelerationBackend = {}));
|
||||
/**
 * Quantization levels.
 *
 * Runtime object for the string enum: maps each member name to its string
 * value and publishes the same object as `exports.QuantizationType`.
 */
var QuantizationType;
(function (QuantizationType) {
    QuantizationType["F32"] = "f32";
    QuantizationType["F16"] = "f16";
    QuantizationType["Q8_0"] = "q8_0";
    QuantizationType["Q4_K_M"] = "q4_k_m";
    QuantizationType["Q4_K_S"] = "q4_k_s";
    QuantizationType["Q5_K_M"] = "q5_k_m";
    QuantizationType["Q5_K_S"] = "q5_k_s";
    QuantizationType["Q6_K"] = "q6_k";
    QuantizationType["Q2_K"] = "q2_k";
    QuantizationType["Q3_K_M"] = "q3_k_m";
})(QuantizationType || (exports.QuantizationType = QuantizationType = {}));
|
||||
//# sourceMappingURL=types.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-cli/src/types.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.js","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,8BAA8B;AAC9B,IAAY,WAIX;AAJD,WAAY,WAAW;IACrB,4BAAa,CAAA;IACb,0CAA2B,CAAA;IAC3B,4BAAa,CAAA;AACf,CAAC,EAJW,WAAW,2BAAX,WAAW,QAItB;AAED,qCAAqC;AACrC,IAAY,mBAWX;AAXD,WAAY,mBAAmB;IAC7B,0BAA0B;IAC1B,sCAAe,CAAA;IACf,kBAAkB;IAClB,oCAAa,CAAA;IACb,eAAe;IACf,kCAAW,CAAA;IACX,0BAA0B;IAC1B,kCAAW,CAAA;IACX,kCAAkC;IAClC,wCAAiB,CAAA;AACnB,CAAC,EAXW,mBAAmB,mCAAnB,mBAAmB,QAW9B;AAED,0BAA0B;AAC1B,IAAY,gBAWX;AAXD,WAAY,gBAAgB;IAC1B,+BAAW,CAAA;IACX,+BAAW,CAAA;IACX,iCAAa,CAAA;IACb,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,qCAAiB,CAAA;IACjB,iCAAa,CAAA;IACb,iCAAa,CAAA;IACb,qCAAiB,CAAA;AACnB,CAAC,EAXW,gBAAgB,gCAAhB,gBAAgB,QAW3B"}
|
||||
145
vendor/ruvector/npm/packages/ruvllm-cli/src/types.ts
vendored
Normal file
145
vendor/ruvector/npm/packages/ruvllm-cli/src/types.ts
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
/**
|
||||
* RuvLLM CLI Types
|
||||
* Types for CLI configuration and inference options
|
||||
*/
|
||||
|
||||
/** Supported model formats (string enum; each member's value is its lower-case name). */
export enum ModelFormat {
  GGUF = 'gguf',
  SafeTensors = 'safetensors',
  ONNX = 'onnx',
}
|
||||
|
||||
/** Hardware acceleration backends (string enum). */
export enum AccelerationBackend {
  /** Apple Metal (macOS) */
  Metal = 'metal',
  /** NVIDIA CUDA */
  CUDA = 'cuda',
  /** CPU only */
  CPU = 'cpu',
  /** Apple Neural Engine */
  ANE = 'ane',
  /** Vulkan (cross-platform GPU) */
  Vulkan = 'vulkan',
}
|
||||
|
||||
/**
 * Quantization levels (string enum; each member's value is its lower-case name).
 *
 * NOTE(review): the member names look like GGUF-style quantization labels —
 * confirm against the inference backend before relying on that.
 */
export enum QuantizationType {
  F32 = 'f32',
  F16 = 'f16',
  Q8_0 = 'q8_0',
  Q4_K_M = 'q4_k_m',
  Q4_K_S = 'q4_k_s',
  Q5_K_M = 'q5_k_m',
  Q5_K_S = 'q5_k_s',
  Q6_K = 'q6_k',
  Q2_K = 'q2_k',
  Q3_K_M = 'q3_k_m',
}
|
||||
|
||||
/** Model configuration. Only `modelPath` is required. */
export interface ModelConfig {
  /** Path to model file */
  modelPath: string;
  /** Model format */
  format?: ModelFormat;
  /** Quantization type */
  quantization?: QuantizationType;
  /** Context window size */
  contextSize?: number;
  /** Number of GPU layers to offload */
  gpuLayers?: number;
  /** Batch size for inference */
  batchSize?: number;
  /** Number of threads for CPU inference */
  threads?: number;
}
|
||||
|
||||
/** Generation parameters. Every field is optional. */
export interface GenerationParams {
  /** Maximum tokens to generate */
  maxTokens?: number;
  /** Temperature for sampling */
  temperature?: number;
  /** Top-p (nucleus) sampling */
  topP?: number;
  /** Top-k sampling */
  topK?: number;
  /** Repetition penalty */
  repetitionPenalty?: number;
  /** Stop sequences */
  stopSequences?: string[];
  /** Seed for reproducibility */
  seed?: number;
}
|
||||
|
||||
/** Inference result: the generated text plus timing figures for the run. */
export interface InferenceResult {
  /** Generated text */
  text: string;
  /** Number of tokens generated */
  tokensGenerated: number;
  /** Time to first token (ms) */
  timeToFirstToken: number;
  /** Total generation time (ms) */
  totalTime: number;
  /** Tokens per second */
  tokensPerSecond: number;
  /** Finish reason: one of `'stop'`, `'length'` or `'error'` */
  finishReason: 'stop' | 'length' | 'error';
}
|
||||
|
||||
/** Benchmark result for one model/backend run. All fields are required. */
export interface BenchmarkResult {
  /** Model name */
  model: string;
  /** Backend used */
  backend: AccelerationBackend;
  /** Prompt tokens */
  promptTokens: number;
  /** Generated tokens */
  generatedTokens: number;
  /** Prompt processing time (ms) */
  promptTime: number;
  /** Generation time (ms) */
  generationTime: number;
  /** Tokens per second (prompt) */
  promptTPS: number;
  /** Tokens per second (generation) */
  generationTPS: number;
  /** Memory usage (MB) */
  memoryUsage: number;
  /** Peak memory (MB) */
  peakMemory: number;
}
|
||||
|
||||
/** CLI configuration. Every field is optional. */
export interface CLIConfig {
  /** Default model path */
  defaultModel?: string;
  /** Default backend */
  defaultBackend?: AccelerationBackend;
  /** Models directory */
  modelsDir?: string;
  /** Cache directory */
  cacheDir?: string;
  /** Log level */
  logLevel?: 'debug' | 'info' | 'warn' | 'error';
  /** Enable streaming output */
  streaming?: boolean;
}
|
||||
|
||||
/** Chat message: a text `content` tagged with the `role` that authored it. */
export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
|
||||
|
||||
/** Chat completion options: all `GenerationParams` fields plus optional chat context. */
export interface ChatCompletionOptions extends GenerationParams {
  /** System prompt */
  systemPrompt?: string;
  /** Chat history */
  messages?: ChatMessage[];
}
|
||||
Reference in New Issue
Block a user