Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
276
vendor/ruvector/npm/packages/ruvllm-wasm/README.md
vendored
Normal file
276
vendor/ruvector/npm/packages/ruvllm-wasm/README.md
vendored
Normal file
@@ -0,0 +1,276 @@
|
||||
# @ruvector/ruvllm-wasm
|
||||
|
||||
[![npm version](https://img.shields.io/npm/v/%40ruvector%2Fruvllm-wasm)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![npm downloads](https://img.shields.io/npm/dm/%40ruvector%2Fruvllm-wasm)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![bundle size](https://img.shields.io/bundlephobia/minzip/%40ruvector%2Fruvllm-wasm)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![license](https://img.shields.io/badge/license-MIT%20OR%20Apache--2.0-blue)](https://github.com/ruvnet/ruvector/blob/main/LICENSE)
[![TypeScript](https://img.shields.io/badge/TypeScript-ready-blue)](https://www.typescriptlang.org/)
|
||||
|
||||
**Run large language models directly in the browser** using WebAssembly with optional WebGPU acceleration for faster inference.
|
||||
|
||||
## Features
|
||||
|
||||
- **Browser-Native** - No server required, runs entirely client-side
|
||||
- **WebGPU Acceleration** - 10-50x faster inference with GPU support
|
||||
- **GGUF Models** - Load quantized models for efficient browser inference
|
||||
- **Streaming** - Real-time token streaming for responsive UX
|
||||
- **IndexedDB Caching** - Cache models locally for instant reload
|
||||
- **Privacy-First** - All processing happens on-device
|
||||
- **SIMD Support** - Optimized WASM with SIMD instructions
|
||||
- **Multi-Threading** - Parallel inference with SharedArrayBuffer
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install @ruvector/ruvllm-wasm
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```typescript
|
||||
import { RuvLLMWasm, checkWebGPU } from '@ruvector/ruvllm-wasm';
|
||||
|
||||
// Check browser capabilities
|
||||
const webgpu = await checkWebGPU();
|
||||
console.log('WebGPU:', webgpu); // 'available' | 'unavailable' | 'not_supported'
|
||||
|
||||
// Create instance with WebGPU (if available)
|
||||
const llm = await RuvLLMWasm.create({
|
||||
useWebGPU: true,
|
||||
memoryLimit: 4096, // 4GB max
|
||||
});
|
||||
|
||||
// Load a model (with progress tracking)
|
||||
await llm.loadModel('https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf', {
|
||||
onProgress: (loaded, total) => {
|
||||
console.log(`Loading: ${Math.round(loaded / total * 100)}%`);
|
||||
}
|
||||
});
|
||||
|
||||
// Generate text
|
||||
const result = await llm.generate('What is the capital of France?', {
|
||||
maxTokens: 100,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
console.log(result.text);
|
||||
console.log(`${result.stats.tokensPerSecond.toFixed(1)} tokens/sec`);
|
||||
```
|
||||
|
||||
## Streaming Tokens
|
||||
|
||||
```typescript
|
||||
// Stream tokens as they're generated
|
||||
await llm.generate('Tell me a story about a robot', {
|
||||
maxTokens: 200,
|
||||
stream: true,
|
||||
}, (token, done) => {
|
||||
  console.log(token); // in the browser, append the token to your UI (process.stdout is Node-only)
|
||||
if (done) console.log('\n--- Done ---');
|
||||
});
|
||||
```
|
||||
|
||||
## Chat Interface
|
||||
|
||||
```typescript
|
||||
import { ChatMessage } from '@ruvector/ruvllm-wasm';
|
||||
|
||||
const messages: ChatMessage[] = [
|
||||
{ role: 'system', content: 'You are a helpful assistant.' },
|
||||
{ role: 'user', content: 'What is 2 + 2?' },
|
||||
];
|
||||
|
||||
const response = await llm.chat(messages, {
|
||||
maxTokens: 100,
|
||||
temperature: 0.5,
|
||||
});
|
||||
|
||||
console.log(response.text); // "2 + 2 equals 4."
|
||||
```
|
||||
|
||||
## React Hook Example
|
||||
|
||||
```tsx
|
||||
import { useState, useEffect } from 'react';
|
||||
import { RuvLLMWasm, LoadingStatus } from '@ruvector/ruvllm-wasm';
|
||||
|
||||
function useLLM(modelUrl: string) {
|
||||
const [llm, setLLM] = useState<RuvLLMWasm | null>(null);
|
||||
const [status, setStatus] = useState<LoadingStatus>('idle');
|
||||
const [progress, setProgress] = useState(0);
|
||||
|
||||
useEffect(() => {
|
||||
let instance: RuvLLMWasm;
|
||||
|
||||
async function init() {
|
||||
instance = await RuvLLMWasm.create({ useWebGPU: true });
|
||||
setStatus('downloading');
|
||||
|
||||
await instance.loadModel(modelUrl, {
|
||||
onProgress: (loaded, total) => setProgress(loaded / total),
|
||||
});
|
||||
|
||||
setStatus('ready');
|
||||
setLLM(instance);
|
||||
}
|
||||
|
||||
init();
|
||||
return () => instance?.unload();
|
||||
}, [modelUrl]);
|
||||
|
||||
return { llm, status, progress };
|
||||
}
|
||||
|
||||
// Usage
|
||||
function ChatApp() {
|
||||
const { llm, status, progress } = useLLM('https://example.com/model.gguf');
|
||||
const [response, setResponse] = useState('');
|
||||
|
||||
if (status !== 'ready') {
|
||||
return <div>Loading: {Math.round(progress * 100)}%</div>;
|
||||
}
|
||||
|
||||
const generate = async () => {
|
||||
const result = await llm!.generate('Hello!', { maxTokens: 50 });
|
||||
setResponse(result.text);
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
<button onClick={generate}>Generate</button>
|
||||
<p>{response}</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
## Browser Requirements
|
||||
|
||||
| Feature | Required | Benefit |
|
||||
|---------|----------|---------|
|
||||
| WebAssembly | Yes | Core execution |
|
||||
| WebGPU | No (recommended) | 10-50x faster |
|
||||
| SharedArrayBuffer | No | Multi-threading |
|
||||
| SIMD | No | 2-4x faster math |
|
||||
|
||||
### Check Capabilities
|
||||
|
||||
```typescript
|
||||
import { getCapabilities } from '@ruvector/ruvllm-wasm';
|
||||
|
||||
const caps = await getCapabilities();
|
||||
console.log(caps);
|
||||
// {
|
||||
// webgpu: 'available',
|
||||
// sharedArrayBuffer: true,
|
||||
// simd: true,
|
||||
// crossOriginIsolated: true
|
||||
// }
|
||||
```
|
||||
|
||||
### Enable SharedArrayBuffer
|
||||
|
||||
Add these headers to your server:
|
||||
|
||||
```
|
||||
Cross-Origin-Opener-Policy: same-origin
|
||||
Cross-Origin-Embedder-Policy: require-corp
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### `RuvLLMWasm.create(options?)`
|
||||
|
||||
Create a new instance.
|
||||
|
||||
```typescript
|
||||
const llm = await RuvLLMWasm.create({
|
||||
useWebGPU: true, // Enable WebGPU acceleration
|
||||
threads: 4, // CPU threads (requires SharedArrayBuffer)
|
||||
memoryLimit: 4096, // Max memory in MB
|
||||
});
|
||||
```
|
||||
|
||||
### `loadModel(source, options?)`
|
||||
|
||||
Load a GGUF model.
|
||||
|
||||
```typescript
|
||||
await llm.loadModel(url, {
|
||||
onProgress: (loaded, total) => { /* ... */ }
|
||||
});
|
||||
```
|
||||
|
||||
### `generate(prompt, config?, onToken?)`
|
||||
|
||||
Generate text completion.
|
||||
|
||||
```typescript
|
||||
const result = await llm.generate('Hello', {
|
||||
maxTokens: 100,
|
||||
temperature: 0.7,
|
||||
topP: 0.9,
|
||||
topK: 40,
|
||||
repetitionPenalty: 1.1,
|
||||
stopSequences: ['\n\n'],
|
||||
stream: true,
|
||||
}, (token, done) => { /* ... */ });
|
||||
```
|
||||
|
||||
### `chat(messages, config?, onToken?)`
|
||||
|
||||
Chat completion with message history.
|
||||
|
||||
```typescript
|
||||
const result = await llm.chat([
|
||||
{ role: 'system', content: 'You are helpful.' },
|
||||
{ role: 'user', content: 'Hi!' },
|
||||
], { maxTokens: 100 });
|
||||
```
|
||||
|
||||
### `unload()`
|
||||
|
||||
Free memory and unload model.
|
||||
|
||||
```typescript
|
||||
llm.unload();
|
||||
```
|
||||
|
||||
## Recommended Models
|
||||
|
||||
Small models suitable for browser inference:
|
||||
|
||||
| Model | Size | Use Case |
|
||||
|-------|------|----------|
|
||||
| TinyLlama-1.1B-Q4 | ~700 MB | General chat |
|
||||
| Phi-2-Q4 | ~1.6 GB | Code, reasoning |
|
||||
| Qwen2-0.5B-Q4 | ~400 MB | Fast responses |
|
||||
| StableLM-Zephyr-3B-Q4 | ~2 GB | Quality chat |
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Use WebGPU** - Check support and enable for 10-50x speedup
|
||||
2. **Smaller models** - Q4_K_M quantization balances quality/size
|
||||
3. **Cache models** - IndexedDB caching avoids re-downloads
|
||||
4. **Limit context** - Smaller context = faster inference
|
||||
5. **Stream tokens** - Better UX with progressive output
|
||||
|
||||
## Related Packages
|
||||
|
||||
- [@ruvector/ruvllm](https://www.npmjs.com/package/@ruvector/ruvllm) - Node.js LLM library
|
||||
- [@ruvector/ruvllm-cli](https://www.npmjs.com/package/@ruvector/ruvllm-cli) - CLI tool
|
||||
- [ruvector](https://www.npmjs.com/package/ruvector) - Vector database
|
||||
|
||||
## Documentation
|
||||
|
||||
- [WASM Crate](https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-wasm)
|
||||
- [API Reference](https://docs.rs/ruvllm-wasm)
|
||||
- [Examples](https://github.com/ruvnet/ruvector/tree/main/examples/ruvLLM)
|
||||
|
||||
## License
|
||||
|
||||
MIT OR Apache-2.0
|
||||
|
||||
---
|
||||
|
||||
**Part of the [RuVector](https://github.com/ruvnet/ruvector) ecosystem** - High-performance vector database with self-learning capabilities.
|
||||
68
vendor/ruvector/npm/packages/ruvllm-wasm/package.json
vendored
Normal file
68
vendor/ruvector/npm/packages/ruvllm-wasm/package.json
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"name": "@ruvector/ruvllm-wasm",
|
||||
"version": "0.1.0",
|
||||
"description": "WASM bindings for browser-based LLM inference - run AI models directly in the browser with WebGPU acceleration",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"import": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"default": "./dist/index.js"
|
||||
},
|
||||
"require": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"default": "./dist/index.js"
|
||||
}
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"prepublishOnly": "npm run build",
|
||||
"test": "node --test test/*.test.js",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.19.30",
|
||||
"@webgpu/types": "^0.1.69",
|
||||
"typescript": "^5.9.3"
|
||||
},
|
||||
"keywords": [
|
||||
"llm",
|
||||
"wasm",
|
||||
"webassembly",
|
||||
"browser",
|
||||
"inference",
|
||||
"webgpu",
|
||||
"ai",
|
||||
"machine-learning",
|
||||
"edge",
|
||||
"offline",
|
||||
"ruvector",
|
||||
"ruvllm",
|
||||
"transformers"
|
||||
],
|
||||
"author": "rUv Team <team@ruv.io>",
|
||||
"license": "MIT OR Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ruvnet/ruvector.git",
|
||||
"directory": "npm/packages/ruvllm-wasm"
|
||||
},
|
||||
"homepage": "https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-wasm",
|
||||
"bugs": {
|
||||
"url": "https://github.com/ruvnet/ruvector/issues"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"publishConfig": {
|
||||
"registry": "https://registry.npmjs.org/",
|
||||
"access": "public"
|
||||
},
|
||||
"files": [
|
||||
"dist",
|
||||
"README.md"
|
||||
]
|
||||
}
|
||||
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,EACL,YAAY,EACZ,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,UAAU,EACV,gBAAgB,EAChB,aAAa,EACb,gBAAgB,EAChB,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAEpB,sBAAsB;AACtB,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B;;GAEG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,OAAO,YAAY,EAAE,YAAY,CAAC,CAkB9E;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,OAAO,CAEhD;AAED;;GAEG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,OAAO,CAAC,CAclD;AAED;;GAEG;AACH,wBAAsB,eAAe,IAAI,OAAO,CAAC;IAC/C,MAAM,EAAE,OAAO,YAAY,EAAE,YAAY,CAAC;IAC1C,iBAAiB,EAAE,OAAO,CAAC;IAC3B,IAAI,EAAE,OAAO,CAAC;IACd,mBAAmB,EAAE,OAAO,CAAC;CAC9B,CAAC,CAYD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAWpD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,aAAa,EAAE,MAAM,GAAG;IACrD,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB,CAQA;AAED;;;GAGG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,MAAM,CAAkC;IAChD,OAAO,CAAC,MAAM,CAAoF;IAElG,OAAO;IAIP;;OAEG;WACU,MAAM,CAAC,OAAO,CAAC,EAAE;QAC5B,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,OAAO,CAAC,UAAU,CAAC;IAqBvB;;OAEG;IACH,SAAS,IAAI,OAAO,YAAY,EAAE,aAAa;IAI/C;;OAEG;IACG,SAAS,CACb,MAAM,EAAE,MAAM,GAAG,WAAW,EAC5B,OAAO,CAAC,EAAE;QACR,UAAU,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,CAAC;KACpD,GACA,OAAO,CAAC,OAAO,YAAY,EAAE,aAAa,CAAC;IAuB9C;;OAEG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,EAC9C,OAAO,CAAC,EAAE,OAAO,YAAY,EAAE,aAAa,GAC3C,OAAO,CAAC,OAAO,YAAY,EAAE,gBAAgB,CAAC;IAkBjD;;OAEG;IACG,IAAI,CACR,QAAQ,EAAE,OAAO,YAAY,EAAE,WAAW,EAAE,EAC5C,MAAM,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,EAC9C,OAAO,CAAC,EAAE,OAAO,YAAY,EAAE,aAAa,GAC3C,OAAO,CAAC,OAAO,YAAY,EAAE,gBAAgB,CAAC;IAQjD;;OAEG;IACH,MAAM,IAAI,IAAI;CAGf"}
|
||||
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;;;AAuBH,kCAkBC;AAKD,wDAEC;AAKD,8BAcC;AAKD,0CAiBC;AAKD,wCAWC;AAKD,wCAWC;AAvHD,uCAaoB;AAZlB,wGAAA,YAAY,OAAA;AACZ,yGAAA,aAAa,OAAA;AACb,6GAAA,iBAAiB,OAAA;AAYnB,sBAAsB;AACT,QAAA,OAAO,GAAG,OAAO,CAAC;AAE/B;;GAEG;AACI,KAAK,UAAU,WAAW;IAC/B,IAAI,OAAO,SAAS,KAAK,WAAW,EAAE,CAAC;QACrC,OAAO,eAAoD,CAAC;IAC9D,CAAC;IAED,IAAI,CAAC,CAAC,KAAK,IAAI,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,eAAoD,CAAC;IAC9D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAO,SAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC9D,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,WAAgD,CAAC;QAC1D,CAAC;QACD,OAAO,aAAkD,CAAC;IAC5D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,aAAkD,CAAC;IAC5D,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB;IACpC,OAAO,OAAO,iBAAiB,KAAK,WAAW,CAAC;AAClD,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,SAAS;IAC7B,IAAI,CAAC;QACH,8BAA8B;QAC9B,MAAM,QAAQ,GAAG,IAAI,UAAU,CAAC;YAC9B,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;SACnC,CAAC,CAAC;QACH,MAAM,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,eAAe;IAMnC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACvC,WAAW,EAAE;QACb,SAAS,EAAE;KACZ,CAAC,CAAC;IAEH,OAAO;QACL,MAAM;QACN,iBAAiB,EAAE,sBAAsB,EAAE;QAC3C,IAAI;QACJ,mBAAmB,EAAE,OAAO,mBAAmB,KAAK,WAAW,IAAI,mBAAmB;KACvF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,KAAa;IAC1C,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,IAAI,IAAI,GAAG,KAAK,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,OAAO,IAAI,IAAI,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,IAAI,IAAI,IAAI,CAAC;QACb,SAAS,EAAE,CAAC;IACd,CAAC;IAED,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,SA
AS,CAAC,EAAE,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,aAAqB;IAIlD,sCAAsC;IACtC,MAAM,UAAU,GAAG,aAAa,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAEjD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,EAAE,eAAe;QACrD,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,EAAE,4BAA4B;KACvE,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAa,UAAU;IAIrB,YAAoB,MAAuC;QAFnD,WAAM,GAAuC,MAA4C,CAAC;QAGhG,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAInB;QACC,MAAM,MAAM,GAAoC;YAC9C,OAAO,EAAE,OAAO,EAAE,OAAO;YACzB,WAAW,EAAE,OAAO,EAAE,WAAW;YACjC,IAAI,EAAE,MAAM,SAAS,EAAE;YACvB,WAAW,EAAE,IAAI;SAClB,CAAC;QAEF,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACvB,MAAM,YAAY,GAAG,MAAM,WAAW,EAAE,CAAC;YACzC,IAAI,YAAY,KAAK,WAAW,EAAE,CAAC;gBACjC,MAAM,OAAO,GAAG,MAAO,SAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;gBAC9D,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;gBAChD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CACb,MAA4B,EAC5B,OAEC;QAED,IAAI,CAAC,MAAM,GAAG,SAA+C,CAAC;QAE9D,2DAA2D;QAC3D,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACxF,OAAO,CAAC,GAAG,CAAC,2DAA2D,CAAC,CAAC;QACzE,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;QAE9C,IAAI,CAAC,MAAM,GAAG,OAA6C,CAAC;QAE5D,OAAO;YACL,IAAI,EAAE,aAAa;YACnB,YAAY,EAAE,OAAiD;YAC/D,UAAU,EAAE,IAAI;YAChB,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,KAAK;YAChB,YAAY,EAAE,IAAI;YAClB,SAAS,EAAE,EAAE;YACb,YAAY,EAAE,QAAQ;YACtB,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,MAA8C,EAC9C,OAA4C;QAE5C,OAAO,CAAC,GAAG,CAAC,yBAAyB,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC;QACxE,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;QAEtE,OAAO;YACL,IAAI,EAAE,8DAA8D;YACpE,KAAK,EAAE;gBACL,eAAe,EAAE,CAAC;gBAClB,gBAAgB,EAAE,CAAC;gBACnB,SAAS,EAAE,CAAC;gBACZ,eAAe,EAAE,CAAC;gBAClB,YAAY,EAAE,CAAC;gBACf,UAAU,EAAE,CAAC;aACd;YACD,YAAY,EAAE,MAAM;SACrB,CAAC;
IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CACR,QAA4C,EAC5C,MAA8C,EAC9C,OAA4C;QAE5C,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;aACnC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,IAAI,CAAC,MAAM,GAAG,MAA4C,CAAC;IAC7D,CAAC;CACF;AAxHD,gCAwHC"}
|
||||
276
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.ts
vendored
Normal file
276
vendor/ruvector/npm/packages/ruvllm-wasm/src/index.ts
vendored
Normal file
@@ -0,0 +1,276 @@
|
||||
/**
|
||||
* @ruvector/ruvllm-wasm - Browser LLM Inference with WebAssembly
|
||||
*
|
||||
* Run large language models directly in the browser using WebAssembly
|
||||
* with optional WebGPU acceleration for faster inference.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* import { RuvLLMWasm } from '@ruvector/ruvllm-wasm';
|
||||
*
|
||||
* // Initialize with WebGPU (if available)
|
||||
* const llm = await RuvLLMWasm.create({ useWebGPU: true });
|
||||
*
|
||||
* // Load a model
|
||||
* await llm.loadModel('https://example.com/model.gguf', {
|
||||
* onProgress: (loaded, total) => console.log(`${loaded}/${total}`)
|
||||
* });
|
||||
*
|
||||
* // Generate text
|
||||
* const result = await llm.generate('Hello, world!', {
|
||||
* maxTokens: 100,
|
||||
* temperature: 0.7,
|
||||
* });
|
||||
*
|
||||
* console.log(result.text);
|
||||
* ```
|
||||
*
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
export {
|
||||
WebGPUStatus,
|
||||
LoadingStatus,
|
||||
ModelArchitecture,
|
||||
ModelMetadata,
|
||||
WASMConfig,
|
||||
GenerationConfig,
|
||||
TokenCallback,
|
||||
ProgressCallback,
|
||||
InferenceStats,
|
||||
ChatMessage,
|
||||
CompletionResult,
|
||||
DownloadProgress,
|
||||
} from './types.js';
|
||||
|
||||
/** Package version (keep in sync with "version" in package.json). */
export const VERSION = '0.1.0';
|
||||
|
||||
/**
|
||||
* Check WebGPU availability
|
||||
*/
|
||||
export async function checkWebGPU(): Promise<import('./types.js').WebGPUStatus> {
|
||||
if (typeof navigator === 'undefined') {
|
||||
return 'not_supported' as import('./types.js').WebGPUStatus;
|
||||
}
|
||||
|
||||
if (!('gpu' in navigator)) {
|
||||
return 'not_supported' as import('./types.js').WebGPUStatus;
|
||||
}
|
||||
|
||||
try {
|
||||
const adapter = await (navigator as any).gpu.requestAdapter();
|
||||
if (adapter) {
|
||||
return 'available' as import('./types.js').WebGPUStatus;
|
||||
}
|
||||
return 'unavailable' as import('./types.js').WebGPUStatus;
|
||||
} catch {
|
||||
return 'unavailable' as import('./types.js').WebGPUStatus;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check SharedArrayBuffer support (required for threading)
|
||||
*/
|
||||
export function checkSharedArrayBuffer(): boolean {
|
||||
return typeof SharedArrayBuffer !== 'undefined';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check SIMD support
|
||||
*/
|
||||
export async function checkSIMD(): Promise<boolean> {
|
||||
try {
|
||||
// Check for WASM SIMD support
|
||||
const simdTest = new Uint8Array([
|
||||
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b, 0x03,
|
||||
0x02, 0x01, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00,
|
||||
0x41, 0x00, 0xfd, 0x0f, 0x00, 0x0b,
|
||||
]);
|
||||
await WebAssembly.compile(simdTest);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get browser capabilities for LLM inference
|
||||
*/
|
||||
export async function getCapabilities(): Promise<{
|
||||
webgpu: import('./types.js').WebGPUStatus;
|
||||
sharedArrayBuffer: boolean;
|
||||
simd: boolean;
|
||||
crossOriginIsolated: boolean;
|
||||
}> {
|
||||
const [webgpu, simd] = await Promise.all([
|
||||
checkWebGPU(),
|
||||
checkSIMD(),
|
||||
]);
|
||||
|
||||
return {
|
||||
webgpu,
|
||||
sharedArrayBuffer: checkSharedArrayBuffer(),
|
||||
simd,
|
||||
crossOriginIsolated: typeof crossOriginIsolated !== 'undefined' && crossOriginIsolated,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Format file size for display
|
||||
*/
|
||||
export function formatFileSize(bytes: number): string {
|
||||
const units = ['B', 'KB', 'MB', 'GB'];
|
||||
let size = bytes;
|
||||
let unitIndex = 0;
|
||||
|
||||
while (size >= 1024 && unitIndex < units.length - 1) {
|
||||
size /= 1024;
|
||||
unitIndex++;
|
||||
}
|
||||
|
||||
return `${size.toFixed(1)} ${units[unitIndex]}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate memory requirements for a model
|
||||
*/
|
||||
export function estimateMemory(fileSizeBytes: number): {
|
||||
minimum: number;
|
||||
recommended: number;
|
||||
} {
|
||||
// Rough estimates based on model size
|
||||
const fileSizeMB = fileSizeBytes / (1024 * 1024);
|
||||
|
||||
return {
|
||||
minimum: Math.ceil(fileSizeMB * 1.2), // 20% overhead
|
||||
recommended: Math.ceil(fileSizeMB * 1.5), // 50% overhead for KV cache
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* RuvLLM WASM class placeholder
|
||||
* Full implementation requires WASM binary from ruvllm-wasm crate
|
||||
*/
|
||||
export class RuvLLMWasm {
|
||||
private config: import('./types.js').WASMConfig;
|
||||
private status: import('./types.js').LoadingStatus = 'idle' as import('./types.js').LoadingStatus;
|
||||
|
||||
private constructor(config: import('./types.js').WASMConfig) {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new RuvLLMWasm instance
|
||||
*/
|
||||
static async create(options?: {
|
||||
useWebGPU?: boolean;
|
||||
threads?: number;
|
||||
memoryLimit?: number;
|
||||
}): Promise<RuvLLMWasm> {
|
||||
const config: import('./types.js').WASMConfig = {
|
||||
threads: options?.threads,
|
||||
memoryLimit: options?.memoryLimit,
|
||||
simd: await checkSIMD(),
|
||||
cacheModels: true,
|
||||
};
|
||||
|
||||
if (options?.useWebGPU) {
|
||||
const webgpuStatus = await checkWebGPU();
|
||||
if (webgpuStatus === 'available') {
|
||||
const adapter = await (navigator as any).gpu.requestAdapter();
|
||||
if (adapter) {
|
||||
config.device = await adapter.requestDevice();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new RuvLLMWasm(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current loading status
|
||||
*/
|
||||
getStatus(): import('./types.js').LoadingStatus {
|
||||
return this.status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a model from URL or ArrayBuffer
|
||||
*/
|
||||
async loadModel(
|
||||
source: string | ArrayBuffer,
|
||||
options?: {
|
||||
onProgress?: import('./types.js').ProgressCallback;
|
||||
}
|
||||
): Promise<import('./types.js').ModelMetadata> {
|
||||
this.status = 'loading' as import('./types.js').LoadingStatus;
|
||||
|
||||
// Placeholder - actual implementation requires WASM binary
|
||||
console.log('Loading model from:', typeof source === 'string' ? source : 'ArrayBuffer');
|
||||
console.log('Note: Full model loading requires the ruvllm-wasm binary.');
|
||||
console.log('Build from: crates/ruvllm-wasm');
|
||||
|
||||
this.status = 'ready' as import('./types.js').LoadingStatus;
|
||||
|
||||
return {
|
||||
name: 'placeholder',
|
||||
architecture: 'llama' as import('./types.js').ModelArchitecture,
|
||||
parameters: '0B',
|
||||
contextLength: 2048,
|
||||
vocabSize: 32000,
|
||||
embeddingDim: 2048,
|
||||
numLayers: 22,
|
||||
quantization: 'q4_k_m',
|
||||
fileSize: 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate text completion
|
||||
*/
|
||||
async generate(
|
||||
prompt: string,
|
||||
config?: import('./types.js').GenerationConfig,
|
||||
onToken?: import('./types.js').TokenCallback
|
||||
): Promise<import('./types.js').CompletionResult> {
|
||||
console.log('Generating with prompt:', prompt.substring(0, 50) + '...');
|
||||
console.log('Note: Full generation requires the ruvllm-wasm binary.');
|
||||
|
||||
return {
|
||||
text: '[Placeholder - build ruvllm-wasm crate for actual inference]',
|
||||
stats: {
|
||||
tokensGenerated: 0,
|
||||
timeToFirstToken: 0,
|
||||
totalTime: 0,
|
||||
tokensPerSecond: 0,
|
||||
promptTokens: 0,
|
||||
memoryUsed: 0,
|
||||
},
|
||||
finishReason: 'stop',
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Chat completion with message history
|
||||
*/
|
||||
async chat(
|
||||
messages: import('./types.js').ChatMessage[],
|
||||
config?: import('./types.js').GenerationConfig,
|
||||
onToken?: import('./types.js').TokenCallback
|
||||
): Promise<import('./types.js').CompletionResult> {
|
||||
const prompt = messages
|
||||
.map(m => `${m.role}: ${m.content}`)
|
||||
.join('\n');
|
||||
|
||||
return this.generate(prompt, config, onToken);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unload model and free memory
|
||||
*/
|
||||
unload(): void {
|
||||
this.status = 'idle' as import('./types.js').LoadingStatus;
|
||||
}
|
||||
}
|
||||
123
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.d.ts
vendored
Normal file
123
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.d.ts
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
/**
 * RuvLLM WASM Types
 * Types for browser-based LLM inference
 *
 * NOTE(review): this appears to be compiler-emitted declaration output
 * (see the sourceMappingURL below and the adjacent types.d.ts.map) —
 * make edits in the types.ts source, not here.
 */
/** WebGPU availability status */
export declare enum WebGPUStatus {
    Available = "available",
    Unavailable = "unavailable",
    NotSupported = "not_supported"
}
/** Model loading status */
export declare enum LoadingStatus {
    Idle = "idle",
    Downloading = "downloading",
    Loading = "loading",
    Ready = "ready",
    Error = "error"
}
/** Supported model architectures */
export declare enum ModelArchitecture {
    Llama = "llama",
    Mistral = "mistral",
    Phi = "phi",
    Qwen = "qwen",
    Gemma = "gemma",
    StableLM = "stablelm"
}
/** Model metadata returned by loadModel() */
export interface ModelMetadata {
    /** Model name */
    name: string;
    /** Model architecture */
    architecture: ModelArchitecture;
    /** Number of parameters (human-readable string) */
    parameters: string;
    /** Context length */
    contextLength: number;
    /** Vocabulary size */
    vocabSize: number;
    /** Embedding dimension */
    embeddingDim: number;
    /** Number of layers */
    numLayers: number;
    /** Quantization type (e.g. "q4_k_m") */
    quantization: string;
    /** File size in bytes */
    fileSize: number;
}
/** WASM module configuration */
export interface WASMConfig {
    /** WebGPU device (optional) */
    device?: GPUDevice;
    /** Number of threads (SharedArrayBuffer required) */
    threads?: number;
    /** SIMD enabled */
    simd?: boolean;
    /** Memory limit in MB */
    memoryLimit?: number;
    /** Cache models in IndexedDB */
    cacheModels?: boolean;
}
/** Generation configuration */
export interface GenerationConfig {
    /** Maximum tokens to generate */
    maxTokens?: number;
    /** Temperature (0-2) */
    temperature?: number;
    /** Top-p sampling */
    topP?: number;
    /** Top-k sampling */
    topK?: number;
    /** Repetition penalty */
    repetitionPenalty?: number;
    /** Stop sequences */
    stopSequences?: string[];
    /** Stream tokens as generated */
    stream?: boolean;
}
/** Token callback for streaming; `done` is true on the final call */
export type TokenCallback = (token: string, done: boolean) => void;
/** Progress callback for model loading (both values in bytes) */
export type ProgressCallback = (loaded: number, total: number) => void;
/** Inference statistics */
export interface InferenceStats {
    /** Tokens generated */
    tokensGenerated: number;
    /** Time to first token (ms) */
    timeToFirstToken: number;
    /** Total time (ms) */
    totalTime: number;
    /** Tokens per second */
    tokensPerSecond: number;
    /** Prompt tokens */
    promptTokens: number;
    /** Memory used (MB) */
    memoryUsed: number;
}
/** Chat message */
export interface ChatMessage {
    role: 'system' | 'user' | 'assistant';
    content: string;
}
/** Completion result */
export interface CompletionResult {
    /** Generated text */
    text: string;
    /** Inference statistics */
    stats: InferenceStats;
    /** Finish reason */
    finishReason: 'stop' | 'length' | 'error';
}
/** Model download progress */
export interface DownloadProgress {
    /** Bytes downloaded */
    loaded: number;
    /** Total bytes */
    total: number;
    /** Download speed (bytes/sec) */
    speed: number;
    /** Estimated time remaining (seconds) */
    eta: number;
}
//# sourceMappingURL=types.d.ts.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.d.ts.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.d.ts.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,iCAAiC;AACjC,oBAAY,YAAY;IACtB,SAAS,cAAc;IACvB,WAAW,gBAAgB;IAC3B,YAAY,kBAAkB;CAC/B;AAED,2BAA2B;AAC3B,oBAAY,aAAa;IACvB,IAAI,SAAS;IACb,WAAW,gBAAgB;IAC3B,OAAO,YAAY;IACnB,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,oCAAoC;AACpC,oBAAY,iBAAiB;IAC3B,KAAK,UAAU;IACf,OAAO,YAAY;IACnB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,KAAK,UAAU;IACf,QAAQ,aAAa;CACtB;AAED,qBAAqB;AACrB,MAAM,WAAW,aAAa;IAC5B,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,YAAY,EAAE,iBAAiB,CAAC;IAChC,2BAA2B;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,qBAAqB;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,sBAAsB;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,0BAA0B;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,gCAAgC;AAChC,MAAM,WAAW,UAAU;IACzB,+BAA+B;IAC/B,MAAM,CAAC,EAAE,SAAS,CAAC;IACnB,qDAAqD;IACrD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mBAAmB;IACnB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,yBAAyB;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,+BAA+B;AAC/B,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,iCAAiC;IACjC,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,mCAAmC;AACnC,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,KAAK,IAAI,CAAC;AAEnE,0CAA0C;AAC1C,MAAM,MAAM,gBAAgB,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAEvE,2BAA2B;AAC3B,MAAM,WAAW,cAAc;IAC7B,uBAAuB;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,sBAAsB;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,mBAAmB;AACnB,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE
,MAAM,CAAC;CACjB;AAED,wBAAwB;AACxB,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,2BAA2B;IAC3B,KAAK,EAAE,cAAc,CAAC;IACtB,oBAAoB;IACpB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,8BAA8B;AAC9B,MAAM,WAAW,gBAAgB;IAC/B,uBAAuB;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,yCAAyC;IACzC,GAAG,EAAE,MAAM,CAAC;CACb"}
|
||||
34
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.js
vendored
Normal file
34
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.js
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
"use strict";
|
||||
/**
|
||||
* RuvLLM WASM Types
|
||||
* Types for browser-based LLM inference
|
||||
*/
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.ModelArchitecture = exports.LoadingStatus = exports.WebGPUStatus = void 0;
|
||||
/** WebGPU availability status */
|
||||
var WebGPUStatus;
|
||||
(function (WebGPUStatus) {
|
||||
WebGPUStatus["Available"] = "available";
|
||||
WebGPUStatus["Unavailable"] = "unavailable";
|
||||
WebGPUStatus["NotSupported"] = "not_supported";
|
||||
})(WebGPUStatus || (exports.WebGPUStatus = WebGPUStatus = {}));
|
||||
/** Model loading status */
|
||||
var LoadingStatus;
|
||||
(function (LoadingStatus) {
|
||||
LoadingStatus["Idle"] = "idle";
|
||||
LoadingStatus["Downloading"] = "downloading";
|
||||
LoadingStatus["Loading"] = "loading";
|
||||
LoadingStatus["Ready"] = "ready";
|
||||
LoadingStatus["Error"] = "error";
|
||||
})(LoadingStatus || (exports.LoadingStatus = LoadingStatus = {}));
|
||||
/** Supported model architectures */
|
||||
var ModelArchitecture;
|
||||
(function (ModelArchitecture) {
|
||||
ModelArchitecture["Llama"] = "llama";
|
||||
ModelArchitecture["Mistral"] = "mistral";
|
||||
ModelArchitecture["Phi"] = "phi";
|
||||
ModelArchitecture["Qwen"] = "qwen";
|
||||
ModelArchitecture["Gemma"] = "gemma";
|
||||
ModelArchitecture["StableLM"] = "stablelm";
|
||||
})(ModelArchitecture || (exports.ModelArchitecture = ModelArchitecture = {}));
|
||||
//# sourceMappingURL=types.js.map
|
||||
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.js.map
vendored
Normal file
1
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.js.map
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":3,"file":"types.js","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,iCAAiC;AACjC,IAAY,YAIX;AAJD,WAAY,YAAY;IACtB,uCAAuB,CAAA;IACvB,2CAA2B,CAAA;IAC3B,8CAA8B,CAAA;AAChC,CAAC,EAJW,YAAY,4BAAZ,YAAY,QAIvB;AAED,2BAA2B;AAC3B,IAAY,aAMX;AAND,WAAY,aAAa;IACvB,8BAAa,CAAA;IACb,4CAA2B,CAAA;IAC3B,oCAAmB,CAAA;IACnB,gCAAe,CAAA;IACf,gCAAe,CAAA;AACjB,CAAC,EANW,aAAa,6BAAb,aAAa,QAMxB;AAED,oCAAoC;AACpC,IAAY,iBAOX;AAPD,WAAY,iBAAiB;IAC3B,oCAAe,CAAA;IACf,wCAAmB,CAAA;IACnB,gCAAW,CAAA;IACX,kCAAa,CAAA;IACb,oCAAe,CAAA;IACf,0CAAqB,CAAA;AACvB,CAAC,EAPW,iBAAiB,iCAAjB,iBAAiB,QAO5B"}
|
||||
134
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.ts
vendored
Normal file
134
vendor/ruvector/npm/packages/ruvllm-wasm/src/types.ts
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
/**
|
||||
* RuvLLM WASM Types
|
||||
* Types for browser-based LLM inference
|
||||
*/
|
||||
|
||||
/** WebGPU availability status */
|
||||
export enum WebGPUStatus {
|
||||
Available = 'available',
|
||||
Unavailable = 'unavailable',
|
||||
NotSupported = 'not_supported',
|
||||
}
|
||||
|
||||
/** Model loading status */
|
||||
export enum LoadingStatus {
|
||||
Idle = 'idle',
|
||||
Downloading = 'downloading',
|
||||
Loading = 'loading',
|
||||
Ready = 'ready',
|
||||
Error = 'error',
|
||||
}
|
||||
|
||||
/** Supported model architectures */
|
||||
export enum ModelArchitecture {
|
||||
Llama = 'llama',
|
||||
Mistral = 'mistral',
|
||||
Phi = 'phi',
|
||||
Qwen = 'qwen',
|
||||
Gemma = 'gemma',
|
||||
StableLM = 'stablelm',
|
||||
}
|
||||
|
||||
/** Model metadata */
|
||||
export interface ModelMetadata {
|
||||
/** Model name */
|
||||
name: string;
|
||||
/** Model architecture */
|
||||
architecture: ModelArchitecture;
|
||||
/** Number of parameters */
|
||||
parameters: string;
|
||||
/** Context length */
|
||||
contextLength: number;
|
||||
/** Vocabulary size */
|
||||
vocabSize: number;
|
||||
/** Embedding dimension */
|
||||
embeddingDim: number;
|
||||
/** Number of layers */
|
||||
numLayers: number;
|
||||
/** Quantization type */
|
||||
quantization: string;
|
||||
/** File size in bytes */
|
||||
fileSize: number;
|
||||
}
|
||||
|
||||
/** WASM module configuration */
|
||||
export interface WASMConfig {
|
||||
/** WebGPU device (optional) */
|
||||
device?: GPUDevice;
|
||||
/** Number of threads (SharedArrayBuffer required) */
|
||||
threads?: number;
|
||||
/** SIMD enabled */
|
||||
simd?: boolean;
|
||||
/** Memory limit in MB */
|
||||
memoryLimit?: number;
|
||||
/** Cache models in IndexedDB */
|
||||
cacheModels?: boolean;
|
||||
}
|
||||
|
||||
/** Generation configuration */
|
||||
export interface GenerationConfig {
|
||||
/** Maximum tokens to generate */
|
||||
maxTokens?: number;
|
||||
/** Temperature (0-2) */
|
||||
temperature?: number;
|
||||
/** Top-p sampling */
|
||||
topP?: number;
|
||||
/** Top-k sampling */
|
||||
topK?: number;
|
||||
/** Repetition penalty */
|
||||
repetitionPenalty?: number;
|
||||
/** Stop sequences */
|
||||
stopSequences?: string[];
|
||||
/** Stream tokens as generated */
|
||||
stream?: boolean;
|
||||
}
|
||||
|
||||
/** Token callback for streaming */
|
||||
export type TokenCallback = (token: string, done: boolean) => void;
|
||||
|
||||
/** Progress callback for model loading */
|
||||
export type ProgressCallback = (loaded: number, total: number) => void;
|
||||
|
||||
/** Inference statistics */
|
||||
export interface InferenceStats {
|
||||
/** Tokens generated */
|
||||
tokensGenerated: number;
|
||||
/** Time to first token (ms) */
|
||||
timeToFirstToken: number;
|
||||
/** Total time (ms) */
|
||||
totalTime: number;
|
||||
/** Tokens per second */
|
||||
tokensPerSecond: number;
|
||||
/** Prompt tokens */
|
||||
promptTokens: number;
|
||||
/** Memory used (MB) */
|
||||
memoryUsed: number;
|
||||
}
|
||||
|
||||
/** Chat message */
|
||||
export interface ChatMessage {
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string;
|
||||
}
|
||||
|
||||
/** Completion result */
|
||||
export interface CompletionResult {
|
||||
/** Generated text */
|
||||
text: string;
|
||||
/** Inference statistics */
|
||||
stats: InferenceStats;
|
||||
/** Finish reason */
|
||||
finishReason: 'stop' | 'length' | 'error';
|
||||
}
|
||||
|
||||
/** Model download progress */
|
||||
export interface DownloadProgress {
|
||||
/** Bytes downloaded */
|
||||
loaded: number;
|
||||
/** Total bytes */
|
||||
total: number;
|
||||
/** Download speed (bytes/sec) */
|
||||
speed: number;
|
||||
/** Estimated time remaining (seconds) */
|
||||
eta: number;
|
||||
}
|
||||
20
vendor/ruvector/npm/packages/ruvllm-wasm/tsconfig.json
vendored
Normal file
20
vendor/ruvector/npm/packages/ruvllm-wasm/tsconfig.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"types": ["node", "@webgpu/types"]
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "test"]
|
||||
}
|
||||
Reference in New Issue
Block a user