Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,276 @@
# @ruvector/ruvllm-wasm
[![npm version](https://img.shields.io/npm/v/@ruvector/ruvllm-wasm.svg)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![npm downloads](https://img.shields.io/npm/dt/@ruvector/ruvllm-wasm.svg)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![npm downloads/month](https://img.shields.io/npm/dm/@ruvector/ruvllm-wasm.svg)](https://www.npmjs.com/package/@ruvector/ruvllm-wasm)
[![License](https://img.shields.io/npm/l/@ruvector/ruvllm-wasm.svg)](https://github.com/ruvnet/ruvector/blob/main/LICENSE)
[![TypeScript](https://img.shields.io/badge/TypeScript-5.0-blue.svg)](https://www.typescriptlang.org/)
**Run large language models directly in the browser** using WebAssembly with optional WebGPU acceleration for faster inference.
## Features
- **Browser-Native** - No server required, runs entirely client-side
- **WebGPU Acceleration** - 10-50x faster inference with GPU support
- **GGUF Models** - Load quantized models for efficient browser inference
- **Streaming** - Real-time token streaming for responsive UX
- **IndexedDB Caching** - Cache models locally for instant reload
- **Privacy-First** - All processing happens on-device
- **SIMD Support** - Optimized WASM with SIMD instructions
- **Multi-Threading** - Parallel inference with SharedArrayBuffer
## Installation
```bash
npm install @ruvector/ruvllm-wasm
```
## Quick Start
```typescript
import { RuvLLMWasm, checkWebGPU } from '@ruvector/ruvllm-wasm';
// Check browser capabilities
const webgpu = await checkWebGPU();
console.log('WebGPU:', webgpu); // 'available' | 'unavailable' | 'not_supported'
// Create instance with WebGPU (if available)
const llm = await RuvLLMWasm.create({
useWebGPU: true,
memoryLimit: 4096, // 4GB max
});
// Load a model (with progress tracking)
await llm.loadModel('https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf', {
onProgress: (loaded, total) => {
console.log(`Loading: ${Math.round(loaded / total * 100)}%`);
}
});
// Generate text
const result = await llm.generate('What is the capital of France?', {
maxTokens: 100,
temperature: 0.7,
});
console.log(result.text);
console.log(`${result.stats.tokensPerSecond.toFixed(1)} tokens/sec`);
```
## Streaming Tokens
```typescript
// Stream tokens as they're generated
await llm.generate('Tell me a story about a robot', {
maxTokens: 200,
stream: true,
}, (token, done) => {
process.stdout.write(token);
if (done) console.log('\n--- Done ---');
});
```
## Chat Interface
```typescript
import { ChatMessage } from '@ruvector/ruvllm-wasm';
const messages: ChatMessage[] = [
{ role: 'system', content: 'You are a helpful assistant.' },
{ role: 'user', content: 'What is 2 + 2?' },
];
const response = await llm.chat(messages, {
maxTokens: 100,
temperature: 0.5,
});
console.log(response.text); // "2 + 2 equals 4."
```
## React Hook Example
```tsx
import { useState, useEffect } from 'react';
import { RuvLLMWasm, LoadingStatus } from '@ruvector/ruvllm-wasm';
function useLLM(modelUrl: string) {
const [llm, setLLM] = useState<RuvLLMWasm | null>(null);
const [status, setStatus] = useState<LoadingStatus>('idle');
const [progress, setProgress] = useState(0);
useEffect(() => {
let instance: RuvLLMWasm;
async function init() {
instance = await RuvLLMWasm.create({ useWebGPU: true });
setStatus('downloading');
await instance.loadModel(modelUrl, {
onProgress: (loaded, total) => setProgress(loaded / total),
});
setStatus('ready');
setLLM(instance);
}
init();
return () => instance?.unload();
}, [modelUrl]);
return { llm, status, progress };
}
// Usage
function ChatApp() {
const { llm, status, progress } = useLLM('https://example.com/model.gguf');
const [response, setResponse] = useState('');
if (status !== 'ready') {
return <div>Loading: {Math.round(progress * 100)}%</div>;
}
const generate = async () => {
const result = await llm!.generate('Hello!', { maxTokens: 50 });
setResponse(result.text);
};
return (
<div>
<button onClick={generate}>Generate</button>
<p>{response}</p>
</div>
);
}
```
## Browser Requirements
| Feature | Required | Benefit |
|---------|----------|---------|
| WebAssembly | Yes | Core execution |
| WebGPU | No (recommended) | 10-50x faster |
| SharedArrayBuffer | No | Multi-threading |
| SIMD | No | 2-4x faster math |
### Check Capabilities
```typescript
import { getCapabilities } from '@ruvector/ruvllm-wasm';
const caps = await getCapabilities();
console.log(caps);
// {
// webgpu: 'available',
// sharedArrayBuffer: true,
// simd: true,
// crossOriginIsolated: true
// }
```
### Enable SharedArrayBuffer
Add these headers to your server:
```
Cross-Origin-Opener-Policy: same-origin
Cross-Origin-Embedder-Policy: require-corp
```
## API Reference
### `RuvLLMWasm.create(options?)`
Create a new instance.
```typescript
const llm = await RuvLLMWasm.create({
useWebGPU: true, // Enable WebGPU acceleration
threads: 4, // CPU threads (requires SharedArrayBuffer)
memoryLimit: 4096, // Max memory in MB
});
```
### `loadModel(source, options?)`
Load a GGUF model.
```typescript
await llm.loadModel(url, {
onProgress: (loaded, total) => { /* ... */ }
});
```
### `generate(prompt, config?, onToken?)`
Generate text completion.
```typescript
const result = await llm.generate('Hello', {
maxTokens: 100,
temperature: 0.7,
topP: 0.9,
topK: 40,
repetitionPenalty: 1.1,
stopSequences: ['\n\n'],
stream: true,
}, (token, done) => { /* ... */ });
```
### `chat(messages, config?, onToken?)`
Chat completion with message history.
```typescript
const result = await llm.chat([
{ role: 'system', content: 'You are helpful.' },
{ role: 'user', content: 'Hi!' },
], { maxTokens: 100 });
```
### `unload()`
Free memory and unload model.
```typescript
llm.unload();
```
## Recommended Models
Small models suitable for browser inference:
| Model | Size | Use Case |
|-------|------|----------|
| TinyLlama-1.1B-Q4 | ~700 MB | General chat |
| Phi-2-Q4 | ~1.6 GB | Code, reasoning |
| Qwen2-0.5B-Q4 | ~400 MB | Fast responses |
| StableLM-Zephyr-3B-Q4 | ~2 GB | Quality chat |
## Performance Tips
1. **Use WebGPU** - Check support and enable for 10-50x speedup
2. **Smaller models** - Q4_K_M quantization balances quality/size
3. **Cache models** - IndexedDB caching avoids re-downloads
4. **Limit context** - Smaller context = faster inference
5. **Stream tokens** - Better UX with progressive output
## Related Packages
- [@ruvector/ruvllm](https://www.npmjs.com/package/@ruvector/ruvllm) - Node.js LLM library
- [@ruvector/ruvllm-cli](https://www.npmjs.com/package/@ruvector/ruvllm-cli) - CLI tool
- [ruvector](https://www.npmjs.com/package/ruvector) - Vector database
## Documentation
- [WASM Crate](https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-wasm)
- [API Reference](https://docs.rs/ruvllm-wasm)
- [Examples](https://github.com/ruvnet/ruvector/tree/main/examples/ruvLLM)
## License
MIT OR Apache-2.0
---
**Part of the [RuVector](https://github.com/ruvnet/ruvector) ecosystem** - High-performance vector database with self-learning capabilities.

View File

@@ -0,0 +1,68 @@
{
"name": "@ruvector/ruvllm-wasm",
"version": "0.1.0",
"description": "WASM bindings for browser-based LLM inference - run AI models directly in the browser with WebGPU acceleration",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"import": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
},
"require": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
}
},
"scripts": {
"build": "tsc",
"prepublishOnly": "npm run build",
"test": "node --test test/*.test.js",
"typecheck": "tsc --noEmit",
"clean": "rm -rf dist"
},
"devDependencies": {
"@types/node": "^20.19.30",
"@webgpu/types": "^0.1.69",
"typescript": "^5.9.3"
},
"keywords": [
"llm",
"wasm",
"webassembly",
"browser",
"inference",
"webgpu",
"ai",
"machine-learning",
"edge",
"offline",
"ruvector",
"ruvllm",
"transformers"
],
"author": "rUv Team <team@ruv.io>",
"license": "MIT OR Apache-2.0",
"repository": {
"type": "git",
"url": "https://github.com/ruvnet/ruvector.git",
"directory": "npm/packages/ruvllm-wasm"
},
"homepage": "https://github.com/ruvnet/ruvector/tree/main/crates/ruvllm-wasm",
"bugs": {
"url": "https://github.com/ruvnet/ruvector/issues"
},
"engines": {
"node": ">= 18"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"files": [
"dist",
"README.md"
]
}

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,EACL,YAAY,EACZ,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,UAAU,EACV,gBAAgB,EAChB,aAAa,EACb,gBAAgB,EAChB,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAEpB,sBAAsB;AACtB,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B;;GAEG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,OAAO,YAAY,EAAE,YAAY,CAAC,CAkB9E;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,OAAO,CAEhD;AAED;;GAEG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,OAAO,CAAC,CAclD;AAED;;GAEG;AACH,wBAAsB,eAAe,IAAI,OAAO,CAAC;IAC/C,MAAM,EAAE,OAAO,YAAY,EAAE,YAAY,CAAC;IAC1C,iBAAiB,EAAE,OAAO,CAAC;IAC3B,IAAI,EAAE,OAAO,CAAC;IACd,mBAAmB,EAAE,OAAO,CAAC;CAC9B,CAAC,CAYD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAWpD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,aAAa,EAAE,MAAM,GAAG;IACrD,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB,CAQA;AAED;;;GAGG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,MAAM,CAAkC;IAChD,OAAO,CAAC,MAAM,CAAoF;IAElG,OAAO;IAIP;;OAEG;WACU,MAAM,CAAC,OAAO,CAAC,EAAE;QAC5B,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,OAAO,CAAC,UAAU,CAAC;IAqBvB;;OAEG;IACH,SAAS,IAAI,OAAO,YAAY,EAAE,aAAa;IAI/C;;OAEG;IACG,SAAS,CACb,MAAM,EAAE,MAAM,GAAG,WAAW,EAC5B,OAAO,CAAC,EAAE;QACR,UAAU,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,CAAC;KACpD,GACA,OAAO,CAAC,OAAO,YAAY,EAAE,aAAa,CAAC;IAuB9C;;OAEG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,EAC9C,OAAO,CAAC,EAAE,OAAO,YAAY,EAAE,aAAa,GAC3C,OAAO,CAAC,OAAO,YAAY,EAAE,gBAAgB,CAAC;IAkBjD;;OAEG;IACG,IAAI,CACR,QAAQ,EAAE,OAAO,YAAY,EAAE,WAAW,EAAE,EAC5C,MAAM,CAAC,EAAE,OAAO,YAAY,EAAE,gBAAgB,EAC9C,OAAO,CAAC,EAAE,OAAO,YAAY,EAAE,aAAa,GAC3C,OAAO,CAAC,OAAO,YAAY,EAAE,gBAAgB,CAAC;IAQjD;;OAEG;IACH,MAAM,IAAI,IAAI;CAGf"}

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;;;AAuBH,kCAkBC;AAKD,wDAEC;AAKD,8BAcC;AAKD,0CAiBC;AAKD,wCAWC;AAKD,wCAWC;AAvHD,uCAaoB;AAZlB,wGAAA,YAAY,OAAA;AACZ,yGAAA,aAAa,OAAA;AACb,6GAAA,iBAAiB,OAAA;AAYnB,sBAAsB;AACT,QAAA,OAAO,GAAG,OAAO,CAAC;AAE/B;;GAEG;AACI,KAAK,UAAU,WAAW;IAC/B,IAAI,OAAO,SAAS,KAAK,WAAW,EAAE,CAAC;QACrC,OAAO,eAAoD,CAAC;IAC9D,CAAC;IAED,IAAI,CAAC,CAAC,KAAK,IAAI,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,eAAoD,CAAC;IAC9D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAO,SAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAC9D,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,WAAgD,CAAC;QAC1D,CAAC;QACD,OAAO,aAAkD,CAAC;IAC5D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,aAAkD,CAAC;IAC5D,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB;IACpC,OAAO,OAAO,iBAAiB,KAAK,WAAW,CAAC;AAClD,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,SAAS;IAC7B,IAAI,CAAC;QACH,8BAA8B;QAC9B,MAAM,QAAQ,GAAG,IAAI,UAAU,CAAC;YAC9B,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;YAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;SACnC,CAAC,CAAC;QACH,MAAM,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,eAAe;IAMnC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACvC,WAAW,EAAE;QACb,SAAS,EAAE;KACZ,CAAC,CAAC;IAEH,OAAO;QACL,MAAM;QACN,iBAAiB,EAAE,sBAAsB,EAAE;QAC3C,IAAI;QACJ,mBAAmB,EAAE,OAAO,mBAAmB,KAAK,WAAW,IAAI,mBAAmB;KACvF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,KAAa;IAC1C,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,IAAI,IAAI,GAAG,KAAK,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,OAAO,IAAI,IAAI,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,IAAI,IAAI,IAAI,CAAC;QACb,SAAS,EAAE,CAAC;IACd,CAAC;IAED,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,aAAqB;IAIlD,sCAAsC;IACtC,MAAM,UAAU,GAAG,aAAa,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAEjD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,EAAE,eAAe;QACrD,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,EAAE,4BAA4B;KACvE,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAa,UAAU;IAIrB,YAAoB,MAAuC;QAFnD,WAAM,GAAuC,MAA4C,CAAC;QAGhG,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAInB;QACC,MAAM,MAAM,GAAoC;YAC9C,OAAO,EAAE,OAAO,EAAE,OAAO;YACzB,WAAW,EAAE,OAAO,EAAE,WAAW;YACjC,IAAI,EAAE,MAAM,SAAS,EAAE;YACvB,WAAW,EAAE,IAAI;SAClB,CAAC;QAEF,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACvB,MAAM,YAAY,GAAG,MAAM,WAAW,EAAE,CAAC;YACzC,IAAI,YAAY,KAAK,WAAW,EAAE,CAAC;gBACjC,MAAM,OAAO,GAAG,MAAO,SAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;gBAC9D,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;gBAChD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CACb,MAA4B,EAC5B,OAEC;QAED,IAAI,CAAC,MAAM,GAAG,SAA+C,CAAC;QAE9D,2DAA2D;QAC3D,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACxF,OAAO,CAAC,GAAG,CAAC,2DAA2D,CAAC,CAAC;QACzE,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;QAE9C,IAAI,CAAC,MAAM,GAAG,OAA6C,CAAC;QAE5D,OAAO;YACL,IAAI,EAAE,aAAa;YACnB,YAAY,EAAE,OAAiD;YAC/D,UAAU,EAAE,IAAI;YAChB,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,KAAK;YAChB,YAAY,EAAE,IAAI;YAClB,SAAS,EAAE,EAAE;YACb,YAAY,EAAE,QAAQ;YACtB,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,MAA8C,EAC9C,OAA4C;QAE5C,OAAO,CAAC,GAAG,CAAC,yBAAyB,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC;QACxE,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;QAEtE,OAAO;YACL,IAAI,EAAE,8DAA8D;YACpE,KAAK,EAAE;gBACL,eAAe,EAAE,CAAC;gBAClB,gBAAgB,EAAE,CAAC;gBACnB,SAAS,EAAE,CAAC;gBACZ,eAAe,EAAE,CAAC;gBAClB,YAAY,EAAE,CAAC;gBACf,UAAU,EAAE,CAAC;aACd;YACD,YAAY,EAAE,MAAM;SACrB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CACR,QAA4C,EAC5C,MAA8C,EAC9C,OAA4C;QAE5C,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC;aACnC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,IAAI,CAAC,MAAM,GAAG,MAA4C,CAAC;IAC7D,CAAC;CACF;AAxHD,gCAwHC"}

View File

@@ -0,0 +1,276 @@
/**
* @ruvector/ruvllm-wasm - Browser LLM Inference with WebAssembly
*
* Run large language models directly in the browser using WebAssembly
* with optional WebGPU acceleration for faster inference.
*
* @example
* ```typescript
* import { RuvLLMWasm } from '@ruvector/ruvllm-wasm';
*
* // Initialize with WebGPU (if available)
* const llm = await RuvLLMWasm.create({ useWebGPU: true });
*
* // Load a model
* await llm.loadModel('https://example.com/model.gguf', {
* onProgress: (loaded, total) => console.log(`${loaded}/${total}`)
* });
*
* // Generate text
* const result = await llm.generate('Hello, world!', {
* maxTokens: 100,
* temperature: 0.7,
* });
*
* console.log(result.text);
* ```
*
* @packageDocumentation
*/
export {
WebGPUStatus,
LoadingStatus,
ModelArchitecture,
ModelMetadata,
WASMConfig,
GenerationConfig,
TokenCallback,
ProgressCallback,
InferenceStats,
ChatMessage,
CompletionResult,
DownloadProgress,
} from './types.js';
/** Package version */
export const VERSION = '0.1.0';
/**
* Check WebGPU availability
*/
export async function checkWebGPU(): Promise<import('./types.js').WebGPUStatus> {
if (typeof navigator === 'undefined') {
return 'not_supported' as import('./types.js').WebGPUStatus;
}
if (!('gpu' in navigator)) {
return 'not_supported' as import('./types.js').WebGPUStatus;
}
try {
const adapter = await (navigator as any).gpu.requestAdapter();
if (adapter) {
return 'available' as import('./types.js').WebGPUStatus;
}
return 'unavailable' as import('./types.js').WebGPUStatus;
} catch {
return 'unavailable' as import('./types.js').WebGPUStatus;
}
}
/**
* Check SharedArrayBuffer support (required for threading)
*/
export function checkSharedArrayBuffer(): boolean {
return typeof SharedArrayBuffer !== 'undefined';
}
/**
* Check SIMD support
*/
export async function checkSIMD(): Promise<boolean> {
try {
// Check for WASM SIMD support
const simdTest = new Uint8Array([
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b, 0x03,
0x02, 0x01, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00,
0x41, 0x00, 0xfd, 0x0f, 0x00, 0x0b,
]);
await WebAssembly.compile(simdTest);
return true;
} catch {
return false;
}
}
/**
* Get browser capabilities for LLM inference
*/
export async function getCapabilities(): Promise<{
webgpu: import('./types.js').WebGPUStatus;
sharedArrayBuffer: boolean;
simd: boolean;
crossOriginIsolated: boolean;
}> {
const [webgpu, simd] = await Promise.all([
checkWebGPU(),
checkSIMD(),
]);
return {
webgpu,
sharedArrayBuffer: checkSharedArrayBuffer(),
simd,
crossOriginIsolated: typeof crossOriginIsolated !== 'undefined' && crossOriginIsolated,
};
}
/**
* Format file size for display
*/
export function formatFileSize(bytes: number): string {
const units = ['B', 'KB', 'MB', 'GB'];
let size = bytes;
let unitIndex = 0;
while (size >= 1024 && unitIndex < units.length - 1) {
size /= 1024;
unitIndex++;
}
return `${size.toFixed(1)} ${units[unitIndex]}`;
}
/**
* Estimate memory requirements for a model
*/
export function estimateMemory(fileSizeBytes: number): {
minimum: number;
recommended: number;
} {
// Rough estimates based on model size
const fileSizeMB = fileSizeBytes / (1024 * 1024);
return {
minimum: Math.ceil(fileSizeMB * 1.2), // 20% overhead
recommended: Math.ceil(fileSizeMB * 1.5), // 50% overhead for KV cache
};
}
/**
* RuvLLM WASM class placeholder
* Full implementation requires WASM binary from ruvllm-wasm crate
*/
export class RuvLLMWasm {
private config: import('./types.js').WASMConfig;
private status: import('./types.js').LoadingStatus = 'idle' as import('./types.js').LoadingStatus;
private constructor(config: import('./types.js').WASMConfig) {
this.config = config;
}
/**
* Create a new RuvLLMWasm instance
*/
static async create(options?: {
useWebGPU?: boolean;
threads?: number;
memoryLimit?: number;
}): Promise<RuvLLMWasm> {
const config: import('./types.js').WASMConfig = {
threads: options?.threads,
memoryLimit: options?.memoryLimit,
simd: await checkSIMD(),
cacheModels: true,
};
if (options?.useWebGPU) {
const webgpuStatus = await checkWebGPU();
if (webgpuStatus === 'available') {
const adapter = await (navigator as any).gpu.requestAdapter();
if (adapter) {
config.device = await adapter.requestDevice();
}
}
}
return new RuvLLMWasm(config);
}
/**
* Get current loading status
*/
getStatus(): import('./types.js').LoadingStatus {
return this.status;
}
/**
* Load a model from URL or ArrayBuffer
*/
async loadModel(
source: string | ArrayBuffer,
options?: {
onProgress?: import('./types.js').ProgressCallback;
}
): Promise<import('./types.js').ModelMetadata> {
this.status = 'loading' as import('./types.js').LoadingStatus;
// Placeholder - actual implementation requires WASM binary
console.log('Loading model from:', typeof source === 'string' ? source : 'ArrayBuffer');
console.log('Note: Full model loading requires the ruvllm-wasm binary.');
console.log('Build from: crates/ruvllm-wasm');
this.status = 'ready' as import('./types.js').LoadingStatus;
return {
name: 'placeholder',
architecture: 'llama' as import('./types.js').ModelArchitecture,
parameters: '0B',
contextLength: 2048,
vocabSize: 32000,
embeddingDim: 2048,
numLayers: 22,
quantization: 'q4_k_m',
fileSize: 0,
};
}
/**
* Generate text completion
*/
async generate(
prompt: string,
config?: import('./types.js').GenerationConfig,
onToken?: import('./types.js').TokenCallback
): Promise<import('./types.js').CompletionResult> {
console.log('Generating with prompt:', prompt.substring(0, 50) + '...');
console.log('Note: Full generation requires the ruvllm-wasm binary.');
return {
text: '[Placeholder - build ruvllm-wasm crate for actual inference]',
stats: {
tokensGenerated: 0,
timeToFirstToken: 0,
totalTime: 0,
tokensPerSecond: 0,
promptTokens: 0,
memoryUsed: 0,
},
finishReason: 'stop',
};
}
/**
* Chat completion with message history
*/
async chat(
messages: import('./types.js').ChatMessage[],
config?: import('./types.js').GenerationConfig,
onToken?: import('./types.js').TokenCallback
): Promise<import('./types.js').CompletionResult> {
const prompt = messages
.map(m => `${m.role}: ${m.content}`)
.join('\n');
return this.generate(prompt, config, onToken);
}
/**
* Unload model and free memory
*/
unload(): void {
this.status = 'idle' as import('./types.js').LoadingStatus;
}
}

View File

@@ -0,0 +1,123 @@
/**
* RuvLLM WASM Types
* Types for browser-based LLM inference
*/
/** WebGPU availability status */
export declare enum WebGPUStatus {
Available = "available",
Unavailable = "unavailable",
NotSupported = "not_supported"
}
/** Model loading status */
export declare enum LoadingStatus {
Idle = "idle",
Downloading = "downloading",
Loading = "loading",
Ready = "ready",
Error = "error"
}
/** Supported model architectures */
export declare enum ModelArchitecture {
Llama = "llama",
Mistral = "mistral",
Phi = "phi",
Qwen = "qwen",
Gemma = "gemma",
StableLM = "stablelm"
}
/** Model metadata */
export interface ModelMetadata {
/** Model name */
name: string;
/** Model architecture */
architecture: ModelArchitecture;
/** Number of parameters */
parameters: string;
/** Context length */
contextLength: number;
/** Vocabulary size */
vocabSize: number;
/** Embedding dimension */
embeddingDim: number;
/** Number of layers */
numLayers: number;
/** Quantization type */
quantization: string;
/** File size in bytes */
fileSize: number;
}
/** WASM module configuration */
export interface WASMConfig {
/** WebGPU device (optional) */
device?: GPUDevice;
/** Number of threads (SharedArrayBuffer required) */
threads?: number;
/** SIMD enabled */
simd?: boolean;
/** Memory limit in MB */
memoryLimit?: number;
/** Cache models in IndexedDB */
cacheModels?: boolean;
}
/** Generation configuration */
export interface GenerationConfig {
/** Maximum tokens to generate */
maxTokens?: number;
/** Temperature (0-2) */
temperature?: number;
/** Top-p sampling */
topP?: number;
/** Top-k sampling */
topK?: number;
/** Repetition penalty */
repetitionPenalty?: number;
/** Stop sequences */
stopSequences?: string[];
/** Stream tokens as generated */
stream?: boolean;
}
/** Token callback for streaming */
export type TokenCallback = (token: string, done: boolean) => void;
/** Progress callback for model loading */
export type ProgressCallback = (loaded: number, total: number) => void;
/** Inference statistics */
export interface InferenceStats {
/** Tokens generated */
tokensGenerated: number;
/** Time to first token (ms) */
timeToFirstToken: number;
/** Total time (ms) */
totalTime: number;
/** Tokens per second */
tokensPerSecond: number;
/** Prompt tokens */
promptTokens: number;
/** Memory used (MB) */
memoryUsed: number;
}
/** Chat message */
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
/** Completion result */
export interface CompletionResult {
/** Generated text */
text: string;
/** Inference statistics */
stats: InferenceStats;
/** Finish reason */
finishReason: 'stop' | 'length' | 'error';
}
/** Model download progress */
export interface DownloadProgress {
/** Bytes downloaded */
loaded: number;
/** Total bytes */
total: number;
/** Download speed (bytes/sec) */
speed: number;
/** Estimated time remaining (seconds) */
eta: number;
}
//# sourceMappingURL=types.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,iCAAiC;AACjC,oBAAY,YAAY;IACtB,SAAS,cAAc;IACvB,WAAW,gBAAgB;IAC3B,YAAY,kBAAkB;CAC/B;AAED,2BAA2B;AAC3B,oBAAY,aAAa;IACvB,IAAI,SAAS;IACb,WAAW,gBAAgB;IAC3B,OAAO,YAAY;IACnB,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,oCAAoC;AACpC,oBAAY,iBAAiB;IAC3B,KAAK,UAAU;IACf,OAAO,YAAY;IACnB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,KAAK,UAAU;IACf,QAAQ,aAAa;CACtB;AAED,qBAAqB;AACrB,MAAM,WAAW,aAAa;IAC5B,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,YAAY,EAAE,iBAAiB,CAAC;IAChC,2BAA2B;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,qBAAqB;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,sBAAsB;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,0BAA0B;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,gCAAgC;AAChC,MAAM,WAAW,UAAU;IACzB,+BAA+B;IAC/B,MAAM,CAAC,EAAE,SAAS,CAAC;IACnB,qDAAqD;IACrD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mBAAmB;IACnB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,yBAAyB;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,+BAA+B;AAC/B,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,iCAAiC;IACjC,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,mCAAmC;AACnC,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,KAAK,IAAI,CAAC;AAEnE,0CAA0C;AAC1C,MAAM,MAAM,gBAAgB,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAEvE,2BAA2B;AAC3B,MAAM,WAAW,cAAc;IAC7B,uBAAuB;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,sBAAsB;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,mBAAmB;AACnB,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAwB;AACxB,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,2BAA2B;IAC3B,KAAK,EAAE,cAAc,CAAC;IACtB,oBAAoB;IACpB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,8BAA8B;AAC9B,MAAM,WAAW,gBAAgB;IAC/B,uBAAuB;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,yCAAyC;IACzC,GAAG,EAAE,MAAM,CAAC;CACb"}

View File

@@ -0,0 +1,34 @@
"use strict";
/**
* RuvLLM WASM Types
* Types for browser-based LLM inference
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.ModelArchitecture = exports.LoadingStatus = exports.WebGPUStatus = void 0;
/** WebGPU availability status */
var WebGPUStatus;
(function (WebGPUStatus) {
WebGPUStatus["Available"] = "available";
WebGPUStatus["Unavailable"] = "unavailable";
WebGPUStatus["NotSupported"] = "not_supported";
})(WebGPUStatus || (exports.WebGPUStatus = WebGPUStatus = {}));
/** Model loading status */
var LoadingStatus;
(function (LoadingStatus) {
LoadingStatus["Idle"] = "idle";
LoadingStatus["Downloading"] = "downloading";
LoadingStatus["Loading"] = "loading";
LoadingStatus["Ready"] = "ready";
LoadingStatus["Error"] = "error";
})(LoadingStatus || (exports.LoadingStatus = LoadingStatus = {}));
/** Supported model architectures */
var ModelArchitecture;
(function (ModelArchitecture) {
ModelArchitecture["Llama"] = "llama";
ModelArchitecture["Mistral"] = "mistral";
ModelArchitecture["Phi"] = "phi";
ModelArchitecture["Qwen"] = "qwen";
ModelArchitecture["Gemma"] = "gemma";
ModelArchitecture["StableLM"] = "stablelm";
})(ModelArchitecture || (exports.ModelArchitecture = ModelArchitecture = {}));
//# sourceMappingURL=types.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"types.js","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,iCAAiC;AACjC,IAAY,YAIX;AAJD,WAAY,YAAY;IACtB,uCAAuB,CAAA;IACvB,2CAA2B,CAAA;IAC3B,8CAA8B,CAAA;AAChC,CAAC,EAJW,YAAY,4BAAZ,YAAY,QAIvB;AAED,2BAA2B;AAC3B,IAAY,aAMX;AAND,WAAY,aAAa;IACvB,8BAAa,CAAA;IACb,4CAA2B,CAAA;IAC3B,oCAAmB,CAAA;IACnB,gCAAe,CAAA;IACf,gCAAe,CAAA;AACjB,CAAC,EANW,aAAa,6BAAb,aAAa,QAMxB;AAED,oCAAoC;AACpC,IAAY,iBAOX;AAPD,WAAY,iBAAiB;IAC3B,oCAAe,CAAA;IACf,wCAAmB,CAAA;IACnB,gCAAW,CAAA;IACX,kCAAa,CAAA;IACb,oCAAe,CAAA;IACf,0CAAqB,CAAA;AACvB,CAAC,EAPW,iBAAiB,iCAAjB,iBAAiB,QAO5B"}

View File

@@ -0,0 +1,134 @@
/**
* RuvLLM WASM Types
* Types for browser-based LLM inference
*/
/** WebGPU availability status */
export enum WebGPUStatus {
Available = 'available',
Unavailable = 'unavailable',
NotSupported = 'not_supported',
}
/** Model loading status */
export enum LoadingStatus {
Idle = 'idle',
Downloading = 'downloading',
Loading = 'loading',
Ready = 'ready',
Error = 'error',
}
/** Supported model architectures */
export enum ModelArchitecture {
Llama = 'llama',
Mistral = 'mistral',
Phi = 'phi',
Qwen = 'qwen',
Gemma = 'gemma',
StableLM = 'stablelm',
}
/** Model metadata */
export interface ModelMetadata {
/** Model name */
name: string;
/** Model architecture */
architecture: ModelArchitecture;
/** Number of parameters */
parameters: string;
/** Context length */
contextLength: number;
/** Vocabulary size */
vocabSize: number;
/** Embedding dimension */
embeddingDim: number;
/** Number of layers */
numLayers: number;
/** Quantization type */
quantization: string;
/** File size in bytes */
fileSize: number;
}
/** WASM module configuration */
export interface WASMConfig {
/** WebGPU device (optional) */
device?: GPUDevice;
/** Number of threads (SharedArrayBuffer required) */
threads?: number;
/** SIMD enabled */
simd?: boolean;
/** Memory limit in MB */
memoryLimit?: number;
/** Cache models in IndexedDB */
cacheModels?: boolean;
}
/** Generation configuration */
export interface GenerationConfig {
/** Maximum tokens to generate */
maxTokens?: number;
/** Temperature (0-2) */
temperature?: number;
/** Top-p sampling */
topP?: number;
/** Top-k sampling */
topK?: number;
/** Repetition penalty */
repetitionPenalty?: number;
/** Stop sequences */
stopSequences?: string[];
/** Stream tokens as generated */
stream?: boolean;
}
/** Token callback for streaming */
export type TokenCallback = (token: string, done: boolean) => void;
/** Progress callback for model loading */
export type ProgressCallback = (loaded: number, total: number) => void;
/** Inference statistics */
export interface InferenceStats {
/** Tokens generated */
tokensGenerated: number;
/** Time to first token (ms) */
timeToFirstToken: number;
/** Total time (ms) */
totalTime: number;
/** Tokens per second */
tokensPerSecond: number;
/** Prompt tokens */
promptTokens: number;
/** Memory used (MB) */
memoryUsed: number;
}
/** Chat message */
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
/** Completion result */
export interface CompletionResult {
/** Generated text */
text: string;
/** Inference statistics */
stats: InferenceStats;
/** Finish reason */
finishReason: 'stop' | 'length' | 'error';
}
/** Model download progress */
export interface DownloadProgress {
/** Bytes downloaded */
loaded: number;
/** Total bytes */
total: number;
/** Download speed (bytes/sec) */
speed: number;
/** Estimated time remaining (seconds) */
eta: number;
}

View File

@@ -0,0 +1,20 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"types": ["node", "@webgpu/types"]
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "test"]
}