Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
32
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/Cargo.toml
vendored
Normal file
32
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/Cargo.toml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
[package]
|
||||
name = "ruvector-mincut-gated-transformer-wasm"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
rust-version = "1.77"
|
||||
authors = ["RuVector Team"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
description = "WASM bindings for mincut-gated transformer inference"
|
||||
repository = "https://github.com/ruvnet/ruvector"
|
||||
keywords = ["transformer", "wasm", "mincut", "inference", "webassembly"]
|
||||
categories = ["wasm", "algorithms", "science"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
[features]
|
||||
default = ["console_error_panic_hook"]
|
||||
|
||||
[dependencies]
|
||||
ruvector-mincut-gated-transformer = { path = "../ruvector-mincut-gated-transformer", default-features = false, features = ["wasm"] }
|
||||
wasm-bindgen = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde-wasm-bindgen = "0.6"
|
||||
console_error_panic_hook = { version = "0.1", optional = true }
|
||||
js-sys = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
wasm-bindgen-test = "0.3"
|
||||
|
||||
# NOTE(review): this manifest inherits dependencies via `workspace = true`, so the
# crate is built as a workspace member. Cargo only honours `[profile.*]` tables in
# the workspace root manifest and warns that profiles in member packages are
# ignored -- confirm these size-oriented settings are mirrored at the root.
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
344
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/README.md
vendored
Normal file
344
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/README.md
vendored
Normal file
@@ -0,0 +1,344 @@
|
||||
# ruvector-mincut-gated-transformer-wasm
|
||||
|
||||
WebAssembly bindings for the mincut-gated transformer - ultra-low-latency inference with coherence control.
|
||||
|
||||
## Overview
|
||||
|
||||
This crate provides JavaScript-friendly WASM bindings for the `ruvector-mincut-gated-transformer` crate, enabling browser-based transformer inference with deterministic latency bounds and explainable decision making.
|
||||
|
||||
## Features
|
||||
|
||||
- **Zero-copy inference**: Direct memory access from JavaScript
|
||||
- **Deterministic bounds**: Predictable p99 latency guarantees
|
||||
- **Explainable decisions**: Every inference produces a witness
|
||||
- **Coherence control**: Integration with dynamic minimum cut signals
|
||||
- **Event-driven scheduling**: Optional spike-based compute tier selection
|
||||
|
||||
## Installation
|
||||
|
||||
### NPM
|
||||
|
||||
```bash
|
||||
npm install ruvector-mincut-gated-transformer-wasm
|
||||
```
|
||||
|
||||
### Build from source
|
||||
|
||||
```bash
|
||||
wasm-pack build --target web
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```javascript
|
||||
import init, { WasmTransformer, WasmGatePacket } from './pkg';
|
||||
|
||||
async function run() {
|
||||
await init();
|
||||
|
||||
// Create transformer with micro config (optimized for WASM)
|
||||
const transformer = new WasmTransformer();
|
||||
|
||||
// Create gate packet from coherence signals
|
||||
const gate = new WasmGatePacket();
|
||||
gate.lambda = 100;
|
||||
gate.lambda_prev = 95;
|
||||
gate.boundary_edges = 5;
|
||||
gate.boundary_concentration_q15 = 8192;
|
||||
gate.partition_count = 3;
|
||||
|
||||
// Run inference
|
||||
const tokens = new Uint32Array([1, 2, 3, 4]);
|
||||
const result = transformer.infer(tokens, gate);
|
||||
|
||||
console.log('Decision:', result.decision);
|
||||
console.log('Reason:', result.reason);
|
||||
console.log('Tier:', result.tier);
|
||||
console.log('KV writes enabled:', result.kv_writes_enabled);
|
||||
console.log('External writes enabled:', result.external_writes_enabled);
|
||||
console.log('Logits:', result.logits);
|
||||
}
|
||||
|
||||
run();
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### WasmTransformer
|
||||
|
||||
Main transformer class for inference.
|
||||
|
||||
#### Constructor
|
||||
|
||||
```javascript
|
||||
const transformer = new WasmTransformer();
|
||||
```
|
||||
|
||||
Creates a transformer with micro config (sequence length: 32, hidden: 128, heads: 4, layers: 2).
|
||||
|
||||
#### Methods
|
||||
|
||||
- `new_baseline()`: Create with baseline config (larger model)
|
||||
- `with_config(config)`: Create with custom configuration
|
||||
- `infer(tokens, gate)`: Run inference with gate packet
|
||||
- `infer_with_spikes(tokens, gate, spikes)`: Run inference with gate and spike packets
|
||||
- `reset()`: Reset all state (KV cache, cached logits)
|
||||
- `buffer_size()`: Get logits buffer size
|
||||
- `set_policy(policy)`: Update gate policy
|
||||
|
||||
### WasmGatePacket
|
||||
|
||||
Gate packet carrying coherence control signals.
|
||||
|
||||
#### Constructor
|
||||
|
||||
```javascript
|
||||
const gate = new WasmGatePacket();
|
||||
```
|
||||
|
||||
#### Properties
|
||||
|
||||
- `lambda`: Current coherence metric (minimum cut value)
|
||||
- `lambda_prev`: Previous lambda for trend detection
|
||||
- `boundary_edges`: Number of edges crossing partition boundaries
|
||||
- `boundary_concentration_q15`: Boundary concentration (Q15: 0-32767)
|
||||
- `partition_count`: Number of partitions in graph
|
||||
- `flags`: Policy flags (force safe mode, etc.)
|
||||
|
||||
### WasmSpikePacket
|
||||
|
||||
Spike packet for event-driven scheduling.
|
||||
|
||||
#### Constructor
|
||||
|
||||
```javascript
|
||||
const spike = new WasmSpikePacket();
|
||||
```
|
||||
|
||||
#### Properties
|
||||
|
||||
- `fired`: Spike fired indicator (0 = skip, 1 = active)
|
||||
- `rate_q15`: Spike rate (Q15: 0-32767)
|
||||
- `novelty_q15`: Novelty metric (Q15: 0-32767)
|
||||
- `flags`: Spike flags
|
||||
|
||||
### WasmInferResult
|
||||
|
||||
Inference result with logits and witness information.
|
||||
|
||||
#### Properties
|
||||
|
||||
- `logits`: Output logits (Int32Array)
|
||||
- `decision`: Gate decision ("Allow", "ReduceScope", "FlushKv", "FreezeWrites", "QuarantineUpdates")
|
||||
- `reason`: Decision reason ("None", "LambdaBelowMin", "LambdaDroppedFast", etc.)
|
||||
- `tier`: Compute tier used (0-3)
|
||||
- `kv_writes_enabled`: Whether KV writes were enabled
|
||||
- `external_writes_enabled`: Whether external writes were enabled
|
||||
- `effective_seq_len`: Effective sequence length used
|
||||
- `effective_window`: Effective window size used
|
||||
- `lambda`: Current lambda value
|
||||
- `lambda_prev`: Previous lambda value
|
||||
- `boundary_edges`: Boundary edges count
|
||||
- `partition_count`: Partition count
|
||||
|
||||
## Configuration
|
||||
|
||||
### Micro Config (Default)
|
||||
|
||||
Optimized for WASM and edge gateways:
|
||||
|
||||
```javascript
|
||||
{
|
||||
seq_len_max: 32,
|
||||
hidden: 128,
|
||||
heads: 4,
|
||||
layers: 2,
|
||||
window_normal: 8,
|
||||
window_degraded: 4,
|
||||
ffn_mult: 4,
|
||||
logits: 256
|
||||
}
|
||||
```
|
||||
|
||||
### Baseline Config
|
||||
|
||||
Larger model for more capacity:
|
||||
|
||||
```javascript
|
||||
const transformer = WasmTransformer.new_baseline();
|
||||
// seq_len_max: 64, hidden: 256, heads: 4, layers: 4, logits: 1024
|
||||
```
|
||||
|
||||
### Custom Config
|
||||
|
||||
```javascript
|
||||
const config = {
|
||||
seq_len_max: 32,
|
||||
hidden: 128,
|
||||
heads: 4,
|
||||
layers: 2,
|
||||
window_normal: 8,
|
||||
window_degraded: 4,
|
||||
ffn_mult: 4,
|
||||
logits: 256,
|
||||
layers_degraded: 1,
|
||||
seq_len_degraded: 16,
|
||||
seq_len_safe: 4,
|
||||
enable_kv_cache: true,
|
||||
enable_external_writes: true
|
||||
};
|
||||
|
||||
const transformer = WasmTransformer.with_config(config);
|
||||
```
|
||||
|
||||
## Gate Policy
|
||||
|
||||
Control when the gate intervenes:
|
||||
|
||||
```javascript
|
||||
const policy = {
|
||||
lambda_min: 30,
|
||||
drop_ratio_q15_max: 12288, // ~37.5%
|
||||
boundary_edges_max: 20,
|
||||
boundary_concentration_q15_max: 20480, // ~62.5%
|
||||
partitions_max: 10,
|
||||
spike_rate_q15_max: 16384,
|
||||
spike_novelty_q15_min: 2048,
|
||||
allow_kv_write_when_unstable: true,
|
||||
allow_external_write_when_unstable: false
|
||||
};
|
||||
|
||||
transformer.set_policy(policy);
|
||||
```
|
||||
|
||||
## Decision Types
|
||||
|
||||
### Gate Decisions
|
||||
|
||||
- **Allow**: Proceed normally with full capabilities
|
||||
- **ReduceScope**: Reduce sequence length and window size
|
||||
- **FlushKv**: Flush KV cache before proceeding
|
||||
- **FreezeWrites**: Run in read-only mode (no KV updates)
|
||||
- **QuarantineUpdates**: Run compute but discard all state changes
|
||||
|
||||
### Decision Reasons
|
||||
|
||||
- **None**: No intervention needed
|
||||
- **LambdaBelowMin**: Lambda below minimum threshold
|
||||
- **LambdaDroppedFast**: Lambda dropped too quickly
|
||||
- **BoundarySpike**: Boundary edge count exceeded threshold
|
||||
- **BoundaryConcentrationSpike**: Boundary concentration too high
|
||||
- **PartitionDrift**: Partition count indicates drift
|
||||
- **SpikeStorm**: Spike rate indicates overload
|
||||
- **ForcedByFlag**: Forced by flag in gate packet
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Inference
|
||||
|
||||
```javascript
|
||||
const transformer = new WasmTransformer();
|
||||
const gate = new WasmGatePacket();
|
||||
const tokens = new Uint32Array([1, 2, 3, 4]);
|
||||
const result = transformer.infer(tokens, gate);
|
||||
console.log(result.decision);
|
||||
```
|
||||
|
||||
### With Spike Scheduling
|
||||
|
||||
```javascript
|
||||
const transformer = new WasmTransformer();
|
||||
const gate = new WasmGatePacket();
|
||||
const spike = new WasmSpikePacket();
|
||||
spike.fired = 1;
|
||||
spike.novelty_q15 = 8192;
|
||||
|
||||
const tokens = new Uint32Array([1, 2, 3, 4]);
|
||||
const result = transformer.infer_with_spikes(tokens, gate, spike);
|
||||
```
|
||||
|
||||
### Handling Interventions
|
||||
|
||||
```javascript
|
||||
const transformer = new WasmTransformer();
|
||||
const gate = new WasmGatePacket();
|
||||
gate.lambda = 10; // Low coherence
|
||||
gate.lambda_prev = 100;
|
||||
|
||||
const tokens = new Uint32Array([1, 2, 3, 4]);
|
||||
const result = transformer.infer(tokens, gate);
|
||||
|
||||
if (result.decision !== 'Allow') {
|
||||
console.log('Intervention triggered:', result.reason);
|
||||
console.log('Effective seq_len:', result.effective_seq_len);
|
||||
console.log('KV writes:', result.kv_writes_enabled);
|
||||
}
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
### Development
|
||||
|
||||
```bash
|
||||
wasm-pack build --dev --target web
|
||||
```
|
||||
|
||||
### Release (optimized)
|
||||
|
||||
```bash
|
||||
wasm-pack build --release --target web
|
||||
```
|
||||
|
||||
### For Node.js
|
||||
|
||||
```bash
|
||||
wasm-pack build --target nodejs
|
||||
```
|
||||
|
||||
### For Bundlers
|
||||
|
||||
```bash
|
||||
wasm-pack build --target bundler
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Browser tests
|
||||
|
||||
```bash
|
||||
wasm-pack test --headless --firefox
|
||||
wasm-pack test --headless --chrome
|
||||
```
|
||||
|
||||
### Node.js tests
|
||||
|
||||
```bash
|
||||
wasm-pack test --node
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
The WASM bindings maintain the core performance characteristics:
|
||||
|
||||
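- **Allocation-free hot path**: Zero heap allocations during inference in the core transformer; the WASM wrapper copies the logits and the decision/reason strings into each returned `WasmInferResult`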
|
||||
- **Predictable latency**: Bounded p99 latency guarantees
|
||||
- **Small binary size**: ~50KB compressed (micro config)
|
||||
- **Low memory footprint**: ~128KB runtime state (micro config)
|
||||
|
||||
## Integration with RuVector
|
||||
|
||||
This transformer integrates with the RuVector ecosystem:
|
||||
|
||||
- **ruvector-mincut**: Provides coherence signals via gate packets
|
||||
- **ruvector-core**: Vector search and semantic retrieval
|
||||
- **ruvector-router**: Query routing and orchestration
|
||||
|
||||
## License
|
||||
|
||||
MIT OR Apache-2.0
|
||||
|
||||
## Links
|
||||
|
||||
- [GitHub Repository](https://github.com/ruvnet/ruvector)
|
||||
- [Core Library](../ruvector-mincut-gated-transformer)
|
||||
- [RuVector Documentation](../../README.md)
|
||||
158
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/examples/web_scorer.rs
vendored
Normal file
158
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/examples/web_scorer.rs
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
//! Example WASM scorer demonstrating mincut-gated transformer in the browser.
|
||||
//!
|
||||
//! This example shows how to:
|
||||
//! 1. Create a transformer with micro config (optimized for WASM)
|
||||
//! 2. Create gate packets from coherence signals
|
||||
//! 3. Run inference and inspect witness
|
||||
//! 4. Handle different decision outcomes
|
||||
//!
|
||||
//! To run this example:
|
||||
//! ```bash
|
||||
//! wasm-pack build --target web
|
||||
//! # Then serve index.html and import the generated package
|
||||
//! ```
|
||||
|
||||
use ruvector_mincut_gated_transformer_wasm::{WasmGatePacket, WasmTransformer};
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// Example showing basic inference with coherence control.
|
||||
#[wasm_bindgen]
|
||||
pub fn run_basic_example() -> Result<JsValue, JsValue> {
|
||||
// Create transformer with micro config
|
||||
let mut transformer = WasmTransformer::new()?;
|
||||
|
||||
// Create gate packet with stable coherence
|
||||
let gate = WasmGatePacket::new();
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate)?;
|
||||
|
||||
// Sample tokens
|
||||
let tokens = vec![1, 2, 3, 4, 5];
|
||||
|
||||
// Run inference
|
||||
let result = transformer.infer(&tokens, gate_js)?;
|
||||
|
||||
// Create result object for JavaScript
|
||||
let output = js_sys::Object::new();
|
||||
|
||||
js_sys::Reflect::set(&output, &"decision".into(), &result.decision().into())?;
|
||||
|
||||
js_sys::Reflect::set(&output, &"reason".into(), &result.reason().into())?;
|
||||
|
||||
js_sys::Reflect::set(&output, &"tier".into(), &result.tier().into())?;
|
||||
|
||||
js_sys::Reflect::set(
|
||||
&output,
|
||||
&"kv_writes_enabled".into(),
|
||||
&result.kv_writes_enabled().into(),
|
||||
)?;
|
||||
|
||||
Ok(output.into())
|
||||
}
|
||||
|
||||
/// Example showing intervention scenarios.
|
||||
#[wasm_bindgen]
|
||||
pub fn run_intervention_example() -> Result<JsValue, JsValue> {
|
||||
let mut transformer = WasmTransformer::new()?;
|
||||
|
||||
// Create gate packet with low lambda (triggering intervention)
|
||||
let mut gate = WasmGatePacket::new();
|
||||
gate.lambda = 10; // Very low coherence
|
||||
gate.lambda_prev = 100;
|
||||
gate.boundary_edges = 50; // High boundary crossing
|
||||
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate)?;
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer(&tokens, gate_js)?;
|
||||
|
||||
// Create result object
|
||||
let output = js_sys::Object::new();
|
||||
|
||||
js_sys::Reflect::set(&output, &"decision".into(), &result.decision().into())?;
|
||||
|
||||
js_sys::Reflect::set(&output, &"reason".into(), &result.reason().into())?;
|
||||
|
||||
js_sys::Reflect::set(&output, &"lambda".into(), &result.lambda().into())?;
|
||||
|
||||
js_sys::Reflect::set(
|
||||
&output,
|
||||
&"boundary_edges".into(),
|
||||
&result.boundary_edges().into(),
|
||||
)?;
|
||||
|
||||
Ok(output.into())
|
||||
}
|
||||
|
||||
/// Example showing multiple inference calls with state tracking.
|
||||
#[wasm_bindgen]
|
||||
pub fn run_sequence_example() -> Result<JsValue, JsValue> {
|
||||
let mut transformer = WasmTransformer::new()?;
|
||||
|
||||
let results = js_sys::Array::new();
|
||||
|
||||
// Run sequence of inferences with varying coherence
|
||||
let lambda_sequence = vec![100, 95, 85, 70, 50, 30, 60, 80, 95];
|
||||
|
||||
for (i, &lambda) in lambda_sequence.iter().enumerate() {
|
||||
let mut gate = WasmGatePacket::new();
|
||||
gate.lambda = lambda;
|
||||
gate.lambda_prev = if i > 0 {
|
||||
lambda_sequence[i - 1]
|
||||
} else {
|
||||
lambda
|
||||
};
|
||||
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate)?;
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer(&tokens, gate_js)?;
|
||||
|
||||
let step = js_sys::Object::new();
|
||||
js_sys::Reflect::set(&step, &"step".into(), &i.into())?;
|
||||
js_sys::Reflect::set(&step, &"lambda".into(), &lambda.into())?;
|
||||
js_sys::Reflect::set(&step, &"decision".into(), &result.decision().into())?;
|
||||
js_sys::Reflect::set(&step, &"reason".into(), &result.reason().into())?;
|
||||
|
||||
results.push(&step);
|
||||
}
|
||||
|
||||
Ok(results.into())
|
||||
}
|
||||
|
||||
/// Example showing custom configuration.
|
||||
#[wasm_bindgen]
|
||||
pub fn run_custom_config_example() -> Result<JsValue, JsValue> {
|
||||
// Create custom config object
|
||||
let config = js_sys::Object::new();
|
||||
js_sys::Reflect::set(&config, &"seq_len_max".into(), &32.into())?;
|
||||
js_sys::Reflect::set(&config, &"hidden".into(), &128.into())?;
|
||||
js_sys::Reflect::set(&config, &"heads".into(), &4.into())?;
|
||||
js_sys::Reflect::set(&config, &"layers".into(), &2.into())?;
|
||||
js_sys::Reflect::set(&config, &"window_normal".into(), &8.into())?;
|
||||
js_sys::Reflect::set(&config, &"window_degraded".into(), &4.into())?;
|
||||
js_sys::Reflect::set(&config, &"ffn_mult".into(), &4.into())?;
|
||||
js_sys::Reflect::set(&config, &"logits".into(), &256.into())?;
|
||||
js_sys::Reflect::set(&config, &"layers_degraded".into(), &1.into())?;
|
||||
js_sys::Reflect::set(&config, &"seq_len_degraded".into(), &16.into())?;
|
||||
js_sys::Reflect::set(&config, &"seq_len_safe".into(), &4.into())?;
|
||||
js_sys::Reflect::set(&config, &"enable_kv_cache".into(), &true.into())?;
|
||||
js_sys::Reflect::set(&config, &"enable_external_writes".into(), &true.into())?;
|
||||
|
||||
let mut transformer = WasmTransformer::with_config(config.into())?;
|
||||
|
||||
let gate = WasmGatePacket::new();
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate)?;
|
||||
|
||||
let tokens = vec![1, 2, 3];
|
||||
let result = transformer.infer(&tokens, gate_js)?;
|
||||
|
||||
let output = js_sys::Object::new();
|
||||
js_sys::Reflect::set(
|
||||
&output,
|
||||
&"buffer_size".into(),
|
||||
&transformer.buffer_size().into(),
|
||||
)?;
|
||||
js_sys::Reflect::set(&output, &"decision".into(), &result.decision().into())?;
|
||||
|
||||
Ok(output.into())
|
||||
}
|
||||
488
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/src/lib.rs
vendored
Normal file
488
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/src/lib.rs
vendored
Normal file
@@ -0,0 +1,488 @@
|
||||
//! WASM bindings for Mincut-Gated Transformer.
|
||||
//!
|
||||
//! Provides JavaScript-friendly API for ultra-low-latency inference with
|
||||
//! coherence control via dynamic minimum cut signals.
|
||||
//!
|
||||
//! ## Features
|
||||
//!
|
||||
//! - **Zero-copy inference**: Direct memory access from JavaScript
|
||||
//! - **Deterministic bounds**: Predictable latency guarantees
|
||||
//! - **Explainable decisions**: Every inference produces a witness
|
||||
//! - **Coherence control**: Integration with mincut gate signals
|
||||
//!
|
||||
//! ## Example (JavaScript)
|
||||
//!
|
||||
//! ```javascript
|
||||
//! import { WasmTransformer, WasmGatePacket } from './pkg';
|
||||
//!
|
||||
//! // Create transformer with micro config (optimized for WASM)
|
||||
//! const transformer = new WasmTransformer();
|
||||
//!
|
||||
//! // Create gate packet from coherence signals
|
||||
//! const gate = new WasmGatePacket();
|
||||
//! gate.lambda = 100;
|
||||
//! gate.lambda_prev = 95;
|
||||
//! gate.boundary_edges = 5;
|
||||
//! gate.boundary_concentration_q15 = 8192;
|
||||
//! gate.partition_count = 3;
|
||||
//!
|
||||
//! // Run inference
|
||||
//! const tokens = new Uint32Array([1, 2, 3, 4]);
|
||||
//! const result = transformer.infer(tokens, gate);
|
||||
//!
|
||||
//! console.log('Decision:', result.decision);
|
||||
//! console.log('Reason:', result.reason);
|
||||
//! console.log('Logits:', result.logits);
|
||||
//! ```
|
||||
|
||||
use ruvector_mincut_gated_transformer::{
|
||||
GateDecision, GatePacket, GatePolicy, GateReason, InferInput, InferOutput,
|
||||
MincutGatedTransformer, QuantizedWeights, SpikePacket, TransformerConfig,
|
||||
};
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
#[wasm_bindgen(start)]
|
||||
pub fn init() {
|
||||
#[cfg(feature = "console_error_panic_hook")]
|
||||
console_error_panic_hook::set_once();
|
||||
}
|
||||
|
||||
/// JavaScript-friendly transformer wrapper.
|
||||
///
|
||||
/// This wraps the core `MincutGatedTransformer` and provides a JavaScript-friendly API.
|
||||
#[wasm_bindgen]
|
||||
pub struct WasmTransformer {
|
||||
inner: MincutGatedTransformer,
|
||||
logits_buffer: Vec<i32>,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl WasmTransformer {
|
||||
/// Create with micro config (optimized for WASM).
|
||||
///
|
||||
/// Micro config:
|
||||
/// - Sequence length: 32
|
||||
/// - Hidden size: 128
|
||||
/// - Heads: 4
|
||||
/// - Layers: 2
|
||||
/// - Logits: 256
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new() -> Result<WasmTransformer, JsValue> {
|
||||
let config = TransformerConfig::micro();
|
||||
let policy = GatePolicy::default();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
|
||||
let inner = MincutGatedTransformer::new(config.clone(), policy, weights)
|
||||
.map_err(|e| JsValue::from_str(&format!("Failed to create transformer: {}", e)))?;
|
||||
|
||||
let logits_buffer = vec![0i32; config.logits as usize];
|
||||
|
||||
Ok(WasmTransformer {
|
||||
inner,
|
||||
logits_buffer,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with baseline config (larger model).
|
||||
///
|
||||
/// Baseline config:
|
||||
/// - Sequence length: 64
|
||||
/// - Hidden size: 256
|
||||
/// - Heads: 4
|
||||
/// - Layers: 4
|
||||
/// - Logits: 1024
|
||||
#[wasm_bindgen]
|
||||
pub fn new_baseline() -> Result<WasmTransformer, JsValue> {
|
||||
let config = TransformerConfig::baseline();
|
||||
let policy = GatePolicy::default();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
|
||||
let inner = MincutGatedTransformer::new(config.clone(), policy, weights)
|
||||
.map_err(|e| JsValue::from_str(&format!("Failed to create transformer: {}", e)))?;
|
||||
|
||||
let logits_buffer = vec![0i32; config.logits as usize];
|
||||
|
||||
Ok(WasmTransformer {
|
||||
inner,
|
||||
logits_buffer,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with custom config from JavaScript object.
|
||||
///
|
||||
/// Example:
|
||||
/// ```javascript
|
||||
/// const config = {
|
||||
/// seq_len_max: 32,
|
||||
/// hidden: 128,
|
||||
/// heads: 4,
|
||||
/// layers: 2,
|
||||
/// window_normal: 8,
|
||||
/// window_degraded: 4,
|
||||
/// ffn_mult: 4,
|
||||
/// logits: 256
|
||||
/// };
|
||||
/// const transformer = WasmTransformer.with_config(config);
|
||||
/// ```
|
||||
#[wasm_bindgen]
|
||||
pub fn with_config(config_js: JsValue) -> Result<WasmTransformer, JsValue> {
|
||||
let config: TransformerConfig = serde_wasm_bindgen::from_value(config_js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid config: {}", e)))?;
|
||||
|
||||
let policy = GatePolicy::default();
|
||||
let weights = QuantizedWeights::empty(&config);
|
||||
|
||||
let inner = MincutGatedTransformer::new(config.clone(), policy, weights)
|
||||
.map_err(|e| JsValue::from_str(&format!("Failed to create transformer: {}", e)))?;
|
||||
|
||||
let logits_buffer = vec![0i32; config.logits as usize];
|
||||
|
||||
Ok(WasmTransformer {
|
||||
inner,
|
||||
logits_buffer,
|
||||
})
|
||||
}
|
||||
|
||||
/// Run inference with gate packet.
|
||||
///
|
||||
/// Returns a `WasmInferResult` containing logits, decision, and witness information.
|
||||
#[wasm_bindgen]
|
||||
pub fn infer(&mut self, tokens: &[u32], gate_js: JsValue) -> Result<WasmInferResult, JsValue> {
|
||||
let gate: WasmGatePacket = serde_wasm_bindgen::from_value(gate_js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid gate packet: {}", e)))?;
|
||||
|
||||
let gate_packet = gate.to_native();
|
||||
let input = InferInput::from_tokens(tokens, gate_packet);
|
||||
|
||||
let mut output = InferOutput::new(&mut self.logits_buffer);
|
||||
|
||||
self.inner
|
||||
.infer(&input, &mut output)
|
||||
.map_err(|e| JsValue::from_str(&format!("Inference failed: {}", e)))?;
|
||||
|
||||
Ok(WasmInferResult::from_output(&output))
|
||||
}
|
||||
|
||||
/// Run inference with gate and spike packets.
|
||||
///
|
||||
/// This enables event-driven scheduling with spike signals.
|
||||
#[wasm_bindgen]
|
||||
pub fn infer_with_spikes(
|
||||
&mut self,
|
||||
tokens: &[u32],
|
||||
gate_js: JsValue,
|
||||
spikes_js: JsValue,
|
||||
) -> Result<WasmInferResult, JsValue> {
|
||||
let gate: WasmGatePacket = serde_wasm_bindgen::from_value(gate_js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid gate packet: {}", e)))?;
|
||||
|
||||
let spikes: WasmSpikePacket = serde_wasm_bindgen::from_value(spikes_js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid spike packet: {}", e)))?;
|
||||
|
||||
let gate_packet = gate.to_native();
|
||||
let spike_packet = spikes.to_native();
|
||||
|
||||
let input = InferInput::from_tokens(tokens, gate_packet).with_spikes(spike_packet);
|
||||
|
||||
let mut output = InferOutput::new(&mut self.logits_buffer);
|
||||
|
||||
self.inner
|
||||
.infer(&input, &mut output)
|
||||
.map_err(|e| JsValue::from_str(&format!("Inference failed: {}", e)))?;
|
||||
|
||||
Ok(WasmInferResult::from_output(&output))
|
||||
}
|
||||
|
||||
/// Reset all state (KV cache, cached logits, etc.).
|
||||
#[wasm_bindgen]
|
||||
pub fn reset(&mut self) {
|
||||
self.inner.reset();
|
||||
}
|
||||
|
||||
/// Get the logits buffer size.
|
||||
#[wasm_bindgen]
|
||||
pub fn buffer_size(&self) -> usize {
|
||||
self.logits_buffer.len()
|
||||
}
|
||||
|
||||
/// Update gate policy from JavaScript object.
|
||||
#[wasm_bindgen]
|
||||
pub fn set_policy(&mut self, policy_js: JsValue) -> Result<(), JsValue> {
|
||||
let policy: GatePolicy = serde_wasm_bindgen::from_value(policy_js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid policy: {}", e)))?;
|
||||
|
||||
self.inner.set_policy(policy);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// JavaScript-friendly gate packet.
|
||||
///
|
||||
/// This carries coherence control signals from the mincut engine.
|
||||
#[wasm_bindgen]
|
||||
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct WasmGatePacket {
|
||||
/// Current lambda (minimum cut value / coherence metric)
|
||||
pub lambda: u32,
|
||||
|
||||
/// Previous lambda for trend detection
|
||||
pub lambda_prev: u32,
|
||||
|
||||
/// Number of edges crossing partition boundaries
|
||||
pub boundary_edges: u16,
|
||||
|
||||
/// Boundary edge concentration (Q15: 0-32767)
|
||||
pub boundary_concentration_q15: u16,
|
||||
|
||||
/// Number of partitions in current graph state
|
||||
pub partition_count: u16,
|
||||
|
||||
/// Policy flags (force safe mode, etc.)
|
||||
pub flags: u16,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl WasmGatePacket {
|
||||
/// Create a new gate packet with default values.
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new() -> WasmGatePacket {
|
||||
WasmGatePacket {
|
||||
lambda: 100,
|
||||
lambda_prev: 100,
|
||||
boundary_edges: 0,
|
||||
boundary_concentration_q15: 0,
|
||||
partition_count: 1,
|
||||
flags: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from JavaScript object.
|
||||
#[wasm_bindgen]
|
||||
pub fn from_js(js: JsValue) -> Result<WasmGatePacket, JsValue> {
|
||||
serde_wasm_bindgen::from_value(js)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid gate packet: {}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
impl WasmGatePacket {
|
||||
fn to_native(&self) -> GatePacket {
|
||||
GatePacket {
|
||||
lambda: self.lambda,
|
||||
lambda_prev: self.lambda_prev,
|
||||
boundary_edges: self.boundary_edges,
|
||||
boundary_concentration_q15: self.boundary_concentration_q15,
|
||||
partition_count: self.partition_count,
|
||||
flags: self.flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WasmGatePacket {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// JavaScript-friendly spike packet.
|
||||
///
|
||||
/// Used for event-driven scheduling to determine whether to run inference.
|
||||
#[wasm_bindgen]
|
||||
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct WasmSpikePacket {
|
||||
/// Spike fired indicator (0 = skip or cheap path)
|
||||
pub fired: u8,
|
||||
|
||||
/// Spike rate (Q15: 0-32767)
|
||||
pub rate_q15: u16,
|
||||
|
||||
/// Novelty metric (Q15: 0-32767)
|
||||
pub novelty_q15: u16,
|
||||
|
||||
/// Flags
|
||||
pub flags: u16,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl WasmSpikePacket {
|
||||
/// Create a new spike packet with default values.
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new() -> WasmSpikePacket {
|
||||
WasmSpikePacket {
|
||||
fired: 1,
|
||||
rate_q15: 0,
|
||||
novelty_q15: 0,
|
||||
flags: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WasmSpikePacket {
|
||||
fn to_native(&self) -> SpikePacket {
|
||||
SpikePacket {
|
||||
fired: self.fired,
|
||||
rate_q15: self.rate_q15,
|
||||
novelty_q15: self.novelty_q15,
|
||||
top_len: 0,
|
||||
top_idx: [0; 16],
|
||||
top_w_q15: [0; 16],
|
||||
flags: self.flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WasmSpikePacket {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// JavaScript-friendly inference result.
|
||||
///
|
||||
/// Contains output logits and witness information about the inference decision.
|
||||
#[wasm_bindgen]
|
||||
pub struct WasmInferResult {
|
||||
logits: Vec<i32>,
|
||||
decision: String,
|
||||
reason: String,
|
||||
tier: u8,
|
||||
kv_writes_enabled: bool,
|
||||
external_writes_enabled: bool,
|
||||
effective_seq_len: u16,
|
||||
effective_window: u16,
|
||||
lambda: u32,
|
||||
lambda_prev: u32,
|
||||
boundary_edges: u16,
|
||||
partition_count: u16,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl WasmInferResult {
|
||||
/// Get output logits as Int32Array.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn logits(&self) -> Vec<i32> {
|
||||
self.logits.clone()
|
||||
}
|
||||
|
||||
/// Get gate decision as string.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn decision(&self) -> String {
|
||||
self.decision.clone()
|
||||
}
|
||||
|
||||
/// Get decision reason as string.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn reason(&self) -> String {
|
||||
self.reason.clone()
|
||||
}
|
||||
|
||||
/// Get compute tier (0-3).
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn tier(&self) -> u8 {
|
||||
self.tier
|
||||
}
|
||||
|
||||
/// Check if KV writes were enabled.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn kv_writes_enabled(&self) -> bool {
|
||||
self.kv_writes_enabled
|
||||
}
|
||||
|
||||
/// Check if external writes are enabled.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn external_writes_enabled(&self) -> bool {
|
||||
self.external_writes_enabled
|
||||
}
|
||||
|
||||
/// Get effective sequence length used.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn effective_seq_len(&self) -> u16 {
|
||||
self.effective_seq_len
|
||||
}
|
||||
|
||||
/// Get effective window size used.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn effective_window(&self) -> u16 {
|
||||
self.effective_window
|
||||
}
|
||||
|
||||
/// Get current lambda value.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn lambda(&self) -> u32 {
|
||||
self.lambda
|
||||
}
|
||||
|
||||
/// Get previous lambda value.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn lambda_prev(&self) -> u32 {
|
||||
self.lambda_prev
|
||||
}
|
||||
|
||||
/// Get boundary edges count.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn boundary_edges(&self) -> u16 {
|
||||
self.boundary_edges
|
||||
}
|
||||
|
||||
/// Get partition count.
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn partition_count(&self) -> u16 {
|
||||
self.partition_count
|
||||
}
|
||||
}
|
||||
|
||||
impl WasmInferResult {
|
||||
fn from_output(output: &InferOutput) -> Self {
|
||||
let decision = match output.witness.decision {
|
||||
GateDecision::Allow => "Allow",
|
||||
GateDecision::ReduceScope => "ReduceScope",
|
||||
GateDecision::FlushKv => "FlushKv",
|
||||
GateDecision::FreezeWrites => "FreezeWrites",
|
||||
GateDecision::QuarantineUpdates => "QuarantineUpdates",
|
||||
};
|
||||
|
||||
let reason = match output.witness.reason {
|
||||
GateReason::None => "None",
|
||||
GateReason::LambdaBelowMin => "LambdaBelowMin",
|
||||
GateReason::LambdaDroppedFast => "LambdaDroppedFast",
|
||||
GateReason::BoundarySpike => "BoundarySpike",
|
||||
GateReason::BoundaryConcentrationSpike => "BoundaryConcentrationSpike",
|
||||
GateReason::PartitionDrift => "PartitionDrift",
|
||||
GateReason::SpikeStorm => "SpikeStorm",
|
||||
GateReason::ForcedByFlag => "ForcedByFlag",
|
||||
};
|
||||
|
||||
WasmInferResult {
|
||||
logits: output.logits_i32.to_vec(),
|
||||
decision: decision.to_string(),
|
||||
reason: reason.to_string(),
|
||||
tier: output.stats.tier,
|
||||
kv_writes_enabled: output.witness.kv_writes_enabled != 0,
|
||||
external_writes_enabled: output.witness.external_writes_enabled != 0,
|
||||
effective_seq_len: output.witness.effective_seq_len,
|
||||
effective_window: output.witness.effective_window,
|
||||
lambda: output.witness.lambda,
|
||||
lambda_prev: output.witness.lambda_prev,
|
||||
boundary_edges: output.witness.boundary_edges,
|
||||
partition_count: output.witness.partition_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use wasm_bindgen_test::*;

    /// The default constructor must produce a transformer without error.
    #[wasm_bindgen_test]
    fn test_transformer_creation() {
        let created = WasmTransformer::new();
        assert!(created.is_ok());
    }

    /// A freshly built gate packet starts with both lambda values at 100.
    #[wasm_bindgen_test]
    fn test_gate_packet() {
        let packet = WasmGatePacket::new();
        assert_eq!(packet.lambda, 100);
        assert_eq!(packet.lambda_prev, 100);
    }
}
|
||||
152
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/tests/web.rs
vendored
Normal file
152
vendor/ruvector/crates/ruvector-mincut-gated-transformer-wasm/tests/web.rs
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
//! WebAssembly tests for mincut-gated transformer.
|
||||
//!
|
||||
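//! Run with: wasm-pack test --headless --chrome (or --firefox); this suite is configured with `run_in_browser`, so it targets a browser rather than Node.js.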
|
||||
|
||||
use ruvector_mincut_gated_transformer_wasm::{WasmGatePacket, WasmSpikePacket, WasmTransformer};
|
||||
use wasm_bindgen_test::*;
|
||||
|
||||
// Configure every test in this file to run in a browser environment.
wasm_bindgen_test_configure!(run_in_browser);
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_transformer_new() {
|
||||
let transformer = WasmTransformer::new();
|
||||
assert!(transformer.is_ok());
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_transformer_baseline() {
|
||||
let transformer = WasmTransformer::new_baseline();
|
||||
assert!(transformer.is_ok());
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_gate_packet_creation() {
|
||||
let gate = WasmGatePacket::new();
|
||||
assert_eq!(gate.lambda, 100);
|
||||
assert_eq!(gate.lambda_prev, 100);
|
||||
assert_eq!(gate.boundary_edges, 0);
|
||||
assert_eq!(gate.partition_count, 1);
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_gate_packet_modification() {
|
||||
let mut gate = WasmGatePacket::new();
|
||||
gate.lambda = 150;
|
||||
gate.lambda_prev = 140;
|
||||
gate.boundary_edges = 10;
|
||||
gate.partition_count = 3;
|
||||
|
||||
assert_eq!(gate.lambda, 150);
|
||||
assert_eq!(gate.lambda_prev, 140);
|
||||
assert_eq!(gate.boundary_edges, 10);
|
||||
assert_eq!(gate.partition_count, 3);
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_spike_packet_creation() {
|
||||
let spike = WasmSpikePacket::new();
|
||||
assert_eq!(spike.fired, 1);
|
||||
assert_eq!(spike.rate_q15, 0);
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_basic_inference() {
|
||||
let mut transformer = WasmTransformer::new().unwrap();
|
||||
|
||||
let gate = WasmGatePacket::new();
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer(&tokens, gate_js);
|
||||
|
||||
assert!(result.is_ok());
|
||||
|
||||
let result = result.unwrap();
|
||||
assert_eq!(result.decision(), "Allow");
|
||||
assert_eq!(result.reason(), "None");
|
||||
assert_eq!(result.logits().len(), transformer.buffer_size());
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_inference_with_spikes() {
|
||||
let mut transformer = WasmTransformer::new().unwrap();
|
||||
|
||||
let gate = WasmGatePacket::new();
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
|
||||
|
||||
let spike = WasmSpikePacket::new();
|
||||
let spike_js = serde_wasm_bindgen::to_value(&spike).unwrap();
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer_with_spikes(&tokens, gate_js, spike_js);
|
||||
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_reset() {
|
||||
let mut transformer = WasmTransformer::new().unwrap();
|
||||
|
||||
let gate = WasmGatePacket::new();
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
|
||||
|
||||
// Run inference
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let _result = transformer.infer(&tokens, gate_js.clone());
|
||||
|
||||
// Reset
|
||||
transformer.reset();
|
||||
|
||||
// Run again
|
||||
let result = transformer.infer(&tokens, gate_js);
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_buffer_size() {
|
||||
let transformer = WasmTransformer::new().unwrap();
|
||||
assert_eq!(transformer.buffer_size(), 256); // Micro config logits
|
||||
|
||||
let transformer = WasmTransformer::new_baseline().unwrap();
|
||||
assert_eq!(transformer.buffer_size(), 1024); // Baseline config logits
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_low_lambda_intervention() {
|
||||
let mut transformer = WasmTransformer::new().unwrap();
|
||||
|
||||
let mut gate = WasmGatePacket::new();
|
||||
gate.lambda = 10; // Very low lambda
|
||||
gate.lambda_prev = 100;
|
||||
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer(&tokens, gate_js).unwrap();
|
||||
|
||||
// Should trigger intervention due to low lambda
|
||||
assert_ne!(result.decision(), "Allow");
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test]
|
||||
fn test_witness_fields() {
|
||||
let mut transformer = WasmTransformer::new().unwrap();
|
||||
|
||||
let mut gate = WasmGatePacket::new();
|
||||
gate.lambda = 100;
|
||||
gate.lambda_prev = 95;
|
||||
gate.boundary_edges = 5;
|
||||
gate.partition_count = 3;
|
||||
|
||||
let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
|
||||
|
||||
let tokens = vec![1, 2, 3, 4];
|
||||
let result = transformer.infer(&tokens, gate_js).unwrap();
|
||||
|
||||
assert_eq!(result.lambda(), 100);
|
||||
assert_eq!(result.lambda_prev(), 95);
|
||||
assert_eq!(result.boundary_edges(), 5);
|
||||
assert_eq!(result.partition_count(), 3);
|
||||
assert!(result.effective_seq_len() > 0);
|
||||
assert!(result.effective_window() > 0);
|
||||
}
|
||||
Reference in New Issue
Block a user