Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/crates/ruvector-mincut-gated-transformer-wasm/Cargo.toml
+++ b/crates/ruvector-mincut-gated-transformer-wasm/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "ruvector-mincut-gated-transformer-wasm"
+version = "0.1.0"
+edition = "2021"
+rust-version = "1.77"
+authors = ["RuVector Team"]
+license = "MIT OR Apache-2.0"
+description = "WASM bindings for mincut-gated transformer inference"
+repository = "https://github.com/ruvnet/ruvector"
+keywords = ["transformer", "wasm", "mincut", "inference", "webassembly"]
+categories = ["wasm", "algorithms", "science"]
+
+[lib]
+crate-type = ["cdylib", "rlib"]
+
+[features]
+default = ["console_error_panic_hook"]
+
+[dependencies]
+ruvector-mincut-gated-transformer = { path = "../ruvector-mincut-gated-transformer", default-features = false, features = ["wasm"] }
+wasm-bindgen = { workspace = true }
+serde = { workspace = true }
+serde-wasm-bindgen = "0.6"
+console_error_panic_hook = { version = "0.1", optional = true }
+js-sys = { workspace = true }
+
+[dev-dependencies]
+wasm-bindgen-test = "0.3"
+
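+# NOTE: this crate is a workspace member (it uses `workspace = true` dependencies), and
+# Cargo only honours `[profile.*]` sections in the workspace root manifest. The settings
+# below are ignored (with a warning) unless they are mirrored in the root `Cargo.toml`.
+[profile.release]
+opt-level = "s"
+lto = true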
--- a/crates/ruvector-mincut-gated-transformer-wasm/README.md
+++ b/crates/ruvector-mincut-gated-transformer-wasm/README.md
@@ -0,0 +1,344 @@
+# ruvector-mincut-gated-transformer-wasm
+
+WebAssembly bindings for the mincut-gated transformer - ultra-low-latency inference with coherence control.
+
+## Overview
+
+This crate provides JavaScript-friendly WASM bindings for the `ruvector-mincut-gated-transformer` crate, enabling browser-based transformer inference with deterministic latency bounds and explainable decision making.
+
+## Features
+
+- **Typed-array I/O**: Tokens go in as a `Uint32Array` and logits come back as an `Int32Array` (both are copied across the WASM boundary)
+- **Deterministic bounds**: Predictable p99 latency guarantees
+- **Explainable decisions**: Every inference produces a witness
+- **Coherence control**: Integration with dynamic minimum cut signals
+- **Event-driven scheduling**: Optional spike-based compute tier selection
+
+## Installation
+
+### NPM
+
+```bash
+npm install ruvector-mincut-gated-transformer-wasm
+```
+
+### Build from source
+
+```bash
+wasm-pack build --target web
+```
+
+## Quick Start
+
+```javascript
+import init, { WasmTransformer, WasmGatePacket } from './pkg';
+
+async function run() {
+  await init();
+
+  // Create transformer with micro config (optimized for WASM)
+  const transformer = new WasmTransformer();
+
+  // Create gate packet from coherence signals
+  const gate = new WasmGatePacket();
+  gate.lambda = 100;
+  gate.lambda_prev = 95;
+  gate.boundary_edges = 5;
+  gate.boundary_concentration_q15 = 8192;
+  gate.partition_count = 3;
+
+  // Run inference
+  const tokens = new Uint32Array([1, 2, 3, 4]);
+  const result = transformer.infer(tokens, gate);
+
+  console.log('Decision:', result.decision);
+  console.log('Reason:', result.reason);
+  console.log('Tier:', result.tier);
+  console.log('KV writes enabled:', result.kv_writes_enabled);
+  console.log('External writes enabled:', result.external_writes_enabled);
+  console.log('Logits:', result.logits);
+}
+
+run();
+```
+
+## API Reference
+
+### WasmTransformer
+
+Main transformer class for inference.
+
+#### Constructor
+
+```javascript
+const transformer = new WasmTransformer();
+```
+
+Creates a transformer with micro config (sequence length: 32, hidden: 128, heads: 4, layers: 2).
+
+#### Methods
+
+- `WasmTransformer.new_baseline()` (static): Create with baseline config (larger model)
+- `WasmTransformer.with_config(config)` (static): Create with custom configuration
+- `infer(tokens, gate)`: Run inference with gate packet
+- `infer_with_spikes(tokens, gate, spikes)`: Run inference with gate and spike packets
+- `reset()`: Reset all state (KV cache, cached logits)
+- `buffer_size()`: Get logits buffer size
+- `set_policy(policy)`: Update gate policy
+
+### WasmGatePacket
+
+Gate packet carrying coherence control signals.
+
+#### Constructor
+
+```javascript
+const gate = new WasmGatePacket();
+```
+
+#### Properties
+
+- `lambda`: Current coherence metric (minimum cut value)
+- `lambda_prev`: Previous lambda for trend detection
+- `boundary_edges`: Number of edges crossing partition boundaries
+- `boundary_concentration_q15`: Boundary concentration (Q15: 0-32767)
+- `partition_count`: Number of partitions in graph
+- `flags`: Policy flags (force safe mode, etc.)
+
+### WasmSpikePacket
+
+Spike packet for event-driven scheduling.
+
+#### Constructor
+
+```javascript
+const spike = new WasmSpikePacket();
+```
+
+#### Properties
+
+- `fired`: Spike fired indicator (0 = skip, 1 = active)
+- `rate_q15`: Spike rate (Q15: 0-32767)
+- `novelty_q15`: Novelty metric (Q15: 0-32767)
+- `flags`: Spike flags
+
+### WasmInferResult
+
+Inference result with logits and witness information.
+
+#### Properties
+
+- `logits`: Output logits (Int32Array)
+- `decision`: Gate decision ("Allow", "ReduceScope", "FlushKv", "FreezeWrites", "QuarantineUpdates")
+- `reason`: Decision reason ("None", "LambdaBelowMin", "LambdaDroppedFast", etc.)
+- `tier`: Compute tier used (0-3)
+- `kv_writes_enabled`: Whether KV writes were enabled
+- `external_writes_enabled`: Whether external writes are enabled
+- `effective_seq_len`: Effective sequence length used
+- `effective_window`: Effective window size used
+- `lambda`: Current lambda value
+- `lambda_prev`: Previous lambda value
+- `boundary_edges`: Boundary edges count
+- `partition_count`: Partition count
+
+## Configuration
+
+### Micro Config (Default)
+
+Optimized for WASM and edge gateways:
+
+```javascript
+{
+  seq_len_max: 32,
+  hidden: 128,
+  heads: 4,
+  layers: 2,
+  window_normal: 8,
+  window_degraded: 4,
+  ffn_mult: 4,
+  logits: 256
+}
+```
+
+### Baseline Config
+
+Larger model for more capacity:
+
+```javascript
+const transformer = WasmTransformer.new_baseline();
+// seq_len_max: 64, hidden: 256, heads: 4, layers: 4, logits: 1024
+```
+
+### Custom Config
+
+```javascript
+const config = {
+  seq_len_max: 32,
+  hidden: 128,
+  heads: 4,
+  layers: 2,
+  window_normal: 8,
+  window_degraded: 4,
+  ffn_mult: 4,
+  logits: 256,
+  layers_degraded: 1,
+  seq_len_degraded: 16,
+  seq_len_safe: 4,
+  enable_kv_cache: true,
+  enable_external_writes: true
+};
+
+const transformer = WasmTransformer.with_config(config);
+```
+
+## Gate Policy
+
+Control when the gate intervenes:
+
+```javascript
+const policy = {
+  lambda_min: 30,
+  drop_ratio_q15_max: 12288,  // ~37.5%
+  boundary_edges_max: 20,
+  boundary_concentration_q15_max: 20480,  // ~62.5%
+  partitions_max: 10,
+  spike_rate_q15_max: 16384,
+  spike_novelty_q15_min: 2048,
+  allow_kv_write_when_unstable: true,
+  allow_external_write_when_unstable: false
+};
+
+transformer.set_policy(policy);
+```
+
+## Decision Types
+
+### Gate Decisions
+
+- **Allow**: Proceed normally with full capabilities
+- **ReduceScope**: Reduce sequence length and window size
+- **FlushKv**: Flush KV cache before proceeding
+- **FreezeWrites**: Run in read-only mode (no KV updates)
+- **QuarantineUpdates**: Run compute but discard all state changes
+
+### Decision Reasons
+
+- **None**: No intervention needed
+- **LambdaBelowMin**: Lambda below minimum threshold
+- **LambdaDroppedFast**: Lambda dropped too quickly
+- **BoundarySpike**: Boundary edge count exceeded threshold
+- **BoundaryConcentrationSpike**: Boundary concentration too high
+- **PartitionDrift**: Partition count indicates drift
+- **SpikeStorm**: Spike rate indicates overload
+- **ForcedByFlag**: Forced by flag in gate packet
+
+## Examples
+
+### Basic Inference
+
+```javascript
+const transformer = new WasmTransformer();
+const gate = new WasmGatePacket();
+const tokens = new Uint32Array([1, 2, 3, 4]);
+const result = transformer.infer(tokens, gate);
+console.log(result.decision);
+```
+
+### With Spike Scheduling
+
+```javascript
+const transformer = new WasmTransformer();
+const gate = new WasmGatePacket();
+const spike = new WasmSpikePacket();
+spike.fired = 1;
+spike.novelty_q15 = 8192;
+
+const tokens = new Uint32Array([1, 2, 3, 4]);
+const result = transformer.infer_with_spikes(tokens, gate, spike);
+```
+
+### Handling Interventions
+
+```javascript
+const transformer = new WasmTransformer();
+const gate = new WasmGatePacket();
+gate.lambda = 10;  // Low coherence
+gate.lambda_prev = 100;
+
+const tokens = new Uint32Array([1, 2, 3, 4]);
+const result = transformer.infer(tokens, gate);
+
+if (result.decision !== 'Allow') {
+  console.log('Intervention triggered:', result.reason);
+  console.log('Effective seq_len:', result.effective_seq_len);
+  console.log('KV writes:', result.kv_writes_enabled);
+}
+```
+
+## Building
+
+### Development
+
+```bash
+wasm-pack build --dev --target web
+```
+
+### Release (optimized)
+
+```bash
+wasm-pack build --release --target web
+```
+
+### For Node.js
+
+```bash
+wasm-pack build --target nodejs
+```
+
+### For Bundlers
+
+```bash
+wasm-pack build --target bundler
+```
+
+## Testing
+
+### Browser tests
+
+```bash
+wasm-pack test --headless --firefox
+wasm-pack test --headless --chrome
+```
+
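+### Node.js tests
+
+```bash
+wasm-pack test --node
+```
+
+Note: `tests/web.rs` is configured with `run_in_browser`, so under `--node` only the unit tests in `src/lib.rs` run; use the browser commands above for the integration tests.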
+
+## Performance
+
+The WASM bindings maintain the core performance characteristics:
+
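+- **Allocation-free core hot path**: The core transformer targets zero heap allocations during inference; the binding layer copies the logits and witness strings into each `WasmInferResult`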
+- **Predictable latency**: Bounded p99 latency guarantees
+- **Small binary size**: ~50KB compressed (micro config)
+- **Low memory footprint**: ~128KB runtime state (micro config)
+
+## Integration with RuVector
+
+This transformer integrates with the RuVector ecosystem:
+
+- **ruvector-mincut**: Provides coherence signals via gate packets
+- **ruvector-core**: Vector search and semantic retrieval
+- **ruvector-router**: Query routing and orchestration
+
+## License
+
+MIT OR Apache-2.0
+
+## Links
+
+- [GitHub Repository](https://github.com/ruvnet/ruvector)
+- [Core Library](../ruvector-mincut-gated-transformer)
+- [RuVector Documentation](../../README.md)
--- a/crates/ruvector-mincut-gated-transformer-wasm/examples/web_scorer.rs
+++ b/crates/ruvector-mincut-gated-transformer-wasm/examples/web_scorer.rs
@@ -0,0 +1,158 @@
+//! Example WASM scorer demonstrating mincut-gated transformer in the browser.
+//!
+//! This example shows how to:
+//! 1. Create a transformer with micro config (optimized for WASM)
+//! 2. Create gate packets from coherence signals
+//! 3. Run inference and inspect witness
+//! 4. Handle different decision outcomes
+//!
+//! To run this example:
+//! ```bash
+//! wasm-pack build --target web
+//! # Then serve index.html and import the generated package
+//! ```
+
+use ruvector_mincut_gated_transformer_wasm::{WasmGatePacket, WasmTransformer};
+use wasm_bindgen::prelude::*;
+
+/// Runs one inference pass with a default (stable) gate packet.
+///
+/// Returns a JavaScript object carrying the `decision`, `reason`, `tier` and
+/// `kv_writes_enabled` values read from the inference result.
+#[wasm_bindgen]
+pub fn run_basic_example() -> Result<JsValue, JsValue> {
+    // Micro-config transformer.
+    let mut transformer = WasmTransformer::new()?;
+
+    // Default gate packet, serialized for the JS-facing `infer` API.
+    let gate_js = serde_wasm_bindgen::to_value(&WasmGatePacket::new())?;
+
+    let tokens: [u32; 5] = [1, 2, 3, 4, 5];
+    let result = transformer.infer(&tokens, gate_js)?;
+
+    // Copy the fields of interest onto a plain JS object, in a fixed order.
+    let output = js_sys::Object::new();
+    let fields: [(&str, JsValue); 4] = [
+        ("decision", result.decision().into()),
+        ("reason", result.reason().into()),
+        ("tier", result.tier().into()),
+        ("kv_writes_enabled", result.kv_writes_enabled().into()),
+    ];
+    for (key, value) in fields {
+        js_sys::Reflect::set(&output, &JsValue::from(key), &value)?;
+    }
+
+    Ok(output.into())
+}
+
+/// Runs one inference pass with a gate packet built to provoke an intervention.
+///
+/// Returns a JavaScript object with the `decision`, `reason`, `lambda` and
+/// `boundary_edges` values read from the inference result.
+#[wasm_bindgen]
+pub fn run_intervention_example() -> Result<JsValue, JsValue> {
+    let mut transformer = WasmTransformer::new()?;
+
+    // Very low lambda after a high previous value, plus many boundary-crossing edges.
+    let gate = WasmGatePacket {
+        lambda: 10,
+        lambda_prev: 100,
+        boundary_edges: 50,
+        ..WasmGatePacket::new()
+    };
+    let gate_js = serde_wasm_bindgen::to_value(&gate)?;
+
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let result = transformer.infer(&tokens, gate_js)?;
+
+    // Copy the fields of interest onto a plain JS object, in a fixed order.
+    let output = js_sys::Object::new();
+    let fields: [(&str, JsValue); 4] = [
+        ("decision", result.decision().into()),
+        ("reason", result.reason().into()),
+        ("lambda", result.lambda().into()),
+        ("boundary_edges", result.boundary_edges().into()),
+    ];
+    for (key, value) in fields {
+        js_sys::Reflect::set(&output, &JsValue::from(key), &value)?;
+    }
+
+    Ok(output.into())
+}
+
+/// Runs a series of inference calls on one transformer while lambda varies.
+///
+/// Each call uses the preceding lambda as `lambda_prev` (the first call uses its
+/// own lambda). Returns a JavaScript array with one `{ step, lambda, decision,
+/// reason }` object per call.
+#[wasm_bindgen]
+pub fn run_sequence_example() -> Result<JsValue, JsValue> {
+    let mut transformer = WasmTransformer::new()?;
+
+    let results = js_sys::Array::new();
+
+    // Coherence falls and then recovers over the sequence.
+    let lambdas: [u32; 9] = [100, 95, 85, 70, 50, 30, 60, 80, 95];
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let mut previous: Option<u32> = None;
+
+    for (i, lambda) in lambdas.into_iter().enumerate() {
+        let gate = WasmGatePacket {
+            lambda,
+            lambda_prev: previous.unwrap_or(lambda),
+            ..WasmGatePacket::new()
+        };
+        previous = Some(lambda);
+
+        let gate_js = serde_wasm_bindgen::to_value(&gate)?;
+        let result = transformer.infer(&tokens, gate_js)?;
+
+        let step = js_sys::Object::new();
+        let fields: [(&str, JsValue); 4] = [
+            ("step", i.into()),
+            ("lambda", lambda.into()),
+            ("decision", result.decision().into()),
+            ("reason", result.reason().into()),
+        ];
+        for (key, value) in fields {
+            js_sys::Reflect::set(&step, &JsValue::from(key), &value)?;
+        }
+
+        results.push(&step);
+    }
+
+    Ok(results.into())
+}
+
+/// Example showing custom configuration.
+///
+/// Builds a configuration object field by field, constructs a transformer from it
+/// via `WasmTransformer::with_config`, runs one inference with a default gate
+/// packet, and returns a JavaScript object with `buffer_size` and `decision`.
+#[wasm_bindgen]
+pub fn run_custom_config_example() -> Result<JsValue, JsValue> {
+    // Numeric settings first, then boolean switches; insertion order is preserved.
+    let numeric_fields: [(&str, i32); 11] = [
+        ("seq_len_max", 32),
+        ("hidden", 128),
+        ("heads", 4),
+        ("layers", 2),
+        ("window_normal", 8),
+        ("window_degraded", 4),
+        ("ffn_mult", 4),
+        ("logits", 256),
+        ("layers_degraded", 1),
+        ("seq_len_degraded", 16),
+        ("seq_len_safe", 4),
+    ];
+    let flag_fields: [(&str, bool); 2] = [
+        ("enable_kv_cache", true),
+        ("enable_external_writes", true),
+    ];
+
+    let config = js_sys::Object::new();
+    for (key, value) in numeric_fields {
+        js_sys::Reflect::set(&config, &JsValue::from(key), &JsValue::from(value))?;
+    }
+    for (key, value) in flag_fields {
+        js_sys::Reflect::set(&config, &JsValue::from(key), &JsValue::from(value))?;
+    }
+
+    let mut transformer = WasmTransformer::with_config(config.into())?;
+
+    let gate_js = serde_wasm_bindgen::to_value(&WasmGatePacket::new())?;
+
+    let tokens: [u32; 3] = [1, 2, 3];
+    let result = transformer.infer(&tokens, gate_js)?;
+
+    let output = js_sys::Object::new();
+    js_sys::Reflect::set(
+        &output,
+        &"buffer_size".into(),
+        &transformer.buffer_size().into(),
+    )?;
+    js_sys::Reflect::set(&output, &"decision".into(), &result.decision().into())?;
+
+    Ok(output.into())
+}
+
+/// Empty entry point.
+///
+/// Cargo compiles files under `examples/` as binary targets unless the manifest
+/// overrides their crate type, and a binary target without `main` fails to build
+/// (which also breaks `cargo test`, since it builds examples). The functions
+/// above are the actual content of this example.
+fn main() {}
--- a/crates/ruvector-mincut-gated-transformer-wasm/src/lib.rs
+++ b/crates/ruvector-mincut-gated-transformer-wasm/src/lib.rs
@@ -0,0 +1,488 @@
+//! WASM bindings for Mincut-Gated Transformer.
+//!
+//! Provides JavaScript-friendly API for ultra-low-latency inference with
+//! coherence control via dynamic minimum cut signals.
+//!
+//! ## Features
+//!
+//! - **Typed-array I/O**: tokens in as `Uint32Array`, logits out as `Int32Array` (copied across the WASM boundary)
+//! - **Deterministic bounds**: Predictable latency guarantees
+//! - **Explainable decisions**: Every inference produces a witness
+//! - **Coherence control**: Integration with mincut gate signals
+//!
+//! ## Example (JavaScript)
+//!
+//! ```javascript
+//! import { WasmTransformer, WasmGatePacket } from './pkg';
+//!
+//! // Create transformer with micro config (optimized for WASM)
+//! const transformer = new WasmTransformer();
+//!
+//! // Create gate packet from coherence signals
+//! const gate = new WasmGatePacket();
+//! gate.lambda = 100;
+//! gate.lambda_prev = 95;
+//! gate.boundary_edges = 5;
+//! gate.boundary_concentration_q15 = 8192;
+//! gate.partition_count = 3;
+//!
+//! // Run inference
+//! const tokens = new Uint32Array([1, 2, 3, 4]);
+//! const result = transformer.infer(tokens, gate);
+//!
+//! console.log('Decision:', result.decision);
+//! console.log('Reason:', result.reason);
+//! console.log('Logits:', result.logits);
+//! ```
+
+use ruvector_mincut_gated_transformer::{
+    GateDecision, GatePacket, GatePolicy, GateReason, InferInput, InferOutput,
+    MincutGatedTransformer, QuantizedWeights, SpikePacket, TransformerConfig,
+};
+use wasm_bindgen::prelude::*;
+
+/// Module start hook, registered through `#[wasm_bindgen(start)]`.
+///
+/// When the `console_error_panic_hook` feature is enabled (it is in the default
+/// feature set) this installs the panic hook once; otherwise it does nothing.
+#[wasm_bindgen(start)]
+pub fn init() {
+    #[cfg(feature = "console_error_panic_hook")]
+    {
+        console_error_panic_hook::set_once();
+    }
+}
+
+/// Transformer handle exported to JavaScript.
+///
+/// Owns the core `MincutGatedTransformer` together with the logits buffer that
+/// every inference call writes into.
+#[wasm_bindgen]
+pub struct WasmTransformer {
+    /// The wrapped core transformer.
+    inner: MincutGatedTransformer,
+    /// Output buffer reused across calls; sized from the config's `logits` value.
+    logits_buffer: Vec<i32>,
+}
+
+#[wasm_bindgen]
+impl WasmTransformer {
+    /// Create with micro config (optimized for WASM).
+    ///
+    /// Micro config:
+    /// - Sequence length: 32
+    /// - Hidden size: 128
+    /// - Heads: 4
+    /// - Layers: 2
+    /// - Logits: 256
+    ///
+    /// Fails with a string error if the core transformer rejects the config.
+    #[wasm_bindgen(constructor)]
+    pub fn new() -> Result<WasmTransformer, JsValue> {
+        Self::from_config(TransformerConfig::micro())
+    }
+
+    /// Create with baseline config (larger model).
+    ///
+    /// Baseline config:
+    /// - Sequence length: 64
+    /// - Hidden size: 256
+    /// - Heads: 4
+    /// - Layers: 4
+    /// - Logits: 1024
+    ///
+    /// Fails with a string error if the core transformer rejects the config.
+    #[wasm_bindgen]
+    pub fn new_baseline() -> Result<WasmTransformer, JsValue> {
+        Self::from_config(TransformerConfig::baseline())
+    }
+
+    /// Create with custom config from JavaScript object.
+    ///
+    /// The object is deserialized into a `TransformerConfig`; an object that does
+    /// not deserialize yields an `"Invalid config: ..."` error.
+    ///
+    /// Example:
+    /// ```javascript
+    /// const config = {
+    ///   seq_len_max: 32,
+    ///   hidden: 128,
+    ///   heads: 4,
+    ///   layers: 2,
+    ///   window_normal: 8,
+    ///   window_degraded: 4,
+    ///   ffn_mult: 4,
+    ///   logits: 256
+    /// };
+    /// const transformer = WasmTransformer.with_config(config);
+    /// ```
+    #[wasm_bindgen]
+    pub fn with_config(config_js: JsValue) -> Result<WasmTransformer, JsValue> {
+        let config: TransformerConfig = serde_wasm_bindgen::from_value(config_js)
+            .map_err(|e| JsValue::from_str(&format!("Invalid config: {}", e)))?;
+
+        Self::from_config(config)
+    }
+
+    /// Run inference with gate packet.
+    ///
+    /// `gate_js` must deserialize into a `WasmGatePacket`; otherwise an
+    /// `"Invalid gate packet: ..."` error is returned.
+    ///
+    /// Returns a `WasmInferResult` containing logits, decision, and witness information.
+    #[wasm_bindgen]
+    pub fn infer(&mut self, tokens: &[u32], gate_js: JsValue) -> Result<WasmInferResult, JsValue> {
+        let gate = WasmGatePacket::from_js(gate_js)?;
+
+        let input = InferInput::from_tokens(tokens, gate.to_native());
+        self.run_inference(&input)
+    }
+
+    /// Run inference with gate and spike packets.
+    ///
+    /// This enables event-driven scheduling with spike signals. The gate packet is
+    /// validated before the spike packet, so a bad gate is reported first.
+    #[wasm_bindgen]
+    pub fn infer_with_spikes(
+        &mut self,
+        tokens: &[u32],
+        gate_js: JsValue,
+        spikes_js: JsValue,
+    ) -> Result<WasmInferResult, JsValue> {
+        let gate = WasmGatePacket::from_js(gate_js)?;
+
+        let spikes: WasmSpikePacket = serde_wasm_bindgen::from_value(spikes_js)
+            .map_err(|e| JsValue::from_str(&format!("Invalid spike packet: {}", e)))?;
+
+        let input =
+            InferInput::from_tokens(tokens, gate.to_native()).with_spikes(spikes.to_native());
+        self.run_inference(&input)
+    }
+
+    /// Reset all state (KV cache, cached logits, etc.).
+    #[wasm_bindgen]
+    pub fn reset(&mut self) {
+        self.inner.reset();
+    }
+
+    /// Get the logits buffer size.
+    #[wasm_bindgen]
+    pub fn buffer_size(&self) -> usize {
+        self.logits_buffer.len()
+    }
+
+    /// Update gate policy from JavaScript object.
+    ///
+    /// The object is deserialized into a `GatePolicy`; an object that does not
+    /// deserialize yields an `"Invalid policy: ..."` error and leaves the current
+    /// policy untouched.
+    #[wasm_bindgen]
+    pub fn set_policy(&mut self, policy_js: JsValue) -> Result<(), JsValue> {
+        let policy: GatePolicy = serde_wasm_bindgen::from_value(policy_js)
+            .map_err(|e| JsValue::from_str(&format!("Invalid policy: {}", e)))?;
+
+        self.inner.set_policy(policy);
+        Ok(())
+    }
+}
+
+// Private helpers live in a plain `impl` so they are not exported to JavaScript.
+impl WasmTransformer {
+    /// Shared constructor body: default policy, empty weights, zeroed logits buffer.
+    fn from_config(config: TransformerConfig) -> Result<WasmTransformer, JsValue> {
+        // Read the buffer length up front so `config` can be moved without a clone.
+        let logits_len = config.logits as usize;
+        let weights = QuantizedWeights::empty(&config);
+
+        let inner = MincutGatedTransformer::new(config, GatePolicy::default(), weights)
+            .map_err(|e| JsValue::from_str(&format!("Failed to create transformer: {}", e)))?;
+
+        Ok(WasmTransformer {
+            inner,
+            logits_buffer: vec![0i32; logits_len],
+        })
+    }
+
+    /// Shared inference body: runs the core transformer into the owned logits
+    /// buffer and copies the outcome into a `WasmInferResult`.
+    fn run_inference(&mut self, input: &InferInput) -> Result<WasmInferResult, JsValue> {
+        let mut output = InferOutput::new(&mut self.logits_buffer);
+
+        self.inner
+            .infer(input, &mut output)
+            .map_err(|e| JsValue::from_str(&format!("Inference failed: {}", e)))?;
+
+        Ok(WasmInferResult::from_output(&output))
+    }
+}
+
+/// Gate packet exported to JavaScript.
+///
+/// Mirrors the fields of the core `GatePacket` one-to-one and is also
+/// (de)serializable, so it can be passed across the boundary as a plain object.
+#[wasm_bindgen]
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct WasmGatePacket {
+    /// Current lambda: the minimum cut value used as the coherence metric.
+    pub lambda: u32,
+
+    /// Lambda from the previous step, used for trend detection.
+    pub lambda_prev: u32,
+
+    /// Count of edges that cross partition boundaries.
+    pub boundary_edges: u16,
+
+    /// Concentration of boundary edges (Q15: 0-32767).
+    pub boundary_concentration_q15: u16,
+
+    /// Partition count of the current graph state.
+    pub partition_count: u16,
+
+    /// Policy flags (force safe mode, etc.).
+    pub flags: u16,
+}
+
+#[wasm_bindgen]
+impl WasmGatePacket {
+    /// Build a gate packet holding the defaults: both lambdas at 100, one
+    /// partition, and every other field zero.
+    #[wasm_bindgen(constructor)]
+    pub fn new() -> WasmGatePacket {
+        Self {
+            lambda: 100,
+            lambda_prev: 100,
+            boundary_edges: 0,
+            boundary_concentration_q15: 0,
+            partition_count: 1,
+            flags: 0,
+        }
+    }
+
+    /// Deserialize a gate packet from a JavaScript value.
+    ///
+    /// Returns an `"Invalid gate packet: ..."` error when deserialization fails.
+    #[wasm_bindgen]
+    pub fn from_js(js: JsValue) -> Result<WasmGatePacket, JsValue> {
+        match serde_wasm_bindgen::from_value(js) {
+            Ok(packet) => Ok(packet),
+            Err(e) => Err(JsValue::from_str(&format!("Invalid gate packet: {}", e))),
+        }
+    }
+}
+
+impl WasmGatePacket {
+    /// Copy every field into the core `GatePacket` representation.
+    fn to_native(&self) -> GatePacket {
+        let Self {
+            lambda,
+            lambda_prev,
+            boundary_edges,
+            boundary_concentration_q15,
+            partition_count,
+            flags,
+        } = *self;
+
+        GatePacket {
+            lambda,
+            lambda_prev,
+            boundary_edges,
+            boundary_concentration_q15,
+            partition_count,
+            flags,
+        }
+    }
+}
+
+/// `Default` yields the same packet as [`WasmGatePacket::new`].
+impl Default for WasmGatePacket {
+    fn default() -> Self {
+        WasmGatePacket::new()
+    }
+}
+
+/// Spike packet exported to JavaScript.
+///
+/// Carries the spike signals used for event-driven scheduling and is
+/// (de)serializable, so it can be passed across the boundary as a plain object.
+#[wasm_bindgen]
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct WasmSpikePacket {
+    /// Whether a spike fired (0 = skip or cheap path).
+    pub fired: u8,
+
+    /// Rate of spikes (Q15: 0-32767).
+    pub rate_q15: u16,
+
+    /// Novelty measure (Q15: 0-32767).
+    pub novelty_q15: u16,
+
+    /// Spike flags.
+    pub flags: u16,
+}
+
+#[wasm_bindgen]
+impl WasmSpikePacket {
+    /// Build a spike packet holding the defaults: `fired` set to 1 and every
+    /// other field zero.
+    #[wasm_bindgen(constructor)]
+    pub fn new() -> WasmSpikePacket {
+        Self {
+            fired: 1,
+            rate_q15: 0,
+            novelty_q15: 0,
+            flags: 0,
+        }
+    }
+}
+
+impl WasmSpikePacket {
+    /// Convert into the core `SpikePacket`.
+    ///
+    /// The `top_*` fields have no counterpart on this wrapper, so they are
+    /// always emitted empty (`top_len` of 0 and zeroed arrays).
+    fn to_native(&self) -> SpikePacket {
+        let Self {
+            fired,
+            rate_q15,
+            novelty_q15,
+            flags,
+        } = *self;
+
+        SpikePacket {
+            fired,
+            rate_q15,
+            novelty_q15,
+            top_len: 0,
+            top_idx: [0; 16],
+            top_w_q15: [0; 16],
+            flags,
+        }
+    }
+}
+
+/// `Default` yields the same packet as [`WasmSpikePacket::new`].
+impl Default for WasmSpikePacket {
+    fn default() -> Self {
+        WasmSpikePacket::new()
+    }
+}
+
+/// Inference result exported to JavaScript.
+///
+/// Holds an owned copy of the output logits plus the witness and stats fields
+/// copied out of the core `InferOutput`. Fields are private and read through the
+/// exported getters.
+#[wasm_bindgen]
+pub struct WasmInferResult {
+    /// Owned copy of the output logits.
+    logits: Vec<i32>,
+    /// Gate decision, rendered as its variant name.
+    decision: String,
+    /// Decision reason, rendered as its variant name.
+    reason: String,
+    /// Compute tier taken from the output stats.
+    tier: u8,
+    /// True when the witness reported KV writes as enabled.
+    kv_writes_enabled: bool,
+    /// True when the witness reported external writes as enabled.
+    external_writes_enabled: bool,
+    /// Effective sequence length recorded in the witness.
+    effective_seq_len: u16,
+    /// Effective window size recorded in the witness.
+    effective_window: u16,
+    /// Lambda recorded in the witness.
+    lambda: u32,
+    /// Previous lambda recorded in the witness.
+    lambda_prev: u32,
+    /// Boundary edge count recorded in the witness.
+    boundary_edges: u16,
+    /// Partition count recorded in the witness.
+    partition_count: u16,
+}
+
+#[wasm_bindgen]
+impl WasmInferResult {
+    /// Output logits; each call returns a fresh copy of the stored values.
+    #[wasm_bindgen(getter)]
+    pub fn logits(&self) -> Vec<i32> {
+        self.logits.to_vec()
+    }
+
+    /// Gate decision as a string.
+    #[wasm_bindgen(getter)]
+    pub fn decision(&self) -> String {
+        self.decision.to_owned()
+    }
+
+    /// Decision reason as a string.
+    #[wasm_bindgen(getter)]
+    pub fn reason(&self) -> String {
+        self.reason.to_owned()
+    }
+
+    /// Compute tier (0-3).
+    #[wasm_bindgen(getter)]
+    pub fn tier(&self) -> u8 {
+        self.tier
+    }
+
+    /// Whether KV writes were enabled.
+    #[wasm_bindgen(getter)]
+    pub fn kv_writes_enabled(&self) -> bool {
+        self.kv_writes_enabled
+    }
+
+    /// Whether external writes are enabled.
+    #[wasm_bindgen(getter)]
+    pub fn external_writes_enabled(&self) -> bool {
+        self.external_writes_enabled
+    }
+
+    /// Effective sequence length used.
+    #[wasm_bindgen(getter)]
+    pub fn effective_seq_len(&self) -> u16 {
+        self.effective_seq_len
+    }
+
+    /// Effective window size used.
+    #[wasm_bindgen(getter)]
+    pub fn effective_window(&self) -> u16 {
+        self.effective_window
+    }
+
+    /// Current lambda value.
+    #[wasm_bindgen(getter)]
+    pub fn lambda(&self) -> u32 {
+        self.lambda
+    }
+
+    /// Previous lambda value.
+    #[wasm_bindgen(getter)]
+    pub fn lambda_prev(&self) -> u32 {
+        self.lambda_prev
+    }
+
+    /// Boundary edges count.
+    #[wasm_bindgen(getter)]
+    pub fn boundary_edges(&self) -> u16 {
+        self.boundary_edges
+    }
+
+    /// Partition count.
+    #[wasm_bindgen(getter)]
+    pub fn partition_count(&self) -> u16 {
+        self.partition_count
+    }
+}
+
+impl WasmInferResult {
+    /// Build an owned result from a core `InferOutput`.
+    ///
+    /// The decision and reason enums are rendered as their variant names, the
+    /// logits slice is copied, and the two write-enable values become booleans
+    /// (non-zero means enabled).
+    fn from_output(output: &InferOutput) -> Self {
+        let witness = &output.witness;
+
+        let decision_name = match witness.decision {
+            GateDecision::Allow => "Allow",
+            GateDecision::ReduceScope => "ReduceScope",
+            GateDecision::FlushKv => "FlushKv",
+            GateDecision::FreezeWrites => "FreezeWrites",
+            GateDecision::QuarantineUpdates => "QuarantineUpdates",
+        };
+
+        let reason_name = match witness.reason {
+            GateReason::None => "None",
+            GateReason::LambdaBelowMin => "LambdaBelowMin",
+            GateReason::LambdaDroppedFast => "LambdaDroppedFast",
+            GateReason::BoundarySpike => "BoundarySpike",
+            GateReason::BoundaryConcentrationSpike => "BoundaryConcentrationSpike",
+            GateReason::PartitionDrift => "PartitionDrift",
+            GateReason::SpikeStorm => "SpikeStorm",
+            GateReason::ForcedByFlag => "ForcedByFlag",
+        };
+
+        Self {
+            logits: output.logits_i32.to_vec(),
+            decision: String::from(decision_name),
+            reason: String::from(reason_name),
+            tier: output.stats.tier,
+            kv_writes_enabled: witness.kv_writes_enabled != 0,
+            external_writes_enabled: witness.external_writes_enabled != 0,
+            effective_seq_len: witness.effective_seq_len,
+            effective_window: witness.effective_window,
+            lambda: witness.lambda,
+            lambda_prev: witness.lambda_prev,
+            boundary_edges: witness.boundary_edges,
+            partition_count: witness.partition_count,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use wasm_bindgen_test::*;
+
+    /// The default constructor succeeds.
+    #[wasm_bindgen_test]
+    fn test_transformer_creation() {
+        assert!(WasmTransformer::new().is_ok());
+    }
+
+    /// A fresh gate packet starts with both lambdas at 100.
+    #[wasm_bindgen_test]
+    fn test_gate_packet() {
+        let WasmGatePacket {
+            lambda,
+            lambda_prev,
+            ..
+        } = WasmGatePacket::new();
+        assert_eq!(lambda, 100);
+        assert_eq!(lambda_prev, 100);
+    }
+}
--- a/crates/ruvector-mincut-gated-transformer-wasm/tests/web.rs
+++ b/crates/ruvector-mincut-gated-transformer-wasm/tests/web.rs
@@ -0,0 +1,152 @@
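+//! WebAssembly tests for mincut-gated transformer.
+//!
+//! These tests are configured with `run_in_browser` below, so run them with e.g.
+//! `wasm-pack test --headless --chrome` (or `--firefox`).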
+
+use ruvector_mincut_gated_transformer_wasm::{WasmGatePacket, WasmSpikePacket, WasmTransformer};
+use wasm_bindgen_test::*;
+
+wasm_bindgen_test_configure!(run_in_browser);
+
+/// The default (micro config) constructor succeeds.
+#[wasm_bindgen_test]
+fn test_transformer_new() {
+    assert!(WasmTransformer::new().is_ok());
+}
+
+/// The baseline constructor succeeds.
+#[wasm_bindgen_test]
+fn test_transformer_baseline() {
+    assert!(WasmTransformer::new_baseline().is_ok());
+}
+
+/// A fresh gate packet carries the documented defaults.
+#[wasm_bindgen_test]
+fn test_gate_packet_creation() {
+    let WasmGatePacket {
+        lambda,
+        lambda_prev,
+        boundary_edges,
+        partition_count,
+        ..
+    } = WasmGatePacket::new();
+
+    assert_eq!(lambda, 100);
+    assert_eq!(lambda_prev, 100);
+    assert_eq!(boundary_edges, 0);
+    assert_eq!(partition_count, 1);
+}
+
+/// Gate packet fields can be overridden and read back.
+#[wasm_bindgen_test]
+fn test_gate_packet_modification() {
+    let gate = WasmGatePacket {
+        lambda: 150,
+        lambda_prev: 140,
+        boundary_edges: 10,
+        partition_count: 3,
+        ..WasmGatePacket::new()
+    };
+
+    assert_eq!(gate.lambda, 150);
+    assert_eq!(gate.lambda_prev, 140);
+    assert_eq!(gate.boundary_edges, 10);
+    assert_eq!(gate.partition_count, 3);
+}
+
+/// A fresh spike packet is marked as fired with a zero rate.
+#[wasm_bindgen_test]
+fn test_spike_packet_creation() {
+    let WasmSpikePacket {
+        fired, rate_q15, ..
+    } = WasmSpikePacket::new();
+
+    assert_eq!(fired, 1);
+    assert_eq!(rate_q15, 0);
+}
+
+/// Inference with a default gate packet is allowed and fills the logits buffer.
+#[wasm_bindgen_test]
+fn test_basic_inference() {
+    let mut transformer = WasmTransformer::new().unwrap();
+
+    let gate_js = serde_wasm_bindgen::to_value(&WasmGatePacket::new()).unwrap();
+
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let outcome = transformer.infer(&tokens, gate_js);
+
+    assert!(outcome.is_ok());
+
+    let result = outcome.unwrap();
+    assert_eq!(result.decision(), "Allow");
+    assert_eq!(result.reason(), "None");
+    assert_eq!(result.logits().len(), transformer.buffer_size());
+}
+
+/// Inference with default gate and spike packets succeeds.
+#[wasm_bindgen_test]
+fn test_inference_with_spikes() {
+    let mut transformer = WasmTransformer::new().unwrap();
+
+    let gate_js = serde_wasm_bindgen::to_value(&WasmGatePacket::new()).unwrap();
+    let spike_js = serde_wasm_bindgen::to_value(&WasmSpikePacket::new()).unwrap();
+
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let outcome = transformer.infer_with_spikes(&tokens, gate_js, spike_js);
+
+    assert!(outcome.is_ok());
+}
+
+/// Inference works both before and after `reset()`.
+#[wasm_bindgen_test]
+fn test_reset() {
+    let mut transformer = WasmTransformer::new().unwrap();
+
+    let gate = WasmGatePacket::new();
+    let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
+
+    // Run inference; check the outcome so a failure before the reset is not
+    // silently ignored.
+    let tokens = vec![1, 2, 3, 4];
+    let first = transformer.infer(&tokens, gate_js.clone());
+    assert!(first.is_ok());
+
+    // Reset
+    transformer.reset();
+
+    // Run again
+    let second = transformer.infer(&tokens, gate_js);
+    assert!(second.is_ok());
+}
+
+/// The logits buffer size follows the selected config.
+#[wasm_bindgen_test]
+fn test_buffer_size() {
+    // Micro config logits
+    let micro = WasmTransformer::new().unwrap();
+    assert_eq!(micro.buffer_size(), 256);
+
+    // Baseline config logits
+    let baseline = WasmTransformer::new_baseline().unwrap();
+    assert_eq!(baseline.buffer_size(), 1024);
+}
+
+/// A very low lambda after a high previous value must not yield "Allow".
+#[wasm_bindgen_test]
+fn test_low_lambda_intervention() {
+    let mut transformer = WasmTransformer::new().unwrap();
+
+    let gate = WasmGatePacket {
+        lambda: 10,
+        lambda_prev: 100,
+        ..WasmGatePacket::new()
+    };
+    let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
+
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let result = transformer.infer(&tokens, gate_js).unwrap();
+
+    // The gate is expected to intervene.
+    assert_ne!(result.decision(), "Allow");
+}
+
+/// The result echoes the gate packet values and reports non-zero effective sizes.
+#[wasm_bindgen_test]
+fn test_witness_fields() {
+    let mut transformer = WasmTransformer::new().unwrap();
+
+    let gate = WasmGatePacket {
+        lambda: 100,
+        lambda_prev: 95,
+        boundary_edges: 5,
+        partition_count: 3,
+        ..WasmGatePacket::new()
+    };
+    let gate_js = serde_wasm_bindgen::to_value(&gate).unwrap();
+
+    let tokens: [u32; 4] = [1, 2, 3, 4];
+    let result = transformer.infer(&tokens, gate_js).unwrap();
+
+    assert_eq!(result.lambda(), 100);
+    assert_eq!(result.lambda_prev(), 95);
+    assert_eq!(result.boundary_edges(), 5);
+    assert_eq!(result.partition_count(), 3);
+    assert!(result.effective_seq_len() > 0);
+    assert!(result.effective_window() > 0);
+}