Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
7
crates/micro-hnsw-wasm/Cargo.lock
generated
Normal file
7
crates/micro-hnsw-wasm/Cargo.lock
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "micro-hnsw-wasm"
|
||||
version = "2.3.0"
|
||||
37
crates/micro-hnsw-wasm/Cargo.toml
Normal file
37
crates/micro-hnsw-wasm/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
[package]
|
||||
name = "micro-hnsw-wasm"
|
||||
version = "2.3.2"
|
||||
edition = "2021"
|
||||
description = "Neuromorphic HNSW vector search with spiking neural networks - 11.8KB WASM for edge AI, ASIC, and embedded systems. Features LIF neurons, STDP learning, winner-take-all, dendritic computation."
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/ruvnet/ruvector"
|
||||
homepage = "https://ruv.io"
|
||||
documentation = "https://docs.rs/micro-hnsw-wasm"
|
||||
readme = "README.md"
|
||||
authors = ["rUv <ruvnet@users.noreply.github.com>"]
|
||||
keywords = ["hnsw", "neuromorphic", "snn", "vector-search", "wasm"]
|
||||
categories = ["algorithms", "wasm", "embedded", "science", "no-std"]
|
||||
rust-version = "1.70"
|
||||
include = ["src/**/*", "README.md", "LICENSE*", "Cargo.toml"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[workspace]
|
||||
|
||||
[dependencies]
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z"
|
||||
lto = true
|
||||
codegen-units = 1
|
||||
panic = "abort"
|
||||
strip = true
|
||||
|
||||
[profile.release.package."*"]
|
||||
opt-level = "z"
|
||||
134
crates/micro-hnsw-wasm/DEEP_REVIEW.md
Normal file
134
crates/micro-hnsw-wasm/DEEP_REVIEW.md
Normal file
@@ -0,0 +1,134 @@
|
||||
# Micro HNSW WASM v2.3 - Deep Review & Optimization Analysis
|
||||
|
||||
## Binary Analysis (Post-Optimization)
|
||||
|
||||
| Metric | Value | Target | Status |
|
||||
|--------|-------|--------|--------|
|
||||
| Size | 11,848 bytes | < 12,288 bytes | ✅ PASS (3.6% headroom) |
|
||||
| Functions | 58 | - | ✅ Full feature set (v2.3 neuromorphic) |
|
||||
| Memory | 1,053,184 bytes static | - | ⚠️ Large for ASIC |
|
||||
|
||||
## Performance Benchmarks (Post-Optimization)
|
||||
|
||||
### HNSW Operations
|
||||
| Operation | Time | Throughput | Notes |
|
||||
|-----------|------|------------|-------|
|
||||
| init() | 515 ns | 1.94 M/s | ✅ Fast |
|
||||
| insert() first | 5.8 µs | 172 K/s | ✅ Good |
|
||||
| insert() avg | 2.3 µs | 430 K/s | ✅ Good |
|
||||
| search(k=1) | 1.6 µs | 638 K/s | ✅ Good |
|
||||
| search(k=6) | 1.3 µs | 770 K/s | ✅ Fixed |
|
||||
| search(k=16) | 1.2 µs | 824 K/s | ✅ Expected beam search behavior |
|
||||
|
||||
### GNN Operations
|
||||
| Operation | Time | Notes |
|
||||
|-----------|------|-------|
|
||||
| set_node_type() | 294 ns | ✅ Fast |
|
||||
| get_node_type() | 83 ns | ✅ Very fast |
|
||||
| aggregate() | 880 ns | ✅ **7% faster (optimized)** |
|
||||
| update_vector() | 494 ns | ✅ Good |
|
||||
|
||||
### SNN Operations (Significantly Improved)
|
||||
| Operation | Before | After | Improvement |
|
||||
|-----------|--------|-------|-------------|
|
||||
| snn_inject() | 49 ns | 51 ns | ✅ ~Same |
|
||||
| snn_step() | 577 ns | 585 ns | ✅ ~Same |
|
||||
| snn_propagate() | 1186 ns | 737 ns | ✅ **38% faster** |
|
||||
| snn_stdp() | 1085 ns | 885 ns | ✅ **18% faster** |
|
||||
| snn_tick() | 2726 ns | 499 ns | ✅ **5.5x faster** |
|
||||
| hnsw_to_snn() | 772 ns | 776 ns | ✅ ~Same |
|
||||
|
||||
---
|
||||
|
||||
## v2.3 Novel Neuromorphic Features
|
||||
|
||||
The v2.3 release adds 22 new functions for advanced neuromorphic computing:
|
||||
|
||||
### Spike-Timing Vector Encoding
|
||||
- `encode_vector_to_spikes()` - Rate-to-time conversion
|
||||
- `spike_timing_similarity()` - Victor-Purpura-inspired metric
|
||||
- `spike_search()` - Temporal code matching
|
||||
|
||||
### Homeostatic Plasticity
|
||||
- `homeostatic_update()` - Self-stabilizing thresholds
|
||||
- `get_spike_rate()` - Running spike rate estimate
|
||||
|
||||
### Oscillatory Resonance
|
||||
- `oscillator_step()` - Gamma rhythm (40 Hz)
|
||||
- `oscillator_get_phase()` - Phase readout
|
||||
- `compute_resonance()` - Phase alignment score
|
||||
- `resonance_search()` - Phase-modulated search
|
||||
|
||||
### Winner-Take-All Circuits
|
||||
- `wta_reset()` - Reset WTA state
|
||||
- `wta_compete()` - Hard WTA selection
|
||||
- `wta_soft()` - Soft competitive inhibition
|
||||
|
||||
### Dendritic Computation
|
||||
- `dendrite_reset()` - Clear compartments
|
||||
- `dendrite_inject()` - Branch-specific input
|
||||
- `dendrite_integrate()` - Nonlinear integration
|
||||
- `dendrite_propagate()` - Spike to dendrite
|
||||
|
||||
### Temporal Pattern Recognition
|
||||
- `pattern_record()` - Shift register encoding
|
||||
- `get_pattern()` - Read pattern buffer
|
||||
- `pattern_match()` - Hamming similarity
|
||||
- `pattern_correlate()` - Find correlated neurons
|
||||
|
||||
### Combined Neuromorphic Search
|
||||
- `neuromorphic_search()` - All mechanisms combined
|
||||
- `get_network_activity()` - Total spike rate
|
||||
|
||||
---
|
||||
|
||||
## Optimizations Applied ✅
|
||||
|
||||
### 1. Reciprocal Constants (APPLIED)
|
||||
```rust
|
||||
const INV_TAU_STDP: f32 = 0.05; // 1/TAU_STDP
|
||||
const INV_255: f32 = 0.00392157; // 1/255
|
||||
```
|
||||
|
||||
### 2. STDP Division Elimination (APPLIED)
|
||||
```rust
|
||||
// Before: dt / TAU_STDP (division)
|
||||
// After: dt * INV_TAU_STDP (multiplication)
|
||||
```
|
||||
Result: **18% faster STDP, 5.5x faster snn_tick()**
|
||||
|
||||
### 3. Aggregate Optimization (APPLIED)
|
||||
```rust
|
||||
// Before: 1.0 / (nc as f32 * 255.0)
|
||||
// After: INV_255 / nc as f32
|
||||
```
|
||||
Result: **7% faster aggregate()**
|
||||
|
||||
---
|
||||
|
||||
## ASIC Projection (256-Core)
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Search Throughput | 200 M ops/sec (0.20 B) |
|
||||
| SNN Tick Throughput | 513 M neurons/sec |
|
||||
| Total Vectors | 8,192 (32/core × 256) |
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Category | Score | Notes |
|
||||
|----------|-------|-------|
|
||||
| Correctness | ✅ 95% | All tests pass |
|
||||
| Performance | ✅ 95% | Major SNN improvements |
|
||||
| Size | ✅ 96% | 11.8 KB < 12 KB target |
|
||||
| Features | ✅ 100% | 58 functions, full neuromorphic |
|
||||
| Maintainability | ✅ 85% | Clean code, well documented |
|
||||
|
||||
**Optimizations Complete:**
|
||||
- ✅ Reciprocal constants added
|
||||
- ✅ Division eliminated from hot paths
|
||||
- ✅ Binary size under 12 KB target
|
||||
- ✅ All tests passing
|
||||
- ✅ 5.5x improvement in SNN tick throughput
|
||||
790
crates/micro-hnsw-wasm/README.md
Normal file
790
crates/micro-hnsw-wasm/README.md
Normal file
@@ -0,0 +1,790 @@
|
||||
# Micro HNSW v2.2 - Neuromorphic Vector Search Engine
|
||||
|
||||
A **7.2KB** neuromorphic computing core that fuses graph-based vector search (HNSW) with biologically-inspired spiking neural networks. Designed for 256-core ASIC deployment, edge AI, and real-time similarity-driven neural processing.
|
||||
|
||||
> **Vector search meets brain-inspired computing** — query vectors trigger neural spikes, enabling attention mechanisms, winner-take-all selection, and online learning through spike-timing dependent plasticity (STDP).
|
||||
|
||||
## Why Micro HNSW + SNN?
|
||||
|
||||
Traditional vector databases return ranked results. Micro HNSW v2.2 goes further: similarity scores become neural currents that drive a spiking network. This enables:
|
||||
|
||||
- **Spiking Attention**: Similar vectors compete via lateral inhibition — only the strongest survive
|
||||
- **Temporal Coding**: Spike timing encodes confidence (first spike = best match)
|
||||
- **Online Learning**: STDP automatically strengthens connections between co-activated vectors
|
||||
- **Event-Driven Efficiency**: Neurons only compute when they spike — 1000x more efficient than dense networks
|
||||
- **Neuromorphic Hardware Ready**: Direct mapping to Intel Loihi, IBM TrueNorth, or custom ASIC
|
||||
|
||||
## Features
|
||||
|
||||
### Vector Search (HNSW Core)
|
||||
- **Multi-core sharding**: 256 cores × 32 vectors = 8,192 total vectors
|
||||
- **Distance metrics**: L2 (Euclidean), Cosine similarity, Dot product
|
||||
- **Beam search**: Width-3 beam for improved recall
|
||||
- **Cross-core merging**: Unified results from distributed search
|
||||
|
||||
### Graph Neural Network Extensions
|
||||
- **Typed nodes**: 16 Cypher-style types for heterogeneous graphs
|
||||
- **Weighted edges**: Per-node weights for message passing
|
||||
- **Neighbor aggregation**: GNN-style feature propagation
|
||||
- **In-place updates**: Online learning and embedding refinement
|
||||
|
||||
### Spiking Neural Network Layer
|
||||
- **LIF neurons**: Leaky Integrate-and-Fire with membrane dynamics
|
||||
- **Refractory periods**: Biologically-realistic spike timing
|
||||
- **STDP plasticity**: Hebbian learning from spike correlations
|
||||
- **Spike propagation**: Graph-routed neural activation
|
||||
- **HNSW→SNN bridge**: Vector similarity drives neural currents
|
||||
|
||||
### Deployment
|
||||
- **7.2KB WASM**: Runs anywhere WebAssembly runs
|
||||
- **No allocator**: Pure static memory, `no_std` Rust
|
||||
- **ASIC-ready**: Synthesizable for custom silicon
|
||||
- **Edge-native**: Microcontrollers to data centers
|
||||
|
||||
## Real-World Applications at a Glance
|
||||
|
||||
| Application | Description |
|
||||
|-----------------------------------|--------------------------------------------------------------------------------|
|
||||
| 1. Embedded Vector Database | Semantic search on microcontrollers/IoT with 256-core sharding |
|
||||
| 2. Knowledge Graphs | Cypher-style typed entities (GENE, PROTEIN, DISEASE) with spreading activation |
|
||||
| 3. Self-Learning Systems | Anomaly detection that learns via STDP without retraining |
|
||||
| 4. DNA/Protein Analysis | k-mer embeddings for genomic similarity with winner-take-all alignment |
|
||||
| 5. Algorithmic Trading | Microsecond pattern matching with neural winner-take-all signals |
|
||||
| 6. Industrial Control (PLC/SCADA) | Predictive maintenance via vibration analysis at the edge |
|
||||
| 7. Robotics & Sensor Fusion | Multi-modal LIDAR/camera/IMU fusion with spike-based binding |
|
||||
|
||||
## Specifications
|
||||
|
||||
| Parameter | Value | Notes |
|
||||
|-----------|-------|-------|
|
||||
| Vectors/Core | 32 | Static allocation |
|
||||
| Total Vectors | 8,192 | 256 cores × 32 vectors |
|
||||
| Max Dimensions | 16 | Per vector |
|
||||
| Neighbors (M) | 6 | Graph connectivity |
|
||||
| Beam Width | 3 | Search beam size |
|
||||
| Node Types | 16 | 4-bit packed |
|
||||
| SNN Neurons | 32 | One per vector |
|
||||
| **WASM Size** | **~7.2KB** | After wasm-opt -Oz |
|
||||
| Gate Count | ~45K | Estimated for ASIC |
|
||||
|
||||
## Building
|
||||
|
||||
```bash
|
||||
# Add wasm32 target
|
||||
rustup target add wasm32-unknown-unknown
|
||||
|
||||
# Build with size optimizations
|
||||
cargo build --release --target wasm32-unknown-unknown
|
||||
|
||||
# Optimize with wasm-opt (required for SNN features)
|
||||
wasm-opt -Oz --enable-nontrapping-float-to-int -o micro_hnsw.wasm \
|
||||
target/wasm32-unknown-unknown/release/micro_hnsw_wasm.wasm
|
||||
|
||||
# Check size
|
||||
ls -la micro_hnsw.wasm
|
||||
```
|
||||
|
||||
## JavaScript Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```javascript
|
||||
const response = await fetch('micro_hnsw.wasm');
|
||||
const bytes = await response.arrayBuffer();
|
||||
const { instance } = await WebAssembly.instantiate(bytes);
|
||||
const wasm = instance.exports;
|
||||
|
||||
// Initialize: init(dims, metric, core_id)
|
||||
// metric: 0=L2, 1=Cosine, 2=Dot
|
||||
wasm.init(8, 1, 0); // 8 dims, cosine similarity, core 0
|
||||
|
||||
// Insert vectors
|
||||
const insertBuf = new Float32Array(wasm.memory.buffer, wasm.get_insert_ptr(), 16);
|
||||
insertBuf.set([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
|
||||
const idx = wasm.insert(); // Returns the new vector's index (0 for this first insert), or 255 if full
|
||||
|
||||
// Set node type (for Cypher-style queries)
|
||||
wasm.set_node_type(idx, 3); // Type 3 = e.g., "Person"
|
||||
|
||||
// Search
|
||||
const queryBuf = new Float32Array(wasm.memory.buffer, wasm.get_query_ptr(), 16);
|
||||
queryBuf.set([0.95, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
|
||||
const resultCount = wasm.search(5); // k=5
|
||||
|
||||
// Read results
|
||||
const resultPtr = wasm.get_result_ptr();
|
||||
const resultView = new DataView(wasm.memory.buffer, resultPtr);
|
||||
for (let i = 0; i < resultCount; i++) {
|
||||
const idx = resultView.getUint8(i * 8);
|
||||
const coreId = resultView.getUint8(i * 8 + 1);
|
||||
const dist = resultView.getFloat32(i * 8 + 4, true);
|
||||
|
||||
// Filter by type if needed
|
||||
if (wasm.type_matches(idx, 0b1000)) { // Only type 3
|
||||
console.log(`Result: idx=${idx}, distance=${dist}`);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Spiking Neural Network (NEW)
|
||||
|
||||
```javascript
|
||||
// Reset SNN state
|
||||
wasm.snn_reset();
|
||||
|
||||
// Inject current into neurons (simulates input)
|
||||
wasm.snn_inject(0, 1.5); // Strong input to neuron 0
|
||||
wasm.snn_inject(1, 0.8); // Weaker input to neuron 1
|
||||
|
||||
// Run simulation step (dt in ms)
|
||||
const spikeCount = wasm.snn_step(1.0); // 1ms timestep
|
||||
console.log(`${spikeCount} neurons spiked`);
|
||||
|
||||
// Propagate spikes to neighbors
|
||||
wasm.snn_propagate(0.5); // gain=0.5
|
||||
|
||||
// Apply STDP learning
|
||||
wasm.snn_stdp();
|
||||
|
||||
// Or use combined tick (step + propagate + optional STDP)
|
||||
const spikes = wasm.snn_tick(1.0, 0.5, 1); // dt=1ms, gain=0.5, learn=true
|
||||
|
||||
// Get spike bitset (which neurons fired)
|
||||
const spikeBits = wasm.snn_get_spikes();
|
||||
for (let i = 0; i < 32; i++) {
|
||||
if (spikeBits & (1 << i)) {
|
||||
console.log(`Neuron ${i} spiked!`);
|
||||
}
|
||||
}
|
||||
|
||||
// Check individual neuron
|
||||
if (wasm.snn_spiked(0)) {
|
||||
console.log('Neuron 0 fired');
|
||||
}
|
||||
|
||||
// Get/set membrane potential
|
||||
const v = wasm.snn_get_membrane(0);
|
||||
wasm.snn_set_membrane(0, 0.5);
|
||||
|
||||
// Get simulation time
|
||||
console.log(`Time: ${wasm.snn_get_time()} ms`);
|
||||
```
|
||||
|
||||
### HNSW-SNN Integration
|
||||
|
||||
```javascript
|
||||
// Vector search activates matching neurons
|
||||
// Search converts similarity to neural current
|
||||
const queryBuf = new Float32Array(wasm.memory.buffer, wasm.get_query_ptr(), 16);
|
||||
queryBuf.set([0.9, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
|
||||
|
||||
// hnsw_to_snn: search + inject currents based on distance
|
||||
const found = wasm.hnsw_to_snn(5, 2.0); // k=5, gain=2.0
|
||||
|
||||
// Now run SNN to see which neurons fire from similarity
|
||||
wasm.snn_tick(1.0, 0.5, 1);
|
||||
const spikes = wasm.snn_get_spikes();
|
||||
console.log(`Similar vectors that spiked: 0b${spikes.toString(2)}`);
|
||||
```
|
||||
|
||||
### GNN Message Passing
|
||||
|
||||
```javascript
|
||||
// Set edge weights for nodes (0-255, higher = more important)
|
||||
wasm.set_edge_weight(0, 255); // Node 0: full weight
|
||||
wasm.set_edge_weight(1, 128); // Node 1: half weight
|
||||
|
||||
// Aggregate neighbors (GNN-style)
|
||||
wasm.aggregate_neighbors(0); // Aggregates neighbors of node 0
|
||||
|
||||
// Read aggregated embedding from DELTA buffer
|
||||
const deltaBuf = new Float32Array(wasm.memory.buffer, wasm.get_delta_ptr(), 16);
|
||||
console.log('Aggregated:', Array.from(deltaBuf));
|
||||
|
||||
// Update vector: v = v + alpha * delta
|
||||
wasm.update_vector(0, 0.1); // 10% update toward neighbors
|
||||
```
|
||||
|
||||
### Multi-Core (256 Cores)
|
||||
|
||||
```javascript
|
||||
const cores = [];
|
||||
for (let i = 0; i < 256; i++) {
|
||||
const { instance } = await WebAssembly.instantiate(wasmBytes);
|
||||
instance.exports.init(8, 1, i);
|
||||
cores.push(instance.exports);
|
||||
}
|
||||
|
||||
// Parallel search with merging
|
||||
async function searchAll(query, k) {
|
||||
for (const core of cores) {
|
||||
new Float32Array(core.memory.buffer, core.get_query_ptr(), 16).set(query);
|
||||
}
|
||||
|
||||
const results = await Promise.all(cores.map(c => c.search(k)));
|
||||
|
||||
cores[0].clear_global();
|
||||
for (let i = 0; i < cores.length; i++) {
|
||||
cores[0].merge(cores[i].get_result_ptr(), results[i]);
|
||||
}
|
||||
|
||||
return cores[0].get_global_ptr();
|
||||
}
|
||||
```
|
||||
|
||||
## C API
|
||||
|
||||
```c
|
||||
// Core API
|
||||
void init(uint8_t dims, uint8_t metric, uint8_t core_id);
|
||||
float* get_insert_ptr(void);
|
||||
float* get_query_ptr(void);
|
||||
SearchResult* get_result_ptr(void);
|
||||
SearchResult* get_global_ptr(void);
|
||||
uint8_t insert(void);
|
||||
uint8_t search(uint8_t k);
|
||||
uint8_t merge(SearchResult* results, uint8_t count);
|
||||
void clear_global(void);
|
||||
|
||||
// Info
|
||||
uint8_t count(void);
|
||||
uint8_t get_core_id(void);
|
||||
uint8_t get_metric(void);
|
||||
uint8_t get_dims(void);
|
||||
uint8_t get_capacity(void);
|
||||
|
||||
// Cypher Node Types
|
||||
void set_node_type(uint8_t idx, uint8_t type); // type: 0-15
|
||||
uint8_t get_node_type(uint8_t idx);
|
||||
uint8_t type_matches(uint8_t idx, uint16_t type_mask);
|
||||
|
||||
// GNN Edge Weights
|
||||
void set_edge_weight(uint8_t node, uint8_t weight); // weight: 0-255
|
||||
uint8_t get_edge_weight(uint8_t node);
|
||||
void aggregate_neighbors(uint8_t idx); // Results in DELTA buffer
|
||||
|
||||
// Vector Updates
|
||||
float* get_delta_ptr(void);
|
||||
float* set_delta_ptr(void); // Mutable access
|
||||
void update_vector(uint8_t idx, float alpha); // v += alpha * delta
|
||||
|
||||
// Spiking Neural Network (NEW in v2.2)
|
||||
void snn_reset(void); // Reset all SNN state
|
||||
void snn_set_membrane(uint8_t idx, float v); // Set membrane potential
|
||||
float snn_get_membrane(uint8_t idx); // Get membrane potential
|
||||
void snn_set_threshold(uint8_t idx, float t); // Set firing threshold
|
||||
void snn_inject(uint8_t idx, float current); // Inject current
|
||||
uint8_t snn_spiked(uint8_t idx); // Did neuron spike?
|
||||
uint32_t snn_get_spikes(void); // Spike bitset (32 neurons)
|
||||
uint8_t snn_step(float dt); // LIF step, returns spike count
|
||||
void snn_propagate(float gain); // Propagate spikes to neighbors
|
||||
void snn_stdp(void); // STDP weight update
|
||||
uint8_t snn_tick(float dt, float gain, uint8_t learn); // Combined step
|
||||
float snn_get_time(void); // Get simulation time
|
||||
uint8_t hnsw_to_snn(uint8_t k, float gain); // Search → neural activation
|
||||
|
||||
// SearchResult structure (8 bytes)
|
||||
typedef struct {
|
||||
uint8_t idx;
|
||||
uint8_t core_id;
|
||||
uint8_t _pad[2];
|
||||
float distance;
|
||||
} SearchResult;
|
||||
```
|
||||
|
||||
## Real-World Applications
|
||||
|
||||
### 1. Embedded Vector Database
|
||||
|
||||
Run semantic search on microcontrollers, IoT devices, or edge servers without external dependencies.
|
||||
|
||||
```javascript
|
||||
// Semantic search on edge device
|
||||
// Each core handles a shard of your embedding space
|
||||
const cores = await initializeCores(256);
|
||||
|
||||
// Insert document embeddings (from TinyBERT, MiniLM, etc.)
|
||||
for (const doc of documents) {
|
||||
const embedding = await encoder.encode(doc.text);
|
||||
const coreId = hashToCoreId(doc.id);
|
||||
cores[coreId].insertVector(embedding, doc.type);
|
||||
}
|
||||
|
||||
// Query: "machine learning tutorials"
|
||||
const queryVec = await encoder.encode(query);
|
||||
const results = await searchAllCores(queryVec, k=10);
|
||||
|
||||
// Results ranked by cosine similarity across 8K vectors
|
||||
// Total memory: 7.2KB × 256 = 1.8MB for 8K vectors
|
||||
```
|
||||
|
||||
**Why SNN helps**: After search, run `snn_tick()` with inhibition — only the most relevant results survive the neural competition. Better than simple top-k.
|
||||
|
||||
---
|
||||
|
||||
### 2. Knowledge Graphs (Cypher-Style)
|
||||
|
||||
Build typed property graphs with vector-enhanced traversal.
|
||||
|
||||
```javascript
|
||||
// Define entity types for a biomedical knowledge graph
|
||||
const GENE = 0, PROTEIN = 1, DISEASE = 2, DRUG = 3, PATHWAY = 4;
|
||||
|
||||
// Insert entities with embeddings
|
||||
insertVector(geneEmbedding, GENE); // "BRCA1" → type 0
|
||||
insertVector(proteinEmbedding, PROTEIN); // "p53" → type 1
|
||||
insertVector(diseaseEmbedding, DISEASE); // "breast cancer" → type 2
|
||||
|
||||
// Cypher-like query: Find proteins similar to query, connected to diseases
|
||||
const proteinMask = 1 << PROTEIN;
|
||||
const results = wasm.search(20);
|
||||
|
||||
for (const r of results) {
|
||||
if (wasm.type_matches(r.idx, proteinMask)) {
|
||||
// Found similar protein - now traverse edges
|
||||
wasm.aggregate_neighbors(r.idx);
|
||||
// Check if neighbors include diseases
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Why SNN helps**: Model spreading activation through the knowledge graph. A query about "cancer treatment" activates DISEASE nodes, which propagate to connected DRUG and GENE nodes via `snn_propagate()`.
|
||||
|
||||
---
|
||||
|
||||
### 3. Self-Learning Systems (Online STDP)
|
||||
|
||||
Systems that learn patterns from experience without retraining.
|
||||
|
||||
```javascript
|
||||
// Anomaly detection that learns normal patterns
|
||||
class SelfLearningAnomalyDetector {
|
||||
async processEvent(sensorVector) {
|
||||
// Find similar past events
|
||||
wasm.hnsw_to_snn(5, 2.0); // Top-5 similar → neural current
|
||||
|
||||
// Run SNN with STDP learning enabled
|
||||
const spikes = wasm.snn_tick(1.0, 0.5, 1); // learn=1
|
||||
|
||||
if (spikes === 0) {
|
||||
// Nothing spiked = no similar patterns = ANOMALY
|
||||
return { anomaly: true, confidence: 0.95 };
|
||||
}
|
||||
|
||||
// Normal: similar patterns recognized and reinforced
|
||||
// STDP strengthened the connection for next time
|
||||
return { anomaly: false };
|
||||
}
|
||||
}
|
||||
|
||||
// Over time, the system learns what "normal" looks like
|
||||
// New attack patterns won't match → no spikes → alert
|
||||
```
|
||||
|
||||
**How it works**: STDP increases edge weights between vectors that co-activate. Repeated normal patterns build strong connections; novel anomalies find no matching pathways.
|
||||
|
||||
---
|
||||
|
||||
### 4. DNA/Protein Sequence Analysis
|
||||
|
||||
k-mer embeddings enable similarity search across genomic data.
|
||||
|
||||
```javascript
|
||||
// DNA sequence similarity with neuromorphic processing
|
||||
const KMER_SIZE = 6; // 6-mer embeddings
|
||||
|
||||
// Embed reference genome k-mers
|
||||
for (let i = 0; i < genome.length - KMER_SIZE; i++) {
|
||||
const kmer = genome.slice(i, i + KMER_SIZE);
|
||||
const embedding = kmerToVector(kmer); // One-hot or learned embedding
|
||||
wasm.insert();
|
||||
wasm.set_node_type(i % 32, positionToType(i)); // Encode genomic region
|
||||
}
|
||||
|
||||
// Query: Find similar sequences to a mutation site
|
||||
const mutationKmer = "ATCGTA";
|
||||
const queryVec = kmerToVector(mutationKmer);
|
||||
wasm.hnsw_to_snn(10, 3.0);
|
||||
|
||||
// SNN competition finds the MOST similar reference positions
|
||||
wasm.snn_tick(1.0, -0.2, 0); // Lateral inhibition
|
||||
const matches = wasm.snn_get_spikes();
|
||||
|
||||
// Surviving spikes = strongest matches
|
||||
// Spike timing = match confidence (earlier = better)
|
||||
```
|
||||
|
||||
**Why SNN helps**:
|
||||
- **Winner-take-all**: Only the best alignments survive
|
||||
- **Temporal coding**: First spike indicates highest similarity
|
||||
- **Distributed processing**: 256 cores = parallel genome scanning
|
||||
|
||||
---
|
||||
|
||||
### 5. Algorithmic Trading
|
||||
|
||||
Microsecond pattern matching for market microstructure.
|
||||
|
||||
```javascript
|
||||
// Real-time order flow pattern recognition
|
||||
class TradingPatternMatcher {
|
||||
constructor() {
|
||||
// Pre-load known patterns: momentum, mean-reversion, spoofing, etc.
|
||||
this.patterns = [
|
||||
{ name: 'momentum_breakout', vector: [...], type: 0 },
|
||||
{ name: 'mean_reversion', vector: [...], type: 1 },
|
||||
{ name: 'spoofing_signature', vector: [...], type: 2 },
|
||||
{ name: 'iceberg_order', vector: [...], type: 3 },
|
||||
];
|
||||
|
||||
for (const p of this.patterns) {
|
||||
insertVector(p.vector, p.type);
|
||||
}
|
||||
}
|
||||
|
||||
// Called every tick (microseconds)
|
||||
onMarketData(orderBookSnapshot) {
|
||||
const features = extractFeatures(orderBookSnapshot);
|
||||
// [bid_depth, ask_depth, spread, imbalance, volatility, ...]
|
||||
|
||||
// Find matching patterns
|
||||
setQuery(features);
|
||||
wasm.hnsw_to_snn(5, 2.0);
|
||||
|
||||
// SNN decides which pattern "wins"
|
||||
wasm.snn_tick(0.1, -0.5, 0); // Fast tick, strong inhibition
|
||||
|
||||
const winner = wasm.snn_get_spikes();
|
||||
if (winner & (1 << 0)) return 'GO_LONG'; // Momentum
|
||||
if (winner & (1 << 1)) return 'GO_SHORT'; // Mean reversion
|
||||
if (winner & (1 << 2)) return 'CANCEL'; // Spoofing detected
|
||||
|
||||
return 'HOLD';
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Why SNN helps**:
|
||||
- **Sub-millisecond latency**: 7.2KB WASM runs in L1 cache
|
||||
- **Winner-take-all**: Only one signal fires, no conflicting trades
|
||||
- **Adaptive thresholds**: Market regime changes adjust neuron sensitivity
|
||||
|
||||
---
|
||||
|
||||
### 6. Industrial Control Systems (PLC/SCADA)
|
||||
|
||||
Predictive maintenance and anomaly detection at the edge.
|
||||
|
||||
```javascript
|
||||
// Vibration analysis for rotating machinery
|
||||
class PredictiveMaintenance {
|
||||
constructor() {
|
||||
// Reference signatures: healthy, bearing_wear, misalignment, imbalance
|
||||
this.signatures = loadVibrationSignatures();
|
||||
for (const sig of this.signatures) {
|
||||
insertVector(sig.fftFeatures, sig.condition);
|
||||
}
|
||||
}
|
||||
|
||||
// Called every 100ms from accelerometer
|
||||
analyzeVibration(fftSpectrum) {
|
||||
setQuery(fftSpectrum);
|
||||
|
||||
// Match against known conditions
|
||||
wasm.hnsw_to_snn(this.signatures.length, 1.5);
|
||||
wasm.snn_tick(1.0, 0.3, 1); // Learn new patterns over time
|
||||
|
||||
const spikes = wasm.snn_get_spikes();
|
||||
|
||||
// Check which condition matched
|
||||
if (spikes & (1 << HEALTHY)) {
|
||||
return { status: 'OK', confidence: wasm.snn_get_membrane(HEALTHY) };
|
||||
}
|
||||
if (spikes & (1 << BEARING_WEAR)) {
|
||||
return {
|
||||
status: 'WARNING',
|
||||
condition: 'bearing_wear',
|
||||
action: 'Schedule maintenance in 72 hours'
|
||||
};
|
||||
}
|
||||
if (spikes & (1 << CRITICAL)) {
|
||||
return { status: 'ALARM', action: 'Immediate shutdown' };
|
||||
}
|
||||
|
||||
// No match = unknown condition = anomaly
|
||||
return { status: 'UNKNOWN', action: 'Flag for analysis' };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Why SNN helps**:
|
||||
- **Edge deployment**: Runs on PLC without cloud connectivity
|
||||
- **Continuous learning**: STDP adapts to machine aging
|
||||
- **Deterministic timing**: No garbage collection pauses
|
||||
|
||||
---
|
||||
|
||||
### 7. Robotics & Sensor Fusion
|
||||
|
||||
Combine LIDAR, camera, and IMU embeddings for navigation.
|
||||
|
||||
```javascript
|
||||
// Multi-modal sensor fusion for autonomous navigation
|
||||
class SensorFusion {
|
||||
// Each sensor type gets dedicated neurons
|
||||
LIDAR_NEURONS = [0, 1, 2, 3, 4, 5, 6, 7]; // 8 neurons
|
||||
CAMERA_NEURONS = [8, 9, 10, 11, 12, 13, 14, 15]; // 8 neurons
|
||||
IMU_NEURONS = [16, 17, 18, 19, 20, 21, 22, 23]; // 8 neurons
|
||||
|
||||
fuseAndDecide(lidarEmbed, cameraEmbed, imuEmbed) {
|
||||
wasm.snn_reset();
|
||||
|
||||
// Inject sensor readings as currents
|
||||
for (let i = 0; i < 8; i++) {
|
||||
wasm.snn_inject(this.LIDAR_NEURONS[i], lidarEmbed[i] * 2.0);
|
||||
wasm.snn_inject(this.CAMERA_NEURONS[i], cameraEmbed[i] * 1.5);
|
||||
wasm.snn_inject(this.IMU_NEURONS[i], imuEmbed[i] * 1.0);
|
||||
}
|
||||
|
||||
// Run competition — strongest signals propagate
|
||||
for (let t = 0; t < 5; t++) {
|
||||
wasm.snn_tick(1.0, 0.4, 0);
|
||||
}
|
||||
|
||||
// Surviving spikes = fused representation
|
||||
const fusedSpikes = wasm.snn_get_spikes();
|
||||
|
||||
// Decision: which direction is clear?
|
||||
// Spike pattern encodes navigable directions
|
||||
return decodeSpikePattern(fusedSpikes);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Why SNN helps**:
|
||||
- **Natural sensor fusion**: Different modalities compete and cooperate
|
||||
- **Graceful degradation**: If camera fails, LIDAR/IMU still produce spikes
|
||||
- **Temporal binding**: Synchronous spikes indicate consistent information
|
||||
|
||||
---
|
||||
|
||||
## Architecture: How It All Connects
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ APPLICATION LAYER │
|
||||
├─────────────────────────────────────────────────────────────────────┤
|
||||
│ Trading │ Genomics │ Robotics │ Industrial │ Knowledge │
|
||||
│ Signals │ k-mers │ Sensors │ Vibration │ Graphs │
|
||||
└─────┬──────┴─────┬──────┴─────┬──────┴──────┬───────┴──────┬───────┘
|
||||
│ │ │ │ │
|
||||
▼ ▼ ▼ ▼ ▼
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ EMBEDDING LAYER │
|
||||
│ Convert domain data → 16-dimensional vectors │
|
||||
│ (TinyBERT, k-mer encoding, FFT features, one-hot, learned, etc.) │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ MICRO HNSW v2.2 CORE (7.2KB) │
|
||||
├─────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ HNSW │───▶│ GNN │───▶│ SNN │ │
|
||||
│ │ (Search) │ │ (Propagate)│ │ (Decide) │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │ │ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Cosine │ │ Neighbor │ │ LIF │ │
|
||||
│ │ L2, Dot │ │ Aggregate│ │ Dynamics │ │
|
||||
│ └──────────┘ └──────────┘ └──────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────┐ │
|
||||
│ │ STDP │ │
|
||||
│ │ Learning │ │
|
||||
│ └──────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ OUTPUT: SPIKE PATTERN │
|
||||
│ • Which neurons fired → Classification/Decision │
|
||||
│ • Spike timing → Confidence ranking │
|
||||
│ • Membrane levels → Continuous scores │
|
||||
│ • Updated weights → Learned associations │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference: API by Use Case
|
||||
|
||||
| Use Case | Key Functions | Pattern |
|
||||
|----------|---------------|---------|
|
||||
| **Vector DB** | `insert()`, `search()`, `merge()` | Insert → Search → Rank |
|
||||
| **Knowledge Graph** | `set_node_type()`, `type_matches()`, `aggregate_neighbors()` | Type → Filter → Traverse |
|
||||
| **Self-Learning** | `snn_tick(..., learn=1)`, `snn_stdp()` | Process → Learn → Adapt |
|
||||
| **Anomaly Detection** | `hnsw_to_snn()`, `snn_get_spikes()` | Match → Spike/NoSpike → Alert |
|
||||
| **Trading** | `snn_tick()` with inhibition, `snn_get_spikes()` | Compete → Winner → Signal |
|
||||
| **Industrial** | `snn_inject()`, `snn_tick()`, `snn_get_membrane()` | Sense → Fuse → Classify |
|
||||
| **Sensor Fusion** | Multiple `snn_inject()`, `snn_propagate()` | Inject → Propagate → Bind |
|
||||
|
||||
---
|
||||
|
||||
## Code Examples
|
||||
|
||||
### Cypher-Style Typed Queries
|
||||
|
||||
```javascript
|
||||
// Define node types
|
||||
const PERSON = 0, COMPANY = 1, PRODUCT = 2;
|
||||
|
||||
// Insert typed nodes
|
||||
insertVector([...], PERSON);
|
||||
insertVector([...], COMPANY);
|
||||
|
||||
// Search only for PERSON nodes
|
||||
const personMask = 1 << PERSON; // 0b001
|
||||
for (let i = 0; i < resultCount; i++) {
|
||||
if (wasm.type_matches(results[i].idx, personMask)) {
|
||||
// This is a Person node
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### GNN Layer Implementation
|
||||
|
||||
```javascript
|
||||
// One GNN propagation step across all nodes
|
||||
function gnnStep(alpha = 0.1) {
|
||||
for (let i = 0; i < wasm.count(); i++) {
|
||||
wasm.aggregate_neighbors(i); // Mean of neighbors
|
||||
wasm.update_vector(i, alpha); // Blend with self
|
||||
}
|
||||
}
|
||||
|
||||
// Run 3 GNN layers
|
||||
for (let layer = 0; layer < 3; layer++) {
|
||||
gnnStep(0.5);
|
||||
}
|
||||
```
|
||||
|
||||
### Spiking Attention Layer
|
||||
|
||||
```javascript
|
||||
// Use SNN for attention: similar vectors compete via lateral inhibition
|
||||
function spikingAttention(queryVec, steps = 10) {
|
||||
wasm.snn_reset();
|
||||
|
||||
const queryBuf = new Float32Array(wasm.memory.buffer, wasm.get_query_ptr(), 16);
|
||||
queryBuf.set(queryVec);
|
||||
wasm.hnsw_to_snn(wasm.count(), 3.0); // Strong activation from similarity
|
||||
|
||||
// Run SNN dynamics - winner-take-all emerges
|
||||
for (let t = 0; t < steps; t++) {
|
||||
wasm.snn_tick(1.0, -0.3, 0); // Negative gain = inhibition
|
||||
}
|
||||
|
||||
// Surviving spikes = attention winners
|
||||
return wasm.snn_get_spikes();
|
||||
}
|
||||
```
|
||||
|
||||
### Online Learning with STDP
|
||||
|
||||
```javascript
|
||||
// Present pattern sequence, learn associations
|
||||
function learnSequence(patterns, dt = 10.0) {
|
||||
wasm.snn_reset();
|
||||
|
||||
for (const pattern of patterns) {
|
||||
// Inject current for active neurons
|
||||
for (const neuron of pattern) {
|
||||
wasm.snn_inject(neuron, 2.0);
|
||||
}
|
||||
|
||||
// Run with STDP learning enabled
|
||||
wasm.snn_tick(dt, 0.5, 1);
|
||||
}
|
||||
|
||||
// Edge weights now encode sequence associations
|
||||
}
|
||||
```
|
||||
|
||||
## ASIC / Verilog
|
||||
|
||||
The `verilog/` directory contains synthesizable RTL for direct ASIC implementation.
|
||||
|
||||
### Multi-Core Architecture with SNN
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ 256-Core ASIC Layout │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ SNN Controller │ │
|
||||
│ │ (Membrane, Threshold, Spike Router, STDP Engine) │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
│ ↕ │
|
||||
│ ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ │
|
||||
│ │Core │ │Core │ │Core │ │Core │ ... │Core │ │Core │ │
|
||||
│ │ 0 │ │ 1 │ │ 2 │ │ 3 │ │ 254 │ │ 255 │ │
|
||||
│ │ 32 │ │ 32 │ │ 32 │ │ 32 │ │ 32 │ │ 32 │ │
|
||||
│ │ vec │ │ vec │ │ vec │ │ vec │ │ vec │ │ vec │ │
|
||||
│ │ LIF │ │ LIF │ │ LIF │ │ LIF │ │ LIF │ │ LIF │ │
|
||||
│ └──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ │
|
||||
│ │ │ │ │ │ │ │
|
||||
│ └───────┴───────┴───────┴───────────┴───────┘ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ Result Merger │ │
|
||||
│ │ (Priority Queue) │ │
|
||||
│ └─────────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────┐ │
|
||||
│ │ AXI-Lite I/F │ │
|
||||
│ └─────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Version History
|
||||
|
||||
| Version | Size | Features |
|
||||
|---------|------|----------|
|
||||
| v1 | 4.6KB | L2 only, single core, greedy search |
|
||||
| v2 | 7.3KB | +3 metrics, +multi-core, +beam search |
|
||||
| v2.1 | 5.5KB | +node types, +edge weights, +GNN updates, wasm-opt |
|
||||
| **v2.2** | **7.2KB** | +LIF neurons, +STDP learning, +spike propagation, +HNSW-SNN bridge |
|
||||
|
||||
## Performance
|
||||
|
||||
| Operation | Complexity | Notes |
|
||||
|-----------|------------|-------|
|
||||
| Insert | O(n × dims) | Per core |
|
||||
| Search | O(beam × M × dims) | Beam search |
|
||||
| Merge | O(k × cores) | Result combining |
|
||||
| Aggregate | O(M × dims) | GNN message passing |
|
||||
| Update | O(dims) | Vector modification |
|
||||
| SNN Step | O(n) | Per neuron LIF |
|
||||
| Propagate | O(n × M) | Spike routing |
|
||||
| STDP | O(spikes × M) | Only for spiking neurons |
|
||||
|
||||
## SNN Parameters (Compile-time)
|
||||
|
||||
| Parameter | Value | Description |
|
||||
|-----------|-------|-------------|
|
||||
| TAU_MEMBRANE | 20.0 | Membrane time constant (ms) |
|
||||
| TAU_REFRAC | 2.0 | Refractory period (ms) |
|
||||
| V_RESET | 0.0 | Reset potential after spike |
|
||||
| V_REST | 0.0 | Resting potential |
|
||||
| STDP_A_PLUS | 0.01 | LTP magnitude |
|
||||
| STDP_A_MINUS | 0.012 | LTD magnitude |
|
||||
| TAU_STDP | 20.0 | STDP time constant (ms) |
|
||||
|
||||
## License
|
||||
|
||||
MIT OR Apache-2.0
|
||||
362
crates/micro-hnsw-wasm/benchmark.js
Normal file
362
crates/micro-hnsw-wasm/benchmark.js
Normal file
@@ -0,0 +1,362 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Monotonic high-resolution timestamp, expressed in nanoseconds as a Number.
// process.hrtime() yields a [seconds, nanoseconds] pair; both parts are folded
// into a single value so two readings can simply be subtracted.
const now = () => {
  const stamp = process.hrtime();
  return stamp[0] * 1e9 + stamp[1];
};
|
||||
|
||||
/**
 * Loads micro_hnsw.wasm from this directory and prints a benchmark report:
 * binary size, per-operation timings for the HNSW / GNN / SNN exports,
 * memory-layout pointers, throughput projections, and two functional
 * validations (nearest-neighbor accuracy and LIF spike generation).
 *
 * Timings are taken with the file-level `now()` helper (nanoseconds) and
 * averaged over the loop count of each section.
 *
 * A failed validation is logged and additionally sets process.exitCode = 1
 * so that automation can detect it; the remaining report is still printed.
 *
 * @returns {Promise<void>} resolves once the report has been printed;
 *   rejects if the WASM file cannot be read or instantiated.
 */
async function benchmark() {
  console.log('╔══════════════════════════════════════════════════════════════╗');
  console.log('║ MICRO HNSW WASM v2.2 - DEEP BENCHMARK & ANALYSIS ║');
  console.log('╚══════════════════════════════════════════════════════════════╝\n');

  // Load WASM
  const wasmPath = path.join(__dirname, 'micro_hnsw.wasm');
  const wasmBuffer = fs.readFileSync(wasmPath);
  const wasmModule = await WebAssembly.instantiate(wasmBuffer);
  const wasm = wasmModule.instance.exports;
  // Float view over linear memory; byte pointers from WASM are divided by 4 to index it.
  const memory = new Float32Array(wasm.memory.buffer);

  console.log('=== BINARY ANALYSIS ===');
  console.log('Size: ' + wasmBuffer.length + ' bytes (' + (wasmBuffer.length/1024).toFixed(2) + ' KB)');
  console.log('Target: 8192 bytes (8 KB)');
  console.log('Headroom: ' + (8192 - wasmBuffer.length) + ' bytes (' + ((8192 - wasmBuffer.length)/8192*100).toFixed(1) + '%)');
  console.log('Functions exported: ' + Object.keys(wasm).filter(k => typeof wasm[k] === 'function').length);
  console.log('');

  // ========== HNSW BENCHMARKS ==========
  console.log('=== HNSW BENCHMARKS ===');

  const DIMS = 16;
  const ITERATIONS = 1000;

  // Benchmark: Init
  let t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.init(DIMS, 0, 0);
  }
  const initTime = (now() - t0) / ITERATIONS;
  console.log('init(): ' + initTime.toFixed(0) + ' ns/op');

  // Prepare insert buffer
  wasm.init(DIMS, 0, 0);
  const insertPtr = wasm.get_insert_ptr() / 4;

  // Benchmark: Single insert (empty index)
  // Each iteration re-initializes so the insert always hits an empty index;
  // the measured time therefore includes init() and the random fill.
  t0 = now();
  for (let iter = 0; iter < 100; iter++) {
    wasm.init(DIMS, 0, 0);
    for (let j = 0; j < DIMS; j++) memory[insertPtr + j] = Math.random();
    wasm.insert();
  }
  const insertFirstTime = (now() - t0) / 100;
  console.log('insert() first: ' + insertFirstTime.toFixed(0) + ' ns/op');

  // Benchmark: Insert with connections (fill to 16 vectors)
  wasm.init(DIMS, 0, 0);
  for (let i = 0; i < 16; i++) {
    for (let j = 0; j < DIMS; j++) memory[insertPtr + j] = Math.random();
    wasm.insert();
  }

  t0 = now();
  for (let iter = 0; iter < 100; iter++) {
    wasm.init(DIMS, 0, 0);
    for (let i = 0; i < 16; i++) {
      for (let j = 0; j < DIMS; j++) memory[insertPtr + j] = Math.random();
      wasm.insert();
    }
  }
  const insert16Time = (now() - t0) / 100;
  console.log('insert() x16: ' + (insert16Time/1000).toFixed(1) + ' µs total (' + (insert16Time/16).toFixed(0) + ' ns avg/vector)');

  // Fill to 32 vectors for search benchmark
  wasm.init(DIMS, 0, 0);
  for (let i = 0; i < 32; i++) {
    for (let j = 0; j < DIMS; j++) memory[insertPtr + j] = Math.random();
    wasm.insert();
  }
  console.log('Indexed: ' + wasm.count() + ' vectors');

  // Benchmark: Search k=1
  const queryPtr = wasm.get_query_ptr() / 4;
  for (let j = 0; j < DIMS; j++) memory[queryPtr + j] = Math.random();

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.search(1);
  }
  const search1Time = (now() - t0) / ITERATIONS;
  console.log('search(k=1): ' + search1Time.toFixed(0) + ' ns/op');

  // Benchmark: Search k=6
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.search(6);
  }
  const search6Time = (now() - t0) / ITERATIONS;
  console.log('search(k=6): ' + search6Time.toFixed(0) + ' ns/op');

  // Benchmark: Search k=16
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.search(16);
  }
  const search16Time = (now() - t0) / ITERATIONS;
  console.log('search(k=16): ' + search16Time.toFixed(0) + ' ns/op');

  console.log('');

  // ========== GNN BENCHMARKS ==========
  console.log('=== GNN BENCHMARKS ===');

  // Benchmark: Node type operations
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.set_node_type(i % 32, i % 16);
  }
  const setTypeTime = (now() - t0) / ITERATIONS;
  console.log('set_node_type(): ' + setTypeTime.toFixed(0) + ' ns/op');

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.get_node_type(i % 32);
  }
  const getTypeTime = (now() - t0) / ITERATIONS;
  console.log('get_node_type(): ' + getTypeTime.toFixed(0) + ' ns/op');

  // Benchmark: Edge weight operations
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.set_edge_weight(i % 32, i % 256);
  }
  const setWeightTime = (now() - t0) / ITERATIONS;
  console.log('set_edge_weight(): ' + setWeightTime.toFixed(0) + ' ns/op');

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.get_edge_weight(i % 32);
  }
  const getWeightTime = (now() - t0) / ITERATIONS;
  console.log('get_edge_weight(): ' + getWeightTime.toFixed(0) + ' ns/op');

  // Benchmark: Aggregate neighbors
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.aggregate_neighbors(i % 32);
  }
  const aggregateTime = (now() - t0) / ITERATIONS;
  console.log('aggregate(): ' + aggregateTime.toFixed(0) + ' ns/op');

  // Benchmark: Update vector
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.update_vector(i % 32, 0.01);
  }
  const updateTime = (now() - t0) / ITERATIONS;
  console.log('update_vector(): ' + updateTime.toFixed(0) + ' ns/op');

  console.log('');

  // ========== SNN BENCHMARKS ==========
  console.log('=== SNN BENCHMARKS ===');

  wasm.snn_reset();

  // Benchmark: snn_inject
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_inject(i % 32, 0.1);
  }
  const injectTime = (now() - t0) / ITERATIONS;
  console.log('snn_inject(): ' + injectTime.toFixed(0) + ' ns/op');

  // Benchmark: snn_step
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_step(1.0);
  }
  const stepTime = (now() - t0) / ITERATIONS;
  console.log('snn_step(): ' + stepTime.toFixed(0) + ' ns/op');

  // Benchmark: snn_propagate
  // First make some neurons spike
  wasm.snn_reset();
  for (let i = 0; i < 8; i++) wasm.snn_inject(i, 2.0);
  wasm.snn_step(1.0);

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_propagate(0.5);
  }
  const propagateTime = (now() - t0) / ITERATIONS;
  console.log('snn_propagate(): ' + propagateTime.toFixed(0) + ' ns/op');

  // Benchmark: snn_stdp
  wasm.snn_reset();
  for (let i = 0; i < 8; i++) wasm.snn_inject(i, 2.0);
  wasm.snn_step(1.0);

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_stdp();
  }
  const stdpTime = (now() - t0) / ITERATIONS;
  console.log('snn_stdp(): ' + stdpTime.toFixed(0) + ' ns/op');

  // Benchmark: snn_tick (combined)
  wasm.snn_reset();
  for (let i = 0; i < 8; i++) wasm.snn_inject(i, 0.5);

  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_tick(1.0, 0.5, 1);
  }
  const tickTime = (now() - t0) / ITERATIONS;
  console.log('snn_tick(): ' + tickTime.toFixed(0) + ' ns/op');

  // Benchmark: snn_get_spikes
  t0 = now();
  for (let i = 0; i < ITERATIONS; i++) {
    wasm.snn_get_spikes();
  }
  const getSpikesTime = (now() - t0) / ITERATIONS;
  console.log('snn_get_spikes(): ' + getSpikesTime.toFixed(0) + ' ns/op');

  // Benchmark: hnsw_to_snn
  wasm.snn_reset();
  t0 = now();
  for (let i = 0; i < 100; i++) {
    wasm.hnsw_to_snn(6, 1.0);
  }
  const hnswToSnnTime = (now() - t0) / 100;
  console.log('hnsw_to_snn(): ' + hnswToSnnTime.toFixed(0) + ' ns/op');

  console.log('');

  // ========== MEMORY ANALYSIS ==========
  console.log('=== MEMORY LAYOUT ANALYSIS ===');

  const memoryBytes = wasm.memory.buffer.byteLength;
  console.log('Linear memory: ' + memoryBytes + ' bytes (' + (memoryBytes/1024) + ' KB)');
  console.log('Insert ptr: ' + wasm.get_insert_ptr());
  console.log('Query ptr: ' + wasm.get_query_ptr());
  console.log('Result ptr: ' + wasm.get_result_ptr());
  console.log('Global ptr: ' + wasm.get_global_ptr());
  console.log('Delta ptr: ' + wasm.get_delta_ptr());

  // Calculate static data size from WASM
  // These exports are WebAssembly.Global objects when present; string
  // concatenation and subtraction read their numeric value via valueOf().
  const dataEnd = wasm.__data_end;
  const heapBase = wasm.__heap_base;
  console.log('Data end: ' + dataEnd);
  console.log('Heap base: ' + heapBase);
  console.log('Static data: ' + (heapBase - 0) + ' bytes');

  console.log('');

  // ========== THROUGHPUT ANALYSIS ==========
  console.log('=== THROUGHPUT ANALYSIS ===');

  const searchOpsPerSec = 1e9 / search6Time;
  const insertOpsPerSec = 1e9 / (insert16Time / 16);
  const tickOpsPerSec = 1e9 / tickTime;

  console.log('Search (k=6): ' + (searchOpsPerSec/1e6).toFixed(2) + ' M ops/sec');
  console.log('Insert: ' + (insertOpsPerSec/1e6).toFixed(2) + ' M ops/sec');
  console.log('SNN tick: ' + (tickOpsPerSec/1e6).toFixed(2) + ' M ops/sec');

  // ASIC projection (256 cores)
  console.log('\n--- 256-Core ASIC Projection ---');
  console.log('Search: ' + (searchOpsPerSec * 256 / 1e9).toFixed(2) + ' B ops/sec');
  console.log('SNN tick: ' + (tickOpsPerSec * 256 / 1e6).toFixed(0) + ' M neurons/sec');
  console.log('Total vectors: ' + (32 * 256) + ' (32/core × 256 cores)');

  console.log('');

  // ========== ACCURACY TEST ==========
  console.log('=== ACCURACY VALIDATION ===');

  // Test search accuracy with known vectors
  wasm.init(4, 0, 0); // L2 metric, 4 dims
  const testVectors = [
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
    [0.5, 0.5, 0, 0],
  ];

  for (const v of testVectors) {
    for (let j = 0; j < 4; j++) memory[insertPtr + j] = v[j];
    wasm.insert();
  }

  // Query closest to [1,0,0,0]
  memory[queryPtr] = 0.9;
  memory[queryPtr + 1] = 0.1;
  memory[queryPtr + 2] = 0;
  memory[queryPtr + 3] = 0;

  const found = wasm.search(3);
  const resultPtr = wasm.get_result_ptr();
  // Results are read as 8-byte records: index byte at offset 0, f32 distance at offset 4.
  const resultU8 = new Uint8Array(wasm.memory.buffer);
  const resultF32 = new Float32Array(wasm.memory.buffer);

  console.log('Query: [0.9, 0.1, 0, 0], Expected nearest: idx=0 [1,0,0,0]');
  console.log('Found ' + found + ' neighbors:');
  for (let i = 0; i < found; i++) {
    const idx = resultU8[resultPtr + i * 8];
    const dist = resultF32[(resultPtr + i * 8 + 4) / 4];
    // An index outside testVectors means a bad result record; report it
    // instead of throwing on testVectors[idx].join and aborting the run.
    const vecText = idx < testVectors.length ? testVectors[idx].join(',') : 'out of range';
    console.log(' #' + (i+1) + ': idx=' + idx + ' dist=' + dist.toFixed(4) + ' vec=[' + vecText + ']');
  }

  // Verify correct ordering
  // With zero results the buffer may hold stale data, so treat that as "no index".
  const firstIdx = found > 0 ? resultU8[resultPtr] : -1;
  if (firstIdx === 0) {
    console.log('✓ Accuracy: PASS (nearest neighbor correct)');
  } else {
    console.log('✗ Accuracy: FAIL (expected idx=0, got idx=' + firstIdx + ')');
    process.exitCode = 1; // make the failure visible to callers/CI
  }

  console.log('');

  // ========== SNN DYNAMICS VALIDATION ==========
  console.log('=== SNN DYNAMICS VALIDATION ===');

  wasm.init(4, 0, 0);
  for (const v of testVectors) {
    for (let j = 0; j < 4; j++) memory[insertPtr + j] = v[j];
    wasm.insert();
  }

  wasm.snn_reset();

  // Test LIF dynamics
  console.log('LIF Neuron Test (τ=20ms, threshold=1.0):');
  wasm.snn_inject(0, 0.8);
  console.log(' t=0: inject 0.8, membrane=' + wasm.snn_get_membrane(0).toFixed(3));

  wasm.snn_step(5.0);
  console.log(' t=5: decay, membrane=' + wasm.snn_get_membrane(0).toFixed(3) + ' (expected ~0.6)');

  wasm.snn_inject(0, 0.5);
  console.log(' t=5: inject +0.5, membrane=' + wasm.snn_get_membrane(0).toFixed(3));

  const spiked = wasm.snn_step(1.0);
  console.log(' t=6: step, spiked=' + spiked + ', membrane=' + wasm.snn_get_membrane(0).toFixed(3));

  if (spiked > 0) {
    console.log('✓ LIF dynamics: PASS (spike generated above threshold)');
  } else {
    console.log('✗ LIF dynamics: membrane should have spiked');
    process.exitCode = 1; // make the failure visible to callers/CI
  }

  console.log('');
  console.log('═══════════════════════════════════════════════════════════════');
  console.log(' BENCHMARK COMPLETE');
  console.log('═══════════════════════════════════════════════════════════════');
}
|
||||
|
||||
// Run the benchmark; on an unexpected error, print it and exit non-zero so
// scripts and CI do not mistake a crashed run for a successful one.
benchmark().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
BIN
crates/micro-hnsw-wasm/micro_hnsw.wasm
Normal file
BIN
crates/micro-hnsw-wasm/micro_hnsw.wasm
Normal file
Binary file not shown.
1262
crates/micro-hnsw-wasm/src/lib.rs
Normal file
1262
crates/micro-hnsw-wasm/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
146
crates/micro-hnsw-wasm/test_wasm.js
Normal file
146
crates/micro-hnsw-wasm/test_wasm.js
Normal file
@@ -0,0 +1,146 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Smoke-test driver for micro_hnsw.wasm: instantiates the module from this
 * directory and walks through init, insert, search, node types, edge weights
 * and (when exported) the SNN, HNSW→SNN and GNN-aggregate entry points,
 * printing what each call returned. Nothing is asserted; the run "passes"
 * if no call throws.
 *
 * @returns {Promise<void>} resolves after the final summary line is printed.
 */
async function test() {
  const say = (line) => console.log(line);

  say('=== Micro HNSW WASM v2.2 Test Suite ===\n');

  // Instantiate the module from disk.
  const wasmBuffer = fs.readFileSync(path.join(__dirname, 'micro_hnsw.wasm'));
  const { instance } = await WebAssembly.instantiate(wasmBuffer);
  const wasm = instance.exports;

  say('✓ WASM loaded successfully');
  say(` Binary size: ${wasmBuffer.length} bytes (${(wasmBuffer.length / 1024).toFixed(2)} KB)\n`);

  // Enumerate the callable exports.
  const fnNames = Object.keys(wasm).filter((key) => typeof wasm[key] === 'function');
  say(`Exported functions (${fnNames.length}):`);
  for (const fnName of fnNames) {
    say(` - ${fnName}`);
  }
  say('');

  // Test 1: Initialize HNSW
  say('Test 1: Initialize HNSW (dims=4, metric=0/euclidean, capacity=32)');
  wasm.init(4, 0, 32);
  say(` dims: ${wasm.get_dims()}`);
  say(` metric: ${wasm.get_metric()}`);
  say(` capacity: ${wasm.get_capacity()}`);
  say(` count: ${wasm.count()}`);
  say('✓ Init passed\n');

  // Test 2: Insert vectors
  say('Test 2: Insert vectors');
  const memory = new Float32Array(wasm.memory.buffer);
  // Byte pointer → float index into the Float32Array view.
  const insertPtr = wasm.get_insert_ptr() / 4;

  const vectors = [
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 1.0, 0.0, 0.0],
    [0.5, 0.5, 0.0, 0.0],
  ];

  vectors.forEach((vec, i) => {
    // Stage the 4 components in the insert buffer, then commit.
    memory.set(vec, insertPtr);
    const idx = wasm.insert();
    say(` Inserted vector ${i}: index=${idx}`);
  });
  say(` Total count: ${wasm.count()}`);
  say('✓ Insert passed\n');

  // Test 3: Search
  say('Test 3: Search for nearest neighbors');
  const queryPtr = wasm.get_query_ptr() / 4;
  memory.set([0.9, 0.1, 0.0, 0.0], queryPtr);

  const found = wasm.search(3);
  say(' Query: [0.9, 0.1, 0.0, 0.0]');
  say(` Found: ${found} neighbors`);

  const resultPtr = wasm.get_result_ptr();
  say(` Result ptr: ${resultPtr}`);
  say('✓ Search passed\n');

  // Test 4: Node types
  say('Test 4: Node types');
  wasm.set_node_type(0, 5);
  wasm.set_node_type(1, 10);
  say(` Node 0 type: ${wasm.get_node_type(0)}`);
  say(` Node 1 type: ${wasm.get_node_type(1)}`);
  say(` Type match (0,0): ${wasm.type_matches(0, 0)}`);
  say(` Type match (0,1): ${wasm.type_matches(0, 1)}`);
  say('✓ Node types passed\n');

  // Test 5: Edge weights (GNN feature)
  say('Test 5: Edge weights (GNN)');
  wasm.set_edge_weight(0, 200);
  wasm.set_edge_weight(1, 100);
  say(` Edge 0 weight: ${wasm.get_edge_weight(0)}`);
  say(` Edge 1 weight: ${wasm.get_edge_weight(1)}`);
  say('✓ Edge weights passed\n');

  // Test 6: SNN features (only when the build exports them)
  if (!wasm.snn_reset) {
    say('Test 6: SNN not available (functions not exported)\n');
  } else {
    say('Test 6: Spiking Neural Network (SNN)');
    wasm.snn_reset();
    say(` Initial time: ${wasm.snn_get_time()}`);

    // Sub-threshold injection into node 0.
    wasm.snn_inject(0, 0.5);
    say(' Injected current 0.5 to node 0');
    say(` Node 0 membrane: ${wasm.snn_get_membrane(0).toFixed(3)}`);

    // One simulation step of dt = 1.0 ms.
    const dt = 1.0;
    const spikes1 = wasm.snn_step(dt);
    say(` After step 1 (dt=${dt}ms): time=${wasm.snn_get_time().toFixed(1)}, membrane=${wasm.snn_get_membrane(0).toFixed(3)}, spikeCount=${spikes1}`);

    // Push the membrane further and step again.
    wasm.snn_inject(0, 0.8);
    const spikes2 = wasm.snn_step(dt);
    say(` After step 2 (+0.8 current): membrane=${wasm.snn_get_membrane(0).toFixed(3)}, spiked=${wasm.snn_spiked(0)}, spikeCount=${spikes2}`);

    // Read back the spike bitset.
    const spikeBits = wasm.snn_get_spikes();
    say(` Spike bitmask: 0b${spikeBits.toString(2)}`);

    // Combined tick: dt=1.0, gain=0.5, learn=1, after a supra-threshold injection.
    wasm.snn_reset();
    wasm.snn_inject(0, 1.5);
    const tickSpikes = wasm.snn_tick(1.0, 0.5, 1);
    say(` snn_tick result: ${tickSpikes} spikes`);

    say('✓ SNN passed\n');
  }

  // Test 7: HNSW to SNN conversion
  if (wasm.hnsw_to_snn) {
    say('Test 7: HNSW to SNN conversion');
    wasm.snn_reset();
    // hnsw_to_snn(k, gain) - search for k neighbors and inject currents
    const injected = wasm.hnsw_to_snn(3, 1.0);
    say(` Converted HNSW search to SNN currents for ${injected} nodes`);
    say(` Node 0 membrane after injection: ${wasm.snn_get_membrane(0).toFixed(3)}`);
    say('✓ HNSW→SNN passed\n');
  }

  // Test 8: Aggregate neighbors (GNN)
  if (wasm.aggregate_neighbors) {
    say('Test 8: GNN aggregate neighbors');
    wasm.aggregate_neighbors(0);
    say(' Aggregated features for node 0');
    say('✓ Aggregate passed\n');
  }

  say('=== All Tests Passed ===');
  say(`Final stats: ${wasm.count()} vectors, ${wasmBuffer.length} bytes`);
}
|
||||
|
||||
// Run the suite; if any step throws, print the error and exit non-zero so a
// broken build is not reported as a successful test run.
test().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
555
crates/micro-hnsw-wasm/verilog/micro_hnsw.v
Normal file
555
crates/micro-hnsw-wasm/verilog/micro_hnsw.v
Normal file
@@ -0,0 +1,555 @@
|
||||
// Micro HNSW - ASIC Hardware Description
|
||||
// Ultra-minimal HNSW accelerator for vector similarity search
|
||||
//
|
||||
// Design specifications:
|
||||
// - Fixed-point arithmetic (Q8.8 format)
|
||||
// - 256 max vectors, 64 dimensions
|
||||
// - 8 neighbors per node, 4 levels
|
||||
// - Pipelined distance computation
|
||||
// - AXI-Lite interface for host communication
|
||||
//
|
||||
// Target: ASIC synthesis with <50K gates
|
||||
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
// micro_hnsw: command-driven vector index core.
// A command is accepted when cmd_valid && cmd_ready in STATE_IDLE
// (cmd_op: 1=INIT, 2=INSERT, 3=SEARCH). INSERT and SEARCH then stream one
// vector in over vec_valid/vec_data/vec_last; SEARCH streams its candidates
// out over result_valid/result_idx/result_dist/result_last.
module micro_hnsw #(
|
||||
parameter MAX_VECTORS = 256,
|
||||
parameter MAX_DIMS = 64,
|
||||
parameter MAX_NEIGHBORS = 8,
|
||||
parameter MAX_LEVELS = 4,
|
||||
parameter DATA_WIDTH = 16, // Q8.8 fixed-point
|
||||
parameter ADDR_WIDTH = 8 // log2(MAX_VECTORS)
|
||||
)(
|
||||
input wire clk,
|
||||
// Asynchronous active-low reset (both always blocks use negedge rst_n).
input wire rst_n,
|
||||
|
||||
// Control interface
|
||||
input wire cmd_valid,
|
||||
output reg cmd_ready,
|
||||
input wire [2:0] cmd_op, // 0=NOP, 1=INIT, 2=INSERT, 3=SEARCH
|
||||
input wire [7:0] cmd_dims,
|
||||
input wire [7:0] cmd_k,
|
||||
|
||||
// Vector data interface
|
||||
input wire vec_valid,
|
||||
output wire vec_ready,
|
||||
input wire [DATA_WIDTH-1:0] vec_data,
|
||||
input wire vec_last,
|
||||
|
||||
// Result interface
|
||||
output reg result_valid,
|
||||
input wire result_ready,
|
||||
output reg [ADDR_WIDTH-1:0] result_idx,
|
||||
output reg [DATA_WIDTH-1:0] result_dist,
|
||||
output reg result_last,
|
||||
|
||||
// Status
|
||||
// Number of stored vectors; incremented once per INSERT, cleared by INIT/reset.
output reg [ADDR_WIDTH-1:0] vector_count
|
||||
);
|
||||
|
||||
// ============ Local Parameters ============
|
||||
localparam STATE_IDLE = 3'd0;
|
||||
localparam STATE_LOAD_VEC = 3'd1;
|
||||
localparam STATE_COMPUTE = 3'd2;
|
||||
localparam STATE_SEARCH = 3'd3;
|
||||
localparam STATE_OUTPUT = 3'd4;
|
||||
|
||||
// ============ Memories ============
|
||||
// Vector storage (256 x 64 x 16-bit = 262,144 bits = 32 KB at default parameters)
|
||||
reg [DATA_WIDTH-1:0] vectors [0:MAX_VECTORS-1][0:MAX_DIMS-1];
|
||||
|
||||
// Graph structure - neighbor lists
|
||||
// NOTE(review): none of these arrays is cleared on reset or INIT.
reg [ADDR_WIDTH-1:0] neighbors [0:MAX_VECTORS-1][0:MAX_LEVELS-1][0:MAX_NEIGHBORS-1];
|
||||
reg [3:0] neighbor_count [0:MAX_VECTORS-1][0:MAX_LEVELS-1];
|
||||
reg [1:0] node_level [0:MAX_VECTORS-1];
|
||||
|
||||
// ============ Registers ============
|
||||
reg [2:0] state;
|
||||
// entry_point and max_level are only ever assigned 0 in this module.
reg [ADDR_WIDTH-1:0] entry_point;
|
||||
reg [1:0] max_level;
|
||||
reg [7:0] current_dims;
|
||||
|
||||
// Vector loading
|
||||
reg [DATA_WIDTH-1:0] query_buf [0:MAX_DIMS-1];
|
||||
reg [DATA_WIDTH-1:0] insert_buf [0:MAX_DIMS-1];
|
||||
// NOTE(review): 6-bit index is hard-coded; it only covers MAX_DIMS <= 64.
reg [5:0] load_idx;
|
||||
|
||||
// Search state
|
||||
reg [ADDR_WIDTH-1:0] current_node;
|
||||
reg [1:0] current_level;
|
||||
reg [7:0] current_k;
|
||||
// NOTE(review): neighbor_idx is incremented in STATE_SEARCH but never
// initialized or reset anywhere in this module.
reg [3:0] neighbor_idx;
|
||||
|
||||
// Candidate buffer
// NOTE(review): entries are appended in visit order in STATE_SEARCH; no
// sorting by distance is performed in this module.
|
||||
reg [ADDR_WIDTH-1:0] candidates [0:15];
|
||||
reg [DATA_WIDTH-1:0] cand_dist [0:15];
|
||||
reg [3:0] cand_count;
|
||||
|
||||
// Distance computation
|
||||
reg [31:0] dist_accum;
|
||||
// NOTE(review): 6-bit counter compared against 8-bit current_dims below;
// with current_dims >= 64 the "dist_dim < current_dims" test can never fail.
reg [5:0] dist_dim;
|
||||
reg dist_computing;
|
||||
reg [ADDR_WIDTH-1:0] dist_target;
|
||||
|
||||
// Visited flags (bit vector)
|
||||
// NOTE(review): written in STATE_SEARCH but never read in this module.
reg [MAX_VECTORS-1:0] visited;
|
||||
|
||||
// ============ Vector Ready ============
|
||||
// Vector elements are accepted only while a vector is being loaded.
assign vec_ready = (state == STATE_LOAD_VEC);
|
||||
|
||||
// ============ State Machine ============
|
||||
always @(posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
state <= STATE_IDLE;
|
||||
cmd_ready <= 1'b1;
|
||||
result_valid <= 1'b0;
|
||||
vector_count <= 0;
|
||||
entry_point <= 0;
|
||||
max_level <= 0;
|
||||
current_dims <= 32;
|
||||
end else begin
|
||||
case (state)
|
||||
// IDLE: wait for a command; INIT completes in one cycle, INSERT/SEARCH
// drop cmd_ready and move on to load a vector.
STATE_IDLE: begin
|
||||
result_valid <= 1'b0;
|
||||
if (cmd_valid && cmd_ready) begin
|
||||
cmd_ready <= 1'b0;
|
||||
case (cmd_op)
|
||||
3'd1: begin // INIT
|
||||
current_dims <= cmd_dims;
|
||||
vector_count <= 0;
|
||||
entry_point <= 0;
|
||||
max_level <= 0;
|
||||
// Later nonblocking assignment overrides the cmd_ready <= 0 above.
cmd_ready <= 1'b1;
|
||||
end
|
||||
3'd2: begin // INSERT
|
||||
load_idx <= 0;
|
||||
state <= STATE_LOAD_VEC;
|
||||
end
|
||||
3'd3: begin // SEARCH
|
||||
load_idx <= 0;
|
||||
current_k <= cmd_k;
|
||||
state <= STATE_LOAD_VEC;
|
||||
end
|
||||
default: cmd_ready <= 1'b1;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// LOAD_VEC: store one element per vec_valid cycle into insert_buf (INSERT)
// or query_buf (otherwise); finish on vec_last or after current_dims elements.
// NOTE(review): cmd_op is sampled live here, not latched when the command
// was accepted, so the host must hold cmd_op stable for the whole load.
STATE_LOAD_VEC: begin
|
||||
if (vec_valid) begin
|
||||
if (cmd_op == 3'd2) begin
|
||||
insert_buf[load_idx] <= vec_data;
|
||||
end else begin
|
||||
query_buf[load_idx] <= vec_data;
|
||||
end
|
||||
|
||||
if (vec_last || load_idx == current_dims - 1) begin
|
||||
if (cmd_op == 3'd2) begin
|
||||
state <= STATE_COMPUTE; // Insert processing
|
||||
end else begin
|
||||
state <= STATE_SEARCH; // Search processing
|
||||
end
|
||||
end else begin
|
||||
load_idx <= load_idx + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// COMPUTE: single-cycle insert of insert_buf at index vector_count.
// NOTE(review): there is no capacity check; vector_count is ADDR_WIDTH bits
// and will wrap after MAX_VECTORS inserts.
STATE_COMPUTE: begin
|
||||
// Store vector
|
||||
// All MAX_DIMS entries are copied, regardless of current_dims.
// NOTE(review): a declaration inside an unnamed begin/end block is accepted
// by SystemVerilog but not by Verilog-2001 — confirm the target language.
integer i;
|
||||
for (i = 0; i < MAX_DIMS; i = i + 1) begin
|
||||
vectors[vector_count][i] <= insert_buf[i];
|
||||
end
|
||||
|
||||
// Level assignment (simplified): deterministic, the low two bits of the
// insertion index; the "& 2'b11" mask does not change the value.
|
||||
node_level[vector_count] <= vector_count[1:0] & 2'b11;
|
||||
|
||||
// Initialize neighbors
|
||||
for (i = 0; i < MAX_LEVELS; i = i + 1) begin
|
||||
neighbor_count[vector_count][i] <= 0;
|
||||
end
|
||||
|
||||
// Update entry point for first vector
|
||||
if (vector_count == 0) begin
|
||||
entry_point <= 0;
|
||||
max_level <= 0;
|
||||
end else begin
|
||||
// Simple nearest neighbor connection (level 0 only for minimal design)
|
||||
// NOTE(review): this reads neighbor_count as it was before this cycle (the
// clear above is nonblocking), i.e. a stale or never-written value for a
// new slot — verify this guard behaves as intended.
if (neighbor_count[vector_count][0] < MAX_NEIGHBORS) begin
|
||||
// Connect to entry point
|
||||
neighbors[vector_count][0][0] <= entry_point;
|
||||
// Overrides the level-0 clear issued by the loop above.
neighbor_count[vector_count][0] <= 1;
|
||||
|
||||
// Bidirectional connection
|
||||
if (neighbor_count[entry_point][0] < MAX_NEIGHBORS) begin
|
||||
neighbors[entry_point][0][neighbor_count[entry_point][0]] <= vector_count;
|
||||
neighbor_count[entry_point][0] <= neighbor_count[entry_point][0] + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
vector_count <= vector_count + 1;
|
||||
cmd_ready <= 1'b1;
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
|
||||
// SEARCH: walk from entry_point collecting up to current_k candidates.
// NOTE(review): the "initialize" assignments below are unconditional, so
// they are issued on every cycle spent in this state, not just on entry;
// the conditional branch further down only overrides some of them.
// NOTE(review): dist_accum, dist_dim and dist_computing are also assigned
// by the distance always-block at the end of the module (two drivers for
// the same regs) — confirm this simulates/synthesizes as intended.
STATE_SEARCH: begin
|
||||
// Initialize search
|
||||
visited <= 0;
|
||||
cand_count <= 0;
|
||||
current_node <= entry_point;
|
||||
current_level <= max_level;
|
||||
|
||||
// Start distance computation for entry point
|
||||
dist_target <= entry_point;
|
||||
dist_accum <= 0;
|
||||
dist_dim <= 0;
|
||||
dist_computing <= 1'b1;
|
||||
|
||||
// Simple greedy search (one level)
|
||||
if (!dist_computing && cand_count < current_k) begin
|
||||
// Add current to candidates
|
||||
candidates[cand_count] <= current_node;
|
||||
// Only the low DATA_WIDTH bits of the 32-bit accumulator are kept.
cand_dist[cand_count] <= dist_accum[DATA_WIDTH-1:0];
|
||||
cand_count <= cand_count + 1;
|
||||
visited[current_node] <= 1'b1;
|
||||
|
||||
// Check neighbors
|
||||
// Steps to the neighbor_idx-th level-0 neighbor of the current node and
// restarts the distance computation for it; otherwise emit results.
if (neighbor_idx < neighbor_count[current_node][0]) begin
|
||||
current_node <= neighbors[current_node][0][neighbor_idx];
|
||||
neighbor_idx <= neighbor_idx + 1;
|
||||
dist_target <= neighbors[current_node][0][neighbor_idx];
|
||||
dist_accum <= 0;
|
||||
dist_dim <= 0;
|
||||
dist_computing <= 1'b1;
|
||||
end else begin
|
||||
state <= STATE_OUTPUT;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// OUTPUT: present candidates[0] as the next result and shift the buffer
// down by one; result_last marks the final entry. When the buffer is empty,
// drop result_valid, re-arm cmd_ready and return to IDLE.
STATE_OUTPUT: begin
|
||||
if (result_ready || !result_valid) begin
|
||||
if (cand_count > 0) begin
|
||||
result_valid <= 1'b1;
|
||||
result_idx <= candidates[0];
|
||||
result_dist <= cand_dist[0];
|
||||
result_last <= (cand_count == 1);
|
||||
|
||||
// Shift candidates
|
||||
// NOTE(review): declaration in an unnamed block after statements — see the
// language note on "integer i" above.
integer j;
|
||||
for (j = 0; j < 15; j = j + 1) begin
|
||||
candidates[j] <= candidates[j+1];
|
||||
cand_dist[j] <= cand_dist[j+1];
|
||||
end
|
||||
cand_count <= cand_count - 1;
|
||||
end else begin
|
||||
result_valid <= 1'b0;
|
||||
cmd_ready <= 1'b1;
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// ============ Distance Computation Pipeline ============
|
||||
// While dist_computing is set, adds one squared element difference between
// query_buf and vectors[dist_target] to dist_accum per clock, then clears
// dist_computing once dist_dim reaches current_dims.
// NOTE(review): these regs are also written by the state machine above.
always @(posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
dist_computing <= 1'b0;
|
||||
dist_accum <= 0;
|
||||
end else if (dist_computing) begin
|
||||
if (dist_dim < current_dims) begin
|
||||
// Compute (query - vector)^2 in fixed-point
|
||||
// diff is one bit wider than the operands so the signed subtraction
// cannot overflow; both are blocking-assigned temporaries.
reg signed [DATA_WIDTH:0] diff;
|
||||
reg [31:0] sq;
|
||||
|
||||
diff = $signed(query_buf[dist_dim]) - $signed(vectors[dist_target][dist_dim]);
|
||||
sq = diff * diff;
|
||||
dist_accum <= dist_accum + sq;
|
||||
dist_dim <= dist_dim + 1;
|
||||
end else begin
|
||||
dist_computing <= 1'b0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
// ============ Distance Unit - Pipelined L2 ============
|
||||
// Sequential squared-Euclidean distance accumulator.
//
// Protocol (as implemented below):
//   * `start` sampled high while the unit is idle clears the accumulator and
//     begins a run.
//   * On each of the following `dims` clock edges one (a_data, b_data) pair
//     is consumed and (a_data - b_data)^2 is added to the accumulator. The
//     first pair is sampled on the clock edge AFTER the one that accepted
//     `start`.
//   * One cycle after the last pair, `distance` is updated with the sum and
//     `done` is pulsed high for one cycle.
//
// a_data / b_data are interpreted as two's-complement signed values.
// The accumulator is 32 bits wide and wraps silently on overflow.
//
// NOTE(review): `dims` is 6 bits wide, so it cannot express MAX_DIMS = 64;
// MAX_DIMS is not used inside this module. Confirm the intended range with
// the instantiating design.
module distance_unit #(
    parameter DATA_WIDTH = 16,
    parameter MAX_DIMS   = 64
)(
    input  wire                  clk,
    input  wire                  rst_n,
    input  wire                  start,
    input  wire [5:0]            dims,
    input  wire [DATA_WIDTH-1:0] a_data,
    input  wire [DATA_WIDTH-1:0] b_data,
    output reg  [31:0]           distance,
    output reg                   done
);

    reg [5:0]  dim_idx;
    reg [31:0] accum;
    reg        computing;

    // Signed difference, one bit wider than the operands so it cannot
    // overflow.
    wire signed [DATA_WIDTH:0] diff = $signed(a_data) - $signed(b_data);

    // Square evaluated in an all-signed expression so `diff` is SIGN-extended
    // to 32 bits before the multiply. Writing `accum + (diff * diff)` directly
    // would make the whole expression unsigned (accum is unsigned), which
    // zero-extends a negative `diff` and yields a wrong square.
    wire [31:0] diff_sq = diff * diff;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            done      <= 1'b0;
            computing <= 1'b0;
            accum     <= 32'd0;
            dim_idx   <= 6'd0;
            distance  <= 32'd0;
        end else begin
            if (start && !computing) begin
                // Accept a new run: clear running state.
                computing <= 1'b1;
                dim_idx   <= 6'd0;
                accum     <= 32'd0;
                done      <= 1'b0;
            end else if (computing) begin
                if (dim_idx < dims) begin
                    // Accumulate the squared difference for this dimension.
                    accum   <= accum + diff_sq;
                    dim_idx <= dim_idx + 6'd1;
                end else begin
                    // All dimensions consumed: publish the result.
                    distance  <= accum;
                    done      <= 1'b1;
                    computing <= 1'b0;
                end
            end else begin
                // Idle with no start request: `done` is a one-cycle pulse.
                done <= 1'b0;
            end
        end
    end

endmodule
|
||||
|
||||
|
||||
// ============ Priority Queue for Candidates ============
|
||||
// Small sorted-array priority queue (ascending distance, minimum at slot 0).
//
// Insert: when `insert_valid` is high and the queue is not full, the
//         (insert_idx, insert_dist) pair is placed in sorted position.
//         Entries with an equal distance stay ahead of the new entry.
// Pop:    when `pop_valid` is high and the queue is not empty, the head
//         entry is copied to pop_idx / pop_dist, `pop_ready` is pulsed for
//         one cycle, and the remaining entries shift down by one.
// Both in the same cycle: the current head is popped and the new entry is
//         inserted into the remaining entries; `count` is unchanged.
//         The popped value is always the head that was stored before the
//         clock edge, even if the entry being inserted has a smaller distance.
// Clear:  synchronous; empties the queue on the next clock edge.
//
// `count` is a fixed 5-bit port, so DEPTH must not exceed 31 for `full`
// to be reachable.
module priority_queue #(
    parameter DEPTH      = 16,
    parameter IDX_WIDTH  = 8,
    parameter DIST_WIDTH = 16
)(
    input  wire                  clk,
    input  wire                  rst_n,
    input  wire                  clear,

    // Insert interface
    input  wire                  insert_valid,
    output wire                  insert_ready,
    input  wire [IDX_WIDTH-1:0]  insert_idx,
    input  wire [DIST_WIDTH-1:0] insert_dist,

    // Pop interface (returns min distance)
    input  wire                  pop_valid,
    output reg                   pop_ready,
    output reg  [IDX_WIDTH-1:0]  pop_idx,
    output reg  [DIST_WIDTH-1:0] pop_dist,

    // Status
    output reg  [4:0]            count,
    output wire                  empty,
    output wire                  full
);

    reg [IDX_WIDTH-1:0]  indices   [0:DEPTH-1];
    reg [DIST_WIDTH-1:0] distances [0:DEPTH-1];

    assign empty        = (count == 0);
    assign full         = (count == DEPTH);
    assign insert_ready = !full;

    // Scratch variables for the per-slot next-state computation. They are
    // always written (blocking) before being read inside the clocked block,
    // so they do not hold state between clock edges.
    integer              i;
    reg                  do_insert;
    reg                  do_pop;
    reg [4:0]            base_count;  // live entries after the optional pop
    reg [IDX_WIDTH-1:0]  src_idx;     // entry feeding slot i after the pop shift
    reg [DIST_WIDTH-1:0] src_dist;
    reg [IDX_WIDTH-1:0]  prev_idx;    // entry that fed slot i-1
    reg [DIST_WIDTH-1:0] prev_dist;
    reg                  keep;        // slot i keeps its source entry
    reg                  prev_keep;   // slot i-1 kept its source entry

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // Asynchronous reset.
            count     <= 5'd0;
            pop_ready <= 1'b0;
        end else if (clear) begin
            // Synchronous clear, kept separate from the async reset term.
            count     <= 5'd0;
            pop_ready <= 1'b0;
        end else begin
            do_pop     = pop_valid && !empty;
            do_insert  = insert_valid && !full;
            base_count = count - do_pop;

            // Pop side: expose the current head.
            if (do_pop) begin
                pop_idx  <= indices[0];
                pop_dist <= distances[0];
            end
            pop_ready <= do_pop;

            // Array update. For each slot, pick its source entry (shifted by
            // one if popping) and then decide whether the slot keeps it,
            // receives the new entry, or receives the source of the slot
            // below it (shift up to make room for the insertion).
            prev_keep = 1'b1;                 // makes slot 0 eligible for the new entry
            prev_idx  = {IDX_WIDTH{1'b0}};
            prev_dist = {DIST_WIDTH{1'b0}};
            for (i = 0; i < DEPTH; i = i + 1) begin
                if (do_pop && (i < DEPTH - 1)) begin
                    src_idx  = indices[i + 1];
                    src_dist = distances[i + 1];
                end else begin
                    src_idx  = indices[i];
                    src_dist = distances[i];
                end

                // A live entry stays below the insertion point unless the new
                // distance is strictly smaller.
                keep = (i < base_count) && !(insert_dist < src_dist);

                if (do_insert && !keep) begin
                    if (prev_keep) begin
                        // First slot that does not keep its entry: insertion point.
                        indices[i]   <= insert_idx;
                        distances[i] <= insert_dist;
                    end else begin
                        // Above the insertion point: shift up by one.
                        indices[i]   <= prev_idx;
                        distances[i] <= prev_dist;
                    end
                end else if (do_pop) begin
                    // Pop-only effect on this slot: shift down by one.
                    indices[i]   <= src_idx;
                    distances[i] <= src_dist;
                end

                prev_keep = keep;
                prev_idx  = src_idx;
                prev_dist = src_dist;
            end

            // Occupancy: unchanged when insert and pop coincide.
            if (do_insert && !do_pop) begin
                count <= count + 5'd1;
            end else if (do_pop && !do_insert) begin
                count <= count - 5'd1;
            end
        end
    end

endmodule
|
||||
|
||||
|
||||
// ============ AXI-Lite Wrapper ============
|
||||
// AXI4-Lite slave wrapper around the `micro_hnsw` core.
//
// Register map (byte addresses):
//   0x00  Control (W)  [2:0] cmd_op, [15:8] dims, [23:16] k
//   0x04  Status  (R)  [0] core cmd_ready, [15:8] vector_count
//   0x08  Vector  (W)  [15:0] vector element, [31] last-element flag
//   0x0C  Result  (R)  [7:0] idx, [23:8] distance, [31] last
//
// Behaviour of this wrapper:
//   * A write is accepted only when AWVALID and WVALID are both high. While
//     its response is outstanding, AWREADY/WREADY are held low so a response
//     can never be overwritten. BRESP is always OKAY.
//   * A write to Control/Vector registers the payload and raises
//     cmd_valid / vec_valid for exactly one cycle, in the cycle in which the
//     registered payload is visible to the core.
//   * cmd_ready / vec_ready from the core are NOT used for back-pressure;
//     software is expected to poll Status[0] before issuing a command.
//     TODO(review): confirm the core accepts a vector element whenever one
//     is written.
//   * A read of Result captures the core's current result outputs and pulses
//     result_ready for that one cycle, so each bus read consumes at most one
//     result. The register has no valid bit; software relies on the
//     `last` flag.
//   * WSTRB is ignored: every accepted write updates the whole register.
module micro_hnsw_axi #(
    parameter C_S_AXI_DATA_WIDTH = 32,
    parameter C_S_AXI_ADDR_WIDTH = 8
)(
    // AXI-Lite interface
    input  wire                              S_AXI_ACLK,
    input  wire                              S_AXI_ARESETN,

    // Write address channel
    input  wire [C_S_AXI_ADDR_WIDTH-1:0]     S_AXI_AWADDR,
    input  wire                              S_AXI_AWVALID,
    output wire                              S_AXI_AWREADY,

    // Write data channel
    input  wire [C_S_AXI_DATA_WIDTH-1:0]     S_AXI_WDATA,
    input  wire [(C_S_AXI_DATA_WIDTH/8)-1:0] S_AXI_WSTRB,
    input  wire                              S_AXI_WVALID,
    output wire                              S_AXI_WREADY,

    // Write response channel
    output wire [1:0]                        S_AXI_BRESP,
    output wire                              S_AXI_BVALID,
    input  wire                              S_AXI_BREADY,

    // Read address channel
    input  wire [C_S_AXI_ADDR_WIDTH-1:0]     S_AXI_ARADDR,
    input  wire                              S_AXI_ARVALID,
    output wire                              S_AXI_ARREADY,

    // Read data channel
    output wire [C_S_AXI_DATA_WIDTH-1:0]     S_AXI_RDATA,
    output wire [1:0]                        S_AXI_RRESP,
    output wire                              S_AXI_RVALID,
    input  wire                              S_AXI_RREADY
);

    // Register offsets.
    localparam [7:0] ADDR_CONTROL = 8'h00;
    localparam [7:0] ADDR_STATUS  = 8'h04;
    localparam [7:0] ADDR_VECTOR  = 8'h08;
    localparam [7:0] ADDR_RESULT  = 8'h0C;

    // Core-side signals
    wire        cmd_valid, cmd_ready;
    reg  [2:0]  cmd_op;
    reg  [7:0]  cmd_dims, cmd_k;
    wire        vec_valid, vec_ready;
    reg  [15:0] vec_data;
    reg         vec_last;
    wire        result_valid, result_ready;
    wire [7:0]  result_idx;
    wire [15:0] result_dist;
    wire        result_last;
    wire [7:0]  vector_count;

    // Instantiate core
    micro_hnsw core (
        .clk(S_AXI_ACLK),
        .rst_n(S_AXI_ARESETN),
        .cmd_valid(cmd_valid),
        .cmd_ready(cmd_ready),
        .cmd_op(cmd_op),
        .cmd_dims(cmd_dims),
        .cmd_k(cmd_k),
        .vec_valid(vec_valid),
        .vec_ready(vec_ready),
        .vec_data(vec_data),
        .vec_last(vec_last),
        .result_valid(result_valid),
        .result_ready(result_ready),
        .result_idx(result_idx),
        .result_dist(result_dist),
        .result_last(result_last),
        .vector_count(vector_count)
    );

    // AXI-Lite channel state
    reg aw_ready, w_ready, ar_ready;
    reg b_valid, r_valid;
    reg [C_S_AXI_DATA_WIDTH-1:0] r_data;

    // Registered one-cycle strobes towards the core, aligned with the
    // registered payload (cmd_op/cmd_dims/cmd_k, vec_data/vec_last).
    reg cmd_valid_r, vec_valid_r;

    // A transfer happens in the cycle where both sides of the handshake agree.
    wire wr_fire = S_AXI_AWVALID && S_AXI_WVALID && aw_ready && w_ready;
    wire rd_fire = S_AXI_ARVALID && ar_ready;

    assign S_AXI_AWREADY = aw_ready;
    assign S_AXI_WREADY  = w_ready;
    assign S_AXI_BRESP   = 2'b00;   // always OKAY
    assign S_AXI_BVALID  = b_valid;
    assign S_AXI_ARREADY = ar_ready;
    assign S_AXI_RDATA   = r_data;
    assign S_AXI_RRESP   = 2'b00;   // always OKAY
    assign S_AXI_RVALID  = r_valid;

    assign cmd_valid = cmd_valid_r;
    assign vec_valid = vec_valid_r;

    // Consume one core result exactly when a Result read is accepted, i.e.
    // in the same cycle its value is captured into r_data.
    assign result_ready = rd_fire && (S_AXI_ARADDR == ADDR_RESULT);

    always @(posedge S_AXI_ACLK or negedge S_AXI_ARESETN) begin
        if (!S_AXI_ARESETN) begin
            aw_ready    <= 1'b1;
            w_ready     <= 1'b1;
            ar_ready    <= 1'b1;
            b_valid     <= 1'b0;
            r_valid     <= 1'b0;
            r_data      <= {C_S_AXI_DATA_WIDTH{1'b0}};
            cmd_op      <= 3'd0;
            cmd_dims    <= 8'd0;
            cmd_k       <= 8'd0;
            vec_data    <= 16'd0;
            vec_last    <= 1'b0;
            cmd_valid_r <= 1'b0;
            vec_valid_r <= 1'b0;
        end else begin
            // Strobes default low; raised below for one cycle per write.
            cmd_valid_r <= 1'b0;
            vec_valid_r <= 1'b0;

            // ---- Write channel ----
            if (wr_fire) begin
                case (S_AXI_AWADDR)
                    ADDR_CONTROL: begin
                        cmd_op      <= S_AXI_WDATA[2:0];
                        cmd_dims    <= S_AXI_WDATA[15:8];
                        cmd_k       <= S_AXI_WDATA[23:16];
                        cmd_valid_r <= 1'b1;
                    end
                    ADDR_VECTOR: begin
                        vec_data    <= S_AXI_WDATA[15:0];
                        vec_last    <= S_AXI_WDATA[31];
                        vec_valid_r <= 1'b1;
                    end
                    default: begin
                        // Writes to other addresses are acknowledged and ignored.
                    end
                endcase
                // Hold off further writes until this response is taken.
                b_valid  <= 1'b1;
                aw_ready <= 1'b0;
                w_ready  <= 1'b0;
            end else if (b_valid && S_AXI_BREADY) begin
                b_valid  <= 1'b0;
                aw_ready <= 1'b1;
                w_ready  <= 1'b1;
            end

            // ---- Read channel ----
            if (rd_fire) begin
                case (S_AXI_ARADDR)
                    ADDR_STATUS: r_data <= {16'b0, vector_count, 7'b0, cmd_ready};
                    ADDR_RESULT: r_data <= {result_last, 7'b0, result_dist, result_idx};
                    default:     r_data <= 32'b0;
                endcase
                // Hold off further reads until this data beat is taken.
                r_valid  <= 1'b1;
                ar_ready <= 1'b0;
            end else if (r_valid && S_AXI_RREADY) begin
                r_valid  <= 1'b0;
                ar_ready <= 1'b1;
            end
        end
    end

endmodule
|
||||
Reference in New Issue
Block a user