Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
45
vendor/ruvector/crates/rvf/rvf-wasm/src/alloc_setup.rs
vendored
Normal file
45
vendor/ruvector/crates/rvf/rvf-wasm/src/alloc_setup.rs
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
//! Global allocator for WASM heap allocation.
|
||||
//!
|
||||
//! Uses dlmalloc as the global allocator, enabling Vec, String, etc.
|
||||
//! Exposes rvf_alloc/rvf_free for JS interop memory management.
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use dlmalloc::GlobalDlmalloc;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: GlobalDlmalloc = GlobalDlmalloc;
|
||||
|
||||
/// Allocate `size` bytes of memory, returning a pointer.
|
||||
/// Returns 0 on failure.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_alloc(size: i32) -> i32 {
|
||||
if size <= 0 {
|
||||
return 0;
|
||||
}
|
||||
let layout = match core::alloc::Layout::from_size_align(size as usize, 8) {
|
||||
Ok(l) => l,
|
||||
Err(_) => return 0,
|
||||
};
|
||||
let ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if ptr.is_null() {
|
||||
0
|
||||
} else {
|
||||
ptr as i32
|
||||
}
|
||||
}
|
||||
|
||||
/// Free memory previously allocated by `rvf_alloc`.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_free(ptr: i32, size: i32) {
|
||||
if ptr == 0 || size <= 0 {
|
||||
return;
|
||||
}
|
||||
let layout = match core::alloc::Layout::from_size_align(size as usize, 8) {
|
||||
Ok(l) => l,
|
||||
Err(_) => return,
|
||||
};
|
||||
unsafe {
|
||||
alloc::alloc::dealloc(ptr as *mut u8, layout);
|
||||
}
|
||||
}
|
||||
409
vendor/ruvector/crates/rvf/rvf-wasm/src/bootstrap.rs
vendored
Normal file
409
vendor/ruvector/crates/rvf/rvf-wasm/src/bootstrap.rs
vendored
Normal file
@@ -0,0 +1,409 @@
|
||||
//! Self-bootstrapping loader for WASM_SEG segments.
|
||||
//!
|
||||
//! When an RVF file contains embedded WASM modules (WASM_SEG, type 0x10),
|
||||
//! this module provides the logic to discover and sequence them for
|
||||
//! self-bootstrapping execution.
|
||||
//!
|
||||
//! # Bootstrap Resolution Order
|
||||
//!
|
||||
//! 1. Scan all segments for WASM_SEG (type 0x10)
|
||||
//! 2. Parse the 64-byte WasmHeader from each
|
||||
//! 3. Sort by `bootstrap_priority` (lower = earlier in chain)
|
||||
//! 4. Resolve the bootstrap chain:
|
||||
//! - If a `Combined` role module exists → single-step bootstrap
|
||||
//! - If `Interpreter` + `Microkernel` both exist → two-step bootstrap
|
||||
//! - If only `Microkernel` exists → requires host WASM runtime
|
||||
//!
|
||||
//! # Self-Bootstrapping Property
|
||||
//!
|
||||
//! When an RVF file contains a WASM_SEG with `role = Interpreter` or
|
||||
//! `role = Combined`, the file is **self-bootstrapping**: any host with
|
||||
//! raw execution capability (the ability to run native code or interpret
|
||||
//! bytecode) can execute the file's contents without any external runtime.
|
||||
//!
|
||||
//! This makes RVF "run anywhere compute exists."
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use crate::segment::{SegmentInfo, parse_segments};
|
||||
|
||||
/// WASM_SEG type discriminant (matches rvf_types::SegmentType::Wasm).
|
||||
const WASM_SEG_TYPE: u8 = 0x10;
|
||||
|
||||
/// WASM_SEG header magic: "RVWM" in little-endian.
|
||||
const WASM_HEADER_MAGIC: u32 = 0x5256_574D;
|
||||
|
||||
/// WASM_SEG header size in bytes.
|
||||
const WASM_HEADER_SIZE: usize = 64;
|
||||
|
||||
/// Role discriminants matching rvf_types::wasm_bootstrap::WasmRole.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum WasmRole {
|
||||
Microkernel = 0x00,
|
||||
Interpreter = 0x01,
|
||||
Combined = 0x02,
|
||||
Extension = 0x03,
|
||||
ControlPlane = 0x04,
|
||||
}
|
||||
|
||||
/// Parsed WASM module descriptor from a WASM_SEG.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WasmModule {
|
||||
/// Role in the bootstrap chain.
|
||||
pub role: u8,
|
||||
/// Target platform.
|
||||
pub target: u8,
|
||||
/// Required WASM features bitfield.
|
||||
pub required_features: u16,
|
||||
/// Number of exports.
|
||||
pub export_count: u16,
|
||||
/// Uncompressed bytecode size.
|
||||
pub bytecode_size: u32,
|
||||
/// Bootstrap priority (lower = first).
|
||||
pub bootstrap_priority: u8,
|
||||
/// Interpreter type (if role=Interpreter).
|
||||
pub interpreter_type: u8,
|
||||
/// Byte offset of the WASM bytecode within the RVF file.
|
||||
pub bytecode_offset: usize,
|
||||
/// Length of the WASM bytecode.
|
||||
pub bytecode_len: usize,
|
||||
/// SHAKE-256-256 hash of the bytecode.
|
||||
pub bytecode_hash: [u8; 32],
|
||||
}
|
||||
|
||||
/// Bootstrap chain describing how to execute this RVF file.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum BootstrapChain {
|
||||
/// File has no WASM_SEGs — requires external runtime for all processing.
|
||||
None,
|
||||
/// File contains only a microkernel — requires host WASM runtime.
|
||||
HostRequired {
|
||||
microkernel: WasmModule,
|
||||
},
|
||||
/// File contains a combined interpreter+microkernel — single-step bootstrap.
|
||||
SelfContained {
|
||||
combined: WasmModule,
|
||||
},
|
||||
/// File contains separate interpreter and microkernel — two-step bootstrap.
|
||||
TwoStage {
|
||||
interpreter: WasmModule,
|
||||
microkernel: WasmModule,
|
||||
},
|
||||
/// File contains interpreter, microkernel, and extensions.
|
||||
Full {
|
||||
interpreter: WasmModule,
|
||||
microkernel: WasmModule,
|
||||
extensions: Vec<WasmModule>,
|
||||
},
|
||||
}
|
||||
|
||||
impl BootstrapChain {
|
||||
/// Returns true if this file can execute without any external WASM runtime.
|
||||
pub fn is_self_bootstrapping(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
BootstrapChain::SelfContained { .. }
|
||||
| BootstrapChain::TwoStage { .. }
|
||||
| BootstrapChain::Full { .. }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a WasmModule descriptor from a WASM_SEG payload.
|
||||
fn parse_wasm_module(buf: &[u8], seg_offset: usize) -> Option<WasmModule> {
|
||||
let payload_start = seg_offset + rvf_types::constants::SEGMENT_HEADER_SIZE;
|
||||
|
||||
if buf.len() < payload_start + WASM_HEADER_SIZE {
|
||||
return None;
|
||||
}
|
||||
|
||||
let hdr = &buf[payload_start..payload_start + WASM_HEADER_SIZE];
|
||||
|
||||
// Verify magic
|
||||
let magic = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]);
|
||||
if magic != WASM_HEADER_MAGIC {
|
||||
return None;
|
||||
}
|
||||
|
||||
let role = hdr[0x06];
|
||||
let target = hdr[0x07];
|
||||
let required_features = u16::from_le_bytes([hdr[0x08], hdr[0x09]]);
|
||||
let export_count = u16::from_le_bytes([hdr[0x0A], hdr[0x0B]]);
|
||||
let bytecode_size = u32::from_le_bytes([hdr[0x0C], hdr[0x0D], hdr[0x0E], hdr[0x0F]]);
|
||||
let bootstrap_priority = hdr[0x38];
|
||||
let interpreter_type = hdr[0x39];
|
||||
|
||||
let mut bytecode_hash = [0u8; 32];
|
||||
bytecode_hash.copy_from_slice(&hdr[0x18..0x38]);
|
||||
|
||||
let bytecode_offset = payload_start + WASM_HEADER_SIZE;
|
||||
let bytecode_len = bytecode_size as usize;
|
||||
|
||||
Some(WasmModule {
|
||||
role,
|
||||
target,
|
||||
required_features,
|
||||
export_count,
|
||||
bytecode_size,
|
||||
bootstrap_priority,
|
||||
interpreter_type,
|
||||
bytecode_offset,
|
||||
bytecode_len,
|
||||
bytecode_hash,
|
||||
})
|
||||
}
|
||||
|
||||
/// Discover and resolve the bootstrap chain from a raw RVF byte buffer.
|
||||
///
|
||||
/// This is the core self-bootstrapping resolver. Given the raw bytes of
|
||||
/// an RVF file, it:
|
||||
/// 1. Scans for all WASM_SEG segments
|
||||
/// 2. Parses their WasmHeaders
|
||||
/// 3. Sorts by bootstrap_priority
|
||||
/// 4. Determines the optimal bootstrap strategy
|
||||
///
|
||||
/// The returned `BootstrapChain` tells the host exactly what it needs to do:
|
||||
/// - `None` → use external runtime (file has no embedded WASM)
|
||||
/// - `HostRequired` → use host's WASM runtime to run microkernel
|
||||
/// - `SelfContained` → file bootstraps itself in one step
|
||||
/// - `TwoStage` → file bootstraps: interpreter → microkernel
|
||||
/// - `Full` → interpreter → microkernel → extensions
|
||||
pub fn resolve_bootstrap_chain(buf: &[u8]) -> BootstrapChain {
|
||||
let segments = parse_segments(buf);
|
||||
|
||||
let mut wasm_modules: Vec<WasmModule> = segments
|
||||
.iter()
|
||||
.filter(|seg| seg.seg_type == WASM_SEG_TYPE)
|
||||
.filter_map(|seg| parse_wasm_module(buf, seg.offset))
|
||||
.collect();
|
||||
|
||||
if wasm_modules.is_empty() {
|
||||
return BootstrapChain::None;
|
||||
}
|
||||
|
||||
// Sort by bootstrap priority (lower = first)
|
||||
wasm_modules.sort_by_key(|m| m.bootstrap_priority);
|
||||
|
||||
// Check for combined module (single-step bootstrap)
|
||||
if let Some(idx) = wasm_modules.iter().position(|m| m.role == WasmRole::Combined as u8) {
|
||||
return BootstrapChain::SelfContained {
|
||||
combined: wasm_modules.remove(idx),
|
||||
};
|
||||
}
|
||||
|
||||
let interpreter_idx = wasm_modules.iter().position(|m| m.role == WasmRole::Interpreter as u8);
|
||||
let microkernel_idx = wasm_modules.iter().position(|m| m.role == WasmRole::Microkernel as u8);
|
||||
|
||||
match (interpreter_idx, microkernel_idx) {
|
||||
(Some(i_idx), Some(m_idx)) => {
|
||||
// Two-stage or full bootstrap
|
||||
// Remove in reverse order to preserve indices
|
||||
let (first, second) = if i_idx > m_idx {
|
||||
let interpreter = wasm_modules.remove(i_idx);
|
||||
let microkernel = wasm_modules.remove(m_idx);
|
||||
(microkernel, interpreter)
|
||||
} else {
|
||||
let microkernel = wasm_modules.remove(m_idx);
|
||||
let interpreter = wasm_modules.remove(i_idx);
|
||||
(interpreter, microkernel)
|
||||
};
|
||||
|
||||
let extensions: Vec<WasmModule> = wasm_modules
|
||||
.into_iter()
|
||||
.filter(|m| m.role == WasmRole::Extension as u8)
|
||||
.collect();
|
||||
|
||||
if extensions.is_empty() {
|
||||
BootstrapChain::TwoStage {
|
||||
interpreter: first,
|
||||
microkernel: second,
|
||||
}
|
||||
} else {
|
||||
BootstrapChain::Full {
|
||||
interpreter: first,
|
||||
microkernel: second,
|
||||
extensions,
|
||||
}
|
||||
}
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
// Only microkernel, no interpreter → host provides runtime
|
||||
let m_idx = wasm_modules.iter().position(|m| m.role == WasmRole::Microkernel as u8).unwrap();
|
||||
BootstrapChain::HostRequired {
|
||||
microkernel: wasm_modules.remove(m_idx),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// No standard bootstrap chain found
|
||||
BootstrapChain::None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the raw WASM bytecode for a module from the RVF buffer.
|
||||
///
|
||||
/// Returns a slice into `buf` containing the WASM bytecode for the given
|
||||
/// module. This avoids copying — the caller can feed the slice directly
|
||||
/// to a WASM runtime.
|
||||
pub fn get_bytecode<'a>(buf: &'a [u8], module: &WasmModule) -> Option<&'a [u8]> {
|
||||
let end = module.bytecode_offset.checked_add(module.bytecode_len)?;
|
||||
if end > buf.len() {
|
||||
return None;
|
||||
}
|
||||
Some(&buf[module.bytecode_offset..end])
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Build a minimal WASM_SEG for testing.
|
||||
fn build_wasm_seg(role: u8, priority: u8, bytecode: &[u8]) -> Vec<u8> {
|
||||
let seg_header_size = rvf_types::constants::SEGMENT_HEADER_SIZE;
|
||||
let payload_len = WASM_HEADER_SIZE + bytecode.len();
|
||||
|
||||
let mut seg = Vec::with_capacity(seg_header_size + payload_len);
|
||||
|
||||
// Segment header (64 bytes)
|
||||
seg.extend_from_slice(&rvf_types::constants::SEGMENT_MAGIC.to_le_bytes());
|
||||
seg.push(1); // version
|
||||
seg.push(WASM_SEG_TYPE); // seg_type
|
||||
seg.extend_from_slice(&[0, 0]); // flags
|
||||
seg.extend_from_slice(&1u64.to_le_bytes()); // segment_id
|
||||
seg.extend_from_slice(&(payload_len as u64).to_le_bytes()); // payload_length
|
||||
// Fill remaining header bytes to reach 64
|
||||
while seg.len() < seg_header_size {
|
||||
seg.push(0);
|
||||
}
|
||||
|
||||
// WasmHeader (64 bytes)
|
||||
let mut wasm_hdr = [0u8; 64];
|
||||
wasm_hdr[0..4].copy_from_slice(&WASM_HEADER_MAGIC.to_le_bytes());
|
||||
wasm_hdr[0x04..0x06].copy_from_slice(&1u16.to_le_bytes()); // version
|
||||
wasm_hdr[0x06] = role;
|
||||
wasm_hdr[0x07] = 0x00; // target: Wasm32
|
||||
wasm_hdr[0x0C..0x10].copy_from_slice(&(bytecode.len() as u32).to_le_bytes());
|
||||
wasm_hdr[0x38] = priority;
|
||||
seg.extend_from_slice(&wasm_hdr);
|
||||
|
||||
// Bytecode
|
||||
seg.extend_from_slice(bytecode);
|
||||
|
||||
seg
|
||||
}
|
||||
|
||||
/// Build a minimal MANIFEST_SEG so the file is structurally valid.
|
||||
fn build_manifest_seg(seg_id: u64) -> Vec<u8> {
|
||||
let seg_header_size = rvf_types::constants::SEGMENT_HEADER_SIZE;
|
||||
let payload = [0u8; 4]; // minimal payload
|
||||
let mut seg = Vec::with_capacity(seg_header_size + payload.len());
|
||||
|
||||
seg.extend_from_slice(&rvf_types::constants::SEGMENT_MAGIC.to_le_bytes());
|
||||
seg.push(1);
|
||||
seg.push(0x05); // Manifest
|
||||
seg.extend_from_slice(&[0, 0]);
|
||||
seg.extend_from_slice(&seg_id.to_le_bytes());
|
||||
seg.extend_from_slice(&(payload.len() as u64).to_le_bytes());
|
||||
while seg.len() < seg_header_size {
|
||||
seg.push(0);
|
||||
}
|
||||
seg.extend_from_slice(&payload);
|
||||
seg
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_no_wasm_segments() {
|
||||
let buf = build_manifest_seg(1);
|
||||
let chain = resolve_bootstrap_chain(&buf);
|
||||
assert!(matches!(chain, BootstrapChain::None));
|
||||
assert!(!chain.is_self_bootstrapping());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_microkernel_only() {
|
||||
let fake_bytecode = b"\x00asm\x01\x00\x00\x00_microkernel_";
|
||||
let mut buf = build_wasm_seg(WasmRole::Microkernel as u8, 1, fake_bytecode);
|
||||
buf.extend_from_slice(&build_manifest_seg(2));
|
||||
|
||||
let chain = resolve_bootstrap_chain(&buf);
|
||||
assert!(matches!(chain, BootstrapChain::HostRequired { .. }));
|
||||
assert!(!chain.is_self_bootstrapping());
|
||||
|
||||
if let BootstrapChain::HostRequired { microkernel } = &chain {
|
||||
assert_eq!(microkernel.role, WasmRole::Microkernel as u8);
|
||||
assert_eq!(microkernel.bytecode_len, fake_bytecode.len());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_combined_self_bootstrap() {
|
||||
let fake_bytecode = b"\x00asm\x01\x00\x00\x00_combined_interp_plus_kernel_";
|
||||
let mut buf = build_wasm_seg(WasmRole::Combined as u8, 0, fake_bytecode);
|
||||
buf.extend_from_slice(&build_manifest_seg(2));
|
||||
|
||||
let chain = resolve_bootstrap_chain(&buf);
|
||||
assert!(chain.is_self_bootstrapping());
|
||||
assert!(matches!(chain, BootstrapChain::SelfContained { .. }));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_two_stage_bootstrap() {
|
||||
let interp_bytecode = b"\x00asm\x01\x00\x00\x00_interpreter_runtime_";
|
||||
let kernel_bytecode = b"\x00asm\x01\x00\x00\x00_microkernel_";
|
||||
|
||||
let mut buf = build_wasm_seg(WasmRole::Interpreter as u8, 0, interp_bytecode);
|
||||
// Adjust segment_id for second segment
|
||||
let mut kernel_seg = build_wasm_seg(WasmRole::Microkernel as u8, 1, kernel_bytecode);
|
||||
// Fix the segment_id to 2
|
||||
kernel_seg[8..16].copy_from_slice(&2u64.to_le_bytes());
|
||||
buf.extend_from_slice(&kernel_seg);
|
||||
buf.extend_from_slice(&build_manifest_seg(3));
|
||||
|
||||
let chain = resolve_bootstrap_chain(&buf);
|
||||
assert!(chain.is_self_bootstrapping());
|
||||
assert!(matches!(chain, BootstrapChain::TwoStage { .. }));
|
||||
|
||||
if let BootstrapChain::TwoStage { interpreter, microkernel } = &chain {
|
||||
assert_eq!(interpreter.role, WasmRole::Interpreter as u8);
|
||||
assert_eq!(microkernel.role, WasmRole::Microkernel as u8);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_bytecode_returns_correct_slice() {
|
||||
let fake_bytecode = b"\x00asm\x01\x00\x00\x00_test_module_";
|
||||
let buf = build_wasm_seg(WasmRole::Microkernel as u8, 0, fake_bytecode);
|
||||
|
||||
let segments = parse_segments(&buf);
|
||||
assert!(!segments.is_empty());
|
||||
|
||||
let module = parse_wasm_module(&buf, segments[0].offset).unwrap();
|
||||
let extracted = get_bytecode(&buf, &module).unwrap();
|
||||
assert_eq!(extracted, fake_bytecode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_priority_ordering() {
|
||||
// Create two modules with reversed priorities
|
||||
let high_priority = b"\x00asm\x01\x00\x00\x00_hi_";
|
||||
let low_priority = b"\x00asm\x01\x00\x00\x00_lo_";
|
||||
|
||||
// Microkernel at priority 10, interpreter at priority 0
|
||||
let mut buf = build_wasm_seg(WasmRole::Microkernel as u8, 10, high_priority);
|
||||
let mut interp_seg = build_wasm_seg(WasmRole::Interpreter as u8, 0, low_priority);
|
||||
interp_seg[8..16].copy_from_slice(&2u64.to_le_bytes());
|
||||
buf.extend_from_slice(&interp_seg);
|
||||
buf.extend_from_slice(&build_manifest_seg(3));
|
||||
|
||||
let chain = resolve_bootstrap_chain(&buf);
|
||||
assert!(chain.is_self_bootstrapping());
|
||||
|
||||
// The interpreter should have lower priority (comes first)
|
||||
if let BootstrapChain::TwoStage { interpreter, microkernel } = &chain {
|
||||
assert_eq!(interpreter.bootstrap_priority, 0);
|
||||
assert_eq!(microkernel.bootstrap_priority, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
124
vendor/ruvector/crates/rvf/rvf-wasm/src/distance.rs
vendored
Normal file
124
vendor/ruvector/crates/rvf/rvf-wasm/src/distance.rs
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
//! Distance functions for vector similarity computation.
|
||||
//!
|
||||
//! Scalar fallbacks for all metrics. WASM v128 SIMD would be added
|
||||
//! as a future optimization when targeting wasm32 with simd128 feature.
|
||||
|
||||
/// Convert a 16-bit IEEE 754 half-precision value to f32.
|
||||
#[inline]
|
||||
fn f16_to_f32(bits: u16) -> f32 {
|
||||
let sign = ((bits >> 15) & 1) as u32;
|
||||
let exp = ((bits >> 10) & 0x1F) as u32;
|
||||
let mantissa = (bits & 0x03FF) as u32;
|
||||
|
||||
if exp == 0 {
|
||||
if mantissa == 0 {
|
||||
return f32::from_bits(sign << 31);
|
||||
}
|
||||
// Subnormal: normalize
|
||||
let mut m = mantissa;
|
||||
let mut e: i32 = -14;
|
||||
while m & 0x0400 == 0 {
|
||||
m <<= 1;
|
||||
e -= 1;
|
||||
}
|
||||
m &= 0x03FF;
|
||||
let f32_exp = ((e + 127) as u32) & 0xFF;
|
||||
return f32::from_bits((sign << 31) | (f32_exp << 23) | (m << 13));
|
||||
}
|
||||
|
||||
if exp == 0x1F {
|
||||
let f32_mantissa = mantissa << 13;
|
||||
return f32::from_bits((sign << 31) | (0xFF << 23) | f32_mantissa);
|
||||
}
|
||||
|
||||
let f32_exp = (exp as i32 - 15 + 127) as u32;
|
||||
f32::from_bits((sign << 31) | (f32_exp << 23) | (mantissa << 13))
|
||||
}
|
||||
|
||||
/// Read a u16 from a byte pointer at the given index (little-endian).
|
||||
#[inline]
|
||||
unsafe fn read_u16(ptr: *const u8, idx: usize) -> u16 {
|
||||
let p = ptr.add(idx * 2);
|
||||
u16::from_le_bytes([*p, *p.add(1)])
|
||||
}
|
||||
|
||||
/// L2 (squared Euclidean) distance between two fp16 vectors.
|
||||
pub fn l2_fp16(a_ptr: *const u8, b_ptr: *const u8, dim: usize) -> f32 {
|
||||
let mut sum: f32 = 0.0;
|
||||
for i in 0..dim {
|
||||
let a = f16_to_f32(unsafe { read_u16(a_ptr, i) });
|
||||
let b = f16_to_f32(unsafe { read_u16(b_ptr, i) });
|
||||
let diff = a - b;
|
||||
sum += diff * diff;
|
||||
}
|
||||
sum
|
||||
}
|
||||
|
||||
/// Inner product distance between two fp16 vectors.
|
||||
/// Returns negative inner product (so smaller = more similar).
|
||||
pub fn ip_fp16(a_ptr: *const u8, b_ptr: *const u8, dim: usize) -> f32 {
|
||||
let mut sum: f32 = 0.0;
|
||||
for i in 0..dim {
|
||||
let a = f16_to_f32(unsafe { read_u16(a_ptr, i) });
|
||||
let b = f16_to_f32(unsafe { read_u16(b_ptr, i) });
|
||||
sum += a * b;
|
||||
}
|
||||
-sum
|
||||
}
|
||||
|
||||
/// Cosine distance between two fp16 vectors.
|
||||
/// Returns 1.0 - cosine_similarity.
|
||||
pub fn cosine_fp16(a_ptr: *const u8, b_ptr: *const u8, dim: usize) -> f32 {
|
||||
let mut dot: f32 = 0.0;
|
||||
let mut norm_a: f32 = 0.0;
|
||||
let mut norm_b: f32 = 0.0;
|
||||
for i in 0..dim {
|
||||
let a = f16_to_f32(unsafe { read_u16(a_ptr, i) });
|
||||
let b = f16_to_f32(unsafe { read_u16(b_ptr, i) });
|
||||
dot += a * b;
|
||||
norm_a += a * a;
|
||||
norm_b += b * b;
|
||||
}
|
||||
let denom = sqrt_approx(norm_a) * sqrt_approx(norm_b);
|
||||
if denom < 1e-10 {
|
||||
return 1.0;
|
||||
}
|
||||
1.0 - (dot / denom)
|
||||
}
|
||||
|
||||
/// Hamming distance between two byte arrays.
|
||||
/// Counts the number of differing bits.
|
||||
pub fn hamming(a_ptr: *const u8, b_ptr: *const u8, byte_len: usize) -> f32 {
|
||||
let mut count: u32 = 0;
|
||||
for i in 0..byte_len {
|
||||
let xor = unsafe { *a_ptr.add(i) ^ *b_ptr.add(i) };
|
||||
count += xor.count_ones();
|
||||
}
|
||||
count as f32
|
||||
}
|
||||
|
||||
/// L2 (squared Euclidean) distance between two i8 vectors.
|
||||
pub fn l2_i8(a_ptr: *const u8, b_ptr: *const u8, dim: usize) -> f32 {
|
||||
let mut sum: f32 = 0.0;
|
||||
for i in 0..dim {
|
||||
let a = unsafe { *a_ptr.add(i) } as i8 as f32;
|
||||
let b = unsafe { *b_ptr.add(i) } as i8 as f32;
|
||||
let diff = a - b;
|
||||
sum += diff * diff;
|
||||
}
|
||||
sum
|
||||
}
|
||||
|
||||
/// Fast approximate square root.
|
||||
#[inline]
|
||||
fn sqrt_approx(x: f32) -> f32 {
|
||||
if x <= 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
let mut bits = x.to_bits();
|
||||
bits = 0x1FBD_1DF5 + (bits >> 1);
|
||||
let mut y = f32::from_bits(bits);
|
||||
y = 0.5 * (y + x / y);
|
||||
y = 0.5 * (y + x / y);
|
||||
y
|
||||
}
|
||||
775
vendor/ruvector/crates/rvf/rvf-wasm/src/lib.rs
vendored
Normal file
775
vendor/ruvector/crates/rvf/rvf-wasm/src/lib.rs
vendored
Normal file
@@ -0,0 +1,775 @@
|
||||
//! RVF WASM Microkernel for Cognitum tiles.
|
||||
//!
|
||||
//! All 14 exports as `#[no_mangle] pub extern "C" fn`.
|
||||
//! No allocator — all memory is statically laid out in WASM linear memory.
|
||||
//! Target: wasm32-unknown-unknown, < 8 KB after wasm-opt.
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
mod alloc_setup;
|
||||
pub mod bootstrap;
|
||||
mod distance;
|
||||
mod memory;
|
||||
mod segment;
|
||||
mod store;
|
||||
mod topk;
|
||||
|
||||
use memory::*;
|
||||
|
||||
// =====================================================================
|
||||
// Core Query Path
|
||||
// =====================================================================
|
||||
|
||||
/// Initialize tile with configuration from data memory.
|
||||
/// config_ptr: pointer to 64-byte tile config.
|
||||
/// Returns 0 on success, negative on error.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_init(config_ptr: i32) -> i32 {
|
||||
let ptr = config_ptr as usize;
|
||||
if ptr + TILE_CONFIG_SIZE > DATA_MEMORY_SIZE {
|
||||
return -1;
|
||||
}
|
||||
unsafe {
|
||||
let src = config_ptr as *const u8;
|
||||
let dst = DATA_MEMORY.as_mut_ptr();
|
||||
core::ptr::copy_nonoverlapping(src, dst, TILE_CONFIG_SIZE);
|
||||
}
|
||||
topk::heap_reset();
|
||||
0
|
||||
}
|
||||
|
||||
/// Load query vector into query scratch area.
|
||||
/// query_ptr: pointer to fp16 vector in data memory.
|
||||
/// dim: vector dimensionality.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_load_query(query_ptr: i32, dim: i32) -> i32 {
|
||||
let dim = dim as usize;
|
||||
let byte_len = dim * 2; // fp16 = 2 bytes per element
|
||||
if byte_len > QUERY_SCRATCH_SIZE {
|
||||
return -1;
|
||||
}
|
||||
unsafe {
|
||||
let src = query_ptr as *const u8;
|
||||
let dst = DATA_MEMORY.as_mut_ptr().add(QUERY_SCRATCH_OFFSET);
|
||||
core::ptr::copy_nonoverlapping(src, dst, byte_len);
|
||||
let dim_ptr = DATA_MEMORY.as_mut_ptr().add(TILE_CONFIG_DIM_OFFSET) as *mut u32;
|
||||
*dim_ptr = dim as u32;
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Load a block of vectors into SIMD scratch.
|
||||
/// block_ptr: source pointer, count: number of vectors, dtype: data type.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_load_block(block_ptr: i32, count: i32, dtype: i32) -> i32 {
|
||||
let count = count as usize;
|
||||
let dim = unsafe {
|
||||
let dim_ptr = DATA_MEMORY.as_ptr().add(TILE_CONFIG_DIM_OFFSET) as *const u32;
|
||||
*dim_ptr as usize
|
||||
};
|
||||
let elem_size = match dtype {
|
||||
0 => 2, // fp16
|
||||
1 => 1, // i8
|
||||
2 => 4, // f32
|
||||
_ => return -1,
|
||||
};
|
||||
let total_bytes = count * dim * elem_size;
|
||||
if total_bytes > SIMD_BLOCK_SIZE {
|
||||
return -1;
|
||||
}
|
||||
unsafe {
|
||||
let src = block_ptr as *const u8;
|
||||
let dst = SIMD_SCRATCH.as_mut_ptr();
|
||||
core::ptr::copy_nonoverlapping(src, dst, total_bytes);
|
||||
let count_ptr = DATA_MEMORY.as_mut_ptr().add(TILE_CONFIG_COUNT_OFFSET) as *mut u32;
|
||||
*count_ptr = count as u32;
|
||||
let dtype_ptr = DATA_MEMORY.as_mut_ptr().add(TILE_CONFIG_DTYPE_OFFSET) as *mut u32;
|
||||
*dtype_ptr = dtype as u32;
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Compute distances between query and loaded block.
|
||||
/// metric: 0=L2, 1=IP, 2=cosine, 3=hamming.
|
||||
/// result_ptr: pointer to write f32 distance results.
|
||||
/// Returns number of distances computed, or negative on error.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_distances(metric: i32, result_ptr: i32) -> i32 {
|
||||
let (dim, count, dtype) = unsafe {
|
||||
let dim = *(DATA_MEMORY.as_ptr().add(TILE_CONFIG_DIM_OFFSET) as *const u32) as usize;
|
||||
let count =
|
||||
*(DATA_MEMORY.as_ptr().add(TILE_CONFIG_COUNT_OFFSET) as *const u32) as usize;
|
||||
let dtype = *(DATA_MEMORY.as_ptr().add(TILE_CONFIG_DTYPE_OFFSET) as *const u32);
|
||||
(dim, count, dtype)
|
||||
};
|
||||
|
||||
if dim == 0 || count == 0 {
|
||||
return -1;
|
||||
}
|
||||
|
||||
let query_ptr = unsafe { DATA_MEMORY.as_ptr().add(QUERY_SCRATCH_OFFSET) };
|
||||
let block_ptr = unsafe { SIMD_SCRATCH.as_ptr() };
|
||||
let out_ptr = result_ptr as *mut f32;
|
||||
|
||||
for i in 0..count {
|
||||
let dist = match dtype {
|
||||
0 => {
|
||||
let vec_offset = i * dim * 2;
|
||||
let vec_ptr = unsafe { block_ptr.add(vec_offset) };
|
||||
match metric {
|
||||
0 => distance::l2_fp16(query_ptr, vec_ptr, dim),
|
||||
1 => distance::ip_fp16(query_ptr, vec_ptr, dim),
|
||||
2 => distance::cosine_fp16(query_ptr, vec_ptr, dim),
|
||||
3 => distance::hamming(query_ptr, vec_ptr, dim * 2),
|
||||
_ => return -1,
|
||||
}
|
||||
}
|
||||
1 => {
|
||||
let vec_offset = i * dim;
|
||||
let vec_ptr = unsafe { block_ptr.add(vec_offset) };
|
||||
match metric {
|
||||
0 => distance::l2_i8(query_ptr, vec_ptr, dim),
|
||||
3 => distance::hamming(query_ptr, vec_ptr, dim),
|
||||
_ => return -1,
|
||||
}
|
||||
}
|
||||
_ => return -1,
|
||||
};
|
||||
unsafe {
|
||||
*out_ptr.add(i) = dist;
|
||||
}
|
||||
}
|
||||
|
||||
count as i32
|
||||
}
|
||||
|
||||
/// Merge distances into top-K heap.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_topk_merge(dist_ptr: i32, id_ptr: i32, count: i32, k: i32) -> i32 {
|
||||
let k = k as usize;
|
||||
let count = count as usize;
|
||||
if k > topk::MAX_K {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for i in 0..count {
|
||||
let dist = unsafe { *(dist_ptr as *const f32).add(i) };
|
||||
let id = unsafe { *(id_ptr as *const u64).add(i) };
|
||||
topk::heap_insert(dist, id, k);
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
/// Read current top-K results into output buffer.
|
||||
/// out_ptr: pointer to write (id: u64, dist: f32) pairs.
|
||||
/// Returns number of results written.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_topk_read(out_ptr: i32) -> i32 {
|
||||
topk::heap_read_sorted(out_ptr as *mut u8)
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// Quantization
|
||||
// =====================================================================
|
||||
|
||||
/// Load scalar quantization parameters (min/max per dimension).
|
||||
/// params_ptr: pointer to f32 pairs [min0, max0, min1, max1, ...].
|
||||
/// dim: number of dimensions.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_load_sq_params(params_ptr: i32, dim: i32) -> i32 {
|
||||
let byte_len = dim as usize * 8; // 2 f32 per dim
|
||||
if byte_len > DECODE_WORKSPACE_SIZE {
|
||||
return -1;
|
||||
}
|
||||
unsafe {
|
||||
let src = params_ptr as *const u8;
|
||||
let dst = DATA_MEMORY.as_mut_ptr().add(DECODE_WORKSPACE_OFFSET);
|
||||
core::ptr::copy_nonoverlapping(src, dst, byte_len);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Dequantize int8 block to fp16 in SIMD scratch.
|
||||
/// src_ptr: source i8 data, dst_ptr: destination fp16 data, count: total values.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_dequant_i8(src_ptr: i32, dst_ptr: i32, count: i32) -> i32 {
|
||||
let dim = unsafe {
|
||||
*(DATA_MEMORY.as_ptr().add(TILE_CONFIG_DIM_OFFSET) as *const u32) as usize
|
||||
};
|
||||
if dim == 0 {
|
||||
return -1;
|
||||
}
|
||||
|
||||
let params = unsafe { DATA_MEMORY.as_ptr().add(DECODE_WORKSPACE_OFFSET) as *const f32 };
|
||||
|
||||
for i in 0..(count as usize) {
|
||||
let dim_idx = i % dim;
|
||||
let min_val = unsafe { *params.add(dim_idx * 2) };
|
||||
let max_val = unsafe { *params.add(dim_idx * 2 + 1) };
|
||||
let raw = unsafe { *(src_ptr as *const i8).add(i) } as f32;
|
||||
let normalized = (raw + 128.0) / 255.0;
|
||||
let val = min_val + normalized * (max_val - min_val);
|
||||
let fp16_bits = f32_to_f16(val);
|
||||
unsafe {
|
||||
*(dst_ptr as *mut u16).add(i) = fp16_bits;
|
||||
}
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
/// Load PQ codebook subset into SIMD scratch distance accumulator area.
|
||||
/// codebook_ptr: source data, m: number of subspaces, k: centroids per subspace.
|
||||
/// Returns 0 on success.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_load_pq_codebook(codebook_ptr: i32, m: i32, k: i32) -> i32 {
|
||||
let dim = unsafe {
|
||||
*(DATA_MEMORY.as_ptr().add(TILE_CONFIG_DIM_OFFSET) as *const u32) as usize
|
||||
};
|
||||
let m_usize = m as usize;
|
||||
if m_usize == 0 {
|
||||
return -1;
|
||||
}
|
||||
let sub_dim = dim / m_usize;
|
||||
let total_bytes = m_usize * k as usize * sub_dim * 2;
|
||||
if total_bytes > SIMD_PQ_TABLE_SIZE {
|
||||
return -1;
|
||||
}
|
||||
unsafe {
|
||||
let src = codebook_ptr as *const u8;
|
||||
let dst = SIMD_SCRATCH.as_mut_ptr().add(SIMD_PQ_TABLE_OFFSET);
|
||||
core::ptr::copy_nonoverlapping(src, dst, total_bytes);
|
||||
let m_ptr = DATA_MEMORY.as_mut_ptr().add(TILE_CONFIG_PQ_M_OFFSET) as *mut u32;
|
||||
*m_ptr = m as u32;
|
||||
let k_ptr = DATA_MEMORY.as_mut_ptr().add(TILE_CONFIG_PQ_K_OFFSET) as *mut u32;
|
||||
*k_ptr = k as u32;
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Compute PQ asymmetric distances.
|
||||
/// codes_ptr: PQ codes (m bytes per vector), count: number of vectors.
|
||||
/// result_ptr: output f32 distances.
|
||||
/// Returns number of distances computed.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_pq_distances(codes_ptr: i32, count: i32, result_ptr: i32) -> i32 {
|
||||
let (dim, m, k) = unsafe {
|
||||
let dim = *(DATA_MEMORY.as_ptr().add(TILE_CONFIG_DIM_OFFSET) as *const u32) as usize;
|
||||
let m = *(DATA_MEMORY.as_ptr().add(TILE_CONFIG_PQ_M_OFFSET) as *const u32) as usize;
|
||||
let k = *(DATA_MEMORY.as_ptr().add(TILE_CONFIG_PQ_K_OFFSET) as *const u32) as usize;
|
||||
(dim, m, k)
|
||||
};
|
||||
if m == 0 || k == 0 || dim == 0 {
|
||||
return -1;
|
||||
}
|
||||
let sub_dim = dim / m;
|
||||
let query_ptr = unsafe { DATA_MEMORY.as_ptr().add(QUERY_SCRATCH_OFFSET) };
|
||||
let codebook_ptr = unsafe { SIMD_SCRATCH.as_ptr().add(SIMD_PQ_TABLE_OFFSET) };
|
||||
|
||||
// Precompute query-centroid distance lookup table
|
||||
let dlt_ptr = unsafe { SIMD_SCRATCH.as_mut_ptr().add(SIMD_HOT_CACHE_OFFSET) as *mut f32 };
|
||||
for sub in 0..m {
|
||||
let q_offset = sub * sub_dim * 2;
|
||||
for c in 0..k {
|
||||
let cb_offset = (sub * k + c) * sub_dim * 2;
|
||||
let dist = distance::l2_fp16(
|
||||
unsafe { query_ptr.add(q_offset) },
|
||||
unsafe { codebook_ptr.add(cb_offset) },
|
||||
sub_dim,
|
||||
);
|
||||
unsafe {
|
||||
*dlt_ptr.add(sub * k + c) = dist;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..(count as usize) {
|
||||
let mut total_dist: f32 = 0.0;
|
||||
for sub in 0..m {
|
||||
let code = unsafe { *(codes_ptr as *const u8).add(i * m + sub) } as usize;
|
||||
if code < k {
|
||||
total_dist += unsafe { *dlt_ptr.add(sub * k + code) };
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
*(result_ptr as *mut f32).add(i) = total_dist;
|
||||
}
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// HNSW Navigation
|
||||
// =====================================================================
|
||||
|
||||
/// Load neighbor list for a node into the neighbor cache.
|
||||
/// Returns number of neighbors loaded.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_load_neighbors(node_id: i64, layer: i32, out_ptr: i32) -> i32 {
|
||||
let _ = node_id;
|
||||
let _ = layer;
|
||||
unsafe {
|
||||
let cache_ptr = DATA_MEMORY.as_mut_ptr().add(NEIGHBOR_CACHE_OFFSET) as *mut i32;
|
||||
*cache_ptr = out_ptr;
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Greedy search step: from current_id at a given layer, find nearest neighbor.
|
||||
/// Returns the ID of the nearest unvisited neighbor, or -1 if none.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_greedy_step(current_id: i64, layer: i32) -> i64 {
|
||||
let _ = layer;
|
||||
let neighbor_ptr = unsafe {
|
||||
*(DATA_MEMORY.as_ptr().add(NEIGHBOR_CACHE_OFFSET) as *const i32)
|
||||
};
|
||||
if neighbor_ptr == 0 {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Neighbor list format: [count: u32, (id: u64, dist: f32)*]
|
||||
let count = unsafe { *(neighbor_ptr as *const u32) } as usize;
|
||||
if count == 0 {
|
||||
return -1;
|
||||
}
|
||||
|
||||
let mut best_id: i64 = -1;
|
||||
let mut best_dist: f32 = f32::MAX;
|
||||
|
||||
let entries_ptr = unsafe { (neighbor_ptr as *const u8).add(4) };
|
||||
for i in 0..count {
|
||||
let offset = i * 12; // 8 bytes id + 4 bytes dist
|
||||
let id = unsafe { *(entries_ptr.add(offset) as *const u64) } as i64;
|
||||
let dist = unsafe { *(entries_ptr.add(offset + 8) as *const f32) };
|
||||
if id != current_id && dist < best_dist {
|
||||
best_dist = dist;
|
||||
best_id = id;
|
||||
}
|
||||
}
|
||||
|
||||
best_id
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// Segment Verification
|
||||
// =====================================================================
|
||||
|
||||
/// Verify segment header magic and version.
/// Returns 0 if valid, 1 on a magic mismatch, 2 on an unsupported version.
#[no_mangle]
pub extern "C" fn rvf_verify_header(header_ptr: i32) -> i32 {
    // Only the first five header bytes matter: 4-byte LE magic + version.
    let header = unsafe { core::slice::from_raw_parts(header_ptr as *const u8, 5) };
    let magic = u32::from_le_bytes([header[0], header[1], header[2], header[3]]);
    if magic != 0x5256_4653 {
        1
    } else if header[4] != 1 {
        2
    } else {
        0
    }
}
|
||||
|
||||
/// Compute CRC32C of a data region.
|
||||
/// Returns the 32-bit CRC value.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_crc32c(data_ptr: i32, len: i32) -> i32 {
|
||||
let ptr = data_ptr as *const u8;
|
||||
let data = unsafe { core::slice::from_raw_parts(ptr, len as usize) };
|
||||
crc32c_compute(data) as i32
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// Helpers
|
||||
// =====================================================================
|
||||
|
||||
/// Software CRC32C (Castagnoli) implementation.
///
/// Bitwise (table-free): folds one byte at a time with eight shift/xor
/// steps using the reflected polynomial 0x82F63B78, with the usual
/// all-ones pre- and post-conditioning.
fn crc32c_compute(data: &[u8]) -> u32 {
    let folded = data.iter().fold(0xFFFF_FFFFu32, |mut crc, &byte| {
        crc ^= u32::from(byte);
        for _ in 0..8 {
            let lsb_set = crc & 1 != 0;
            crc >>= 1;
            if lsb_set {
                crc ^= 0x82F6_3B78;
            }
        }
        crc
    });
    !folded
}
|
||||
|
||||
/// Convert f32 to IEEE 754 half-precision (f16) bit pattern.
///
/// Truncating (round-toward-zero) conversion: NaNs collapse to a quiet
/// NaN, overflowing magnitudes to ±infinity, magnitudes below the f16
/// subnormal range to ±0.
fn f32_to_f16(val: f32) -> u16 {
    let bits = val.to_bits();
    let sign = ((bits >> 16) & 0x8000) as u16;
    let exp = ((bits >> 23) & 0xFF) as i32;
    let mantissa = bits & 0x007F_FFFF;

    // Exponent all ones: infinity (zero mantissa) or NaN.
    if exp == 0xFF {
        let nan_bit = if mantissa != 0 { 0x0200 } else { 0 };
        return sign | 0x7C00 | nan_bit;
    }

    // Rebias from f32 (127) to f16 (15).
    let new_exp = exp - 127 + 15;
    if new_exp >= 31 {
        // Overflow: ±infinity.
        sign | 0x7C00
    } else if new_exp > 0 {
        // Normal: pack rebias exponent + top 10 mantissa bits.
        sign | ((new_exp as u16) << 10) | ((mantissa >> 13) as u16)
    } else if new_exp < -10 {
        // Too small even for a subnormal: ±0.
        sign
    } else {
        // Subnormal: restore the implicit leading 1, shift into position.
        let mant = (mantissa | 0x0080_0000) >> (1 - new_exp + 13);
        sign | mant as u16
    }
}
|
||||
|
||||
// =====================================================================
|
||||
// Control Plane — In-Memory Store
|
||||
// =====================================================================
|
||||
|
||||
/// Create an in-memory store. Returns a handle (>0) or negative on error.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_create(dim: i32, metric: i32) -> i32 {
|
||||
if dim <= 0 {
|
||||
return -1;
|
||||
}
|
||||
store::registry().create(dim as u32, metric as u8)
|
||||
}
|
||||
|
||||
/// Open a .rvf file from raw bytes. Returns a store handle.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_open(buf_ptr: i32, buf_len: i32) -> i32 {
|
||||
if buf_len <= 0 {
|
||||
return -1;
|
||||
}
|
||||
let buf = unsafe { core::slice::from_raw_parts(buf_ptr as *const u8, buf_len as usize) };
|
||||
let segments = segment::parse_segments(buf);
|
||||
|
||||
let reg = store::registry();
|
||||
let mut dim: u32 = 0;
|
||||
let mut entries: alloc::vec::Vec<(u64, alloc::vec::Vec<f32>)> = alloc::vec::Vec::new();
|
||||
|
||||
for seg in &segments {
|
||||
// SegmentType::Vec = 0x01
|
||||
if seg.seg_type == 0x01 {
|
||||
let payload_start = seg.offset + rvf_types::constants::SEGMENT_HEADER_SIZE;
|
||||
let payload_end = payload_start + seg.payload_length as usize;
|
||||
if payload_end > buf.len() || seg.payload_length < 6 {
|
||||
continue;
|
||||
}
|
||||
let payload = &buf[payload_start..payload_end];
|
||||
|
||||
let count = u16::from_le_bytes([payload[0], payload[1]]) as usize;
|
||||
let seg_dim = u32::from_le_bytes([payload[2], payload[3], payload[4], payload[5]]);
|
||||
if dim == 0 {
|
||||
dim = seg_dim;
|
||||
}
|
||||
|
||||
let mut offset = 6;
|
||||
for _ in 0..count {
|
||||
if offset + 8 > payload.len() {
|
||||
break;
|
||||
}
|
||||
let id = u64::from_le_bytes([
|
||||
payload[offset],
|
||||
payload[offset + 1],
|
||||
payload[offset + 2],
|
||||
payload[offset + 3],
|
||||
payload[offset + 4],
|
||||
payload[offset + 5],
|
||||
payload[offset + 6],
|
||||
payload[offset + 7],
|
||||
]);
|
||||
offset += 8;
|
||||
let vec_bytes = (seg_dim as usize) * 4;
|
||||
if offset + vec_bytes > payload.len() {
|
||||
break;
|
||||
}
|
||||
let mut vec_data = alloc::vec::Vec::with_capacity(seg_dim as usize);
|
||||
for d in 0..seg_dim as usize {
|
||||
let f = f32::from_le_bytes([
|
||||
payload[offset + d * 4],
|
||||
payload[offset + d * 4 + 1],
|
||||
payload[offset + d * 4 + 2],
|
||||
payload[offset + d * 4 + 3],
|
||||
]);
|
||||
vec_data.push(f);
|
||||
}
|
||||
offset += vec_bytes;
|
||||
entries.push((id, vec_data));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if dim == 0 {
|
||||
dim = 1;
|
||||
}
|
||||
let handle = reg.create(dim, 0);
|
||||
if handle <= 0 {
|
||||
return handle;
|
||||
}
|
||||
|
||||
if let Some(s) = reg.get_mut(handle) {
|
||||
for (id, data) in entries {
|
||||
s.entries.push(store::VecEntry {
|
||||
id,
|
||||
data,
|
||||
deleted: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
handle
|
||||
}
|
||||
|
||||
/// Ingest vectors into a store. Returns count ingested or negative on error.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_ingest(handle: i32, vecs_ptr: i32, ids_ptr: i32, count: i32) -> i32 {
|
||||
if count <= 0 {
|
||||
return 0;
|
||||
}
|
||||
match store::registry().get_mut(handle) {
|
||||
Some(s) => s.ingest(vecs_ptr as *const f32, ids_ptr as *const u64, count as u32),
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Query a store for k nearest neighbors.
|
||||
/// Results written to out_ptr as (id: u64, dist: f32) pairs.
|
||||
/// Returns number of results.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_query(
|
||||
handle: i32,
|
||||
query_ptr: i32,
|
||||
k: i32,
|
||||
metric: i32,
|
||||
out_ptr: i32,
|
||||
) -> i32 {
|
||||
if k <= 0 {
|
||||
return 0;
|
||||
}
|
||||
match store::registry().get(handle) {
|
||||
Some(s) => s.query(query_ptr as *const f32, k as u32, metric, out_ptr as *mut u8),
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete vectors by ID. Returns count deleted.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_delete(handle: i32, ids_ptr: i32, count: i32) -> i32 {
|
||||
if count <= 0 {
|
||||
return 0;
|
||||
}
|
||||
match store::registry().get_mut(handle) {
|
||||
Some(s) => s.delete(ids_ptr as *const u64, count as u32),
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get live vector count.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_count(handle: i32) -> i32 {
|
||||
match store::registry().get(handle) {
|
||||
Some(s) => s.count() as i32,
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get store dimension.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_dimension(handle: i32) -> i32 {
|
||||
match store::registry().get(handle) {
|
||||
Some(s) => s.dimension() as i32,
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write store status to output buffer (20 bytes).
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_status(handle: i32, out_ptr: i32) -> i32 {
|
||||
match store::registry().get(handle) {
|
||||
Some(s) => s.status(out_ptr as *mut u8),
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Export store as .rvf bytes. Returns bytes written or negative if buffer too small.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_export(handle: i32, out_ptr: i32, out_len: i32) -> i32 {
|
||||
match store::registry().get(handle) {
|
||||
Some(s) => s.export(out_ptr as *mut u8, out_len as u32),
|
||||
None => -1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Close and free a store.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_store_close(handle: i32) -> i32 {
|
||||
store::registry().close(handle)
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// Segment Parsing & Inspection
|
||||
// =====================================================================
|
||||
|
||||
/// Parse a segment header from raw bytes.
|
||||
/// Writes 24 bytes to out_ptr.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_parse_header(buf_ptr: i32, buf_len: i32, out_ptr: i32) -> i32 {
|
||||
if buf_len < 64 {
|
||||
return -1;
|
||||
}
|
||||
let buf = unsafe { core::slice::from_raw_parts(buf_ptr as *const u8, buf_len as usize) };
|
||||
segment::parse_header_to_buf(buf, out_ptr as *mut u8)
|
||||
}
|
||||
|
||||
/// Count segments in a .rvf buffer.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_segment_count(buf_ptr: i32, buf_len: i32) -> i32 {
|
||||
if buf_len <= 0 {
|
||||
return 0;
|
||||
}
|
||||
let buf = unsafe { core::slice::from_raw_parts(buf_ptr as *const u8, buf_len as usize) };
|
||||
segment::parse_segments(buf).len() as i32
|
||||
}
|
||||
|
||||
/// Get info for segment at index `idx` in the buffer.
|
||||
/// Writes to out_ptr: [seg_id: u64, type: u8, padding: 3 bytes, payload_len: u64, offset: u64] = 28 bytes
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_segment_info(buf_ptr: i32, buf_len: i32, idx: i32, out_ptr: i32) -> i32 {
|
||||
if buf_len <= 0 || idx < 0 {
|
||||
return -1;
|
||||
}
|
||||
let buf = unsafe { core::slice::from_raw_parts(buf_ptr as *const u8, buf_len as usize) };
|
||||
let segments = segment::parse_segments(buf);
|
||||
let i = idx as usize;
|
||||
if i >= segments.len() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
let seg = &segments[i];
|
||||
let out = out_ptr as *mut u8;
|
||||
unsafe {
|
||||
let id_bytes = seg.seg_id.to_le_bytes();
|
||||
for b in 0..8 {
|
||||
*out.add(b) = id_bytes[b];
|
||||
}
|
||||
*out.add(8) = seg.seg_type;
|
||||
*out.add(9) = 0;
|
||||
*out.add(10) = 0;
|
||||
*out.add(11) = 0; // padding
|
||||
let pl_bytes = seg.payload_length.to_le_bytes();
|
||||
for b in 0..8 {
|
||||
*out.add(12 + b) = pl_bytes[b];
|
||||
}
|
||||
let off_bytes = (seg.offset as u64).to_le_bytes();
|
||||
for b in 0..8 {
|
||||
*out.add(20 + b) = off_bytes[b];
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Verify checksum of a data region.
|
||||
/// The last 4 bytes of the buffer are treated as the expected CRC32C.
|
||||
/// Returns 1 if CRC32C matches, 0 if not.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_verify_checksum(buf_ptr: i32, buf_len: i32) -> i32 {
|
||||
if buf_len < 4 {
|
||||
return -1;
|
||||
}
|
||||
let buf = unsafe { core::slice::from_raw_parts(buf_ptr as *const u8, buf_len as usize) };
|
||||
let data = &buf[..buf.len() - 4];
|
||||
let expected = u32::from_le_bytes([
|
||||
buf[buf.len() - 4],
|
||||
buf[buf.len() - 3],
|
||||
buf[buf.len() - 2],
|
||||
buf[buf.len() - 1],
|
||||
]);
|
||||
let computed = crc32c_compute(data);
|
||||
if computed == expected {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// Witness Chain Verification
|
||||
// =====================================================================
|
||||
|
||||
/// Verify a SHAKE-256 witness chain in memory.
|
||||
///
|
||||
/// `chain_ptr`: pointer to serialized witness chain (73 bytes per entry).
|
||||
/// `chain_len`: total byte length of the chain.
|
||||
///
|
||||
/// Returns the number of verified entries on success, or a negative error code:
|
||||
/// -1: invalid pointer/length
|
||||
/// -2: truncated chain (not a multiple of 73 bytes)
|
||||
/// -3: chain integrity failure (prev_hash mismatch)
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rvf_witness_verify(chain_ptr: i32, chain_len: i32) -> i32 {
|
||||
if chain_len < 0 {
|
||||
return -1;
|
||||
}
|
||||
let len = chain_len as usize;
|
||||
if len == 0 {
|
||||
return 0;
|
||||
}
|
||||
let data = unsafe { core::slice::from_raw_parts(chain_ptr as *const u8, len) };
|
||||
match rvf_crypto::verify_witness_chain(data) {
|
||||
Ok(entries) => entries.len() as i32,
|
||||
Err(e) => {
|
||||
use rvf_types::RvfError;
|
||||
match e {
|
||||
RvfError::Code(rvf_types::ErrorCode::TruncatedSegment) => -2,
|
||||
RvfError::Code(rvf_types::ErrorCode::InvalidChecksum) => -3,
|
||||
_ => -1,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Count witness entries in a chain without full verification.
///
/// Each serialized entry is exactly 73 bytes. Returns the entry count
/// (chain_len / 73), 0 for an empty chain, or -1 if the length is negative
/// or not a multiple of the entry size.
#[no_mangle]
pub extern "C" fn rvf_witness_count(chain_len: i32) -> i32 {
    const ENTRY_SIZE: i32 = 73;
    match chain_len {
        n if n < 0 => -1,
        0 => 0,
        n if n % ENTRY_SIZE != 0 => -1,
        n => n / ENTRY_SIZE,
    }
}
|
||||
|
||||
// =====================================================================
|
||||
// Memory Management
|
||||
// =====================================================================
|
||||
|
||||
// rvf_alloc and rvf_free are exported from alloc_setup module.
|
||||
|
||||
/// Panic handler for no_std WASM.
///
/// Traps immediately via the wasm `unreachable` instruction; there is no
/// unwinding or message reporting in this freestanding wasm32 build.
/// Compiled out under `cfg(test)` so host-side test builds use std's handler.
#[cfg(not(test))]
#[panic_handler]
fn panic(_info: &core::panic::PanicInfo) -> ! {
    core::arch::wasm32::unreachable()
}
|
||||
69
vendor/ruvector/crates/rvf/rvf-wasm/src/memory.rs
vendored
Normal file
69
vendor/ruvector/crates/rvf/rvf-wasm/src/memory.rs
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
//! Static memory layout matching the Cognitum tile spec.
//!
//! No allocator. All buffers are statically allocated as mutable byte arrays.
//! Offsets below are byte offsets into `DATA_MEMORY` / `SIMD_SCRATCH`. The
//! arrays are only byte-aligned, so multi-byte fields stored at these
//! offsets must be accessed with unaligned loads/stores.

/// Total data memory: 8 KB
pub const DATA_MEMORY_SIZE: usize = 8 * 1024;

/// Total SIMD scratch memory: 64 KB
pub const SIMD_SCRATCH_SIZE: usize = 64 * 1024;

// === Data Memory Layout ===

/// Tile configuration: 64 bytes at offset 0x0000
pub const TILE_CONFIG_SIZE: usize = 64;

/// Offset within tile config for stored dimension (u32)
pub const TILE_CONFIG_DIM_OFFSET: usize = 0x04;
/// Offset within tile config for stored vector count (u32)
pub const TILE_CONFIG_COUNT_OFFSET: usize = 0x08;
/// Offset within tile config for stored dtype (u32)
pub const TILE_CONFIG_DTYPE_OFFSET: usize = 0x0C;
/// Offset within tile config for PQ M parameter (u32)
pub const TILE_CONFIG_PQ_M_OFFSET: usize = 0x10;
/// Offset within tile config for PQ K parameter (u32)
pub const TILE_CONFIG_PQ_K_OFFSET: usize = 0x14;

/// Query scratch: 192 bytes at offset 0x0040
pub const QUERY_SCRATCH_OFFSET: usize = 0x0040;
/// Size of the query scratch region in bytes.
pub const QUERY_SCRATCH_SIZE: usize = 192;

/// Result buffer: 256 bytes at offset 0x0100
pub const RESULT_BUFFER_OFFSET: usize = 0x0100;
/// Size of the result buffer in bytes.
pub const RESULT_BUFFER_SIZE: usize = 256;

/// Routing table: 512 bytes at offset 0x0200
pub const ROUTING_TABLE_OFFSET: usize = 0x0200;
/// Size of the routing table in bytes.
pub const ROUTING_TABLE_SIZE: usize = 512;

/// Decode workspace: 1 KB at offset 0x0400
pub const DECODE_WORKSPACE_OFFSET: usize = 0x0400;
/// Size of the decode workspace in bytes.
pub const DECODE_WORKSPACE_SIZE: usize = 1024;

/// Message I/O buffer: 2 KB at offset 0x0800
pub const MESSAGE_IO_OFFSET: usize = 0x0800;
/// Size of the message I/O buffer in bytes.
pub const MESSAGE_IO_SIZE: usize = 2048;

/// Neighbor list cache: 4 KB at offset 0x1000
pub const NEIGHBOR_CACHE_OFFSET: usize = 0x1000;
/// Size of the neighbor list cache in bytes.
pub const NEIGHBOR_CACHE_SIZE: usize = 4096;

// === SIMD Scratch Layout ===

/// Vector block area: 32 KB at offset 0x0000
pub const SIMD_BLOCK_SIZE: usize = 32 * 1024;

/// PQ distance table: 16 KB at offset 0x8000
pub const SIMD_PQ_TABLE_OFFSET: usize = 0x8000;
/// Size of the PQ distance table in bytes.
pub const SIMD_PQ_TABLE_SIZE: usize = 16 * 1024;

/// Hot cache: 12 KB at offset 0xC000
///
/// NOTE(review): 0xC000..SIMD_SCRATCH_SIZE is actually 16 KB; the spec label
/// here says 12 KB, which would leave the final 4 KB reserved — confirm the
/// intended size against the tile spec.
pub const SIMD_HOT_CACHE_OFFSET: usize = 0xC000;

// === Static Buffers ===

/// Main data memory (8 KB).
pub static mut DATA_MEMORY: [u8; DATA_MEMORY_SIZE] = [0u8; DATA_MEMORY_SIZE];

/// SIMD scratch memory (64 KB).
pub static mut SIMD_SCRATCH: [u8; SIMD_SCRATCH_SIZE] = [0u8; SIMD_SCRATCH_SIZE];
|
||||
124
vendor/ruvector/crates/rvf/rvf-wasm/src/segment.rs
vendored
Normal file
124
vendor/ruvector/crates/rvf/rvf-wasm/src/segment.rs
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
//! Segment parsing and inspection exports for WASM.
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use rvf_types::constants::{SEGMENT_HEADER_SIZE, SEGMENT_MAGIC};
|
||||
|
||||
/// Parsed segment info for WASM consumers.
pub struct SegmentInfo {
    /// 64-bit segment identifier from the header.
    pub seg_id: u64,
    /// Segment type byte (e.g. 0x01 = Vec, 0x10 = WASM_SEG).
    pub seg_type: u8,
    /// Payload length in bytes, excluding the segment header.
    pub payload_length: u64,
    /// Byte offset of the segment header within the scanned buffer.
    pub offset: usize,
}
|
||||
|
||||
/// Parse all segments from a raw .rvf byte buffer.
|
||||
pub fn parse_segments(buf: &[u8]) -> Vec<SegmentInfo> {
|
||||
let mut segments = Vec::new();
|
||||
let magic_bytes = SEGMENT_MAGIC.to_le_bytes();
|
||||
|
||||
if buf.len() < SEGMENT_HEADER_SIZE {
|
||||
return segments;
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
let last = buf.len().saturating_sub(SEGMENT_HEADER_SIZE);
|
||||
|
||||
while i <= last {
|
||||
if buf[i..i + 4] == magic_bytes {
|
||||
let version = buf[i + 4];
|
||||
if version != 1 {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
let seg_type = buf[i + 5];
|
||||
let seg_id = u64::from_le_bytes([
|
||||
buf[i + 8],
|
||||
buf[i + 9],
|
||||
buf[i + 10],
|
||||
buf[i + 11],
|
||||
buf[i + 12],
|
||||
buf[i + 13],
|
||||
buf[i + 14],
|
||||
buf[i + 15],
|
||||
]);
|
||||
let payload_length = u64::from_le_bytes([
|
||||
buf[i + 16],
|
||||
buf[i + 17],
|
||||
buf[i + 18],
|
||||
buf[i + 19],
|
||||
buf[i + 20],
|
||||
buf[i + 21],
|
||||
buf[i + 22],
|
||||
buf[i + 23],
|
||||
]);
|
||||
|
||||
segments.push(SegmentInfo {
|
||||
seg_id,
|
||||
seg_type,
|
||||
payload_length,
|
||||
offset: i,
|
||||
});
|
||||
|
||||
// Skip past this segment
|
||||
let total = SEGMENT_HEADER_SIZE + payload_length as usize;
|
||||
if let Some(next) = i.checked_add(total) {
|
||||
if next > i {
|
||||
i = next;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
segments
|
||||
}
|
||||
|
||||
/// Parse a segment header from raw bytes.
|
||||
/// Writes to out_ptr: [magic: u32, version: u8, type: u8, flags: u16, seg_id: u64, payload_len: u64]
|
||||
/// = 24 bytes
|
||||
pub fn parse_header_to_buf(buf: &[u8], out_ptr: *mut u8) -> i32 {
|
||||
if buf.len() < SEGMENT_HEADER_SIZE {
|
||||
return -1;
|
||||
}
|
||||
|
||||
let magic = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
|
||||
if magic != SEGMENT_MAGIC {
|
||||
return -2;
|
||||
}
|
||||
|
||||
// Copy first 24 bytes of header (magic through payload_length)
|
||||
unsafe {
|
||||
for i in 0..24 {
|
||||
*out_ptr.add(i) = buf[i];
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// Verify CRC32C of a buffer. Returns 1 if valid (matches expected), 0 if not.
|
||||
pub fn verify_crc32c(buf: &[u8], expected: u32) -> i32 {
|
||||
let computed = crc32c_compute(buf);
|
||||
if computed == expected {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// Bitwise CRC32C (Castagnoli, reflected polynomial 0x82F63B78) with the
/// standard all-ones initial value and final inversion.
fn crc32c_compute(data: &[u8]) -> u32 {
    let mut crc: u32 = !0;
    for &byte in data {
        crc ^= byte as u32;
        crc = (0..8).fold(crc, |c, _| {
            if c & 1 == 1 {
                (c >> 1) ^ 0x82F6_3B78
            } else {
                c >> 1
            }
        });
    }
    !crc
}
|
||||
399
vendor/ruvector/crates/rvf/rvf-wasm/src/store.rs
vendored
Normal file
399
vendor/ruvector/crates/rvf/rvf-wasm/src/store.rs
vendored
Normal file
@@ -0,0 +1,399 @@
|
||||
//! In-memory WasmStore for browser-side RVF operations.
|
||||
//!
|
||||
//! Handle-based API: each store gets an integer handle.
|
||||
//! Supports create, ingest, query, delete, export.
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Distance metric enum matching rvf-types.
///
/// Discriminants mirror the metric codes used by the C ABI exports
/// (0 = L2, 1 = inner product, 2 = cosine).
#[derive(Clone, Copy)]
pub enum Metric {
    L2 = 0,
    InnerProduct = 1,
    Cosine = 2,
}
|
||||
|
||||
/// A single vector entry in the store.
pub struct VecEntry {
    /// 64-bit external identifier supplied at ingest time.
    pub id: u64,
    /// Vector components; expected to match the owning store's dimension.
    pub data: Vec<f32>,
    /// Soft-delete flag: deleted entries are skipped by count/query/export.
    pub deleted: bool,
}
|
||||
|
||||
/// An in-memory RVF store.
pub struct WasmStore {
    /// Vector dimensionality, fixed at creation.
    dimension: u32,
    /// Default metric used when a query does not override it.
    metric: Metric,
    /// Append-only entry list; deletions are soft (flag only).
    pub entries: Vec<VecEntry>,
}
|
||||
|
||||
impl WasmStore {
    /// Build an empty store; unknown metric codes fall back to L2.
    pub fn new(dimension: u32, metric: u8) -> Self {
        let m = match metric {
            1 => Metric::InnerProduct,
            2 => Metric::Cosine,
            _ => Metric::L2,
        };
        Self {
            dimension,
            metric: m,
            entries: Vec::new(),
        }
    }

    /// Vector dimensionality fixed at creation.
    pub fn dimension(&self) -> u32 {
        self.dimension
    }

    /// Number of live (not soft-deleted) entries.
    pub fn count(&self) -> u32 {
        self.entries.iter().filter(|e| !e.deleted).count() as u32
    }

    /// Copy `count` vectors and their ids out of raw memory into the store.
    /// Returns the number of entries accepted.
    ///
    /// Relies on the caller: `vecs_ptr` must point at `count * dimension`
    /// f32 values and `ids_ptr` at `count` u64 ids.
    pub fn ingest(&mut self, vecs_ptr: *const f32, ids_ptr: *const u64, count: u32) -> i32 {
        let dim = self.dimension as usize;
        let mut accepted = 0i32;
        for i in 0..count as usize {
            let id = unsafe { *ids_ptr.add(i) };
            let mut data = Vec::with_capacity(dim);
            for d in 0..dim {
                data.push(unsafe { *vecs_ptr.add(i * dim + d) });
            }
            self.entries.push(VecEntry {
                id,
                data,
                deleted: false,
            });
            accepted += 1;
        }
        accepted
    }

    /// Brute-force k-nearest-neighbor search over all live entries.
    ///
    /// `metric_override >= 0` selects a metric for this query; a negative
    /// value uses the store default. Results are written to `out_ptr` as
    /// little-endian (id: u64, dist: f32) pairs, 12 bytes each; returns the
    /// number of results written.
    pub fn query(
        &self,
        query_ptr: *const f32,
        k: u32,
        metric_override: i32,
        out_ptr: *mut u8,
    ) -> i32 {
        let dim = self.dimension as usize;
        let metric = if metric_override >= 0 {
            match metric_override as u8 {
                1 => Metric::InnerProduct,
                2 => Metric::Cosine,
                _ => Metric::L2,
            }
        } else {
            self.metric
        };

        let query: Vec<f32> = (0..dim).map(|i| unsafe { *query_ptr.add(i) }).collect();

        // Collect (distance, id) for all live entries
        let mut candidates: Vec<(f32, u64)> = Vec::new();
        for entry in &self.entries {
            if entry.deleted {
                continue;
            }
            let dist = compute_distance(&query, &entry.data, metric);
            candidates.push((dist, entry.id));
        }

        // Sort by distance ascending (NaN distances compare as equal)
        candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(core::cmp::Ordering::Equal));

        let result_count = (k as usize).min(candidates.len());
        // Write results: (id: u64, dist: f32) pairs = 12 bytes each
        for i in 0..result_count {
            let (dist, id) = candidates[i];
            let offset = i * 12;
            let id_bytes = id.to_le_bytes();
            let dist_bytes = dist.to_le_bytes();
            unsafe {
                for b in 0..8 {
                    *out_ptr.add(offset + b) = id_bytes[b];
                }
                for b in 0..4 {
                    *out_ptr.add(offset + 8 + b) = dist_bytes[b];
                }
            }
        }

        result_count as i32
    }

    /// Soft-delete every live entry whose id appears in the `count`-element
    /// array at `ids_ptr`. Returns how many entries were flagged.
    pub fn delete(&mut self, ids_ptr: *const u64, count: u32) -> i32 {
        let mut deleted = 0i32;
        for i in 0..count as usize {
            let target_id = unsafe { *ids_ptr.add(i) };
            // Duplicate ids in the store are all flagged (and all counted).
            for entry in self.entries.iter_mut() {
                if entry.id == target_id && !entry.deleted {
                    entry.deleted = true;
                    deleted += 1;
                }
            }
        }
        deleted
    }

    /// Write status to output buffer.
    /// Format: [count: u32, dimension: u32, metric: u32, total_entries: u32, deleted: u32]
    /// Returns the number of bytes written (20).
    pub fn status(&self, out_ptr: *mut u8) -> i32 {
        let live = self.count();
        let total = self.entries.len() as u32;
        let deleted = total - live;
        let metric_val = self.metric as u32;

        unsafe {
            write_u32(out_ptr, 0, live);
            write_u32(out_ptr, 4, self.dimension);
            write_u32(out_ptr, 8, metric_val);
            write_u32(out_ptr, 12, total);
            write_u32(out_ptr, 16, deleted);
        }
        20 // bytes written
    }

    /// Export the store as .rvf bytes into a pre-allocated buffer.
    /// Returns bytes written, or negative if buffer too small — the negated
    /// required size, so the caller can re-allocate and retry.
    pub fn export(&self, out_ptr: *mut u8, out_len: u32) -> i32 {
        use rvf_types::constants::{SEGMENT_HEADER_SIZE, SEGMENT_MAGIC, SEGMENT_VERSION};
        use rvf_types::SegmentType;

        let dim = self.dimension as usize;
        let live_entries: Vec<&VecEntry> = self.entries.iter().filter(|e| !e.deleted).collect();
        let n = live_entries.len();

        // Vec segment payload: [count: u16, dim: u32, (id: u64, vec: f32*dim)*]
        let vec_payload_len = 2 + 4 + n * (8 + dim * 4);
        // Manifest segment payload: [epoch: u32, dim: u16, total_vecs: u64, profile: u8,
        // seg_count: u32, (seg_id: u64, offset: u64, payload_len: u64, type: u8)*]
        let manifest_payload_len = 4 + 2 + 8 + 1 + 4 + 1 * 25; // 1 segment entry

        let total_size =
            SEGMENT_HEADER_SIZE + vec_payload_len + SEGMENT_HEADER_SIZE + manifest_payload_len;

        if (out_len as usize) < total_size {
            return -(total_size as i32);
        }

        let mut offset = 0usize;

        // -- Vec segment header --
        unsafe {
            write_u32(out_ptr, offset, SEGMENT_MAGIC);
            *out_ptr.add(offset + 4) = SEGMENT_VERSION;
            *out_ptr.add(offset + 5) = SegmentType::Vec as u8;
            write_u16_at(out_ptr, offset + 6, 0); // flags
            write_u64(out_ptr, offset + 8, 1); // seg_id = 1
            write_u64(out_ptr, offset + 16, vec_payload_len as u64);
            // rest of header zeros (timestamp, checksum, etc.)
            for i in 24..SEGMENT_HEADER_SIZE {
                *out_ptr.add(offset + i) = 0;
            }
        }
        offset += SEGMENT_HEADER_SIZE;

        // -- Vec segment payload --
        unsafe {
            write_u16_at(out_ptr, offset, n as u16);
            offset += 2;
            write_u32(out_ptr, offset, self.dimension);
            offset += 4;
            for entry in &live_entries {
                write_u64(out_ptr, offset, entry.id);
                offset += 8;
                for d in 0..dim {
                    write_f32(out_ptr, offset, entry.data[d]);
                    offset += 4;
                }
            }
        }

        // -- Manifest segment header --
        unsafe {
            write_u32(out_ptr, offset, SEGMENT_MAGIC);
            *out_ptr.add(offset + 4) = SEGMENT_VERSION;
            *out_ptr.add(offset + 5) = SegmentType::Manifest as u8;
            write_u16_at(out_ptr, offset + 6, 0);
            write_u64(out_ptr, offset + 8, 2); // seg_id = 2
            write_u64(out_ptr, offset + 16, manifest_payload_len as u64);
            for i in 24..SEGMENT_HEADER_SIZE {
                *out_ptr.add(offset + i) = 0;
            }
        }
        offset += SEGMENT_HEADER_SIZE;

        // -- Manifest payload --
        unsafe {
            write_u32(out_ptr, offset, 1); // epoch
            offset += 4;
            write_u16_at(out_ptr, offset, self.dimension as u16);
            offset += 2;
            write_u64(out_ptr, offset, n as u64); // total_vectors
            offset += 8;
            *out_ptr.add(offset) = 0; // profile
            offset += 1;
            write_u32(out_ptr, offset, 1); // seg_count = 1
            offset += 4;
            // segment entry
            write_u64(out_ptr, offset, 1); // seg_id
            offset += 8;
            write_u64(out_ptr, offset, 0); // offset (start of file)
            offset += 8;
            write_u64(out_ptr, offset, vec_payload_len as u64);
            offset += 8;
            *out_ptr.add(offset) = SegmentType::Vec as u8;
            offset += 1;
        }

        offset as i32
    }
}
|
||||
|
||||
fn compute_distance(a: &[f32], b: &[f32], metric: Metric) -> f32 {
|
||||
match metric {
|
||||
Metric::L2 => {
|
||||
let mut sum = 0.0f32;
|
||||
for i in 0..a.len() {
|
||||
let d = a[i] - b[i];
|
||||
sum += d * d;
|
||||
}
|
||||
sum
|
||||
}
|
||||
Metric::InnerProduct => {
|
||||
let mut dot = 0.0f32;
|
||||
for i in 0..a.len() {
|
||||
dot += a[i] * b[i];
|
||||
}
|
||||
-dot
|
||||
}
|
||||
Metric::Cosine => {
|
||||
let mut dot = 0.0f32;
|
||||
let mut na = 0.0f32;
|
||||
let mut nb = 0.0f32;
|
||||
for i in 0..a.len() {
|
||||
dot += a[i] * b[i];
|
||||
na += a[i] * a[i];
|
||||
nb += b[i] * b[i];
|
||||
}
|
||||
let denom = sqrt_approx(na) * sqrt_approx(nb);
|
||||
if denom < 1e-10 {
|
||||
1.0
|
||||
} else {
|
||||
1.0 - dot / denom
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Approximate square root without std/libm (this crate is no_std).
///
/// Seeds an estimate by halving the float's exponent with integer bit
/// tricks (magic constant 0x1FBD_1DF5), then refines with two Newton
/// iterations. Non-positive inputs map to 0.0.
fn sqrt_approx(x: f32) -> f32 {
    if x <= 0.0 {
        return 0.0;
    }
    // Initial guess: shift the bit pattern right to halve the exponent.
    let guess = f32::from_bits(0x1FBD_1DF5 + (x.to_bits() >> 1));
    // Two Newton-Raphson steps: y' = (y + x/y) / 2.
    let refined = 0.5 * (guess + x / guess);
    0.5 * (refined + x / refined)
}
|
||||
|
||||
/// Store `val` little-endian at `ptr + offset` (4 bytes, byte-wise copy, so
/// no alignment requirement on the destination).
unsafe fn write_u32(ptr: *mut u8, offset: usize, val: u32) {
    let bytes = val.to_le_bytes();
    core::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.add(offset), 4);
}
|
||||
|
||||
/// Store `val` little-endian at `ptr + offset` (8 bytes, byte-wise copy, so
/// no alignment requirement on the destination).
unsafe fn write_u64(ptr: *mut u8, offset: usize, val: u64) {
    let bytes = val.to_le_bytes();
    core::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.add(offset), 8);
}
|
||||
|
||||
/// Write `val` at `ptr + offset` as 2 little-endian bytes.
///
/// # Safety
/// The caller must guarantee that `ptr + offset .. ptr + offset + 2`
/// is valid for writes.
unsafe fn write_u16_at(ptr: *mut u8, offset: usize, val: u16) {
    let bytes = val.to_le_bytes();
    // SAFETY: destination validity is the caller's contract (see above);
    // `bytes` is a local array, so the ranges cannot overlap.
    core::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.add(offset), bytes.len());
}
|
||||
|
||||
/// Write `val` at `ptr + offset` as 4 little-endian bytes (IEEE-754 bits).
///
/// # Safety
/// The caller must guarantee that `ptr + offset .. ptr + offset + 4`
/// is valid for writes.
unsafe fn write_f32(ptr: *mut u8, offset: usize, val: f32) {
    let bytes = val.to_le_bytes();
    // SAFETY: destination validity is the caller's contract (see above);
    // `bytes` is a local array, so the ranges cannot overlap.
    core::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.add(offset), bytes.len());
}
|
||||
|
||||
// -- Global store registry --
|
||||
|
||||
/// Registry mapping small integer handles (1-based) to open stores.
///
/// Closed stores leave a `None` slot behind so that the indices of the
/// remaining handles stay stable; free slots are recycled on create.
pub(crate) struct StoreRegistry {
    // Slot i backs handle i + 1; None marks a closed/free slot.
    stores: Vec<Option<WasmStore>>,
}
|
||||
|
||||
impl StoreRegistry {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
stores: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create(&mut self, dim: u32, metric: u8) -> i32 {
|
||||
let store = WasmStore::new(dim, metric);
|
||||
// Find an empty slot or push new
|
||||
for (i, slot) in self.stores.iter_mut().enumerate() {
|
||||
if slot.is_none() {
|
||||
*slot = Some(store);
|
||||
return (i + 1) as i32; // 1-based handles
|
||||
}
|
||||
}
|
||||
self.stores.push(Some(store));
|
||||
self.stores.len() as i32
|
||||
}
|
||||
|
||||
pub(crate) fn get(&self, handle: i32) -> Option<&WasmStore> {
|
||||
if handle <= 0 {
|
||||
return None;
|
||||
}
|
||||
self.stores
|
||||
.get((handle - 1) as usize)
|
||||
.and_then(|s| s.as_ref())
|
||||
}
|
||||
|
||||
pub(crate) fn get_mut(&mut self, handle: i32) -> Option<&mut WasmStore> {
|
||||
if handle <= 0 {
|
||||
return None;
|
||||
}
|
||||
self.stores
|
||||
.get_mut((handle - 1) as usize)
|
||||
.and_then(|s| s.as_mut())
|
||||
}
|
||||
|
||||
pub(crate) fn close(&mut self, handle: i32) -> i32 {
|
||||
if handle <= 0 {
|
||||
return -1;
|
||||
}
|
||||
let idx = (handle - 1) as usize;
|
||||
if idx < self.stores.len() && self.stores[idx].is_some() {
|
||||
self.stores[idx] = None;
|
||||
0
|
||||
} else {
|
||||
-1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Safety: WASM is single-threaded.
|
||||
// We use Option to lazily initialize, since Vec::new() is not const in all editions.
|
||||
static mut REGISTRY: Option<StoreRegistry> = None;
|
||||
|
||||
pub fn registry() -> &'static mut StoreRegistry {
|
||||
unsafe {
|
||||
if REGISTRY.is_none() {
|
||||
REGISTRY = Some(StoreRegistry::new());
|
||||
}
|
||||
REGISTRY.as_mut().unwrap()
|
||||
}
|
||||
}
|
||||
135
vendor/ruvector/crates/rvf/rvf-wasm/src/topk.rs
vendored
Normal file
135
vendor/ruvector/crates/rvf/rvf-wasm/src/topk.rs
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
//! Fixed-size min-heap for top-K tracking.
|
||||
//!
|
||||
//! Max K=16, stored in static memory. No allocator needed.
|
||||
|
||||
/// Maximum top-K value supported by the tile.
///
/// Sizes the static heap storage; `heap_insert` clamps larger requests.
pub const MAX_K: usize = 16;
|
||||
|
||||
/// A heap entry: (distance, vector_id).
#[derive(Clone, Copy)]
struct HeapEntry {
    // Distance from the query; the ordering key for the max-heap.
    dist: f32,
    // Vector identifier reported alongside the distance.
    id: u64,
}
|
||||
|
||||
/// Static heap storage. Max-heap by distance — the largest distance
/// is at index 0 so we can efficiently evict it when a closer
/// candidate arrives.
///
/// NOTE(review): `static mut` here assumes single-threaded WASM
/// execution, matching the registry's convention elsewhere in this
/// crate — confirm nothing ever touches these statics concurrently.
static mut HEAP: [HeapEntry; MAX_K] = [HeapEntry {
    dist: f32::MAX,
    id: 0,
}; MAX_K];

/// Current number of elements in the heap.
static mut HEAP_SIZE: usize = 0;
|
||||
|
||||
/// Reset the heap to empty state.
|
||||
pub fn heap_reset() {
|
||||
unsafe {
|
||||
HEAP_SIZE = 0;
|
||||
for entry in HEAP.iter_mut() {
|
||||
entry.dist = f32::MAX;
|
||||
entry.id = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a candidate into the max-heap if it improves the top-K set.
|
||||
pub fn heap_insert(dist: f32, id: u64, k: usize) {
|
||||
let k = if k > MAX_K { MAX_K } else { k };
|
||||
|
||||
unsafe {
|
||||
if HEAP_SIZE < k {
|
||||
let idx = HEAP_SIZE;
|
||||
HEAP[idx] = HeapEntry { dist, id };
|
||||
HEAP_SIZE += 1;
|
||||
sift_up(idx);
|
||||
} else if dist < HEAP[0].dist {
|
||||
HEAP[0] = HeapEntry { dist, id };
|
||||
sift_down(0, HEAP_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read sorted results (ascending by distance) into output buffer.
|
||||
/// Format: for each result, 8 bytes id (u64 LE) then 4 bytes dist (f32 LE).
|
||||
/// Returns number of results written.
|
||||
pub fn heap_read_sorted(out_ptr: *mut u8) -> i32 {
|
||||
unsafe {
|
||||
let size = HEAP_SIZE;
|
||||
if size == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy heap to temporary sort buffer
|
||||
let mut sorted: [HeapEntry; MAX_K] = [HeapEntry {
|
||||
dist: f32::MAX,
|
||||
id: 0,
|
||||
}; MAX_K];
|
||||
for i in 0..size {
|
||||
sorted[i] = HEAP[i];
|
||||
}
|
||||
|
||||
// Insertion sort (K <= 16)
|
||||
for i in 1..size {
|
||||
let key = sorted[i];
|
||||
let mut j = i;
|
||||
while j > 0 && sorted[j - 1].dist > key.dist {
|
||||
sorted[j] = sorted[j - 1];
|
||||
j -= 1;
|
||||
}
|
||||
sorted[j] = key;
|
||||
}
|
||||
|
||||
// Write to output
|
||||
for i in 0..size {
|
||||
let offset = i * 12;
|
||||
let id_bytes = sorted[i].id.to_le_bytes();
|
||||
let dist_bytes = sorted[i].dist.to_le_bytes();
|
||||
for b in 0..8 {
|
||||
*out_ptr.add(offset + b) = id_bytes[b];
|
||||
}
|
||||
for b in 0..4 {
|
||||
*out_ptr.add(offset + 8 + b) = dist_bytes[b];
|
||||
}
|
||||
}
|
||||
|
||||
size as i32
|
||||
}
|
||||
}
|
||||
|
||||
/// Sift up in a max-heap.
|
||||
unsafe fn sift_up(mut idx: usize) {
|
||||
while idx > 0 {
|
||||
let parent = (idx - 1) / 2;
|
||||
if HEAP[idx].dist > HEAP[parent].dist {
|
||||
HEAP.swap(idx, parent);
|
||||
idx = parent;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sift down in a max-heap.
|
||||
unsafe fn sift_down(mut idx: usize, size: usize) {
|
||||
loop {
|
||||
let left = 2 * idx + 1;
|
||||
let right = 2 * idx + 2;
|
||||
let mut largest = idx;
|
||||
|
||||
if left < size && HEAP[left].dist > HEAP[largest].dist {
|
||||
largest = left;
|
||||
}
|
||||
if right < size && HEAP[right].dist > HEAP[largest].dist {
|
||||
largest = right;
|
||||
}
|
||||
|
||||
if largest == idx {
|
||||
break;
|
||||
}
|
||||
|
||||
HEAP.swap(idx, largest);
|
||||
idx = largest;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user