Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
265
vendor/ruvector/examples/OSpipe/src/wasm/bindings.rs
vendored
Normal file
265
vendor/ruvector/examples/OSpipe/src/wasm/bindings.rs
vendored
Normal file
@@ -0,0 +1,265 @@
|
||||
//! WASM-bindgen exports for OSpipe browser usage.
|
||||
//!
|
||||
//! This module exposes a self-contained vector store that runs entirely in the
|
||||
//! browser via WebAssembly. It supports embedding insertion, semantic search
|
||||
//! with optional time-range filtering, deduplication checks, simple text
|
||||
//! embedding (hash-based, suitable for demos), content safety checks, and
|
||||
//! query routing heuristics.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
use super::helpers;
|
||||
|
||||
/// Initialize WASM module: installs `console_error_panic_hook` so that Rust
|
||||
/// panics produce readable error messages in the browser developer console
|
||||
/// instead of the default `unreachable` with no context.
|
||||
#[wasm_bindgen(start)]
|
||||
pub fn init() {
|
||||
#[cfg(feature = "console_error_panic_hook")]
|
||||
console_error_panic_hook::set_once();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal data structures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A single stored embedding with metadata.
|
||||
struct WasmEmbedding {
|
||||
id: String,
|
||||
vector: Vec<f32>,
|
||||
metadata: String, // JSON string
|
||||
timestamp: f64, // Unix milliseconds
|
||||
}
|
||||
|
||||
/// A search result returned to JavaScript.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct SearchHit {
|
||||
id: String,
|
||||
score: f64,
|
||||
metadata: String,
|
||||
timestamp: f64,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public WASM API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// OSpipe WASM -- browser-based personal AI memory search.
|
||||
#[wasm_bindgen]
|
||||
pub struct OsPipeWasm {
|
||||
dimension: usize,
|
||||
embeddings: Vec<WasmEmbedding>,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl OsPipeWasm {
|
||||
// -- lifecycle ---------------------------------------------------------
|
||||
|
||||
/// Create a new OsPipeWasm instance with the given embedding dimension.
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new(dimension: usize) -> Self {
|
||||
Self {
|
||||
dimension,
|
||||
embeddings: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// -- insertion ---------------------------------------------------------
|
||||
|
||||
/// Insert a frame embedding into the store.
|
||||
///
|
||||
/// * `id` - Unique identifier for this frame.
|
||||
/// * `embedding` - Float32 vector whose length must match `dimension`.
|
||||
/// * `metadata` - Arbitrary JSON string attached to this frame.
|
||||
/// * `timestamp` - Unix timestamp in milliseconds.
|
||||
pub fn insert(
|
||||
&mut self,
|
||||
id: &str,
|
||||
embedding: &[f32],
|
||||
metadata: &str,
|
||||
timestamp: f64,
|
||||
) -> Result<(), JsValue> {
|
||||
if embedding.len() != self.dimension {
|
||||
return Err(JsValue::from_str(&format!(
|
||||
"Embedding dimension mismatch: expected {}, got {}",
|
||||
self.dimension,
|
||||
embedding.len()
|
||||
)));
|
||||
}
|
||||
self.embeddings.push(WasmEmbedding {
|
||||
id: id.to_string(),
|
||||
vector: embedding.to_vec(),
|
||||
metadata: metadata.to_string(),
|
||||
timestamp,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// -- search ------------------------------------------------------------
|
||||
|
||||
/// Semantic search by embedding vector. Returns the top-k results as a
|
||||
/// JSON-serialized `JsValue` array of `{ id, score, metadata, timestamp }`.
|
||||
pub fn search(&self, query_embedding: &[f32], k: usize) -> Result<JsValue, JsValue> {
|
||||
if query_embedding.len() != self.dimension {
|
||||
return Err(JsValue::from_str(&format!(
|
||||
"Query dimension mismatch: expected {}, got {}",
|
||||
self.dimension,
|
||||
query_embedding.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let mut scored: Vec<(usize, f32)> = self
|
||||
.embeddings
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, e)| (i, helpers::cosine_similarity(query_embedding, &e.vector)))
|
||||
.collect();
|
||||
|
||||
// Sort descending by similarity.
|
||||
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
let hits: Vec<SearchHit> = scored
|
||||
.into_iter()
|
||||
.take(k)
|
||||
.map(|(i, score)| {
|
||||
let e = &self.embeddings[i];
|
||||
SearchHit {
|
||||
id: e.id.clone(),
|
||||
score: score as f64,
|
||||
metadata: e.metadata.clone(),
|
||||
timestamp: e.timestamp,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_wasm_bindgen::to_value(&hits).map_err(|e| JsValue::from_str(&e.to_string()))
|
||||
}
|
||||
|
||||
/// Search with a time-range filter. Only embeddings whose timestamp falls
|
||||
/// within `[start_time, end_time]` (inclusive) are considered.
|
||||
pub fn search_filtered(
|
||||
&self,
|
||||
query_embedding: &[f32],
|
||||
k: usize,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
) -> Result<JsValue, JsValue> {
|
||||
if query_embedding.len() != self.dimension {
|
||||
return Err(JsValue::from_str(&format!(
|
||||
"Query dimension mismatch: expected {}, got {}",
|
||||
self.dimension,
|
||||
query_embedding.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let mut scored: Vec<(usize, f32)> = self
|
||||
.embeddings
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, e)| e.timestamp >= start_time && e.timestamp <= end_time)
|
||||
.map(|(i, e)| (i, helpers::cosine_similarity(query_embedding, &e.vector)))
|
||||
.collect();
|
||||
|
||||
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
let hits: Vec<SearchHit> = scored
|
||||
.into_iter()
|
||||
.take(k)
|
||||
.map(|(i, score)| {
|
||||
let e = &self.embeddings[i];
|
||||
SearchHit {
|
||||
id: e.id.clone(),
|
||||
score: score as f64,
|
||||
metadata: e.metadata.clone(),
|
||||
timestamp: e.timestamp,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_wasm_bindgen::to_value(&hits).map_err(|e| JsValue::from_str(&e.to_string()))
|
||||
}
|
||||
|
||||
// -- deduplication -----------------------------------------------------
|
||||
|
||||
/// Check whether `embedding` is a near-duplicate of any stored embedding.
|
||||
///
|
||||
/// Returns `true` when the cosine similarity to any existing embedding is
|
||||
/// greater than or equal to `threshold`.
|
||||
pub fn is_duplicate(&self, embedding: &[f32], threshold: f32) -> bool {
|
||||
self.embeddings
|
||||
.iter()
|
||||
.any(|e| helpers::cosine_similarity(embedding, &e.vector) >= threshold)
|
||||
}
|
||||
|
||||
// -- stats / accessors -------------------------------------------------
|
||||
|
||||
/// Number of stored embeddings.
|
||||
pub fn len(&self) -> usize {
|
||||
self.embeddings.len()
|
||||
}
|
||||
|
||||
/// Returns true if no embeddings are stored.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.embeddings.is_empty()
|
||||
}
|
||||
|
||||
/// Return pipeline statistics as a JSON string.
|
||||
pub fn stats(&self) -> String {
|
||||
serde_json::json!({
|
||||
"dimension": self.dimension,
|
||||
"total_embeddings": self.embeddings.len(),
|
||||
"memory_estimate_bytes": self.embeddings.len() * (self.dimension * 4 + 128),
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
// -- text embedding (demo / hash-based) --------------------------------
|
||||
|
||||
/// Generate a simple deterministic embedding from text.
|
||||
///
|
||||
/// This uses a hash-based approach and is **not** a real neural embedding.
|
||||
/// Suitable for demos and testing only.
|
||||
pub fn embed_text(&self, text: &str) -> Vec<f32> {
|
||||
helpers::hash_embed(text, self.dimension)
|
||||
}
|
||||
|
||||
/// Batch-embed multiple texts.
|
||||
///
|
||||
/// `texts` must be a JS `Array<string>`. Returns a JS `Array<Float32Array>`.
|
||||
pub fn batch_embed(&self, texts: JsValue) -> Result<JsValue, JsValue> {
|
||||
let text_list: Vec<String> = serde_wasm_bindgen::from_value(texts)
|
||||
.map_err(|e| JsValue::from_str(&format!("Failed to deserialize texts: {e}")))?;
|
||||
|
||||
let results: Vec<Vec<f32>> = text_list
|
||||
.iter()
|
||||
.map(|t| helpers::hash_embed(t, self.dimension))
|
||||
.collect();
|
||||
|
||||
serde_wasm_bindgen::to_value(&results).map_err(|e| JsValue::from_str(&e.to_string()))
|
||||
}
|
||||
|
||||
// -- safety ------------------------------------------------------------
|
||||
|
||||
/// Run a lightweight safety check on `content`.
|
||||
///
|
||||
/// Returns one of:
|
||||
/// - `"deny"` -- content contains patterns that should not be stored
|
||||
/// (e.g. credit card numbers, SSNs).
|
||||
/// - `"redact"` -- content contains potentially sensitive information
|
||||
/// that could be redacted.
|
||||
/// - `"allow"` -- content appears safe.
|
||||
pub fn safety_check(&self, content: &str) -> String {
|
||||
helpers::safety_classify(content).to_string()
|
||||
}
|
||||
|
||||
// -- query routing -----------------------------------------------------
|
||||
|
||||
/// Route a query string to the optimal search backend based on simple
|
||||
/// keyword heuristics.
|
||||
///
|
||||
/// Returns one of: `"Graph"`, `"Temporal"`, `"Keyword"`, `"Semantic"`.
|
||||
pub fn route_query(&self, query: &str) -> String {
|
||||
helpers::route_query(query).to_string()
|
||||
}
|
||||
}
|
||||
461
vendor/ruvector/examples/OSpipe/src/wasm/helpers.rs
vendored
Normal file
461
vendor/ruvector/examples/OSpipe/src/wasm/helpers.rs
vendored
Normal file
@@ -0,0 +1,461 @@
|
||||
//! Pure helper functions used by the WASM bindings.
|
||||
//!
|
||||
//! These functions have no WASM dependencies and can be tested on any target.
|
||||
|
||||
/// Cosine similarity between two vectors.
|
||||
///
|
||||
/// Returns 0.0 when either vector has zero magnitude.
|
||||
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
debug_assert_eq!(a.len(), b.len(), "vectors must be same length");
|
||||
|
||||
let mut dot: f32 = 0.0;
|
||||
let mut mag_a: f32 = 0.0;
|
||||
let mut mag_b: f32 = 0.0;
|
||||
|
||||
for i in 0..a.len() {
|
||||
dot += a[i] * b[i];
|
||||
mag_a += a[i] * a[i];
|
||||
mag_b += b[i] * b[i];
|
||||
}
|
||||
|
||||
let denom = mag_a.sqrt() * mag_b.sqrt();
|
||||
if denom == 0.0 {
|
||||
0.0
|
||||
} else {
|
||||
dot / denom
|
||||
}
|
||||
}
|
||||
|
||||
/// Produce a deterministic pseudo-embedding from text using a simple hash.
|
||||
///
|
||||
/// The algorithm:
|
||||
/// 1. Hash each character position into a seed.
|
||||
/// 2. Use the seed to generate a float in [-1, 1].
|
||||
/// 3. L2-normalise the resulting vector.
|
||||
///
|
||||
/// This is NOT a real embedding model -- it is only useful for demos and
|
||||
/// testing that the WASM plumbing works end-to-end.
|
||||
pub fn hash_embed(text: &str, dimension: usize) -> Vec<f32> {
|
||||
let mut vec = vec![0.0f32; dimension];
|
||||
let bytes = text.as_bytes();
|
||||
|
||||
for (i, slot) in vec.iter_mut().enumerate() {
|
||||
// Mix byte values into the slot.
|
||||
let mut h: u64 = 0xcbf29ce484222325; // FNV-1a offset basis
|
||||
for (j, &b) in bytes.iter().enumerate() {
|
||||
h ^= (b as u64)
|
||||
.wrapping_add((i as u64).wrapping_mul(31))
|
||||
.wrapping_add(j as u64);
|
||||
h = h.wrapping_mul(0x100000001b3); // FNV-1a prime
|
||||
}
|
||||
// Map to [-1, 1].
|
||||
*slot = ((h as i64) as f64 / i64::MAX as f64) as f32;
|
||||
}
|
||||
|
||||
// L2 normalise.
|
||||
let mag: f32 = vec.iter().map(|v| v * v).sum::<f32>().sqrt();
|
||||
if mag > 0.0 {
|
||||
for v in &mut vec {
|
||||
*v /= mag;
|
||||
}
|
||||
}
|
||||
|
||||
vec
|
||||
}
|
||||
|
||||
/// Check for credit-card-like patterns: 4 groups of 4 digits separated by
|
||||
/// spaces or dashes (or no separator).
|
||||
pub fn has_credit_card_pattern(content: &str) -> bool {
|
||||
// Strategy: scan for sequences of 16 digits (possibly with separators).
|
||||
let digits_only: String = content.chars().filter(|c| c.is_ascii_digit()).collect();
|
||||
|
||||
// Quick check: must have at least 16 digits somewhere.
|
||||
if digits_only.len() < 16 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Look for the formatted pattern: DDDD[-/ ]DDDD[-/ ]DDDD[-/ ]DDDD
|
||||
// We do a simple windowed scan on the original string.
|
||||
let chars: Vec<char> = content.chars().collect();
|
||||
let len = chars.len();
|
||||
let mut i = 0;
|
||||
|
||||
while i < len {
|
||||
if let Some(end) = try_parse_cc_at(&chars, i) {
|
||||
// Verify the group doesn't continue with more digits (avoid
|
||||
// matching longer numeric strings that aren't cards).
|
||||
if end >= len || !chars[end].is_ascii_digit() {
|
||||
// Also make sure it didn't start as part of a longer number.
|
||||
if i == 0 || !chars[i - 1].is_ascii_digit() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
i = end;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Try to parse a credit-card-like pattern starting at position `start`.
|
||||
/// Returns the index past the last consumed character on success.
|
||||
fn try_parse_cc_at(chars: &[char], start: usize) -> Option<usize> {
|
||||
let mut pos = start;
|
||||
for group in 0..4 {
|
||||
// Expect 4 digits.
|
||||
for _ in 0..4 {
|
||||
if pos >= chars.len() || !chars[pos].is_ascii_digit() {
|
||||
return None;
|
||||
}
|
||||
pos += 1;
|
||||
}
|
||||
// After the first 3 groups, allow an optional separator.
|
||||
if group < 3 && pos < chars.len() && (chars[pos] == '-' || chars[pos] == ' ') {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
Some(pos)
|
||||
}
|
||||
|
||||
/// Check for SSN-like patterns: XXX-XX-XXXX
|
||||
pub fn has_ssn_pattern(content: &str) -> bool {
|
||||
let chars: Vec<char> = content.chars().collect();
|
||||
let len = chars.len();
|
||||
|
||||
// Pattern length: 3 + 1 + 2 + 1 + 4 = 11
|
||||
if len < 11 {
|
||||
return false;
|
||||
}
|
||||
|
||||
for i in 0..=len - 11 {
|
||||
// Must not be preceded by a digit.
|
||||
if i > 0 && chars[i - 1].is_ascii_digit() {
|
||||
continue;
|
||||
}
|
||||
// Must not be followed by a digit.
|
||||
if i + 11 < len && chars[i + 11].is_ascii_digit() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if chars[i].is_ascii_digit()
|
||||
&& chars[i + 1].is_ascii_digit()
|
||||
&& chars[i + 2].is_ascii_digit()
|
||||
&& chars[i + 3] == '-'
|
||||
&& chars[i + 4].is_ascii_digit()
|
||||
&& chars[i + 5].is_ascii_digit()
|
||||
&& chars[i + 6] == '-'
|
||||
&& chars[i + 7].is_ascii_digit()
|
||||
&& chars[i + 8].is_ascii_digit()
|
||||
&& chars[i + 9].is_ascii_digit()
|
||||
&& chars[i + 10].is_ascii_digit()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Simple safety classification for content.
|
||||
///
|
||||
/// Returns `"deny"`, `"redact"`, or `"allow"`.
|
||||
///
|
||||
/// Classification matches native `SafetyGate::check`:
|
||||
/// - Credit card patterns -> "redact"
|
||||
/// - SSN patterns -> "redact"
|
||||
/// - Email patterns -> "redact"
|
||||
/// - Custom sensitive keywords -> "deny"
|
||||
pub fn safety_classify(content: &str) -> &'static str {
|
||||
// PII patterns are redacted (matching native SafetyGate behavior)
|
||||
if has_credit_card_pattern(content) {
|
||||
return "redact";
|
||||
}
|
||||
if has_ssn_pattern(content) {
|
||||
return "redact";
|
||||
}
|
||||
if has_email_pattern(content) {
|
||||
return "redact";
|
||||
}
|
||||
|
||||
// Custom sensitive keywords are denied (matching native custom_patterns -> Deny)
|
||||
let lower = content.to_lowercase();
|
||||
let deny_keywords = [
|
||||
"password",
|
||||
"secret",
|
||||
"api_key",
|
||||
"api-key",
|
||||
"apikey",
|
||||
"token",
|
||||
"private_key",
|
||||
"private-key",
|
||||
];
|
||||
for kw in &deny_keywords {
|
||||
if lower.contains(kw) {
|
||||
return "deny";
|
||||
}
|
||||
}
|
||||
|
||||
"allow"
|
||||
}
|
||||
|
||||
/// Check for email-like patterns: local@domain.tld
|
||||
pub fn has_email_pattern(content: &str) -> bool {
|
||||
let chars: Vec<char> = content.chars().collect();
|
||||
let len = chars.len();
|
||||
|
||||
for i in 0..len {
|
||||
if chars[i] == '@' {
|
||||
// Must have at least one local-part char before '@'
|
||||
if i == 0 || chars[i - 1].is_whitespace() {
|
||||
continue;
|
||||
}
|
||||
// Must have at least one domain char and a dot after '@'
|
||||
if i + 1 >= len || chars[i + 1].is_whitespace() {
|
||||
continue;
|
||||
}
|
||||
// Scan backwards to find start of local part
|
||||
let mut start = i;
|
||||
while start > 0 && is_email_char(chars[start - 1]) {
|
||||
start -= 1;
|
||||
}
|
||||
if start == i {
|
||||
continue;
|
||||
}
|
||||
// Scan forwards to find end of domain
|
||||
let mut end = i + 1;
|
||||
let mut has_dot = false;
|
||||
while end < len && is_domain_char(chars[end]) {
|
||||
if chars[end] == '.' {
|
||||
has_dot = true;
|
||||
}
|
||||
end += 1;
|
||||
}
|
||||
if has_dot && end > i + 3 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn is_email_char(c: char) -> bool {
|
||||
c.is_ascii_alphanumeric() || c == '.' || c == '+' || c == '-' || c == '_'
|
||||
}
|
||||
|
||||
fn is_domain_char(c: char) -> bool {
|
||||
c.is_ascii_alphanumeric() || c == '.' || c == '-'
|
||||
}
|
||||
|
||||
/// Route a query string to the optimal search backend.
|
||||
///
|
||||
/// Returns `"Temporal"`, `"Graph"`, `"Keyword"`, or `"Hybrid"`.
|
||||
///
|
||||
/// Routing heuristics (matching native `QueryRouter::route`):
|
||||
/// - Temporal keywords ("yesterday", "last week", etc.) -> Temporal
|
||||
/// - Graph keywords ("related to", "connected to", etc.) -> Graph
|
||||
/// - Quoted exact phrases -> Keyword
|
||||
/// - Short queries (1-2 words) -> Keyword
|
||||
/// - Everything else -> Hybrid
|
||||
pub fn route_query(query: &str) -> &'static str {
|
||||
let lower = query.to_lowercase();
|
||||
let word_count = lower.split_whitespace().count();
|
||||
|
||||
// Temporal patterns (checked first, matching native router order)
|
||||
let temporal_keywords = [
|
||||
"yesterday",
|
||||
"last week",
|
||||
"last month",
|
||||
"today",
|
||||
"this morning",
|
||||
"this afternoon",
|
||||
"hours ago",
|
||||
"minutes ago",
|
||||
"days ago",
|
||||
"between",
|
||||
"before",
|
||||
"after",
|
||||
];
|
||||
for kw in &temporal_keywords {
|
||||
if lower.contains(kw) {
|
||||
return "Temporal";
|
||||
}
|
||||
}
|
||||
|
||||
// Graph patterns
|
||||
let graph_keywords = [
|
||||
"related to",
|
||||
"connected to",
|
||||
"linked with",
|
||||
"associated with",
|
||||
"relationship between",
|
||||
];
|
||||
for kw in &graph_keywords {
|
||||
if lower.contains(kw) {
|
||||
return "Graph";
|
||||
}
|
||||
}
|
||||
|
||||
// Exact phrase (quoted)
|
||||
if query.starts_with('"') && query.ends_with('"') {
|
||||
return "Keyword";
|
||||
}
|
||||
|
||||
// Very short queries are better served by keyword
|
||||
if word_count <= 2 {
|
||||
return "Keyword";
|
||||
}
|
||||
|
||||
// Default: hybrid combines the best of both
|
||||
"Hybrid"
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_cosine_similarity_identical() {
|
||||
let v = vec![1.0, 2.0, 3.0];
|
||||
let sim = cosine_similarity(&v, &v);
|
||||
assert!((sim - 1.0).abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cosine_similarity_orthogonal() {
|
||||
let a = vec![1.0, 0.0];
|
||||
let b = vec![0.0, 1.0];
|
||||
let sim = cosine_similarity(&a, &b);
|
||||
assert!(sim.abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cosine_similarity_opposite() {
|
||||
let a = vec![1.0, 0.0];
|
||||
let b = vec![-1.0, 0.0];
|
||||
let sim = cosine_similarity(&a, &b);
|
||||
assert!((sim + 1.0).abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cosine_similarity_zero_vector() {
|
||||
let a = vec![0.0, 0.0];
|
||||
let b = vec![1.0, 2.0];
|
||||
assert_eq!(cosine_similarity(&a, &b), 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hash_embed_deterministic() {
|
||||
let v1 = hash_embed("hello world", 128);
|
||||
let v2 = hash_embed("hello world", 128);
|
||||
assert_eq!(v1, v2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hash_embed_normalized() {
|
||||
let v = hash_embed("test text", 64);
|
||||
let mag: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
assert!(
|
||||
(mag - 1.0).abs() < 1e-4,
|
||||
"magnitude should be ~1.0, got {mag}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hash_embed_different_texts_differ() {
|
||||
let v1 = hash_embed("hello", 64);
|
||||
let v2 = hash_embed("world", 64);
|
||||
assert_ne!(v1, v2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_credit_card_pattern() {
|
||||
assert!(has_credit_card_pattern("my card is 1234 5678 9012 3456"));
|
||||
assert!(has_credit_card_pattern("cc: 1234-5678-9012-3456"));
|
||||
assert!(has_credit_card_pattern("number 1234567890123456 here"));
|
||||
assert!(!has_credit_card_pattern("short 123456"));
|
||||
assert!(!has_credit_card_pattern("no cards here"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_ssn_pattern() {
|
||||
assert!(has_ssn_pattern("ssn is 123-45-6789"));
|
||||
assert!(has_ssn_pattern("start 999-99-9999 end"));
|
||||
assert!(!has_ssn_pattern("not a ssn 12-345-6789"));
|
||||
assert!(!has_ssn_pattern("1234-56-7890")); // preceded by extra digit
|
||||
assert!(!has_ssn_pattern("no ssn here"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_redact_cc() {
|
||||
assert_eq!(safety_classify("pay with 4111-1111-1111-1111"), "redact");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_redact_ssn() {
|
||||
assert_eq!(safety_classify("my ssn 123-45-6789"), "redact");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_redact_email() {
|
||||
assert_eq!(safety_classify("contact user@example.com"), "redact");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_deny_password() {
|
||||
assert_eq!(safety_classify("my password is foo"), "deny");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_deny_api_key() {
|
||||
assert_eq!(safety_classify("api_key: sk-abc123"), "deny");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_safety_classify_allow() {
|
||||
assert_eq!(safety_classify("the weather is nice"), "allow");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_email_pattern() {
|
||||
assert!(has_email_pattern("contact user@example.com please"));
|
||||
assert!(has_email_pattern("email: alice@test.org"));
|
||||
assert!(!has_email_pattern("not an email"));
|
||||
assert!(!has_email_pattern("@ alone"));
|
||||
assert!(!has_email_pattern("no@d"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_route_query_temporal() {
|
||||
assert_eq!(route_query("what happened yesterday"), "Temporal");
|
||||
assert_eq!(route_query("show me events from last week"), "Temporal");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_route_query_graph() {
|
||||
assert_eq!(route_query("documents related to authentication"), "Graph");
|
||||
assert_eq!(route_query("things connected to the API module"), "Graph");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_route_query_keyword_quoted() {
|
||||
assert_eq!(route_query("\"exact phrase search\""), "Keyword");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_route_query_keyword_short() {
|
||||
assert_eq!(route_query("rust programming"), "Keyword");
|
||||
assert_eq!(route_query("hello"), "Keyword");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_route_query_hybrid() {
|
||||
assert_eq!(route_query("something about machine learning"), "Hybrid");
|
||||
assert_eq!(route_query("explain how embeddings work"), "Hybrid");
|
||||
}
|
||||
}
|
||||
15
vendor/ruvector/examples/OSpipe/src/wasm/mod.rs
vendored
Normal file
15
vendor/ruvector/examples/OSpipe/src/wasm/mod.rs
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
//! WASM bindings for OSpipe.
|
||||
//!
|
||||
//! Provides browser-based personal AI memory search using vector embeddings.
|
||||
//!
|
||||
//! - [`helpers`] - Pure helper functions (cosine similarity, hashing, safety
|
||||
//! checks, query routing) that are available on all targets for testing.
|
||||
//! - `bindings` - wasm-bindgen exports, gated behind `target_arch = "wasm32"`.
|
||||
|
||||
/// Pure helper functions with no WASM dependencies.
|
||||
/// Always compiled so that unit tests can run on the host target.
|
||||
pub mod helpers;
|
||||
|
||||
/// wasm-bindgen exports. Only compiled for the `wasm32` target.
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
pub mod bindings;
|
||||
Reference in New Issue
Block a user