Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/Cargo.toml (vendored, new file, 18 lines)
@@ -0,0 +1,18 @@
[package]
name = "rvf-adapter-agentdb"
version = "0.1.0"
edition = "2021"
description = "AgentDB adapter for RuVector Format -- maps agent memory to RVF segments"
license = "MIT OR Apache-2.0"

[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-index = { path = "../../rvf-index", features = ["std"] }

[dev-dependencies]
tempfile = "3"
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/index_adapter.rs (vendored, new file, 323 lines)
@@ -0,0 +1,323 @@
//! Maps agentdb HNSW operations to RVF INDEX_SEG layers.
//!
//! Bridges agentdb's HNSW index lifecycle to the three-layer progressive
//! indexing model (Layer A / B / C) defined in `rvf-index`.

use std::collections::BTreeSet;

use rvf_index::builder::{build_full_index, build_layer_a, build_layer_b, build_layer_c};
use rvf_index::distance::{cosine_distance, l2_distance};
use rvf_index::hnsw::{HnswConfig, HnswGraph};
use rvf_index::layers::{IndexLayer, LayerA, LayerB, LayerC};
use rvf_index::progressive::ProgressiveIndex;
use rvf_index::traits::InMemoryVectorStore;

/// Boxed distance function shared by the build and search paths.
type DistanceFn = Box<dyn Fn(&[f32], &[f32]) -> f32>;

/// Configuration for the RVF index adapter.
#[derive(Clone, Debug)]
pub struct IndexAdapterConfig {
    /// HNSW M parameter.
    pub m: usize,
    /// HNSW M0 (layer-0 neighbors).
    pub m0: usize,
    /// ef_construction beam width.
    pub ef_construction: usize,
    /// ef_search beam width for queries.
    pub ef_search: usize,
    /// Use cosine distance (default true for agentdb text embeddings).
    pub use_cosine: bool,
    /// Hot node fraction for Layer B (0.0 - 1.0).
    pub hot_fraction: f32,
}

impl Default for IndexAdapterConfig {
    fn default() -> Self {
        Self {
            m: 16,
            m0: 32,
            ef_construction: 200,
            ef_search: 100,
            use_cosine: true,
            hot_fraction: 0.2,
        }
    }
}

/// Adapter that maps agentdb HNSW operations to RVF INDEX_SEG layers.
///
/// Manages the full HNSW graph and can extract progressive layers (A/B/C)
/// for serialization into INDEX_SEG segments.
pub struct RvfIndexAdapter {
    config: IndexAdapterConfig,
    graph: Option<HnswGraph>,
    vectors: Vec<Vec<f32>>,
    id_map: Vec<u64>,
    progressive: ProgressiveIndex,
    loaded_layers: Vec<IndexLayer>,
}

impl RvfIndexAdapter {
    /// Create a new index adapter with the given configuration.
    pub fn new(config: IndexAdapterConfig) -> Self {
        Self {
            config,
            graph: None,
            vectors: Vec::new(),
            id_map: Vec::new(),
            progressive: ProgressiveIndex::new(),
            loaded_layers: Vec::new(),
        }
    }

    /// Build the full HNSW index from a set of vectors and IDs.
    ///
    /// This replaces any existing index.
    pub fn build(&mut self, vectors: Vec<Vec<f32>>, ids: Vec<u64>) {
        let n = vectors.len();
        if n == 0 {
            return;
        }

        let hnsw_config = HnswConfig {
            m: self.config.m,
            m0: self.config.m0,
            ef_construction: self.config.ef_construction,
        };

        let store = InMemoryVectorStore::new(vectors.clone());
        let distance_fn = self.distance_fn();

        // Generate deterministic pseudo-random values for level selection.
        let rng_values: Vec<f64> = (0..n)
            .map(|i| {
                let seed = (i as u64)
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                let val = (seed >> 33) as f64 / (1u64 << 31) as f64;
                val.clamp(0.001, 0.999)
            })
            .collect();

        let graph = build_full_index(&store, n, &hnsw_config, &rng_values, &distance_fn);

        self.vectors = vectors;
        self.id_map = ids;
        self.graph = Some(graph);
    }

    /// Extract Layer A (entry points + coarse routing) from the current graph.
    pub fn extract_layer_a(&self) -> Option<LayerA> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();

        // Simple centroid computation: split vectors into 2 partitions.
        let mid = n / 2;
        let dim = self.vectors.first().map_or(0, |v| v.len());

        let centroid_0 = compute_centroid(&self.vectors[..mid], dim);
        let centroid_1 = if mid < n {
            compute_centroid(&self.vectors[mid..], dim)
        } else {
            centroid_0.clone()
        };

        let centroids = vec![centroid_0, centroid_1];
        let assignments: Vec<u32> = (0..n).map(|i| if i < mid { 0 } else { 1 }).collect();

        Some(build_layer_a(graph, &centroids, &assignments, n as u64))
    }

    /// Extract Layer B (hot region partial adjacency) from the current graph.
    pub fn extract_layer_b(&self) -> Option<LayerB> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();
        let hot_count = ((n as f32) * self.config.hot_fraction).ceil() as usize;
        let hot_ids: BTreeSet<u64> = (0..hot_count as u64).collect();
        Some(build_layer_b(graph, &hot_ids))
    }

    /// Extract Layer C (full adjacency) from the current graph.
    pub fn extract_layer_c(&self) -> Option<LayerC> {
        let graph = self.graph.as_ref()?;
        Some(build_layer_c(graph))
    }

    /// Load progressive layers and configure the progressive index for search.
    pub fn load_progressive(&mut self, layers: &[IndexLayer]) {
        self.loaded_layers = layers.to_vec();

        let mut idx = ProgressiveIndex::new();
        for layer in layers {
            match layer {
                IndexLayer::A => {
                    idx.layer_a = self.extract_layer_a();
                }
                IndexLayer::B => {
                    idx.layer_b = self.extract_layer_b();
                }
                IndexLayer::C => {
                    idx.layer_c = self.extract_layer_c();
                }
            }
        }
        self.progressive = idx;
    }

    /// Search using the progressive index with whatever layers are loaded.
    pub fn search(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        self.progressive
            .search_with_distance(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Search using the full HNSW graph directly (bypasses progressive layers).
    pub fn search_full(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let graph = match self.graph.as_ref() {
            Some(g) => g,
            None => return Vec::new(),
        };
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        graph.search(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Get the node count in the HNSW graph.
    pub fn node_count(&self) -> usize {
        self.graph.as_ref().map_or(0, |g| g.node_count())
    }

    /// Get the currently loaded layers.
    pub fn loaded_layers(&self) -> &[IndexLayer] {
        &self.loaded_layers
    }

    fn distance_fn(&self) -> DistanceFn {
        if self.config.use_cosine {
            Box::new(cosine_distance)
        } else {
            Box::new(l2_distance)
        }
    }
}

/// Compute the centroid of a set of vectors.
fn compute_centroid(vectors: &[Vec<f32>], dim: usize) -> Vec<f32> {
    if vectors.is_empty() || dim == 0 {
        return vec![0.0; dim];
    }
    let n = vectors.len() as f32;
    let mut centroid = vec![0.0f32; dim];
    for v in vectors {
        for (i, &val) in v.iter().enumerate().take(dim) {
            centroid[i] += val;
        }
    }
    for c in &mut centroid {
        *c /= n;
    }
    centroid
}

#[cfg(test)]
mod tests {
    use super::*;

    fn make_vectors(n: usize, dim: usize) -> (Vec<Vec<f32>>, Vec<u64>) {
        let vecs: Vec<Vec<f32>> = (0..n)
            .map(|i| (0..dim).map(|d| (i * dim + d) as f32).collect())
            .collect();
        let ids: Vec<u64> = (0..n as u64).collect();
        (vecs, ids)
    }

    #[test]
    fn build_and_search_full() {
        let (vecs, ids) = make_vectors(100, 8);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        assert_eq!(adapter.node_count(), 100);

        let results = adapter.search_full(&vecs[50], 5);
        assert!(!results.is_empty());
        assert_eq!(results[0].0, 50);
    }

    #[test]
    fn extract_layers() {
        let (vecs, ids) = make_vectors(50, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs, ids);

        let layer_a = adapter.extract_layer_a();
        assert!(layer_a.is_some());
        let la = layer_a.unwrap();
        assert!(!la.entry_points.is_empty());
        assert_eq!(la.centroids.len(), 2);

        let layer_b = adapter.extract_layer_b();
        assert!(layer_b.is_some());

        let layer_c = adapter.extract_layer_c();
        assert!(layer_c.is_some());
    }

    #[test]
    fn progressive_search_with_layers() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        // Load all three layers.
        adapter.load_progressive(&[IndexLayer::A, IndexLayer::B, IndexLayer::C]);

        let results = adapter.search(&vecs[25], 5);
        assert!(!results.is_empty());
        // With full Layer C, we should find the exact match.
        assert_eq!(results[0].0, 25);
    }

    #[test]
    fn progressive_layer_a_only() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        adapter.load_progressive(&[IndexLayer::A]);
        let results = adapter.search(&vecs[10], 5);
        // Layer A alone provides coarse results; we just verify non-empty.
        assert!(!results.is_empty());
    }

    #[test]
    fn empty_adapter() {
        let adapter = RvfIndexAdapter::new(IndexAdapterConfig::default());
        assert_eq!(adapter.node_count(), 0);
        let results = adapter.search_full(&[0.0; 4], 5);
        assert!(results.is_empty());
    }

    #[test]
    fn compute_centroid_basic() {
        let vecs = vec![
            vec![1.0, 2.0, 3.0],
            vec![3.0, 4.0, 5.0],
        ];
        let centroid = compute_centroid(&vecs, 3);
        assert_eq!(centroid, vec![2.0, 3.0, 4.0]);
    }
}
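A minimal usage sketch of the adapter above: build the full HNSW graph, load all three progressive layers, then search. It assumes the crate name rvf_adapter_agentdb (from the Cargo.toml package name) and that the consumer also depends directly on rvf-index for the IndexLayer enum.

use rvf_adapter_agentdb::index_adapter::{IndexAdapterConfig, RvfIndexAdapter};
use rvf_index::layers::IndexLayer;

fn main() {
    // 64 deterministic 8-dimensional vectors with IDs 0..64.
    let vectors: Vec<Vec<f32>> = (0..64)
        .map(|i| (0..8).map(|d| (i * 8 + d) as f32).collect())
        .collect();
    let ids: Vec<u64> = (0..64).collect();

    let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
        use_cosine: false, // L2 distance suits these synthetic vectors
        ..Default::default()
    });
    adapter.build(vectors.clone(), ids);

    // Extract all three layers into the progressive index, then query it.
    adapter.load_progressive(&[IndexLayer::A, IndexLayer::B, IndexLayer::C]);
    let hits = adapter.search(&vectors[7], 3);
    println!("top-3: {hits:?}");
}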
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/lib.rs (vendored, new file, 18 lines)
@@ -0,0 +1,18 @@
//! AgentDB adapter for the RuVector Format (RVF).
//!
//! Maps agentdb's vector storage, HNSW index, and memory pattern APIs
//! onto the RVF segment model:
//!
//! - **VEC_SEG**: Raw vector data (episodes, state embeddings)
//! - **INDEX_SEG**: HNSW index layers (A/B/C progressive indexing)
//! - **META_SEG**: Memory pattern metadata (rewards, critiques, tags)
//!
//! Uses the RVText domain profile for text/embedding workloads.

pub mod index_adapter;
pub mod pattern_store;
pub mod vector_store;

pub use index_adapter::RvfIndexAdapter;
pub use pattern_store::{MemoryPattern, RvfPatternStore};
pub use vector_store::RvfVectorStore;
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/pattern_store.rs (vendored, new file, 456 lines)
@@ -0,0 +1,456 @@
//! Memory pattern storage using RVF META_SEG.
//!
//! Stores agentdb memory patterns (task descriptions, rewards, critiques,
//! success flags) as metadata alongside their state-embedding vectors.
//! Patterns can be searched by similarity and filtered by reward threshold.

use std::collections::HashMap;
use std::path::Path;

use rvf_runtime::options::{MetadataEntry, MetadataValue};
use rvf_types::RvfError;

use crate::vector_store::{AgentDbMetric, RvfVectorStore, VectorStoreConfig};

/// A memory pattern stored in the agentdb reasoning bank.
#[derive(Clone, Debug)]
pub struct MemoryPattern {
    /// Unique pattern identifier.
    pub id: u64,
    /// Task description that produced this pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0) indicating quality.
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes about the pattern.
    pub critique: String,
    /// State embedding vector for similarity search.
    pub embedding: Vec<f32>,
}

/// Well-known metadata field IDs for pattern attributes.
mod field_ids {
    pub const TASK: u16 = 0;
    pub const REWARD: u16 = 1;
    pub const SUCCESS: u16 = 2;
    pub const CRITIQUE: u16 = 3;
}

/// RVF-backed memory pattern store for agentdb.
///
/// Stores patterns as vectors (embeddings) with metadata (task, reward,
/// critique, success flag). Supports similarity search with reward filtering.
pub struct RvfPatternStore {
    vector_store: RvfVectorStore,
    patterns: HashMap<u64, PatternMetadata>,
    next_id: u64,
}

/// In-memory metadata for a pattern (kept alongside the RVF store).
#[derive(Clone, Debug)]
struct PatternMetadata {
    task: String,
    reward: f32,
    success: bool,
    critique: String,
}

impl RvfPatternStore {
    /// Create a new pattern store at the given path.
    pub fn create(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::create(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing pattern store.
    pub fn open(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::open(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a memory pattern.
    ///
    /// Returns the assigned pattern ID.
    pub fn store_pattern(&mut self, pattern: MemoryPattern) -> Result<u64, RvfError> {
        let id = if pattern.id > 0 {
            pattern.id
        } else {
            let id = self.next_id;
            self.next_id += 1;
            id
        };

        // Ensure next_id stays ahead of manually assigned IDs.
        if id >= self.next_id {
            self.next_id = id + 1;
        }

        let metadata = vec![
            MetadataEntry {
                field_id: field_ids::TASK,
                value: MetadataValue::String(pattern.task.clone()),
            },
            MetadataEntry {
                field_id: field_ids::REWARD,
                value: MetadataValue::F64(pattern.reward as f64),
            },
            MetadataEntry {
                field_id: field_ids::SUCCESS,
                value: MetadataValue::U64(if pattern.success { 1 } else { 0 }),
            },
            MetadataEntry {
                field_id: field_ids::CRITIQUE,
                value: MetadataValue::String(pattern.critique.clone()),
            },
        ];

        self.vector_store
            .add_vectors(&[pattern.embedding.as_slice()], &[id], Some(&metadata))?;

        self.patterns.insert(
            id,
            PatternMetadata {
                task: pattern.task,
                reward: pattern.reward,
                success: pattern.success,
                critique: pattern.critique,
            },
        );

        Ok(id)
    }

    /// Search for patterns similar to the given embedding.
    ///
    /// Returns matching patterns, with their metadata, sorted by distance
    /// (ascending). Optionally filter by minimum reward score.
    pub fn search_patterns(
        &self,
        query_embedding: &[f32],
        k: usize,
        min_reward: Option<f32>,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let search_k = if min_reward.is_some() { k * 3 } else { k };
        let results = self.vector_store.search(query_embedding, search_k, None)?;

        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                let meta = self.patterns.get(&r.id)?;
                if let Some(threshold) = min_reward {
                    if meta.reward < threshold {
                        return None;
                    }
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: meta.success,
                    critique: meta.critique.clone(),
                })
            })
            .collect();

        filtered.truncate(k);
        Ok(filtered)
    }

    /// Search for patterns that failed (success == false).
    pub fn search_failures(
        &self,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let results = self.vector_store.search(query_embedding, k * 5, None)?;

        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                let meta = self.patterns.get(&r.id)?;
                if meta.success {
                    return None;
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: false,
                    critique: meta.critique.clone(),
                })
            })
            .collect();

        filtered.truncate(k);
        Ok(filtered)
    }

    /// Delete a pattern by ID.
    pub fn delete_pattern(&mut self, id: u64) -> Result<bool, RvfError> {
        let deleted = self.vector_store.delete_vectors(&[id])?;
        self.patterns.remove(&id);
        Ok(deleted > 0)
    }

    /// Get pattern metadata by ID.
    pub fn get_pattern(&self, id: u64) -> Option<PatternSearchResult> {
        let meta = self.patterns.get(&id)?;
        Some(PatternSearchResult {
            id,
            distance: 0.0,
            task: meta.task.clone(),
            reward: meta.reward,
            success: meta.success,
            critique: meta.critique.clone(),
        })
    }

    /// Get aggregate statistics about stored patterns.
    pub fn stats(&self) -> PatternStoreStats {
        let total = self.patterns.len();
        let successful = self.patterns.values().filter(|p| p.success).count();
        let avg_reward = if total > 0 {
            self.patterns.values().map(|p| p.reward as f64).sum::<f64>() / total as f64
        } else {
            0.0
        };

        PatternStoreStats {
            total_patterns: total,
            successful_patterns: successful,
            failed_patterns: total - successful,
            avg_reward,
            vector_count: self.vector_store.len(),
        }
    }

    /// Save the store to disk.
    pub fn save(&mut self) -> Result<(), RvfError> {
        self.vector_store.save()
    }

    /// Get the total number of patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}

/// A pattern search result with full metadata.
#[derive(Clone, Debug)]
pub struct PatternSearchResult {
    pub id: u64,
    pub distance: f32,
    pub task: String,
    pub reward: f32,
    pub success: bool,
    pub critique: String,
}

/// Aggregate statistics for the pattern store.
#[derive(Clone, Debug)]
pub struct PatternStoreStats {
    pub total_patterns: usize,
    pub successful_patterns: usize,
    pub failed_patterns: usize,
    pub avg_reward: f64,
    pub vector_count: u64,
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn dummy_embedding(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: i >= 5,
                critique: format!("critique_{}", i),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        assert_eq!(store.len(), 10);

        let query = dummy_embedding(dim, 7);
        let results = store.search_patterns(&query, 3, None).unwrap();
        assert!(!results.is_empty());
        assert!(results.len() <= 3);
    }

    #[test]
    fn search_with_min_reward() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_reward.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: true,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        let query = dummy_embedding(dim, 5);
        let results = store.search_patterns(&query, 10, Some(0.5)).unwrap();
        assert!(results.iter().all(|r| r.reward >= 0.5));
    }

    #[test]
    fn search_failures() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_fail.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: 0.5,
                success: i % 2 == 0,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        let query = dummy_embedding(dim, 3);
        let results = store.search_failures(&query, 5).unwrap();
        assert!(results.iter().all(|r| !r.success));
    }

    #[test]
    fn delete_pattern() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_del.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        let pattern = MemoryPattern {
            id: 42,
            task: "test".into(),
            reward: 0.9,
            success: true,
            critique: "good".into(),
            embedding: vec![1.0, 2.0, 3.0, 4.0],
        };
        store.store_pattern(pattern).unwrap();
        assert_eq!(store.len(), 1);

        let deleted = store.delete_pattern(42).unwrap();
        assert!(deleted);
        assert_eq!(store.len(), 0);
        assert!(store.get_pattern(42).is_none());
    }

    #[test]
    fn stats() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_stats.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..5u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) * 0.2,
                success: i >= 3,
                critique: String::new(),
                embedding: vec![i as f32; dim],
            };
            store.store_pattern(pattern).unwrap();
        }

        let stats = store.stats();
        assert_eq!(stats.total_patterns, 5);
        assert_eq!(stats.successful_patterns, 2);
        assert_eq!(stats.failed_patterns, 3);
        assert!(stats.avg_reward > 0.0);
    }

    #[test]
    fn get_pattern_by_id() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_get.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        let pattern = MemoryPattern {
            id: 100,
            task: "find_bugs".into(),
            reward: 0.85,
            success: true,
            critique: "good coverage".into(),
            embedding: vec![1.0, 0.0, 0.0, 0.0],
        };
        store.store_pattern(pattern).unwrap();

        let result = store.get_pattern(100).unwrap();
        assert_eq!(result.task, "find_bugs");
        assert_eq!(result.reward, 0.85);
        assert!(result.success);
        assert_eq!(result.critique, "good coverage");

        assert!(store.get_pattern(999).is_none());
    }
}
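An end-to-end sketch of the pattern store above, using the crate-root re-exports from lib.rs and the crate name assumed from Cargo.toml: store a pattern with an auto-assigned ID, search with a reward floor, then persist.

use std::path::Path;
use rvf_adapter_agentdb::{MemoryPattern, RvfPatternStore};
use rvf_types::RvfError;

fn demo(dir: &Path) -> Result<(), RvfError> {
    let mut store = RvfPatternStore::create(&dir.join("bank.rvf"), 8)?;

    // id = 0 lets the store assign the next sequential ID.
    let id = store.store_pattern(MemoryPattern {
        id: 0,
        task: "summarize logs".into(),
        reward: 0.9,
        success: true,
        critique: "concise, missed one error class".into(),
        embedding: vec![0.1; 8],
    })?;

    // Only return neighbors whose reward is at least 0.5.
    let similar = store.search_patterns(&vec![0.1; 8], 5, Some(0.5))?;
    assert!(similar.iter().any(|r| r.id == id));

    store.save() // flushes and closes the underlying RVF file
}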
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/vector_store.rs (vendored, new file, 326 lines)
@@ -0,0 +1,326 @@
//! RVF-backed vector store for agentdb.
//!
//! Wraps [`RvfStore`] to provide the vector CRUD operations that agentdb
//! expects: add, search, delete, get, save, and load.

use std::path::{Path, PathBuf};

use rvf_runtime::options::{
    DistanceMetric, MetadataEntry, QueryOptions, RvfOptions, SearchResult,
};
use rvf_runtime::RvfStore;
use rvf_types::{ErrorCode, RvfError};

/// Distance metric selection matching agentdb's API.
#[derive(Clone, Copy, Debug, Default)]
pub enum AgentDbMetric {
    #[default]
    Cosine,
    L2,
    InnerProduct,
}

impl From<AgentDbMetric> for DistanceMetric {
    fn from(m: AgentDbMetric) -> Self {
        match m {
            AgentDbMetric::Cosine => DistanceMetric::Cosine,
            AgentDbMetric::L2 => DistanceMetric::L2,
            AgentDbMetric::InnerProduct => DistanceMetric::InnerProduct,
        }
    }
}

/// Configuration for the RVF vector store.
#[derive(Clone, Debug)]
pub struct VectorStoreConfig {
    /// Vector dimensionality.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: AgentDbMetric,
    /// HNSW ef_search beam width for queries.
    pub ef_search: u16,
}

impl Default for VectorStoreConfig {
    fn default() -> Self {
        Self {
            dimension: 128,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        }
    }
}

/// RVF-backed vector store that provides the agentdb vector storage interface.
///
/// Maps agentdb operations to RvfStore calls:
/// - `add_vectors` -> `ingest_batch`
/// - `search` -> `query`
/// - `delete_vectors` -> `delete`
/// - `get_vector` -> single-vector query
/// - `save` / `load` -> close / open
pub struct RvfVectorStore {
    store: Option<RvfStore>,
    path: PathBuf,
    config: VectorStoreConfig,
}

impl RvfVectorStore {
    /// Create a new RVF vector store at the given path.
    pub fn create(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
        let rvf_opts = RvfOptions {
            dimension: config.dimension,
            metric: config.metric.into(),
            profile: 1, // RVText profile
            ..Default::default()
        };

        let store = RvfStore::create(path, rvf_opts)?;

        Ok(Self {
            store: Some(store),
            path: path.to_path_buf(),
            config,
        })
    }

    /// Open an existing RVF vector store.
    pub fn open(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
        let store = RvfStore::open(path)?;
        Ok(Self {
            store: Some(store),
            path: path.to_path_buf(),
            config,
        })
    }

    /// Add vectors with their IDs and optional metadata.
    ///
    /// `vectors`: slice of float slices, one per vector.
    /// `ids`: one ID per vector.
    /// `metadata`: optional metadata entries (flat list, one entry per vector).
    pub fn add_vectors(
        &mut self,
        vectors: &[&[f32]],
        ids: &[u64],
        metadata: Option<&[MetadataEntry]>,
    ) -> Result<u64, RvfError> {
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.ingest_batch(vectors, ids, metadata)?;
        Ok(result.accepted)
    }

    /// Search for the k nearest neighbors of a query vector.
    ///
    /// Returns results sorted by distance (ascending).
    pub fn search(
        &self,
        query: &[f32],
        k: usize,
        ef_search: Option<u16>,
    ) -> Result<Vec<SearchResult>, RvfError> {
        let store = self.store.as_ref().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let opts = QueryOptions {
            ef_search: ef_search.unwrap_or(self.config.ef_search),
            ..Default::default()
        };
        store.query(query, k, &opts)
    }

    /// Delete vectors by their IDs.
    pub fn delete_vectors(&mut self, ids: &[u64]) -> Result<u64, RvfError> {
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.delete(ids)?;
        Ok(result.deleted)
    }

    /// Retrieve a single vector by ID.
    ///
    /// Performs a full-store scan: issues one query with a zero vector and
    /// `k` equal to the total vector count, then picks the result whose ID
    /// matches. For small stores this is acceptable; for large stores the
    /// caller should maintain an ID index.
    ///
    /// Returns `None` if the vector is not found or has been deleted.
    pub fn get_vector(&self, id: u64) -> Option<SearchResult> {
        let store = self.store.as_ref()?;
        let status = store.status();
        if status.total_vectors == 0 {
            return None;
        }
        // Query a large k and find the matching ID in results.
        // This is O(n) but correct. Production agentdb should cache vectors.
        let dim = self.config.dimension as usize;
        let zero_query = vec![0.0f32; dim];
        let opts = QueryOptions {
            ef_search: self.config.ef_search,
            ..Default::default()
        };
        let results = store.query(&zero_query, status.total_vectors as usize, &opts).ok()?;
        results.into_iter().find(|r| r.id == id)
    }

    /// Save the store (flushes and closes the underlying RVF file).
    pub fn save(&mut self) -> Result<(), RvfError> {
        if let Some(store) = self.store.take() {
            store.close()?;
        }
        Ok(())
    }

    /// Reload the store from disk.
    pub fn load(&mut self) -> Result<(), RvfError> {
        if self.store.is_some() {
            return Ok(());
        }
        let store = RvfStore::open(&self.path)?;
        self.store = Some(store);
        Ok(())
    }

    /// Get the current vector count.
    pub fn len(&self) -> u64 {
        self.store.as_ref().map_or(0, |s| s.status().total_vectors)
    }

    /// Returns true if the store is empty.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Run compaction to reclaim space from deleted vectors.
    pub fn compact(&mut self) -> Result<u64, RvfError> {
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.compact()?;
        Ok(result.bytes_reclaimed)
    }

    /// Get the file path of the underlying RVF store.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Get the store configuration.
    pub fn config(&self) -> &VectorStoreConfig {
        &self.config
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use rvf_runtime::options::MetadataValue;
    use tempfile::TempDir;

    fn make_config(dim: u16) -> VectorStoreConfig {
        VectorStoreConfig {
            dimension: dim,
            metric: AgentDbMetric::L2,
            ef_search: 100,
        }
    }

    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        let v3 = [0.0f32, 0.0, 1.0, 0.0];

        let accepted = store
            .add_vectors(&[&v1, &v2, &v3], &[10, 20, 30], None)
            .unwrap();
        assert_eq!(accepted, 3);

        let results = store.search(&[1.0, 0.0, 0.0, 0.0], 2, None).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 10);
        assert!(results[0].distance < f32::EPSILON);
    }

    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_del.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let vecs: Vec<[f32; 4]> = (0..10).map(|i| [i as f32, 0.0, 0.0, 0.0]).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (0..10).collect();

        store.add_vectors(&refs, &ids, None).unwrap();

        let deleted = store.delete_vectors(&[0, 2, 4]).unwrap();
        assert_eq!(deleted, 3);
        assert_eq!(store.len(), 7);

        let reclaimed = store.compact().unwrap();
        assert!(reclaimed > 0);
        assert_eq!(store.len(), 7);
    }

    #[test]
    fn save_and_load() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_persist.rvf");

        let config = make_config(4);
        {
            let mut store = RvfVectorStore::create(&path, config.clone()).unwrap();
            let v1 = [1.0f32, 2.0, 3.0, 4.0];
            store.add_vectors(&[&v1], &[42], None).unwrap();
            store.save().unwrap();
        }

        {
            let store = RvfVectorStore::open(&path, config).unwrap();
            assert_eq!(store.len(), 1);
            let results = store.search(&[1.0, 2.0, 3.0, 4.0], 1, None).unwrap();
            assert_eq!(results[0].id, 42);
        }
    }

    #[test]
    fn add_with_metadata() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_meta.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];

        let metadata = vec![
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_a".into()),
            },
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_b".into()),
            },
        ];

        let accepted = store
            .add_vectors(&[&v1, &v2], &[1, 2], Some(&metadata))
            .unwrap();
        assert_eq!(accepted, 2);
    }

    #[test]
    fn empty_store() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_empty.rvf");

        let store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);

        let results = store.search(&[0.0, 0.0, 0.0, 0.0], 5, None).unwrap();
        assert!(results.is_empty());
    }
}
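A short sketch of the raw vector store above: create, ingest, search, delete, and compact. The crate name is again assumed from Cargo.toml; all calls are taken directly from the API defined in this file.

use std::path::Path;
use rvf_adapter_agentdb::vector_store::{AgentDbMetric, RvfVectorStore, VectorStoreConfig};
use rvf_types::RvfError;

fn demo(path: &Path) -> Result<(), RvfError> {
    let config = VectorStoreConfig {
        dimension: 4,
        metric: AgentDbMetric::L2,
        ef_search: 100,
    };
    let mut store = RvfVectorStore::create(path, config)?;

    let a = [1.0f32, 0.0, 0.0, 0.0];
    let b = [0.0f32, 1.0, 0.0, 0.0];
    store.add_vectors(&[&a, &b], &[1, 2], None)?;

    // Nearest neighbor of `a` should be vector 1 at distance ~0.
    let hits = store.search(&a, 1, None)?;
    assert_eq!(hits[0].id, 1);

    store.delete_vectors(&[2])?;
    let reclaimed = store.compact()?; // reclaim space from the tombstoned vector
    println!("reclaimed {reclaimed} bytes");
    Ok(())
}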
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/Cargo.toml (vendored, new file, 20 lines)
@@ -0,0 +1,20 @@
[package]
name = "rvf-adapter-agentic-flow"
version = "0.1.0"
edition = "2021"
description = "Agentic-flow swarm adapter for RuVector Format -- maps inter-agent memory, coordination state, and learning patterns to RVF segments"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"

[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-crypto = { path = "../../rvf-crypto", features = ["std"] }

[dev-dependencies]
tempfile = "3"
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/config.rs (vendored, new file, 148 lines)
@@ -0,0 +1,148 @@
//! Configuration for the agentic-flow swarm adapter.

use std::path::PathBuf;

/// Configuration for the RVF-backed agentic-flow swarm store.
#[derive(Clone, Debug)]
pub struct AgenticFlowConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match embeddings used by agents).
    pub dimension: u16,
    /// Unique identifier for this agent.
    pub agent_id: String,
    /// Whether to log consensus events in a WITNESS_SEG audit trail.
    pub enable_witness: bool,
    /// Optional swarm group identifier for multi-swarm deployments.
    pub swarm_id: Option<String>,
}

impl AgenticFlowConfig {
    /// Create a new configuration with required parameters.
    ///
    /// Uses sensible defaults: dimension=384, witness enabled, no swarm group.
    pub fn new(data_dir: impl Into<PathBuf>, agent_id: impl Into<String>) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension: 384,
            agent_id: agent_id.into(),
            enable_witness: true,
            swarm_id: None,
        }
    }

    /// Set the embedding dimension.
    pub fn with_dimension(mut self, dimension: u16) -> Self {
        self.dimension = dimension;
        self
    }

    /// Enable or disable witness audit trails.
    pub fn with_witness(mut self, enable: bool) -> Self {
        self.enable_witness = enable;
        self
    }

    /// Set the swarm group identifier.
    pub fn with_swarm_id(mut self, swarm_id: impl Into<String>) -> Self {
        self.swarm_id = Some(swarm_id.into());
        self
    }

    /// Return the path to the main vector store RVF file.
    pub fn store_path(&self) -> PathBuf {
        self.data_dir.join("swarm.rvf")
    }

    /// Return the path to the witness chain file.
    pub fn witness_path(&self) -> PathBuf {
        self.data_dir.join("witness.bin")
    }

    /// Ensure the data directory exists.
    pub fn ensure_dirs(&self) -> std::io::Result<()> {
        std::fs::create_dir_all(&self.data_dir)
    }

    /// Validate the configuration.
    pub fn validate(&self) -> Result<(), ConfigError> {
        if self.dimension == 0 {
            return Err(ConfigError::InvalidDimension);
        }
        if self.agent_id.is_empty() {
            return Err(ConfigError::EmptyAgentId);
        }
        Ok(())
    }
}

/// Errors specific to adapter configuration.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
    /// Agent ID must not be empty.
    EmptyAgentId,
}

impl std::fmt::Display for ConfigError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
            Self::EmptyAgentId => write!(f, "agent_id must not be empty"),
        }
    }
}

impl std::error::Error for ConfigError {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn config_defaults() {
        let cfg = AgenticFlowConfig::new("/tmp/test", "agent-1");
        assert_eq!(cfg.dimension, 384);
        assert!(cfg.enable_witness);
        assert!(cfg.swarm_id.is_none());
        assert_eq!(cfg.agent_id, "agent-1");
    }

    #[test]
    fn config_paths() {
        let cfg = AgenticFlowConfig::new("/data/swarm", "a1");
        assert_eq!(cfg.store_path(), Path::new("/data/swarm/swarm.rvf"));
        assert_eq!(cfg.witness_path(), Path::new("/data/swarm/witness.bin"));
    }

    #[test]
    fn validate_zero_dimension() {
        let cfg = AgenticFlowConfig::new("/tmp", "a1").with_dimension(0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }

    #[test]
    fn validate_empty_agent_id() {
        let cfg = AgenticFlowConfig::new("/tmp", "");
        assert_eq!(cfg.validate(), Err(ConfigError::EmptyAgentId));
    }

    #[test]
    fn validate_ok() {
        let cfg = AgenticFlowConfig::new("/tmp", "agent-1").with_dimension(64);
        assert!(cfg.validate().is_ok());
    }

    #[test]
    fn builder_methods() {
        let cfg = AgenticFlowConfig::new("/tmp", "a1")
            .with_dimension(128)
            .with_witness(false)
            .with_swarm_id("swarm-alpha");
        assert_eq!(cfg.dimension, 128);
        assert!(!cfg.enable_witness);
        assert_eq!(cfg.swarm_id.as_deref(), Some("swarm-alpha"));
    }
}
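A minimal builder sketch tying validation and directory setup together. It assumes the crate compiles under the name rvf_adapter_agentic_flow (from its Cargo.toml) and exposes this file as a public `config` module; the agentic-flow crate's lib.rs is not part of this diff, so the module path is an assumption.

use rvf_adapter_agentic_flow::config::AgenticFlowConfig;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cfg = AgenticFlowConfig::new("/tmp/swarm-demo", "agent-7")
        .with_dimension(256)
        .with_swarm_id("swarm-alpha");

    cfg.validate()?;    // rejects dimension == 0 or an empty agent_id
    cfg.ensure_dirs()?; // creates the data directory if missing

    // Derived paths used by the rest of the adapter.
    println!("store:   {}", cfg.store_path().display());
    println!("witness: {}", cfg.witness_path().display());
    Ok(())
}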
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/coordination.rs (vendored, new file, 283 lines)
@@ -0,0 +1,283 @@
//! Swarm coordination state management.
//!
//! Tracks agent state changes and consensus votes in-memory, with the
//! coordination state serialized alongside the RVF store. State entries
//! and votes are appended chronologically for audit and replay.

/// A recorded agent state change.
#[derive(Clone, Debug, PartialEq)]
pub struct StateEntry {
    /// The agent that produced this state change.
    pub agent_id: String,
    /// State key (e.g., "status", "role", "topology").
    pub key: String,
    /// State value (e.g., "active", "coordinator", "mesh").
    pub value: String,
    /// Timestamp in nanoseconds since the Unix epoch.
    pub timestamp: u64,
}

/// A consensus vote cast by an agent.
#[derive(Clone, Debug, PartialEq)]
pub struct ConsensusVote {
    /// The topic being voted on (e.g., "leader-election-42").
    pub topic: String,
    /// The agent casting the vote.
    pub agent_id: String,
    /// The vote (true = approve, false = reject).
    pub vote: bool,
    /// Timestamp in nanoseconds since the Unix epoch.
    pub timestamp: u64,
}

/// Swarm coordination state tracker.
///
/// Maintains an in-memory log of agent state changes and consensus votes.
/// This state lives alongside the RVF store and is used for coordination
/// protocol decisions (leader election, topology changes, etc.).
pub struct SwarmCoordination {
    states: Vec<StateEntry>,
    votes: Vec<ConsensusVote>,
}

impl SwarmCoordination {
    /// Create a new, empty coordination tracker.
    pub fn new() -> Self {
        Self {
            states: Vec::new(),
            votes: Vec::new(),
        }
    }

    /// Record an agent state change.
    pub fn record_state(
        &mut self,
        agent_id: &str,
        state_key: &str,
        state_value: &str,
    ) -> Result<(), CoordinationError> {
        if agent_id.is_empty() {
            return Err(CoordinationError::EmptyAgentId);
        }
        if state_key.is_empty() {
            return Err(CoordinationError::EmptyKey);
        }
        self.states.push(StateEntry {
            agent_id: agent_id.to_string(),
            key: state_key.to_string(),
            value: state_value.to_string(),
            timestamp: now_ns(),
        });
        Ok(())
    }

    /// Get the state history for a specific agent.
    pub fn get_agent_states(&self, agent_id: &str) -> Vec<StateEntry> {
        self.states
            .iter()
            .filter(|s| s.agent_id == agent_id)
            .cloned()
            .collect()
    }

    /// Get all coordination state entries.
    pub fn get_all_states(&self) -> Vec<StateEntry> {
        self.states.clone()
    }

    /// Record a consensus vote for a topic.
    pub fn record_consensus_vote(
        &mut self,
        topic: &str,
        agent_id: &str,
        vote: bool,
    ) -> Result<(), CoordinationError> {
        if topic.is_empty() {
            return Err(CoordinationError::EmptyTopic);
        }
        if agent_id.is_empty() {
            return Err(CoordinationError::EmptyAgentId);
        }
        self.votes.push(ConsensusVote {
            topic: topic.to_string(),
            agent_id: agent_id.to_string(),
            vote,
            timestamp: now_ns(),
        });
        Ok(())
    }

    /// Get all votes for a specific topic.
    pub fn get_votes(&self, topic: &str) -> Vec<ConsensusVote> {
        self.votes
            .iter()
            .filter(|v| v.topic == topic)
            .cloned()
            .collect()
    }

    /// Get the total number of state entries.
    pub fn state_count(&self) -> usize {
        self.states.len()
    }

    /// Get the total number of votes.
    pub fn vote_count(&self) -> usize {
        self.votes.len()
    }
}

impl Default for SwarmCoordination {
    fn default() -> Self {
        Self::new()
    }
}

/// Errors from coordination operations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CoordinationError {
    /// Agent ID must not be empty.
    EmptyAgentId,
    /// State key must not be empty.
    EmptyKey,
    /// Topic must not be empty.
    EmptyTopic,
}

impl std::fmt::Display for CoordinationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyAgentId => write!(f, "agent_id must not be empty"),
            Self::EmptyKey => write!(f, "state key must not be empty"),
            Self::EmptyTopic => write!(f, "topic must not be empty"),
        }
    }
}

impl std::error::Error for CoordinationError {}

fn now_ns() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn record_and_get_states() {
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "status", "active").unwrap();
        coord.record_state("a2", "status", "idle").unwrap();
        coord.record_state("a1", "role", "coordinator").unwrap();

        let a1_states = coord.get_agent_states("a1");
        assert_eq!(a1_states.len(), 2);
        assert_eq!(a1_states[0].key, "status");
        assert_eq!(a1_states[1].key, "role");

        let a2_states = coord.get_agent_states("a2");
        assert_eq!(a2_states.len(), 1);
    }

    #[test]
    fn get_all_states() {
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "k1", "v1").unwrap();
        coord.record_state("a2", "k2", "v2").unwrap();

        let all = coord.get_all_states();
        assert_eq!(all.len(), 2);
    }

    #[test]
    fn record_and_get_votes() {
        let mut coord = SwarmCoordination::new();
        coord
            .record_consensus_vote("leader-election", "a1", true)
            .unwrap();
        coord
            .record_consensus_vote("leader-election", "a2", false)
            .unwrap();
        coord
            .record_consensus_vote("other-topic", "a1", true)
            .unwrap();

        let votes = coord.get_votes("leader-election");
        assert_eq!(votes.len(), 2);
        assert!(votes[0].vote);
        assert!(!votes[1].vote);

        let other = coord.get_votes("other-topic");
        assert_eq!(other.len(), 1);
    }

    #[test]
    fn empty_agent_id_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_state("", "k", "v"),
            Err(CoordinationError::EmptyAgentId)
        );
        assert_eq!(
            coord.record_consensus_vote("topic", "", true),
            Err(CoordinationError::EmptyAgentId)
        );
    }

    #[test]
    fn empty_key_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_state("a1", "", "v"),
            Err(CoordinationError::EmptyKey)
        );
    }

    #[test]
    fn empty_topic_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_consensus_vote("", "a1", true),
            Err(CoordinationError::EmptyTopic)
        );
    }

    #[test]
    fn counts() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(coord.state_count(), 0);
        assert_eq!(coord.vote_count(), 0);

        coord.record_state("a1", "k", "v").unwrap();
        coord.record_consensus_vote("t", "a1", true).unwrap();

        assert_eq!(coord.state_count(), 1);
        assert_eq!(coord.vote_count(), 1);
    }

    #[test]
    fn no_states_for_unknown_agent() {
        let coord = SwarmCoordination::new();
        assert!(coord.get_agent_states("ghost").is_empty());
    }

    #[test]
    fn no_votes_for_unknown_topic() {
        let coord = SwarmCoordination::new();
        assert!(coord.get_votes("nonexistent").is_empty());
    }

    #[test]
    fn timestamps_are_monotonic() {
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "k1", "v1").unwrap();
        coord.record_state("a1", "k2", "v2").unwrap();

        let states = coord.get_agent_states("a1");
        assert!(states[0].timestamp <= states[1].timestamp);
    }
}
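The tracker records votes but leaves tallying to callers. A sketch of a simple-majority check written against the API above; it assumes the crate name rvf_adapter_agentic_flow and a public `coordination` module (the crate's lib.rs is not in this diff), and `majority_approves` is a hypothetical helper, not part of the adapter.

use rvf_adapter_agentic_flow::coordination::SwarmCoordination;

// Hypothetical helper: simple majority over the recorded votes for a topic.
fn majority_approves(coord: &SwarmCoordination, topic: &str) -> bool {
    let votes = coord.get_votes(topic);
    let approvals = votes.iter().filter(|v| v.vote).count();
    approvals * 2 > votes.len()
}

fn main() {
    let mut coord = SwarmCoordination::new();
    coord.record_consensus_vote("leader-election-42", "a1", true).unwrap();
    coord.record_consensus_vote("leader-election-42", "a2", true).unwrap();
    coord.record_consensus_vote("leader-election-42", "a3", false).unwrap();
    assert!(majority_approves(&coord, "leader-election-42"));
}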
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/learning.rs (vendored, new file, 301 lines)
@@ -0,0 +1,301 @@
|
||||
//! Agent learning pattern management.
|
||||
//!
|
||||
//! Stores learned patterns as vectors with metadata (pattern type, description,
|
||||
//! effectiveness score) in the RVF store. Patterns can be searched by embedding
|
||||
//! similarity and ranked by their effectiveness scores.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// A learning pattern search result.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PatternResult {
|
||||
/// Unique pattern identifier.
|
||||
pub id: u64,
|
||||
/// The cognitive pattern type (e.g., "convergent", "divergent", "lateral").
|
||||
pub pattern_type: String,
|
||||
/// Human-readable description of the pattern.
|
||||
pub description: String,
|
||||
/// Effectiveness score (0.0 - 1.0).
|
||||
pub score: f32,
|
||||
/// Distance from query embedding (only meaningful in search results).
|
||||
pub distance: f32,
|
||||
}
|
||||
|
||||
/// In-memory metadata for a stored pattern.
|
||||
#[derive(Clone, Debug)]
|
||||
struct PatternMeta {
|
||||
pattern_type: String,
|
||||
description: String,
|
||||
score: f32,
|
||||
}
|
||||
|
||||
/// Agent learning pattern store.
|
||||
///
|
||||
/// Wraps a vector store to provide pattern-specific operations: store, search,
|
||||
/// update scores, and retrieve top patterns. Each pattern has a type, description,
|
||||
/// effectiveness score, and an embedding vector for similarity search.
|
||||
pub struct LearningPatternStore {
|
||||
patterns: HashMap<u64, PatternMeta>,
|
||||
/// Ordered list of (score, id) for efficient top-k retrieval.
|
||||
score_index: Vec<(f32, u64)>,
    next_id: u64,
}

impl LearningPatternStore {
    /// Create a new, empty learning pattern store.
    pub fn new() -> Self {
        Self {
            patterns: HashMap::new(),
            score_index: Vec::new(),
            next_id: 1,
        }
    }

    /// Store a learned pattern.
    ///
    /// The `embedding` is stored in the parent `RvfSwarmStore` via metadata;
    /// this struct tracks the pattern metadata for filtering and ranking.
    ///
    /// Returns the assigned pattern ID.
    pub fn store_pattern(
        &mut self,
        pattern_type: &str,
        description: &str,
        score: f32,
    ) -> Result<u64, LearningError> {
        if pattern_type.is_empty() {
            return Err(LearningError::EmptyPatternType);
        }
        let clamped_score = score.clamp(0.0, 1.0);
        let id = self.next_id;
        self.next_id += 1;

        self.patterns.insert(
            id,
            PatternMeta {
                pattern_type: pattern_type.to_string(),
                description: description.to_string(),
                score: clamped_score,
            },
        );
        self.score_index.push((clamped_score, id));

        Ok(id)
    }

    /// Search patterns by returning those whose IDs are in the given candidate
    /// set (from a vector similarity search), enriched with metadata.
    pub fn enrich_results(
        &self,
        candidates: &[(u64, f32)],
        k: usize,
    ) -> Vec<PatternResult> {
        let mut results: Vec<PatternResult> = candidates
            .iter()
            .filter_map(|&(id, distance)| {
                let meta = self.patterns.get(&id)?;
                Some(PatternResult {
                    id,
                    pattern_type: meta.pattern_type.clone(),
                    description: meta.description.clone(),
                    score: meta.score,
                    distance,
                })
            })
            .collect();
        results.truncate(k);
        results
    }

    /// Update the effectiveness score for a pattern.
    pub fn update_score(&mut self, id: u64, new_score: f32) -> Result<(), LearningError> {
        let meta = self
            .patterns
            .get_mut(&id)
            .ok_or(LearningError::PatternNotFound(id))?;
        let clamped = new_score.clamp(0.0, 1.0);
        meta.score = clamped;

        // Update the score index entry.
        if let Some(entry) = self.score_index.iter_mut().find(|(_, eid)| *eid == id) {
            entry.0 = clamped;
        }

        Ok(())
    }

    /// Get the top-k patterns by effectiveness score (highest first).
    pub fn get_top_patterns(&self, k: usize) -> Vec<PatternResult> {
        let mut sorted = self.score_index.clone();
        sorted.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        sorted.truncate(k);

        sorted
            .into_iter()
            .filter_map(|(_, id)| {
                let meta = self.patterns.get(&id)?;
                Some(PatternResult {
                    id,
                    pattern_type: meta.pattern_type.clone(),
                    description: meta.description.clone(),
                    score: meta.score,
                    distance: 0.0,
                })
            })
            .collect()
    }

    /// Get a pattern by ID.
    pub fn get_pattern(&self, id: u64) -> Option<PatternResult> {
        let meta = self.patterns.get(&id)?;
        Some(PatternResult {
            id,
            pattern_type: meta.pattern_type.clone(),
            description: meta.description.clone(),
            score: meta.score,
            distance: 0.0,
        })
    }

    /// Get the total number of stored patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}

impl Default for LearningPatternStore {
    fn default() -> Self {
        Self::new()
    }
}

/// Errors from learning pattern operations.
#[derive(Clone, Debug, PartialEq)]
pub enum LearningError {
    /// Pattern type must not be empty.
    EmptyPatternType,
    /// Pattern with the given ID was not found.
    PatternNotFound(u64),
}

impl std::fmt::Display for LearningError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyPatternType => write!(f, "pattern_type must not be empty"),
            Self::PatternNotFound(id) => write!(f, "pattern not found: {id}"),
        }
    }
}

impl std::error::Error for LearningError {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn store_and_retrieve() {
        let mut store = LearningPatternStore::new();
        let id = store.store_pattern("convergent", "Use batched writes", 0.85).unwrap();

        let p = store.get_pattern(id).unwrap();
        assert_eq!(p.pattern_type, "convergent");
        assert_eq!(p.description, "Use batched writes");
        assert!((p.score - 0.85).abs() < f32::EPSILON);
    }

    #[test]
    fn update_score() {
        let mut store = LearningPatternStore::new();
        let id = store.store_pattern("lateral", "Try alternative approach", 0.5).unwrap();

        store.update_score(id, 0.95).unwrap();
        let p = store.get_pattern(id).unwrap();
        assert!((p.score - 0.95).abs() < f32::EPSILON);
    }

    #[test]
    fn update_nonexistent_pattern() {
        let mut store = LearningPatternStore::new();
        assert_eq!(
            store.update_score(999, 0.5),
            Err(LearningError::PatternNotFound(999))
        );
    }

    #[test]
    fn top_patterns() {
        let mut store = LearningPatternStore::new();
        store.store_pattern("a", "low", 0.2).unwrap();
        store.store_pattern("b", "mid", 0.5).unwrap();
        store.store_pattern("c", "high", 0.9).unwrap();
        store.store_pattern("d", "highest", 1.0).unwrap();

        let top = store.get_top_patterns(2);
        assert_eq!(top.len(), 2);
        assert!((top[0].score - 1.0).abs() < f32::EPSILON);
        assert!((top[1].score - 0.9).abs() < f32::EPSILON);
    }

    #[test]
    fn score_clamping() {
        let mut store = LearningPatternStore::new();
        let id1 = store.store_pattern("a", "over", 1.5).unwrap();
        let id2 = store.store_pattern("b", "under", -0.3).unwrap();

        assert!((store.get_pattern(id1).unwrap().score - 1.0).abs() < f32::EPSILON);
        assert!(store.get_pattern(id2).unwrap().score.abs() < f32::EPSILON);
    }

    #[test]
    fn empty_pattern_type_rejected() {
        let mut store = LearningPatternStore::new();
        assert_eq!(
            store.store_pattern("", "desc", 0.5),
            Err(LearningError::EmptyPatternType)
        );
    }

    #[test]
    fn enrich_results() {
        let mut store = LearningPatternStore::new();
        let id1 = store.store_pattern("convergent", "desc1", 0.8).unwrap();
        let id2 = store.store_pattern("divergent", "desc2", 0.6).unwrap();
        let _id3 = store.store_pattern("lateral", "desc3", 0.4).unwrap();

        let candidates = vec![(id1, 0.1), (id2, 0.3), (999, 0.5)];
        let results = store.enrich_results(&candidates, 10);
        // id 999 is filtered out (not in patterns map)
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, id1);
        assert_eq!(results[1].id, id2);
    }

    #[test]
    fn len_and_is_empty() {
        let mut store = LearningPatternStore::new();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);

        store.store_pattern("a", "desc", 0.5).unwrap();
        assert!(!store.is_empty());
        assert_eq!(store.len(), 1);
    }

    #[test]
    fn get_nonexistent_pattern() {
        let store = LearningPatternStore::new();
        assert!(store.get_pattern(42).is_none());
    }

    #[test]
    fn top_patterns_empty_store() {
        let store = LearningPatternStore::new();
        assert!(store.get_top_patterns(5).is_empty());
    }
}
53
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/lib.rs
vendored
Normal file
53
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/lib.rs
vendored
Normal file
@@ -0,0 +1,53 @@
//! RVF adapter for agentic-flow swarm coordination.
//!
//! This crate bridges agentic-flow's swarm coordination primitives with the
//! RuVector Format (RVF) segment store, per ADR-029. It provides persistent
//! storage for inter-agent memory sharing, swarm coordination state, and
//! agent learning patterns.
//!
//! # Segment mapping
//!
//! - **VEC_SEG + META_SEG**: Shared memory entries (embeddings + key/value
//!   metadata) for inter-agent memory sharing via the RVF streaming protocol.
//! - **META_SEG**: Swarm coordination state (agent states, topology changes).
//! - **SKETCH_SEG**: Agent learning patterns with effectiveness scores.
//! - **WITNESS_SEG**: Distributed consensus votes with signatures for
//!   tamper-evident audit trails.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_agentic_flow::{AgenticFlowConfig, RvfSwarmStore};
//!
//! let config = AgenticFlowConfig::new("/tmp/swarm-data", "agent-001");
//! let mut store = RvfSwarmStore::create(config).unwrap();
//!
//! // Share a memory entry with other agents
//! let embedding = vec![0.1f32; 384];
//! store.share_memory("auth-pattern", "JWT with refresh tokens",
//!     "patterns", &embedding).unwrap();
//!
//! // Search shared memories by embedding similarity
//! let results = store.search_shared(&embedding, 5);
//!
//! // Record coordination state
//! store.coordination().record_state("agent-001", "status", "active").unwrap();
//!
//! // Store a learning pattern
//! store.learning().store_pattern("convergent", "Use batched writes",
//!     0.92).unwrap();
//!
//! store.close().unwrap();
//! ```

pub mod config;
pub mod coordination;
pub mod learning;
pub mod swarm_store;

pub use config::{AgenticFlowConfig, ConfigError};
pub use coordination::{ConsensusVote, StateEntry, SwarmCoordination};
pub use learning::{LearningPatternStore, PatternResult};
pub use swarm_store::{
    RvfSwarmStore, SharedMemoryEntry, SharedMemoryResult, SwarmStoreError,
};
587
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/swarm_store.rs
vendored
Normal file
587
vendor/ruvector/crates/rvf/rvf-adapters/agentic-flow/src/swarm_store.rs
vendored
Normal file
@@ -0,0 +1,587 @@
//! `RvfSwarmStore` -- main API wrapping `RvfStore` for swarm operations.
//!
//! Maps agentic-flow's inter-agent memory sharing model onto the RVF
//! segment model:
//! - Embeddings are stored as vectors via `ingest_batch`
//! - Agent ID, key, value, and namespace are encoded as metadata fields
//! - Searches use `query` with optional namespace filtering
//! - Coordination state and learning patterns are managed by sub-stores
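//!
//! A minimal sketch of the key-replacement behavior (illustrative values;
//! assumes the default 384-dim configuration used in the crate docs):
//!
//! ```rust,no_run
//! use rvf_adapter_agentic_flow::{AgenticFlowConfig, RvfSwarmStore};
//!
//! let config = AgenticFlowConfig::new("/tmp/swarm-demo", "agent-007");
//! let mut store = RvfSwarmStore::create(config).unwrap();
//! let v = vec![0.1f32; 384];
//! let id1 = store.share_memory("k", "v1", "ns", &v).unwrap();
//! // Same agent/namespace/key: the old entry is soft-deleted and replaced.
//! let id2 = store.share_memory("k", "v2", "ns", &v).unwrap();
//! assert_ne!(id1, id2);
//! store.close().unwrap();
//! ```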

use std::collections::HashMap;

use rvf_runtime::options::{
    DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;

use crate::config::{AgenticFlowConfig, ConfigError};
use crate::coordination::SwarmCoordination;
use crate::learning::LearningPatternStore;

/// Metadata field IDs for shared memory entries.
const FIELD_AGENT_ID: u16 = 0;
const FIELD_KEY: u16 = 1;
const FIELD_VALUE: u16 = 2;
const FIELD_NAMESPACE: u16 = 3;

/// A search result from shared memory, enriched with agent metadata.
#[derive(Clone, Debug)]
pub struct SharedMemoryResult {
    /// Vector ID in the underlying store.
    pub id: u64,
    /// Distance from the query embedding (lower = more similar).
    pub distance: f32,
    /// The agent that shared this memory.
    pub agent_id: String,
    /// The memory key.
    pub key: String,
}

/// A full shared memory entry retrieved by ID.
#[derive(Clone, Debug)]
pub struct SharedMemoryEntry {
    /// Vector ID in the underlying store.
    pub id: u64,
    /// The agent that shared this memory.
    pub agent_id: String,
    /// The memory key.
    pub key: String,
    /// The memory value.
    pub value: String,
    /// The namespace this entry belongs to.
    pub namespace: String,
}

/// The RVF-backed swarm store for agentic-flow.
pub struct RvfSwarmStore {
    store: RvfStore,
    config: AgenticFlowConfig,
    coordination: SwarmCoordination,
    learning: LearningPatternStore,
    /// Maps "agent_id/namespace/key" -> vector_id for fast lookup.
    key_index: HashMap<String, u64>,
    /// Maps vector_id -> SharedMemoryEntry for retrieval by ID.
    entry_index: HashMap<u64, SharedMemoryEntry>,
    /// Next vector ID to assign.
    next_id: u64,
}

impl RvfSwarmStore {
    /// Create a new swarm store, initializing the data directory and RVF file.
    pub fn create(config: AgenticFlowConfig) -> Result<Self, SwarmStoreError> {
        config.validate().map_err(SwarmStoreError::Config)?;
        config
            .ensure_dirs()
            .map_err(|e| SwarmStoreError::Io(e.to_string()))?;

        let rvf_options = RvfOptions {
            dimension: config.dimension,
            metric: DistanceMetric::Cosine,
            ..Default::default()
        };

        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(SwarmStoreError::Rvf)?;

        Ok(Self {
            store,
            config,
            coordination: SwarmCoordination::new(),
            learning: LearningPatternStore::new(),
            key_index: HashMap::new(),
            entry_index: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing swarm store.
    pub fn open(config: AgenticFlowConfig) -> Result<Self, SwarmStoreError> {
        config.validate().map_err(SwarmStoreError::Config)?;

        let store =
            RvfStore::open(&config.store_path()).map_err(SwarmStoreError::Rvf)?;

        // Rebuild next_id from the store status so new IDs don't collide.
        // Note: key_index and entry_index start empty after open, so entries
        // from a previous session remain in the RVF file but are not returned
        // by `search_shared`/`get_shared` until they are re-shared.
        let status = store.status();
        let next_id = status.total_vectors + status.current_epoch as u64 + 1;

        Ok(Self {
            store,
            config,
            coordination: SwarmCoordination::new(),
            learning: LearningPatternStore::new(),
            key_index: HashMap::new(),
            entry_index: HashMap::new(),
            next_id,
        })
    }

    /// Share a memory entry with other agents.
    ///
    /// Stores the embedding vector with agent_id/key/value/namespace as
    /// metadata fields. If an entry with the same agent_id/namespace/key
    /// already exists, the old one is soft-deleted and replaced.
    ///
    /// Returns the assigned vector ID.
    pub fn share_memory(
        &mut self,
        key: &str,
        value: &str,
        namespace: &str,
        embedding: &[f32],
    ) -> Result<u64, SwarmStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(SwarmStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }

        let compound_key = format!(
            "{}/{}/{}",
            self.config.agent_id, namespace, key
        );

        // Soft-delete existing entry with the same compound key.
        if let Some(&old_id) = self.key_index.get(&compound_key) {
            self.store.delete(&[old_id]).map_err(SwarmStoreError::Rvf)?;
            self.entry_index.remove(&old_id);
        }

        let vector_id = self.next_id;
        self.next_id += 1;

        let metadata = vec![
            MetadataEntry {
                field_id: FIELD_AGENT_ID,
                value: MetadataValue::String(self.config.agent_id.clone()),
            },
            MetadataEntry {
                field_id: FIELD_KEY,
                value: MetadataValue::String(key.to_string()),
            },
            MetadataEntry {
                field_id: FIELD_VALUE,
                value: MetadataValue::String(value.to_string()),
            },
            MetadataEntry {
                field_id: FIELD_NAMESPACE,
                value: MetadataValue::String(namespace.to_string()),
            },
        ];

        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(SwarmStoreError::Rvf)?;

        self.key_index.insert(compound_key, vector_id);
        self.entry_index.insert(
            vector_id,
            SharedMemoryEntry {
                id: vector_id,
                agent_id: self.config.agent_id.clone(),
                key: key.to_string(),
                value: value.to_string(),
                namespace: namespace.to_string(),
            },
        );

        Ok(vector_id)
    }

    /// Search for shared memories similar to the given embedding.
    ///
    /// Returns up to `k` results sorted by distance (closest first),
    /// enriched with agent metadata from the in-memory index.
    pub fn search_shared(
        &self,
        embedding: &[f32],
        k: usize,
    ) -> Vec<SharedMemoryResult> {
        let options = QueryOptions::default();
        let results = match self.store.query(embedding, k, &options) {
            Ok(r) => r,
            Err(_) => return Vec::new(),
        };

        results
            .into_iter()
            .filter_map(|r| {
                let entry = self.entry_index.get(&r.id)?;
                Some(SharedMemoryResult {
                    id: r.id,
                    distance: r.distance,
                    agent_id: entry.agent_id.clone(),
                    key: entry.key.clone(),
                })
            })
            .collect()
    }

    /// Retrieve a shared memory entry by its vector ID.
    pub fn get_shared(&self, id: u64) -> Option<SharedMemoryEntry> {
        self.entry_index.get(&id).cloned()
    }

    /// Delete shared memory entries by their vector IDs.
    ///
    /// Returns the number of entries actually deleted.
    pub fn delete_shared(&mut self, ids: &[u64]) -> Result<usize, SwarmStoreError> {
        let existing: Vec<u64> = ids
            .iter()
            .filter(|id| self.entry_index.contains_key(id))
            .copied()
            .collect();

        if existing.is_empty() {
            return Ok(0);
        }

        self.store
            .delete(&existing)
            .map_err(SwarmStoreError::Rvf)?;

        let mut removed = 0;
        for &id in &existing {
            if let Some(entry) = self.entry_index.remove(&id) {
                let compound_key = format!(
                    "{}/{}/{}",
                    entry.agent_id, entry.namespace, entry.key
                );
                self.key_index.remove(&compound_key);
                removed += 1;
            }
        }

        Ok(removed)
    }

    /// Get a mutable reference to the coordination state tracker.
    pub fn coordination(&mut self) -> &mut SwarmCoordination {
        &mut self.coordination
    }

    /// Get an immutable reference to the coordination state tracker.
    pub fn coordination_ref(&self) -> &SwarmCoordination {
        &self.coordination
    }

    /// Get a mutable reference to the learning pattern store.
    pub fn learning(&mut self) -> &mut LearningPatternStore {
        &mut self.learning
    }

    /// Get an immutable reference to the learning pattern store.
    pub fn learning_ref(&self) -> &LearningPatternStore {
        &self.learning
    }

    /// Get the current store status.
    pub fn status(&self) -> rvf_runtime::StoreStatus {
        self.store.status()
    }

    /// Get the agent ID for this store.
    pub fn agent_id(&self) -> &str {
        &self.config.agent_id
    }

    /// Close the swarm store, releasing locks.
    pub fn close(self) -> Result<(), SwarmStoreError> {
        self.store.close().map_err(SwarmStoreError::Rvf)
    }
}

/// Errors from swarm store operations.
#[derive(Debug)]
pub enum SwarmStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch { expected: usize, got: usize },
}

impl std::fmt::Display for SwarmStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl std::error::Error for SwarmStoreError {}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn test_config(dir: &std::path::Path) -> AgenticFlowConfig {
        AgenticFlowConfig::new(dir, "test-agent").with_dimension(4)
    }

    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn create_and_share() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let id = store
            .share_memory("key1", "value1", "default", &make_embedding(1.0))
            .unwrap();
        assert!(id > 0);

        let status = store.status();
        assert_eq!(status.total_vectors, 1);

        store.close().unwrap();
    }

    #[test]
    fn share_and_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();

        store
            .share_memory("a", "val_a", "ns1", &[1.0, 0.0, 0.0, 0.0])
            .unwrap();
        store
            .share_memory("b", "val_b", "ns1", &[0.0, 1.0, 0.0, 0.0])
            .unwrap();
        store
            .share_memory("c", "val_c", "ns2", &[0.0, 0.0, 1.0, 0.0])
            .unwrap();

        let results = store.search_shared(&[1.0, 0.0, 0.0, 0.0], 3);
        assert_eq!(results.len(), 3);
        // Closest should be "a"
        assert_eq!(results[0].key, "a");

        store.close().unwrap();
    }

    #[test]
    fn get_shared_by_id() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let id = store
            .share_memory("mykey", "myval", "ns", &make_embedding(2.0))
            .unwrap();

        let entry = store.get_shared(id).unwrap();
        assert_eq!(entry.key, "mykey");
        assert_eq!(entry.value, "myval");
        assert_eq!(entry.namespace, "ns");
        assert_eq!(entry.agent_id, "test-agent");

        assert!(store.get_shared(9999).is_none());

        store.close().unwrap();
    }

    #[test]
    fn delete_shared_entries() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let id1 = store
            .share_memory("k1", "v1", "ns", &make_embedding(1.0))
            .unwrap();
        let id2 = store
            .share_memory("k2", "v2", "ns", &make_embedding(2.0))
            .unwrap();

        let removed = store.delete_shared(&[id1]).unwrap();
        assert_eq!(removed, 1);
        assert!(store.get_shared(id1).is_none());
        assert!(store.get_shared(id2).is_some());

        store.close().unwrap();
    }

    #[test]
    fn delete_nonexistent_ids() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let removed = store.delete_shared(&[999, 1000]).unwrap();
        assert_eq!(removed, 0);

        store.close().unwrap();
    }

    #[test]
    fn replace_existing_key() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let id1 = store
            .share_memory("k", "v1", "ns", &make_embedding(1.0))
            .unwrap();
        let id2 = store
            .share_memory("k", "v2", "ns", &make_embedding(2.0))
            .unwrap();

        assert_ne!(id1, id2);
        assert!(store.get_shared(id1).is_none());
        let entry = store.get_shared(id2).unwrap();
        assert_eq!(entry.value, "v2");

        let status = store.status();
        assert_eq!(status.total_vectors, 1);

        store.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        let result = store.share_memory("k", "v", "ns", &[1.0, 2.0]);
        assert!(result.is_err());
    }

    #[test]
    fn coordination_state() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        store
            .coordination()
            .record_state("agent-1", "status", "active")
            .unwrap();
        store
            .coordination()
            .record_state("agent-2", "status", "idle")
            .unwrap();

        let states = store.coordination_ref().get_all_states();
        assert_eq!(states.len(), 2);

        store.close().unwrap();
    }

    #[test]
    fn learning_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();

        let id = store
            .learning()
            .store_pattern("convergent", "Use batched writes", 0.85)
            .unwrap();

        let pattern = store.learning_ref().get_pattern(id).unwrap();
        assert_eq!(pattern.pattern_type, "convergent");
        assert!((pattern.score - 0.85).abs() < f32::EPSILON);

        store.close().unwrap();
    }

    #[test]
    fn open_existing_store() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        {
            let mut store = RvfSwarmStore::create(config.clone()).unwrap();
            store
                .share_memory("k", "v", "ns", &make_embedding(1.0))
                .unwrap();
            store.close().unwrap();
        }

        {
            let store = RvfSwarmStore::open(config).unwrap();
            let status = store.status();
            assert_eq!(status.total_vectors, 1);
            store.close().unwrap();
        }
    }

    #[test]
    fn agent_id_accessor() {
        let dir = TempDir::new().unwrap();
        let config = AgenticFlowConfig::new(dir.path(), "special-agent")
            .with_dimension(4);

        let store = RvfSwarmStore::create(config).unwrap();
        assert_eq!(store.agent_id(), "special-agent");

        store.close().unwrap();
    }

    #[test]
    fn empty_store_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let store = RvfSwarmStore::create(config).unwrap();
        let results = store.search_shared(&[1.0, 0.0, 0.0, 0.0], 5);
        assert!(results.is_empty());

        store.close().unwrap();
    }

    #[test]
    fn consensus_votes() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfSwarmStore::create(config).unwrap();
        store
            .coordination()
            .record_consensus_vote("leader-election", "a1", true)
            .unwrap();
        store
            .coordination()
            .record_consensus_vote("leader-election", "a2", false)
            .unwrap();

        let votes = store.coordination_ref().get_votes("leader-election");
        assert_eq!(votes.len(), 2);
        assert!(votes[0].vote);
        assert!(!votes[1].vote);

        store.close().unwrap();
    }

    #[test]
    fn invalid_config_rejected() {
        let dir = TempDir::new().unwrap();

        // Zero dimension
        let config = AgenticFlowConfig::new(dir.path(), "a1").with_dimension(0);
        assert!(RvfSwarmStore::create(config).is_err());

        // Empty agent_id
        let config = AgenticFlowConfig::new(dir.path(), "").with_dimension(4);
        assert!(RvfSwarmStore::create(config).is_err());
    }
}
19
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/Cargo.toml
vendored
Normal file
19
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/Cargo.toml
vendored
Normal file
@@ -0,0 +1,19 @@
[package]
name = "rvf-adapter-claude-flow"
version = "0.1.0"
edition = "2021"
description = "RVF adapter for claude-flow memory subsystem — stores memory entries as RVF files with WITNESS_SEG audit trails"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"

[features]
default = ["std"]
std = []

[dependencies]
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-crypto = { path = "../../rvf-crypto", features = ["std"] }

[dev-dependencies]
tempfile = "3"
124
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/config.rs
vendored
Normal file
124
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/config.rs
vendored
Normal file
@@ -0,0 +1,124 @@
//! Configuration for the claude-flow memory adapter.

use std::path::PathBuf;

use rvf_runtime::options::DistanceMetric;

/// Configuration for the RVF-backed claude-flow memory store.
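///
/// A minimal builder sketch (illustrative values; `validate` only checks
/// that the dimension is non-zero):
///
/// ```rust,no_run
/// use rvf_adapter_claude_flow::ClaudeFlowConfig;
/// use rvf_runtime::options::DistanceMetric;
///
/// let cfg = ClaudeFlowConfig::new("/tmp/claude-flow-memory", 384)
///     .with_metric(DistanceMetric::L2)
///     .with_witness(false);
/// assert!(cfg.validate().is_ok());
/// ```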
#[derive(Clone, Debug)]
pub struct ClaudeFlowConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match the embeddings used by claude-flow).
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: DistanceMetric,
    /// Whether to record witness entries for audit trails.
    pub enable_witness: bool,
}

impl ClaudeFlowConfig {
    /// Create a new configuration with required parameters.
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension,
            metric: DistanceMetric::Cosine,
            enable_witness: true,
        }
    }

    /// Set the distance metric.
    pub fn with_metric(mut self, metric: DistanceMetric) -> Self {
        self.metric = metric;
        self
    }

    /// Enable or disable witness audit trails.
    pub fn with_witness(mut self, enable: bool) -> Self {
        self.enable_witness = enable;
        self
    }

    /// Return the path to the main vector store RVF file.
    pub fn store_path(&self) -> PathBuf {
        self.data_dir.join("memory.rvf")
    }

    /// Return the path to the witness chain file.
    pub fn witness_path(&self) -> PathBuf {
        self.data_dir.join("witness.bin")
    }

    /// Ensure the data directory exists.
    pub fn ensure_dirs(&self) -> std::io::Result<()> {
        std::fs::create_dir_all(&self.data_dir)
    }

    /// Validate the configuration.
    pub fn validate(&self) -> Result<(), ConfigError> {
        if self.dimension == 0 {
            return Err(ConfigError::InvalidDimension);
        }
        Ok(())
    }
}

/// Errors specific to adapter configuration.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
}

impl std::fmt::Display for ConfigError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
        }
    }
}

impl std::error::Error for ConfigError {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn config_defaults() {
        let cfg = ClaudeFlowConfig::new("/tmp/test", 384);
        assert_eq!(cfg.dimension, 384);
        assert_eq!(cfg.metric, DistanceMetric::Cosine);
        assert!(cfg.enable_witness);
    }

    #[test]
    fn config_paths() {
        let cfg = ClaudeFlowConfig::new("/data/memory", 128);
        assert_eq!(cfg.store_path(), Path::new("/data/memory/memory.rvf"));
        assert_eq!(cfg.witness_path(), Path::new("/data/memory/witness.bin"));
    }

    #[test]
    fn validate_zero_dimension() {
        let cfg = ClaudeFlowConfig::new("/tmp", 0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }

    #[test]
    fn validate_ok() {
        let cfg = ClaudeFlowConfig::new("/tmp", 64);
        assert!(cfg.validate().is_ok());
    }

    #[test]
    fn builder_methods() {
        let cfg = ClaudeFlowConfig::new("/tmp", 256)
            .with_metric(DistanceMetric::L2)
            .with_witness(false);
        assert_eq!(cfg.metric, DistanceMetric::L2);
        assert!(!cfg.enable_witness);
    }
}
48
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/lib.rs
vendored
Normal file
48
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/lib.rs
vendored
Normal file
@@ -0,0 +1,48 @@
//! RVF adapter for the claude-flow memory subsystem.
//!
//! This crate bridges claude-flow's key/value/embedding memory model
//! with the RuVector Format (RVF) segment store. Memory entries are
//! persisted as RVF files with the RVText profile, and every mutation
//! is recorded in a WITNESS_SEG audit trail for tamper-evident logging.
//!
//! # Architecture
//!
//! - **`RvfMemoryStore`**: Main API wrapping `RvfStore` for
//!   store/search/retrieve/delete operations on memory entries.
//! - **`WitnessChain`**: Persistent, append-only audit log using
//!   `rvf_crypto::witness` chains (SHAKE-256 linked).
//! - **`ClaudeFlowConfig`**: Configuration for data directory, embedding
//!   dimension, distance metric, and witness toggle.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_claude_flow::{ClaudeFlowConfig, RvfMemoryStore};
//!
//! let config = ClaudeFlowConfig::new("/tmp/claude-flow-memory", 384);
//! let mut store = RvfMemoryStore::create(config).unwrap();
//!
//! // Store a memory entry with its embedding
//! let embedding = vec![0.1f32; 384];
//! store.store_memory("auth-pattern", "JWT with refresh tokens",
//!     "patterns", &["auth".into()], &embedding).unwrap();
//!
//! // Search by embedding similarity
//! let results = store.search_memory(&embedding, 5, Some("patterns"), None).unwrap();
//!
//! // Retrieve by key
//! let id = store.retrieve_memory("auth-pattern", "patterns");
//!
//! // Delete
//! store.delete_memory("auth-pattern", "patterns").unwrap();
//!
//! store.close().unwrap();
//! ```

pub mod config;
pub mod memory_store;
pub mod witness;

pub use config::ClaudeFlowConfig;
pub use memory_store::{MemoryEntry, MemoryStoreError, RvfMemoryStore};
pub use witness::{WitnessChain, WitnessError};
445
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/memory_store.rs
vendored
Normal file
445
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/memory_store.rs
vendored
Normal file
@@ -0,0 +1,445 @@
//! `RvfMemoryStore` — wraps `RvfStore` for claude-flow memory operations.
//!
//! Maps claude-flow's key/value/namespace/tags/embedding model onto the
//! RVF segment model:
//! - Embeddings are stored as vectors via `ingest_batch`
//! - Keys and namespaces are encoded as metadata (META_SEG fields)
//! - Searches use `query` with optional namespace filtering
//! - Deletes use soft-delete with witness recording
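//!
//! Namespace-filtering sketch (illustrative values; dimension 4 matches the
//! tests below):
//!
//! ```rust,no_run
//! use rvf_adapter_claude_flow::{ClaudeFlowConfig, RvfMemoryStore};
//!
//! let config = ClaudeFlowConfig::new("/tmp/cf-mem-demo", 4);
//! let mut store = RvfMemoryStore::create(config).unwrap();
//! store.store_memory("a", "val_a", "ns1", &[], &[1.0, 0.0, 0.0, 0.0]).unwrap();
//! store.store_memory("b", "val_b", "ns2", &[], &[0.0, 1.0, 0.0, 0.0]).unwrap();
//! // Only entries whose META_SEG namespace field equals "ns1" are returned.
//! let _hits = store.search_memory(&[1.0, 0.0, 0.0, 0.0], 5, Some("ns1"), None).unwrap();
//! store.close().unwrap();
//! ```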

use std::collections::HashMap;

use rvf_runtime::filter::{FilterExpr, FilterValue};
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::{RvfStore, SearchResult};
use rvf_types::RvfError;

use crate::config::ClaudeFlowConfig;
use crate::witness::WitnessChain;

/// Metadata field IDs for claude-flow memory entries.
const FIELD_KEY: u16 = 0;
const FIELD_NAMESPACE: u16 = 1;
const FIELD_TAGS: u16 = 2;

/// A memory entry returned from retrieval or search.
#[derive(Clone, Debug)]
pub struct MemoryEntry {
    /// The memory key.
    pub key: String,
    /// The namespace this entry belongs to.
    pub namespace: String,
    /// Tags associated with this entry.
    pub tags: Vec<String>,
    /// The vector ID in the underlying store.
    pub vector_id: u64,
    /// Distance from query (only meaningful for search results).
    pub distance: f32,
}

/// The RVF-backed memory store for claude-flow.
pub struct RvfMemoryStore {
    store: RvfStore,
    witness: Option<WitnessChain>,
    config: ClaudeFlowConfig,
    /// Maps "namespace/key" -> vector_id for fast lookup.
    key_index: HashMap<String, u64>,
    /// Next vector ID to assign.
    next_id: u64,
}

impl RvfMemoryStore {
    /// Create a new memory store, initializing the data directory and RVF file.
    pub fn create(config: ClaudeFlowConfig) -> Result<Self, MemoryStoreError> {
        config.validate().map_err(MemoryStoreError::Config)?;
        config.ensure_dirs().map_err(|e| MemoryStoreError::Io(e.to_string()))?;

        let rvf_options = RvfOptions {
            dimension: config.dimension,
            metric: config.metric,
            ..Default::default()
        };

        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(MemoryStoreError::Rvf)?;

        let witness = if config.enable_witness {
            Some(WitnessChain::create(&config.witness_path())
                .map_err(MemoryStoreError::Witness)?)
        } else {
            None
        };

        Ok(Self {
            store,
            witness,
            config,
            key_index: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing memory store.
    pub fn open(config: ClaudeFlowConfig) -> Result<Self, MemoryStoreError> {
        config.validate().map_err(MemoryStoreError::Config)?;

        let store = RvfStore::open(&config.store_path())
            .map_err(MemoryStoreError::Rvf)?;

        let witness = if config.enable_witness {
            Some(WitnessChain::open_or_create(&config.witness_path())
                .map_err(MemoryStoreError::Witness)?)
        } else {
            None
        };

        // Rebuild the key_index from the store status.
        // Since RvfStore doesn't expose metadata iteration, we start fresh.
        // Existing vectors remain searchable by embedding; key lookup is
        // rebuilt as entries are re-stored.
        let status = store.status();
        let next_id = status.total_vectors + status.current_epoch as u64 + 1;

        Ok(Self {
            store,
            witness,
            config,
            key_index: HashMap::new(),
            next_id,
        })
    }

    /// Store a memory entry with its embedding vector.
    ///
    /// If an entry with the same key and namespace already exists, the old
    /// one is soft-deleted and replaced. Note that the `value` payload is
    /// currently unused (`_value`); only key, namespace, and tags are
    /// persisted as metadata alongside the embedding.
    pub fn store_memory(
        &mut self,
        key: &str,
        _value: &str,
        namespace: &str,
        tags: &[String],
        embedding: &[f32],
    ) -> Result<u64, MemoryStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(MemoryStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }

        // If key already exists in this namespace, soft-delete the old entry.
        let compound_key = format!("{namespace}/{key}");
        if let Some(&old_id) = self.key_index.get(&compound_key) {
            self.store.delete(&[old_id]).map_err(MemoryStoreError::Rvf)?;
        }

        let vector_id = self.next_id;
        self.next_id += 1;

        // Encode tags as a comma-separated string for metadata storage.
        let tags_str = tags.join(",");

        let metadata = vec![
            MetadataEntry { field_id: FIELD_KEY, value: MetadataValue::String(key.to_string()) },
            MetadataEntry { field_id: FIELD_NAMESPACE, value: MetadataValue::String(namespace.to_string()) },
            MetadataEntry { field_id: FIELD_TAGS, value: MetadataValue::String(tags_str) },
        ];

        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(MemoryStoreError::Rvf)?;

        self.key_index.insert(compound_key, vector_id);

        if let Some(ref mut w) = self.witness {
            let _ = w.record_store(key, namespace);
        }

        Ok(vector_id)
    }

    /// Search memory by embedding vector, optionally filtering by namespace.
    pub fn search_memory(
        &mut self,
        query_embedding: &[f32],
        k: usize,
        namespace: Option<&str>,
        _threshold: Option<f32>,
    ) -> Result<Vec<SearchResult>, MemoryStoreError> {
        if query_embedding.len() != self.config.dimension as usize {
            return Err(MemoryStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: query_embedding.len(),
            });
        }

        let filter = namespace.map(|ns| {
            FilterExpr::Eq(FIELD_NAMESPACE, FilterValue::String(ns.to_string()))
        });

        let options = QueryOptions {
            filter,
            ..Default::default()
        };

        let results = self.store.query(query_embedding, k, &options)
            .map_err(MemoryStoreError::Rvf)?;

        if let Some(ref mut w) = self.witness {
            let ns = namespace.unwrap_or("*");
            let _ = w.record_search(ns, k);
        }

        Ok(results)
    }

    /// Retrieve a memory entry by key and namespace.
    ///
    /// Returns the vector ID if found (the entry can then be used with
    /// the underlying store for further operations).
    pub fn retrieve_memory(
        &self,
        key: &str,
        namespace: &str,
    ) -> Option<u64> {
        let compound_key = format!("{namespace}/{key}");
        self.key_index.get(&compound_key).copied()
    }

    /// Soft-delete a memory entry by key and namespace.
    pub fn delete_memory(
        &mut self,
        key: &str,
        namespace: &str,
    ) -> Result<bool, MemoryStoreError> {
        let compound_key = format!("{namespace}/{key}");
        if let Some(vector_id) = self.key_index.remove(&compound_key) {
            self.store.delete(&[vector_id]).map_err(MemoryStoreError::Rvf)?;

            if let Some(ref mut w) = self.witness {
                let _ = w.record_delete(key, namespace);
            }

            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// Run compaction on the underlying store.
    pub fn compact(&mut self) -> Result<(), MemoryStoreError> {
        self.store.compact().map_err(MemoryStoreError::Rvf)?;

        if let Some(ref mut w) = self.witness {
            let _ = w.record_compact();
        }

        Ok(())
    }

    /// Get the current store status.
    pub fn status(&self) -> rvf_runtime::StoreStatus {
        self.store.status()
    }

    /// Return a reference to the witness chain (if enabled).
    pub fn witness(&self) -> Option<&WitnessChain> {
        self.witness.as_ref()
    }

    /// Close the memory store, releasing locks.
    pub fn close(self) -> Result<(), MemoryStoreError> {
        self.store.close().map_err(MemoryStoreError::Rvf)
    }
}

/// Errors from memory store operations.
#[derive(Debug)]
pub enum MemoryStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Witness chain error.
    Witness(crate::witness::WitnessError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch { expected: usize, got: usize },
}

impl std::fmt::Display for MemoryStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Witness(e) => write!(f, "witness error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl std::error::Error for MemoryStoreError {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use tempfile::TempDir;

    fn test_config(dir: &Path) -> ClaudeFlowConfig {
        ClaudeFlowConfig::new(dir, 4)
    }

    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn create_and_store() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        let id = store.store_memory(
            "key1", "value1", "default", &["tag1".into(), "tag2".into()],
            &make_embedding(1.0),
        ).unwrap();
        assert!(id > 0);

        let status = store.status();
        assert_eq!(status.total_vectors, 1);

        store.close().unwrap();
    }

    #[test]
    fn store_and_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();

        store.store_memory("a", "val_a", "ns1", &[], &[1.0, 0.0, 0.0, 0.0]).unwrap();
        store.store_memory("b", "val_b", "ns1", &[], &[0.0, 1.0, 0.0, 0.0]).unwrap();
        store.store_memory("c", "val_c", "ns2", &[], &[0.0, 0.0, 1.0, 0.0]).unwrap();

        // Search all namespaces
        let results = store.search_memory(&[1.0, 0.0, 0.0, 0.0], 3, None, None).unwrap();
        assert_eq!(results.len(), 3);

        // Search filtered by namespace
        let results = store.search_memory(&[1.0, 0.0, 0.0, 0.0], 3, Some("ns1"), None).unwrap();
        assert_eq!(results.len(), 2);

        store.close().unwrap();
    }

    #[test]
    fn retrieve_by_key() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        let id = store.store_memory("mykey", "myval", "ns", &[], &make_embedding(2.0)).unwrap();

        assert_eq!(store.retrieve_memory("mykey", "ns"), Some(id));
        assert_eq!(store.retrieve_memory("missing", "ns"), None);
        assert_eq!(store.retrieve_memory("mykey", "other_ns"), None);

        store.close().unwrap();
    }

    #[test]
    fn delete_memory() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("k", "v", "ns", &[], &make_embedding(3.0)).unwrap();

        assert!(store.delete_memory("k", "ns").unwrap());
        assert!(!store.delete_memory("k", "ns").unwrap()); // already deleted
        assert_eq!(store.retrieve_memory("k", "ns"), None);

        store.close().unwrap();
    }

    #[test]
    fn replace_existing_key() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        let id1 = store.store_memory("k", "v1", "ns", &[], &make_embedding(1.0)).unwrap();
        let id2 = store.store_memory("k", "v2", "ns", &[], &make_embedding(2.0)).unwrap();

        // New ID should be different (old was soft-deleted)
        assert_ne!(id1, id2);
        assert_eq!(store.retrieve_memory("k", "ns"), Some(id2));

        // Only one live vector
        let status = store.status();
        assert_eq!(status.total_vectors, 1);

        store.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        let result = store.store_memory("k", "v", "ns", &[], &[1.0, 2.0]); // dim=2 vs config dim=4
        assert!(result.is_err());
    }

    #[test]
    fn witness_audit_trail() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("a", "v", "ns", &[], &make_embedding(1.0)).unwrap();
        store.search_memory(&make_embedding(1.0), 1, None, None).unwrap();
        store.delete_memory("a", "ns").unwrap();

        let witness = store.witness().unwrap();
        assert_eq!(witness.len(), 3); // store + search + delete
        assert_eq!(witness.verify().unwrap(), 3);

        store.close().unwrap();
    }

    #[test]
    fn compact_works() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());

        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("a", "v", "ns", &[], &make_embedding(1.0)).unwrap();
        store.store_memory("b", "v", "ns", &[], &make_embedding(2.0)).unwrap();
        store.delete_memory("a", "ns").unwrap();
        store.compact().unwrap();

        let status = store.status();
        assert_eq!(status.total_vectors, 1);

        store.close().unwrap();
    }

    #[test]
    fn no_witness_when_disabled() {
        let dir = TempDir::new().unwrap();
        let config = ClaudeFlowConfig::new(dir.path(), 4).with_witness(false);

        let store = RvfMemoryStore::create(config).unwrap();
        assert!(store.witness().is_none());
        store.close().unwrap();
    }
}
292
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/witness.rs
vendored
Normal file
292
vendor/ruvector/crates/rvf/rvf-adapters/claude-flow/src/witness.rs
vendored
Normal file
@@ -0,0 +1,292 @@
//! Audit trail using WITNESS_SEG for claude-flow memory operations.
//!
//! Wraps `rvf_crypto::witness` to provide a persistent, append-only
//! witness chain that records every memory store/delete/search action.
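//!
//! A minimal usage sketch (the path is illustrative):
//!
//! ```rust,no_run
//! use std::path::Path;
//! use rvf_adapter_claude_flow::WitnessChain;
//!
//! let mut chain = WitnessChain::create(Path::new("/tmp/witness.bin")).unwrap();
//! chain.record_store("key1", "default").unwrap();
//! chain.record_delete("key1", "default").unwrap();
//! // verify() re-checks the hash links and returns the entry count.
//! assert_eq!(chain.verify().unwrap(), 2);
//! ```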
|
||||
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use rvf_crypto::witness::{WitnessEntry, create_witness_chain, verify_witness_chain};
|
||||
use rvf_crypto::shake256_256;
|
||||
|
||||
/// Witness type constants for claude-flow actions.
|
||||
pub const WITNESS_STORE: u8 = 0x01;
|
||||
pub const WITNESS_DELETE: u8 = 0x02;
|
||||
pub const WITNESS_SEARCH: u8 = 0x03;
|
||||
pub const WITNESS_COMPACT: u8 = 0x04;
|
||||
|
||||
/// Persistent witness chain that records memory operations.
|
||||
pub struct WitnessChain {
|
||||
path: PathBuf,
|
||||
/// Cached chain bytes (in-memory mirror of the file).
|
||||
chain_data: Vec<u8>,
|
||||
/// Number of entries in the chain.
|
||||
entry_count: usize,
|
||||
}
|
||||
|
||||
impl WitnessChain {
|
||||
/// Create a new (empty) witness chain file at the given path.
|
||||
pub fn create(path: &Path) -> Result<Self, WitnessError> {
|
||||
File::create(path).map_err(|e| WitnessError::Io(e.to_string()))?;
|
||||
Ok(Self {
|
||||
path: path.to_path_buf(),
|
||||
chain_data: Vec::new(),
|
||||
entry_count: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing witness chain file, verifying its integrity.
|
||||
pub fn open(path: &Path) -> Result<Self, WitnessError> {
|
||||
let mut file = File::open(path).map_err(|e| WitnessError::Io(e.to_string()))?;
|
||||
let mut data = Vec::new();
|
||||
file.read_to_end(&mut data).map_err(|e| WitnessError::Io(e.to_string()))?;
|
||||
|
||||
if data.is_empty() {
|
||||
return Ok(Self {
|
||||
path: path.to_path_buf(),
|
||||
chain_data: Vec::new(),
|
||||
entry_count: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let entries = verify_witness_chain(&data)
|
||||
.map_err(|_| WitnessError::ChainCorrupted)?;
|
||||
|
||||
Ok(Self {
|
||||
path: path.to_path_buf(),
|
||||
chain_data: data,
|
||||
entry_count: entries.len(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing chain or create a new one.
|
||||
pub fn open_or_create(path: &Path) -> Result<Self, WitnessError> {
|
||||
if path.exists() {
|
||||
Self::open(path)
|
||||
} else {
|
||||
Self::create(path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a memory store action.
|
||||
pub fn record_store(&mut self, key: &str, namespace: &str) -> Result<(), WitnessError> {
|
||||
let mut hasher_input = Vec::new();
|
||||
hasher_input.extend_from_slice(b"store:");
|
||||
hasher_input.extend_from_slice(namespace.as_bytes());
|
||||
hasher_input.push(b'/');
|
||||
hasher_input.extend_from_slice(key.as_bytes());
|
||||
self.append_entry(&hasher_input, WITNESS_STORE)
|
||||
}
|
||||
|
||||
/// Record a memory delete action.
|
||||
pub fn record_delete(&mut self, key: &str, namespace: &str) -> Result<(), WitnessError> {
|
||||
let mut hasher_input = Vec::new();
|
||||
hasher_input.extend_from_slice(b"delete:");
|
||||
hasher_input.extend_from_slice(namespace.as_bytes());
|
||||
hasher_input.push(b'/');
|
||||
hasher_input.extend_from_slice(key.as_bytes());
|
||||
self.append_entry(&hasher_input, WITNESS_DELETE)
|
||||
}
|
||||
|
||||
/// Record a search action.
|
||||
pub fn record_search(&mut self, namespace: &str, k: usize) -> Result<(), WitnessError> {
|
||||
let mut hasher_input = Vec::new();
|
||||
hasher_input.extend_from_slice(b"search:");
|
||||
hasher_input.extend_from_slice(namespace.as_bytes());
|
||||
hasher_input.push(b':');
|
||||
hasher_input.extend_from_slice(k.to_string().as_bytes());
|
||||
self.append_entry(&hasher_input, WITNESS_SEARCH)
|
||||
}
|
||||
|
||||
/// Record a compaction action.
|
||||
pub fn record_compact(&mut self) -> Result<(), WitnessError> {
|
||||
self.append_entry(b"compact", WITNESS_COMPACT)
|
||||
}
|
||||
|
||||
/// Verify the entire chain is intact.
|
||||
pub fn verify(&self) -> Result<usize, WitnessError> {
|
||||
if self.chain_data.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
let entries = verify_witness_chain(&self.chain_data)
|
||||
.map_err(|_| WitnessError::ChainCorrupted)?;
|
||||
Ok(entries.len())
|
||||
}
|
||||
|
||||
/// Return the number of entries in the chain.
|
||||
pub fn len(&self) -> usize {
|
||||
self.entry_count
|
||||
}
|
||||
|
||||
/// Return whether the chain is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entry_count == 0
|
||||
}
|
||||
|
||||
// ── Internal ──────────────────────────────────────────────────────
|
||||
|
||||
fn append_entry(&mut self, action_data: &[u8], witness_type: u8) -> Result<(), WitnessError> {
|
||||
let action_hash = shake256_256(action_data);
|
||||
let timestamp_ns = now_ns();
|
||||
|
||||
let entry = WitnessEntry {
|
||||
prev_hash: [0u8; 32], // create_witness_chain will set this
|
||||
action_hash,
|
||||
timestamp_ns,
|
||||
witness_type,
|
||||
};
|
||||
|
||||
// Rebuild the entire chain with the new entry appended.
|
||||
// This is correct because create_witness_chain re-links prev_hash.
|
||||
let mut all_entries = if self.chain_data.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
verify_witness_chain(&self.chain_data)
|
||||
.map_err(|_| WitnessError::ChainCorrupted)?
|
||||
};
|
||||
all_entries.push(entry);
|
||||
|
||||
let new_chain = create_witness_chain(&all_entries);
|
||||
|
||||
// Persist atomically: write to temp then rename.
        let tmp_path = self.path.with_extension("bin.tmp");
        {
            let mut f = OpenOptions::new()
                .write(true)
                .create(true)
                .truncate(true)
                .open(&tmp_path)
                .map_err(|e| WitnessError::Io(e.to_string()))?;
            f.write_all(&new_chain).map_err(|e| WitnessError::Io(e.to_string()))?;
            f.sync_all().map_err(|e| WitnessError::Io(e.to_string()))?;
        }
        std::fs::rename(&tmp_path, &self.path).map_err(|e| WitnessError::Io(e.to_string()))?;

        self.chain_data = new_chain;
        self.entry_count = all_entries.len();
        Ok(())
    }
}

/// Errors from witness chain operations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WitnessError {
    /// I/O error (stringified for Clone/Eq compatibility).
    Io(String),
    /// Chain integrity verification failed.
    ChainCorrupted,
}

impl std::fmt::Display for WitnessError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(msg) => write!(f, "witness I/O error: {msg}"),
            Self::ChainCorrupted => write!(f, "witness chain integrity check failed"),
        }
    }
}

impl std::error::Error for WitnessError {}

fn now_ns() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0)
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn create_and_open_empty() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        let chain = WitnessChain::create(&path).unwrap();
        assert_eq!(chain.len(), 0);
        assert!(chain.is_empty());

        let reopened = WitnessChain::open(&path).unwrap();
        assert_eq!(reopened.len(), 0);
    }

    #[test]
    fn record_and_verify() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        let mut chain = WitnessChain::create(&path).unwrap();
        chain.record_store("key1", "default").unwrap();
        chain.record_search("default", 5).unwrap();
        chain.record_delete("key1", "default").unwrap();
        assert_eq!(chain.len(), 3);

        let count = chain.verify().unwrap();
        assert_eq!(count, 3);
    }

    #[test]
    fn persistence_across_reopen() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_store("a", "ns").unwrap();
            chain.record_store("b", "ns").unwrap();
        }

        let chain = WitnessChain::open(&path).unwrap();
        assert_eq!(chain.len(), 2);
        assert_eq!(chain.verify().unwrap(), 2);
    }

    #[test]
    fn tampered_chain_detected() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_store("x", "ns").unwrap();
            chain.record_store("y", "ns").unwrap();
        }

        // Tamper with the file.
        let mut data = std::fs::read(&path).unwrap();
        if data.len() > 40 {
            data[40] ^= 0xFF;
        }
        std::fs::write(&path, &data).unwrap();

        let result = WitnessChain::open(&path);
        assert!(result.is_err() || result.unwrap().verify().is_err());
    }

    #[test]
    fn open_or_create_new() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        let chain = WitnessChain::open_or_create(&path).unwrap();
        assert!(chain.is_empty());
    }

    #[test]
    fn open_or_create_existing() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");

        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_compact().unwrap();
        }

        let chain = WitnessChain::open_or_create(&path).unwrap();
        assert_eq!(chain.len(), 1);
    }
}
18
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/Cargo.toml
vendored
Normal file
@@ -0,0 +1,18 @@
[package]
name = "rvf-adapter-ospipe"
version = "0.1.0"
edition = "2021"
description = "OSpipe adapter for RuVector Format -- maps observation state vectors to RVF with META_SEG"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"

[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }

[dev-dependencies]
tempfile = "3"
17
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/lib.rs
vendored
Normal file
@@ -0,0 +1,17 @@
//! OSpipe adapter for the RuVector Format (RVF).
//!
//! Maps OSpipe's observation-state pipeline onto the RVF segment model:
//!
//! - **VEC_SEG**: State vector embeddings (screen, audio, UI observations)
//! - **META_SEG**: Observation metadata (app name, content type, timestamps)
//! - **JOURNAL_SEG**: Deletion records for expired observations
//!
//! The adapter bridges OSpipe's `StoredEmbedding` / `CapturedFrame` world
//! (UUID ids, chrono timestamps, JSON metadata) to RVF's u64-id,
//! field-based metadata model.

pub mod observation_store;
pub mod pipeline;

pub use observation_store::{ObservationMeta, RvfObservationStore};
pub use pipeline::{PipelineConfig, RvfPipelineAdapter};
636
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/observation_store.rs
vendored
Normal file
@@ -0,0 +1,636 @@
//! RVF-backed observation store for OSpipe state vectors.
//!
//! Maps OSpipe observation embeddings into RVF segments with metadata
//! stored via field IDs in META_SEG entries.
//!
//! # Field layout
//!
//! | field_id | type   | description            |
//! |----------|--------|------------------------|
//! | 0        | String | content_type           |
//! | 1        | String | app_name               |
//! | 2        | U64    | timestamp_secs (epoch) |
//! | 3        | U64    | monitor_id             |
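//!
//! # Example
//!
//! A minimal usage sketch; the data directory below is illustrative, and
//! the tests in this module remain the authoritative reference:
//!
//! ```no_run
//! use rvf_adapter_ospipe::observation_store::ObservationStoreConfig;
//! use rvf_adapter_ospipe::{ObservationMeta, RvfObservationStore};
//!
//! let config = ObservationStoreConfig::new("/tmp/ospipe-data", 64);
//! let mut store = RvfObservationStore::create(config).unwrap();
//!
//! let meta = ObservationMeta {
//!     content_type: "ocr".into(),
//!     app_name: Some("VSCode".into()),
//!     timestamp_secs: 1_700_000_000,
//!     monitor_id: Some(0),
//! };
//! let (id, _result) = store.record_observation(&vec![0.1; 64], &meta).unwrap();
//! let hits = store.query_similar_states(&vec![0.1; 64], 5).unwrap();
//! assert_eq!(hits[0].id, id);
//! ```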

use std::path::PathBuf;

use rvf_runtime::filter::FilterExpr;
use rvf_runtime::options::{
    DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
};
use rvf_runtime::{IngestResult, RvfStore, SearchResult, StoreStatus};
use rvf_types::RvfError;

/// Well-known metadata field IDs for OSpipe observations.
pub mod fields {
    /// Content type (ocr, transcription, ui_event).
    pub const CONTENT_TYPE: u16 = 0;
    /// Application name.
    pub const APP_NAME: u16 = 1;
    /// Observation timestamp as seconds since UNIX epoch.
    pub const TIMESTAMP_SECS: u16 = 2;
    /// Monitor index.
    pub const MONITOR_ID: u16 = 3;
}

/// Metadata for an observation to be recorded.
#[derive(Clone, Debug)]
pub struct ObservationMeta {
    /// Content type label (e.g. "ocr", "transcription", "ui_event").
    pub content_type: String,
    /// Application name, if known.
    pub app_name: Option<String>,
    /// Observation timestamp as seconds since UNIX epoch.
    pub timestamp_secs: u64,
    /// Monitor index, if applicable.
    pub monitor_id: Option<u32>,
}

impl ObservationMeta {
    /// Convert to RVF metadata entries for a single vector.
    fn to_entries(&self) -> Vec<MetadataEntry> {
        let mut entries = Vec::with_capacity(4);

        entries.push(MetadataEntry {
            field_id: fields::CONTENT_TYPE,
            value: MetadataValue::String(self.content_type.clone()),
        });

        if let Some(ref app) = self.app_name {
            entries.push(MetadataEntry {
                field_id: fields::APP_NAME,
                value: MetadataValue::String(app.clone()),
            });
        }

        entries.push(MetadataEntry {
            field_id: fields::TIMESTAMP_SECS,
            value: MetadataValue::U64(self.timestamp_secs),
        });

        if let Some(monitor) = self.monitor_id {
            entries.push(MetadataEntry {
                field_id: fields::MONITOR_ID,
                value: MetadataValue::U64(monitor as u64),
            });
        }

        entries
    }
}

/// Configuration for the observation store.
#[derive(Clone, Debug)]
pub struct ObservationStoreConfig {
    /// Directory for RVF data files.
    pub data_dir: PathBuf,
    /// Vector embedding dimension.
    pub dimension: u16,
    /// Distance metric (defaults to Cosine for OSpipe embeddings).
    pub metric: DistanceMetric,
}

impl ObservationStoreConfig {
    /// Create with required parameters, using the Cosine metric by default.
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension,
            metric: DistanceMetric::Cosine,
        }
    }

    /// Set the distance metric.
    pub fn with_metric(mut self, metric: DistanceMetric) -> Self {
        self.metric = metric;
        self
    }

    fn store_path(&self) -> PathBuf {
        self.data_dir.join("observations.rvf")
    }
}

/// RVF-backed observation store for OSpipe.
///
/// Wraps an `RvfStore` and provides observation-oriented APIs:
/// - `record_observation` -- ingest a state vector with metadata
/// - `query_similar_states` -- k-NN search over observation vectors
/// - `get_state_history` -- filtered query by time range
/// - `compact_history` -- reclaim dead space from deleted observations
pub struct RvfObservationStore {
    store: RvfStore,
    #[allow(dead_code)]
    config: ObservationStoreConfig,
    next_id: u64,
}

impl RvfObservationStore {
    /// Create a new observation store, creating the RVF file.
    pub fn create(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
        if config.dimension == 0 {
            return Err(OspipeAdapterError::InvalidDimension);
        }
        std::fs::create_dir_all(&config.data_dir)
            .map_err(|e| OspipeAdapterError::Io(e.to_string()))?;

        let options = RvfOptions {
            dimension: config.dimension,
            metric: config.metric,
            ..Default::default()
        };

        let store = RvfStore::create(&config.store_path(), options)
            .map_err(OspipeAdapterError::Rvf)?;

        Ok(Self {
            store,
            config,
            next_id: 1,
        })
    }

    /// Open an existing observation store.
    pub fn open(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
        let store = RvfStore::open(&config.store_path())
            .map_err(OspipeAdapterError::Rvf)?;

        let status = store.status();
        let next_id = status.total_vectors + status.current_epoch as u64 + 1;

        Ok(Self {
            store,
            config,
            next_id,
        })
    }

    /// Open an existing store in read-only mode.
    pub fn open_readonly(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
        let store = RvfStore::open_readonly(&config.store_path())
            .map_err(OspipeAdapterError::Rvf)?;

        Ok(Self {
            store,
            config,
            next_id: 0,
        })
    }

    /// Record a single observation with its state vector and metadata.
    ///
    /// Returns the assigned vector ID and the ingest result.
    pub fn record_observation(
        &mut self,
        state_vector: &[f32],
        meta: &ObservationMeta,
    ) -> Result<(u64, IngestResult), OspipeAdapterError> {
        let id = self.next_id;
        self.next_id += 1;

        let entries = meta.to_entries();
        let result = self.store.ingest_batch(
            &[state_vector],
            &[id],
            Some(&entries),
        ).map_err(OspipeAdapterError::Rvf)?;

        Ok((id, result))
    }

    /// Record a batch of observations.
    ///
    /// `vectors` and `metas` must have the same length.
    /// Returns the assigned IDs and the ingest result.
    pub fn record_batch(
        &mut self,
        vectors: &[&[f32]],
        metas: &[ObservationMeta],
    ) -> Result<(Vec<u64>, IngestResult), OspipeAdapterError> {
        if vectors.len() != metas.len() {
            return Err(OspipeAdapterError::LengthMismatch {
                vectors: vectors.len(),
                metas: metas.len(),
            });
        }

        let start_id = self.next_id;
        let ids: Vec<u64> = (start_id..start_id + vectors.len() as u64).collect();
        self.next_id = start_id + vectors.len() as u64;

        // Flatten metadata entries: each vector gets its own entries.
        // RvfStore expects entries_per_id to be uniform, so we pad to
        // a consistent entry count per vector.
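        // E.g. with max_entries = 4, a meta producing two real entries
        // contributes [entry0, entry1, pad, pad], where each pad carries
        // the sentinel field_id u16::MAX (see the loop below).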
        let entries_per_vec: Vec<Vec<MetadataEntry>> =
            metas.iter().map(|m| m.to_entries()).collect();

        let max_entries = entries_per_vec.iter().map(|e| e.len()).max().unwrap_or(0);

        let mut flat_entries = Vec::with_capacity(vectors.len() * max_entries);
        for vec_entries in &entries_per_vec {
            for entry in vec_entries {
                flat_entries.push(entry.clone());
            }
            // Pad with dummy entries so every vector has the same count.
            for _ in vec_entries.len()..max_entries {
                flat_entries.push(MetadataEntry {
                    field_id: u16::MAX,
                    value: MetadataValue::U64(0),
                });
            }
        }

        let result = self.store.ingest_batch(
            vectors,
            &ids,
            if flat_entries.is_empty() { None } else { Some(&flat_entries) },
        ).map_err(OspipeAdapterError::Rvf)?;

        Ok((ids, result))
    }

    /// Query for the k most similar observation states.
    pub fn query_similar_states(
        &self,
        state_vector: &[f32],
        k: usize,
    ) -> Result<Vec<SearchResult>, OspipeAdapterError> {
        self.store
            .query(state_vector, k, &QueryOptions::default())
            .map_err(OspipeAdapterError::Rvf)
    }

    /// Query with a metadata filter expression.
    pub fn query_filtered(
        &self,
        state_vector: &[f32],
        k: usize,
        filter: FilterExpr,
    ) -> Result<Vec<SearchResult>, OspipeAdapterError> {
        let opts = QueryOptions {
            filter: Some(filter),
            ..Default::default()
        };
        self.store
            .query(state_vector, k, &opts)
            .map_err(OspipeAdapterError::Rvf)
    }

    /// Query for observations within a time range.
    ///
    /// `start_secs` and `end_secs` are UNIX epoch seconds. The query
    /// vector is used for similarity ranking among the time-filtered results.
    pub fn get_state_history(
        &self,
        state_vector: &[f32],
        k: usize,
        start_secs: u64,
        end_secs: u64,
    ) -> Result<Vec<SearchResult>, OspipeAdapterError> {
        use rvf_runtime::filter::FilterValue;

        let filter = FilterExpr::And(vec![
            FilterExpr::Ge(fields::TIMESTAMP_SECS, FilterValue::U64(start_secs)),
            FilterExpr::Le(fields::TIMESTAMP_SECS, FilterValue::U64(end_secs)),
        ]);

        self.query_filtered(state_vector, k, filter)
    }

    /// Run compaction to reclaim space from deleted observations.
    pub fn compact_history(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
        self.store.compact().map_err(OspipeAdapterError::Rvf)
    }

    /// Delete observations by their IDs.
    pub fn delete_observations(
        &mut self,
        ids: &[u64],
    ) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
        self.store.delete(ids).map_err(OspipeAdapterError::Rvf)
    }

    /// Delete observations matching a filter expression.
    pub fn delete_by_filter(
        &mut self,
        filter: &FilterExpr,
    ) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
        self.store.delete_by_filter(filter).map_err(OspipeAdapterError::Rvf)
    }

    /// Get the current store status.
    pub fn status(&self) -> StoreStatus {
        self.store.status()
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), OspipeAdapterError> {
        self.store.close().map_err(OspipeAdapterError::Rvf)
    }
}

/// Errors produced by the OSpipe adapter.
#[derive(Clone, Debug)]
pub enum OspipeAdapterError {
    /// Underlying RVF error.
    Rvf(RvfError),
    /// IO error (directory creation, etc.).
    Io(String),
    /// Vector dimension must be > 0.
    InvalidDimension,
    /// Batch vectors and metadata have different lengths.
    LengthMismatch { vectors: usize, metas: usize },
}

impl std::fmt::Display for OspipeAdapterError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF error: {e}"),
            Self::Io(msg) => write!(f, "IO error: {msg}"),
            Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
            Self::LengthMismatch { vectors, metas } => {
                write!(f, "vectors ({vectors}) and metas ({metas}) length mismatch")
            }
        }
    }
}

impl std::error::Error for OspipeAdapterError {}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    fn now_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
    }

    #[test]
    fn create_and_record_observation() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 64);
        let mut store = RvfObservationStore::create(config).unwrap();

        let vec = make_vector(64, 42);
        let meta = ObservationMeta {
            content_type: "ocr".into(),
            app_name: Some("VSCode".into()),
            timestamp_secs: now_secs(),
            monitor_id: Some(0),
        };

        let (id, result) = store.record_observation(&vec, &meta).unwrap();
        assert_eq!(id, 1);
        assert_eq!(result.accepted, 1);
        assert_eq!(result.rejected, 0);

        store.close().unwrap();
    }

    #[test]
    fn query_similar_states() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 32);
        let mut store = RvfObservationStore::create(config).unwrap();

        // Insert 10 observations.
        for i in 0..10u64 {
            let vec = make_vector(32, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs() + i,
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }

        let query = make_vector(32, 5);
        let results = store.query_similar_states(&query, 3).unwrap();
        assert_eq!(results.len(), 3);

        // Closest should be the same vector (id 6, since first id is 1).
        assert_eq!(results[0].id, 6);
        assert!(results[0].distance < 1e-5);

        // Results are sorted by distance ascending.
        for i in 1..results.len() {
            assert!(results[i].distance >= results[i - 1].distance);
        }

        store.close().unwrap();
    }

    #[test]
    fn get_state_history_filters_by_time() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);
        let mut store = RvfObservationStore::create(config).unwrap();

        let base_time = 1_700_000_000u64;

        // Insert observations at different times.
        for i in 0..5u64 {
            let vec = make_vector(16, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: base_time + i * 100,
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }

        // Query for observations in the range [base+100, base+300].
        let query = make_vector(16, 0);
        let results = store
            .get_state_history(&query, 10, base_time + 100, base_time + 300)
            .unwrap();

        // Should get ids 2, 3, 4 (timestamps base+100, base+200, base+300).
        assert_eq!(results.len(), 3);
        let ids: Vec<u64> = results.iter().map(|r| r.id).collect();
        assert!(ids.contains(&2));
        assert!(ids.contains(&3));
        assert!(ids.contains(&4));

        store.close().unwrap();
    }

    #[test]
    fn record_batch_and_query() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);
        let mut store = RvfObservationStore::create(config).unwrap();

        let vecs: Vec<Vec<f32>> = (0..5).map(|i| make_vector(16, i)).collect();
        let vec_refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let metas: Vec<ObservationMeta> = (0..5)
            .map(|i| ObservationMeta {
                content_type: if i % 2 == 0 { "ocr" } else { "transcription" }.into(),
                app_name: Some("TestApp".into()),
                timestamp_secs: now_secs() + i,
                monitor_id: None,
            })
            .collect();

        let (ids, result) = store.record_batch(&vec_refs, &metas).unwrap();
        assert_eq!(ids.len(), 5);
        assert_eq!(result.accepted, 5);

        let query = make_vector(16, 2);
        let results = store.query_similar_states(&query, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, 3); // id starts at 1, so seed=2 -> id=3

        store.close().unwrap();
    }

    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);
        let mut store = RvfObservationStore::create(config).unwrap();

        // Insert 4 observations.
        for i in 0..4u64 {
            let vec = make_vector(8, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }

        let status = store.status();
        assert_eq!(status.total_vectors, 4);

        // Delete 2 observations.
        let del = store.delete_observations(&[1, 3]).unwrap();
        assert_eq!(del.deleted, 2);

        let status = store.status();
        assert_eq!(status.total_vectors, 2);

        // Compact.
        let compact = store.compact_history().unwrap();
        assert_eq!(compact.segments_compacted, 2);

        // Verify remaining vectors are queryable.
        let query = make_vector(8, 1); // seed=1 -> was id=2
        let results = store.query_similar_states(&query, 10).unwrap();
        assert_eq!(results.len(), 2);
        let ids: Vec<u64> = results.iter().map(|r| r.id).collect();
        assert!(ids.contains(&2));
        assert!(ids.contains(&4));

        store.close().unwrap();
    }

    #[test]
    fn open_existing_store() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);

        // Create and populate.
        {
            let mut store = RvfObservationStore::create(config.clone()).unwrap();
            let vec = make_vector(16, 99);
            let meta = ObservationMeta {
                content_type: "transcription".into(),
                app_name: Some("Zoom".into()),
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
            store.close().unwrap();
        }

        // Reopen.
        {
            let store = RvfObservationStore::open(config).unwrap();
            let query = make_vector(16, 99);
            let results = store.query_similar_states(&query, 1).unwrap();
            assert_eq!(results.len(), 1);
            assert!(results[0].distance < 1e-5);
            store.close().unwrap();
        }
    }

    #[test]
    fn readonly_mode() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);

        {
            let mut store = RvfObservationStore::create(config.clone()).unwrap();
            let vec = make_vector(8, 0);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
            store.close().unwrap();
        }

        let store = RvfObservationStore::open_readonly(config).unwrap();
        let status = store.status();
        assert!(status.read_only);
        assert_eq!(status.total_vectors, 1);
    }

    #[test]
    fn invalid_dimension_rejected() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 0);
        let result = RvfObservationStore::create(config);
        assert!(result.is_err());
    }

    #[test]
    fn batch_length_mismatch_rejected() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);
        let mut store = RvfObservationStore::create(config).unwrap();

        let vecs = [make_vector(8, 0)];
        let vec_refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let metas = vec![
            ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: 0,
                monitor_id: None,
            },
            ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: 0,
                monitor_id: None,
            },
        ];

        let result = store.record_batch(&vec_refs, &metas);
        assert!(result.is_err());

        store.close().unwrap();
    }
}
267
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/pipeline.rs
vendored
Normal file
@@ -0,0 +1,267 @@
//! Pipeline integration helpers for OSpipe.
//!
//! Provides [`RvfPipelineAdapter`] which wraps [`RvfObservationStore`] and
//! exposes a simplified interface for OSpipe's ingestion pipeline to push
//! captured frames directly into the RVF store.
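//!
//! # Example
//!
//! A minimal sketch of the intended flow; the data directory is
//! illustrative, and the tests below exercise the same calls:
//!
//! ```no_run
//! use rvf_adapter_ospipe::{PipelineConfig, RvfPipelineAdapter};
//!
//! let config = PipelineConfig::new("/tmp/ospipe-rvf", 32);
//! let mut adapter = RvfPipelineAdapter::create(config).unwrap();
//!
//! // Push one captured frame's embedding into the store.
//! let id = adapter
//!     .ingest(&vec![0.1; 32], "ocr", Some("VSCode"), 1_700_000_000, Some(0))
//!     .unwrap();
//!
//! // Later: find similar observations.
//! let hits = adapter.search(&vec![0.1; 32], 5).unwrap();
//! assert_eq!(hits[0].id, id);
//!
//! adapter.close().unwrap();
//! ```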

use std::path::PathBuf;

use rvf_runtime::options::DistanceMetric;

use crate::observation_store::{
    ObservationMeta, ObservationStoreConfig, OspipeAdapterError, RvfObservationStore,
};

/// Configuration for the pipeline adapter.
#[derive(Clone, Debug)]
pub struct PipelineConfig {
    /// Directory for RVF data files.
    pub data_dir: PathBuf,
    /// Vector embedding dimension.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: DistanceMetric,
    /// Automatically compact when dead-space ratio exceeds this threshold.
    pub auto_compact_threshold: f64,
}

impl PipelineConfig {
    /// Create a new pipeline config with required parameters.
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension,
            metric: DistanceMetric::Cosine,
            auto_compact_threshold: 0.3,
        }
    }
}

/// High-level adapter that OSpipe's ingestion pipeline can use to persist
/// observation vectors into an RVF store.
///
/// Handles store lifecycle, auto-compaction, and provides convenience
/// methods that accept OSpipe-domain types directly.
pub struct RvfPipelineAdapter {
    store: RvfObservationStore,
    config: PipelineConfig,
    ingest_count: u64,
}

impl RvfPipelineAdapter {
    /// Create a new pipeline adapter, creating the underlying RVF file.
    pub fn create(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
        let store_config = ObservationStoreConfig {
            data_dir: config.data_dir.clone(),
            dimension: config.dimension,
            metric: config.metric,
        };

        let store = RvfObservationStore::create(store_config)?;

        Ok(Self {
            store,
            config,
            ingest_count: 0,
        })
    }

    /// Open an existing pipeline adapter.
    pub fn open(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
        let store_config = ObservationStoreConfig {
            data_dir: config.data_dir.clone(),
            dimension: config.dimension,
            metric: config.metric,
        };

        let store = RvfObservationStore::open(store_config)?;

        Ok(Self {
            store,
            config,
            ingest_count: 0,
        })
    }

    /// Ingest a single observation from the pipeline.
    ///
    /// This is the primary entry point for OSpipe's ingestion pipeline.
    /// After ingestion, may trigger auto-compaction if the dead-space
    /// ratio exceeds the configured threshold.
    pub fn ingest(
        &mut self,
        embedding: &[f32],
        content_type: &str,
        app_name: Option<&str>,
        timestamp_secs: u64,
        monitor_id: Option<u32>,
    ) -> Result<u64, OspipeAdapterError> {
        let meta = ObservationMeta {
            content_type: content_type.to_string(),
            app_name: app_name.map(|s| s.to_string()),
            timestamp_secs,
            monitor_id,
        };

        let (id, _result) = self.store.record_observation(embedding, &meta)?;
        self.ingest_count += 1;

        self.maybe_compact()?;

        Ok(id)
    }

    /// Search for similar observations.
    pub fn search(
        &self,
        query: &[f32],
        k: usize,
    ) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
        self.store.query_similar_states(query, k)
    }

    /// Search for observations within a time window.
    pub fn search_time_range(
        &self,
        query: &[f32],
        k: usize,
        start_secs: u64,
        end_secs: u64,
    ) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
        self.store.get_state_history(query, k, start_secs, end_secs)
    }

    /// Expire observations older than the given timestamp.
    ///
    /// Scans for observations with timestamps before `before_secs` and
    /// soft-deletes them. Returns the number of observations deleted.
    pub fn expire_before(
        &mut self,
        before_secs: u64,
    ) -> Result<u64, OspipeAdapterError> {
        use rvf_runtime::filter::{FilterExpr, FilterValue};

        let filter = FilterExpr::Lt(
            crate::observation_store::fields::TIMESTAMP_SECS,
            FilterValue::U64(before_secs),
        );

        let result = self.store.delete_by_filter(&filter)?;

        Ok(result.deleted)
    }

    /// Force a compaction cycle.
    pub fn compact(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
        self.store.compact_history()
    }

    /// Get the total number of live observations.
    pub fn observation_count(&self) -> u64 {
        self.store.status().total_vectors
    }

    /// Close the adapter and release resources.
    pub fn close(self) -> Result<(), OspipeAdapterError> {
        self.store.close()
    }

    /// Check if auto-compaction should run, and run it if so.
    fn maybe_compact(&mut self) -> Result<(), OspipeAdapterError> {
        let status = self.store.status();
        if status.dead_space_ratio > self.config.auto_compact_threshold {
            self.store.compact_history()?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    fn now_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
    }

    #[test]
    fn pipeline_ingest_and_search() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 32);
        let mut adapter = RvfPipelineAdapter::create(config).unwrap();

        let ts = now_secs();

        for i in 0..5u64 {
            let vec = make_vector(32, i);
            adapter
                .ingest(&vec, "ocr", Some("VSCode"), ts + i, Some(0))
                .unwrap();
        }

        assert_eq!(adapter.observation_count(), 5);

        let query = make_vector(32, 2);
        let results = adapter.search(&query, 2).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 3); // seed=2 -> id=3 (1-indexed)

        adapter.close().unwrap();
    }

    #[test]
    fn pipeline_time_range_search() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 16);
        let mut adapter = RvfPipelineAdapter::create(config).unwrap();

        let base = 1_700_000_000u64;
        for i in 0..4u64 {
            let vec = make_vector(16, i);
            adapter
                .ingest(&vec, "transcription", None, base + i * 3600, None)
                .unwrap();
        }

        let query = make_vector(16, 0);
        let results = adapter
            .search_time_range(&query, 10, base + 3600, base + 7200)
            .unwrap();

        // Should get observations at base+3600 (id=2) and base+7200 (id=3).
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn pipeline_open_existing() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 16);

        {
            let mut adapter = RvfPipelineAdapter::create(config.clone()).unwrap();
            let vec = make_vector(16, 0);
            adapter.ingest(&vec, "ocr", None, now_secs(), None).unwrap();
            adapter.close().unwrap();
        }

        {
            let adapter = RvfPipelineAdapter::open(config).unwrap();
            assert_eq!(adapter.observation_count(), 1);
            adapter.close().unwrap();
        }
    }
}
19
vendor/ruvector/crates/rvf/rvf-adapters/rvlite/Cargo.toml
vendored
Normal file
@@ -0,0 +1,19 @@
[package]
name = "rvf-adapter-rvlite"
version = "0.1.0"
edition = "2021"
description = "Lightweight embedded vector store adapter for RuVector Format -- minimal API over RVF Core Profile"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"

[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }

[dev-dependencies]
tempfile = "3"
484
vendor/ruvector/crates/rvf/rvf-adapters/rvlite/src/collection.rs
vendored
Normal file
@@ -0,0 +1,484 @@
//! The main rvlite collection API.
//!
//! [`RvliteCollection`] provides a minimal, ergonomic interface for
//! embedded vector storage. No metadata, no filters, no namespaces --
//! just vectors with IDs.
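//!
//! # Example
//!
//! A small sketch complementing the crate-level quick start (the path
//! shown is illustrative):
//!
//! ```no_run
//! use rvf_adapter_rvlite::{RvliteCollection, RvliteConfig, RvliteMetric};
//!
//! let config = RvliteConfig::new("/tmp/notes.rvf", 4).with_metric(RvliteMetric::L2);
//! let mut col = RvliteCollection::create(config).unwrap();
//!
//! col.add(7, &[1.0, 0.0, 0.0, 0.0]).unwrap();
//! assert!(col.contains(7));
//!
//! col.remove(7).unwrap();
//! let stats = col.compact().unwrap();
//! println!("reclaimed {} bytes", stats.bytes_reclaimed);
//! col.close().unwrap();
//! ```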

use std::path::Path;

use rvf_runtime::options::{QueryOptions, RvfOptions};
use rvf_runtime::store::RvfStore;

use crate::config::RvliteConfig;
use crate::error::{Result, RvliteError};

/// A single search result: vector ID and distance from the query.
#[derive(Clone, Debug, PartialEq)]
pub struct Match {
    /// The vector's unique identifier.
    pub id: u64,
    /// Distance from the query vector (lower = more similar).
    pub distance: f32,
}

/// Statistics returned by the [`RvliteCollection::compact`] operation.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompactStats {
    /// Number of segments that were compacted.
    pub segments_compacted: u32,
    /// Total bytes of dead space reclaimed.
    pub bytes_reclaimed: u64,
}

/// A lightweight embedded vector collection wrapping [`RvfStore`].
pub struct RvliteCollection {
    store: RvfStore,
    dimension: u16,
}

impl RvliteCollection {
    /// Create a new collection at the configured path (file must not exist).
    pub fn create(config: RvliteConfig) -> Result<Self> {
        let options = RvfOptions {
            dimension: config.dimension,
            metric: config.metric.into(),
            profile: 1, // Core profile
            ..Default::default()
        };

        let store = RvfStore::create(&config.path, options)?;
        Ok(Self {
            store,
            dimension: config.dimension,
        })
    }

    /// Open an existing collection (file must exist with a valid RVF manifest).
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let store = RvfStore::open(path.as_ref())?;
        // The dimension is stored in the manifest and recovered on boot,
        // so we query it via a probe against the store.
        let dim = Self::probe_dimension(&store);
        Ok(Self {
            store,
            dimension: dim,
        })
    }

    /// Add a single vector with the given ID. Errors on dimension mismatch.
    pub fn add(&mut self, id: u64, vector: &[f32]) -> Result<()> {
        self.check_dimension(vector.len())?;
        self.store.ingest_batch(&[vector], &[id], None)?;
        Ok(())
    }

    /// Add multiple vectors in a single batch. Returns count added.
    pub fn add_batch(&mut self, ids: &[u64], vectors: &[&[f32]]) -> Result<usize> {
        if ids.len() != vectors.len() {
            return Err(RvliteError::Io(
                "ids and vectors must have the same length".into(),
            ));
        }
        let result = self.store.ingest_batch(vectors, ids, None)?;
        Ok(result.accepted as usize)
    }

    /// Find the `k` nearest neighbors, sorted by distance (closest first).
    pub fn search(&self, vector: &[f32], k: usize) -> Vec<Match> {
        if vector.len() != self.dimension as usize {
            return Vec::new();
        }
        let query_opts = QueryOptions::default();
        match self.store.query(vector, k, &query_opts) {
            Ok(results) => results
                .into_iter()
                .map(|r| Match {
                    id: r.id,
                    distance: r.distance,
                })
                .collect(),
            Err(_) => Vec::new(),
        }
    }

    /// Remove a single vector by ID. Returns whether it existed.
    pub fn remove(&mut self, id: u64) -> Result<bool> {
        let result = self.store.delete(&[id])?;
        Ok(result.deleted > 0)
    }

    /// Remove multiple vectors by ID. Returns count actually removed.
    pub fn remove_batch(&mut self, ids: &[u64]) -> Result<usize> {
        let result = self.store.delete(ids)?;
        Ok(result.deleted as usize)
    }

    /// Check whether a vector with the given ID exists (soft-deleted = absent).
    pub fn contains(&self, id: u64) -> bool {
        let total = self.store.status().total_vectors as usize;
        if total == 0 {
            return false;
        }
        // Brute-force scan via query; acceptable for rvlite's small collections.
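        // Each call queries with k equal to the live-vector count, so the
        // cost is linear in collection size; callers that need frequent
        // membership checks may prefer to track IDs themselves.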
        let zero_vec = vec![0.0f32; self.dimension as usize];
        match self.store.query(&zero_vec, total, &QueryOptions::default()) {
            Ok(results) => results.iter().any(|r| r.id == id),
            Err(_) => false,
        }
    }

    /// Return the number of live (non-deleted) vectors in the collection.
    pub fn len(&self) -> usize {
        self.store.status().total_vectors as usize
    }

    /// Return `true` if the collection has no live vectors.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Compact the collection, reclaiming space from deleted vectors.
    pub fn compact(&mut self) -> Result<CompactStats> {
        let result = self.store.compact()?;
        Ok(CompactStats {
            segments_compacted: result.segments_compacted,
            bytes_reclaimed: result.bytes_reclaimed,
        })
    }

    /// Flush all pending writes and close the collection, consuming the handle.
    pub fn close(self) -> Result<()> {
        self.store.close()?;
        Ok(())
    }

    /// Return the configured vector dimension.
    pub fn dimension(&self) -> u16 {
        self.dimension
    }

    // ---- Internal helpers ------------------------------------------------

    /// Validate that a vector length matches the collection dimension.
    fn check_dimension(&self, len: usize) -> Result<()> {
        if len != self.dimension as usize {
            return Err(RvliteError::DimensionMismatch {
                expected: self.dimension,
                got: len,
            });
        }
        Ok(())
    }

    /// Probe the dimension of an opened store by trying queries with
    /// increasing dimensions until one succeeds.
    ///
    /// RvfStore stores the dimension internally but does not expose it
    /// directly. When there are vectors present, a query with the wrong
    /// dimension returns `DimensionMismatch`, so we try dimensions
    /// 1..=4096 until one succeeds. For empty stores we return 0 as a
    /// sentinel.
    fn probe_dimension(store: &RvfStore) -> u16 {
        if store.status().total_vectors == 0 {
            return 0;
        }
        let opts = QueryOptions::default();
        for dim in 1u16..=4096 {
            let probe = vec![0.0f32; dim as usize];
            if store.query(&probe, 1, &opts).is_ok() {
                return dim;
            }
        }
        0
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{RvliteConfig, RvliteMetric};
    use tempfile::TempDir;

    fn temp_path(dir: &TempDir, name: &str) -> std::path::PathBuf {
        dir.path().join(name)
    }

    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "basic.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();
        assert!(col.is_empty());
        assert_eq!(col.len(), 0);

        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
        col.add(3, &[0.0, 0.0, 1.0, 0.0]).unwrap();

        assert_eq!(col.len(), 3);
        assert!(!col.is_empty());

        let results = col.search(&[1.0, 0.0, 0.0, 0.0], 2);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 1);
        assert!(results[0].distance < f32::EPSILON);

        col.close().unwrap();
    }

    #[test]
    fn batch_add_and_search() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "batch.rvf"), 3).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        let ids = vec![10, 20, 30];
        let v1 = [1.0, 0.0, 0.0];
        let v2 = [0.0, 1.0, 0.0];
        let v3 = [0.0, 0.0, 1.0];
        let vecs: Vec<&[f32]> = vec![&v1, &v2, &v3];

        let count = col.add_batch(&ids, &vecs).unwrap();
        assert_eq!(count, 3);
        assert_eq!(col.len(), 3);

        let results = col.search(&[0.0, 1.0, 0.0], 1);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, 20);

        col.close().unwrap();
    }

    #[test]
    fn remove_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "remove.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
        col.add(3, &[0.0, 0.0, 1.0, 0.0]).unwrap();

        assert_eq!(col.len(), 3);
        assert!(col.contains(2));

        let removed = col.remove(2).unwrap();
        assert!(removed);
        assert_eq!(col.len(), 2);
        assert!(!col.contains(2));

        // Removing again returns false.
        let removed_again = col.remove(2).unwrap();
        assert!(!removed_again);

        col.close().unwrap();
    }

    #[test]
    fn remove_batch_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "rm_batch.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        for i in 0..5u64 {
            col.add(i, &[i as f32, 0.0, 0.0, 0.0]).unwrap();
        }

        let count = col.remove_batch(&[1, 3, 99]).unwrap();
        // 99 never existed, so only 2 are removed.
        assert_eq!(count, 2);
        assert_eq!(col.len(), 3);

        col.close().unwrap();
    }

    #[test]
    fn dimension_mismatch_error() {
        let dir = TempDir::new().unwrap();
        let config = RvliteConfig::new(temp_path(&dir, "dim.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        // Wrong dimension: 3 instead of 4.
        let result = col.add(1, &[1.0, 0.0, 0.0]);
        assert!(result.is_err());
        match result.unwrap_err() {
            RvliteError::DimensionMismatch { expected, got } => {
                assert_eq!(expected, 4);
                assert_eq!(got, 3);
            }
            other => panic!("expected DimensionMismatch, got: {other}"),
        }

        col.close().unwrap();
    }

    #[test]
    fn empty_collection_edge_cases() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "empty.rvf"), 4).with_metric(RvliteMetric::L2);

        let col = RvliteCollection::create(config).unwrap();

        assert!(col.is_empty());
        assert_eq!(col.len(), 0);
        assert!(!col.contains(1));

        let results = col.search(&[1.0, 0.0, 0.0, 0.0], 10);
        assert!(results.is_empty());

        col.close().unwrap();
    }

    #[test]
    fn search_returns_empty_on_wrong_dimension() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "dim_search.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();
        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();

        // Search with wrong dimension returns empty (graceful degradation).
        let results = col.search(&[1.0, 0.0], 10);
        assert!(results.is_empty());

        col.close().unwrap();
    }

    #[test]
    fn open_existing_collection() {
        let dir = TempDir::new().unwrap();
        let path = temp_path(&dir, "reopen.rvf");
        let config = RvliteConfig::new(path.clone(), 4).with_metric(RvliteMetric::L2);

        {
            let mut col = RvliteCollection::create(config).unwrap();
            col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
            col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
            col.close().unwrap();
        }

        {
            let col = RvliteCollection::open(&path).unwrap();
            assert_eq!(col.len(), 2);
            assert_eq!(col.dimension(), 4);

            let results = col.search(&[1.0, 0.0, 0.0, 0.0], 2);
            assert_eq!(results.len(), 2);
            assert_eq!(results[0].id, 1);

            col.close().unwrap();
        }
    }

    #[test]
    fn compact_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "compact.rvf"), 4).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        for i in 0..10u64 {
            col.add(i, &[i as f32, 0.0, 0.0, 0.0]).unwrap();
        }

        col.remove_batch(&[0, 2, 4, 6, 8]).unwrap();
        assert_eq!(col.len(), 5);

        let stats = col.compact().unwrap();
        assert_eq!(stats.segments_compacted, 5);
        assert!(stats.bytes_reclaimed > 0);

        // Verify remaining vectors are intact after compaction.
        assert_eq!(col.len(), 5);
        assert!(col.contains(1));
        assert!(col.contains(3));
        assert!(!col.contains(0));

        col.close().unwrap();
    }

    #[test]
    fn len_is_empty_contains() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "accessors.rvf"), 2).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        assert_eq!(col.len(), 0);
        assert!(col.is_empty());
        assert!(!col.contains(42));

        col.add(42, &[1.0, 2.0]).unwrap();

        assert_eq!(col.len(), 1);
        assert!(!col.is_empty());
        assert!(col.contains(42));
        assert!(!col.contains(99));

        col.close().unwrap();
    }

    #[test]
    fn cosine_metric() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "cosine.rvf"), 3).with_metric(RvliteMetric::Cosine);

        let mut col = RvliteCollection::create(config).unwrap();

        col.add(1, &[1.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0]).unwrap();
        col.add(3, &[1.0, 1.0, 0.0]).unwrap();

        // Query for [1, 0, 0] -- id=1 should be closest (exact match).
        let results = col.search(&[1.0, 0.0, 0.0], 3);
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].id, 1);
        assert!(results[0].distance < f32::EPSILON);

        col.close().unwrap();
    }

    #[test]
    fn dimension_accessor() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "dim_acc.rvf"), 256).with_metric(RvliteMetric::L2);

        let col = RvliteCollection::create(config).unwrap();
        assert_eq!(col.dimension(), 256);
        col.close().unwrap();
    }

    #[test]
    fn batch_length_mismatch() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "mismatch.rvf"), 2).with_metric(RvliteMetric::L2);

        let mut col = RvliteCollection::create(config).unwrap();

        let ids = vec![1, 2, 3];
        let v1 = [1.0, 0.0];
        let v2 = [0.0, 1.0];
        let vecs: Vec<&[f32]> = vec![&v1, &v2]; // 2 vectors but 3 ids

        let result = col.add_batch(&ids, &vecs);
        assert!(result.is_err());

        col.close().unwrap();
    }
}
111
vendor/ruvector/crates/rvf/rvf-adapters/rvlite/src/config.rs
vendored
Normal file
@@ -0,0 +1,111 @@
//! Configuration for rvlite collections.
//!
//! Provides [`RvliteConfig`] with sensible defaults for lightweight,
//! resource-constrained environments.

use std::path::PathBuf;

use rvf_runtime::options::DistanceMetric;

/// Distance metric for rvlite similarity search.
///
/// Maps directly to the underlying `DistanceMetric` in rvf-runtime.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum RvliteMetric {
    /// Squared Euclidean distance.
    L2,
    /// Cosine distance (1 - cosine_similarity).
    #[default]
    Cosine,
    /// Inner (dot) product distance (negated).
    InnerProduct,
}

impl From<RvliteMetric> for DistanceMetric {
    fn from(m: RvliteMetric) -> Self {
        match m {
            RvliteMetric::L2 => DistanceMetric::L2,
            RvliteMetric::Cosine => DistanceMetric::Cosine,
            RvliteMetric::InnerProduct => DistanceMetric::InnerProduct,
        }
    }
}

/// Configuration for creating a new rvlite collection.
#[derive(Clone, Debug)]
pub struct RvliteConfig {
    /// File path for the RVF file.
    pub path: PathBuf,
    /// Vector dimensionality (required, must be > 0).
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: RvliteMetric,
    /// Optional capacity hint for pre-allocation.
    pub max_elements: Option<usize>,
}

impl RvliteConfig {
    /// Create a new config with the required fields and sensible defaults.
    ///
    /// The metric defaults to `Cosine` and `max_elements` is `None`.
    pub fn new(path: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            path: path.into(),
            dimension,
            metric: RvliteMetric::default(),
            max_elements: None,
        }
    }

    /// Set the distance metric.
    pub fn with_metric(mut self, metric: RvliteMetric) -> Self {
        self.metric = metric;
        self
    }

    /// Set the capacity hint.
    pub fn with_max_elements(mut self, max: usize) -> Self {
        self.max_elements = Some(max);
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_metric_is_cosine() {
        assert_eq!(RvliteMetric::default(), RvliteMetric::Cosine);
    }

    #[test]
    fn config_new_defaults() {
        let cfg = RvliteConfig::new("/tmp/test.rvf", 128);
        assert_eq!(cfg.dimension, 128);
        assert_eq!(cfg.metric, RvliteMetric::Cosine);
        assert!(cfg.max_elements.is_none());
    }

    #[test]
    fn config_builder_methods() {
        let cfg = RvliteConfig::new("/tmp/test.rvf", 64)
            .with_metric(RvliteMetric::L2)
            .with_max_elements(1000);
        assert_eq!(cfg.metric, RvliteMetric::L2);
        assert_eq!(cfg.max_elements, Some(1000));
    }

    #[test]
    fn metric_conversion() {
        assert_eq!(DistanceMetric::from(RvliteMetric::L2), DistanceMetric::L2);
        assert_eq!(
            DistanceMetric::from(RvliteMetric::Cosine),
            DistanceMetric::Cosine
        );
        assert_eq!(
            DistanceMetric::from(RvliteMetric::InnerProduct),
            DistanceMetric::InnerProduct
        );
    }
}
99
vendor/ruvector/crates/rvf/rvf-adapters/rvlite/src/error.rs
vendored
Normal file
@@ -0,0 +1,99 @@
//! Error types for the rvlite adapter.
//!
//! Provides a lightweight error enum that wraps `RvfError` and I/O errors,
//! plus a dimension-mismatch variant for early validation.

use core::fmt;

use rvf_types::RvfError;

/// Errors that can occur in rvlite operations.
#[derive(Debug)]
pub enum RvliteError {
    /// An error originating from the RVF runtime or types layer.
    Rvf(RvfError),
    /// An I/O error described by a message string.
    Io(String),
    /// The supplied vector has the wrong number of dimensions.
    DimensionMismatch {
        /// The dimension the collection was created with.
        expected: u16,
        /// The dimension of the vector that was supplied.
        got: usize,
    },
}

impl fmt::Display for RvliteError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "rvf: {e}"),
            Self::Io(msg) => write!(f, "io: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl From<RvfError> for RvliteError {
    fn from(e: RvfError) -> Self {
        Self::Rvf(e)
    }
}

impl From<std::io::Error> for RvliteError {
    fn from(e: std::io::Error) -> Self {
        Self::Io(e.to_string())
    }
}

/// Convenience alias used throughout the rvlite crate.
pub type Result<T> = std::result::Result<T, RvliteError>;

#[cfg(test)]
mod tests {
    use super::*;
    use rvf_types::ErrorCode;

    #[test]
    fn display_rvf_variant() {
        let err = RvliteError::Rvf(RvfError::Code(ErrorCode::DimensionMismatch));
        let msg = format!("{err}");
        assert!(msg.contains("rvf:"));
    }

    #[test]
    fn display_io_variant() {
        let err = RvliteError::Io("file not found".into());
        let msg = format!("{err}");
        assert!(msg.contains("io: file not found"));
    }

    #[test]
    fn display_dimension_mismatch() {
        let err = RvliteError::DimensionMismatch {
            expected: 128,
            got: 64,
        };
        let msg = format!("{err}");
        assert!(msg.contains("expected 128"));
        assert!(msg.contains("got 64"));
    }

    #[test]
    fn from_rvf_error() {
        let rvf = RvfError::Code(ErrorCode::FsyncFailed);
        let err: RvliteError = rvf.into();
        // Wrapped in assert! so the test actually fails on a wrong variant;
        // a bare matches! expression would discard its result.
        assert!(matches!(err, RvliteError::Rvf(_)));
    }

    #[test]
    fn from_io_error() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "gone");
        let err: RvliteError = io_err.into();
        match err {
            RvliteError::Io(msg) => assert!(msg.contains("gone")),
            _ => panic!("expected Io variant"),
        }
    }
}
41
vendor/ruvector/crates/rvf/rvf-adapters/rvlite/src/lib.rs
vendored
Normal file
@@ -0,0 +1,41 @@
//! Lightweight embedded vector store adapter for the RuVector Format (RVF).
//!
//! **rvlite** provides a minimal, ergonomic API for embedded vector storage
//! using the RVF Core Profile. It is designed for resource-constrained
//! environments (WASM, edge, embedded) where a full-featured vector
//! database is unnecessary.
//!
//! # Design philosophy
//!
//! - **Simple**: No metadata, no filters, no namespaces. Just vectors with IDs.
//! - **Small**: Minimal dependency surface; only `rvf-runtime` and `rvf-types`.
//! - **Safe**: Dimension validation, proper error handling, no panics.
//!
//! # Quick start
//!
//! ```no_run
//! use rvf_adapter_rvlite::{RvliteCollection, RvliteConfig, RvliteMetric};
//!
//! let config = RvliteConfig::new("/tmp/my_vectors.rvf", 128)
//!     .with_metric(RvliteMetric::Cosine);
//!
//! let mut col = RvliteCollection::create(config).unwrap();
//!
//! col.add(1, &vec![0.1; 128]).unwrap();
//! col.add(2, &vec![0.2; 128]).unwrap();
//!
//! let results = col.search(&vec![0.1; 128], 5);
//! for m in &results {
//!     println!("id={} distance={:.4}", m.id, m.distance);
//! }
//!
//! col.close().unwrap();
//! ```

pub mod collection;
pub mod config;
pub mod error;

pub use collection::{CompactStats, Match, RvliteCollection};
pub use config::{RvliteConfig, RvliteMetric};
pub use error::{Result, RvliteError};
19
vendor/ruvector/crates/rvf/rvf-adapters/sona/Cargo.toml
vendored
Normal file
@@ -0,0 +1,19 @@
[package]
name = "rvf-adapter-sona"
version = "0.1.0"
edition = "2021"
description = "SONA adapter for RuVector Format -- stores learning trajectories, neural patterns, and experience replay buffers as RVF segments"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"

[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }

[dev-dependencies]
tempfile = "3"
142
vendor/ruvector/crates/rvf/rvf-adapters/sona/src/config.rs
vendored
Normal file
@@ -0,0 +1,142 @@
//! Configuration for the SONA adapter.
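//!
//! A minimal sketch of building and validating a config (the path and
//! sizes are illustrative):
//!
//! ```rust,no_run
//! use rvf_adapter_sona::SonaConfig;
//!
//! let cfg = SonaConfig::new("/tmp/sona-data", 256)
//!     .with_replay_capacity(5_000)
//!     .with_trajectory_window(200);
//! assert!(cfg.validate().is_ok());
//! assert!(cfg.store_path().ends_with("sona.rvf"));
//! ```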
use std::path::PathBuf;

/// Configuration for the RVF-backed SONA stores.
#[derive(Clone, Debug)]
pub struct SonaConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match SONA's embedding size).
    pub dimension: u16,
    /// Maximum number of experiences in the replay buffer.
    pub replay_capacity: usize,
    /// Number of recent trajectory steps to retain in the window.
    pub trajectory_window: usize,
}

impl SonaConfig {
    /// Create a new configuration with required parameters and sensible defaults.
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension,
            replay_capacity: 10_000,
            trajectory_window: 100,
        }
    }

    /// Set the replay buffer capacity.
    pub fn with_replay_capacity(mut self, capacity: usize) -> Self {
        self.replay_capacity = capacity;
        self
    }

    /// Set the trajectory window size.
    pub fn with_trajectory_window(mut self, window: usize) -> Self {
        self.trajectory_window = window;
        self
    }

    /// Return the path to the shared RVF store file.
    pub fn store_path(&self) -> PathBuf {
        self.data_dir.join("sona.rvf")
    }

    /// Ensure the data directory exists.
    pub fn ensure_dirs(&self) -> std::io::Result<()> {
        std::fs::create_dir_all(&self.data_dir)
    }

    /// Validate the configuration.
    pub fn validate(&self) -> Result<(), ConfigError> {
        if self.dimension == 0 {
            return Err(ConfigError::InvalidDimension);
        }
        if self.replay_capacity == 0 {
            return Err(ConfigError::InvalidReplayCapacity);
        }
        if self.trajectory_window == 0 {
            return Err(ConfigError::InvalidTrajectoryWindow);
        }
        Ok(())
    }
}

/// Errors specific to adapter configuration.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
    /// Replay capacity must be > 0.
    InvalidReplayCapacity,
    /// Trajectory window must be > 0.
    InvalidTrajectoryWindow,
}

impl std::fmt::Display for ConfigError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
            Self::InvalidReplayCapacity => write!(f, "replay capacity must be > 0"),
            Self::InvalidTrajectoryWindow => write!(f, "trajectory window must be > 0"),
        }
    }
}

impl std::error::Error for ConfigError {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn config_defaults() {
        let cfg = SonaConfig::new("/tmp/test", 256);
        assert_eq!(cfg.dimension, 256);
        assert_eq!(cfg.replay_capacity, 10_000);
        assert_eq!(cfg.trajectory_window, 100);
    }

    #[test]
    fn config_store_path() {
        let cfg = SonaConfig::new("/data/sona", 128);
        assert_eq!(cfg.store_path(), Path::new("/data/sona/sona.rvf"));
    }

    #[test]
    fn validate_zero_dimension() {
        let cfg = SonaConfig::new("/tmp", 0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }

    #[test]
    fn validate_zero_replay_capacity() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.replay_capacity = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidReplayCapacity));
    }

    #[test]
    fn validate_zero_trajectory_window() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.trajectory_window = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidTrajectoryWindow));
    }

    #[test]
    fn validate_ok() {
        let cfg = SonaConfig::new("/tmp", 64);
        assert!(cfg.validate().is_ok());
    }

    #[test]
    fn builder_methods() {
        let cfg = SonaConfig::new("/tmp", 256)
            .with_replay_capacity(5000)
            .with_trajectory_window(50);
        assert_eq!(cfg.replay_capacity, 5000);
        assert_eq!(cfg.trajectory_window, 50);
    }
}
397
vendor/ruvector/crates/rvf/rvf-adapters/sona/src/experience.rs
vendored
Normal file
@@ -0,0 +1,397 @@
//! `ExperienceReplayBuffer` — circular buffer of experiences stored
//! as RVF vectors in the shared SONA store.
//!
//! Each experience captures a (state, action, reward, next_state) tuple.
//! The state embedding is stored as the RVF vector (at the configured
//! dimension) and used for similarity search; the next-state embedding
//! is currently not persisted. The action and reward are stored as
//! metadata. A type marker of "experience" distinguishes these entries
//! from trajectory and pattern data.
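//!
//! A minimal usage sketch (path, dimension, and values are illustrative):
//!
//! ```rust,no_run
//! use rvf_adapter_sona::{ExperienceReplayBuffer, SonaConfig};
//!
//! let config = SonaConfig::new("/tmp/sona-replay", 4).with_replay_capacity(1_000);
//! let mut buf = ExperienceReplayBuffer::create(config).unwrap();
//!
//! buf.push(&[0.1, 0.2, 0.3, 0.4], "explore", 0.5, &[0.0; 4]).unwrap();
//! let batch = buf.sample(8); // uniform, stride-based
//! let similar = buf.sample_prioritized(8, &[0.1, 0.2, 0.3, 0.4]).unwrap();
//! buf.close().unwrap();
//! ```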
use std::collections::VecDeque;

use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;

use crate::config::SonaConfig;

/// Metadata field IDs (shared across all SONA stores).
const FIELD_STEP_ID: u16 = 0;
const FIELD_ACTION: u16 = 1;
const FIELD_REWARD: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;

/// Type marker for experience entries.
const TYPE_EXPERIENCE: &str = "experience";

/// A single experience returned from retrieval or sampling.
#[derive(Clone, Debug)]
pub struct Experience {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// The action taken.
    pub action: String,
    /// The reward received.
    pub reward: f64,
    /// Distance from query (only meaningful for prioritized sampling).
    pub distance: f32,
}

/// Circular buffer of experiences stored as RVF vectors.
pub struct ExperienceReplayBuffer {
    store: RvfStore,
    config: SonaConfig,
    /// Ordered record of experience vector IDs (oldest first).
    experience_ids: VecDeque<u64>,
    /// Parallel metadata: (action, reward).
    experience_meta: VecDeque<(String, f64)>,
    /// Next vector ID to assign.
    next_id: u64,
}

impl ExperienceReplayBuffer {
    /// Create a new experience replay buffer.
    pub fn create(config: SonaConfig) -> Result<Self, ExperienceStoreError> {
        config.validate().map_err(ExperienceStoreError::Config)?;
        config.ensure_dirs().map_err(|e| ExperienceStoreError::Io(e.to_string()))?;

        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };

        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(ExperienceStoreError::Rvf)?;

        Ok(Self {
            store,
            config,
            experience_ids: VecDeque::new(),
            experience_meta: VecDeque::new(),
            next_id: 1,
        })
    }

    /// Add an experience to the buffer.
    ///
    /// If the buffer is at capacity, the oldest experience is evicted.
    /// The `state_embedding` is used as the stored vector (for similarity
    /// search); `next_state_embedding` is currently not stored as a
    /// separate vector but could be added via metadata extension.
    ///
    /// Returns the internal vector ID.
    pub fn push(
        &mut self,
        state_embedding: &[f32],
        action: &str,
        reward: f64,
        _next_state_embedding: &[f32],
    ) -> Result<u64, ExperienceStoreError> {
        if state_embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: state_embedding.len(),
            });
        }

        // Evict oldest if at capacity.
        if self.experience_ids.len() >= self.config.replay_capacity {
            if let Some(old_id) = self.experience_ids.pop_front() {
                self.experience_meta.pop_front();
                self.store.delete(&[old_id]).map_err(ExperienceStoreError::Rvf)?;
            }
        }

        let vector_id = self.next_id;
        self.next_id += 1;

        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
            MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_EXPERIENCE.to_string()) },
        ];

        self.store
            .ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
            .map_err(ExperienceStoreError::Rvf)?;

        self.experience_ids.push_back(vector_id);
        self.experience_meta.push_back((action.to_string(), reward));

        Ok(vector_id)
    }

    /// Sample `n` experiences uniformly from the buffer.
    ///
    /// Uses a deterministic stride-based selection: picks experiences
    /// evenly spaced across the buffer. Returns fewer than `n` if the
    /// buffer contains fewer experiences.
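    ///
    /// For example, with 10 buffered experiences and `n = 3`, the stride
    /// is `10 / 3 = 3`, so the entries at indices 0, 3, and 6 are chosen
    /// (an illustrative walk-through of the logic below, not new behavior).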
    pub fn sample(&self, n: usize) -> Vec<Experience> {
        let len = self.experience_ids.len();
        if len == 0 || n == 0 {
            return Vec::new();
        }

        let count = n.min(len);
        let step = if count >= len { 1 } else { len / count };
        let mut results = Vec::with_capacity(count);

        let mut idx = 0;
        while results.len() < count && idx < len {
            let vid = self.experience_ids[idx];
            let (action, reward) = &self.experience_meta[idx];
            results.push(Experience {
                id: vid,
                action: action.clone(),
                reward: *reward,
                distance: 0.0,
            });
            idx += step;
        }

        // If stride skipped some, fill from the end.
        if results.len() < count {
            let mut back_idx = len - 1;
            while results.len() < count {
                let vid = self.experience_ids[back_idx];
                if !results.iter().any(|e| e.id == vid) {
                    let (action, reward) = &self.experience_meta[back_idx];
                    results.push(Experience {
                        id: vid,
                        action: action.clone(),
                        reward: *reward,
                        distance: 0.0,
                    });
                }
                if back_idx == 0 {
                    break;
                }
                back_idx -= 1;
            }
        }

        results
    }

    /// Sample `n` experiences prioritized by similarity to the given embedding.
    ///
    /// Finds the `n` nearest-neighbor experiences by vector distance.
    pub fn sample_prioritized(
        &mut self,
        n: usize,
        embedding: &[f32],
    ) -> Result<Vec<Experience>, ExperienceStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }

        let results = self.store
            .query(embedding, n, &QueryOptions::default())
            .map_err(ExperienceStoreError::Rvf)?;

        Ok(self.enrich_results(&results))
    }

    /// Return the number of experiences in the buffer.
    pub fn len(&self) -> usize {
        self.experience_ids.len()
    }

    /// Return whether the buffer is empty.
    pub fn is_empty(&self) -> bool {
        self.experience_ids.is_empty()
    }

    /// Return whether the buffer has reached its capacity.
    pub fn is_full(&self) -> bool {
        self.experience_ids.len() >= self.config.replay_capacity
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), ExperienceStoreError> {
        self.store.close().map_err(ExperienceStoreError::Rvf)
    }

    // ── Internal ──────────────────────────────────────────────────────

    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<Experience> {
        results
            .iter()
            .map(|r| {
                let meta = self.experience_ids.iter()
                    .zip(self.experience_meta.iter())
                    .find(|(&vid, _)| vid == r.id)
                    .map(|(_, m)| m);

                match meta {
                    Some((action, reward)) => Experience {
                        id: r.id,
                        action: action.clone(),
                        reward: *reward,
                        distance: r.distance,
                    },
                    None => Experience {
                        id: r.id,
                        action: String::new(),
                        reward: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}

/// Errors from experience replay buffer operations.
#[derive(Debug)]
pub enum ExperienceStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch { expected: usize, got: usize },
}

impl std::fmt::Display for ExperienceStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl std::error::Error for ExperienceStoreError {}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4).with_replay_capacity(5)
    }

    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn push_and_sample() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        buf.push(&make_embedding(1.0), "explore", 0.5, &make_embedding(1.1)).unwrap();
        buf.push(&make_embedding(2.0), "exploit", 0.8, &make_embedding(2.1)).unwrap();
        buf.push(&make_embedding(3.0), "explore", 0.3, &make_embedding(3.1)).unwrap();

        assert_eq!(buf.len(), 3);
        assert!(!buf.is_full());

        let samples = buf.sample(2);
        assert_eq!(samples.len(), 2);

        buf.close().unwrap();
    }

    #[test]
    fn buffer_capacity_eviction() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path()); // capacity = 5
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        for i in 0..7 {
            buf.push(&make_embedding(i as f32 + 0.1), &format!("act{i}"), i as f64 * 0.1, &make_embedding(0.0)).unwrap();
        }

        assert_eq!(buf.len(), 5);
        assert!(buf.is_full());

        // The oldest two (act0, act1) should have been evicted.
        let all = buf.sample(5);
        assert_eq!(all.len(), 5);
        assert!(all.iter().all(|e| e.action != "act0" && e.action != "act1"));

        buf.close().unwrap();
    }

    #[test]
    fn sample_prioritized() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        buf.push(&[1.0, 0.0, 0.0, 0.0], "a", 0.1, &[0.0; 4]).unwrap();
        buf.push(&[0.0, 1.0, 0.0, 0.0], "b", 0.2, &[0.0; 4]).unwrap();
        buf.push(&[0.9, 0.1, 0.0, 0.0], "c", 0.3, &[0.0; 4]).unwrap();

        let results = buf.sample_prioritized(2, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);

        buf.close().unwrap();
    }

    #[test]
    fn empty_buffer_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        assert!(buf.is_empty());
        assert!(!buf.is_full());
        assert_eq!(buf.len(), 0);

        let samples = buf.sample(5);
        assert!(samples.is_empty());

        let results = buf.sample_prioritized(5, &make_embedding(1.0)).unwrap();
        assert!(results.is_empty());

        buf.close().unwrap();
    }

    #[test]
    fn sample_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        buf.push(&make_embedding(1.0), "a", 0.1, &make_embedding(0.0)).unwrap();
        buf.push(&make_embedding(2.0), "b", 0.2, &make_embedding(0.0)).unwrap();

        let samples = buf.sample(10);
        assert_eq!(samples.len(), 2);

        buf.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();

        let result = buf.push(&[1.0, 2.0], "a", 0.1, &[1.0, 2.0]);
        assert!(result.is_err());

        let result = buf.sample_prioritized(5, &[1.0, 2.0]);
        assert!(result.is_err());

        buf.close().unwrap();
    }
}
44
vendor/ruvector/crates/rvf/rvf-adapters/sona/src/lib.rs
vendored
Normal file
@@ -0,0 +1,44 @@
//! RVF adapter for SONA (Self-Optimizing Neural Architecture).
//!
//! This crate bridges SONA's learning trajectory tracking, pattern
//! recognition, and experience replay with the RuVector Format (RVF)
//! segment store per ADR-029. All three data types share a single
//! underlying RVF file, distinguished by a type marker in metadata
//! field 4.
//!
//! # Architecture
//!
//! - **`TrajectoryStore`**: Records and queries sequences of state
//!   embeddings that form a learning trajectory.
//! - **`ExperienceReplayBuffer`**: Circular buffer of (state, action,
//!   reward, next_state) tuples for off-policy training.
//! - **`NeuralPatternStore`**: Stores recognized neural patterns with
//!   confidence scores, searchable by category or embedding similarity.
//! - **`SonaConfig`**: Configuration for data directory, dimension,
//!   replay capacity, and trajectory window size.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_sona::{SonaConfig, TrajectoryStore, ExperienceReplayBuffer, NeuralPatternStore};
//!
//! let config = SonaConfig::new("/tmp/sona-data", 256);
//! let mut trajectory = TrajectoryStore::create(config.clone()).unwrap();
//!
//! let embedding = vec![0.1f32; 256];
//! trajectory.record_step(1, &embedding, "explore", 0.5).unwrap();
//!
//! let recent = trajectory.get_recent(10);
//! let similar = trajectory.search_similar_states(&embedding, 5).unwrap();
//! trajectory.close().unwrap();
//! ```

pub mod config;
pub mod experience;
pub mod pattern;
pub mod trajectory;

pub use config::{ConfigError, SonaConfig};
pub use experience::{Experience, ExperienceReplayBuffer};
pub use pattern::{NeuralPattern, NeuralPatternStore};
pub use trajectory::{TrajectoryStep, TrajectoryStore};
423
vendor/ruvector/crates/rvf/rvf-adapters/sona/src/pattern.rs
vendored
Normal file
@@ -0,0 +1,423 @@
//! `NeuralPatternStore` — stores recognized neural patterns as RVF
//! vectors with confidence scores and categories.
//!
//! Patterns can be searched by embedding similarity, filtered by
//! category, or ranked by confidence. A type marker of "pattern"
//! distinguishes these entries from trajectory and experience data.
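//!
//! A minimal usage sketch (path, dimension, and values are illustrative):
//!
//! ```rust,no_run
//! use rvf_adapter_sona::{NeuralPatternStore, SonaConfig};
//!
//! let config = SonaConfig::new("/tmp/sona-patterns", 4);
//! let mut store = NeuralPatternStore::create(config).unwrap();
//!
//! let id = store.store_pattern("convergent", "thinking", &[1.0, 0.0, 0.0, 0.0], 0.9).unwrap();
//! store.update_confidence(id, 0.95).unwrap();
//! let nearest = store.search_patterns(&[1.0, 0.0, 0.0, 0.0], 3).unwrap();
//! let best = store.get_top_patterns(3);
//! store.close().unwrap();
//! ```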
use std::collections::HashMap;

use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;

use crate::config::SonaConfig;

/// Metadata field IDs (shared across all SONA stores).
const FIELD_STEP_ID: u16 = 0;
const FIELD_NAME: u16 = 1;
const FIELD_CONFIDENCE: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;

/// Type marker for pattern entries.
const TYPE_PATTERN: &str = "pattern";

/// A recognized neural pattern returned from retrieval or search.
#[derive(Clone, Debug)]
pub struct NeuralPattern {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// Human-readable pattern name.
    pub name: String,
    /// Category this pattern belongs to.
    pub category: String,
    /// Confidence score (0.0 to 1.0).
    pub confidence: f64,
    /// Distance from query (only meaningful for search results).
    pub distance: f32,
}

/// Stores recognized neural patterns as RVF vectors.
pub struct NeuralPatternStore {
    store: RvfStore,
    config: SonaConfig,
    /// In-memory index of pattern metadata keyed by vector ID.
    patterns: HashMap<u64, PatternMeta>,
    /// In-memory index of category -> vector IDs.
    category_index: HashMap<String, Vec<u64>>,
    /// Next vector ID to assign.
    next_id: u64,
}

/// In-memory metadata for a pattern.
#[derive(Clone, Debug)]
struct PatternMeta {
    name: String,
    category: String,
    confidence: f64,
}

impl NeuralPatternStore {
    /// Create a new neural pattern store.
    pub fn create(config: SonaConfig) -> Result<Self, PatternStoreError> {
        config.validate().map_err(PatternStoreError::Config)?;
        config.ensure_dirs().map_err(|e| PatternStoreError::Io(e.to_string()))?;

        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };

        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(PatternStoreError::Rvf)?;

        Ok(Self {
            store,
            config,
            patterns: HashMap::new(),
            category_index: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a new neural pattern.
    ///
    /// Returns the internal vector ID assigned to this pattern.
    pub fn store_pattern(
        &mut self,
        name: &str,
        category: &str,
        embedding: &[f32],
        confidence: f64,
    ) -> Result<u64, PatternStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }

        let vector_id = self.next_id;
        self.next_id += 1;

        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_NAME, value: MetadataValue::String(name.to_string()) },
            MetadataEntry { field_id: FIELD_CONFIDENCE, value: MetadataValue::F64(confidence) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(category.to_string()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_PATTERN.to_string()) },
        ];

        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(PatternStoreError::Rvf)?;

        let meta = PatternMeta {
            name: name.to_string(),
            category: category.to_string(),
            confidence,
        };
        self.patterns.insert(vector_id, meta);
        self.category_index
            .entry(category.to_string())
            .or_default()
            .push(vector_id);

        Ok(vector_id)
    }

    /// Search for patterns whose embeddings are most similar to the given embedding.
    pub fn search_patterns(
        &mut self,
        embedding: &[f32],
        k: usize,
    ) -> Result<Vec<NeuralPattern>, PatternStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }

        let results = self.store
            .query(embedding, k, &QueryOptions::default())
            .map_err(PatternStoreError::Rvf)?;

        Ok(self.enrich_results(&results))
    }

    /// Get all patterns in a given category.
    pub fn get_by_category(&self, category: &str) -> Vec<NeuralPattern> {
        let ids = match self.category_index.get(category) {
            Some(ids) => ids,
            None => return Vec::new(),
        };

        ids.iter()
            .filter_map(|&vid| {
                self.patterns.get(&vid).map(|meta| NeuralPattern {
                    id: vid,
                    name: meta.name.clone(),
                    category: meta.category.clone(),
                    confidence: meta.confidence,
                    distance: 0.0,
                })
            })
            .collect()
    }

    /// Update the confidence score for a pattern by its vector ID.
    pub fn update_confidence(&mut self, id: u64, confidence: f64) -> Result<(), PatternStoreError> {
        match self.patterns.get_mut(&id) {
            Some(meta) => {
                meta.confidence = confidence;
                Ok(())
            }
            None => Err(PatternStoreError::PatternNotFound(id)),
        }
    }

    /// Get the top `k` patterns ranked by confidence (highest first).
    pub fn get_top_patterns(&self, k: usize) -> Vec<NeuralPattern> {
        let mut all: Vec<_> = self.patterns.iter()
            .map(|(&vid, meta)| NeuralPattern {
                id: vid,
                name: meta.name.clone(),
                category: meta.category.clone(),
                confidence: meta.confidence,
                distance: 0.0,
            })
            .collect();

        all.sort_by(|a, b| {
            b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)
        });
        all.truncate(k);
        all
    }

    /// Return the total number of stored patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Return whether the store has no patterns.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), PatternStoreError> {
        self.store.close().map_err(PatternStoreError::Rvf)
    }

    // ── Internal ──────────────────────────────────────────────────────

    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<NeuralPattern> {
        results
            .iter()
            .map(|r| {
                match self.patterns.get(&r.id) {
                    Some(meta) => NeuralPattern {
                        id: r.id,
                        name: meta.name.clone(),
                        category: meta.category.clone(),
                        confidence: meta.confidence,
                        distance: r.distance,
                    },
                    None => NeuralPattern {
                        id: r.id,
                        name: String::new(),
                        category: String::new(),
                        confidence: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}

/// Errors from neural pattern store operations.
#[derive(Debug)]
pub enum PatternStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch { expected: usize, got: usize },
    /// Pattern not found for the given ID.
    PatternNotFound(u64),
}

impl std::fmt::Display for PatternStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
            Self::PatternNotFound(id) => write!(f, "pattern not found: {id}"),
        }
    }
}

impl std::error::Error for PatternStoreError {}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4)
    }

    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        store.store_pattern("convergent", "thinking", &[1.0, 0.0, 0.0, 0.0], 0.9).unwrap();
        store.store_pattern("divergent", "thinking", &[0.0, 1.0, 0.0, 0.0], 0.7).unwrap();
        store.store_pattern("lateral", "creative", &[0.0, 0.0, 1.0, 0.0], 0.8).unwrap();

        let results = store.search_patterns(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);

        store.close().unwrap();
    }

    #[test]
    fn get_by_category() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        store.store_pattern("p1", "alpha", &make_embedding(1.0), 0.9).unwrap();
        store.store_pattern("p2", "beta", &make_embedding(2.0), 0.7).unwrap();
        store.store_pattern("p3", "alpha", &make_embedding(3.0), 0.8).unwrap();

        let alpha = store.get_by_category("alpha");
        assert_eq!(alpha.len(), 2);
        assert!(alpha.iter().all(|p| p.category == "alpha"));

        let beta = store.get_by_category("beta");
        assert_eq!(beta.len(), 1);
        assert_eq!(beta[0].name, "p2");

        let empty = store.get_by_category("nonexistent");
        assert!(empty.is_empty());

        store.close().unwrap();
    }

    #[test]
    fn update_confidence() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        let id = store.store_pattern("p1", "cat", &make_embedding(1.0), 0.5).unwrap();

        store.update_confidence(id, 0.95).unwrap();

        let top = store.get_top_patterns(1);
        assert_eq!(top.len(), 1);
        assert!((top[0].confidence - 0.95).abs() < f64::EPSILON);

        store.close().unwrap();
    }

    #[test]
    fn update_confidence_not_found() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        let result = store.update_confidence(999, 0.5);
        assert!(result.is_err());

        store.close().unwrap();
    }

    #[test]
    fn get_top_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        store.store_pattern("low", "cat", &make_embedding(1.0), 0.3).unwrap();
        store.store_pattern("high", "cat", &make_embedding(2.0), 0.9).unwrap();
        store.store_pattern("mid", "cat", &make_embedding(3.0), 0.6).unwrap();

        let top = store.get_top_patterns(2);
        assert_eq!(top.len(), 2);
        assert_eq!(top[0].name, "high");
        assert_eq!(top[1].name, "mid");

        store.close().unwrap();
    }

    #[test]
    fn get_top_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        store.store_pattern("only", "cat", &make_embedding(1.0), 0.5).unwrap();

        let top = store.get_top_patterns(10);
        assert_eq!(top.len(), 1);

        store.close().unwrap();
    }

    #[test]
    fn empty_store_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        assert!(store.is_empty());
        assert_eq!(store.len(), 0);

        let results = store.search_patterns(&make_embedding(1.0), 5).unwrap();
        assert!(results.is_empty());

        let by_cat = store.get_by_category("anything");
        assert!(by_cat.is_empty());

        let top = store.get_top_patterns(5);
        assert!(top.is_empty());

        store.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();

        let result = store.store_pattern("p", "c", &[1.0, 2.0], 0.5);
        assert!(result.is_err());

        let result = store.search_patterns(&[1.0, 2.0], 5);
        assert!(result.is_err());

        store.close().unwrap();
    }
}
422
vendor/ruvector/crates/rvf/rvf-adapters/sona/src/trajectory.rs
vendored
Normal file
@@ -0,0 +1,422 @@
//! `TrajectoryStore` — stores learning trajectories as sequences of
//! state embeddings in the shared SONA RVF file.
//!
//! Each trajectory step records a state embedding, the action taken,
//! the reward received, and a monotonically increasing step ID. Steps
//! are stored as RVF vectors with metadata fields encoding the step
//! details and a type marker of "trajectory".
|
||||
|
||||
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
|
||||
use rvf_runtime::{RvfStore, SearchResult};
|
||||
use rvf_types::RvfError;
|
||||
|
||||
use crate::config::SonaConfig;
|
||||
|
||||
/// Metadata field IDs (shared across all SONA stores).
|
||||
const FIELD_STEP_ID: u16 = 0;
|
||||
const FIELD_ACTION: u16 = 1;
|
||||
const FIELD_REWARD: u16 = 2;
|
||||
const FIELD_CATEGORY: u16 = 3;
|
||||
const FIELD_TYPE: u16 = 4;
|
||||
|
||||
/// Type marker for trajectory entries.
|
||||
const TYPE_TRAJECTORY: &str = "trajectory";
|
||||
|
||||
/// A single trajectory step returned from retrieval or search.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct TrajectoryStep {
|
||||
/// Internal vector ID in the RVF store.
|
||||
pub id: u64,
|
||||
/// The step identifier within the trajectory.
|
||||
pub step_id: u64,
|
||||
/// The action taken at this step.
|
||||
pub action: String,
|
||||
/// The reward received at this step.
|
||||
pub reward: f64,
|
||||
/// Distance from query (only meaningful for search results).
|
||||
pub distance: f32,
|
||||
}
|
||||
|
||||
/// Stores learning trajectories as sequences of state embeddings.
|
||||
pub struct TrajectoryStore {
|
||||
store: RvfStore,
|
||||
config: SonaConfig,
|
||||
/// In-memory ordered record of trajectory step vector IDs, newest last.
|
||||
step_ids: VecDeque<u64>,
|
||||
/// Parallel deque of step metadata for fast retrieval.
|
||||
step_meta: VecDeque<(u64, String, f64)>, // (step_id, action, reward)
|
||||
/// Next vector ID to assign.
|
||||
next_id: u64,
|
||||
}
|
||||
|
||||
impl TrajectoryStore {
|
||||
/// Create a new trajectory store, initializing the data directory and RVF file.
|
||||
pub fn create(config: SonaConfig) -> Result<Self, SonaStoreError> {
|
||||
config.validate().map_err(SonaStoreError::Config)?;
|
||||
config.ensure_dirs().map_err(|e| SonaStoreError::Io(e.to_string()))?;
|
||||
|
||||
let rvf_options = RvfOptions {
|
||||
dimension: config.dimension,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let store = RvfStore::create(&config.store_path(), rvf_options)
|
||||
.map_err(SonaStoreError::Rvf)?;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
step_ids: VecDeque::new(),
|
||||
step_meta: VecDeque::new(),
|
||||
next_id: 1,
|
||||
})
|
||||
}
|
||||
|
||||
/// Record a single trajectory step.
|
||||
///
|
||||
/// Returns the internal vector ID assigned to this step.
|
||||
pub fn record_step(
|
||||
&mut self,
|
||||
step_id: u64,
|
||||
state_embedding: &[f32],
|
||||
action: &str,
|
||||
reward: f64,
|
||||
) -> Result<u64, SonaStoreError> {
|
||||
if state_embedding.len() != self.config.dimension as usize {
|
||||
return Err(SonaStoreError::DimensionMismatch {
|
||||
expected: self.config.dimension as usize,
|
||||
got: state_embedding.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let vector_id = self.next_id;
|
||||
self.next_id += 1;
|
||||
|
||||
let metadata = vec![
|
||||
MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(step_id) },
|
||||
MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
|
||||
MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
|
||||
MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
|
||||
MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_TRAJECTORY.to_string()) },
|
||||
];
|
||||
|
||||
self.store
|
||||
.ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
|
||||
.map_err(SonaStoreError::Rvf)?;
|
||||
|
||||
self.step_ids.push_back(vector_id);
|
||||
self.step_meta.push_back((step_id, action.to_string(), reward));
|
||||
|
||||
// Trim to trajectory window size.
|
||||
while self.step_ids.len() > self.config.trajectory_window {
|
||||
self.step_ids.pop_front();
|
||||
self.step_meta.pop_front();
|
||||
}
|
||||
|
||||
Ok(vector_id)
|
||||
}
|
||||
|
||||
/// Get the `n` most recent trajectory steps.
|
||||
///
|
||||
/// Returns fewer than `n` if fewer steps are available.
|
||||
pub fn get_recent(&self, n: usize) -> Vec<TrajectoryStep> {
|
||||
let len = self.step_ids.len();
|
||||
let start = len.saturating_sub(n);
|
||||
self.step_ids
|
||||
.iter()
|
||||
.zip(self.step_meta.iter())
|
||||
.skip(start)
|
||||
.map(|(&vid, (step_id, action, reward))| TrajectoryStep {
|
||||
id: vid,
|
||||
step_id: *step_id,
|
||||
action: action.clone(),
|
||||
reward: *reward,
|
||||
distance: 0.0,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Search for trajectory steps whose state embeddings are most
|
||||
/// similar to the given embedding.
|
||||
pub fn search_similar_states(
|
||||
&mut self,
|
||||
embedding: &[f32],
|
||||
k: usize,
|
||||
) -> Result<Vec<TrajectoryStep>, SonaStoreError> {
|
||||
if embedding.len() != self.config.dimension as usize {
|
||||
return Err(SonaStoreError::DimensionMismatch {
|
||||
expected: self.config.dimension as usize,
|
||||
got: embedding.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let results = self.store
|
||||
.query(embedding, k, &QueryOptions::default())
|
||||
.map_err(SonaStoreError::Rvf)?;
|
||||
|
||||
Ok(self.enrich_results(&results))
|
||||
}
|
||||
|
||||
/// Get all steps in the current trajectory window.
|
||||
pub fn get_trajectory_window(&self) -> Vec<TrajectoryStep> {
|
||||
self.get_recent(self.config.trajectory_window)
|
||||
}
|
||||
|
||||
/// Prune old trajectory data, keeping only the most recent `keep_last_n` steps.
|
||||
///
|
||||
/// Returns the number of steps deleted.
|
||||
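    ///
    /// For example, with 5 recorded steps and `keep_last_n = 2`, the three
    /// oldest step vectors are deleted and 3 is returned (an illustrative
    /// walk-through, not additional behavior).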
    pub fn clear_old(&mut self, keep_last_n: usize) -> Result<usize, SonaStoreError> {
        let len = self.step_ids.len();
        if len <= keep_last_n {
            return Ok(0);
        }

        let to_remove = len - keep_last_n;
        let mut ids_to_delete = Vec::with_capacity(to_remove);

        for _ in 0..to_remove {
            if let Some(vid) = self.step_ids.pop_front() {
                ids_to_delete.push(vid);
                self.step_meta.pop_front();
            }
        }

        if !ids_to_delete.is_empty() {
            self.store.delete(&ids_to_delete).map_err(SonaStoreError::Rvf)?;
        }

        Ok(ids_to_delete.len())
    }

    /// Return the number of steps in the current in-memory window.
    pub fn len(&self) -> usize {
        self.step_ids.len()
    }

    /// Return whether the store has no steps in the window.
    pub fn is_empty(&self) -> bool {
        self.step_ids.is_empty()
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), SonaStoreError> {
        self.store.close().map_err(SonaStoreError::Rvf)
    }

    // ── Internal ──────────────────────────────────────────────────────

    /// Enrich raw search results with step metadata from the in-memory index.
    fn enrich_results(&self, results: &[SearchResult]) -> Vec<TrajectoryStep> {
        results
            .iter()
            .map(|r| {
                let meta = self.step_ids.iter()
                    .zip(self.step_meta.iter())
                    .find(|(&vid, _)| vid == r.id)
                    .map(|(_, m)| m);

                match meta {
                    Some((step_id, action, reward)) => TrajectoryStep {
                        id: r.id,
                        step_id: *step_id,
                        action: action.clone(),
                        reward: *reward,
                        distance: r.distance,
                    },
                    None => TrajectoryStep {
                        id: r.id,
                        step_id: 0,
                        action: String::new(),
                        reward: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}

/// Errors from SONA store operations.
#[derive(Debug)]
pub enum SonaStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch { expected: usize, got: usize },
}

impl std::fmt::Display for SonaStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl std::error::Error for SonaStoreError {}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4).with_trajectory_window(5)
    }

    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn record_and_get_recent() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        store.record_step(1, &make_embedding(1.0), "explore", 0.5).unwrap();
        store.record_step(2, &make_embedding(2.0), "exploit", 0.8).unwrap();
        store.record_step(3, &make_embedding(3.0), "explore", 0.3).unwrap();

        let recent = store.get_recent(2);
        assert_eq!(recent.len(), 2);
        assert_eq!(recent[0].step_id, 2);
        assert_eq!(recent[1].step_id, 3);
        assert_eq!(recent[1].action, "explore");

        store.close().unwrap();
    }

    #[test]
    fn get_recent_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        store.record_step(1, &make_embedding(1.0), "a", 0.1).unwrap();

        let recent = store.get_recent(10);
        assert_eq!(recent.len(), 1);
        assert_eq!(recent[0].step_id, 1);

        store.close().unwrap();
    }

    #[test]
    fn trajectory_window_trimming() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path()); // window = 5
        let mut store = TrajectoryStore::create(config).unwrap();

        for i in 0..8 {
            store.record_step(i, &make_embedding(i as f32 + 0.1), "act", 0.1).unwrap();
        }

        assert_eq!(store.len(), 5);
        let window = store.get_trajectory_window();
        assert_eq!(window.len(), 5);
        // Should have steps 3..7
        assert_eq!(window[0].step_id, 3);
        assert_eq!(window[4].step_id, 7);

        store.close().unwrap();
    }

    #[test]
    fn search_similar_states() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        store.record_step(1, &[1.0, 0.0, 0.0, 0.0], "a", 0.1).unwrap();
        store.record_step(2, &[0.0, 1.0, 0.0, 0.0], "b", 0.2).unwrap();
        store.record_step(3, &[0.9, 0.1, 0.0, 0.0], "c", 0.3).unwrap();

        let results = store.search_similar_states(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(results.len(), 2);
        // Closest to [1,0,0,0] should be step 1 or step 3
        assert!(results[0].distance <= results[1].distance);

        store.close().unwrap();
    }

    #[test]
    fn clear_old_steps() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        for i in 0..5 {
            store.record_step(i, &make_embedding(i as f32 + 0.1), "act", 0.1).unwrap();
        }

        let removed = store.clear_old(2).unwrap();
        assert_eq!(removed, 3);
        assert_eq!(store.len(), 2);

        let remaining = store.get_recent(10);
        assert_eq!(remaining.len(), 2);
        assert_eq!(remaining[0].step_id, 3);
        assert_eq!(remaining[1].step_id, 4);

        store.close().unwrap();
    }

    #[test]
    fn clear_old_no_op_when_within_limit() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        store.record_step(1, &make_embedding(1.0), "a", 0.1).unwrap();

        let removed = store.clear_old(10).unwrap();
        assert_eq!(removed, 0);
        assert_eq!(store.len(), 1);

        store.close().unwrap();
    }

    #[test]
    fn empty_store_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        assert!(store.get_recent(5).is_empty());
        assert!(store.get_trajectory_window().is_empty());

        let results = store.search_similar_states(&make_embedding(1.0), 5).unwrap();
        assert!(results.is_empty());

        store.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = TrajectoryStore::create(config).unwrap();

        let result = store.record_step(1, &[1.0, 2.0], "a", 0.1);
        assert!(result.is_err());

        let result = store.search_similar_states(&[1.0, 2.0], 5);
        assert!(result.is_err());

        store.close().unwrap();
    }
}