Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
[package]
name = "rvf-adapter-agentdb"
version = "0.1.0"
edition = "2021"
description = "AgentDB adapter for RuVector Format -- maps agent memory to RVF segments"
license = "MIT OR Apache-2.0"
[features]
default = ["std"]
std = []
[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-index = { path = "../../rvf-index", features = ["std"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,323 @@
//! Maps agentdb HNSW operations to RVF INDEX_SEG layers.
//!
//! Bridges agentdb's HNSW index lifecycle to the three-layer progressive
//! indexing model (Layer A / B / C) defined in `rvf-index`.
use std::collections::BTreeSet;
use rvf_index::builder::{build_full_index, build_layer_a, build_layer_b, build_layer_c};
use rvf_index::distance::{cosine_distance, l2_distance};
use rvf_index::hnsw::{HnswConfig, HnswGraph};
type DistanceFn = Box<dyn Fn(&[f32], &[f32]) -> f32>;
use rvf_index::layers::{IndexLayer, LayerA, LayerB, LayerC};
use rvf_index::progressive::ProgressiveIndex;
use rvf_index::traits::InMemoryVectorStore;
/// Configuration for the RVF index adapter.
///
/// Controls HNSW graph construction/search parameters and how progressive
/// layers are derived from the built graph.
#[derive(Clone, Debug)]
pub struct IndexAdapterConfig {
    /// HNSW M parameter (neighbor budget for upper layers).
    pub m: usize,
    /// HNSW M0 (layer-0 neighbors).
    pub m0: usize,
    /// ef_construction beam width used while building the graph.
    pub ef_construction: usize,
    /// ef_search beam width for queries.
    pub ef_search: usize,
    /// Use cosine distance (default true for agentdb text embeddings);
    /// when false, L2 distance is used.
    pub use_cosine: bool,
    /// Hot node fraction for Layer B (0.0 - 1.0).
    /// NOTE(review): not validated anywhere; values outside [0, 1] are
    /// passed straight into the hot-count computation — confirm intended.
    pub hot_fraction: f32,
}
impl Default for IndexAdapterConfig {
fn default() -> Self {
Self {
m: 16,
m0: 32,
ef_construction: 200,
ef_search: 100,
use_cosine: true,
hot_fraction: 0.2,
}
}
}
/// Adapter that maps agentdb HNSW operations to RVF INDEX_SEG layers.
///
/// Manages the full HNSW graph and can extract progressive layers (A/B/C)
/// for serialization into INDEX_SEG segments.
pub struct RvfIndexAdapter {
    // Adapter parameters (HNSW sizing, metric choice, hot fraction).
    config: IndexAdapterConfig,
    // Full HNSW graph; `None` until `build` has run on non-empty input.
    graph: Option<HnswGraph>,
    // Copy of the indexed vectors; row i corresponds to internal node i.
    vectors: Vec<Vec<f32>>,
    // External IDs, parallel to `vectors`.
    // NOTE(review): stored but never read by the visible search paths —
    // `search`/`search_full` appear to return internal indices, not mapped
    // IDs; confirm whether mapping happens downstream.
    id_map: Vec<u64>,
    // Layered index consulted by `search`; rebuilt by `load_progressive`.
    progressive: ProgressiveIndex,
    // Layers most recently passed to `load_progressive`.
    loaded_layers: Vec<IndexLayer>,
}
impl RvfIndexAdapter {
    /// Create a new, empty index adapter with the given configuration.
    pub fn new(config: IndexAdapterConfig) -> Self {
        Self {
            config,
            graph: None,
            vectors: Vec::new(),
            id_map: Vec::new(),
            progressive: ProgressiveIndex::new(),
            loaded_layers: Vec::new(),
        }
    }

    /// Build the full HNSW index from a set of vectors and IDs.
    ///
    /// This replaces any existing index. Passing an empty `vectors` slice
    /// clears the adapter back to its freshly-constructed (index-less)
    /// state. `ids[i]` is the external ID of `vectors[i]`; the slices are
    /// expected to be of equal length.
    pub fn build(&mut self, vectors: Vec<Vec<f32>>, ids: Vec<u64>) {
        let n = vectors.len();
        if n == 0 {
            // Honor the "replaces any existing index" contract: building
            // from nothing resets the adapter instead of silently keeping
            // the previous graph and vectors.
            self.graph = None;
            self.vectors.clear();
            self.id_map.clear();
            return;
        }
        let hnsw_config = HnswConfig {
            m: self.config.m,
            m0: self.config.m0,
            ef_construction: self.config.ef_construction,
        };
        let store = InMemoryVectorStore::new(vectors.clone());
        let distance_fn = self.distance_fn();
        // Generate deterministic pseudo-random values for level selection
        // (LCG-style mixing), so rebuilding over the same input always
        // produces the same graph.
        let rng_values: Vec<f64> = (0..n)
            .map(|i| {
                let seed = (i as u64)
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                let val = (seed >> 33) as f64 / (1u64 << 31) as f64;
                // Clamp away from 0 and 1 so log-based level selection
                // stays finite.
                val.clamp(0.001, 0.999)
            })
            .collect();
        let graph = build_full_index(&store, n, &hnsw_config, &rng_values, &distance_fn);
        self.vectors = vectors;
        self.id_map = ids;
        self.graph = Some(graph);
    }

    /// Extract Layer A (entry points + coarse routing) from the current graph.
    ///
    /// Returns `None` when no index has been built.
    pub fn extract_layer_a(&self) -> Option<LayerA> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();
        // Simple centroid computation: split vectors into 2 partitions.
        // Keep the first partition non-empty even for n == 1; plain
        // `n / 2` would make `vectors[..mid]` empty there and yield a
        // bogus all-zero routing centroid.
        let mid = if n == 1 { 1 } else { n / 2 };
        let dim = self.vectors.first().map_or(0, |v| v.len());
        let centroid_0 = compute_centroid(&self.vectors[..mid], dim);
        let centroid_1 = if mid < n {
            compute_centroid(&self.vectors[mid..], dim)
        } else {
            // Single-vector index: duplicate the only centroid.
            centroid_0.clone()
        };
        let centroids = vec![centroid_0, centroid_1];
        let assignments: Vec<u32> = (0..n).map(|i| if i < mid { 0 } else { 1 }).collect();
        Some(build_layer_a(graph, &centroids, &assignments, n as u64))
    }

    /// Extract Layer B (hot region partial adjacency) from the current graph.
    ///
    /// The first `ceil(n * hot_fraction)` internal node IDs are treated as
    /// the hot set.
    pub fn extract_layer_b(&self) -> Option<LayerB> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();
        let hot_count = ((n as f32) * self.config.hot_fraction).ceil() as usize;
        let hot_ids: BTreeSet<u64> = (0..hot_count as u64).collect();
        Some(build_layer_b(graph, &hot_ids))
    }

    /// Extract Layer C (full adjacency) from the current graph.
    pub fn extract_layer_c(&self) -> Option<LayerC> {
        let graph = self.graph.as_ref()?;
        Some(build_layer_c(graph))
    }

    /// Load progressive layers and configure the progressive index for search.
    ///
    /// Each requested layer is re-extracted from the in-memory graph; if no
    /// graph has been built the corresponding layer slot stays `None`.
    pub fn load_progressive(&mut self, layers: &[IndexLayer]) {
        self.loaded_layers = layers.to_vec();
        let mut idx = ProgressiveIndex::new();
        for layer in layers {
            match layer {
                IndexLayer::A => {
                    idx.layer_a = self.extract_layer_a();
                }
                IndexLayer::B => {
                    idx.layer_b = self.extract_layer_b();
                }
                IndexLayer::C => {
                    idx.layer_c = self.extract_layer_c();
                }
            }
        }
        self.progressive = idx;
    }

    /// Search using the progressive index with whatever layers are loaded.
    pub fn search(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        self.progressive
            .search_with_distance(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Search using the full HNSW graph directly (bypasses progressive layers).
    ///
    /// Returns an empty result when no index has been built.
    pub fn search_full(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let graph = match self.graph.as_ref() {
            Some(g) => g,
            None => return Vec::new(),
        };
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        graph.search(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Get the node count in the HNSW graph (0 when not built).
    pub fn node_count(&self) -> usize {
        self.graph.as_ref().map_or(0, |g| g.node_count())
    }

    /// Get the layers most recently passed to `load_progressive`.
    pub fn loaded_layers(&self) -> &[IndexLayer] {
        &self.loaded_layers
    }

    // Select the distance function implied by the configuration.
    fn distance_fn(&self) -> DistanceFn {
        if self.config.use_cosine {
            Box::new(cosine_distance)
        } else {
            Box::new(l2_distance)
        }
    }
}
/// Compute the element-wise mean of `vectors`, limited to `dim` components.
///
/// Vectors shorter than `dim` contribute only their available components;
/// the remaining slots keep their zero partial sum. An empty input or a
/// zero `dim` yields an all-zero vector of length `dim`.
fn compute_centroid(vectors: &[Vec<f32>], dim: usize) -> Vec<f32> {
    let mut acc = vec![0.0f32; dim];
    if vectors.is_empty() || dim == 0 {
        return acc;
    }
    // Accumulate component-wise sums; `zip` stops at min(dim, v.len()),
    // which also guards against short vectors.
    for v in vectors {
        for (slot, &component) in acc.iter_mut().zip(v.iter()) {
            *slot += component;
        }
    }
    let n = vectors.len() as f32;
    for slot in &mut acc {
        *slot /= n;
    }
    acc
}
#[cfg(test)]
mod tests {
    use super::*;

    // Deterministic fixture: vector i = [i*dim, i*dim+1, ...] with ID i.
    fn make_vectors(n: usize, dim: usize) -> (Vec<Vec<f32>>, Vec<u64>) {
        let vecs: Vec<Vec<f32>> = (0..n)
            .map(|i| (0..dim).map(|d| (i * dim + d) as f32).collect())
            .collect();
        let ids: Vec<u64> = (0..n as u64).collect();
        (vecs, ids)
    }

    #[test]
    fn build_and_search_full() {
        let (vecs, ids) = make_vectors(100, 8);
        // L2 metric so a vector is its own exact nearest neighbor.
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        assert_eq!(adapter.node_count(), 100);
        let results = adapter.search_full(&vecs[50], 5);
        assert!(!results.is_empty());
        assert_eq!(results[0].0, 50);
    }

    #[test]
    fn extract_layers() {
        let (vecs, ids) = make_vectors(50, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs, ids);
        // Layer A: two routing centroids plus entry points.
        let layer_a = adapter.extract_layer_a();
        assert!(layer_a.is_some());
        let la = layer_a.unwrap();
        assert!(!la.entry_points.is_empty());
        assert_eq!(la.centroids.len(), 2);
        let layer_b = adapter.extract_layer_b();
        assert!(layer_b.is_some());
        let layer_c = adapter.extract_layer_c();
        assert!(layer_c.is_some());
    }

    #[test]
    fn progressive_search_with_layers() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        // Load all three layers.
        adapter.load_progressive(&[IndexLayer::A, IndexLayer::B, IndexLayer::C]);
        let results = adapter.search(&vecs[25], 5);
        assert!(!results.is_empty());
        // With full Layer C, we should find the exact match.
        assert_eq!(results[0].0, 25);
    }

    #[test]
    fn progressive_layer_a_only() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        adapter.load_progressive(&[IndexLayer::A]);
        let results = adapter.search(&vecs[10], 5);
        // Layer A alone provides coarse results; we just verify non-empty.
        assert!(!results.is_empty());
    }

    #[test]
    fn empty_adapter() {
        // Never built: no graph, so counts are zero and searches are empty.
        let adapter = RvfIndexAdapter::new(IndexAdapterConfig::default());
        assert_eq!(adapter.node_count(), 0);
        let results = adapter.search_full(&[0.0; 4], 5);
        assert!(results.is_empty());
    }

    #[test]
    fn compute_centroid_basic() {
        let vecs = vec![
            vec![1.0, 2.0, 3.0],
            vec![3.0, 4.0, 5.0],
        ];
        let centroid = compute_centroid(&vecs, 3);
        assert_eq!(centroid, vec![2.0, 3.0, 4.0]);
    }
}

View File

@@ -0,0 +1,18 @@
//! AgentDB adapter for the RuVector Format (RVF).
//!
//! Maps agentdb's vector storage, HNSW index, and memory pattern APIs
//! onto the RVF segment model:
//!
//! - **VEC_SEG**: Raw vector data (episodes, state embeddings)
//! - **INDEX_SEG**: HNSW index layers (A/B/C progressive indexing)
//! - **META_SEG**: Memory pattern metadata (rewards, critiques, tags)
//!
//! Uses the RVText domain profile for text/embedding workloads.
pub mod index_adapter;
pub mod pattern_store;
pub mod vector_store;
pub use index_adapter::RvfIndexAdapter;
pub use pattern_store::{MemoryPattern, RvfPatternStore};
pub use vector_store::RvfVectorStore;

View File

@@ -0,0 +1,456 @@
//! Memory pattern storage using RVF META_SEG.
//!
//! Stores agentdb memory patterns (task descriptions, rewards, critiques,
//! success flags) as metadata alongside their state-embedding vectors.
//! Patterns can be searched by similarity and filtered by reward threshold.
use std::collections::HashMap;
use std::path::Path;
use rvf_runtime::options::{MetadataEntry, MetadataValue};
use rvf_types::RvfError;
use crate::vector_store::{AgentDbMetric, RvfVectorStore, VectorStoreConfig};
/// A memory pattern stored in the agentdb reasoning bank.
#[derive(Clone, Debug)]
pub struct MemoryPattern {
    /// Unique pattern identifier. Pass 0 to let
    /// `RvfPatternStore::store_pattern` auto-assign the next free ID.
    pub id: u64,
    /// Task description that produced this pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0) indicating quality.
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes about the pattern.
    pub critique: String,
    /// State embedding vector for similarity search; its length must match
    /// the store's configured dimension.
    pub embedding: Vec<f32>,
}
/// Well-known metadata field IDs for pattern attributes, used when
/// serializing a pattern into META_SEG entries.
mod field_ids {
    /// Task description (string).
    pub const TASK: u16 = 0;
    /// Reward score, widened to f64 for storage.
    pub const REWARD: u16 = 1;
    /// Success flag, stored as u64 (1 = true, 0 = false).
    pub const SUCCESS: u16 = 2;
    /// Self-critique text (string).
    pub const CRITIQUE: u16 = 3;
}
/// RVF-backed memory pattern store for agentdb.
///
/// Stores patterns as vectors (embeddings) with metadata (task, reward,
/// critique, success flag). Supports similarity search with reward filtering.
pub struct RvfPatternStore {
    // Underlying RVF vector store holding embeddings + metadata entries.
    vector_store: RvfVectorStore,
    // In-memory mirror of per-pattern metadata, keyed by pattern ID.
    // NOTE(review): only populated by `store_pattern`; `open` does not
    // rebuild it from the file, so previously persisted patterns are
    // filtered out of search results — confirm intended.
    patterns: HashMap<u64, PatternMetadata>,
    // Next auto-assigned pattern ID (starts at 1; pattern id 0 means
    // "assign for me").
    next_id: u64,
}
/// In-memory metadata for a pattern (kept alongside the RVF store).
#[derive(Clone, Debug)]
struct PatternMetadata {
    // Task description that produced the pattern.
    task: String,
    // Reward score for quality filtering.
    reward: f32,
    // Whether the pattern was successful.
    success: bool,
    // Self-critique / notes text.
    critique: String,
}
impl RvfPatternStore {
    /// Create a new pattern store at the given path.
    ///
    /// The backing vector store uses cosine distance and an ef_search
    /// beam of 100.
    pub fn create(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::create(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing pattern store.
    ///
    /// NOTE(review): this reopens only the vector data; `patterns` and
    /// `next_id` start fresh, so metadata for previously stored patterns is
    /// not recovered and `search_patterns` will drop their hits — confirm
    /// whether metadata should be rehydrated from the META_SEG here.
    pub fn open(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::open(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a memory pattern.
    ///
    /// A pattern with `id == 0` gets the next auto-assigned ID; a non-zero
    /// `id` is used as-is. Returns the assigned pattern ID.
    ///
    /// NOTE(review): storing a pattern whose ID already exists overwrites
    /// the in-memory metadata but still appends another vector with the
    /// same ID to the store; dedup semantics depend on RvfStore — verify.
    pub fn store_pattern(&mut self, pattern: MemoryPattern) -> Result<u64, RvfError> {
        let id = if pattern.id > 0 {
            pattern.id
        } else {
            let id = self.next_id;
            self.next_id += 1;
            id
        };
        // Ensure next_id stays ahead of manually assigned IDs.
        if id >= self.next_id {
            self.next_id = id + 1;
        }
        // Serialize the pattern attributes as META_SEG entries.
        let metadata = vec![
            MetadataEntry {
                field_id: field_ids::TASK,
                value: MetadataValue::String(pattern.task.clone()),
            },
            MetadataEntry {
                field_id: field_ids::REWARD,
                value: MetadataValue::F64(pattern.reward as f64),
            },
            MetadataEntry {
                field_id: field_ids::SUCCESS,
                value: MetadataValue::U64(if pattern.success { 1 } else { 0 }),
            },
            MetadataEntry {
                field_id: field_ids::CRITIQUE,
                value: MetadataValue::String(pattern.critique.clone()),
            },
        ];
        self.vector_store
            .add_vectors(&[pattern.embedding.as_slice()], &[id], Some(&metadata))?;
        self.patterns.insert(
            id,
            PatternMetadata {
                task: pattern.task,
                reward: pattern.reward,
                success: pattern.success,
                critique: pattern.critique,
            },
        );
        Ok(id)
    }

    /// Search for patterns similar to the given embedding.
    ///
    /// Returns `(pattern_id, distance)` pairs sorted by distance.
    /// Optionally filter by minimum reward score.
    ///
    /// When filtering, 3x candidates are requested from the vector store to
    /// compensate for hits the reward filter removes; fewer than `k` results
    /// may still come back if too many candidates are filtered out.
    pub fn search_patterns(
        &self,
        query_embedding: &[f32],
        k: usize,
        min_reward: Option<f32>,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let search_k = if min_reward.is_some() { k * 3 } else { k };
        let results = self.vector_store.search(query_embedding, search_k, None)?;
        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                // Hits without in-memory metadata (e.g. after `open`) are
                // silently dropped.
                let meta = self.patterns.get(&r.id)?;
                if let Some(threshold) = min_reward {
                    if meta.reward < threshold {
                        return None;
                    }
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: meta.success,
                    critique: meta.critique.clone(),
                })
            })
            .collect();
        filtered.truncate(k);
        Ok(filtered)
    }

    /// Search for patterns that failed (success == false).
    ///
    /// Requests 5x candidates so enough failures survive the filter.
    pub fn search_failures(
        &self,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let results = self.vector_store.search(query_embedding, k * 5, None)?;
        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                let meta = self.patterns.get(&r.id)?;
                if meta.success {
                    return None;
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: false,
                    critique: meta.critique.clone(),
                })
            })
            .collect();
        filtered.truncate(k);
        Ok(filtered)
    }

    /// Delete a pattern by ID.
    ///
    /// Returns `true` when the vector store reported at least one deletion.
    /// The in-memory metadata entry is removed regardless of the store's
    /// reported count, keeping the two views consistent.
    pub fn delete_pattern(&mut self, id: u64) -> Result<bool, RvfError> {
        let deleted = self.vector_store.delete_vectors(&[id])?;
        self.patterns.remove(&id);
        Ok(deleted > 0)
    }

    /// Get pattern metadata by ID (distance is reported as 0.0 since no
    /// query was involved). Returns `None` for unknown IDs.
    pub fn get_pattern(&self, id: u64) -> Option<PatternSearchResult> {
        let meta = self.patterns.get(&id)?;
        Some(PatternSearchResult {
            id,
            distance: 0.0,
            task: meta.task.clone(),
            reward: meta.reward,
            success: meta.success,
            critique: meta.critique.clone(),
        })
    }

    /// Get aggregate statistics about stored patterns.
    ///
    /// Counts come from the in-memory metadata map; `vector_count` comes
    /// from the underlying store, so the two can diverge after `open`.
    pub fn stats(&self) -> PatternStoreStats {
        let total = self.patterns.len();
        let successful = self.patterns.values().filter(|p| p.success).count();
        let avg_reward = if total > 0 {
            self.patterns.values().map(|p| p.reward as f64).sum::<f64>() / total as f64
        } else {
            0.0
        };
        PatternStoreStats {
            total_patterns: total,
            successful_patterns: successful,
            failed_patterns: total - successful,
            avg_reward,
            vector_count: self.vector_store.len(),
        }
    }

    /// Save the store to disk.
    ///
    /// NOTE(review): this closes the underlying RVF file; further vector
    /// operations will fail until the store is reloaded — confirm intended.
    pub fn save(&mut self) -> Result<(), RvfError> {
        self.vector_store.save()
    }

    /// Get the total number of patterns (in-memory metadata count).
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}
/// A pattern search result with full metadata.
#[derive(Clone, Debug)]
pub struct PatternSearchResult {
    /// Pattern ID.
    pub id: u64,
    /// Distance from the query embedding (0.0 for direct `get_pattern`
    /// lookups, which involve no query).
    pub distance: f32,
    /// Task description recorded with the pattern.
    pub task: String,
    /// Reward score recorded with the pattern.
    pub reward: f32,
    /// Whether the pattern was marked successful.
    pub success: bool,
    /// Self-critique text recorded with the pattern.
    pub critique: String,
}
/// Aggregate statistics for the pattern store.
#[derive(Clone, Debug)]
pub struct PatternStoreStats {
    /// Total patterns tracked in memory.
    pub total_patterns: usize,
    /// Patterns flagged `success == true`.
    pub successful_patterns: usize,
    /// Patterns flagged `success == false` (total - successful).
    pub failed_patterns: usize,
    /// Mean reward across all patterns (0.0 when the store is empty).
    pub avg_reward: f64,
    /// Vector count reported by the underlying RVF store; may differ from
    /// `total_patterns` after reopening a store.
    pub vector_count: u64,
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Deterministic pseudo-random embedding seeded by `seed` (LCG mixing),
    // with components roughly in [-0.5, 0.5].
    fn dummy_embedding(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        // id: 0 exercises auto-assignment.
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: i >= 5,
                critique: format!("critique_{}", i),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        assert_eq!(store.len(), 10);
        let query = dummy_embedding(dim, 7);
        let results = store.search_patterns(&query, 3, None).unwrap();
        assert!(!results.is_empty());
        assert!(results.len() <= 3);
    }

    #[test]
    fn search_with_min_reward() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_reward.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: true,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        let query = dummy_embedding(dim, 5);
        // Every surviving hit must clear the reward threshold.
        let results = store.search_patterns(&query, 10, Some(0.5)).unwrap();
        assert!(results.iter().all(|r| r.reward >= 0.5));
    }

    #[test]
    fn search_failures() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_fail.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        // Alternate success/failure so both populations exist.
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: 0.5,
                success: i % 2 == 0,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        let query = dummy_embedding(dim, 3);
        let results = store.search_failures(&query, 5).unwrap();
        assert!(results.iter().all(|r| !r.success));
    }

    #[test]
    fn delete_pattern() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_del.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        // Explicit (non-zero) ID is used as-is.
        let pattern = MemoryPattern {
            id: 42,
            task: "test".into(),
            reward: 0.9,
            success: true,
            critique: "good".into(),
            embedding: vec![1.0, 2.0, 3.0, 4.0],
        };
        store.store_pattern(pattern).unwrap();
        assert_eq!(store.len(), 1);
        let deleted = store.delete_pattern(42).unwrap();
        assert!(deleted);
        assert_eq!(store.len(), 0);
        assert!(store.get_pattern(42).is_none());
    }

    #[test]
    fn stats() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_stats.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..5u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) * 0.2,
                success: i >= 3,
                critique: String::new(),
                embedding: vec![i as f32; dim],
            };
            store.store_pattern(pattern).unwrap();
        }
        let stats = store.stats();
        assert_eq!(stats.total_patterns, 5);
        assert_eq!(stats.successful_patterns, 2);
        assert_eq!(stats.failed_patterns, 3);
        assert!(stats.avg_reward > 0.0);
    }

    #[test]
    fn get_pattern_by_id() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_get.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        let pattern = MemoryPattern {
            id: 100,
            task: "find_bugs".into(),
            reward: 0.85,
            success: true,
            critique: "good coverage".into(),
            embedding: vec![1.0, 0.0, 0.0, 0.0],
        };
        store.store_pattern(pattern).unwrap();
        let result = store.get_pattern(100).unwrap();
        assert_eq!(result.task, "find_bugs");
        assert_eq!(result.reward, 0.85);
        assert!(result.success);
        assert_eq!(result.critique, "good coverage");
        // Unknown IDs yield None.
        assert!(store.get_pattern(999).is_none());
    }
}

View File

@@ -0,0 +1,326 @@
//! RVF-backed vector store for agentdb.
//!
//! Wraps [`RvfStore`] to provide the vector CRUD operations that agentdb
//! expects: add, search, delete, get, save, and load.
use std::path::{Path, PathBuf};
use rvf_runtime::options::{
DistanceMetric, MetadataEntry, QueryOptions, RvfOptions, SearchResult,
};
use rvf_runtime::RvfStore;
use rvf_types::{ErrorCode, RvfError};
/// Distance metric selection matching agentdb's API.
#[derive(Clone, Copy, Debug, Default)]
pub enum AgentDbMetric {
    /// Cosine distance (the default).
    #[default]
    Cosine,
    /// Euclidean (L2) distance.
    L2,
    /// Inner-product similarity.
    InnerProduct,
}
impl From<AgentDbMetric> for DistanceMetric {
fn from(m: AgentDbMetric) -> Self {
match m {
AgentDbMetric::Cosine => DistanceMetric::Cosine,
AgentDbMetric::L2 => DistanceMetric::L2,
AgentDbMetric::InnerProduct => DistanceMetric::InnerProduct,
}
}
}
/// Configuration for the RVF vector store.
#[derive(Clone, Debug)]
pub struct VectorStoreConfig {
    /// Vector dimensionality. All added vectors and queries are assumed to
    /// have exactly this many components — TODO confirm enforced by RvfStore.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: AgentDbMetric,
    /// HNSW ef_search beam width used when a query does not override it.
    pub ef_search: u16,
}
impl Default for VectorStoreConfig {
fn default() -> Self {
Self {
dimension: 128,
metric: AgentDbMetric::Cosine,
ef_search: 100,
}
}
}
/// RVF-backed vector store that provides the agentdb vector storage interface.
///
/// Maps agentdb operations to RvfStore calls:
/// - `add_vectors` -> `ingest_batch`
/// - `search` -> `query`
/// - `delete_vectors` -> `delete`
/// - `get_vector` -> single-vector query
/// - `save` / `load` -> close / open
pub struct RvfVectorStore {
    // Open RVF store; becomes `None` after `save()` until `load()` reopens it.
    store: Option<RvfStore>,
    // On-disk location of the RVF file, retained so `load` can reopen it.
    path: PathBuf,
    // Dimension / metric / ef_search settings supplied at construction.
    config: VectorStoreConfig,
}
impl RvfVectorStore {
    /// Create a new RVF vector store at the given path.
    pub fn create(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
        let rvf_opts = RvfOptions {
            dimension: config.dimension,
            metric: config.metric.into(),
            profile: 1, // RVText profile
            ..Default::default()
        };
        let store = RvfStore::create(path, rvf_opts)?;
        Ok(Self {
            store: Some(store),
            path: path.to_path_buf(),
            config,
        })
    }

    /// Open an existing RVF vector store.
    ///
    /// NOTE(review): `config` is taken on trust; nothing visible here checks
    /// that it matches the dimension/metric the file was created with —
    /// confirm whether RvfStore::open validates this.
    pub fn open(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
        let store = RvfStore::open(path)?;
        Ok(Self {
            store: Some(store),
            path: path.to_path_buf(),
            config,
        })
    }

    /// Add vectors with their IDs and optional metadata.
    ///
    /// `vectors`: slice of float slices, one per vector.
    /// `ids`: one ID per vector.
    /// `metadata`: optional metadata entries (flat list, one entry per vector).
    ///
    /// Returns the number of vectors the store accepted. Errors with
    /// `InvalidManifest` when the store is closed (after `save`).
    pub fn add_vectors(
        &mut self,
        vectors: &[&[f32]],
        ids: &[u64],
        metadata: Option<&[MetadataEntry]>,
    ) -> Result<u64, RvfError> {
        // InvalidManifest doubles as the "store is closed" sentinel here.
        // NOTE(review): consider a dedicated error code if one exists.
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.ingest_batch(vectors, ids, metadata)?;
        Ok(result.accepted)
    }

    /// Search for the k nearest neighbors of a query vector.
    ///
    /// Returns results sorted by distance (ascending). `ef_search` overrides
    /// the configured beam width for this query only.
    pub fn search(
        &self,
        query: &[f32],
        k: usize,
        ef_search: Option<u16>,
    ) -> Result<Vec<SearchResult>, RvfError> {
        let store = self.store.as_ref().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let opts = QueryOptions {
            ef_search: ef_search.unwrap_or(self.config.ef_search),
            ..Default::default()
        };
        store.query(query, k, &opts)
    }

    /// Delete vectors by their IDs. Returns the number actually deleted.
    pub fn delete_vectors(&mut self, ids: &[u64]) -> Result<u64, RvfError> {
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.delete(ids)?;
        Ok(result.deleted)
    }

    /// Retrieve a single vector by ID.
    ///
    /// Uses a zero-distance search trick: queries with each candidate until
    /// the exact ID is found. For small stores this is acceptable; for large
    /// stores the caller should maintain an ID index.
    ///
    /// Returns `None` if the vector is not found or has been deleted.
    ///
    /// NOTE(review): with the cosine metric an all-zero query vector is
    /// degenerate (undefined similarity), and an approximate HNSW search may
    /// not surface every stored ID even with k = total_vectors — verify this
    /// lookup is reliable for the configured metric.
    pub fn get_vector(&self, id: u64) -> Option<SearchResult> {
        let store = self.store.as_ref()?;
        let status = store.status();
        if status.total_vectors == 0 {
            return None;
        }
        // Query a large k and find the matching ID in results.
        // This is O(n) but correct. Production agentdb should cache vectors.
        let dim = self.config.dimension as usize;
        let zero_query = vec![0.0f32; dim];
        let opts = QueryOptions {
            ef_search: self.config.ef_search,
            ..Default::default()
        };
        let results = store.query(&zero_query, status.total_vectors as usize, &opts).ok()?;
        results.into_iter().find(|r| r.id == id)
    }

    /// Save the store (flushes and closes the underlying RVF file).
    ///
    /// After this call `store` is `None`; subsequent vector operations fail
    /// until `load` reopens the file. Calling `save` twice is a no-op.
    pub fn save(&mut self) -> Result<(), RvfError> {
        if let Some(store) = self.store.take() {
            store.close()?;
        }
        Ok(())
    }

    /// Reload the store from disk. A no-op when the store is already open.
    pub fn load(&mut self) -> Result<(), RvfError> {
        if self.store.is_some() {
            return Ok(());
        }
        let store = RvfStore::open(&self.path)?;
        self.store = Some(store);
        Ok(())
    }

    /// Get the current vector count (0 when the store is closed).
    pub fn len(&self) -> u64 {
        self.store.as_ref().map_or(0, |s| s.status().total_vectors)
    }

    /// Returns true if the store is empty (or closed).
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Run compaction to reclaim space from deleted vectors.
    /// Returns the number of bytes reclaimed.
    pub fn compact(&mut self) -> Result<u64, RvfError> {
        let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
        let result = store.compact()?;
        Ok(result.bytes_reclaimed)
    }

    /// Get the file path of the underlying RVF store.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Get the store configuration.
    pub fn config(&self) -> &VectorStoreConfig {
        &self.config
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rvf_runtime::options::MetadataValue;
    use tempfile::TempDir;

    // L2 metric keeps expected distances exact (a vector is its own
    // zero-distance nearest neighbor).
    fn make_config(dim: u16) -> VectorStoreConfig {
        VectorStoreConfig {
            dimension: dim,
            metric: AgentDbMetric::L2,
            ef_search: 100,
        }
    }

    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        let v3 = [0.0f32, 0.0, 1.0, 0.0];
        let accepted = store
            .add_vectors(&[&v1, &v2, &v3], &[10, 20, 30], None)
            .unwrap();
        assert_eq!(accepted, 3);
        // Exact match should come back first with ~zero distance.
        let results = store.search(&[1.0, 0.0, 0.0, 0.0], 2, None).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 10);
        assert!(results[0].distance < f32::EPSILON);
    }

    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_del.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let vecs: Vec<[f32; 4]> = (0..10).map(|i| [i as f32, 0.0, 0.0, 0.0]).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (0..10).collect();
        store.add_vectors(&refs, &ids, None).unwrap();
        let deleted = store.delete_vectors(&[0, 2, 4]).unwrap();
        assert_eq!(deleted, 3);
        assert_eq!(store.len(), 7);
        // Compaction reclaims space but must not change the live count.
        let reclaimed = store.compact().unwrap();
        assert!(reclaimed > 0);
        assert_eq!(store.len(), 7);
    }

    #[test]
    fn save_and_load() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_persist.rvf");
        let config = make_config(4);
        // First scope: write and close via save().
        {
            let mut store = RvfVectorStore::create(&path, config.clone()).unwrap();
            let v1 = [1.0f32, 2.0, 3.0, 4.0];
            store.add_vectors(&[&v1], &[42], None).unwrap();
            store.save().unwrap();
        }
        // Second scope: reopen and verify the vector survived.
        {
            let store = RvfVectorStore::open(&path, config).unwrap();
            assert_eq!(store.len(), 1);
            let results = store.search(&[1.0, 2.0, 3.0, 4.0], 1, None).unwrap();
            assert_eq!(results[0].id, 42);
        }
    }

    #[test]
    fn add_with_metadata() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_meta.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        // One metadata entry per vector, same field id.
        let metadata = vec![
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_a".into()),
            },
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_b".into()),
            },
        ];
        let accepted = store
            .add_vectors(&[&v1, &v2], &[1, 2], Some(&metadata))
            .unwrap();
        assert_eq!(accepted, 2);
    }

    #[test]
    fn empty_store() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_empty.rvf");
        let store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        let results = store.search(&[0.0, 0.0, 0.0, 0.0], 5, None).unwrap();
        assert!(results.is_empty());
    }
}

View File

@@ -0,0 +1,20 @@
[package]
name = "rvf-adapter-agentic-flow"
version = "0.1.0"
edition = "2021"
description = "Agentic-flow swarm adapter for RuVector Format -- maps inter-agent memory, coordination state, and learning patterns to RVF segments"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"
[features]
default = ["std"]
std = []
[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-crypto = { path = "../../rvf-crypto", features = ["std"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,148 @@
//! Configuration for the agentic-flow swarm adapter.
use std::path::PathBuf;
/// Configuration for the RVF-backed agentic-flow swarm store.
#[derive(Clone, Debug)]
pub struct AgenticFlowConfig {
    /// Directory where RVF data files are stored (`swarm.rvf`, `witness.bin`).
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match embeddings used by agents;
    /// must be > 0 — see `validate`).
    pub dimension: u16,
    /// Unique identifier for this agent (must be non-empty — see `validate`).
    pub agent_id: String,
    /// Whether to log consensus events in a WITNESS_SEG audit trail.
    pub enable_witness: bool,
    /// Optional swarm group identifier for multi-swarm deployments.
    pub swarm_id: Option<String>,
}
impl AgenticFlowConfig {
/// Create a new configuration with required parameters.
///
/// Uses sensible defaults: dimension=384, witness enabled, no swarm group.
pub fn new(data_dir: impl Into<PathBuf>, agent_id: impl Into<String>) -> Self {
Self {
data_dir: data_dir.into(),
dimension: 384,
agent_id: agent_id.into(),
enable_witness: true,
swarm_id: None,
}
}
/// Set the embedding dimension.
pub fn with_dimension(mut self, dimension: u16) -> Self {
self.dimension = dimension;
self
}
/// Enable or disable witness audit trails.
pub fn with_witness(mut self, enable: bool) -> Self {
self.enable_witness = enable;
self
}
/// Set the swarm group identifier.
pub fn with_swarm_id(mut self, swarm_id: impl Into<String>) -> Self {
self.swarm_id = Some(swarm_id.into());
self
}
/// Return the path to the main vector store RVF file.
pub fn store_path(&self) -> PathBuf {
self.data_dir.join("swarm.rvf")
}
/// Return the path to the witness chain file.
pub fn witness_path(&self) -> PathBuf {
self.data_dir.join("witness.bin")
}
/// Ensure the data directory exists.
pub fn ensure_dirs(&self) -> std::io::Result<()> {
std::fs::create_dir_all(&self.data_dir)
}
/// Validate the configuration.
pub fn validate(&self) -> Result<(), ConfigError> {
if self.dimension == 0 {
return Err(ConfigError::InvalidDimension);
}
if self.agent_id.is_empty() {
return Err(ConfigError::EmptyAgentId);
}
Ok(())
}
}
/// Errors specific to adapter configuration, produced by
/// `AgenticFlowConfig::validate`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
    /// Agent ID must not be empty.
    EmptyAgentId,
}
impl std::fmt::Display for ConfigError {
    /// Render a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::InvalidDimension => "vector dimension must be > 0",
            Self::EmptyAgentId => "agent_id must not be empty",
        };
        f.write_str(message)
    }
}

// Marker impl: Display + Debug are sufficient for std::error::Error.
impl std::error::Error for ConfigError {}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn config_defaults() {
        let cfg = AgenticFlowConfig::new("/tmp/test", "agent-1");
        assert_eq!(cfg.dimension, 384);
        assert!(cfg.enable_witness);
        assert!(cfg.swarm_id.is_none());
        assert_eq!(cfg.agent_id, "agent-1");
    }

    #[test]
    fn config_paths() {
        // Derived file paths live directly under data_dir.
        let cfg = AgenticFlowConfig::new("/data/swarm", "a1");
        assert_eq!(cfg.store_path(), Path::new("/data/swarm/swarm.rvf"));
        assert_eq!(cfg.witness_path(), Path::new("/data/swarm/witness.bin"));
    }

    #[test]
    fn validate_zero_dimension() {
        let cfg = AgenticFlowConfig::new("/tmp", "a1").with_dimension(0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }

    #[test]
    fn validate_empty_agent_id() {
        let cfg = AgenticFlowConfig::new("/tmp", "");
        assert_eq!(cfg.validate(), Err(ConfigError::EmptyAgentId));
    }

    #[test]
    fn validate_ok() {
        let cfg = AgenticFlowConfig::new("/tmp", "agent-1").with_dimension(64);
        assert!(cfg.validate().is_ok());
    }

    #[test]
    fn builder_methods() {
        // Builder calls chain and each one overrides a single field.
        let cfg = AgenticFlowConfig::new("/tmp", "a1")
            .with_dimension(128)
            .with_witness(false)
            .with_swarm_id("swarm-alpha");
        assert_eq!(cfg.dimension, 128);
        assert!(!cfg.enable_witness);
        assert_eq!(cfg.swarm_id.as_deref(), Some("swarm-alpha"));
    }
}

View File

@@ -0,0 +1,283 @@
//! Swarm coordination state management.
//!
//! Tracks agent state changes and consensus votes in-memory, with the
//! coordination state serialized alongside the RVF store. State entries
//! and votes are appended chronologically for audit and replay.
/// A recorded agent state change.
///
/// Entries are appended in chronological order by
/// `SwarmCoordination::record_state`, which stamps them via `now_ns`.
#[derive(Clone, Debug, PartialEq)]
pub struct StateEntry {
    /// The agent that produced this state change.
    pub agent_id: String,
    /// State key (e.g., "status", "role", "topology").
    pub key: String,
    /// State value (e.g., "active", "coordinator", "mesh").
    pub value: String,
    /// Timestamp in nanoseconds since the Unix epoch.
    pub timestamp: u64,
}
/// A consensus vote cast by an agent.
///
/// Votes are appended in chronological order by
/// `SwarmCoordination::record_consensus_vote`, which stamps them via `now_ns`.
#[derive(Clone, Debug, PartialEq)]
pub struct ConsensusVote {
    /// The topic being voted on (e.g., "leader-election-42").
    pub topic: String,
    /// The agent casting the vote.
    pub agent_id: String,
    /// The vote (true = approve, false = reject).
    pub vote: bool,
    /// Timestamp in nanoseconds since the Unix epoch.
    pub timestamp: u64,
}
/// Swarm coordination state tracker.
///
/// Maintains an in-memory log of agent state changes and consensus votes.
/// This state lives alongside the RVF store and is used for coordination
/// protocol decisions (leader election, topology changes, etc.).
pub struct SwarmCoordination {
    /// Chronological log of agent state changes.
    states: Vec<StateEntry>,
    /// Chronological log of consensus votes across all topics.
    votes: Vec<ConsensusVote>,
}
impl SwarmCoordination {
    /// Create a new, empty coordination tracker.
    pub fn new() -> Self {
        Self {
            states: Vec::new(),
            votes: Vec::new(),
        }
    }

    /// Record an agent state change.
    ///
    /// Rejects empty `agent_id` or `state_key`; the value may be empty.
    pub fn record_state(
        &mut self,
        agent_id: &str,
        state_key: &str,
        state_value: &str,
    ) -> Result<(), CoordinationError> {
        if agent_id.is_empty() {
            return Err(CoordinationError::EmptyAgentId);
        }
        if state_key.is_empty() {
            return Err(CoordinationError::EmptyKey);
        }
        let entry = StateEntry {
            agent_id: agent_id.to_string(),
            key: state_key.to_string(),
            value: state_value.to_string(),
            timestamp: now_ns(),
        };
        self.states.push(entry);
        Ok(())
    }

    /// Get the state history for a specific agent, in insertion order.
    pub fn get_agent_states(&self, agent_id: &str) -> Vec<StateEntry> {
        let mut matching = Vec::new();
        for state in &self.states {
            if state.agent_id == agent_id {
                matching.push(state.clone());
            }
        }
        matching
    }

    /// Get all coordination state entries.
    pub fn get_all_states(&self) -> Vec<StateEntry> {
        self.states.to_vec()
    }

    /// Record a consensus vote for a topic.
    ///
    /// Rejects empty `topic` or `agent_id`.
    pub fn record_consensus_vote(
        &mut self,
        topic: &str,
        agent_id: &str,
        vote: bool,
    ) -> Result<(), CoordinationError> {
        if topic.is_empty() {
            return Err(CoordinationError::EmptyTopic);
        }
        if agent_id.is_empty() {
            return Err(CoordinationError::EmptyAgentId);
        }
        let ballot = ConsensusVote {
            topic: topic.to_string(),
            agent_id: agent_id.to_string(),
            vote,
            timestamp: now_ns(),
        };
        self.votes.push(ballot);
        Ok(())
    }

    /// Get all votes for a specific topic, in insertion order.
    pub fn get_votes(&self, topic: &str) -> Vec<ConsensusVote> {
        let mut matching = Vec::new();
        for ballot in &self.votes {
            if ballot.topic == topic {
                matching.push(ballot.clone());
            }
        }
        matching
    }

    /// Get the total number of state entries.
    pub fn state_count(&self) -> usize {
        self.states.len()
    }

    /// Get the total number of votes.
    pub fn vote_count(&self) -> usize {
        self.votes.len()
    }
}
impl Default for SwarmCoordination {
fn default() -> Self {
Self::new()
}
}
/// Errors from coordination operations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CoordinationError {
    /// Agent ID must not be empty.
    EmptyAgentId,
    /// State key must not be empty.
    EmptyKey,
    /// Topic must not be empty.
    EmptyTopic,
}

impl std::fmt::Display for CoordinationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fixed message per variant; resolve first, then emit once.
        let msg = match self {
            Self::EmptyAgentId => "agent_id must not be empty",
            Self::EmptyKey => "state key must not be empty",
            Self::EmptyTopic => "topic must not be empty",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for CoordinationError {}
/// Current wall-clock time as nanoseconds since the Unix epoch, truncated
/// to `u64`; returns 0 if the system clock reads before the epoch.
fn now_ns() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos() as u64,
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for SwarmCoordination: state recording, vote recording,
    // empty-input validation, counters, and timestamp ordering.
    use super::*;
    #[test]
    fn record_and_get_states() {
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "status", "active").unwrap();
        coord.record_state("a2", "status", "idle").unwrap();
        coord.record_state("a1", "role", "coordinator").unwrap();
        let a1_states = coord.get_agent_states("a1");
        assert_eq!(a1_states.len(), 2);
        assert_eq!(a1_states[0].key, "status");
        assert_eq!(a1_states[1].key, "role");
        let a2_states = coord.get_agent_states("a2");
        assert_eq!(a2_states.len(), 1);
    }
    #[test]
    fn get_all_states() {
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "k1", "v1").unwrap();
        coord.record_state("a2", "k2", "v2").unwrap();
        let all = coord.get_all_states();
        assert_eq!(all.len(), 2);
    }
    #[test]
    fn record_and_get_votes() {
        // Votes are filtered per topic and keep insertion order.
        let mut coord = SwarmCoordination::new();
        coord
            .record_consensus_vote("leader-election", "a1", true)
            .unwrap();
        coord
            .record_consensus_vote("leader-election", "a2", false)
            .unwrap();
        coord
            .record_consensus_vote("other-topic", "a1", true)
            .unwrap();
        let votes = coord.get_votes("leader-election");
        assert_eq!(votes.len(), 2);
        assert!(votes[0].vote);
        assert!(!votes[1].vote);
        let other = coord.get_votes("other-topic");
        assert_eq!(other.len(), 1);
    }
    #[test]
    fn empty_agent_id_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_state("", "k", "v"),
            Err(CoordinationError::EmptyAgentId)
        );
        assert_eq!(
            coord.record_consensus_vote("topic", "", true),
            Err(CoordinationError::EmptyAgentId)
        );
    }
    #[test]
    fn empty_key_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_state("a1", "", "v"),
            Err(CoordinationError::EmptyKey)
        );
    }
    #[test]
    fn empty_topic_rejected() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(
            coord.record_consensus_vote("", "a1", true),
            Err(CoordinationError::EmptyTopic)
        );
    }
    #[test]
    fn counts() {
        let mut coord = SwarmCoordination::new();
        assert_eq!(coord.state_count(), 0);
        assert_eq!(coord.vote_count(), 0);
        coord.record_state("a1", "k", "v").unwrap();
        coord.record_consensus_vote("t", "a1", true).unwrap();
        assert_eq!(coord.state_count(), 1);
        assert_eq!(coord.vote_count(), 1);
    }
    #[test]
    fn no_states_for_unknown_agent() {
        let coord = SwarmCoordination::new();
        assert!(coord.get_agent_states("ghost").is_empty());
    }
    #[test]
    fn no_votes_for_unknown_topic() {
        let coord = SwarmCoordination::new();
        assert!(coord.get_votes("nonexistent").is_empty());
    }
    #[test]
    fn timestamps_are_monotonic() {
        // `<=` rather than `<`: two reads of the system clock can be equal.
        let mut coord = SwarmCoordination::new();
        coord.record_state("a1", "k1", "v1").unwrap();
        coord.record_state("a1", "k2", "v2").unwrap();
        let states = coord.get_agent_states("a1");
        assert!(states[0].timestamp <= states[1].timestamp);
    }
}

View File

@@ -0,0 +1,301 @@
//! Agent learning pattern management.
//!
//! Stores learned patterns as vectors with metadata (pattern type, description,
//! effectiveness score) in the RVF store. Patterns can be searched by embedding
//! similarity and ranked by their effectiveness scores.
use std::collections::HashMap;
/// A learning pattern search result.
///
/// `distance` is populated by similarity-search enrichment; direct lookups
/// (`get_pattern`, `get_top_patterns`) report it as 0.0.
#[derive(Clone, Debug)]
pub struct PatternResult {
    /// Unique pattern identifier.
    pub id: u64,
    /// The cognitive pattern type (e.g., "convergent", "divergent", "lateral").
    pub pattern_type: String,
    /// Human-readable description of the pattern.
    pub description: String,
    /// Effectiveness score (0.0 - 1.0).
    pub score: f32,
    /// Distance from query embedding (only meaningful in search results).
    pub distance: f32,
}
/// In-memory metadata for a stored pattern.
///
/// Internal mirror of `PatternResult` without the per-query `distance`.
#[derive(Clone, Debug)]
struct PatternMeta {
    // Cognitive pattern type, e.g. "convergent".
    pattern_type: String,
    // Human-readable description.
    description: String,
    // Effectiveness score, clamped to [0.0, 1.0] when written.
    score: f32,
}
/// Agent learning pattern store.
///
/// Wraps a vector store to provide pattern-specific operations: store, search,
/// update scores, and retrieve top patterns. Each pattern has a type, description,
/// effectiveness score, and an embedding vector for similarity search.
pub struct LearningPatternStore {
    /// Pattern metadata keyed by pattern ID.
    patterns: HashMap<u64, PatternMeta>,
    /// (score, id) pairs in insertion order -- NOT kept sorted; `get_top_patterns`
    /// sorts a clone of this list on demand.
    score_index: Vec<(f32, u64)>,
    /// Next pattern ID to assign (IDs start at 1; 0 is never used).
    next_id: u64,
}
impl LearningPatternStore {
    /// Create a new, empty learning pattern store.
    pub fn new() -> Self {
        Self {
            patterns: HashMap::new(),
            score_index: Vec::new(),
            next_id: 1,
        }
    }

    /// Store a learned pattern.
    ///
    /// The `embedding` is stored in the parent `RvfSwarmStore` via metadata;
    /// this struct tracks the pattern metadata for filtering and ranking.
    /// The score is clamped to [0.0, 1.0].
    ///
    /// Returns the assigned pattern ID.
    pub fn store_pattern(
        &mut self,
        pattern_type: &str,
        description: &str,
        score: f32,
    ) -> Result<u64, LearningError> {
        if pattern_type.is_empty() {
            return Err(LearningError::EmptyPatternType);
        }
        let clamped_score = score.clamp(0.0, 1.0);
        let id = self.next_id;
        self.next_id += 1;
        let meta = PatternMeta {
            pattern_type: pattern_type.to_string(),
            description: description.to_string(),
            score: clamped_score,
        };
        self.patterns.insert(id, meta);
        self.score_index.push((clamped_score, id));
        Ok(id)
    }

    /// Search patterns by returning those whose IDs are in the given candidate
    /// set (from a vector similarity search), enriched with metadata.
    /// Candidates with unknown IDs are dropped; at most `k` results are kept.
    pub fn enrich_results(
        &self,
        candidates: &[(u64, f32)],
        k: usize,
    ) -> Vec<PatternResult> {
        let mut enriched = Vec::new();
        for &(id, distance) in candidates {
            if enriched.len() == k {
                break;
            }
            if let Some(meta) = self.patterns.get(&id) {
                enriched.push(PatternResult {
                    id,
                    pattern_type: meta.pattern_type.clone(),
                    description: meta.description.clone(),
                    score: meta.score,
                    distance,
                });
            }
        }
        enriched
    }

    /// Update the effectiveness score for a pattern, clamping to [0.0, 1.0].
    pub fn update_score(&mut self, id: u64, new_score: f32) -> Result<(), LearningError> {
        let clamped = new_score.clamp(0.0, 1.0);
        let meta = self
            .patterns
            .get_mut(&id)
            .ok_or(LearningError::PatternNotFound(id))?;
        meta.score = clamped;
        // Keep the first matching (score, id) pair in sync with the metadata.
        for entry in self.score_index.iter_mut() {
            if entry.1 == id {
                entry.0 = clamped;
                break;
            }
        }
        Ok(())
    }

    /// Get the top-k patterns by effectiveness score (highest first).
    pub fn get_top_patterns(&self, k: usize) -> Vec<PatternResult> {
        let mut by_score = self.score_index.clone();
        // Stable descending sort; incomparable pairs are treated as equal.
        by_score.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        by_score.truncate(k);
        let mut top = Vec::with_capacity(by_score.len());
        for (_, id) in by_score {
            if let Some(meta) = self.patterns.get(&id) {
                top.push(PatternResult {
                    id,
                    pattern_type: meta.pattern_type.clone(),
                    description: meta.description.clone(),
                    score: meta.score,
                    distance: 0.0,
                });
            }
        }
        top
    }

    /// Get a pattern by ID (distance is reported as 0.0).
    pub fn get_pattern(&self, id: u64) -> Option<PatternResult> {
        self.patterns.get(&id).map(|meta| PatternResult {
            id,
            pattern_type: meta.pattern_type.clone(),
            description: meta.description.clone(),
            score: meta.score,
            distance: 0.0,
        })
    }

    /// Get the total number of stored patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}
impl Default for LearningPatternStore {
fn default() -> Self {
Self::new()
}
}
/// Errors from learning pattern operations.
//
// `Eq` is derived for consistency with `ConfigError` and `CoordinationError`;
// the variants carry no floating-point data, so full equality is sound.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum LearningError {
    /// Pattern type must not be empty.
    EmptyPatternType,
    /// Pattern with the given ID was not found.
    PatternNotFound(u64),
}

impl std::fmt::Display for LearningError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyPatternType => write!(f, "pattern_type must not be empty"),
            Self::PatternNotFound(id) => write!(f, "pattern not found: {id}"),
        }
    }
}

impl std::error::Error for LearningError {}
#[cfg(test)]
mod tests {
    // Unit tests for LearningPatternStore: storage, score updates and
    // clamping, top-k ranking, candidate enrichment, and size accessors.
    use super::*;
    #[test]
    fn store_and_retrieve() {
        let mut store = LearningPatternStore::new();
        let id = store.store_pattern("convergent", "Use batched writes", 0.85).unwrap();
        let p = store.get_pattern(id).unwrap();
        assert_eq!(p.pattern_type, "convergent");
        assert_eq!(p.description, "Use batched writes");
        assert!((p.score - 0.85).abs() < f32::EPSILON);
    }
    #[test]
    fn update_score() {
        let mut store = LearningPatternStore::new();
        let id = store.store_pattern("lateral", "Try alternative approach", 0.5).unwrap();
        store.update_score(id, 0.95).unwrap();
        let p = store.get_pattern(id).unwrap();
        assert!((p.score - 0.95).abs() < f32::EPSILON);
    }
    #[test]
    fn update_nonexistent_pattern() {
        let mut store = LearningPatternStore::new();
        assert_eq!(
            store.update_score(999, 0.5),
            Err(LearningError::PatternNotFound(999))
        );
    }
    #[test]
    fn top_patterns() {
        // Highest-score patterns come first, truncated to k.
        let mut store = LearningPatternStore::new();
        store.store_pattern("a", "low", 0.2).unwrap();
        store.store_pattern("b", "mid", 0.5).unwrap();
        store.store_pattern("c", "high", 0.9).unwrap();
        store.store_pattern("d", "highest", 1.0).unwrap();
        let top = store.get_top_patterns(2);
        assert_eq!(top.len(), 2);
        assert!((top[0].score - 1.0).abs() < f32::EPSILON);
        assert!((top[1].score - 0.9).abs() < f32::EPSILON);
    }
    #[test]
    fn score_clamping() {
        // Out-of-range scores are clamped to [0.0, 1.0] on store.
        let mut store = LearningPatternStore::new();
        let id1 = store.store_pattern("a", "over", 1.5).unwrap();
        let id2 = store.store_pattern("b", "under", -0.3).unwrap();
        assert!((store.get_pattern(id1).unwrap().score - 1.0).abs() < f32::EPSILON);
        assert!(store.get_pattern(id2).unwrap().score.abs() < f32::EPSILON);
    }
    #[test]
    fn empty_pattern_type_rejected() {
        let mut store = LearningPatternStore::new();
        assert_eq!(
            store.store_pattern("", "desc", 0.5),
            Err(LearningError::EmptyPatternType)
        );
    }
    #[test]
    fn enrich_results() {
        let mut store = LearningPatternStore::new();
        let id1 = store.store_pattern("convergent", "desc1", 0.8).unwrap();
        let id2 = store.store_pattern("divergent", "desc2", 0.6).unwrap();
        let _id3 = store.store_pattern("lateral", "desc3", 0.4).unwrap();
        let candidates = vec![(id1, 0.1), (id2, 0.3), (999, 0.5)];
        let results = store.enrich_results(&candidates, 10);
        // id 999 is filtered out (not in patterns map)
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, id1);
        assert_eq!(results[1].id, id2);
    }
    #[test]
    fn len_and_is_empty() {
        let mut store = LearningPatternStore::new();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        store.store_pattern("a", "desc", 0.5).unwrap();
        assert!(!store.is_empty());
        assert_eq!(store.len(), 1);
    }
    #[test]
    fn get_nonexistent_pattern() {
        let store = LearningPatternStore::new();
        assert!(store.get_pattern(42).is_none());
    }
    #[test]
    fn top_patterns_empty_store() {
        let store = LearningPatternStore::new();
        assert!(store.get_top_patterns(5).is_empty());
    }
}

View File

@@ -0,0 +1,53 @@
//! RVF adapter for agentic-flow swarm coordination.
//!
//! This crate bridges agentic-flow's swarm coordination primitives with the
//! RuVector Format (RVF) segment store, per ADR-029. It provides persistent
//! storage for inter-agent memory sharing, swarm coordination state, and
//! agent learning patterns.
//!
//! # Segment mapping
//!
//! - **VEC_SEG + META_SEG**: Shared memory entries (embeddings + key/value
//! metadata) for inter-agent memory sharing via the RVF streaming protocol.
//! - **META_SEG**: Swarm coordination state (agent states, topology changes).
//! - **SKETCH_SEG**: Agent learning patterns with effectiveness scores.
//! - **WITNESS_SEG**: Distributed consensus votes with signatures for
//! tamper-evident audit trails.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_agentic_flow::{AgenticFlowConfig, RvfSwarmStore};
//!
//! let config = AgenticFlowConfig::new("/tmp/swarm-data", "agent-001");
//! let mut store = RvfSwarmStore::create(config).unwrap();
//!
//! // Share a memory entry with other agents
//! let embedding = vec![0.1f32; 384];
//! store.share_memory("auth-pattern", "JWT with refresh tokens",
//! "patterns", &embedding).unwrap();
//!
//! // Search shared memories by embedding similarity
//! let results = store.search_shared(&embedding, 5);
//!
//! // Record coordination state
//! store.coordination().record_state("agent-001", "status", "active").unwrap();
//!
//! // Store a learning pattern
//! store.learning().store_pattern("convergent", "Use batched writes",
//! 0.92).unwrap();
//!
//! store.close().unwrap();
//! ```
pub mod config;
pub mod coordination;
pub mod learning;
pub mod swarm_store;
pub use config::{AgenticFlowConfig, ConfigError};
pub use coordination::{ConsensusVote, StateEntry, SwarmCoordination};
pub use learning::{LearningPatternStore, PatternResult};
pub use swarm_store::{
RvfSwarmStore, SharedMemoryEntry, SharedMemoryResult, SwarmStoreError,
};

View File

@@ -0,0 +1,587 @@
//! `RvfSwarmStore` -- main API wrapping `RvfStore` for swarm operations.
//!
//! Maps agentic-flow's inter-agent memory sharing model onto the RVF
//! segment model:
//! - Embeddings are stored as vectors via `ingest_batch`
//! - Agent ID, key, value, and namespace are encoded as metadata fields
//! - Searches use `query` with optional namespace filtering
//! - Coordination state and learning patterns are managed by sub-stores
use std::collections::HashMap;
use rvf_runtime::options::{
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;
use crate::config::{AgenticFlowConfig, ConfigError};
use crate::coordination::SwarmCoordination;
use crate::learning::LearningPatternStore;
/// Metadata field IDs for shared memory entries.
///
/// Each stored vector carries four string metadata fields under these IDs.
/// NOTE(review): presumably these IDs must stay stable so previously written
/// RVF files remain readable -- confirm before renumbering.
const FIELD_AGENT_ID: u16 = 0;
const FIELD_KEY: u16 = 1;
const FIELD_VALUE: u16 = 2;
const FIELD_NAMESPACE: u16 = 3;
/// A search result from shared memory, enriched with agent metadata.
///
/// Metadata is resolved from the store's in-memory entry index, so only
/// entries present in that index can appear in results.
#[derive(Clone, Debug)]
pub struct SharedMemoryResult {
    /// Vector ID in the underlying store.
    pub id: u64,
    /// Distance from the query embedding (lower = more similar).
    pub distance: f32,
    /// The agent that shared this memory.
    pub agent_id: String,
    /// The memory key.
    pub key: String,
}
/// A full shared memory entry retrieved by ID.
///
/// The (agent_id, namespace, key) triple forms the compound key used for
/// replace-on-reshare semantics in `RvfSwarmStore::share_memory`.
#[derive(Clone, Debug)]
pub struct SharedMemoryEntry {
    /// Vector ID in the underlying store.
    pub id: u64,
    /// The agent that shared this memory.
    pub agent_id: String,
    /// The memory key.
    pub key: String,
    /// The memory value.
    pub value: String,
    /// The namespace this entry belongs to.
    pub namespace: String,
}
/// The RVF-backed swarm store for agentic-flow.
pub struct RvfSwarmStore {
    /// Underlying RVF vector store (embeddings + metadata).
    store: RvfStore,
    /// Validated adapter configuration.
    config: AgenticFlowConfig,
    /// In-memory coordination log; recreated empty by `create`/`open`.
    coordination: SwarmCoordination,
    /// In-memory learning patterns; recreated empty by `create`/`open`.
    learning: LearningPatternStore,
    /// Maps "agent_id/namespace/key" -> vector_id for fast lookup.
    key_index: HashMap<String, u64>,
    /// Maps vector_id -> SharedMemoryEntry for retrieval by ID.
    entry_index: HashMap<u64, SharedMemoryEntry>,
    /// Next vector ID to assign.
    next_id: u64,
}
impl RvfSwarmStore {
    /// Create a new swarm store, initializing the data directory and RVF file.
    ///
    /// Validates the configuration, creates the data directory, and creates a
    /// fresh RVF store at `config.store_path()` using cosine distance.
    pub fn create(config: AgenticFlowConfig) -> Result<Self, SwarmStoreError> {
        config.validate().map_err(SwarmStoreError::Config)?;
        config
            .ensure_dirs()
            .map_err(|e| SwarmStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            metric: DistanceMetric::Cosine,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(SwarmStoreError::Rvf)?;
        Ok(Self {
            store,
            config,
            coordination: SwarmCoordination::new(),
            learning: LearningPatternStore::new(),
            key_index: HashMap::new(),
            entry_index: HashMap::new(),
            // Vector IDs start at 1; 0 is never assigned.
            next_id: 1,
        })
    }
    /// Open an existing swarm store.
    ///
    /// NOTE(review): `key_index` and `entry_index` start empty on reopen, so
    /// entries persisted in a previous session are not returned by
    /// `get_shared`/`search_shared` until shared again -- confirm this is the
    /// intended durability model.
    pub fn open(config: AgenticFlowConfig) -> Result<Self, SwarmStoreError> {
        config.validate().map_err(SwarmStoreError::Config)?;
        let store =
            RvfStore::open(&config.store_path()).map_err(SwarmStoreError::Rvf)?;
        // Rebuild next_id from the store status so new IDs don't collide.
        // NOTE(review): assumes total_vectors + current_epoch bounds every
        // previously assigned ID -- verify against RvfStore's ID semantics.
        let status = store.status();
        let next_id = status.total_vectors + status.current_epoch as u64 + 1;
        Ok(Self {
            store,
            config,
            coordination: SwarmCoordination::new(),
            learning: LearningPatternStore::new(),
            key_index: HashMap::new(),
            entry_index: HashMap::new(),
            next_id,
        })
    }
    /// Share a memory entry with other agents.
    ///
    /// Stores the embedding vector with agent_id/key/value/namespace as
    /// metadata fields. If an entry with the same agent_id/namespace/key
    /// already exists, the old one is soft-deleted and replaced.
    ///
    /// Returns the assigned vector ID.
    pub fn share_memory(
        &mut self,
        key: &str,
        value: &str,
        namespace: &str,
        embedding: &[f32],
    ) -> Result<u64, SwarmStoreError> {
        // Reject embeddings that don't match the configured dimension.
        if embedding.len() != self.config.dimension as usize {
            return Err(SwarmStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let compound_key = format!(
            "{}/{}/{}",
            self.config.agent_id, namespace, key
        );
        // Soft-delete existing entry with the same compound key.
        // NOTE(review): if ingest_batch below fails after this delete, the
        // old entry stays deleted while key_index still maps the compound key
        // to the deleted ID -- consider deferring the delete until ingest
        // succeeds.
        if let Some(&old_id) = self.key_index.get(&compound_key) {
            self.store.delete(&[old_id]).map_err(SwarmStoreError::Rvf)?;
            self.entry_index.remove(&old_id);
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        // Field IDs correspond to the FIELD_* constants at module scope.
        let metadata = vec![
            MetadataEntry {
                field_id: FIELD_AGENT_ID,
                value: MetadataValue::String(self.config.agent_id.clone()),
            },
            MetadataEntry {
                field_id: FIELD_KEY,
                value: MetadataValue::String(key.to_string()),
            },
            MetadataEntry {
                field_id: FIELD_VALUE,
                value: MetadataValue::String(value.to_string()),
            },
            MetadataEntry {
                field_id: FIELD_NAMESPACE,
                value: MetadataValue::String(namespace.to_string()),
            },
        ];
        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(SwarmStoreError::Rvf)?;
        // Mirror the entry in the in-memory indices for fast lookups.
        self.key_index.insert(compound_key, vector_id);
        self.entry_index.insert(
            vector_id,
            SharedMemoryEntry {
                id: vector_id,
                agent_id: self.config.agent_id.clone(),
                key: key.to_string(),
                value: value.to_string(),
                namespace: namespace.to_string(),
            },
        );
        Ok(vector_id)
    }
    /// Search for shared memories similar to the given embedding.
    ///
    /// Returns up to `k` results sorted by distance (closest first),
    /// enriched with agent metadata from the in-memory index.
    ///
    /// Query errors are swallowed and reported as an empty result set, and
    /// hits without an in-memory entry are dropped.
    pub fn search_shared(
        &self,
        embedding: &[f32],
        k: usize,
    ) -> Vec<SharedMemoryResult> {
        let options = QueryOptions::default();
        let results = match self.store.query(embedding, k, &options) {
            Ok(r) => r,
            Err(_) => return Vec::new(),
        };
        results
            .into_iter()
            .filter_map(|r| {
                let entry = self.entry_index.get(&r.id)?;
                Some(SharedMemoryResult {
                    id: r.id,
                    distance: r.distance,
                    agent_id: entry.agent_id.clone(),
                    key: entry.key.clone(),
                })
            })
            .collect()
    }
    /// Retrieve a shared memory entry by its vector ID.
    pub fn get_shared(&self, id: u64) -> Option<SharedMemoryEntry> {
        self.entry_index.get(&id).cloned()
    }
    /// Delete shared memory entries by their vector IDs.
    ///
    /// Unknown IDs are ignored.
    /// Returns the number of entries actually deleted.
    pub fn delete_shared(&mut self, ids: &[u64]) -> Result<usize, SwarmStoreError> {
        let existing: Vec<u64> = ids
            .iter()
            .filter(|id| self.entry_index.contains_key(id))
            .copied()
            .collect();
        if existing.is_empty() {
            return Ok(0);
        }
        self.store
            .delete(&existing)
            .map_err(SwarmStoreError::Rvf)?;
        let mut removed = 0;
        for &id in &existing {
            if let Some(entry) = self.entry_index.remove(&id) {
                // Drop the matching compound-key mapping as well.
                let compound_key = format!(
                    "{}/{}/{}",
                    entry.agent_id, entry.namespace, entry.key
                );
                self.key_index.remove(&compound_key);
                removed += 1;
            }
        }
        Ok(removed)
    }
    /// Get a mutable reference to the coordination state tracker.
    pub fn coordination(&mut self) -> &mut SwarmCoordination {
        &mut self.coordination
    }
    /// Get an immutable reference to the coordination state tracker.
    pub fn coordination_ref(&self) -> &SwarmCoordination {
        &self.coordination
    }
    /// Get a mutable reference to the learning pattern store.
    pub fn learning(&mut self) -> &mut LearningPatternStore {
        &mut self.learning
    }
    /// Get an immutable reference to the learning pattern store.
    pub fn learning_ref(&self) -> &LearningPatternStore {
        &self.learning
    }
    /// Get the current store status.
    pub fn status(&self) -> rvf_runtime::StoreStatus {
        self.store.status()
    }
    /// Get the agent ID for this store.
    pub fn agent_id(&self) -> &str {
        &self.config.agent_id
    }
    /// Close the swarm store, releasing locks.
    pub fn close(self) -> Result<(), SwarmStoreError> {
        self.store.close().map_err(SwarmStoreError::Rvf)
    }
}
/// Errors from swarm store operations.
#[derive(Debug)]
pub enum SwarmStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch {
        /// Dimension the store was configured with.
        expected: usize,
        /// Dimension of the embedding that was supplied.
        got: usize,
    },
}

impl std::fmt::Display for SwarmStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
        }
    }
}

impl std::error::Error for SwarmStoreError {}
#[cfg(test)]
mod tests {
    // Integration-style tests that exercise RvfSwarmStore against a real RVF
    // file in a temporary directory (uses the `tempfile` dev-dependency).
    use super::*;
    use tempfile::TempDir;
    // Build a 4-dimensional test configuration rooted at `dir`.
    fn test_config(dir: &std::path::Path) -> AgenticFlowConfig {
        AgenticFlowConfig::new(dir, "test-agent").with_dimension(4)
    }
    // Deterministic 4-dimensional embedding derived from `seed`.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }
    #[test]
    fn create_and_share() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let id = store
            .share_memory("key1", "value1", "default", &make_embedding(1.0))
            .unwrap();
        assert!(id > 0);
        let status = store.status();
        assert_eq!(status.total_vectors, 1);
        store.close().unwrap();
    }
    #[test]
    fn share_and_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        store
            .share_memory("a", "val_a", "ns1", &[1.0, 0.0, 0.0, 0.0])
            .unwrap();
        store
            .share_memory("b", "val_b", "ns1", &[0.0, 1.0, 0.0, 0.0])
            .unwrap();
        store
            .share_memory("c", "val_c", "ns2", &[0.0, 0.0, 1.0, 0.0])
            .unwrap();
        let results = store.search_shared(&[1.0, 0.0, 0.0, 0.0], 3);
        assert_eq!(results.len(), 3);
        // Closest should be "a"
        assert_eq!(results[0].key, "a");
        store.close().unwrap();
    }
    #[test]
    fn get_shared_by_id() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let id = store
            .share_memory("mykey", "myval", "ns", &make_embedding(2.0))
            .unwrap();
        let entry = store.get_shared(id).unwrap();
        assert_eq!(entry.key, "mykey");
        assert_eq!(entry.value, "myval");
        assert_eq!(entry.namespace, "ns");
        assert_eq!(entry.agent_id, "test-agent");
        assert!(store.get_shared(9999).is_none());
        store.close().unwrap();
    }
    #[test]
    fn delete_shared_entries() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let id1 = store
            .share_memory("k1", "v1", "ns", &make_embedding(1.0))
            .unwrap();
        let id2 = store
            .share_memory("k2", "v2", "ns", &make_embedding(2.0))
            .unwrap();
        let removed = store.delete_shared(&[id1]).unwrap();
        assert_eq!(removed, 1);
        assert!(store.get_shared(id1).is_none());
        assert!(store.get_shared(id2).is_some());
        store.close().unwrap();
    }
    #[test]
    fn delete_nonexistent_ids() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let removed = store.delete_shared(&[999, 1000]).unwrap();
        assert_eq!(removed, 0);
        store.close().unwrap();
    }
    #[test]
    fn replace_existing_key() {
        // Re-sharing the same agent/namespace/key soft-deletes the old entry
        // and assigns a fresh vector ID.
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let id1 = store
            .share_memory("k", "v1", "ns", &make_embedding(1.0))
            .unwrap();
        let id2 = store
            .share_memory("k", "v2", "ns", &make_embedding(2.0))
            .unwrap();
        assert_ne!(id1, id2);
        assert!(store.get_shared(id1).is_none());
        let entry = store.get_shared(id2).unwrap();
        assert_eq!(entry.value, "v2");
        let status = store.status();
        assert_eq!(status.total_vectors, 1);
        store.close().unwrap();
    }
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let result = store.share_memory("k", "v", "ns", &[1.0, 2.0]);
        assert!(result.is_err());
    }
    #[test]
    fn coordination_state() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        store
            .coordination()
            .record_state("agent-1", "status", "active")
            .unwrap();
        store
            .coordination()
            .record_state("agent-2", "status", "idle")
            .unwrap();
        let states = store.coordination_ref().get_all_states();
        assert_eq!(states.len(), 2);
        store.close().unwrap();
    }
    #[test]
    fn learning_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        let id = store
            .learning()
            .store_pattern("convergent", "Use batched writes", 0.85)
            .unwrap();
        let pattern = store.learning_ref().get_pattern(id).unwrap();
        assert_eq!(pattern.pattern_type, "convergent");
        assert!((pattern.score - 0.85).abs() < f32::EPSILON);
        store.close().unwrap();
    }
    #[test]
    fn open_existing_store() {
        // Vectors persist across close/open; only the on-disk count is
        // checked here (in-memory indices are rebuilt empty).
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        {
            let mut store = RvfSwarmStore::create(config.clone()).unwrap();
            store
                .share_memory("k", "v", "ns", &make_embedding(1.0))
                .unwrap();
            store.close().unwrap();
        }
        {
            let store = RvfSwarmStore::open(config).unwrap();
            let status = store.status();
            assert_eq!(status.total_vectors, 1);
            store.close().unwrap();
        }
    }
    #[test]
    fn agent_id_accessor() {
        let dir = TempDir::new().unwrap();
        let config = AgenticFlowConfig::new(dir.path(), "special-agent")
            .with_dimension(4);
        let store = RvfSwarmStore::create(config).unwrap();
        assert_eq!(store.agent_id(), "special-agent");
        store.close().unwrap();
    }
    #[test]
    fn empty_store_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let store = RvfSwarmStore::create(config).unwrap();
        let results = store.search_shared(&[1.0, 0.0, 0.0, 0.0], 5);
        assert!(results.is_empty());
        store.close().unwrap();
    }
    #[test]
    fn consensus_votes() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfSwarmStore::create(config).unwrap();
        store
            .coordination()
            .record_consensus_vote("leader-election", "a1", true)
            .unwrap();
        store
            .coordination()
            .record_consensus_vote("leader-election", "a2", false)
            .unwrap();
        let votes = store.coordination_ref().get_votes("leader-election");
        assert_eq!(votes.len(), 2);
        assert!(votes[0].vote);
        assert!(!votes[1].vote);
        store.close().unwrap();
    }
    #[test]
    fn invalid_config_rejected() {
        let dir = TempDir::new().unwrap();
        // Zero dimension
        let config = AgenticFlowConfig::new(dir.path(), "a1").with_dimension(0);
        assert!(RvfSwarmStore::create(config).is_err());
        // Empty agent_id
        let config = AgenticFlowConfig::new(dir.path(), "").with_dimension(4);
        assert!(RvfSwarmStore::create(config).is_err());
    }
}

View File

@@ -0,0 +1,19 @@
[package]
name = "rvf-adapter-claude-flow"
version = "0.1.0"
edition = "2021"
description = "RVF adapter for claude-flow memory subsystem — stores memory entries as RVF files with WITNESS_SEG audit trails"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
[features]
default = ["std"]
std = []
[dependencies]
rvf-types = { path = "../../rvf-types", features = ["std"] }
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-crypto = { path = "../../rvf-crypto", features = ["std"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,124 @@
//! Configuration for the claude-flow memory adapter.
use std::path::PathBuf;
use rvf_runtime::options::DistanceMetric;
/// Configuration for the RVF-backed claude-flow memory store.
///
/// Build with [`ClaudeFlowConfig::new`], refine with the `with_*` builder
/// methods, and check invariants with `validate` before use.
#[derive(Clone, Debug)]
pub struct ClaudeFlowConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match the embeddings used by claude-flow).
    /// `validate` rejects a value of 0.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: DistanceMetric,
    /// Whether to record witness entries for audit trails.
    pub enable_witness: bool,
}
impl ClaudeFlowConfig {
    /// Build a configuration from the mandatory fields, defaulting to
    /// cosine distance with witness auditing enabled.
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        Self {
            data_dir: data_dir.into(),
            dimension,
            metric: DistanceMetric::Cosine,
            enable_witness: true,
        }
    }
    /// Override the similarity metric.
    pub fn with_metric(self, metric: DistanceMetric) -> Self {
        Self { metric, ..self }
    }
    /// Toggle the witness audit trail.
    pub fn with_witness(self, enable: bool) -> Self {
        Self { enable_witness: enable, ..self }
    }
    /// Path of the main vector store file inside `data_dir`.
    pub fn store_path(&self) -> PathBuf {
        self.data_dir.join("memory.rvf")
    }
    /// Path of the witness chain file inside `data_dir`.
    pub fn witness_path(&self) -> PathBuf {
        self.data_dir.join("witness.bin")
    }
    /// Create `data_dir` (and any missing parents).
    pub fn ensure_dirs(&self) -> std::io::Result<()> {
        std::fs::create_dir_all(&self.data_dir)
    }
    /// Check invariants; currently only that `dimension` is non-zero.
    pub fn validate(&self) -> Result<(), ConfigError> {
        match self.dimension {
            0 => Err(ConfigError::InvalidDimension),
            _ => Ok(()),
        }
    }
}
/// Errors specific to adapter configuration.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
}

impl std::fmt::Display for ConfigError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the message first; keeps the match exhaustive if new
        // variants are added later.
        let msg = match self {
            Self::InvalidDimension => "vector dimension must be > 0",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for ConfigError {}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    // Defaults: cosine metric, witness auditing enabled.
    #[test]
    fn config_defaults() {
        let cfg = ClaudeFlowConfig::new("/tmp/test", 384);
        assert_eq!(cfg.dimension, 384);
        assert_eq!(cfg.metric, DistanceMetric::Cosine);
        assert!(cfg.enable_witness);
    }
    // Derived file paths live directly under the data directory.
    #[test]
    fn config_paths() {
        let cfg = ClaudeFlowConfig::new("/data/memory", 128);
        assert_eq!(cfg.store_path(), Path::new("/data/memory/memory.rvf"));
        assert_eq!(cfg.witness_path(), Path::new("/data/memory/witness.bin"));
    }
    // A zero dimension is the only invalid configuration today.
    #[test]
    fn validate_zero_dimension() {
        let cfg = ClaudeFlowConfig::new("/tmp", 0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }
    #[test]
    fn validate_ok() {
        let cfg = ClaudeFlowConfig::new("/tmp", 64);
        assert!(cfg.validate().is_ok());
    }
    // Builder methods must override the defaults.
    #[test]
    fn builder_methods() {
        let cfg = ClaudeFlowConfig::new("/tmp", 256)
            .with_metric(DistanceMetric::L2)
            .with_witness(false);
        assert_eq!(cfg.metric, DistanceMetric::L2);
        assert!(!cfg.enable_witness);
    }
}

View File

@@ -0,0 +1,48 @@
//! RVF adapter for the claude-flow memory subsystem.
//!
//! This crate bridges claude-flow's key/value/embedding memory model
//! with the RuVector Format (RVF) segment store. Memory entries are
//! persisted as RVF files with the RVText profile, and every mutation
//! is recorded in a WITNESS_SEG audit trail for tamper-evident logging.
//!
//! # Architecture
//!
//! - **`RvfMemoryStore`**: Main API wrapping `RvfStore` for
//! store/search/retrieve/delete operations on memory entries.
//! - **`WitnessChain`**: Persistent, append-only audit log using
//! `rvf_crypto::witness` chains (SHAKE-256 linked).
//! - **`ClaudeFlowConfig`**: Configuration for data directory, embedding
//! dimension, distance metric, and witness toggle.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_claude_flow::{ClaudeFlowConfig, RvfMemoryStore};
//!
//! let config = ClaudeFlowConfig::new("/tmp/claude-flow-memory", 384);
//! let mut store = RvfMemoryStore::create(config).unwrap();
//!
//! // Store a memory entry with its embedding
//! let embedding = vec![0.1f32; 384];
//! store.store_memory("auth-pattern", "JWT with refresh tokens",
//! "patterns", &["auth".into()], &embedding).unwrap();
//!
//! // Search by embedding similarity
//! let results = store.search_memory(&embedding, 5, Some("patterns"), None).unwrap();
//!
//! // Retrieve by key
//! let id = store.retrieve_memory("auth-pattern", "patterns");
//!
//! // Delete
//! store.delete_memory("auth-pattern", "patterns").unwrap();
//!
//! store.close().unwrap();
//! ```
pub mod config;
pub mod memory_store;
pub mod witness;
pub use config::ClaudeFlowConfig;
pub use memory_store::{MemoryEntry, MemoryStoreError, RvfMemoryStore};
pub use witness::{WitnessChain, WitnessError};

View File

@@ -0,0 +1,445 @@
//! `RvfMemoryStore` — wraps `RvfStore` for claude-flow memory operations.
//!
//! Maps claude-flow's key/value/namespace/tags/embedding model onto the
//! RVF segment model:
//! - Embeddings are stored as vectors via `ingest_batch`
//! - Keys and namespaces are encoded as metadata (META_SEG fields)
//! - Searches use `query` with optional namespace filtering
//! - Deletes use soft-delete with witness recording
use std::collections::HashMap;
use rvf_runtime::filter::{FilterExpr, FilterValue};
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::{RvfStore, SearchResult};
use rvf_types::RvfError;
use crate::config::ClaudeFlowConfig;
use crate::witness::WitnessChain;
/// Metadata field IDs for claude-flow memory entries.
///
/// These IDs are written into META_SEG entries and referenced by search
/// filters, so they must stay stable across versions.
const FIELD_KEY: u16 = 0;
/// Namespace the entry belongs to (used for filtered search).
const FIELD_NAMESPACE: u16 = 1;
/// Comma-joined tag list (see `store_memory`).
const FIELD_TAGS: u16 = 2;
/// A memory entry returned from retrieval or search.
///
/// NOTE(review): the stored value text is not carried here — only key,
/// namespace, tags and the vector handle; confirm whether callers expect
/// the value to round-trip.
#[derive(Clone, Debug)]
pub struct MemoryEntry {
    /// The memory key.
    pub key: String,
    /// The namespace this entry belongs to.
    pub namespace: String,
    /// Tags associated with this entry.
    pub tags: Vec<String>,
    /// The vector ID in the underlying store.
    pub vector_id: u64,
    /// Distance from query (only meaningful for search results).
    pub distance: f32,
}
/// The RVF-backed memory store for claude-flow.
pub struct RvfMemoryStore {
    /// Underlying RVF segment store holding embeddings + metadata.
    store: RvfStore,
    /// Append-only audit chain; `None` when witnessing is disabled.
    witness: Option<WitnessChain>,
    /// Configuration captured at create/open time.
    config: ClaudeFlowConfig,
    /// Maps "namespace/key" -> vector_id for fast lookup.
    /// In-memory only: `open` does not rebuild it from disk, so key-based
    /// lookup covers only entries stored during this session.
    key_index: HashMap<String, u64>,
    /// Next vector ID to assign.
    next_id: u64,
}
impl RvfMemoryStore {
    /// Create a new memory store, initializing the data directory and RVF file.
    ///
    /// # Errors
    /// - `Config` if the configuration is invalid (zero dimension)
    /// - `Io` if the data directory cannot be created
    /// - `Rvf` / `Witness` if the store or audit chain cannot be created
    pub fn create(config: ClaudeFlowConfig) -> Result<Self, MemoryStoreError> {
        config.validate().map_err(MemoryStoreError::Config)?;
        config.ensure_dirs().map_err(|e| MemoryStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            metric: config.metric,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(MemoryStoreError::Rvf)?;
        let witness = if config.enable_witness {
            Some(WitnessChain::create(&config.witness_path())
                .map_err(MemoryStoreError::Witness)?)
        } else {
            None
        };
        Ok(Self {
            store,
            witness,
            config,
            key_index: HashMap::new(),
            next_id: 1,
        })
    }
    /// Open an existing memory store.
    ///
    /// The in-memory `key_index` is NOT recovered from disk (the runtime
    /// does not expose metadata iteration), so key-based retrieval only
    /// works for entries stored during this session; existing vectors
    /// remain searchable by embedding.
    pub fn open(config: ClaudeFlowConfig) -> Result<Self, MemoryStoreError> {
        config.validate().map_err(MemoryStoreError::Config)?;
        let store = RvfStore::open(&config.store_path())
            .map_err(MemoryStoreError::Rvf)?;
        let witness = if config.enable_witness {
            Some(WitnessChain::open_or_create(&config.witness_path())
                .map_err(MemoryStoreError::Witness)?)
        } else {
            None
        };
        // NOTE(review): this heuristic (total_vectors + epoch + 1) can
        // collide with live IDs after deletions; confirm the runtime's
        // ID-allocation contract or persist the counter explicitly.
        let status = store.status();
        let next_id = status.total_vectors + status.current_epoch as u64 + 1;
        Ok(Self {
            store,
            witness,
            config,
            key_index: HashMap::new(),
            next_id,
        })
    }
    /// Store a memory entry with its embedding vector.
    ///
    /// `_value` is currently NOT persisted — only the key, namespace, tags
    /// and embedding are written; callers must keep the value elsewhere.
    /// If an entry with the same key and namespace already exists, the old
    /// one is soft-deleted and replaced. Returns the new vector ID.
    pub fn store_memory(
        &mut self,
        key: &str,
        _value: &str,
        namespace: &str,
        tags: &[String],
        embedding: &[f32],
    ) -> Result<u64, MemoryStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(MemoryStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        // If key already exists in this namespace, soft-delete the old entry.
        let compound_key = format!("{namespace}/{key}");
        if let Some(&old_id) = self.key_index.get(&compound_key) {
            self.store.delete(&[old_id]).map_err(MemoryStoreError::Rvf)?;
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        // Encode tags as a comma-separated string for metadata storage.
        let tags_str = tags.join(",");
        let metadata = vec![
            MetadataEntry { field_id: FIELD_KEY, value: MetadataValue::String(key.to_string()) },
            MetadataEntry { field_id: FIELD_NAMESPACE, value: MetadataValue::String(namespace.to_string()) },
            MetadataEntry { field_id: FIELD_TAGS, value: MetadataValue::String(tags_str) },
        ];
        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(MemoryStoreError::Rvf)?;
        self.key_index.insert(compound_key, vector_id);
        if let Some(ref mut w) = self.witness {
            // Witness failures are best-effort and never fail the store op.
            let _ = w.record_store(key, namespace);
        }
        Ok(vector_id)
    }
    /// Search memory by embedding vector, optionally filtering by namespace
    /// and by a maximum distance threshold.
    ///
    /// Fix: `threshold` was previously accepted but silently ignored; it
    /// now drops any result whose distance exceeds the given value.
    pub fn search_memory(
        &mut self,
        query_embedding: &[f32],
        k: usize,
        namespace: Option<&str>,
        threshold: Option<f32>,
    ) -> Result<Vec<SearchResult>, MemoryStoreError> {
        if query_embedding.len() != self.config.dimension as usize {
            return Err(MemoryStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: query_embedding.len(),
            });
        }
        let filter = namespace.map(|ns| {
            FilterExpr::Eq(FIELD_NAMESPACE, FilterValue::String(ns.to_string()))
        });
        let options = QueryOptions {
            filter,
            ..Default::default()
        };
        let mut results = self.store.query(query_embedding, k, &options)
            .map_err(MemoryStoreError::Rvf)?;
        // Apply the caller-supplied distance cutoff after the k-NN query.
        // NOTE(review): assumes `SearchResult` exposes a `distance` field
        // mirroring `MemoryEntry::distance` — confirm against rvf-runtime.
        if let Some(max_distance) = threshold {
            results.retain(|r| r.distance <= max_distance);
        }
        if let Some(ref mut w) = self.witness {
            let ns = namespace.unwrap_or("*");
            let _ = w.record_search(ns, k);
        }
        Ok(results)
    }
    /// Retrieve a memory entry by key and namespace.
    ///
    /// Returns the vector ID if found (the entry can then be used with
    /// the underlying store for further operations). Only entries stored
    /// during this session are found (see `open`).
    pub fn retrieve_memory(
        &self,
        key: &str,
        namespace: &str,
    ) -> Option<u64> {
        let compound_key = format!("{namespace}/{key}");
        self.key_index.get(&compound_key).copied()
    }
    /// Soft-delete a memory entry by key and namespace.
    ///
    /// Returns `Ok(true)` if an entry was deleted, `Ok(false)` if no entry
    /// with that key/namespace is known.
    pub fn delete_memory(
        &mut self,
        key: &str,
        namespace: &str,
    ) -> Result<bool, MemoryStoreError> {
        let compound_key = format!("{namespace}/{key}");
        if let Some(vector_id) = self.key_index.remove(&compound_key) {
            self.store.delete(&[vector_id]).map_err(MemoryStoreError::Rvf)?;
            if let Some(ref mut w) = self.witness {
                let _ = w.record_delete(key, namespace);
            }
            Ok(true)
        } else {
            Ok(false)
        }
    }
    /// Run compaction on the underlying store to reclaim dead space.
    pub fn compact(&mut self) -> Result<(), MemoryStoreError> {
        self.store.compact().map_err(MemoryStoreError::Rvf)?;
        if let Some(ref mut w) = self.witness {
            let _ = w.record_compact();
        }
        Ok(())
    }
    /// Get the current store status.
    pub fn status(&self) -> rvf_runtime::StoreStatus {
        self.store.status()
    }
    /// Return a reference to the witness chain (if enabled).
    pub fn witness(&self) -> Option<&WitnessChain> {
        self.witness.as_ref()
    }
    /// Close the memory store, releasing locks.
    pub fn close(self) -> Result<(), MemoryStoreError> {
        self.store.close().map_err(MemoryStoreError::Rvf)
    }
}
/// Errors from memory store operations.
///
/// Only `Debug` is derived; the wrapped `RvfError`/`WitnessError` payloads
/// are carried as-is for inspection by callers.
#[derive(Debug)]
pub enum MemoryStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Witness chain error.
    Witness(crate::witness::WitnessError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    /// `expected` is the configured dimension, `got` the slice length.
    DimensionMismatch { expected: usize, got: usize },
}
impl std::fmt::Display for MemoryStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Build the message up front, then emit it in one call.
        let text = match self {
            Self::Rvf(e) => format!("RVF store error: {e}"),
            Self::Witness(e) => format!("witness error: {e}"),
            Self::Config(e) => format!("config error: {e}"),
            Self::Io(msg) => format!("I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                format!("dimension mismatch: expected {expected}, got {got}")
            }
        };
        f.write_str(&text)
    }
}

impl std::error::Error for MemoryStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use tempfile::TempDir;
    // All tests use a tiny 4-dimensional embedding space.
    fn test_config(dir: &Path) -> ClaudeFlowConfig {
        ClaudeFlowConfig::new(dir, 4)
    }
    // Deterministic 4-dim embedding derived from a single seed value.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }
    #[test]
    fn create_and_store() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        let id = store.store_memory(
            "key1", "value1", "default", &["tag1".into(), "tag2".into()],
            &make_embedding(1.0),
        ).unwrap();
        assert!(id > 0);
        let status = store.status();
        assert_eq!(status.total_vectors, 1);
        store.close().unwrap();
    }
    #[test]
    fn store_and_search() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("a", "val_a", "ns1", &[], &[1.0, 0.0, 0.0, 0.0]).unwrap();
        store.store_memory("b", "val_b", "ns1", &[], &[0.0, 1.0, 0.0, 0.0]).unwrap();
        store.store_memory("c", "val_c", "ns2", &[], &[0.0, 0.0, 1.0, 0.0]).unwrap();
        // Search all namespaces
        let results = store.search_memory(&[1.0, 0.0, 0.0, 0.0], 3, None, None).unwrap();
        assert_eq!(results.len(), 3);
        // Search filtered by namespace
        let results = store.search_memory(&[1.0, 0.0, 0.0, 0.0], 3, Some("ns1"), None).unwrap();
        assert_eq!(results.len(), 2);
        store.close().unwrap();
    }
    // Key lookup is session-local (see `RvfMemoryStore::open`).
    #[test]
    fn retrieve_by_key() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        let id = store.store_memory("mykey", "myval", "ns", &[], &make_embedding(2.0)).unwrap();
        assert_eq!(store.retrieve_memory("mykey", "ns"), Some(id));
        assert_eq!(store.retrieve_memory("missing", "ns"), None);
        assert_eq!(store.retrieve_memory("mykey", "other_ns"), None);
        store.close().unwrap();
    }
    #[test]
    fn delete_memory() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("k", "v", "ns", &[], &make_embedding(3.0)).unwrap();
        assert!(store.delete_memory("k", "ns").unwrap());
        assert!(!store.delete_memory("k", "ns").unwrap()); // already deleted
        assert_eq!(store.retrieve_memory("k", "ns"), None);
        store.close().unwrap();
    }
    // Re-storing a key soft-deletes the old vector and assigns a new ID.
    #[test]
    fn replace_existing_key() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        let id1 = store.store_memory("k", "v1", "ns", &[], &make_embedding(1.0)).unwrap();
        let id2 = store.store_memory("k", "v2", "ns", &[], &make_embedding(2.0)).unwrap();
        // New ID should be different (old was soft-deleted)
        assert_ne!(id1, id2);
        assert_eq!(store.retrieve_memory("k", "ns"), Some(id2));
        // Only one live vector
        let status = store.status();
        assert_eq!(status.total_vectors, 1);
        store.close().unwrap();
    }
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        let result = store.store_memory("k", "v", "ns", &[], &[1.0, 2.0]); // dim=2 vs config dim=4
        assert!(result.is_err());
    }
    // Every mutation and search leaves a verifiable witness entry.
    #[test]
    fn witness_audit_trail() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("a", "v", "ns", &[], &make_embedding(1.0)).unwrap();
        store.search_memory(&make_embedding(1.0), 1, None, None).unwrap();
        store.delete_memory("a", "ns").unwrap();
        let witness = store.witness().unwrap();
        assert_eq!(witness.len(), 3); // store + search + delete
        assert_eq!(witness.verify().unwrap(), 3);
        store.close().unwrap();
    }
    #[test]
    fn compact_works() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = RvfMemoryStore::create(config).unwrap();
        store.store_memory("a", "v", "ns", &[], &make_embedding(1.0)).unwrap();
        store.store_memory("b", "v", "ns", &[], &make_embedding(2.0)).unwrap();
        store.delete_memory("a", "ns").unwrap();
        store.compact().unwrap();
        let status = store.status();
        assert_eq!(status.total_vectors, 1);
        store.close().unwrap();
    }
    #[test]
    fn no_witness_when_disabled() {
        let dir = TempDir::new().unwrap();
        let config = ClaudeFlowConfig::new(dir.path(), 4).with_witness(false);
        let store = RvfMemoryStore::create(config).unwrap();
        assert!(store.witness().is_none());
        store.close().unwrap();
    }
}

View File

@@ -0,0 +1,292 @@
//! Audit trail using WITNESS_SEG for claude-flow memory operations.
//!
//! Wraps `rvf_crypto::witness` to provide a persistent, append-only
//! witness chain that records every memory store/delete/search action.
use std::fs::{File, OpenOptions};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use rvf_crypto::witness::{WitnessEntry, create_witness_chain, verify_witness_chain};
use rvf_crypto::shake256_256;
/// Witness type constants for claude-flow actions.
///
/// Written into each entry's `witness_type` byte to tag the recorded
/// operation. This one marks a memory store.
pub const WITNESS_STORE: u8 = 0x01;
/// A memory entry was deleted.
pub const WITNESS_DELETE: u8 = 0x02;
/// A similarity search was executed.
pub const WITNESS_SEARCH: u8 = 0x03;
/// A compaction pass ran.
pub const WITNESS_COMPACT: u8 = 0x04;
/// Persistent witness chain that records memory operations.
///
/// The full serialized chain is mirrored in memory and rewritten to disk
/// on every append (see `append_entry`).
pub struct WitnessChain {
    /// On-disk location of the serialized chain.
    path: PathBuf,
    /// Cached chain bytes (in-memory mirror of the file).
    chain_data: Vec<u8>,
    /// Number of entries in the chain.
    entry_count: usize,
}
impl WitnessChain {
    /// Create a new (empty) witness chain file at the given path.
    ///
    /// NOTE(review): `File::create` truncates an existing file, so calling
    /// this on a path that already holds a chain silently discards it —
    /// use `open_or_create` to preserve existing entries.
    pub fn create(path: &Path) -> Result<Self, WitnessError> {
        File::create(path).map_err(|e| WitnessError::Io(e.to_string()))?;
        Ok(Self {
            path: path.to_path_buf(),
            chain_data: Vec::new(),
            entry_count: 0,
        })
    }
    /// Open an existing witness chain file, verifying its integrity.
    ///
    /// Returns `ChainCorrupted` if the stored bytes fail verification.
    pub fn open(path: &Path) -> Result<Self, WitnessError> {
        let mut file = File::open(path).map_err(|e| WitnessError::Io(e.to_string()))?;
        let mut data = Vec::new();
        file.read_to_end(&mut data).map_err(|e| WitnessError::Io(e.to_string()))?;
        // An empty file is a valid zero-entry chain (as left by `create`).
        if data.is_empty() {
            return Ok(Self {
                path: path.to_path_buf(),
                chain_data: Vec::new(),
                entry_count: 0,
            });
        }
        let entries = verify_witness_chain(&data)
            .map_err(|_| WitnessError::ChainCorrupted)?;
        Ok(Self {
            path: path.to_path_buf(),
            chain_data: data,
            entry_count: entries.len(),
        })
    }
    /// Open an existing chain or create a new one.
    pub fn open_or_create(path: &Path) -> Result<Self, WitnessError> {
        if path.exists() {
            Self::open(path)
        } else {
            Self::create(path)
        }
    }
    /// Record a memory store action.
    ///
    /// The hashed action string is "store:<namespace>/<key>".
    pub fn record_store(&mut self, key: &str, namespace: &str) -> Result<(), WitnessError> {
        let mut hasher_input = Vec::new();
        hasher_input.extend_from_slice(b"store:");
        hasher_input.extend_from_slice(namespace.as_bytes());
        hasher_input.push(b'/');
        hasher_input.extend_from_slice(key.as_bytes());
        self.append_entry(&hasher_input, WITNESS_STORE)
    }
    /// Record a memory delete action ("delete:<namespace>/<key>").
    pub fn record_delete(&mut self, key: &str, namespace: &str) -> Result<(), WitnessError> {
        let mut hasher_input = Vec::new();
        hasher_input.extend_from_slice(b"delete:");
        hasher_input.extend_from_slice(namespace.as_bytes());
        hasher_input.push(b'/');
        hasher_input.extend_from_slice(key.as_bytes());
        self.append_entry(&hasher_input, WITNESS_DELETE)
    }
    /// Record a search action ("search:<namespace>:<k>").
    pub fn record_search(&mut self, namespace: &str, k: usize) -> Result<(), WitnessError> {
        let mut hasher_input = Vec::new();
        hasher_input.extend_from_slice(b"search:");
        hasher_input.extend_from_slice(namespace.as_bytes());
        hasher_input.push(b':');
        hasher_input.extend_from_slice(k.to_string().as_bytes());
        self.append_entry(&hasher_input, WITNESS_SEARCH)
    }
    /// Record a compaction action.
    pub fn record_compact(&mut self) -> Result<(), WitnessError> {
        self.append_entry(b"compact", WITNESS_COMPACT)
    }
    /// Verify the entire chain is intact; returns the entry count.
    pub fn verify(&self) -> Result<usize, WitnessError> {
        if self.chain_data.is_empty() {
            return Ok(0);
        }
        let entries = verify_witness_chain(&self.chain_data)
            .map_err(|_| WitnessError::ChainCorrupted)?;
        Ok(entries.len())
    }
    /// Return the number of entries in the chain.
    pub fn len(&self) -> usize {
        self.entry_count
    }
    /// Return whether the chain is empty.
    pub fn is_empty(&self) -> bool {
        self.entry_count == 0
    }
    // ── Internal ──────────────────────────────────────────────────────
    //
    // NOTE(review): every append re-verifies, re-links and rewrites the
    // ENTIRE chain, so N appends cost O(N^2) total work and I/O. Fine for
    // small audit logs; revisit if chains grow large.
    fn append_entry(&mut self, action_data: &[u8], witness_type: u8) -> Result<(), WitnessError> {
        let action_hash = shake256_256(action_data);
        let timestamp_ns = now_ns();
        let entry = WitnessEntry {
            prev_hash: [0u8; 32], // create_witness_chain will set this
            action_hash,
            timestamp_ns,
            witness_type,
        };
        // Rebuild the entire chain with the new entry appended.
        // This is correct because create_witness_chain re-links prev_hash.
        let mut all_entries = if self.chain_data.is_empty() {
            Vec::new()
        } else {
            verify_witness_chain(&self.chain_data)
                .map_err(|_| WitnessError::ChainCorrupted)?
        };
        all_entries.push(entry);
        let new_chain = create_witness_chain(&all_entries);
        // Persist atomically: write to temp then rename, so a crash mid-write
        // never leaves a partially written chain at `self.path`.
        let tmp_path = self.path.with_extension("bin.tmp");
        {
            let mut f = OpenOptions::new()
                .write(true)
                .create(true)
                .truncate(true)
                .open(&tmp_path)
                .map_err(|e| WitnessError::Io(e.to_string()))?;
            f.write_all(&new_chain).map_err(|e| WitnessError::Io(e.to_string()))?;
            f.sync_all().map_err(|e| WitnessError::Io(e.to_string()))?;
        }
        std::fs::rename(&tmp_path, &self.path).map_err(|e| WitnessError::Io(e.to_string()))?;
        // Only update in-memory state once the file swap has succeeded.
        self.chain_data = new_chain;
        self.entry_count = all_entries.len();
        Ok(())
    }
}
/// Errors from witness chain operations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WitnessError {
    /// I/O error (stringified for Clone/Eq compatibility).
    Io(String),
    /// Chain integrity verification failed.
    ChainCorrupted,
}

impl std::fmt::Display for WitnessError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(msg) => write!(f, "witness I/O error: {msg}"),
            Self::ChainCorrupted => f.write_str("witness chain integrity check failed"),
        }
    }
}

impl std::error::Error for WitnessError {}
/// Current wall-clock time as nanoseconds since the UNIX epoch.
///
/// Falls back to 0 if the system clock reports a time before the epoch.
fn now_ns() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos() as u64,
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    #[test]
    fn create_and_open_empty() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        let chain = WitnessChain::create(&path).unwrap();
        assert_eq!(chain.len(), 0);
        assert!(chain.is_empty());
        let reopened = WitnessChain::open(&path).unwrap();
        assert_eq!(reopened.len(), 0);
    }
    #[test]
    fn record_and_verify() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        let mut chain = WitnessChain::create(&path).unwrap();
        chain.record_store("key1", "default").unwrap();
        chain.record_search("default", 5).unwrap();
        chain.record_delete("key1", "default").unwrap();
        assert_eq!(chain.len(), 3);
        let count = chain.verify().unwrap();
        assert_eq!(count, 3);
    }
    // Entries written in one session must survive a drop + reopen.
    #[test]
    fn persistence_across_reopen() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_store("a", "ns").unwrap();
            chain.record_store("b", "ns").unwrap();
        }
        let chain = WitnessChain::open(&path).unwrap();
        assert_eq!(chain.len(), 2);
        assert_eq!(chain.verify().unwrap(), 2);
    }
    // Flipping a byte in the stored chain must be detected either at
    // open() or at verify().
    #[test]
    fn tampered_chain_detected() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_store("x", "ns").unwrap();
            chain.record_store("y", "ns").unwrap();
        }
        // Tamper with the file
        let mut data = std::fs::read(&path).unwrap();
        if data.len() > 40 {
            data[40] ^= 0xFF;
        }
        std::fs::write(&path, &data).unwrap();
        let result = WitnessChain::open(&path);
        assert!(result.is_err() || result.unwrap().verify().is_err());
    }
    #[test]
    fn open_or_create_new() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        let chain = WitnessChain::open_or_create(&path).unwrap();
        assert!(chain.is_empty());
    }
    #[test]
    fn open_or_create_existing() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("witness.bin");
        {
            let mut chain = WitnessChain::create(&path).unwrap();
            chain.record_compact().unwrap();
        }
        let chain = WitnessChain::open_or_create(&path).unwrap();
        assert_eq!(chain.len(), 1);
    }
}

View File

@@ -0,0 +1,18 @@
# Manifest for the OSpipe observation-state adapter crate.
[package]
name = "rvf-adapter-ospipe"
version = "0.1.0"
edition = "2021"
description = "OSpipe adapter for RuVector Format -- maps observation state vectors to RVF with META_SEG"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"

# `std` is declared empty here; presumably it mirrors the feature flags
# of the rvf-* dependencies below.
[features]
default = ["std"]
std = []

# Workspace-sibling crates, referenced by path.
[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }

# Used only by the test suite for throwaway directories.
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,17 @@
//! OSpipe adapter for the RuVector Format (RVF).
//!
//! Maps OSpipe's observation-state pipeline onto the RVF segment model:
//!
//! - **VEC_SEG**: State vector embeddings (screen, audio, UI observations)
//! - **META_SEG**: Observation metadata (app name, content type, timestamps)
//! - **JOURNAL_SEG**: Deletion records for expired observations
//!
//! The adapter bridges OSpipe's `StoredEmbedding` / `CapturedFrame` world
//! (UUID ids, chrono timestamps, JSON metadata) to RVF's u64-id,
//! field-based metadata model.
pub mod observation_store;
pub mod pipeline;
pub use observation_store::{ObservationMeta, RvfObservationStore};
pub use pipeline::{PipelineConfig, RvfPipelineAdapter};

View File

@@ -0,0 +1,636 @@
//! RVF-backed observation store for OSpipe state vectors.
//!
//! Maps OSpipe observation embeddings into RVF segments with metadata
//! stored via field IDs in META_SEG entries.
//!
//! # Field layout
//!
//! | field_id | type | description |
//! |----------|--------|------------------------|
//! | 0 | String | content_type |
//! | 1 | String | app_name |
//! | 2 | U64 | timestamp_secs (epoch) |
//! | 3 | U64 | monitor_id |
use std::path::PathBuf;
use rvf_runtime::filter::FilterExpr;
use rvf_runtime::options::{
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
};
use rvf_runtime::{IngestResult, RvfStore, SearchResult, StoreStatus};
use rvf_types::RvfError;
/// Well-known metadata field IDs for OSpipe observations.
pub mod fields {
/// Content type (ocr, transcription, ui_event).
pub const CONTENT_TYPE: u16 = 0;
/// Application name.
pub const APP_NAME: u16 = 1;
/// Observation timestamp as seconds since UNIX epoch.
pub const TIMESTAMP_SECS: u16 = 2;
/// Monitor index.
pub const MONITOR_ID: u16 = 3;
}
/// Metadata for an observation to be recorded.
///
/// Optional fields are simply omitted from the serialized metadata when
/// `None` (see `to_entries`).
#[derive(Clone, Debug)]
pub struct ObservationMeta {
    /// Content type label (e.g. "ocr", "transcription", "ui_event").
    pub content_type: String,
    /// Application name, if known.
    pub app_name: Option<String>,
    /// Observation timestamp as seconds since UNIX epoch.
    pub timestamp_secs: u64,
    /// Monitor index, if applicable.
    pub monitor_id: Option<u32>,
}
impl ObservationMeta {
/// Convert to RVF metadata entries for a single vector.
fn to_entries(&self) -> Vec<MetadataEntry> {
let mut entries = Vec::with_capacity(4);
entries.push(MetadataEntry {
field_id: fields::CONTENT_TYPE,
value: MetadataValue::String(self.content_type.clone()),
});
if let Some(ref app) = self.app_name {
entries.push(MetadataEntry {
field_id: fields::APP_NAME,
value: MetadataValue::String(app.clone()),
});
}
entries.push(MetadataEntry {
field_id: fields::TIMESTAMP_SECS,
value: MetadataValue::U64(self.timestamp_secs),
});
if let Some(monitor) = self.monitor_id {
entries.push(MetadataEntry {
field_id: fields::MONITOR_ID,
value: MetadataValue::U64(monitor as u64),
});
}
entries
}
}
/// Configuration for the observation store.
#[derive(Clone, Debug)]
pub struct ObservationStoreConfig {
    /// Directory for RVF data files.
    pub data_dir: PathBuf,
    /// Vector embedding dimension.
    /// `RvfObservationStore::create` rejects a value of 0.
    pub dimension: u16,
    /// Distance metric (defaults to Cosine for OSpipe embeddings).
    pub metric: DistanceMetric,
}
impl ObservationStoreConfig {
/// Create with required parameters, using Cosine metric by default.
pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
Self {
data_dir: data_dir.into(),
dimension,
metric: DistanceMetric::Cosine,
}
}
/// Set the distance metric.
pub fn with_metric(mut self, metric: DistanceMetric) -> Self {
self.metric = metric;
self
}
fn store_path(&self) -> PathBuf {
self.data_dir.join("observations.rvf")
}
}
/// RVF-backed observation store for OSpipe.
///
/// Wraps an `RvfStore` and provides observation-oriented APIs:
/// - `record_observation` -- ingest a state vector with metadata
/// - `query_similar_states` -- k-NN search over observation vectors
/// - `get_state_history` -- filtered query by time range
/// - `compact_history` -- reclaim dead space from deleted observations
pub struct RvfObservationStore {
    /// Underlying RVF segment store.
    store: RvfStore,
    #[allow(dead_code)]
    /// Configuration captured at create/open time (currently unused after init).
    config: ObservationStoreConfig,
    /// Next vector ID to assign (0 in read-only mode, where no ingest occurs).
    next_id: u64,
}
impl RvfObservationStore {
/// Create a new observation store, creating the RVF file.
///
/// Fails with `InvalidDimension` for a zero dimension, `Io` if the data
/// directory cannot be created, or `Rvf` from the underlying store.
pub fn create(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
    if config.dimension == 0 {
        return Err(OspipeAdapterError::InvalidDimension);
    }
    std::fs::create_dir_all(&config.data_dir)
        .map_err(|e| OspipeAdapterError::Io(e.to_string()))?;
    let options = RvfOptions {
        dimension: config.dimension,
        metric: config.metric,
        ..Default::default()
    };
    let store = RvfStore::create(&config.store_path(), options)
        .map_err(OspipeAdapterError::Rvf)?;
    // IDs start at 1 for a fresh store.
    Ok(Self {
        store,
        config,
        next_id: 1,
    })
}
/// Open an existing observation store.
///
/// NOTE(review): `next_id = total_vectors + current_epoch + 1` is a
/// heuristic that can collide with live IDs after deletions; confirm the
/// runtime's ID-allocation contract or persist the counter.
pub fn open(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
    let store = RvfStore::open(&config.store_path())
        .map_err(OspipeAdapterError::Rvf)?;
    let status = store.status();
    let next_id = status.total_vectors + status.current_epoch as u64 + 1;
    Ok(Self {
        store,
        config,
        next_id,
    })
}
/// Open an existing store in read-only mode.
///
/// `next_id` is set to 0 since no ingest will happen on this handle.
pub fn open_readonly(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
    let store = RvfStore::open_readonly(&config.store_path())
        .map_err(OspipeAdapterError::Rvf)?;
    Ok(Self {
        store,
        config,
        next_id: 0,
    })
}
/// Record a single observation with its state vector and metadata.
///
/// Returns the assigned vector ID and the ingest result.
/// NOTE(review): unlike the claude-flow adapter, the vector length is not
/// checked against `config.dimension` here — confirm the runtime rejects
/// mismatched dimensions.
pub fn record_observation(
    &mut self,
    state_vector: &[f32],
    meta: &ObservationMeta,
) -> Result<(u64, IngestResult), OspipeAdapterError> {
    let id = self.next_id;
    self.next_id += 1;
    let entries = meta.to_entries();
    let result = self.store.ingest_batch(
        &[state_vector],
        &[id],
        Some(&entries),
    ).map_err(OspipeAdapterError::Rvf)?;
    Ok((id, result))
}
/// Record a batch of observations.
///
/// `vectors` and `metas` must have the same length.
/// Returns the assigned (contiguous) IDs and the ingest result.
pub fn record_batch(
    &mut self,
    vectors: &[&[f32]],
    metas: &[ObservationMeta],
) -> Result<(Vec<u64>, IngestResult), OspipeAdapterError> {
    if vectors.len() != metas.len() {
        return Err(OspipeAdapterError::LengthMismatch {
            vectors: vectors.len(),
            metas: metas.len(),
        });
    }
    // IDs are assigned as a contiguous run starting at next_id; the
    // counter is advanced even if the ingest below fails.
    let start_id = self.next_id;
    let ids: Vec<u64> = (start_id..start_id + vectors.len() as u64).collect();
    self.next_id = start_id + vectors.len() as u64;
    // Flatten metadata entries: each vector gets its own entries.
    // RvfStore expects entries_per_id to be uniform, so we pad to
    // a consistent entry count per vector.
    let entries_per_vec: Vec<Vec<MetadataEntry>> =
        metas.iter().map(|m| m.to_entries()).collect();
    let max_entries = entries_per_vec.iter().map(|e| e.len()).max().unwrap_or(0);
    let mut flat_entries = Vec::with_capacity(vectors.len() * max_entries);
    for vec_entries in &entries_per_vec {
        for entry in vec_entries {
            flat_entries.push(entry.clone());
        }
        // Pad with dummy entries so every vector has the same count.
        // NOTE(review): the u16::MAX sentinel entries are persisted as
        // real metadata — confirm the runtime ignores unknown field IDs
        // in filters, or they may surface in queries.
        for _ in vec_entries.len()..max_entries {
            flat_entries.push(MetadataEntry {
                field_id: u16::MAX,
                value: MetadataValue::U64(0),
            });
        }
    }
    let result = self.store.ingest_batch(
        vectors,
        &ids,
        if flat_entries.is_empty() { None } else { Some(&flat_entries) },
    ).map_err(OspipeAdapterError::Rvf)?;
    Ok((ids, result))
}
/// Query for the k most similar observation states.
pub fn query_similar_states(
    &self,
    state_vector: &[f32],
    k: usize,
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
    let opts = QueryOptions::default();
    match self.store.query(state_vector, k, &opts) {
        Ok(hits) => Ok(hits),
        Err(err) => Err(OspipeAdapterError::Rvf(err)),
    }
}
/// Query with a metadata filter expression.
pub fn query_filtered(
&self,
state_vector: &[f32],
k: usize,
filter: FilterExpr,
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
let opts = QueryOptions {
filter: Some(filter),
..Default::default()
};
self.store
.query(state_vector, k, &opts)
.map_err(OspipeAdapterError::Rvf)
}
/// Query for observations within a time range.
///
/// `start_secs` and `end_secs` are UNIX epoch seconds. The query
/// vector is used for similarity ranking among the time-filtered results.
pub fn get_state_history(
    &self,
    state_vector: &[f32],
    k: usize,
    start_secs: u64,
    end_secs: u64,
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
    use rvf_runtime::filter::FilterValue;
    // Inclusive [start_secs, end_secs] window on the timestamp field.
    let lower = FilterExpr::Ge(fields::TIMESTAMP_SECS, FilterValue::U64(start_secs));
    let upper = FilterExpr::Le(fields::TIMESTAMP_SECS, FilterValue::U64(end_secs));
    self.query_filtered(state_vector, k, FilterExpr::And(vec![lower, upper]))
}
/// Run compaction to reclaim space from deleted observations.
pub fn compact_history(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
    match self.store.compact() {
        Ok(result) => Ok(result),
        Err(err) => Err(OspipeAdapterError::Rvf(err)),
    }
}
/// Delete observations by their IDs.
pub fn delete_observations(
    &mut self,
    ids: &[u64],
) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
    match self.store.delete(ids) {
        Ok(result) => Ok(result),
        Err(err) => Err(OspipeAdapterError::Rvf(err)),
    }
}
/// Delete observations matching a filter expression.
pub fn delete_by_filter(
    &mut self,
    filter: &FilterExpr,
) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
    match self.store.delete_by_filter(filter) {
        Ok(result) => Ok(result),
        Err(err) => Err(OspipeAdapterError::Rvf(err)),
    }
}
/// Get the current store status.
///
/// Delegates to the underlying `RvfStore`; callers in this file rely on
/// `total_vectors`, `read_only`, and `dead_space_ratio`.
pub fn status(&self) -> StoreStatus {
    self.store.status()
}
/// Close the store, releasing locks.
///
/// Consumes the handle, so no further operations are possible afterwards.
pub fn close(self) -> Result<(), OspipeAdapterError> {
    self.store.close().map_err(OspipeAdapterError::Rvf)
}
}
/// Errors produced by the OSpipe adapter.
///
/// All store-level failures are wrapped in [`OspipeAdapterError::Rvf`];
/// the remaining variants are raised by the adapter itself before the
/// underlying store is touched.
#[derive(Clone, Debug)]
pub enum OspipeAdapterError {
    /// Underlying RVF error.
    Rvf(RvfError),
    /// IO error (directory creation, etc.).
    Io(String),
    /// Vector dimension must be > 0.
    InvalidDimension,
    /// Batch vectors and metadata have different lengths.
    LengthMismatch { vectors: usize, metas: usize },
}
impl std::fmt::Display for OspipeAdapterError {
    /// Human-readable rendering; the exact message text is part of the
    /// observable behavior and is preserved verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(err) => write!(f, "RVF error: {err}"),
            Self::Io(detail) => write!(f, "IO error: {detail}"),
            Self::InvalidDimension => f.write_str("vector dimension must be > 0"),
            Self::LengthMismatch { vectors, metas } => write!(
                f,
                "vectors ({vectors}) and metas ({metas}) length mismatch"
            ),
        }
    }
}
impl std::error::Error for OspipeAdapterError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Deterministic pseudo-random vector built from an LCG, so tests are
    // reproducible: the same seed always yields the same vector.
    fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    // Current UNIX time in seconds; falls back to 0 if the clock is
    // before the epoch.
    fn now_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
    }

    #[test]
    fn create_and_record_observation() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 64);
        let mut store = RvfObservationStore::create(config).unwrap();
        let vec = make_vector(64, 42);
        let meta = ObservationMeta {
            content_type: "ocr".into(),
            app_name: Some("VSCode".into()),
            timestamp_secs: now_secs(),
            monitor_id: Some(0),
        };
        let (id, result) = store.record_observation(&vec, &meta).unwrap();
        // IDs are 1-based: the first recorded observation gets id 1.
        assert_eq!(id, 1);
        assert_eq!(result.accepted, 1);
        assert_eq!(result.rejected, 0);
        store.close().unwrap();
    }

    #[test]
    fn query_similar_states() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 32);
        let mut store = RvfObservationStore::create(config).unwrap();
        // Insert 10 observations.
        for i in 0..10u64 {
            let vec = make_vector(32, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs() + i,
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }
        let query = make_vector(32, 5);
        let results = store.query_similar_states(&query, 3).unwrap();
        assert_eq!(results.len(), 3);
        // Closest should be the same vector (id 6, since first id is 1).
        assert_eq!(results[0].id, 6);
        assert!(results[0].distance < 1e-5);
        // Results are sorted by distance ascending.
        for i in 1..results.len() {
            assert!(results[i].distance >= results[i - 1].distance);
        }
        store.close().unwrap();
    }

    #[test]
    fn get_state_history_filters_by_time() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);
        let mut store = RvfObservationStore::create(config).unwrap();
        let base_time = 1_700_000_000u64;
        // Insert observations at different times.
        for i in 0..5u64 {
            let vec = make_vector(16, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: base_time + i * 100,
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }
        // Query for observations in the range [base+100, base+300].
        let query = make_vector(16, 0);
        let results = store
            .get_state_history(&query, 10, base_time + 100, base_time + 300)
            .unwrap();
        // Should get ids 2, 3, 4 (timestamps base+100, base+200, base+300).
        // Both range bounds are inclusive.
        assert_eq!(results.len(), 3);
        let ids: Vec<u64> = results.iter().map(|r| r.id).collect();
        assert!(ids.contains(&2));
        assert!(ids.contains(&3));
        assert!(ids.contains(&4));
        store.close().unwrap();
    }

    #[test]
    fn record_batch_and_query() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);
        let mut store = RvfObservationStore::create(config).unwrap();
        let vecs: Vec<Vec<f32>> = (0..5).map(|i| make_vector(16, i)).collect();
        let vec_refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        // Alternating content types exercise the variable-width metadata
        // padding in record_batch.
        let metas: Vec<ObservationMeta> = (0..5)
            .map(|i| ObservationMeta {
                content_type: if i % 2 == 0 { "ocr" } else { "transcription" }.into(),
                app_name: Some("TestApp".into()),
                timestamp_secs: now_secs() + i,
                monitor_id: None,
            })
            .collect();
        let (ids, result) = store.record_batch(&vec_refs, &metas).unwrap();
        assert_eq!(ids.len(), 5);
        assert_eq!(result.accepted, 5);
        let query = make_vector(16, 2);
        let results = store.query_similar_states(&query, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, 3); // id starts at 1, so seed=2 -> id=3
        store.close().unwrap();
    }

    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);
        let mut store = RvfObservationStore::create(config).unwrap();
        // Insert 4 observations.
        for i in 0..4u64 {
            let vec = make_vector(8, i);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
        }
        let status = store.status();
        assert_eq!(status.total_vectors, 4);
        // Delete 2 observations.
        let del = store.delete_observations(&[1, 3]).unwrap();
        assert_eq!(del.deleted, 2);
        let status = store.status();
        assert_eq!(status.total_vectors, 2);
        // Compact.
        let compact = store.compact_history().unwrap();
        assert_eq!(compact.segments_compacted, 2);
        // Verify remaining vectors are queryable.
        let query = make_vector(8, 1); // seed=1 -> was id=2
        let results = store.query_similar_states(&query, 10).unwrap();
        assert_eq!(results.len(), 2);
        let ids: Vec<u64> = results.iter().map(|r| r.id).collect();
        assert!(ids.contains(&2));
        assert!(ids.contains(&4));
        store.close().unwrap();
    }

    #[test]
    fn open_existing_store() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 16);
        // Create and populate.
        {
            let mut store = RvfObservationStore::create(config.clone()).unwrap();
            let vec = make_vector(16, 99);
            let meta = ObservationMeta {
                content_type: "transcription".into(),
                app_name: Some("Zoom".into()),
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
            store.close().unwrap();
        }
        // Reopen.
        {
            let store = RvfObservationStore::open(config).unwrap();
            let query = make_vector(16, 99);
            let results = store.query_similar_states(&query, 1).unwrap();
            assert_eq!(results.len(), 1);
            assert!(results[0].distance < 1e-5);
            store.close().unwrap();
        }
    }

    #[test]
    fn readonly_mode() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);
        {
            let mut store = RvfObservationStore::create(config.clone()).unwrap();
            let vec = make_vector(8, 0);
            let meta = ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: now_secs(),
                monitor_id: None,
            };
            store.record_observation(&vec, &meta).unwrap();
            store.close().unwrap();
        }
        let store = RvfObservationStore::open_readonly(config).unwrap();
        let status = store.status();
        assert!(status.read_only);
        assert_eq!(status.total_vectors, 1);
    }

    #[test]
    fn invalid_dimension_rejected() {
        let dir = TempDir::new().unwrap();
        // Dimension 0 must be rejected at creation time.
        let config = ObservationStoreConfig::new(dir.path(), 0);
        let result = RvfObservationStore::create(config);
        assert!(result.is_err());
    }

    #[test]
    fn batch_length_mismatch_rejected() {
        let dir = TempDir::new().unwrap();
        let config = ObservationStoreConfig::new(dir.path(), 8);
        let mut store = RvfObservationStore::create(config).unwrap();
        // 1 vector vs. 2 metas must produce LengthMismatch.
        let vecs = [make_vector(8, 0)];
        let vec_refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let metas = vec![
            ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: 0,
                monitor_id: None,
            },
            ObservationMeta {
                content_type: "ocr".into(),
                app_name: None,
                timestamp_secs: 0,
                monitor_id: None,
            },
        ];
        let result = store.record_batch(&vec_refs, &metas);
        assert!(result.is_err());
        store.close().unwrap();
    }
}

View File

@@ -0,0 +1,267 @@
//! Pipeline integration helpers for OSpipe.
//!
//! Provides [`RvfPipelineAdapter`] which wraps [`RvfObservationStore`] and
//! exposes a simplified interface for OSpipe's ingestion pipeline to push
//! captured frames directly into the RVF store.
use std::path::PathBuf;
use rvf_runtime::options::DistanceMetric;
use crate::observation_store::{
ObservationMeta, ObservationStoreConfig, OspipeAdapterError, RvfObservationStore,
};
/// Configuration for the pipeline adapter.
#[derive(Clone, Debug)]
pub struct PipelineConfig {
    /// Directory for RVF data files.
    pub data_dir: PathBuf,
    /// Vector embedding dimension.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: DistanceMetric,
    /// Automatically compact when dead-space ratio exceeds this threshold.
    /// Compared against `status().dead_space_ratio` after each ingest,
    /// so it should be a fraction in (0.0, 1.0].
    pub auto_compact_threshold: f64,
}
impl PipelineConfig {
/// Create a new pipeline config with required parameters.
pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
Self {
data_dir: data_dir.into(),
dimension,
metric: DistanceMetric::Cosine,
auto_compact_threshold: 0.3,
}
}
}
/// High-level adapter that OSpipe's ingestion pipeline can use to persist
/// observation vectors into an RVF store.
///
/// Handles store lifecycle, auto-compaction, and provides convenience
/// methods that accept OSpipe-domain types directly.
pub struct RvfPipelineAdapter {
    store: RvfObservationStore,
    config: PipelineConfig,
    // Ingests performed through this handle in the current session
    // (reset to 0 by `open`). Incremented by `ingest` but not currently
    // read anywhere in this file.
    ingest_count: u64,
}
impl RvfPipelineAdapter {
/// Create a new pipeline adapter, creating the underlying RVF file.
pub fn create(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
let store_config = ObservationStoreConfig {
data_dir: config.data_dir.clone(),
dimension: config.dimension,
metric: config.metric,
};
let store = RvfObservationStore::create(store_config)?;
Ok(Self {
store,
config,
ingest_count: 0,
})
}
/// Open an existing pipeline adapter.
pub fn open(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
let store_config = ObservationStoreConfig {
data_dir: config.data_dir.clone(),
dimension: config.dimension,
metric: config.metric,
};
let store = RvfObservationStore::open(store_config)?;
Ok(Self {
store,
config,
ingest_count: 0,
})
}
/// Ingest a single observation from the pipeline.
///
/// This is the primary entry point for OSpipe's ingestion pipeline.
/// After ingestion, may trigger auto-compaction if the dead-space
/// ratio exceeds the configured threshold.
pub fn ingest(
&mut self,
embedding: &[f32],
content_type: &str,
app_name: Option<&str>,
timestamp_secs: u64,
monitor_id: Option<u32>,
) -> Result<u64, OspipeAdapterError> {
let meta = ObservationMeta {
content_type: content_type.to_string(),
app_name: app_name.map(|s| s.to_string()),
timestamp_secs,
monitor_id,
};
let (id, _result) = self.store.record_observation(embedding, &meta)?;
self.ingest_count += 1;
self.maybe_compact()?;
Ok(id)
}
/// Search for similar observations.
pub fn search(
&self,
query: &[f32],
k: usize,
) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
self.store.query_similar_states(query, k)
}
/// Search for observations within a time window.
pub fn search_time_range(
&self,
query: &[f32],
k: usize,
start_secs: u64,
end_secs: u64,
) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
self.store.get_state_history(query, k, start_secs, end_secs)
}
/// Expire observations older than the given timestamp.
///
/// Scans for observations with timestamps before `before_secs` and
/// soft-deletes them. Returns the number of observations deleted.
pub fn expire_before(
&mut self,
before_secs: u64,
) -> Result<u64, OspipeAdapterError> {
use rvf_runtime::filter::{FilterExpr, FilterValue};
let filter = FilterExpr::Lt(
crate::observation_store::fields::TIMESTAMP_SECS,
FilterValue::U64(before_secs),
);
let result = self.store.delete_by_filter(&filter)?;
Ok(result.deleted)
}
/// Force a compaction cycle.
pub fn compact(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
self.store.compact_history()
}
/// Get the total number of live observations.
pub fn observation_count(&self) -> u64 {
self.store.status().total_vectors
}
/// Close the adapter and release resources.
pub fn close(self) -> Result<(), OspipeAdapterError> {
self.store.close()
}
/// Check if auto-compaction should run, and run it if so.
fn maybe_compact(&mut self) -> Result<(), OspipeAdapterError> {
let status = self.store.status();
if status.dead_space_ratio > self.config.auto_compact_threshold {
self.store.compact_history()?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Deterministic LCG-based vector generator (same scheme as the
    // observation-store tests) so results are reproducible.
    fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    // Current UNIX time in seconds, or 0 if the clock is pre-epoch.
    fn now_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
    }

    #[test]
    fn pipeline_ingest_and_search() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 32);
        let mut adapter = RvfPipelineAdapter::create(config).unwrap();
        let ts = now_secs();
        for i in 0..5u64 {
            let vec = make_vector(32, i);
            adapter
                .ingest(&vec, "ocr", Some("VSCode"), ts + i, Some(0))
                .unwrap();
        }
        assert_eq!(adapter.observation_count(), 5);
        let query = make_vector(32, 2);
        let results = adapter.search(&query, 2).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 3); // seed=2 -> id=3 (1-indexed)
        adapter.close().unwrap();
    }

    #[test]
    fn pipeline_time_range_search() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 16);
        let mut adapter = RvfPipelineAdapter::create(config).unwrap();
        let base = 1_700_000_000u64;
        // One observation per hour starting at `base`.
        for i in 0..4u64 {
            let vec = make_vector(16, i);
            adapter
                .ingest(&vec, "transcription", None, base + i * 3600, None)
                .unwrap();
        }
        let query = make_vector(16, 0);
        let results = adapter
            .search_time_range(&query, 10, base + 3600, base + 7200)
            .unwrap();
        // Should get observations at base+3600 (id=2) and base+7200 (id=3).
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn pipeline_open_existing() {
        let dir = TempDir::new().unwrap();
        let config = PipelineConfig::new(dir.path(), 16);
        // Create, populate, and close in one scope...
        {
            let mut adapter = RvfPipelineAdapter::create(config.clone()).unwrap();
            let vec = make_vector(16, 0);
            adapter.ingest(&vec, "ocr", None, now_secs(), None).unwrap();
            adapter.close().unwrap();
        }
        // ...then verify the data survives a reopen.
        {
            let adapter = RvfPipelineAdapter::open(config).unwrap();
            assert_eq!(adapter.observation_count(), 1);
            adapter.close().unwrap();
        }
    }
}

View File

@@ -0,0 +1,19 @@
# Crate manifest for the rvlite adapter: a minimal embedded vector store
# layered over the RVF Core Profile.
[package]
name = "rvf-adapter-rvlite"
version = "0.1.0"
edition = "2021"
description = "Lightweight embedded vector store adapter for RuVector Format -- minimal API over RVF Core Profile"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"

# `std` is enabled by default; the flag mirrors the feature layout of the
# sibling rvf crates this adapter depends on.
[features]
default = ["std"]
std = []

[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }

# Used only by the test suite for temporary store files.
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,484 @@
//! The main rvlite collection API.
//!
//! [`RvliteCollection`] provides a minimal, ergonomic interface for
//! embedded vector storage. No metadata, no filters, no namespaces --
//! just vectors with IDs.
use std::path::Path;
use rvf_runtime::options::{QueryOptions, RvfOptions};
use rvf_runtime::store::RvfStore;
use crate::config::RvliteConfig;
use crate::error::{Result, RvliteError};
/// A single search result: vector ID and distance from the query.
#[derive(Clone, Debug, PartialEq)]
pub struct Match {
    /// The vector's unique identifier.
    pub id: u64,
    /// Distance from the query vector (lower = more similar).
    /// The interpretation depends on the collection's configured metric.
    pub distance: f32,
}
/// Statistics returned by the [`RvliteCollection::compact`] operation.
///
/// Mirrors the fields of the underlying runtime's compaction result.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompactStats {
    /// Number of segments that were compacted.
    pub segments_compacted: u32,
    /// Total bytes of dead space reclaimed.
    pub bytes_reclaimed: u64,
}
/// A lightweight embedded vector collection wrapping [`RvfStore`].
pub struct RvliteCollection {
    store: RvfStore,
    // Cached vector dimension. 0 is used as an "unknown" sentinel when an
    // empty store is opened (see `probe_dimension`).
    dimension: u16,
}
impl RvliteCollection {
    /// Create a new collection at the configured path (file must not exist).
    ///
    /// Uses the RVF Core profile with the configured dimension and metric.
    pub fn create(config: RvliteConfig) -> Result<Self> {
        let options = RvfOptions {
            dimension: config.dimension,
            metric: config.metric.into(),
            profile: 1, // Core profile
            ..Default::default()
        };
        let store = RvfStore::create(&config.path, options)?;
        Ok(Self {
            store,
            dimension: config.dimension,
        })
    }

    /// Open an existing collection (file must exist with a valid RVF manifest).
    ///
    /// The dimension is recovered by probing the store (see
    /// [`Self::probe_dimension`]). For an empty store the probe cannot
    /// determine the dimension and leaves the 0 sentinel; the collection
    /// then learns the real dimension from the first accepted insert
    /// instead of rejecting every vector.
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let store = RvfStore::open(path.as_ref())?;
        let dim = Self::probe_dimension(&store);
        Ok(Self {
            store,
            dimension: dim,
        })
    }

    /// Add a single vector with the given ID.
    ///
    /// # Errors
    /// [`RvliteError::DimensionMismatch`] when the vector length disagrees
    /// with the known collection dimension, or [`RvliteError::Rvf`] when
    /// the underlying store rejects the ingest.
    pub fn add(&mut self, id: u64, vector: &[f32]) -> Result<()> {
        self.check_dimension(vector.len())?;
        let result = self.store.ingest_batch(&[vector], &[id], None)?;
        // If the dimension was unknown (store opened while empty), learn
        // it from the first vector the store accepts.
        if result.accepted > 0 {
            self.note_dimension(vector.len());
        }
        Ok(())
    }

    /// Add multiple vectors in a single batch. Returns count added.
    ///
    /// Every vector's length is validated up front, consistent with
    /// [`Self::add`] (previously only `add` validated, so a bad batch
    /// surfaced as an opaque store error instead of `DimensionMismatch`).
    pub fn add_batch(&mut self, ids: &[u64], vectors: &[&[f32]]) -> Result<usize> {
        if ids.len() != vectors.len() {
            // Kept as the historical `Io` variant for compatibility with
            // callers matching on it, although a dedicated variant would
            // be clearer.
            return Err(RvliteError::Io(
                "ids and vectors must have the same length".into(),
            ));
        }
        for vector in vectors {
            self.check_dimension(vector.len())?;
        }
        let result = self.store.ingest_batch(vectors, ids, None)?;
        // Learn the dimension from the first vector if it was unknown.
        if result.accepted > 0 {
            if let Some(first) = vectors.first() {
                self.note_dimension(first.len());
            }
        }
        Ok(result.accepted as usize)
    }

    /// Find the `k` nearest neighbors, sorted by distance (closest first).
    ///
    /// Returns an empty `Vec` on dimension mismatch or any query error
    /// (graceful degradation — no `Result` in this signature).
    pub fn search(&self, vector: &[f32], k: usize) -> Vec<Match> {
        if vector.len() != self.dimension as usize {
            return Vec::new();
        }
        let query_opts = QueryOptions::default();
        match self.store.query(vector, k, &query_opts) {
            Ok(results) => results
                .into_iter()
                .map(|r| Match {
                    id: r.id,
                    distance: r.distance,
                })
                .collect(),
            Err(_) => Vec::new(),
        }
    }

    /// Remove a single vector by ID. Returns whether it existed.
    pub fn remove(&mut self, id: u64) -> Result<bool> {
        let result = self.store.delete(&[id])?;
        Ok(result.deleted > 0)
    }

    /// Remove multiple vectors by ID. Returns count actually removed.
    pub fn remove_batch(&mut self, ids: &[u64]) -> Result<usize> {
        let result = self.store.delete(ids)?;
        Ok(result.deleted as usize)
    }

    /// Check whether a vector with the given ID exists (soft-deleted = absent).
    pub fn contains(&self, id: u64) -> bool {
        let total = self.store.status().total_vectors as usize;
        if total == 0 {
            return false;
        }
        // Brute-force scan via query; acceptable for rvlite's small collections.
        let zero_vec = vec![0.0f32; self.dimension as usize];
        match self.store.query(&zero_vec, total, &QueryOptions::default()) {
            Ok(results) => results.iter().any(|r| r.id == id),
            Err(_) => false,
        }
    }

    /// Return the number of live (non-deleted) vectors in the collection.
    pub fn len(&self) -> usize {
        self.store.status().total_vectors as usize
    }

    /// Return `true` if the collection has no live vectors.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Compact the collection, reclaiming space from deleted vectors.
    pub fn compact(&mut self) -> Result<CompactStats> {
        let result = self.store.compact()?;
        Ok(CompactStats {
            segments_compacted: result.segments_compacted,
            bytes_reclaimed: result.bytes_reclaimed,
        })
    }

    /// Flush all pending writes and close the collection, consuming the handle.
    pub fn close(self) -> Result<()> {
        self.store.close()?;
        Ok(())
    }

    /// Return the configured vector dimension.
    ///
    /// May be 0 after opening an empty store, until the first successful
    /// insert teaches the collection its real dimension.
    pub fn dimension(&self) -> u16 {
        self.dimension
    }

    // ---- Internal helpers ------------------------------------------------

    /// Validate that a vector length matches the collection dimension.
    ///
    /// Dimension 0 is the "unknown" sentinel left by `open` on an empty
    /// store; validation is deferred to the underlying store in that case
    /// (previously every insert was rejected with `expected: 0`).
    fn check_dimension(&self, len: usize) -> Result<()> {
        if self.dimension != 0 && len != self.dimension as usize {
            return Err(RvliteError::DimensionMismatch {
                expected: self.dimension,
                got: len,
            });
        }
        Ok(())
    }

    /// Record the dimension after a successful insert when it was unknown.
    fn note_dimension(&mut self, len: usize) {
        if self.dimension == 0 {
            // Dimensions are u16 in RVF; anything larger would have been
            // rejected by the store before reaching this point.
            if let Ok(dim) = u16::try_from(len) {
                self.dimension = dim;
            }
        }
    }

    /// Probe the dimension of an opened store by trying queries with
    /// increasing dimensions until one succeeds.
    ///
    /// RvfStore stores the dimension internally but does not expose it
    /// directly. When there are vectors present, a query with the wrong
    /// dimension returns `DimensionMismatch`, so we try dimensions
    /// 1..=4096 until one succeeds (worst case 4096 probe queries). For
    /// empty stores we return 0 as a sentinel; see `check_dimension`.
    fn probe_dimension(store: &RvfStore) -> u16 {
        if store.status().total_vectors == 0 {
            return 0;
        }
        let opts = QueryOptions::default();
        for dim in 1u16..=4096 {
            let probe = vec![0.0f32; dim as usize];
            if store.query(&probe, 1, &opts).is_ok() {
                return dim;
            }
        }
        0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{RvliteConfig, RvliteMetric};
    use tempfile::TempDir;

    // Build a store path inside the per-test temp directory.
    fn temp_path(dir: &TempDir, name: &str) -> std::path::PathBuf {
        dir.path().join(name)
    }

    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "basic.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        assert!(col.is_empty());
        assert_eq!(col.len(), 0);
        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
        col.add(3, &[0.0, 0.0, 1.0, 0.0]).unwrap();
        assert_eq!(col.len(), 3);
        assert!(!col.is_empty());
        // Exact match should come back first with ~zero distance.
        let results = col.search(&[1.0, 0.0, 0.0, 0.0], 2);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 1);
        assert!(results[0].distance < f32::EPSILON);
        col.close().unwrap();
    }

    #[test]
    fn batch_add_and_search() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "batch.rvf"), 3).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        let ids = vec![10, 20, 30];
        let v1 = [1.0, 0.0, 0.0];
        let v2 = [0.0, 1.0, 0.0];
        let v3 = [0.0, 0.0, 1.0];
        let vecs: Vec<&[f32]> = vec![&v1, &v2, &v3];
        let count = col.add_batch(&ids, &vecs).unwrap();
        assert_eq!(count, 3);
        assert_eq!(col.len(), 3);
        let results = col.search(&[0.0, 1.0, 0.0], 1);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, 20);
        col.close().unwrap();
    }

    #[test]
    fn remove_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "remove.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
        col.add(3, &[0.0, 0.0, 1.0, 0.0]).unwrap();
        assert_eq!(col.len(), 3);
        assert!(col.contains(2));
        let removed = col.remove(2).unwrap();
        assert!(removed);
        assert_eq!(col.len(), 2);
        assert!(!col.contains(2));
        // Removing again returns false
        let removed_again = col.remove(2).unwrap();
        assert!(!removed_again);
        col.close().unwrap();
    }

    #[test]
    fn remove_batch_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "rm_batch.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        for i in 0..5u64 {
            col.add(i, &[i as f32, 0.0, 0.0, 0.0]).unwrap();
        }
        let count = col.remove_batch(&[1, 3, 99]).unwrap();
        // 99 never existed, so only 2 are removed
        assert_eq!(count, 2);
        assert_eq!(col.len(), 3);
        col.close().unwrap();
    }

    #[test]
    fn dimension_mismatch_error() {
        let dir = TempDir::new().unwrap();
        let config = RvliteConfig::new(temp_path(&dir, "dim.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        // Wrong dimension: 3 instead of 4
        let result = col.add(1, &[1.0, 0.0, 0.0]);
        assert!(result.is_err());
        match result.unwrap_err() {
            RvliteError::DimensionMismatch { expected, got } => {
                assert_eq!(expected, 4);
                assert_eq!(got, 3);
            }
            other => panic!("expected DimensionMismatch, got: {other}"),
        }
        col.close().unwrap();
    }

    #[test]
    fn empty_collection_edge_cases() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "empty.rvf"), 4).with_metric(RvliteMetric::L2);
        let col = RvliteCollection::create(config).unwrap();
        // All read operations must be safe on a fresh, empty collection.
        assert!(col.is_empty());
        assert_eq!(col.len(), 0);
        assert!(!col.contains(1));
        let results = col.search(&[1.0, 0.0, 0.0, 0.0], 10);
        assert!(results.is_empty());
        col.close().unwrap();
    }

    #[test]
    fn search_returns_empty_on_wrong_dimension() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "dim_search.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        // Search with wrong dimension returns empty (graceful degradation)
        let results = col.search(&[1.0, 0.0], 10);
        assert!(results.is_empty());
        col.close().unwrap();
    }

    #[test]
    fn open_existing_collection() {
        let dir = TempDir::new().unwrap();
        let path = temp_path(&dir, "reopen.rvf");
        let config = RvliteConfig::new(path.clone(), 4).with_metric(RvliteMetric::L2);
        {
            let mut col = RvliteCollection::create(config).unwrap();
            col.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
            col.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
            col.close().unwrap();
        }
        {
            // Reopening must recover both the data and the (probed) dimension.
            let col = RvliteCollection::open(&path).unwrap();
            assert_eq!(col.len(), 2);
            assert_eq!(col.dimension(), 4);
            let results = col.search(&[1.0, 0.0, 0.0, 0.0], 2);
            assert_eq!(results.len(), 2);
            assert_eq!(results[0].id, 1);
            col.close().unwrap();
        }
    }

    #[test]
    fn compact_and_verify() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "compact.rvf"), 4).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        for i in 0..10u64 {
            col.add(i, &[i as f32, 0.0, 0.0, 0.0]).unwrap();
        }
        col.remove_batch(&[0, 2, 4, 6, 8]).unwrap();
        assert_eq!(col.len(), 5);
        let stats = col.compact().unwrap();
        assert_eq!(stats.segments_compacted, 5);
        assert!(stats.bytes_reclaimed > 0);
        // Verify remaining vectors are intact after compaction
        assert_eq!(col.len(), 5);
        assert!(col.contains(1));
        assert!(col.contains(3));
        assert!(!col.contains(0));
        col.close().unwrap();
    }

    #[test]
    fn len_is_empty_contains() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "accessors.rvf"), 2).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        assert_eq!(col.len(), 0);
        assert!(col.is_empty());
        assert!(!col.contains(42));
        col.add(42, &[1.0, 2.0]).unwrap();
        assert_eq!(col.len(), 1);
        assert!(!col.is_empty());
        assert!(col.contains(42));
        assert!(!col.contains(99));
        col.close().unwrap();
    }

    #[test]
    fn cosine_metric() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "cosine.rvf"), 3).with_metric(RvliteMetric::Cosine);
        let mut col = RvliteCollection::create(config).unwrap();
        col.add(1, &[1.0, 0.0, 0.0]).unwrap();
        col.add(2, &[0.0, 1.0, 0.0]).unwrap();
        col.add(3, &[1.0, 1.0, 0.0]).unwrap();
        // Query for [1, 0, 0] -- id=1 should be closest (exact match)
        let results = col.search(&[1.0, 0.0, 0.0], 3);
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].id, 1);
        assert!(results[0].distance < f32::EPSILON);
        col.close().unwrap();
    }

    #[test]
    fn dimension_accessor() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "dim_acc.rvf"), 256).with_metric(RvliteMetric::L2);
        let col = RvliteCollection::create(config).unwrap();
        assert_eq!(col.dimension(), 256);
        col.close().unwrap();
    }

    #[test]
    fn batch_length_mismatch() {
        let dir = TempDir::new().unwrap();
        let config =
            RvliteConfig::new(temp_path(&dir, "mismatch.rvf"), 2).with_metric(RvliteMetric::L2);
        let mut col = RvliteCollection::create(config).unwrap();
        let ids = vec![1, 2, 3];
        let v1 = [1.0, 0.0];
        let v2 = [0.0, 1.0];
        let vecs: Vec<&[f32]> = vec![&v1, &v2]; // 2 vectors but 3 ids
        let result = col.add_batch(&ids, &vecs);
        assert!(result.is_err());
        col.close().unwrap();
    }
}

View File

@@ -0,0 +1,111 @@
//! Configuration for rvlite collections.
//!
//! Provides [`RvliteConfig`] with sensible defaults for lightweight,
//! resource-constrained environments.
use std::path::PathBuf;
use rvf_runtime::options::DistanceMetric;
/// Distance metric for rvlite similarity search.
///
/// Maps directly to the underlying `DistanceMetric` in rvf-runtime
/// (see the `From` impl below).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum RvliteMetric {
    /// Squared Euclidean distance.
    L2,
    /// Cosine distance (1 - cosine_similarity). The default.
    #[default]
    Cosine,
    /// Inner (dot) product distance (negated).
    InnerProduct,
}
impl From<RvliteMetric> for DistanceMetric {
fn from(m: RvliteMetric) -> Self {
match m {
RvliteMetric::L2 => DistanceMetric::L2,
RvliteMetric::Cosine => DistanceMetric::Cosine,
RvliteMetric::InnerProduct => DistanceMetric::InnerProduct,
}
}
}
/// Configuration for creating a new rvlite collection.
#[derive(Clone, Debug)]
pub struct RvliteConfig {
    /// File path for the RVF file.
    pub path: PathBuf,
    /// Vector dimensionality (required, must be > 0).
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: RvliteMetric,
    /// Optional capacity hint for pre-allocation.
    /// NOTE(review): not consumed by `RvliteCollection::create` in this
    /// crate's visible code — confirm whether the hint is wired through.
    pub max_elements: Option<usize>,
}
impl RvliteConfig {
/// Create a new config with the required fields and sensible defaults.
///
/// The metric defaults to `Cosine` and `max_elements` is `None`.
pub fn new(path: impl Into<PathBuf>, dimension: u16) -> Self {
Self {
path: path.into(),
dimension,
metric: RvliteMetric::default(),
max_elements: None,
}
}
/// Set the distance metric.
pub fn with_metric(mut self, metric: RvliteMetric) -> Self {
self.metric = metric;
self
}
/// Set the capacity hint.
pub fn with_max_elements(mut self, max: usize) -> Self {
self.max_elements = Some(max);
self
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_metric_is_cosine() {
        assert_eq!(RvliteMetric::default(), RvliteMetric::Cosine);
    }

    #[test]
    fn config_new_defaults() {
        // `new` must fill non-required fields with the documented defaults.
        let cfg = RvliteConfig::new("/tmp/test.rvf", 128);
        assert_eq!(cfg.dimension, 128);
        assert_eq!(cfg.metric, RvliteMetric::Cosine);
        assert!(cfg.max_elements.is_none());
    }

    #[test]
    fn config_builder_methods() {
        let cfg = RvliteConfig::new("/tmp/test.rvf", 64)
            .with_metric(RvliteMetric::L2)
            .with_max_elements(1000);
        assert_eq!(cfg.metric, RvliteMetric::L2);
        assert_eq!(cfg.max_elements, Some(1000));
    }

    #[test]
    fn metric_conversion() {
        // Each rvlite metric maps to its same-named runtime metric.
        assert_eq!(DistanceMetric::from(RvliteMetric::L2), DistanceMetric::L2);
        assert_eq!(
            DistanceMetric::from(RvliteMetric::Cosine),
            DistanceMetric::Cosine
        );
        assert_eq!(
            DistanceMetric::from(RvliteMetric::InnerProduct),
            DistanceMetric::InnerProduct
        );
    }
}

View File

@@ -0,0 +1,99 @@
//! Error types for the rvlite adapter.
//!
//! Provides a lightweight error enum that wraps `RvfError` and I/O errors,
//! plus a dimension-mismatch variant for early validation.
use core::fmt;
use rvf_types::RvfError;
/// Errors that can occur in rvlite operations.
#[derive(Debug)]
pub enum RvliteError {
    /// An error originating from the RVF runtime or types layer.
    Rvf(RvfError),
    /// An I/O error described by a message string.
    /// Stored as a `String` (not `std::io::Error`) to keep the type `core`-friendly.
    Io(String),
    /// The supplied vector has the wrong number of dimensions.
    DimensionMismatch {
        /// The dimension the collection was created with.
        expected: u16,
        /// The dimension of the vector that was supplied.
        got: usize,
    },
}
impl fmt::Display for RvliteError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Rvf(e) => write!(f, "rvf: {e}"),
Self::Io(msg) => write!(f, "io: {msg}"),
Self::DimensionMismatch { expected, got } => {
write!(f, "dimension mismatch: expected {expected}, got {got}")
}
}
}
}
impl From<RvfError> for RvliteError {
fn from(e: RvfError) -> Self {
Self::Rvf(e)
}
}
impl From<std::io::Error> for RvliteError {
fn from(e: std::io::Error) -> Self {
Self::Io(e.to_string())
}
}
/// Convenience alias used throughout the rvlite crate.
/// Shadows `std::result::Result` with `RvliteError` fixed as the error type.
pub type Result<T> = std::result::Result<T, RvliteError>;
#[cfg(test)]
mod tests {
    use super::*;
    use rvf_types::ErrorCode;
    // Display-format and conversion tests for `RvliteError`.
    #[test]
    fn display_rvf_variant() {
        let err = RvliteError::Rvf(RvfError::Code(ErrorCode::DimensionMismatch));
        let msg = format!("{err}");
        assert!(msg.contains("rvf:"));
    }
    #[test]
    fn display_io_variant() {
        let err = RvliteError::Io("file not found".into());
        let msg = format!("{err}");
        assert!(msg.contains("io: file not found"));
    }
    #[test]
    fn display_dimension_mismatch() {
        let err = RvliteError::DimensionMismatch {
            expected: 128,
            got: 64,
        };
        let msg = format!("{err}");
        assert!(msg.contains("expected 128"));
        assert!(msg.contains("got 64"));
    }
    #[test]
    fn from_rvf_error() {
        let rvf = RvfError::Code(ErrorCode::FsyncFailed);
        let err: RvliteError = rvf.into();
        // Fix: the bare `matches!(...)` expression produced an unused bool,
        // so this test could never fail; assert the result explicitly.
        assert!(matches!(err, RvliteError::Rvf(_)));
    }
    #[test]
    fn from_io_error() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "gone");
        let err: RvliteError = io_err.into();
        match err {
            RvliteError::Io(msg) => assert!(msg.contains("gone")),
            _ => panic!("expected Io variant"),
        }
    }
}

View File

@@ -0,0 +1,41 @@
//! Lightweight embedded vector store adapter for the RuVector Format (RVF).
//!
//! **rvlite** provides a minimal, ergonomic API for embedded vector storage
//! using the RVF Core Profile. It is designed for resource-constrained
//! environments (WASM, edge, embedded) where a full-featured vector
//! database is unnecessary.
//!
//! # Design philosophy
//!
//! - **Simple**: No metadata, no filters, no namespaces. Just vectors with IDs.
//! - **Small**: Minimal dependency surface; only `rvf-runtime` and `rvf-types`.
//! - **Safe**: Dimension validation, proper error handling, no panics.
//!
//! # Quick start
//!
//! ```no_run
//! use rvf_adapter_rvlite::{RvliteCollection, RvliteConfig, RvliteMetric};
//!
//! let config = RvliteConfig::new("/tmp/my_vectors.rvf", 128)
//! .with_metric(RvliteMetric::Cosine);
//!
//! let mut col = RvliteCollection::create(config).unwrap();
//!
//! col.add(1, &vec![0.1; 128]).unwrap();
//! col.add(2, &vec![0.2; 128]).unwrap();
//!
//! let results = col.search(&vec![0.1; 128], 5);
//! for m in &results {
//! println!("id={} distance={:.4}", m.id, m.distance);
//! }
//!
//! col.close().unwrap();
//! ```
pub mod collection;
pub mod config;
pub mod error;
pub use collection::{CompactStats, Match, RvliteCollection};
pub use config::{RvliteConfig, RvliteMetric};
pub use error::{Result, RvliteError};

View File

@@ -0,0 +1,19 @@
# Crate manifest for the SONA adapter; depends only on the RVF runtime
# and types crates, with `tempfile` used by the unit tests.
[package]
name = "rvf-adapter-sona"
version = "0.1.0"
edition = "2021"
description = "SONA adapter for RuVector Format -- stores learning trajectories, neural patterns, and experience replay buffers as RVF segments"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"
[features]
# `std` is on by default; the feature is forwarded to the RVF crates below.
default = ["std"]
std = []
[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,142 @@
//! Configuration for the SONA adapter.
use std::path::PathBuf;
/// Configuration for the RVF-backed SONA stores.
///
/// Shared by `TrajectoryStore`, `ExperienceReplayBuffer`, and
/// `NeuralPatternStore`; see `validate` for the accepted value ranges.
#[derive(Clone, Debug)]
pub struct SonaConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match SONA's embedding size).
    pub dimension: u16,
    /// Maximum number of experiences in the replay buffer.
    pub replay_capacity: usize,
    /// Number of recent trajectory steps to retain in the window.
    pub trajectory_window: usize,
}
impl SonaConfig {
/// Create a new configuration with required parameters and sensible defaults.
pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
Self {
data_dir: data_dir.into(),
dimension,
replay_capacity: 10_000,
trajectory_window: 100,
}
}
/// Set the replay buffer capacity.
pub fn with_replay_capacity(mut self, capacity: usize) -> Self {
self.replay_capacity = capacity;
self
}
/// Set the trajectory window size.
pub fn with_trajectory_window(mut self, window: usize) -> Self {
self.trajectory_window = window;
self
}
/// Return the path to the shared RVF store file.
pub fn store_path(&self) -> PathBuf {
self.data_dir.join("sona.rvf")
}
/// Ensure the data directory exists.
pub fn ensure_dirs(&self) -> std::io::Result<()> {
std::fs::create_dir_all(&self.data_dir)
}
/// Validate the configuration.
pub fn validate(&self) -> Result<(), ConfigError> {
if self.dimension == 0 {
return Err(ConfigError::InvalidDimension);
}
if self.replay_capacity == 0 {
return Err(ConfigError::InvalidReplayCapacity);
}
if self.trajectory_window == 0 {
return Err(ConfigError::InvalidTrajectoryWindow);
}
Ok(())
}
}
/// Errors specific to adapter configuration.
/// Returned by [`SonaConfig::validate`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
    /// Replay capacity must be > 0.
    InvalidReplayCapacity,
    /// Trajectory window must be > 0.
    InvalidTrajectoryWindow,
}
impl std::fmt::Display for ConfigError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
Self::InvalidReplayCapacity => write!(f, "replay capacity must be > 0"),
Self::InvalidTrajectoryWindow => write!(f, "trajectory window must be > 0"),
}
}
}
// Marker impl: the `Display` and `Debug` impls above satisfy the `Error` contract.
impl std::error::Error for ConfigError {}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    // Tests cover defaults, the derived store path, each validation
    // failure, the happy path, and builder chaining.
    #[test]
    fn config_defaults() {
        let cfg = SonaConfig::new("/tmp/test", 256);
        assert_eq!(cfg.dimension, 256);
        assert_eq!(cfg.replay_capacity, 10_000);
        assert_eq!(cfg.trajectory_window, 100);
    }
    #[test]
    fn config_store_path() {
        let cfg = SonaConfig::new("/data/sona", 128);
        assert_eq!(cfg.store_path(), Path::new("/data/sona/sona.rvf"));
    }
    #[test]
    fn validate_zero_dimension() {
        let cfg = SonaConfig::new("/tmp", 0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }
    #[test]
    fn validate_zero_replay_capacity() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.replay_capacity = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidReplayCapacity));
    }
    #[test]
    fn validate_zero_trajectory_window() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.trajectory_window = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidTrajectoryWindow));
    }
    #[test]
    fn validate_ok() {
        let cfg = SonaConfig::new("/tmp", 64);
        assert!(cfg.validate().is_ok());
    }
    #[test]
    fn builder_methods() {
        let cfg = SonaConfig::new("/tmp", 256)
            .with_replay_capacity(5000)
            .with_trajectory_window(50);
        assert_eq!(cfg.replay_capacity, 5000);
        assert_eq!(cfg.trajectory_window, 50);
    }
}

View File

@@ -0,0 +1,397 @@
//! `ExperienceReplayBuffer` — circular buffer of experiences stored
//! as RVF vectors in the shared SONA store.
//!
//! Each experience captures a (state, action, reward, next_state) tuple.
//! Only the state embedding is stored as the vector, at the configured
//! dimension; the next-state embedding is accepted by `push` but not
//! currently persisted. The action and reward are stored as metadata.
//! A type marker of "experience" distinguishes these entries from
//! trajectory and pattern data.
use std::collections::VecDeque;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;
use crate::config::SonaConfig;
/// Metadata field IDs (shared across all SONA stores).
// These must stay aligned with the field IDs used by `trajectory.rs` and
// `pattern.rs`, since all three stores write to the same RVF file layout.
const FIELD_STEP_ID: u16 = 0;
const FIELD_ACTION: u16 = 1;
const FIELD_REWARD: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;
/// Type marker for experience entries (written to `FIELD_TYPE`).
const TYPE_EXPERIENCE: &str = "experience";
/// A single experience returned from retrieval or sampling.
#[derive(Clone, Debug)]
pub struct Experience {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// The action taken.
    pub action: String,
    /// The reward received.
    pub reward: f64,
    /// Distance from query (only meaningful for prioritized sampling;
    /// uniform `sample` always sets this to 0.0).
    pub distance: f32,
}
/// Circular buffer of experiences stored as RVF vectors.
///
/// The `experience_ids` and `experience_meta` deques are kept in
/// lockstep: index i of one corresponds to index i of the other.
pub struct ExperienceReplayBuffer {
    store: RvfStore,
    config: SonaConfig,
    /// Ordered record of experience vector IDs (oldest first).
    experience_ids: VecDeque<u64>,
    /// Parallel metadata: (action, reward).
    experience_meta: VecDeque<(String, f64)>,
    /// Next vector ID to assign.
    next_id: u64,
}
impl ExperienceReplayBuffer {
    /// Create a new experience replay buffer.
    ///
    /// Validates the config, ensures the data directory exists, and
    /// creates a fresh RVF store at `config.store_path()`.
    // NOTE(review): TrajectoryStore and NeuralPatternStore also call
    // RvfStore::create on the same `sona.rvf` path — confirm the runtime
    // tolerates multiple creators before sharing one data_dir.
    pub fn create(config: SonaConfig) -> Result<Self, ExperienceStoreError> {
        config.validate().map_err(ExperienceStoreError::Config)?;
        config.ensure_dirs().map_err(|e| ExperienceStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(ExperienceStoreError::Rvf)?;
        Ok(Self {
            store,
            config,
            experience_ids: VecDeque::new(),
            experience_meta: VecDeque::new(),
            // Vector IDs start at 1 and grow monotonically; 0 is never used.
            next_id: 1,
        })
    }
    /// Add an experience to the buffer.
    ///
    /// If the buffer is at capacity, the oldest experience is evicted.
    /// The `state_embedding` is used as the stored vector (for similarity
    /// search); `next_state_embedding` is currently not stored as a
    /// separate vector but could be added via metadata extension.
    ///
    /// Returns the internal vector ID.
    pub fn push(
        &mut self,
        state_embedding: &[f32],
        action: &str,
        reward: f64,
        _next_state_embedding: &[f32],
    ) -> Result<u64, ExperienceStoreError> {
        // Validate the dimension first so a bad input never triggers eviction.
        if state_embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: state_embedding.len(),
            });
        }
        // Evict oldest if at capacity.
        // Both deques are popped together to keep them index-aligned.
        if self.experience_ids.len() >= self.config.replay_capacity {
            if let Some(old_id) = self.experience_ids.pop_front() {
                self.experience_meta.pop_front();
                self.store.delete(&[old_id]).map_err(ExperienceStoreError::Rvf)?;
            }
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        // Unused fields (here FIELD_CATEGORY) are written as empty strings so
        // every SONA entry carries the same five-field metadata layout.
        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
            MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_EXPERIENCE.to_string()) },
        ];
        self.store
            .ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
            .map_err(ExperienceStoreError::Rvf)?;
        self.experience_ids.push_back(vector_id);
        self.experience_meta.push_back((action.to_string(), reward));
        Ok(vector_id)
    }
    /// Sample `n` experiences uniformly from the buffer.
    ///
    /// Uses a deterministic stride-based selection: picks experiences
    /// evenly spaced across the buffer. Returns fewer than `n` if the
    /// buffer contains fewer experiences.
    pub fn sample(&self, n: usize) -> Vec<Experience> {
        let len = self.experience_ids.len();
        if len == 0 || n == 0 {
            return Vec::new();
        }
        let count = n.min(len);
        // Stride >= 1; integer division means the walk may finish before
        // `count` items are collected — the backfill pass below handles that.
        let step = if count >= len { 1 } else { len / count };
        let mut results = Vec::with_capacity(count);
        let mut idx = 0;
        while results.len() < count && idx < len {
            let vid = self.experience_ids[idx];
            let (action, reward) = &self.experience_meta[idx];
            results.push(Experience {
                id: vid,
                action: action.clone(),
                reward: *reward,
                // Distance is not meaningful for uniform sampling.
                distance: 0.0,
            });
            idx += step;
        }
        // If stride skipped some, fill from the end.
        // The `any` check avoids duplicating IDs already picked by the stride.
        if results.len() < count {
            let mut back_idx = len - 1;
            while results.len() < count {
                let vid = self.experience_ids[back_idx];
                if !results.iter().any(|e| e.id == vid) {
                    let (action, reward) = &self.experience_meta[back_idx];
                    results.push(Experience {
                        id: vid,
                        action: action.clone(),
                        reward: *reward,
                        distance: 0.0,
                    });
                }
                if back_idx == 0 {
                    break;
                }
                back_idx -= 1;
            }
        }
        results
    }
    /// Sample `n` experiences prioritized by similarity to the given embedding.
    ///
    /// Finds the `n` nearest-neighbor experiences by vector distance.
    pub fn sample_prioritized(
        &mut self,
        n: usize,
        embedding: &[f32],
    ) -> Result<Vec<Experience>, ExperienceStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let results = self.store
            .query(embedding, n, &QueryOptions::default())
            .map_err(ExperienceStoreError::Rvf)?;
        Ok(self.enrich_results(&results))
    }
    /// Return the number of experiences in the buffer.
    pub fn len(&self) -> usize {
        self.experience_ids.len()
    }
    /// Return whether the buffer is empty.
    pub fn is_empty(&self) -> bool {
        self.experience_ids.is_empty()
    }
    /// Return whether the buffer has reached its capacity.
    pub fn is_full(&self) -> bool {
        self.experience_ids.len() >= self.config.replay_capacity
    }
    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), ExperienceStoreError> {
        self.store.close().map_err(ExperienceStoreError::Rvf)
    }
    // ── Internal ──────────────────────────────────────────────────────
    /// Join raw search results with the in-memory (action, reward) metadata.
    /// Linear scan per result; acceptable for replay-buffer-sized deques.
    /// Unknown IDs fall back to empty/zero fields rather than erroring.
    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<Experience> {
        results
            .iter()
            .map(|r| {
                let meta = self.experience_ids.iter()
                    .zip(self.experience_meta.iter())
                    .find(|(&vid, _)| vid == r.id)
                    .map(|(_, m)| m);
                match meta {
                    Some((action, reward)) => Experience {
                        id: r.id,
                        action: action.clone(),
                        reward: *reward,
                        distance: r.distance,
                    },
                    None => Experience {
                        id: r.id,
                        action: String::new(),
                        reward: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}
/// Errors from experience replay buffer operations.
#[derive(Debug)]
pub enum ExperienceStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch {
        /// The dimension the buffer was configured with.
        expected: usize,
        /// The dimension of the embedding that was supplied.
        got: usize,
    },
}
impl std::fmt::Display for ExperienceStoreError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Rvf(e) => write!(f, "RVF store error: {e}"),
Self::Config(e) => write!(f, "config error: {e}"),
Self::Io(msg) => write!(f, "I/O error: {msg}"),
Self::DimensionMismatch { expected, got } => {
write!(f, "dimension mismatch: expected {expected}, got {got}")
}
}
}
}
// Marker impl: `Display` + `Debug` above satisfy the `Error` contract.
impl std::error::Error for ExperienceStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    // Shared fixture: dimension-4 config with a small capacity of 5.
    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4).with_replay_capacity(5)
    }
    // Deterministic 4-element embedding derived from a seed value.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }
    #[test]
    fn push_and_sample() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&make_embedding(1.0), "explore", 0.5, &make_embedding(1.1)).unwrap();
        buf.push(&make_embedding(2.0), "exploit", 0.8, &make_embedding(2.1)).unwrap();
        buf.push(&make_embedding(3.0), "explore", 0.3, &make_embedding(3.1)).unwrap();
        assert_eq!(buf.len(), 3);
        assert!(!buf.is_full());
        let samples = buf.sample(2);
        assert_eq!(samples.len(), 2);
        buf.close().unwrap();
    }
    #[test]
    fn buffer_capacity_eviction() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path()); // capacity = 5
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        for i in 0..7 {
            buf.push(&make_embedding(i as f32 + 0.1), &format!("act{i}"), i as f64 * 0.1, &make_embedding(0.0)).unwrap();
        }
        assert_eq!(buf.len(), 5);
        assert!(buf.is_full());
        // The oldest two (act0, act1) should have been evicted.
        let all = buf.sample(5);
        assert_eq!(all.len(), 5);
        assert!(all.iter().all(|e| e.action != "act0" && e.action != "act1"));
        buf.close().unwrap();
    }
    #[test]
    fn sample_prioritized() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&[1.0, 0.0, 0.0, 0.0], "a", 0.1, &[0.0; 4]).unwrap();
        buf.push(&[0.0, 1.0, 0.0, 0.0], "b", 0.2, &[0.0; 4]).unwrap();
        buf.push(&[0.9, 0.1, 0.0, 0.0], "c", 0.3, &[0.0; 4]).unwrap();
        // Results must come back in ascending distance order.
        let results = buf.sample_prioritized(2, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);
        buf.close().unwrap();
    }
    #[test]
    fn empty_buffer_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        assert!(buf.is_empty());
        assert!(!buf.is_full());
        assert_eq!(buf.len(), 0);
        let samples = buf.sample(5);
        assert!(samples.is_empty());
        let results = buf.sample_prioritized(5, &make_embedding(1.0)).unwrap();
        assert!(results.is_empty());
        buf.close().unwrap();
    }
    #[test]
    fn sample_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&make_embedding(1.0), "a", 0.1, &make_embedding(0.0)).unwrap();
        buf.push(&make_embedding(2.0), "b", 0.2, &make_embedding(0.0)).unwrap();
        let samples = buf.sample(10);
        assert_eq!(samples.len(), 2);
        buf.close().unwrap();
    }
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        // Both push and prioritized sampling must reject 2-element vectors.
        let result = buf.push(&[1.0, 2.0], "a", 0.1, &[1.0, 2.0]);
        assert!(result.is_err());
        let result = buf.sample_prioritized(5, &[1.0, 2.0]);
        assert!(result.is_err());
        buf.close().unwrap();
    }
}

View File

@@ -0,0 +1,44 @@
//! RVF adapter for SONA (Self-Optimizing Neural Architecture).
//!
//! This crate bridges SONA's learning trajectory tracking, pattern
//! recognition, and experience replay with the RuVector Format (RVF)
//! segment store per ADR-029. All three data types share a single
//! underlying RVF file, distinguished by a type marker in metadata
//! field 4.
//!
//! # Architecture
//!
//! - **`TrajectoryStore`**: Records and queries sequences of state
//! embeddings that form a learning trajectory.
//! - **`ExperienceReplayBuffer`**: Circular buffer of (state, action,
//! reward, next_state) tuples for off-policy training.
//! - **`NeuralPatternStore`**: Stores recognized neural patterns with
//! confidence scores, searchable by category or embedding similarity.
//! - **`SonaConfig`**: Configuration for data directory, dimension,
//! replay capacity, and trajectory window size.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_sona::{SonaConfig, TrajectoryStore, ExperienceReplayBuffer, NeuralPatternStore};
//!
//! let config = SonaConfig::new("/tmp/sona-data", 256);
//! let mut trajectory = TrajectoryStore::create(config.clone()).unwrap();
//!
//! let embedding = vec![0.1f32; 256];
//! trajectory.record_step(1, &embedding, "explore", 0.5).unwrap();
//!
//! let recent = trajectory.get_recent(10);
//! let similar = trajectory.search_similar_states(&embedding, 5).unwrap();
//! trajectory.close().unwrap();
//! ```
pub mod config;
pub mod experience;
pub mod pattern;
pub mod trajectory;
pub use config::{ConfigError, SonaConfig};
pub use experience::{Experience, ExperienceReplayBuffer};
pub use pattern::{NeuralPattern, NeuralPatternStore};
pub use trajectory::{TrajectoryStep, TrajectoryStore};

View File

@@ -0,0 +1,423 @@
//! `NeuralPatternStore` — stores recognized neural patterns as RVF
//! vectors with confidence scores and categories.
//!
//! Patterns can be searched by embedding similarity, filtered by
//! category, or ranked by confidence. A type marker of "pattern"
//! distinguishes these entries from trajectory and experience data.
use std::collections::HashMap;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;
use crate::config::SonaConfig;
/// Metadata field IDs (shared across all SONA stores).
// Field IDs 0/3/4 match trajectory.rs and experience.rs; 1 and 2 are
// repurposed here for the pattern name and confidence score.
const FIELD_STEP_ID: u16 = 0;
const FIELD_NAME: u16 = 1;
const FIELD_CONFIDENCE: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;
/// Type marker for pattern entries (written to `FIELD_TYPE`).
const TYPE_PATTERN: &str = "pattern";
/// A recognized neural pattern returned from retrieval or search.
#[derive(Clone, Debug)]
pub struct NeuralPattern {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// Human-readable pattern name.
    pub name: String,
    /// Category this pattern belongs to.
    pub category: String,
    /// Confidence score (0.0 to 1.0).
    pub confidence: f64,
    /// Distance from query (only meaningful for search results;
    /// category/top-k accessors set this to 0.0).
    pub distance: f32,
}
/// Stores recognized neural patterns as RVF vectors.
///
/// `patterns` and `category_index` are in-memory indexes over the same
/// vector IDs and must be kept in sync by the mutating methods.
pub struct NeuralPatternStore {
    store: RvfStore,
    config: SonaConfig,
    /// In-memory index of pattern metadata keyed by vector ID.
    patterns: HashMap<u64, PatternMeta>,
    /// In-memory index of category -> vector IDs.
    category_index: HashMap<String, Vec<u64>>,
    /// Next vector ID to assign.
    next_id: u64,
}
/// In-memory metadata for a pattern.
/// Mirrors the name/category/confidence written to the RVF metadata.
#[derive(Clone, Debug)]
struct PatternMeta {
    name: String,
    category: String,
    confidence: f64,
}
impl NeuralPatternStore {
    /// Create a new neural pattern store.
    ///
    /// Validates the config, ensures the data directory exists, and
    /// creates a fresh RVF store at `config.store_path()`.
    pub fn create(config: SonaConfig) -> Result<Self, PatternStoreError> {
        config.validate().map_err(PatternStoreError::Config)?;
        config.ensure_dirs().map_err(|e| PatternStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(PatternStoreError::Rvf)?;
        Ok(Self {
            store,
            config,
            patterns: HashMap::new(),
            category_index: HashMap::new(),
            // Vector IDs start at 1 and grow monotonically; 0 is never used.
            next_id: 1,
        })
    }
    /// Store a new neural pattern.
    ///
    /// Returns the internal vector ID assigned to this pattern.
    pub fn store_pattern(
        &mut self,
        name: &str,
        category: &str,
        embedding: &[f32],
        confidence: f64,
    ) -> Result<u64, PatternStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_NAME, value: MetadataValue::String(name.to_string()) },
            MetadataEntry { field_id: FIELD_CONFIDENCE, value: MetadataValue::F64(confidence) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(category.to_string()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_PATTERN.to_string()) },
        ];
        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(PatternStoreError::Rvf)?;
        // Keep both in-memory indexes in sync with the store write above.
        let meta = PatternMeta {
            name: name.to_string(),
            category: category.to_string(),
            confidence,
        };
        self.patterns.insert(vector_id, meta);
        self.category_index
            .entry(category.to_string())
            .or_default()
            .push(vector_id);
        Ok(vector_id)
    }
    /// Search for patterns whose embeddings are most similar to the given embedding.
    pub fn search_patterns(
        &mut self,
        embedding: &[f32],
        k: usize,
    ) -> Result<Vec<NeuralPattern>, PatternStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let results = self.store
            .query(embedding, k, &QueryOptions::default())
            .map_err(PatternStoreError::Rvf)?;
        Ok(self.enrich_results(&results))
    }
    /// Get all patterns in a given category.
    /// Returns an empty vec for unknown categories; `distance` is 0.0.
    pub fn get_by_category(&self, category: &str) -> Vec<NeuralPattern> {
        let ids = match self.category_index.get(category) {
            Some(ids) => ids,
            None => return Vec::new(),
        };
        ids.iter()
            .filter_map(|&vid| {
                self.patterns.get(&vid).map(|meta| NeuralPattern {
                    id: vid,
                    name: meta.name.clone(),
                    category: meta.category.clone(),
                    confidence: meta.confidence,
                    distance: 0.0,
                })
            })
            .collect()
    }
    /// Update the confidence score for a pattern by its vector ID.
    // NOTE(review): this updates only the in-memory map; the F64 confidence
    // previously written to RVF metadata is not rewritten here — confirm
    // whether persistence across reopen is required.
    pub fn update_confidence(&mut self, id: u64, confidence: f64) -> Result<(), PatternStoreError> {
        match self.patterns.get_mut(&id) {
            Some(meta) => {
                meta.confidence = confidence;
                Ok(())
            }
            None => Err(PatternStoreError::PatternNotFound(id)),
        }
    }
    /// Get the top `k` patterns ranked by confidence (highest first).
    /// Ties are broken arbitrarily (HashMap iteration order).
    pub fn get_top_patterns(&self, k: usize) -> Vec<NeuralPattern> {
        let mut all: Vec<_> = self.patterns.iter()
            .map(|(&vid, meta)| NeuralPattern {
                id: vid,
                name: meta.name.clone(),
                category: meta.category.clone(),
                confidence: meta.confidence,
                distance: 0.0,
            })
            .collect();
        all.sort_by(|a, b| {
            // NaN-safe descending sort: incomparable values compare Equal.
            b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)
        });
        all.truncate(k);
        all
    }
    /// Return the total number of stored patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }
    /// Return whether the store has no patterns.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), PatternStoreError> {
        self.store.close().map_err(PatternStoreError::Rvf)
    }
    // ── Internal ──────────────────────────────────────────────────────
    /// Join raw search results with in-memory pattern metadata.
    /// Unknown IDs fall back to empty/zero fields rather than erroring.
    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<NeuralPattern> {
        results
            .iter()
            .map(|r| {
                match self.patterns.get(&r.id) {
                    Some(meta) => NeuralPattern {
                        id: r.id,
                        name: meta.name.clone(),
                        category: meta.category.clone(),
                        confidence: meta.confidence,
                        distance: r.distance,
                    },
                    None => NeuralPattern {
                        id: r.id,
                        name: String::new(),
                        category: String::new(),
                        confidence: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}
/// Errors from neural pattern store operations.
#[derive(Debug)]
pub enum PatternStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch {
        /// The dimension the store was configured with.
        expected: usize,
        /// The dimension of the embedding that was supplied.
        got: usize,
    },
    /// Pattern not found for the given ID.
    PatternNotFound(u64),
}
impl std::fmt::Display for PatternStoreError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Rvf(e) => write!(f, "RVF store error: {e}"),
Self::Config(e) => write!(f, "config error: {e}"),
Self::Io(msg) => write!(f, "I/O error: {msg}"),
Self::DimensionMismatch { expected, got } => {
write!(f, "dimension mismatch: expected {expected}, got {got}")
}
Self::PatternNotFound(id) => write!(f, "pattern not found: {id}"),
}
}
}
// Marker impl: `Display` + `Debug` above satisfy the `Error` contract.
impl std::error::Error for PatternStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    // Shared fixture: dimension-4 config with default capacities.
    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4)
    }
    // Deterministic 4-element embedding derived from a seed value.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }
    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("convergent", "thinking", &[1.0, 0.0, 0.0, 0.0], 0.9).unwrap();
        store.store_pattern("divergent", "thinking", &[0.0, 1.0, 0.0, 0.0], 0.7).unwrap();
        store.store_pattern("lateral", "creative", &[0.0, 0.0, 1.0, 0.0], 0.8).unwrap();
        // Results must come back in ascending distance order.
        let results = store.search_patterns(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);
        store.close().unwrap();
    }
    #[test]
    fn get_by_category() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("p1", "alpha", &make_embedding(1.0), 0.9).unwrap();
        store.store_pattern("p2", "beta", &make_embedding(2.0), 0.7).unwrap();
        store.store_pattern("p3", "alpha", &make_embedding(3.0), 0.8).unwrap();
        let alpha = store.get_by_category("alpha");
        assert_eq!(alpha.len(), 2);
        assert!(alpha.iter().all(|p| p.category == "alpha"));
        let beta = store.get_by_category("beta");
        assert_eq!(beta.len(), 1);
        assert_eq!(beta[0].name, "p2");
        let empty = store.get_by_category("nonexistent");
        assert!(empty.is_empty());
        store.close().unwrap();
    }
    #[test]
    fn update_confidence() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        let id = store.store_pattern("p1", "cat", &make_embedding(1.0), 0.5).unwrap();
        store.update_confidence(id, 0.95).unwrap();
        let top = store.get_top_patterns(1);
        assert_eq!(top.len(), 1);
        assert!((top[0].confidence - 0.95).abs() < f64::EPSILON);
        store.close().unwrap();
    }
    #[test]
    fn update_confidence_not_found() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        let result = store.update_confidence(999, 0.5);
        assert!(result.is_err());
        store.close().unwrap();
    }
    #[test]
    fn get_top_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("low", "cat", &make_embedding(1.0), 0.3).unwrap();
        store.store_pattern("high", "cat", &make_embedding(2.0), 0.9).unwrap();
        store.store_pattern("mid", "cat", &make_embedding(3.0), 0.6).unwrap();
        // Ranking is by confidence descending, independent of insert order.
        let top = store.get_top_patterns(2);
        assert_eq!(top.len(), 2);
        assert_eq!(top[0].name, "high");
        assert_eq!(top[1].name, "mid");
        store.close().unwrap();
    }
    #[test]
    fn get_top_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("only", "cat", &make_embedding(1.0), 0.5).unwrap();
        let top = store.get_top_patterns(10);
        assert_eq!(top.len(), 1);
        store.close().unwrap();
    }
    #[test]
    fn empty_store_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        let results = store.search_patterns(&make_embedding(1.0), 5).unwrap();
        assert!(results.is_empty());
        let by_cat = store.get_by_category("anything");
        assert!(by_cat.is_empty());
        let top = store.get_top_patterns(5);
        assert!(top.is_empty());
        store.close().unwrap();
    }
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        // Both store and search must reject 2-element vectors.
        let result = store.store_pattern("p", "c", &[1.0, 2.0], 0.5);
        assert!(result.is_err());
        let result = store.search_patterns(&[1.0, 2.0], 5);
        assert!(result.is_err());
        store.close().unwrap();
    }
}

View File

@@ -0,0 +1,422 @@
//! `TrajectoryStore` — stores learning trajectories as sequences of
//! state embeddings in the shared SONA RVF file.
//!
//! Each trajectory step records a state embedding, the action taken,
//! the reward received, and a monotonically increasing step ID. Steps
//! are stored as RVF vectors with metadata fields encoding the step
//! details and a type marker of "trajectory".
use std::collections::VecDeque;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::{RvfStore, SearchResult};
use rvf_types::RvfError;
use crate::config::SonaConfig;
// Metadata field IDs (shared across all SONA stores). These numeric IDs
// tag the metadata entries attached to each RVF vector and must stay
// consistent with the other SONA store modules sharing the file format.
/// Caller-supplied step identifier (stored as u64).
const FIELD_STEP_ID: u16 = 0;
/// Action label for the step (stored as a string).
const FIELD_ACTION: u16 = 1;
/// Scalar reward for the step (stored as f64).
const FIELD_REWARD: u16 = 2;
/// Category field; this store always writes it as an empty string.
const FIELD_CATEGORY: u16 = 3;
/// Entry-type marker field (see `TYPE_TRAJECTORY`).
const FIELD_TYPE: u16 = 4;
/// Value written under `FIELD_TYPE` to mark trajectory entries.
const TYPE_TRAJECTORY: &str = "trajectory";
/// A single trajectory step returned from retrieval (`get_recent`,
/// `get_trajectory_window`) or similarity search (`search_similar_states`).
#[derive(Clone, Debug)]
pub struct TrajectoryStep {
/// Internal vector ID in the RVF store (assigned sequentially, starting at 1).
pub id: u64,
/// The caller-supplied step identifier within the trajectory.
/// 0 when search returned a vector no longer in the in-memory window.
pub step_id: u64,
/// The action taken at this step; empty when metadata was not found
/// in the in-memory window.
pub action: String,
/// The reward received at this step.
pub reward: f64,
/// Distance from query (only meaningful for search results; 0.0 for
/// plain retrieval).
pub distance: f32,
}
/// Stores learning trajectories as sequences of state embeddings.
///
/// Keeps a bounded in-memory window of at most `config.trajectory_window`
/// recent steps alongside the on-disk RVF file. Trimming the window does
/// not delete vectors from the file; `clear_old` does.
pub struct TrajectoryStore {
/// Underlying RVF vector store (one vector per recorded step).
store: RvfStore,
/// Validated SONA configuration (dimension, paths, window size).
config: SonaConfig,
/// In-memory ordered record of trajectory step vector IDs, newest last.
step_ids: VecDeque<u64>,
/// Parallel deque of step metadata for fast retrieval.
/// Kept index-aligned with `step_ids`.
step_meta: VecDeque<(u64, String, f64)>, // (step_id, action, reward)
/// Next vector ID to assign; starts at 1.
next_id: u64,
}
impl TrajectoryStore {
/// Create a new trajectory store, initializing the data directory and RVF file.
pub fn create(config: SonaConfig) -> Result<Self, SonaStoreError> {
config.validate().map_err(SonaStoreError::Config)?;
config.ensure_dirs().map_err(|e| SonaStoreError::Io(e.to_string()))?;
let rvf_options = RvfOptions {
dimension: config.dimension,
..Default::default()
};
let store = RvfStore::create(&config.store_path(), rvf_options)
.map_err(SonaStoreError::Rvf)?;
Ok(Self {
store,
config,
step_ids: VecDeque::new(),
step_meta: VecDeque::new(),
next_id: 1,
})
}
/// Record a single trajectory step.
///
/// Returns the internal vector ID assigned to this step.
pub fn record_step(
&mut self,
step_id: u64,
state_embedding: &[f32],
action: &str,
reward: f64,
) -> Result<u64, SonaStoreError> {
if state_embedding.len() != self.config.dimension as usize {
return Err(SonaStoreError::DimensionMismatch {
expected: self.config.dimension as usize,
got: state_embedding.len(),
});
}
let vector_id = self.next_id;
self.next_id += 1;
let metadata = vec![
MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(step_id) },
MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_TRAJECTORY.to_string()) },
];
self.store
.ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
.map_err(SonaStoreError::Rvf)?;
self.step_ids.push_back(vector_id);
self.step_meta.push_back((step_id, action.to_string(), reward));
// Trim to trajectory window size.
while self.step_ids.len() > self.config.trajectory_window {
self.step_ids.pop_front();
self.step_meta.pop_front();
}
Ok(vector_id)
}
/// Get the `n` most recent trajectory steps.
///
/// Returns fewer than `n` if fewer steps are available.
pub fn get_recent(&self, n: usize) -> Vec<TrajectoryStep> {
let len = self.step_ids.len();
let start = len.saturating_sub(n);
self.step_ids
.iter()
.zip(self.step_meta.iter())
.skip(start)
.map(|(&vid, (step_id, action, reward))| TrajectoryStep {
id: vid,
step_id: *step_id,
action: action.clone(),
reward: *reward,
distance: 0.0,
})
.collect()
}
/// Search for trajectory steps whose state embeddings are most
/// similar to the given embedding.
pub fn search_similar_states(
&mut self,
embedding: &[f32],
k: usize,
) -> Result<Vec<TrajectoryStep>, SonaStoreError> {
if embedding.len() != self.config.dimension as usize {
return Err(SonaStoreError::DimensionMismatch {
expected: self.config.dimension as usize,
got: embedding.len(),
});
}
let results = self.store
.query(embedding, k, &QueryOptions::default())
.map_err(SonaStoreError::Rvf)?;
Ok(self.enrich_results(&results))
}
/// Get all steps in the current trajectory window.
pub fn get_trajectory_window(&self) -> Vec<TrajectoryStep> {
self.get_recent(self.config.trajectory_window)
}
/// Prune old trajectory data, keeping only the most recent `keep_last_n` steps.
///
/// Returns the number of steps deleted.
pub fn clear_old(&mut self, keep_last_n: usize) -> Result<usize, SonaStoreError> {
let len = self.step_ids.len();
if len <= keep_last_n {
return Ok(0);
}
let to_remove = len - keep_last_n;
let mut ids_to_delete = Vec::with_capacity(to_remove);
for _ in 0..to_remove {
if let Some(vid) = self.step_ids.pop_front() {
ids_to_delete.push(vid);
self.step_meta.pop_front();
}
}
if !ids_to_delete.is_empty() {
self.store.delete(&ids_to_delete).map_err(SonaStoreError::Rvf)?;
}
Ok(ids_to_delete.len())
}
/// Return the number of steps in the current in-memory window.
pub fn len(&self) -> usize {
self.step_ids.len()
}
/// Return whether the store has no steps in the window.
pub fn is_empty(&self) -> bool {
self.step_ids.is_empty()
}
/// Close the store, releasing locks.
pub fn close(self) -> Result<(), SonaStoreError> {
self.store.close().map_err(SonaStoreError::Rvf)
}
// ── Internal ──────────────────────────────────────────────────────
/// Enrich raw search results with step metadata from the in-memory index.
fn enrich_results(&self, results: &[SearchResult]) -> Vec<TrajectoryStep> {
results
.iter()
.map(|r| {
let meta = self.step_ids.iter()
.zip(self.step_meta.iter())
.find(|(&vid, _)| vid == r.id)
.map(|(_, m)| m);
match meta {
Some((step_id, action, reward)) => TrajectoryStep {
id: r.id,
step_id: *step_id,
action: action.clone(),
reward: *reward,
distance: r.distance,
},
None => TrajectoryStep {
id: r.id,
step_id: 0,
action: String::new(),
reward: 0.0,
distance: r.distance,
},
}
})
.collect()
}
}
/// Errors from SONA store operations.
#[derive(Debug)]
pub enum SonaStoreError {
/// Underlying RVF store error (create/ingest/query/delete/close).
Rvf(RvfError),
/// Configuration validation error.
Config(crate::config::ConfigError),
/// I/O error (e.g. while creating data directories), stringified.
Io(String),
/// Embedding dimension mismatch between the input slice length (`got`)
/// and the configured dimension (`expected`).
DimensionMismatch { expected: usize, got: usize },
}
impl std::fmt::Display for SonaStoreError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Rvf(e) => write!(f, "RVF store error: {e}"),
Self::Config(e) => write!(f, "config error: {e}"),
Self::Io(msg) => write!(f, "I/O error: {msg}"),
Self::DimensionMismatch { expected, got } => {
write!(f, "dimension mismatch: expected {expected}, got {got}")
}
}
}
}
impl std::error::Error for SonaStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A 4-dimensional config whose trajectory window holds 5 steps.
    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4).with_trajectory_window(5)
    }

    /// Deterministic 4-element embedding derived from `seed`.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn record_and_get_recent() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        ts.record_step(1, &make_embedding(1.0), "explore", 0.5).unwrap();
        ts.record_step(2, &make_embedding(2.0), "exploit", 0.8).unwrap();
        ts.record_step(3, &make_embedding(3.0), "explore", 0.3).unwrap();
        // The two newest steps come back oldest-first.
        let tail = ts.get_recent(2);
        assert_eq!(tail.len(), 2);
        assert_eq!(tail[0].step_id, 2);
        assert_eq!(tail[1].step_id, 3);
        assert_eq!(tail[1].action, "explore");
        ts.close().unwrap();
    }

    #[test]
    fn get_recent_more_than_available() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        ts.record_step(1, &make_embedding(1.0), "a", 0.1).unwrap();
        // Requesting more steps than exist returns just what is stored.
        let tail = ts.get_recent(10);
        assert_eq!(tail.len(), 1);
        assert_eq!(tail[0].step_id, 1);
        ts.close().unwrap();
    }

    #[test]
    fn trajectory_window_trimming() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap(); // window = 5
        for step in 0u64..8 {
            ts.record_step(step, &make_embedding(step as f32 + 0.1), "act", 0.1).unwrap();
        }
        // Only the newest 5 steps (3..=7) survive in the window.
        assert_eq!(ts.len(), 5);
        let window = ts.get_trajectory_window();
        assert_eq!(window.len(), 5);
        assert_eq!(window[0].step_id, 3);
        assert_eq!(window[4].step_id, 7);
        ts.close().unwrap();
    }

    #[test]
    fn search_similar_states() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        ts.record_step(1, &[1.0, 0.0, 0.0, 0.0], "a", 0.1).unwrap();
        ts.record_step(2, &[0.0, 1.0, 0.0, 0.0], "b", 0.2).unwrap();
        ts.record_step(3, &[0.9, 0.1, 0.0, 0.0], "c", 0.3).unwrap();
        let hits = ts.search_similar_states(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(hits.len(), 2);
        // Closest to [1,0,0,0] should rank first; results are nearest-first.
        assert!(hits[0].distance <= hits[1].distance);
        ts.close().unwrap();
    }

    #[test]
    fn clear_old_steps() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        for step in 0u64..5 {
            ts.record_step(step, &make_embedding(step as f32 + 0.1), "act", 0.1).unwrap();
        }
        let removed = ts.clear_old(2).unwrap();
        assert_eq!(removed, 3);
        assert_eq!(ts.len(), 2);
        // Only the two newest steps (3 and 4) remain.
        let kept = ts.get_recent(10);
        assert_eq!(kept.len(), 2);
        assert_eq!(kept[0].step_id, 3);
        assert_eq!(kept[1].step_id, 4);
        ts.close().unwrap();
    }

    #[test]
    fn clear_old_no_op_when_within_limit() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        ts.record_step(1, &make_embedding(1.0), "a", 0.1).unwrap();
        // Nothing is removed when the window is already within the limit.
        assert_eq!(ts.clear_old(10).unwrap(), 0);
        assert_eq!(ts.len(), 1);
        ts.close().unwrap();
    }

    #[test]
    fn empty_store_operations() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        // A freshly created store reports empty from every read path.
        assert!(ts.is_empty());
        assert_eq!(ts.len(), 0);
        assert!(ts.get_recent(5).is_empty());
        assert!(ts.get_trajectory_window().is_empty());
        assert!(ts.search_similar_states(&make_embedding(1.0), 5).unwrap().is_empty());
        ts.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let tmp = TempDir::new().unwrap();
        let mut ts = TrajectoryStore::create(test_config(tmp.path())).unwrap();
        // Both the write and the search path reject wrong-length embeddings.
        assert!(ts.record_step(1, &[1.0, 2.0], "a", 0.1).is_err());
        assert!(ts.search_similar_states(&[1.0, 2.0], 5).is_err());
        ts.close().unwrap();
    }
}