Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
[package]
name = "rvf-adapter-sona"
version = "0.1.0"
edition = "2021"
description = "SONA adapter for RuVector Format -- stores learning trajectories, neural patterns, and experience replay buffers as RVF segments"
license = "MIT OR Apache-2.0"
repository = "https://github.com/ruvnet/ruvector"
rust-version = "1.87"
[features]
default = ["std"]
std = []
[dependencies]
rvf-runtime = { path = "../../rvf-runtime", features = ["std"] }
rvf-types = { path = "../../rvf-types", features = ["std"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,142 @@
//! Configuration for the SONA adapter.
use std::path::PathBuf;
/// Configuration for the RVF-backed SONA stores.
#[derive(Clone, Debug)]
pub struct SonaConfig {
    /// Directory where RVF data files are stored.
    pub data_dir: PathBuf,
    /// Vector embedding dimension (must match SONA's embedding size).
    pub dimension: u16,
    /// Maximum number of experiences in the replay buffer.
    pub replay_capacity: usize,
    /// Number of recent trajectory steps to retain in the window.
    pub trajectory_window: usize,
}

impl SonaConfig {
    /// Build a configuration from the required parameters, filling the
    /// remaining fields with sensible defaults (10_000 replay slots and
    /// a 100-step trajectory window).
    pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
        let data_dir = data_dir.into();
        Self {
            data_dir,
            dimension,
            replay_capacity: 10_000,
            trajectory_window: 100,
        }
    }

    /// Override the replay buffer capacity (builder style).
    pub fn with_replay_capacity(mut self, capacity: usize) -> Self {
        self.replay_capacity = capacity;
        self
    }

    /// Override the trajectory window size (builder style).
    pub fn with_trajectory_window(mut self, window: usize) -> Self {
        self.trajectory_window = window;
        self
    }

    /// Path of the shared RVF store file inside `data_dir`.
    pub fn store_path(&self) -> PathBuf {
        let mut path = self.data_dir.clone();
        path.push("sona.rvf");
        path
    }

    /// Create the data directory (and any missing parents).
    pub fn ensure_dirs(&self) -> std::io::Result<()> {
        std::fs::create_dir_all(&self.data_dir)
    }

    /// Check that every numeric field is non-zero; errors are reported
    /// in field-declaration order (dimension, capacity, window).
    pub fn validate(&self) -> Result<(), ConfigError> {
        match (self.dimension, self.replay_capacity, self.trajectory_window) {
            (0, _, _) => Err(ConfigError::InvalidDimension),
            (_, 0, _) => Err(ConfigError::InvalidReplayCapacity),
            (_, _, 0) => Err(ConfigError::InvalidTrajectoryWindow),
            _ => Ok(()),
        }
    }
}

/// Errors specific to adapter configuration.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigError {
    /// Dimension must be > 0.
    InvalidDimension,
    /// Replay capacity must be > 0.
    InvalidReplayCapacity,
    /// Trajectory window must be > 0.
    InvalidTrajectoryWindow,
}

impl std::fmt::Display for ConfigError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            Self::InvalidDimension => "vector dimension must be > 0",
            Self::InvalidReplayCapacity => "replay capacity must be > 0",
            Self::InvalidTrajectoryWindow => "trajectory window must be > 0",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for ConfigError {}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // Defaults applied by `SonaConfig::new`.
    #[test]
    fn config_defaults() {
        let cfg = SonaConfig::new("/tmp/test", 256);
        assert_eq!(cfg.dimension, 256);
        assert_eq!(cfg.replay_capacity, 10_000);
        assert_eq!(cfg.trajectory_window, 100);
    }

    // `store_path` appends the fixed file name to the data directory.
    #[test]
    fn config_store_path() {
        let cfg = SonaConfig::new("/data/sona", 128);
        assert_eq!(cfg.store_path(), Path::new("/data/sona/sona.rvf"));
    }

    // Each zero-valued field produces its dedicated validation error.
    #[test]
    fn validate_zero_dimension() {
        let cfg = SonaConfig::new("/tmp", 0);
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidDimension));
    }

    #[test]
    fn validate_zero_replay_capacity() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.replay_capacity = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidReplayCapacity));
    }

    #[test]
    fn validate_zero_trajectory_window() {
        let mut cfg = SonaConfig::new("/tmp", 64);
        cfg.trajectory_window = 0;
        assert_eq!(cfg.validate(), Err(ConfigError::InvalidTrajectoryWindow));
    }

    #[test]
    fn validate_ok() {
        let cfg = SonaConfig::new("/tmp", 64);
        assert!(cfg.validate().is_ok());
    }

    // Builder methods override the defaults and return Self for chaining.
    #[test]
    fn builder_methods() {
        let cfg = SonaConfig::new("/tmp", 256)
            .with_replay_capacity(5000)
            .with_trajectory_window(50);
        assert_eq!(cfg.replay_capacity, 5000);
        assert_eq!(cfg.trajectory_window, 50);
    }
}

View File

@@ -0,0 +1,397 @@
//! `ExperienceReplayBuffer` — circular buffer of experiences stored
//! as RVF vectors in the shared SONA store.
//!
//! Each experience captures a (state, action, reward, next_state) tuple.
//! Only the state embedding is stored as the vector (at the configured
//! dimension); the next_state embedding is currently accepted by `push`
//! but not persisted. The action and reward are stored as metadata. A
//! type marker of "experience" distinguishes these entries from
//! trajectory and pattern data.
use std::collections::VecDeque;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;
use crate::config::SonaConfig;
/// Metadata field IDs (shared across all SONA stores).
///
/// NOTE(review): for experience entries, FIELD_STEP_ID is populated with
/// the internal vector ID rather than a caller-supplied step number —
/// see `ExperienceReplayBuffer::push`.
const FIELD_STEP_ID: u16 = 0;
const FIELD_ACTION: u16 = 1;
const FIELD_REWARD: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;
/// Type marker for experience entries.
const TYPE_EXPERIENCE: &str = "experience";
/// A single experience returned from retrieval or sampling.
#[derive(Clone, Debug)]
pub struct Experience {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// The action taken.
    pub action: String,
    /// The reward received.
    pub reward: f64,
    /// Distance from query (only meaningful for prioritized sampling;
    /// uniform `sample` always reports 0.0).
    pub distance: f32,
}

/// Circular buffer of experiences stored as RVF vectors.
///
/// The RVF store holds the embeddings; the two `VecDeque`s below form a
/// parallel in-memory index (same order, same length) used for fast
/// metadata lookup and FIFO eviction.
pub struct ExperienceReplayBuffer {
    store: RvfStore,
    config: SonaConfig,
    /// Ordered record of experience vector IDs (oldest first).
    experience_ids: VecDeque<u64>,
    /// Parallel metadata: (action, reward).
    experience_meta: VecDeque<(String, f64)>,
    /// Next vector ID to assign.
    next_id: u64,
}
impl ExperienceReplayBuffer {
    /// Create a new experience replay buffer.
    ///
    /// Validates the configuration, ensures the data directory exists,
    /// and creates the RVF store file at the shared `sona.rvf` path.
    /// NOTE(review): the other SONA stores also call `RvfStore::create`
    /// on the same path — confirm the runtime's semantics when the file
    /// already exists before using several stores against one config.
    pub fn create(config: SonaConfig) -> Result<Self, ExperienceStoreError> {
        config.validate().map_err(ExperienceStoreError::Config)?;
        config.ensure_dirs().map_err(|e| ExperienceStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(ExperienceStoreError::Rvf)?;
        Ok(Self {
            store,
            config,
            experience_ids: VecDeque::new(),
            experience_meta: VecDeque::new(),
            next_id: 1,
        })
    }

    /// Add an experience to the buffer.
    ///
    /// If the buffer is at capacity, the oldest experience is evicted —
    /// both from the in-memory index and from the RVF store.
    /// The `state_embedding` is used as the stored vector (for similarity
    /// search); `next_state_embedding` is currently not stored as a
    /// separate vector but could be added via metadata extension.
    ///
    /// Returns the internal vector ID.
    pub fn push(
        &mut self,
        state_embedding: &[f32],
        action: &str,
        reward: f64,
        _next_state_embedding: &[f32],
    ) -> Result<u64, ExperienceStoreError> {
        // Reject wrong-sized embeddings before mutating any state.
        if state_embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: state_embedding.len(),
            });
        }
        // Evict oldest if at capacity.
        if self.experience_ids.len() >= self.config.replay_capacity {
            if let Some(old_id) = self.experience_ids.pop_front() {
                self.experience_meta.pop_front();
                self.store.delete(&[old_id]).map_err(ExperienceStoreError::Rvf)?;
            }
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        // FIELD_STEP_ID carries the vector ID itself for experience
        // entries; FIELD_CATEGORY is unused here and written empty.
        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
            MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_EXPERIENCE.to_string()) },
        ];
        self.store
            .ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
            .map_err(ExperienceStoreError::Rvf)?;
        self.experience_ids.push_back(vector_id);
        self.experience_meta.push_back((action.to_string(), reward));
        Ok(vector_id)
    }

    /// Sample `n` experiences uniformly from the buffer.
    ///
    /// Uses a deterministic stride-based selection: picks experiences
    /// evenly spaced across the buffer. Returns fewer than `n` if the
    /// buffer contains fewer experiences.
    pub fn sample(&self, n: usize) -> Vec<Experience> {
        let len = self.experience_ids.len();
        if len == 0 || n == 0 {
            return Vec::new();
        }
        let count = n.min(len);
        // Integer stride; flooring means the walk can cover fewer than
        // `count` entries, which the back-fill pass below compensates for.
        let step = if count >= len { 1 } else { len / count };
        let mut results = Vec::with_capacity(count);
        let mut idx = 0;
        while results.len() < count && idx < len {
            let vid = self.experience_ids[idx];
            let (action, reward) = &self.experience_meta[idx];
            results.push(Experience {
                id: vid,
                action: action.clone(),
                reward: *reward,
                distance: 0.0,
            });
            idx += step;
        }
        // If stride skipped some, fill from the end.
        // The duplicate check is O(count) per candidate — acceptable for
        // the small sample sizes this API targets.
        if results.len() < count {
            let mut back_idx = len - 1;
            while results.len() < count {
                let vid = self.experience_ids[back_idx];
                if !results.iter().any(|e| e.id == vid) {
                    let (action, reward) = &self.experience_meta[back_idx];
                    results.push(Experience {
                        id: vid,
                        action: action.clone(),
                        reward: *reward,
                        distance: 0.0,
                    });
                }
                if back_idx == 0 {
                    break;
                }
                back_idx -= 1;
            }
        }
        results
    }

    /// Sample `n` experiences prioritized by similarity to the given embedding.
    ///
    /// Finds the `n` nearest-neighbor experiences by vector distance.
    pub fn sample_prioritized(
        &mut self,
        n: usize,
        embedding: &[f32],
    ) -> Result<Vec<Experience>, ExperienceStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(ExperienceStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let results = self.store
            .query(embedding, n, &QueryOptions::default())
            .map_err(ExperienceStoreError::Rvf)?;
        Ok(self.enrich_results(&results))
    }

    /// Return the number of experiences in the buffer.
    pub fn len(&self) -> usize {
        self.experience_ids.len()
    }

    /// Return whether the buffer is empty.
    pub fn is_empty(&self) -> bool {
        self.experience_ids.is_empty()
    }

    /// Return whether the buffer has reached its capacity.
    pub fn is_full(&self) -> bool {
        self.experience_ids.len() >= self.config.replay_capacity
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), ExperienceStoreError> {
        self.store.close().map_err(ExperienceStoreError::Rvf)
    }

    // ── Internal ──────────────────────────────────────────────────────

    /// Attach in-memory (action, reward) metadata to raw search results.
    /// Linear scan per result; IDs no longer tracked (e.g. vectors written
    /// by other SONA stores sharing the file) fall back to empty metadata.
    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<Experience> {
        results
            .iter()
            .map(|r| {
                let meta = self.experience_ids.iter()
                    .zip(self.experience_meta.iter())
                    .find(|(&vid, _)| vid == r.id)
                    .map(|(_, m)| m);
                match meta {
                    Some((action, reward)) => Experience {
                        id: r.id,
                        action: action.clone(),
                        reward: *reward,
                        distance: r.distance,
                    },
                    None => Experience {
                        id: r.id,
                        action: String::new(),
                        reward: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}
/// Errors from experience replay buffer operations.
#[derive(Debug)]
pub enum ExperienceStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch {
        /// Dimension required by the configuration.
        expected: usize,
        /// Dimension actually supplied by the caller.
        got: usize,
    },
}

impl std::fmt::Display for ExperienceStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
        }
    }
}

impl std::error::Error for ExperienceStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Config with dimension 4 and a small capacity (5) so eviction is
    /// easy to trigger in tests.
    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4).with_replay_capacity(5)
    }

    /// Deterministic 4-dim embedding derived from a seed value.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn push_and_sample() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&make_embedding(1.0), "explore", 0.5, &make_embedding(1.1)).unwrap();
        buf.push(&make_embedding(2.0), "exploit", 0.8, &make_embedding(2.1)).unwrap();
        buf.push(&make_embedding(3.0), "explore", 0.3, &make_embedding(3.1)).unwrap();
        assert_eq!(buf.len(), 3);
        assert!(!buf.is_full());
        let samples = buf.sample(2);
        assert_eq!(samples.len(), 2);
        buf.close().unwrap();
    }

    // Pushing past capacity drops the oldest entries (FIFO).
    #[test]
    fn buffer_capacity_eviction() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path()); // capacity = 5
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        for i in 0..7 {
            buf.push(&make_embedding(i as f32 + 0.1), &format!("act{i}"), i as f64 * 0.1, &make_embedding(0.0)).unwrap();
        }
        assert_eq!(buf.len(), 5);
        assert!(buf.is_full());
        // The oldest two (act0, act1) should have been evicted.
        let all = buf.sample(5);
        assert_eq!(all.len(), 5);
        assert!(all.iter().all(|e| e.action != "act0" && e.action != "act1"));
        buf.close().unwrap();
    }

    // Nearest-neighbor sampling returns results in ascending distance.
    #[test]
    fn sample_prioritized() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&[1.0, 0.0, 0.0, 0.0], "a", 0.1, &[0.0; 4]).unwrap();
        buf.push(&[0.0, 1.0, 0.0, 0.0], "b", 0.2, &[0.0; 4]).unwrap();
        buf.push(&[0.9, 0.1, 0.0, 0.0], "c", 0.3, &[0.0; 4]).unwrap();
        let results = buf.sample_prioritized(2, &[1.0, 0.0, 0.0, 0.0]).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);
        buf.close().unwrap();
    }

    // All read paths must be safe on a freshly created, empty buffer.
    #[test]
    fn empty_buffer_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        assert!(buf.is_empty());
        assert!(!buf.is_full());
        assert_eq!(buf.len(), 0);
        let samples = buf.sample(5);
        assert!(samples.is_empty());
        let results = buf.sample_prioritized(5, &make_embedding(1.0)).unwrap();
        assert!(results.is_empty());
        buf.close().unwrap();
    }

    // Requesting more samples than stored returns everything, once.
    #[test]
    fn sample_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        buf.push(&make_embedding(1.0), "a", 0.1, &make_embedding(0.0)).unwrap();
        buf.push(&make_embedding(2.0), "b", 0.2, &make_embedding(0.0)).unwrap();
        let samples = buf.sample(10);
        assert_eq!(samples.len(), 2);
        buf.close().unwrap();
    }

    // Wrong-sized embeddings are rejected by both push and query paths.
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut buf = ExperienceReplayBuffer::create(config).unwrap();
        let result = buf.push(&[1.0, 2.0], "a", 0.1, &[1.0, 2.0]);
        assert!(result.is_err());
        let result = buf.sample_prioritized(5, &[1.0, 2.0]);
        assert!(result.is_err());
        buf.close().unwrap();
    }
}

View File

@@ -0,0 +1,44 @@
//! RVF adapter for SONA (Self-Optimizing Neural Architecture).
//!
//! This crate bridges SONA's learning trajectory tracking, pattern
//! recognition, and experience replay with the RuVector Format (RVF)
//! segment store per ADR-029. All three data types target the same
//! shared RVF file path (`sona.rvf`), distinguished by a type marker in
//! metadata field 4. NOTE(review): each store's `create()` independently
//! calls `RvfStore::create` on that path — confirm the runtime tolerates
//! re-creation before opening more than one store concurrently.
//!
//! # Architecture
//!
//! - **`TrajectoryStore`**: Records and queries sequences of state
//! embeddings that form a learning trajectory.
//! - **`ExperienceReplayBuffer`**: Circular buffer of (state, action,
//! reward, next_state) tuples for off-policy training.
//! - **`NeuralPatternStore`**: Stores recognized neural patterns with
//! confidence scores, searchable by category or embedding similarity.
//! - **`SonaConfig`**: Configuration for data directory, dimension,
//! replay capacity, and trajectory window size.
//!
//! # Usage
//!
//! ```rust,no_run
//! use rvf_adapter_sona::{SonaConfig, TrajectoryStore, ExperienceReplayBuffer, NeuralPatternStore};
//!
//! let config = SonaConfig::new("/tmp/sona-data", 256);
//! let mut trajectory = TrajectoryStore::create(config.clone()).unwrap();
//!
//! let embedding = vec![0.1f32; 256];
//! trajectory.record_step(1, &embedding, "explore", 0.5).unwrap();
//!
//! let recent = trajectory.get_recent(10);
//! let similar = trajectory.search_similar_states(&embedding, 5).unwrap();
//! trajectory.close().unwrap();
//! ```
// Public modules making up the adapter.
pub mod config;
pub mod experience;
pub mod pattern;
pub mod trajectory;
// Flat re-exports so callers can `use rvf_adapter_sona::{...}` directly.
pub use config::{ConfigError, SonaConfig};
pub use experience::{Experience, ExperienceReplayBuffer};
pub use pattern::{NeuralPattern, NeuralPatternStore};
pub use trajectory::{TrajectoryStep, TrajectoryStore};

View File

@@ -0,0 +1,423 @@
//! `NeuralPatternStore` — stores recognized neural patterns as RVF
//! vectors with confidence scores and categories.
//!
//! Patterns can be searched by embedding similarity, filtered by
//! category, or ranked by confidence. A type marker of "pattern"
//! distinguishes these entries from trajectory and experience data.
use std::collections::HashMap;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::RvfStore;
use rvf_types::RvfError;
use crate::config::SonaConfig;
/// Metadata field IDs (shared across all SONA stores).
///
/// For pattern entries, field 1 holds the pattern name and field 2 the
/// confidence score (the trajectory/experience stores reuse those IDs
/// for action and reward).
const FIELD_STEP_ID: u16 = 0;
const FIELD_NAME: u16 = 1;
const FIELD_CONFIDENCE: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;
/// Type marker for pattern entries.
const TYPE_PATTERN: &str = "pattern";
/// A recognized neural pattern returned from retrieval or search.
#[derive(Clone, Debug)]
pub struct NeuralPattern {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// Human-readable pattern name.
    pub name: String,
    /// Category this pattern belongs to.
    pub category: String,
    /// Confidence score (0.0 to 1.0; the range is not enforced on input).
    pub confidence: f64,
    /// Distance from query (only meaningful for search results;
    /// category/top-k retrieval reports 0.0).
    pub distance: f32,
}

/// Stores recognized neural patterns as RVF vectors.
///
/// The RVF store holds embeddings and metadata; the maps below are an
/// in-memory mirror for fast lookup (not rebuilt from disk on create).
pub struct NeuralPatternStore {
    store: RvfStore,
    config: SonaConfig,
    /// In-memory index of pattern metadata keyed by vector ID.
    patterns: HashMap<u64, PatternMeta>,
    /// In-memory index of category -> vector IDs.
    category_index: HashMap<String, Vec<u64>>,
    /// Next vector ID to assign.
    next_id: u64,
}

/// In-memory metadata for a pattern.
#[derive(Clone, Debug)]
struct PatternMeta {
    /// Human-readable name.
    name: String,
    /// Category key used by `category_index`.
    category: String,
    /// Latest confidence value held in memory.
    confidence: f64,
}
impl NeuralPatternStore {
    /// Create a new neural pattern store.
    ///
    /// Validates the configuration, ensures the data directory exists,
    /// and creates the RVF store file at the shared `sona.rvf` path.
    /// NOTE(review): the other SONA stores also call `RvfStore::create`
    /// on the same path — confirm the runtime's semantics when the file
    /// already exists before using several stores against one config.
    pub fn create(config: SonaConfig) -> Result<Self, PatternStoreError> {
        config.validate().map_err(PatternStoreError::Config)?;
        config.ensure_dirs().map_err(|e| PatternStoreError::Io(e.to_string()))?;
        let rvf_options = RvfOptions {
            dimension: config.dimension,
            ..Default::default()
        };
        let store = RvfStore::create(&config.store_path(), rvf_options)
            .map_err(PatternStoreError::Rvf)?;
        Ok(Self {
            store,
            config,
            patterns: HashMap::new(),
            category_index: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a new neural pattern.
    ///
    /// `confidence` is documented as 0.0..=1.0 but is not range-checked
    /// here. Returns the internal vector ID assigned to this pattern.
    pub fn store_pattern(
        &mut self,
        name: &str,
        category: &str,
        embedding: &[f32],
        confidence: f64,
    ) -> Result<u64, PatternStoreError> {
        // Reject wrong-sized embeddings before mutating any state.
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let vector_id = self.next_id;
        self.next_id += 1;
        let metadata = vec![
            MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(vector_id) },
            MetadataEntry { field_id: FIELD_NAME, value: MetadataValue::String(name.to_string()) },
            MetadataEntry { field_id: FIELD_CONFIDENCE, value: MetadataValue::F64(confidence) },
            MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(category.to_string()) },
            MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_PATTERN.to_string()) },
        ];
        self.store
            .ingest_batch(&[embedding], &[vector_id], Some(&metadata))
            .map_err(PatternStoreError::Rvf)?;
        // Mirror the stored metadata in the in-memory indexes.
        // NOTE(review): these indexes are not rebuilt from the RVF file,
        // so they only reflect patterns stored during this session —
        // confirm whether reopening an existing file is a supported flow.
        let meta = PatternMeta {
            name: name.to_string(),
            category: category.to_string(),
            confidence,
        };
        self.patterns.insert(vector_id, meta);
        self.category_index
            .entry(category.to_string())
            .or_default()
            .push(vector_id);
        Ok(vector_id)
    }

    /// Search for patterns whose embeddings are most similar to the given embedding.
    pub fn search_patterns(
        &mut self,
        embedding: &[f32],
        k: usize,
    ) -> Result<Vec<NeuralPattern>, PatternStoreError> {
        if embedding.len() != self.config.dimension as usize {
            return Err(PatternStoreError::DimensionMismatch {
                expected: self.config.dimension as usize,
                got: embedding.len(),
            });
        }
        let results = self.store
            .query(embedding, k, &QueryOptions::default())
            .map_err(PatternStoreError::Rvf)?;
        Ok(self.enrich_results(&results))
    }

    /// Get all patterns in a given category (insertion order; distance 0.0).
    pub fn get_by_category(&self, category: &str) -> Vec<NeuralPattern> {
        let ids = match self.category_index.get(category) {
            Some(ids) => ids,
            None => return Vec::new(),
        };
        ids.iter()
            .filter_map(|&vid| {
                self.patterns.get(&vid).map(|meta| NeuralPattern {
                    id: vid,
                    name: meta.name.clone(),
                    category: meta.category.clone(),
                    confidence: meta.confidence,
                    distance: 0.0,
                })
            })
            .collect()
    }

    /// Update the confidence score for a pattern by its vector ID.
    ///
    /// NOTE(review): only the in-memory index is updated; the
    /// FIELD_CONFIDENCE metadata already persisted to the RVF store keeps
    /// its original value — confirm whether persistence is required.
    pub fn update_confidence(&mut self, id: u64, confidence: f64) -> Result<(), PatternStoreError> {
        match self.patterns.get_mut(&id) {
            Some(meta) => {
                meta.confidence = confidence;
                Ok(())
            }
            None => Err(PatternStoreError::PatternNotFound(id)),
        }
    }

    /// Get the top `k` patterns ranked by confidence (highest first).
    pub fn get_top_patterns(&self, k: usize) -> Vec<NeuralPattern> {
        let mut all: Vec<_> = self.patterns.iter()
            .map(|(&vid, meta)| NeuralPattern {
                id: vid,
                name: meta.name.clone(),
                category: meta.category.clone(),
                confidence: meta.confidence,
                distance: 0.0,
            })
            .collect();
        // NaN confidences compare as equal here instead of panicking.
        all.sort_by(|a, b| {
            b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)
        });
        all.truncate(k);
        all
    }

    /// Return the total number of stored patterns.
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Return whether the store has no patterns.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }

    /// Close the store, releasing locks.
    pub fn close(self) -> Result<(), PatternStoreError> {
        self.store.close().map_err(PatternStoreError::Rvf)
    }

    // ── Internal ──────────────────────────────────────────────────────

    /// Attach in-memory metadata to raw search results; unknown IDs
    /// (e.g. vectors written by other SONA stores sharing the file) fall
    /// back to empty name/category with zero confidence.
    fn enrich_results(&self, results: &[rvf_runtime::SearchResult]) -> Vec<NeuralPattern> {
        results
            .iter()
            .map(|r| {
                match self.patterns.get(&r.id) {
                    Some(meta) => NeuralPattern {
                        id: r.id,
                        name: meta.name.clone(),
                        category: meta.category.clone(),
                        confidence: meta.confidence,
                        distance: r.distance,
                    },
                    None => NeuralPattern {
                        id: r.id,
                        name: String::new(),
                        category: String::new(),
                        confidence: 0.0,
                        distance: r.distance,
                    },
                }
            })
            .collect()
    }
}
/// Errors from neural pattern store operations.
#[derive(Debug)]
pub enum PatternStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch.
    DimensionMismatch {
        /// Dimension required by the configuration.
        expected: usize,
        /// Dimension actually supplied by the caller.
        got: usize,
    },
    /// Pattern not found for the given ID.
    PatternNotFound(u64),
}

impl std::fmt::Display for PatternStoreError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Rvf(e) => write!(f, "RVF store error: {e}"),
            Self::Config(e) => write!(f, "config error: {e}"),
            Self::Io(msg) => write!(f, "I/O error: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch: expected {expected}, got {got}")
            }
            Self::PatternNotFound(id) => write!(f, "pattern not found: {id}"),
        }
    }
}

impl std::error::Error for PatternStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Minimal config with dimension 4 for fast tests.
    fn test_config(dir: &std::path::Path) -> SonaConfig {
        SonaConfig::new(dir, 4)
    }

    /// Deterministic 4-dim embedding derived from a seed value.
    fn make_embedding(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    // Nearest-neighbor search returns ascending distances.
    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("convergent", "thinking", &[1.0, 0.0, 0.0, 0.0], 0.9).unwrap();
        store.store_pattern("divergent", "thinking", &[0.0, 1.0, 0.0, 0.0], 0.7).unwrap();
        store.store_pattern("lateral", "creative", &[0.0, 0.0, 1.0, 0.0], 0.8).unwrap();
        let results = store.search_patterns(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(results.len(), 2);
        assert!(results[0].distance <= results[1].distance);
        store.close().unwrap();
    }

    // Category filtering via the in-memory index.
    #[test]
    fn get_by_category() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("p1", "alpha", &make_embedding(1.0), 0.9).unwrap();
        store.store_pattern("p2", "beta", &make_embedding(2.0), 0.7).unwrap();
        store.store_pattern("p3", "alpha", &make_embedding(3.0), 0.8).unwrap();
        let alpha = store.get_by_category("alpha");
        assert_eq!(alpha.len(), 2);
        assert!(alpha.iter().all(|p| p.category == "alpha"));
        let beta = store.get_by_category("beta");
        assert_eq!(beta.len(), 1);
        assert_eq!(beta[0].name, "p2");
        let empty = store.get_by_category("nonexistent");
        assert!(empty.is_empty());
        store.close().unwrap();
    }

    // Updated confidence is visible through subsequent reads.
    #[test]
    fn update_confidence() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        let id = store.store_pattern("p1", "cat", &make_embedding(1.0), 0.5).unwrap();
        store.update_confidence(id, 0.95).unwrap();
        let top = store.get_top_patterns(1);
        assert_eq!(top.len(), 1);
        assert!((top[0].confidence - 0.95).abs() < f64::EPSILON);
        store.close().unwrap();
    }

    // Unknown IDs are rejected rather than silently ignored.
    #[test]
    fn update_confidence_not_found() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        let result = store.update_confidence(999, 0.5);
        assert!(result.is_err());
        store.close().unwrap();
    }

    // Ranking is by confidence, highest first.
    #[test]
    fn get_top_patterns() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("low", "cat", &make_embedding(1.0), 0.3).unwrap();
        store.store_pattern("high", "cat", &make_embedding(2.0), 0.9).unwrap();
        store.store_pattern("mid", "cat", &make_embedding(3.0), 0.6).unwrap();
        let top = store.get_top_patterns(2);
        assert_eq!(top.len(), 2);
        assert_eq!(top[0].name, "high");
        assert_eq!(top[1].name, "mid");
        store.close().unwrap();
    }

    // Requesting more than stored returns everything.
    #[test]
    fn get_top_more_than_available() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        store.store_pattern("only", "cat", &make_embedding(1.0), 0.5).unwrap();
        let top = store.get_top_patterns(10);
        assert_eq!(top.len(), 1);
        store.close().unwrap();
    }

    // All read paths must be safe on a freshly created, empty store.
    #[test]
    fn empty_store_operations() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        let results = store.search_patterns(&make_embedding(1.0), 5).unwrap();
        assert!(results.is_empty());
        let by_cat = store.get_by_category("anything");
        assert!(by_cat.is_empty());
        let top = store.get_top_patterns(5);
        assert!(top.is_empty());
        store.close().unwrap();
    }

    // Wrong-sized embeddings are rejected by both write and query paths.
    #[test]
    fn dimension_mismatch() {
        let dir = TempDir::new().unwrap();
        let config = test_config(dir.path());
        let mut store = NeuralPatternStore::create(config).unwrap();
        let result = store.store_pattern("p", "c", &[1.0, 2.0], 0.5);
        assert!(result.is_err());
        let result = store.search_patterns(&[1.0, 2.0], 5);
        assert!(result.is_err());
        store.close().unwrap();
    }
}

View File

@@ -0,0 +1,422 @@
//! `TrajectoryStore` — stores learning trajectories as sequences of
//! state embeddings in the shared SONA RVF file.
//!
//! Each trajectory step records a state embedding, the action taken,
//! the reward received, and a monotonically increasing step ID. Steps
//! are stored as RVF vectors with metadata fields encoding the step
//! details and a type marker of "trajectory".
use std::collections::VecDeque;
use rvf_runtime::options::{MetadataEntry, MetadataValue, QueryOptions, RvfOptions};
use rvf_runtime::{RvfStore, SearchResult};
use rvf_types::RvfError;
use crate::config::SonaConfig;
/// Metadata field IDs (shared across all SONA stores).
///
/// For trajectory entries, field 0 carries the caller-supplied step ID,
/// field 1 the action, field 2 the reward; field 3 (category) is written
/// empty.
const FIELD_STEP_ID: u16 = 0;
const FIELD_ACTION: u16 = 1;
const FIELD_REWARD: u16 = 2;
const FIELD_CATEGORY: u16 = 3;
const FIELD_TYPE: u16 = 4;
/// Type marker for trajectory entries.
const TYPE_TRAJECTORY: &str = "trajectory";
/// A single trajectory step returned from retrieval or search.
#[derive(Clone, Debug)]
pub struct TrajectoryStep {
    /// Internal vector ID in the RVF store.
    pub id: u64,
    /// The step identifier within the trajectory (caller-supplied).
    pub step_id: u64,
    /// The action taken at this step.
    pub action: String,
    /// The reward received at this step.
    pub reward: f64,
    /// Distance from query (only meaningful for search results;
    /// retrieval methods report 0.0).
    pub distance: f32,
}

/// Stores learning trajectories as sequences of state embeddings.
///
/// The RVF store holds the embeddings; the two `VecDeque`s below form a
/// parallel in-memory index (same order and length) used for fast
/// metadata lookup and window trimming.
pub struct TrajectoryStore {
    store: RvfStore,
    config: SonaConfig,
    /// In-memory ordered record of trajectory step vector IDs, newest last.
    step_ids: VecDeque<u64>,
    /// Parallel deque of step metadata for fast retrieval.
    step_meta: VecDeque<(u64, String, f64)>, // (step_id, action, reward)
    /// Next vector ID to assign.
    next_id: u64,
}
impl TrajectoryStore {
/// Create a new trajectory store, initializing the data directory and RVF file.
pub fn create(config: SonaConfig) -> Result<Self, SonaStoreError> {
config.validate().map_err(SonaStoreError::Config)?;
config.ensure_dirs().map_err(|e| SonaStoreError::Io(e.to_string()))?;
let rvf_options = RvfOptions {
dimension: config.dimension,
..Default::default()
};
let store = RvfStore::create(&config.store_path(), rvf_options)
.map_err(SonaStoreError::Rvf)?;
Ok(Self {
store,
config,
step_ids: VecDeque::new(),
step_meta: VecDeque::new(),
next_id: 1,
})
}
/// Record a single trajectory step.
///
/// Returns the internal vector ID assigned to this step.
pub fn record_step(
&mut self,
step_id: u64,
state_embedding: &[f32],
action: &str,
reward: f64,
) -> Result<u64, SonaStoreError> {
if state_embedding.len() != self.config.dimension as usize {
return Err(SonaStoreError::DimensionMismatch {
expected: self.config.dimension as usize,
got: state_embedding.len(),
});
}
let vector_id = self.next_id;
self.next_id += 1;
let metadata = vec![
MetadataEntry { field_id: FIELD_STEP_ID, value: MetadataValue::U64(step_id) },
MetadataEntry { field_id: FIELD_ACTION, value: MetadataValue::String(action.to_string()) },
MetadataEntry { field_id: FIELD_REWARD, value: MetadataValue::F64(reward) },
MetadataEntry { field_id: FIELD_CATEGORY, value: MetadataValue::String(String::new()) },
MetadataEntry { field_id: FIELD_TYPE, value: MetadataValue::String(TYPE_TRAJECTORY.to_string()) },
];
self.store
.ingest_batch(&[state_embedding], &[vector_id], Some(&metadata))
.map_err(SonaStoreError::Rvf)?;
self.step_ids.push_back(vector_id);
self.step_meta.push_back((step_id, action.to_string(), reward));
// Trim to trajectory window size.
while self.step_ids.len() > self.config.trajectory_window {
self.step_ids.pop_front();
self.step_meta.pop_front();
}
Ok(vector_id)
}
/// Get the `n` most recent trajectory steps.
///
/// Returns fewer than `n` if fewer steps are available.
pub fn get_recent(&self, n: usize) -> Vec<TrajectoryStep> {
let len = self.step_ids.len();
let start = len.saturating_sub(n);
self.step_ids
.iter()
.zip(self.step_meta.iter())
.skip(start)
.map(|(&vid, (step_id, action, reward))| TrajectoryStep {
id: vid,
step_id: *step_id,
action: action.clone(),
reward: *reward,
distance: 0.0,
})
.collect()
}
/// Search for trajectory steps whose state embeddings are most
/// similar to the given embedding.
pub fn search_similar_states(
&mut self,
embedding: &[f32],
k: usize,
) -> Result<Vec<TrajectoryStep>, SonaStoreError> {
if embedding.len() != self.config.dimension as usize {
return Err(SonaStoreError::DimensionMismatch {
expected: self.config.dimension as usize,
got: embedding.len(),
});
}
let results = self.store
.query(embedding, k, &QueryOptions::default())
.map_err(SonaStoreError::Rvf)?;
Ok(self.enrich_results(&results))
}
/// Get all steps in the current trajectory window.
pub fn get_trajectory_window(&self) -> Vec<TrajectoryStep> {
self.get_recent(self.config.trajectory_window)
}
/// Prune old trajectory data, keeping only the most recent `keep_last_n` steps.
///
/// Returns the number of steps deleted.
pub fn clear_old(&mut self, keep_last_n: usize) -> Result<usize, SonaStoreError> {
let len = self.step_ids.len();
if len <= keep_last_n {
return Ok(0);
}
let to_remove = len - keep_last_n;
let mut ids_to_delete = Vec::with_capacity(to_remove);
for _ in 0..to_remove {
if let Some(vid) = self.step_ids.pop_front() {
ids_to_delete.push(vid);
self.step_meta.pop_front();
}
}
if !ids_to_delete.is_empty() {
self.store.delete(&ids_to_delete).map_err(SonaStoreError::Rvf)?;
}
Ok(ids_to_delete.len())
}
    /// Return the number of steps in the current in-memory window.
    ///
    /// Counts only the windowed steps tracked in memory; vectors that were
    /// trimmed out of the window remain in the backing RVF store and are
    /// not reflected here.
    pub fn len(&self) -> usize {
        self.step_ids.len()
    }
/// Return whether the store has no steps in the window.
pub fn is_empty(&self) -> bool {
self.step_ids.is_empty()
}
/// Close the store, releasing locks.
pub fn close(self) -> Result<(), SonaStoreError> {
self.store.close().map_err(SonaStoreError::Rvf)
}
// ── Internal ──────────────────────────────────────────────────────
/// Enrich raw search results with step metadata from the in-memory index.
fn enrich_results(&self, results: &[SearchResult]) -> Vec<TrajectoryStep> {
results
.iter()
.map(|r| {
let meta = self.step_ids.iter()
.zip(self.step_meta.iter())
.find(|(&vid, _)| vid == r.id)
.map(|(_, m)| m);
match meta {
Some((step_id, action, reward)) => TrajectoryStep {
id: r.id,
step_id: *step_id,
action: action.clone(),
reward: *reward,
distance: r.distance,
},
None => TrajectoryStep {
id: r.id,
step_id: 0,
action: String::new(),
reward: 0.0,
distance: r.distance,
},
}
})
.collect()
}
}
/// Errors from SONA store operations.
#[derive(Debug)]
pub enum SonaStoreError {
    /// Underlying RVF store error.
    Rvf(RvfError),
    /// Configuration error.
    Config(crate::config::ConfigError),
    /// I/O error.
    Io(String),
    /// Embedding dimension mismatch between a caller-supplied vector and
    /// the dimension fixed by the store configuration.
    DimensionMismatch {
        /// Dimension the store was configured with.
        expected: usize,
        /// Dimension of the embedding actually provided.
        got: usize,
    },
}
impl std::fmt::Display for SonaStoreError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Rvf(e) => write!(f, "RVF store error: {e}"),
Self::Config(e) => write!(f, "config error: {e}"),
Self::Io(msg) => write!(f, "I/O error: {msg}"),
Self::DimensionMismatch { expected, got } => {
write!(f, "dimension mismatch: expected {expected}, got {got}")
}
}
}
}
impl std::error::Error for SonaStoreError {}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Open a fresh store (dimension 4, window 5) rooted in a temp directory.
    ///
    /// The `TempDir` guard is returned alongside the store so the backing
    /// directory stays alive for the duration of the test.
    fn open_store() -> (TempDir, TrajectoryStore) {
        let dir = TempDir::new().unwrap();
        let config = SonaConfig::new(dir.path(), 4).with_trajectory_window(5);
        let store = TrajectoryStore::create(config).unwrap();
        (dir, store)
    }

    /// Deterministic four-dimensional embedding derived from `seed`.
    fn embed(seed: f32) -> Vec<f32> {
        vec![seed, seed * 0.5, seed * 0.25, seed * 0.125]
    }

    #[test]
    fn record_and_get_recent() {
        let (_dir, mut store) = open_store();
        store.record_step(1, &embed(1.0), "explore", 0.5).unwrap();
        store.record_step(2, &embed(2.0), "exploit", 0.8).unwrap();
        store.record_step(3, &embed(3.0), "explore", 0.3).unwrap();

        let recent = store.get_recent(2);
        assert_eq!(recent.len(), 2);
        // Oldest-first ordering within the returned tail of the window.
        assert_eq!(recent[0].step_id, 2);
        assert_eq!(recent[1].step_id, 3);
        assert_eq!(recent[1].action, "explore");
        store.close().unwrap();
    }

    #[test]
    fn get_recent_more_than_available() {
        let (_dir, mut store) = open_store();
        store.record_step(1, &embed(1.0), "a", 0.1).unwrap();

        // Asking for more steps than exist yields just what is available.
        let recent = store.get_recent(10);
        assert_eq!(recent.len(), 1);
        assert_eq!(recent[0].step_id, 1);
        store.close().unwrap();
    }

    #[test]
    fn trajectory_window_trimming() {
        let (_dir, mut store) = open_store(); // window = 5
        for i in 0..8 {
            store.record_step(i, &embed(i as f32 + 0.1), "act", 0.1).unwrap();
        }

        // Only the newest five steps (ids 3..=7) survive the trim.
        assert_eq!(store.len(), 5);
        let window = store.get_trajectory_window();
        assert_eq!(window.len(), 5);
        assert_eq!(window[0].step_id, 3);
        assert_eq!(window[4].step_id, 7);
        store.close().unwrap();
    }

    #[test]
    fn search_similar_states() {
        let (_dir, mut store) = open_store();
        store.record_step(1, &[1.0, 0.0, 0.0, 0.0], "a", 0.1).unwrap();
        store.record_step(2, &[0.0, 1.0, 0.0, 0.0], "b", 0.2).unwrap();
        store.record_step(3, &[0.9, 0.1, 0.0, 0.0], "c", 0.3).unwrap();

        let hits = store.search_similar_states(&[1.0, 0.0, 0.0, 0.0], 2).unwrap();
        assert_eq!(hits.len(), 2);
        // Results come back nearest-first.
        assert!(hits[0].distance <= hits[1].distance);
        store.close().unwrap();
    }

    #[test]
    fn clear_old_steps() {
        let (_dir, mut store) = open_store();
        for i in 0..5 {
            store.record_step(i, &embed(i as f32 + 0.1), "act", 0.1).unwrap();
        }

        assert_eq!(store.clear_old(2).unwrap(), 3);
        assert_eq!(store.len(), 2);
        let survivors = store.get_recent(10);
        assert_eq!(survivors.len(), 2);
        assert_eq!(survivors[0].step_id, 3);
        assert_eq!(survivors[1].step_id, 4);
        store.close().unwrap();
    }

    #[test]
    fn clear_old_no_op_when_within_limit() {
        let (_dir, mut store) = open_store();
        store.record_step(1, &embed(1.0), "a", 0.1).unwrap();

        // Keeping more steps than we hold must delete nothing.
        assert_eq!(store.clear_old(10).unwrap(), 0);
        assert_eq!(store.len(), 1);
        store.close().unwrap();
    }

    #[test]
    fn empty_store_operations() {
        let (_dir, mut store) = open_store();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        assert!(store.get_recent(5).is_empty());
        assert!(store.get_trajectory_window().is_empty());
        // Searching an empty store succeeds with no hits.
        assert!(store.search_similar_states(&embed(1.0), 5).unwrap().is_empty());
        store.close().unwrap();
    }

    #[test]
    fn dimension_mismatch() {
        let (_dir, mut store) = open_store();
        // Both the write and the read path reject wrong-sized embeddings.
        assert!(store.record_step(1, &[1.0, 2.0], "a", 0.1).is_err());
        assert!(store.search_similar_states(&[1.0, 2.0], 5).is_err());
        store.close().unwrap();
    }
}