Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,323 @@
//! Maps agentdb HNSW operations to RVF INDEX_SEG layers.
//!
//! Bridges agentdb's HNSW index lifecycle to the three-layer progressive
//! indexing model (Layer A / B / C) defined in `rvf-index`.
use std::collections::BTreeSet;
use rvf_index::builder::{build_full_index, build_layer_a, build_layer_b, build_layer_c};
use rvf_index::distance::{cosine_distance, l2_distance};
use rvf_index::hnsw::{HnswConfig, HnswGraph};
/// Boxed distance function: `(query, candidate) -> distance` (lower is closer).
type DistanceFn = Box<dyn Fn(&[f32], &[f32]) -> f32>;
use rvf_index::layers::{IndexLayer, LayerA, LayerB, LayerC};
use rvf_index::progressive::ProgressiveIndex;
use rvf_index::traits::InMemoryVectorStore;
/// Configuration for the RVF index adapter.
///
/// See [`Default`] for the values tuned for agentdb text-embedding workloads.
#[derive(Clone, Debug)]
pub struct IndexAdapterConfig {
    /// HNSW M parameter (max neighbors per node on upper layers).
    pub m: usize,
    /// HNSW M0 (layer-0 neighbors).
    pub m0: usize,
    /// ef_construction beam width used while building the graph.
    pub ef_construction: usize,
    /// ef_search beam width for queries.
    pub ef_search: usize,
    /// Use cosine distance (default true for agentdb text embeddings);
    /// otherwise L2 distance is used.
    pub use_cosine: bool,
    /// Hot node fraction for Layer B (0.0 - 1.0).
    pub hot_fraction: f32,
}
impl Default for IndexAdapterConfig {
fn default() -> Self {
Self {
m: 16,
m0: 32,
ef_construction: 200,
ef_search: 100,
use_cosine: true,
hot_fraction: 0.2,
}
}
}
/// Adapter that maps agentdb HNSW operations to RVF INDEX_SEG layers.
///
/// Manages the full HNSW graph and can extract progressive layers (A/B/C)
/// for serialization into INDEX_SEG segments.
pub struct RvfIndexAdapter {
    /// Build and search parameters.
    config: IndexAdapterConfig,
    /// Full HNSW graph; `None` until `build` is called with >= 1 vector.
    graph: Option<HnswGraph>,
    /// Raw vectors, indexed by internal node index (0..n).
    vectors: Vec<Vec<f32>>,
    /// Caller-supplied external ID per node index.
    /// NOTE(review): only stored, never consulted by `search`/`search_full`,
    /// so results appear to carry internal node indices — confirm callers
    /// expect that (tests pass because they use IDs 0..n).
    id_map: Vec<u64>,
    /// Layered index assembled by the last `load_progressive` call.
    progressive: ProgressiveIndex,
    /// Layers requested in the last `load_progressive` call.
    loaded_layers: Vec<IndexLayer>,
}
impl RvfIndexAdapter {
    /// Create a new, empty index adapter with the given configuration.
    ///
    /// No graph exists until [`RvfIndexAdapter::build`] is called.
    pub fn new(config: IndexAdapterConfig) -> Self {
        Self {
            config,
            graph: None,
            vectors: Vec::new(),
            id_map: Vec::new(),
            progressive: ProgressiveIndex::new(),
            loaded_layers: Vec::new(),
        }
    }

    /// Build the full HNSW index from a set of vectors and IDs.
    ///
    /// This replaces any existing index. An empty `vectors` slice is a
    /// no-op: the previous index, if any, is left untouched.
    pub fn build(&mut self, vectors: Vec<Vec<f32>>, ids: Vec<u64>) {
        let n = vectors.len();
        if n == 0 {
            return;
        }
        let hnsw_config = HnswConfig {
            m: self.config.m,
            m0: self.config.m0,
            ef_construction: self.config.ef_construction,
        };
        let store = InMemoryVectorStore::new(vectors.clone());
        let distance_fn = self.distance_fn();
        // Deterministic pseudo-random values (LCG-style mix per index) drive
        // HNSW level selection, so rebuilding from the same input reproduces
        // the same graph.
        let rng_values: Vec<f64> = (0..n)
            .map(|i| {
                let seed = (i as u64)
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                let val = (seed >> 33) as f64 / (1u64 << 31) as f64;
                // Clamp away the endpoints so level assignment stays finite
                // (presumably it takes a logarithm of this value).
                val.clamp(0.001, 0.999)
            })
            .collect();
        let graph = build_full_index(&store, n, &hnsw_config, &rng_values, &distance_fn);
        self.vectors = vectors;
        self.id_map = ids;
        self.graph = Some(graph);
    }

    /// Extract Layer A (entry points + coarse routing) from the current graph.
    ///
    /// Uses a trivial two-way split (first half / second half of the build
    /// order) as the coarse partitioning. Returns `None` if no graph has
    /// been built.
    pub fn extract_layer_a(&self) -> Option<LayerA> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();
        // Simple centroid computation: split vectors into 2 partitions.
        let mid = n / 2;
        let dim = self.vectors.first().map_or(0, |v| v.len());
        // The second partition is never empty: graph presence implies n >= 1,
        // and mid = n / 2 <= n - 1.
        let centroid_1 = compute_centroid(&self.vectors[mid..], dim);
        // With a single vector the first partition IS empty; duplicate the
        // other centroid instead of emitting a spurious all-zero centroid.
        // (The old `if mid < n` guard here was dead code: it holds for every
        // n >= 1.)
        let centroid_0 = if mid > 0 {
            compute_centroid(&self.vectors[..mid], dim)
        } else {
            centroid_1.clone()
        };
        let centroids = vec![centroid_0, centroid_1];
        let assignments: Vec<u32> = (0..n).map(|i| if i < mid { 0 } else { 1 }).collect();
        Some(build_layer_a(graph, &centroids, &assignments, n as u64))
    }

    /// Extract Layer B (hot region partial adjacency) from the current graph.
    ///
    /// The first `ceil(n * hot_fraction)` node indices are treated as the
    /// hot set. Returns `None` if no graph has been built.
    pub fn extract_layer_b(&self) -> Option<LayerB> {
        let graph = self.graph.as_ref()?;
        let n = self.vectors.len();
        let hot_count = ((n as f32) * self.config.hot_fraction).ceil() as usize;
        let hot_ids: BTreeSet<u64> = (0..hot_count as u64).collect();
        Some(build_layer_b(graph, &hot_ids))
    }

    /// Extract Layer C (full adjacency) from the current graph.
    ///
    /// Returns `None` if no graph has been built.
    pub fn extract_layer_c(&self) -> Option<LayerC> {
        let graph = self.graph.as_ref()?;
        Some(build_layer_c(graph))
    }

    /// Load progressive layers and configure the progressive index for search.
    ///
    /// Each requested layer is re-extracted from the in-memory graph;
    /// layers that cannot be extracted (no graph built) are left unset.
    pub fn load_progressive(&mut self, layers: &[IndexLayer]) {
        self.loaded_layers = layers.to_vec();
        let mut idx = ProgressiveIndex::new();
        for layer in layers {
            match layer {
                IndexLayer::A => idx.layer_a = self.extract_layer_a(),
                IndexLayer::B => idx.layer_b = self.extract_layer_b(),
                IndexLayer::C => idx.layer_c = self.extract_layer_c(),
            }
        }
        self.progressive = idx;
    }

    /// Search using the progressive index with whatever layers are loaded.
    ///
    /// NOTE(review): clones the entire vector set into a fresh
    /// `InMemoryVectorStore` on every call; consider a borrowing store
    /// if queries become hot.
    pub fn search(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        self.progressive
            .search_with_distance(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Search using the full HNSW graph directly (bypasses progressive layers).
    ///
    /// Returns an empty result set if no index has been built.
    pub fn search_full(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
        let graph = match self.graph.as_ref() {
            Some(g) => g,
            None => return Vec::new(),
        };
        let store = InMemoryVectorStore::new(self.vectors.clone());
        let distance_fn = self.distance_fn();
        graph.search(query, k, self.config.ef_search, &store, &distance_fn)
    }

    /// Get the node count in the HNSW graph (0 if no index has been built).
    pub fn node_count(&self) -> usize {
        self.graph.as_ref().map_or(0, |g| g.node_count())
    }

    /// Get the layers requested by the most recent `load_progressive` call.
    pub fn loaded_layers(&self) -> &[IndexLayer] {
        &self.loaded_layers
    }

    /// Boxed distance function matching the configured metric.
    fn distance_fn(&self) -> DistanceFn {
        if self.config.use_cosine {
            Box::new(cosine_distance)
        } else {
            Box::new(l2_distance)
        }
    }
}
/// Compute the per-dimension mean of `vectors`, truncated to `dim` components.
///
/// Returns an all-zero vector of length `dim` when `vectors` is empty (and
/// an empty vector when `dim == 0`). Vectors shorter than `dim` contribute
/// nothing to the trailing components; components beyond `dim` are ignored.
fn compute_centroid(vectors: &[Vec<f32>], dim: usize) -> Vec<f32> {
    if vectors.is_empty() || dim == 0 {
        return vec![0.0; dim];
    }
    let count = vectors.len() as f32;
    let mut sums = vec![0.0f32; dim];
    for vector in vectors {
        // zip stops at min(dim, vector.len()), so no bounds checks needed.
        for (sum, &component) in sums.iter_mut().zip(vector) {
            *sum += component;
        }
    }
    for sum in &mut sums {
        *sum /= count;
    }
    sums
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic fixture: `n` vectors whose components increase
    /// monotonically across the whole set, with IDs 0..n.
    fn make_vectors(n: usize, dim: usize) -> (Vec<Vec<f32>>, Vec<u64>) {
        let vecs: Vec<Vec<f32>> = (0..n)
            .map(|i| (0..dim).map(|d| (i * dim + d) as f32).collect())
            .collect();
        let ids: Vec<u64> = (0..n as u64).collect();
        (vecs, ids)
    }

    // Full-graph search should return the query vector itself as the top hit.
    #[test]
    fn build_and_search_full() {
        let (vecs, ids) = make_vectors(100, 8);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        assert_eq!(adapter.node_count(), 100);
        let results = adapter.search_full(&vecs[50], 5);
        assert!(!results.is_empty());
        assert_eq!(results[0].0, 50);
    }

    // All three layers should be extractable from a built graph, and Layer A
    // should expose the two-way centroid split.
    #[test]
    fn extract_layers() {
        let (vecs, ids) = make_vectors(50, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs, ids);
        let layer_a = adapter.extract_layer_a();
        assert!(layer_a.is_some());
        let la = layer_a.unwrap();
        assert!(!la.entry_points.is_empty());
        assert_eq!(la.centroids.len(), 2);
        let layer_b = adapter.extract_layer_b();
        assert!(layer_b.is_some());
        let layer_c = adapter.extract_layer_c();
        assert!(layer_c.is_some());
    }

    #[test]
    fn progressive_search_with_layers() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        // Load all three layers.
        adapter.load_progressive(&[IndexLayer::A, IndexLayer::B, IndexLayer::C]);
        let results = adapter.search(&vecs[25], 5);
        assert!(!results.is_empty());
        // With full Layer C, we should find the exact match.
        assert_eq!(results[0].0, 25);
    }

    #[test]
    fn progressive_layer_a_only() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);
        adapter.load_progressive(&[IndexLayer::A]);
        let results = adapter.search(&vecs[10], 5);
        // Layer A alone provides coarse results; we just verify non-empty.
        assert!(!results.is_empty());
    }

    // An unbuilt adapter must report zero nodes and return no results.
    #[test]
    fn empty_adapter() {
        let adapter = RvfIndexAdapter::new(IndexAdapterConfig::default());
        assert_eq!(adapter.node_count(), 0);
        let results = adapter.search_full(&[0.0; 4], 5);
        assert!(results.is_empty());
    }

    #[test]
    fn compute_centroid_basic() {
        let vecs = vec![
            vec![1.0, 2.0, 3.0],
            vec![3.0, 4.0, 5.0],
        ];
        let centroid = compute_centroid(&vecs, 3);
        assert_eq!(centroid, vec![2.0, 3.0, 4.0]);
    }
}

View File

@@ -0,0 +1,18 @@
//! AgentDB adapter for the RuVector Format (RVF).
//!
//! Maps agentdb's vector storage, HNSW index, and memory pattern APIs
//! onto the RVF segment model:
//!
//! - **VEC_SEG**: Raw vector data (episodes, state embeddings)
//! - **INDEX_SEG**: HNSW index layers (A/B/C progressive indexing)
//! - **META_SEG**: Memory pattern metadata (rewards, critiques, tags)
//!
//! Uses the RVText domain profile for text/embedding workloads.
pub mod index_adapter;
pub mod pattern_store;
pub mod vector_store;
pub use index_adapter::RvfIndexAdapter;
pub use pattern_store::{MemoryPattern, RvfPatternStore};
pub use vector_store::RvfVectorStore;

View File

@@ -0,0 +1,456 @@
//! Memory pattern storage using RVF META_SEG.
//!
//! Stores agentdb memory patterns (task descriptions, rewards, critiques,
//! success flags) as metadata alongside their state-embedding vectors.
//! Patterns can be searched by similarity and filtered by reward threshold.
use std::collections::HashMap;
use std::path::Path;
use rvf_runtime::options::{MetadataEntry, MetadataValue};
use rvf_types::RvfError;
use crate::vector_store::{AgentDbMetric, RvfVectorStore, VectorStoreConfig};
/// A memory pattern stored in the agentdb reasoning bank.
///
/// An `id` of 0 asks the store to auto-assign the next available ID
/// (see `RvfPatternStore::store_pattern`); a nonzero `id` is used as-is.
#[derive(Clone, Debug)]
pub struct MemoryPattern {
    /// Unique pattern identifier (0 = auto-assign on store).
    pub id: u64,
    /// Task description that produced this pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0) indicating quality.
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes about the pattern.
    pub critique: String,
    /// State embedding vector for similarity search.
    pub embedding: Vec<f32>,
}
/// Well-known metadata field IDs for pattern attributes.
///
/// These IDs are written into persisted META_SEG entries, so they should
/// remain stable across versions.
mod field_ids {
    /// Task description (string).
    pub const TASK: u16 = 0;
    /// Reward score (stored as f64, converted from f32).
    pub const REWARD: u16 = 1;
    /// Success flag (u64: 1 = success, 0 = failure).
    pub const SUCCESS: u16 = 2;
    /// Self-critique text (string).
    pub const CRITIQUE: u16 = 3;
}
/// RVF-backed memory pattern store for agentdb.
///
/// Stores patterns as vectors (embeddings) with metadata (task, reward,
/// critique, success flag). Supports similarity search with reward filtering.
pub struct RvfPatternStore {
    /// Underlying RVF vector store holding the embeddings (+ META_SEG data).
    vector_store: RvfVectorStore,
    /// In-memory metadata keyed by pattern ID; used to enrich and filter
    /// search results.
    patterns: HashMap<u64, PatternMetadata>,
    /// Next ID to hand out when a pattern is stored with id == 0.
    next_id: u64,
}
/// In-memory metadata for a pattern (kept alongside the RVF store).
///
/// Mirrors the META_SEG fields written in `store_pattern` minus the embedding.
#[derive(Clone, Debug)]
struct PatternMetadata {
    // Task description that produced the pattern.
    task: String,
    // Reward score (0.0 - 1.0).
    reward: f32,
    // Whether the pattern was successful.
    success: bool,
    // Self-critique / notes.
    critique: String,
}
impl RvfPatternStore {
    /// Create a new pattern store at the given path.
    ///
    /// The backing vector store uses cosine distance with ef_search = 100.
    pub fn create(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::create(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing pattern store.
    ///
    /// NOTE(review): `patterns` starts empty and `next_id` resets to 1 —
    /// the in-memory metadata map is NOT rebuilt from the on-disk store, so
    /// searches on a freshly opened store will drop every hit (no metadata
    /// to enrich it with), and newly stored patterns may reuse IDs already
    /// present on disk. Confirm whether callers re-ingest after opening.
    pub fn open(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::open(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a memory pattern.
    ///
    /// An `id` of 0 requests auto-assignment from the internal counter;
    /// a nonzero `id` is used as-is (and bumps the counter past it).
    /// Returns the assigned pattern ID.
    pub fn store_pattern(&mut self, pattern: MemoryPattern) -> Result<u64, RvfError> {
        let id = if pattern.id > 0 {
            pattern.id
        } else {
            let id = self.next_id;
            self.next_id += 1;
            id
        };
        // Ensure next_id stays ahead of manually assigned IDs.
        if id >= self.next_id {
            self.next_id = id + 1;
        }
        // NOTE(review): four metadata entries accompany this single vector,
        // while `add_vectors` documents its metadata slice as "one entry per
        // vector" — confirm the intended contract with the RVF runtime.
        let metadata = vec![
            MetadataEntry {
                field_id: field_ids::TASK,
                value: MetadataValue::String(pattern.task.clone()),
            },
            MetadataEntry {
                field_id: field_ids::REWARD,
                value: MetadataValue::F64(pattern.reward as f64),
            },
            MetadataEntry {
                field_id: field_ids::SUCCESS,
                value: MetadataValue::U64(if pattern.success { 1 } else { 0 }),
            },
            MetadataEntry {
                field_id: field_ids::CRITIQUE,
                value: MetadataValue::String(pattern.critique.clone()),
            },
        ];
        self.vector_store
            .add_vectors(&[pattern.embedding.as_slice()], &[id], Some(&metadata))?;
        self.patterns.insert(
            id,
            PatternMetadata {
                task: pattern.task,
                reward: pattern.reward,
                success: pattern.success,
                critique: pattern.critique,
            },
        );
        Ok(id)
    }

    /// Search for patterns similar to the given embedding.
    ///
    /// Returns `(pattern_id, distance)` pairs sorted by distance.
    /// Optionally filter by minimum reward score.
    ///
    /// When filtering, 3x over-fetch compensates for dropped hits; if more
    /// than two-thirds of the nearest neighbors fall below the threshold,
    /// fewer than `k` results may be returned even though matches exist.
    pub fn search_patterns(
        &self,
        query_embedding: &[f32],
        k: usize,
        min_reward: Option<f32>,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let search_k = if min_reward.is_some() { k * 3 } else { k };
        let results = self.vector_store.search(query_embedding, search_k, None)?;
        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                // Hits with no in-memory metadata are silently dropped.
                let meta = self.patterns.get(&r.id)?;
                if let Some(threshold) = min_reward {
                    if meta.reward < threshold {
                        return None;
                    }
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: meta.success,
                    critique: meta.critique.clone(),
                })
            })
            .collect();
        filtered.truncate(k);
        Ok(filtered)
    }

    /// Search for patterns that failed (success == false).
    ///
    /// Over-fetches 5x to compensate for filtering out successes; may
    /// still return fewer than `k` results.
    pub fn search_failures(
        &self,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let results = self.vector_store.search(query_embedding, k * 5, None)?;
        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                let meta = self.patterns.get(&r.id)?;
                if meta.success {
                    return None;
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: false,
                    critique: meta.critique.clone(),
                })
            })
            .collect();
        filtered.truncate(k);
        Ok(filtered)
    }

    /// Delete a pattern by ID.
    ///
    /// Removes both the stored vector and the in-memory metadata. Returns
    /// true if the underlying store reported a deletion.
    pub fn delete_pattern(&mut self, id: u64) -> Result<bool, RvfError> {
        let deleted = self.vector_store.delete_vectors(&[id])?;
        self.patterns.remove(&id);
        Ok(deleted > 0)
    }

    /// Get pattern metadata by ID.
    ///
    /// `distance` is set to 0.0 as a placeholder (no query was involved).
    pub fn get_pattern(&self, id: u64) -> Option<PatternSearchResult> {
        let meta = self.patterns.get(&id)?;
        Some(PatternSearchResult {
            id,
            distance: 0.0,
            task: meta.task.clone(),
            reward: meta.reward,
            success: meta.success,
            critique: meta.critique.clone(),
        })
    }

    /// Get aggregate statistics about stored patterns.
    ///
    /// Counts come from the in-memory map; `vector_count` comes from the
    /// underlying store and may differ (e.g. after reopening — see `open`).
    pub fn stats(&self) -> PatternStoreStats {
        let total = self.patterns.len();
        let successful = self.patterns.values().filter(|p| p.success).count();
        let avg_reward = if total > 0 {
            self.patterns.values().map(|p| p.reward as f64).sum::<f64>() / total as f64
        } else {
            0.0
        };
        PatternStoreStats {
            total_patterns: total,
            successful_patterns: successful,
            failed_patterns: total - successful,
            avg_reward,
        }
    }

    /// Save the store to disk.
    ///
    /// NOTE(review): `RvfVectorStore::save` closes the underlying file, so
    /// subsequent store/search calls will fail until the vector store is
    /// reloaded — confirm this matches caller expectations.
    pub fn save(&mut self) -> Result<(), RvfError> {
        self.vector_store.save()
    }

    /// Get the total number of patterns (from the in-memory map).
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}
/// A pattern search result with full metadata.
#[derive(Clone, Debug)]
pub struct PatternSearchResult {
    /// Pattern identifier.
    pub id: u64,
    /// Distance from the query embedding (0.0 when fetched by ID).
    pub distance: f32,
    /// Task description that produced the pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0).
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes.
    pub critique: String,
}
/// Aggregate statistics for the pattern store.
#[derive(Clone, Debug)]
pub struct PatternStoreStats {
    /// Total number of patterns in the in-memory map.
    pub total_patterns: usize,
    /// Patterns with `success == true`.
    pub successful_patterns: usize,
    /// Patterns with `success == false`.
    pub failed_patterns: usize,
    /// Mean reward across all patterns (0.0 when the store is empty).
    pub avg_reward: f64,
    /// Vector count reported by the underlying RVF store.
    pub vector_count: u64,
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Deterministic pseudo-random embedding (LCG seeded by `seed`),
    /// components roughly in [-0.5, 0.5].
    fn dummy_embedding(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    // Auto-assigned IDs, basic similarity search with bounded result count.
    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: i >= 5,
                critique: format!("critique_{}", i),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        assert_eq!(store.len(), 10);
        let query = dummy_embedding(dim, 7);
        let results = store.search_patterns(&query, 3, None).unwrap();
        assert!(!results.is_empty());
        assert!(results.len() <= 3);
    }

    // min_reward filter must exclude every pattern below the threshold.
    #[test]
    fn search_with_min_reward() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_reward.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: true,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        let query = dummy_embedding(dim, 5);
        let results = store.search_patterns(&query, 10, Some(0.5)).unwrap();
        assert!(results.iter().all(|r| r.reward >= 0.5));
    }

    // search_failures must only return unsuccessful patterns.
    #[test]
    fn search_failures() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_fail.rvf");
        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: 0.5,
                success: i % 2 == 0,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }
        let query = dummy_embedding(dim, 3);
        let results = store.search_failures(&query, 5).unwrap();
        assert!(results.iter().all(|r| !r.success));
    }

    // Explicit (nonzero) ID path, plus delete removes both vector and metadata.
    #[test]
    fn delete_pattern() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_del.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        let pattern = MemoryPattern {
            id: 42,
            task: "test".into(),
            reward: 0.9,
            success: true,
            critique: "good".into(),
            embedding: vec![1.0, 2.0, 3.0, 4.0],
        };
        store.store_pattern(pattern).unwrap();
        assert_eq!(store.len(), 1);
        let deleted = store.delete_pattern(42).unwrap();
        assert!(deleted);
        assert_eq!(store.len(), 0);
        assert!(store.get_pattern(42).is_none());
    }

    // Aggregate counters: 5 patterns, indexes 3 and 4 successful.
    #[test]
    fn stats() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_stats.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        for i in 0..5u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) * 0.2,
                success: i >= 3,
                critique: String::new(),
                embedding: vec![i as f32; dim],
            };
            store.store_pattern(pattern).unwrap();
        }
        let stats = store.stats();
        assert_eq!(stats.total_patterns, 5);
        assert_eq!(stats.successful_patterns, 2);
        assert_eq!(stats.failed_patterns, 3);
        assert!(stats.avg_reward > 0.0);
    }

    // get_pattern returns full metadata for a known ID, None otherwise.
    #[test]
    fn get_pattern_by_id() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_get.rvf");
        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();
        let pattern = MemoryPattern {
            id: 100,
            task: "find_bugs".into(),
            reward: 0.85,
            success: true,
            critique: "good coverage".into(),
            embedding: vec![1.0, 0.0, 0.0, 0.0],
        };
        store.store_pattern(pattern).unwrap();
        let result = store.get_pattern(100).unwrap();
        assert_eq!(result.task, "find_bugs");
        assert_eq!(result.reward, 0.85);
        assert!(result.success);
        assert_eq!(result.critique, "good coverage");
        assert!(store.get_pattern(999).is_none());
    }
}

View File

@@ -0,0 +1,326 @@
//! RVF-backed vector store for agentdb.
//!
//! Wraps [`RvfStore`] to provide the vector CRUD operations that agentdb
//! expects: add, search, delete, get, save, and load.
use std::path::{Path, PathBuf};
use rvf_runtime::options::{
DistanceMetric, MetadataEntry, QueryOptions, RvfOptions, SearchResult,
};
use rvf_runtime::RvfStore;
use rvf_types::{ErrorCode, RvfError};
/// Distance metric selection matching agentdb's API.
#[derive(Clone, Copy, Debug, Default)]
pub enum AgentDbMetric {
    /// Cosine distance (the default).
    #[default]
    Cosine,
    /// Euclidean (L2) distance.
    L2,
    /// Inner-product similarity.
    InnerProduct,
}
impl From<AgentDbMetric> for DistanceMetric {
    /// Translate agentdb's metric selector into the RVF runtime metric.
    fn from(metric: AgentDbMetric) -> Self {
        match metric {
            AgentDbMetric::InnerProduct => DistanceMetric::InnerProduct,
            AgentDbMetric::L2 => DistanceMetric::L2,
            AgentDbMetric::Cosine => DistanceMetric::Cosine,
        }
    }
}
/// Configuration for the RVF vector store.
///
/// See [`Default`] for the stock values (128-d, cosine, ef_search = 100).
#[derive(Clone, Debug)]
pub struct VectorStoreConfig {
    /// Vector dimensionality.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: AgentDbMetric,
    /// HNSW ef_search beam width for queries (overridable per search).
    pub ef_search: u16,
}
impl Default for VectorStoreConfig {
fn default() -> Self {
Self {
dimension: 128,
metric: AgentDbMetric::Cosine,
ef_search: 100,
}
}
}
/// RVF-backed vector store that provides the agentdb vector storage interface.
///
/// Maps agentdb operations to RvfStore calls:
/// - `add_vectors` -> `ingest_batch`
/// - `search` -> `query`
/// - `delete_vectors` -> `delete`
/// - `get_vector` -> single-vector query
/// - `save` / `load` -> close / open
pub struct RvfVectorStore {
    /// Open handle to the RVF file; `None` after `save` closes it
    /// (operations then fail until `load` reopens).
    store: Option<RvfStore>,
    /// File path used for create/open/reload.
    path: PathBuf,
    /// Dimension, metric, and default ef_search.
    config: VectorStoreConfig,
}
impl RvfVectorStore {
/// Create a new RVF vector store at the given path.
pub fn create(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
let rvf_opts = RvfOptions {
dimension: config.dimension,
metric: config.metric.into(),
profile: 1, // RVText profile
..Default::default()
};
let store = RvfStore::create(path, rvf_opts)?;
Ok(Self {
store: Some(store),
path: path.to_path_buf(),
config,
})
}
/// Open an existing RVF vector store.
pub fn open(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
let store = RvfStore::open(path)?;
Ok(Self {
store: Some(store),
path: path.to_path_buf(),
config,
})
}
/// Add vectors with their IDs and optional metadata.
///
/// `vectors`: slice of float slices, one per vector.
/// `ids`: one ID per vector.
/// `metadata`: optional metadata entries (flat list, one entry per vector).
pub fn add_vectors(
&mut self,
vectors: &[&[f32]],
ids: &[u64],
metadata: Option<&[MetadataEntry]>,
) -> Result<u64, RvfError> {
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
let result = store.ingest_batch(vectors, ids, metadata)?;
Ok(result.accepted)
}
/// Search for the k nearest neighbors of a query vector.
///
/// Returns results sorted by distance (ascending).
pub fn search(
&self,
query: &[f32],
k: usize,
ef_search: Option<u16>,
) -> Result<Vec<SearchResult>, RvfError> {
let store = self.store.as_ref().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
let opts = QueryOptions {
ef_search: ef_search.unwrap_or(self.config.ef_search),
..Default::default()
};
store.query(query, k, &opts)
}
/// Delete vectors by their IDs.
pub fn delete_vectors(&mut self, ids: &[u64]) -> Result<u64, RvfError> {
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
let result = store.delete(ids)?;
Ok(result.deleted)
}
/// Retrieve a single vector by ID.
///
/// Uses a zero-distance search trick: queries with each candidate until
/// the exact ID is found. For small stores this is acceptable; for large
/// stores the caller should maintain an ID index.
///
/// Returns `None` if the vector is not found or has been deleted.
pub fn get_vector(&self, id: u64) -> Option<SearchResult> {
let store = self.store.as_ref()?;
let status = store.status();
if status.total_vectors == 0 {
return None;
}
// Query a large k and find the matching ID in results.
// This is O(n) but correct. Production agentdb should cache vectors.
let dim = self.config.dimension as usize;
let zero_query = vec![0.0f32; dim];
let opts = QueryOptions {
ef_search: self.config.ef_search,
..Default::default()
};
let results = store.query(&zero_query, status.total_vectors as usize, &opts).ok()?;
results.into_iter().find(|r| r.id == id)
}
/// Save the store (flushes and closes the underlying RVF file).
pub fn save(&mut self) -> Result<(), RvfError> {
if let Some(store) = self.store.take() {
store.close()?;
}
Ok(())
}
/// Reload the store from disk.
pub fn load(&mut self) -> Result<(), RvfError> {
if self.store.is_some() {
return Ok(());
}
let store = RvfStore::open(&self.path)?;
self.store = Some(store);
Ok(())
}
/// Get the current vector count.
pub fn len(&self) -> u64 {
self.store.as_ref().map_or(0, |s| s.status().total_vectors)
}
/// Returns true if the store is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Run compaction to reclaim space from deleted vectors.
pub fn compact(&mut self) -> Result<u64, RvfError> {
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
let result = store.compact()?;
Ok(result.bytes_reclaimed)
}
/// Get the file path of the underlying RVF store.
pub fn path(&self) -> &Path {
&self.path
}
/// Get the store configuration.
pub fn config(&self) -> &VectorStoreConfig {
&self.config
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use rvf_runtime::options::MetadataValue;
    use tempfile::TempDir;

    /// L2 config with the given dimension (L2 gives exact-match distance 0).
    fn make_config(dim: u16) -> VectorStoreConfig {
        VectorStoreConfig {
            dimension: dim,
            metric: AgentDbMetric::L2,
            ef_search: 100,
        }
    }

    // Exact-match query should return the identical vector at distance ~0.
    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        let v3 = [0.0f32, 0.0, 1.0, 0.0];
        let accepted = store
            .add_vectors(&[&v1, &v2, &v3], &[10, 20, 30], None)
            .unwrap();
        assert_eq!(accepted, 3);
        let results = store.search(&[1.0, 0.0, 0.0, 0.0], 2, None).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 10);
        assert!(results[0].distance < f32::EPSILON);
    }

    // Deleting 3 of 10 then compacting should reclaim bytes while keeping
    // the live count at 7.
    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_del.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let vecs: Vec<[f32; 4]> = (0..10).map(|i| [i as f32, 0.0, 0.0, 0.0]).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (0..10).collect();
        store.add_vectors(&refs, &ids, None).unwrap();
        let deleted = store.delete_vectors(&[0, 2, 4]).unwrap();
        assert_eq!(deleted, 3);
        assert_eq!(store.len(), 7);
        let reclaimed = store.compact().unwrap();
        assert!(reclaimed > 0);
        assert_eq!(store.len(), 7);
    }

    // Data written before save() must survive a close/reopen cycle.
    #[test]
    fn save_and_load() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_persist.rvf");
        let config = make_config(4);
        {
            let mut store = RvfVectorStore::create(&path, config.clone()).unwrap();
            let v1 = [1.0f32, 2.0, 3.0, 4.0];
            store.add_vectors(&[&v1], &[42], None).unwrap();
            store.save().unwrap();
        }
        {
            let store = RvfVectorStore::open(&path, config).unwrap();
            assert_eq!(store.len(), 1);
            let results = store.search(&[1.0, 2.0, 3.0, 4.0], 1, None).unwrap();
            assert_eq!(results[0].id, 42);
        }
    }

    // Ingest with a metadata entry per vector should be accepted.
    #[test]
    fn add_with_metadata() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_meta.rvf");
        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        let metadata = vec![
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_a".into()),
            },
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_b".into()),
            },
        ];
        let accepted = store
            .add_vectors(&[&v1, &v2], &[1, 2], Some(&metadata))
            .unwrap();
        assert_eq!(accepted, 2);
    }

    // A fresh store reports empty and returns no search results.
    #[test]
    fn empty_store() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_empty.rvf");
        let store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);
        let results = store.search(&[0.0, 0.0, 0.0, 0.0], 5, None).unwrap();
        assert!(results.is_empty());
    }
}