Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
323
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/index_adapter.rs
vendored
Normal file
323
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/index_adapter.rs
vendored
Normal file
@@ -0,0 +1,323 @@
|
||||
//! Maps agentdb HNSW operations to RVF INDEX_SEG layers.
|
||||
//!
|
||||
//! Bridges agentdb's HNSW index lifecycle to the three-layer progressive
|
||||
//! indexing model (Layer A / B / C) defined in `rvf-index`.
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use rvf_index::builder::{build_full_index, build_layer_a, build_layer_b, build_layer_c};
|
||||
use rvf_index::distance::{cosine_distance, l2_distance};
|
||||
use rvf_index::hnsw::{HnswConfig, HnswGraph};
|
||||
|
||||
/// Boxed distance function shared by index construction and search paths.
type DistanceFn = Box<dyn Fn(&[f32], &[f32]) -> f32>;
|
||||
use rvf_index::layers::{IndexLayer, LayerA, LayerB, LayerC};
|
||||
use rvf_index::progressive::ProgressiveIndex;
|
||||
use rvf_index::traits::InMemoryVectorStore;
|
||||
|
||||
/// Configuration for the RVF index adapter.
#[derive(Clone, Debug)]
pub struct IndexAdapterConfig {
    /// HNSW M parameter (neighbor budget on upper layers).
    pub m: usize,
    /// HNSW M0 (layer-0 neighbors).
    pub m0: usize,
    /// ef_construction beam width used while building the graph.
    pub ef_construction: usize,
    /// ef_search beam width for queries.
    pub ef_search: usize,
    /// Use cosine distance (default true for agentdb text embeddings);
    /// when false, L2 distance is used instead.
    pub use_cosine: bool,
    /// Hot node fraction for Layer B (0.0 - 1.0); the first
    /// `ceil(n * hot_fraction)` node IDs are treated as hot.
    pub hot_fraction: f32,
}
|
||||
|
||||
impl Default for IndexAdapterConfig {
    /// Default configuration: M=16, M0=32, ef_construction=200,
    /// ef_search=100, cosine distance, 20% hot fraction.
    fn default() -> Self {
        Self {
            m: 16,
            m0: 32,
            ef_construction: 200,
            ef_search: 100,
            use_cosine: true,
            hot_fraction: 0.2,
        }
    }
}
|
||||
|
||||
/// Adapter that maps agentdb HNSW operations to RVF INDEX_SEG layers.
///
/// Manages the full HNSW graph and can extract progressive layers (A/B/C)
/// for serialization into INDEX_SEG segments.
pub struct RvfIndexAdapter {
    /// Adapter configuration (HNSW parameters, metric, hot fraction).
    config: IndexAdapterConfig,
    /// Full HNSW graph; `None` until `build` is called with data.
    graph: Option<HnswGraph>,
    /// Raw vectors in insertion order; graph node i corresponds to vectors[i].
    vectors: Vec<Vec<f32>>,
    /// External IDs, parallel to `vectors`.
    /// NOTE(review): stored by `build` but never read afterwards in this
    /// file — search results appear to carry graph-internal IDs; confirm
    /// the mapping is applied by callers.
    id_map: Vec<u64>,
    /// Progressive (layered) index configured by `load_progressive`.
    progressive: ProgressiveIndex,
    /// Layers most recently passed to `load_progressive`.
    loaded_layers: Vec<IndexLayer>,
}
|
||||
|
||||
impl RvfIndexAdapter {
|
||||
/// Create a new index adapter with the given configuration.
|
||||
pub fn new(config: IndexAdapterConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
graph: None,
|
||||
vectors: Vec::new(),
|
||||
id_map: Vec::new(),
|
||||
progressive: ProgressiveIndex::new(),
|
||||
loaded_layers: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the full HNSW index from a set of vectors and IDs.
|
||||
///
|
||||
/// This replaces any existing index.
|
||||
pub fn build(&mut self, vectors: Vec<Vec<f32>>, ids: Vec<u64>) {
|
||||
let n = vectors.len();
|
||||
if n == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let hnsw_config = HnswConfig {
|
||||
m: self.config.m,
|
||||
m0: self.config.m0,
|
||||
ef_construction: self.config.ef_construction,
|
||||
};
|
||||
|
||||
let store = InMemoryVectorStore::new(vectors.clone());
|
||||
let distance_fn = self.distance_fn();
|
||||
|
||||
// Generate deterministic pseudo-random values for level selection.
|
||||
let rng_values: Vec<f64> = (0..n)
|
||||
.map(|i| {
|
||||
let seed = (i as u64)
|
||||
.wrapping_mul(6364136223846793005)
|
||||
.wrapping_add(1442695040888963407);
|
||||
let val = (seed >> 33) as f64 / (1u64 << 31) as f64;
|
||||
val.clamp(0.001, 0.999)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let graph = build_full_index(&store, n, &hnsw_config, &rng_values, &distance_fn);
|
||||
|
||||
self.vectors = vectors;
|
||||
self.id_map = ids;
|
||||
self.graph = Some(graph);
|
||||
}
|
||||
|
||||
/// Extract Layer A (entry points + coarse routing) from the current graph.
|
||||
pub fn extract_layer_a(&self) -> Option<LayerA> {
|
||||
let graph = self.graph.as_ref()?;
|
||||
let n = self.vectors.len();
|
||||
|
||||
// Simple centroid computation: split vectors into 2 partitions.
|
||||
let mid = n / 2;
|
||||
let dim = self.vectors.first().map_or(0, |v| v.len());
|
||||
|
||||
let centroid_0 = compute_centroid(&self.vectors[..mid], dim);
|
||||
let centroid_1 = if mid < n {
|
||||
compute_centroid(&self.vectors[mid..], dim)
|
||||
} else {
|
||||
centroid_0.clone()
|
||||
};
|
||||
|
||||
let centroids = vec![centroid_0, centroid_1];
|
||||
let assignments: Vec<u32> = (0..n).map(|i| if i < mid { 0 } else { 1 }).collect();
|
||||
|
||||
Some(build_layer_a(graph, ¢roids, &assignments, n as u64))
|
||||
}
|
||||
|
||||
/// Extract Layer B (hot region partial adjacency) from the current graph.
|
||||
pub fn extract_layer_b(&self) -> Option<LayerB> {
|
||||
let graph = self.graph.as_ref()?;
|
||||
let n = self.vectors.len();
|
||||
let hot_count = ((n as f32) * self.config.hot_fraction).ceil() as usize;
|
||||
let hot_ids: BTreeSet<u64> = (0..hot_count as u64).collect();
|
||||
Some(build_layer_b(graph, &hot_ids))
|
||||
}
|
||||
|
||||
/// Extract Layer C (full adjacency) from the current graph.
|
||||
pub fn extract_layer_c(&self) -> Option<LayerC> {
|
||||
let graph = self.graph.as_ref()?;
|
||||
Some(build_layer_c(graph))
|
||||
}
|
||||
|
||||
/// Load progressive layers and configure the progressive index for search.
|
||||
pub fn load_progressive(&mut self, layers: &[IndexLayer]) {
|
||||
self.loaded_layers = layers.to_vec();
|
||||
|
||||
let mut idx = ProgressiveIndex::new();
|
||||
for layer in layers {
|
||||
match layer {
|
||||
IndexLayer::A => {
|
||||
idx.layer_a = self.extract_layer_a();
|
||||
}
|
||||
IndexLayer::B => {
|
||||
idx.layer_b = self.extract_layer_b();
|
||||
}
|
||||
IndexLayer::C => {
|
||||
idx.layer_c = self.extract_layer_c();
|
||||
}
|
||||
}
|
||||
}
|
||||
self.progressive = idx;
|
||||
}
|
||||
|
||||
/// Search using the progressive index with whatever layers are loaded.
|
||||
pub fn search(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
|
||||
let store = InMemoryVectorStore::new(self.vectors.clone());
|
||||
let distance_fn = self.distance_fn();
|
||||
self.progressive
|
||||
.search_with_distance(query, k, self.config.ef_search, &store, &distance_fn)
|
||||
}
|
||||
|
||||
/// Search using the full HNSW graph directly (bypasses progressive layers).
|
||||
pub fn search_full(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
|
||||
let graph = match self.graph.as_ref() {
|
||||
Some(g) => g,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let store = InMemoryVectorStore::new(self.vectors.clone());
|
||||
let distance_fn = self.distance_fn();
|
||||
graph.search(query, k, self.config.ef_search, &store, &distance_fn)
|
||||
}
|
||||
|
||||
/// Get the node count in the HNSW graph.
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.graph.as_ref().map_or(0, |g| g.node_count())
|
||||
}
|
||||
|
||||
/// Get the currently loaded layers.
|
||||
pub fn loaded_layers(&self) -> &[IndexLayer] {
|
||||
&self.loaded_layers
|
||||
}
|
||||
|
||||
fn distance_fn(&self) -> DistanceFn {
|
||||
if self.config.use_cosine {
|
||||
Box::new(cosine_distance)
|
||||
} else {
|
||||
Box::new(l2_distance)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the element-wise mean of `vectors`, limited to `dim` components.
///
/// Returns a zero vector of length `dim` when the input is empty, and an
/// empty vector when `dim` is zero. Components beyond `dim` are ignored;
/// vectors shorter than `dim` contribute only the components they have.
fn compute_centroid(vectors: &[Vec<f32>], dim: usize) -> Vec<f32> {
    let mut acc = vec![0.0f32; dim];
    if vectors.is_empty() || dim == 0 {
        return acc;
    }
    let count = vectors.len() as f32;
    for v in vectors {
        // `zip` stops at the shorter of `acc` and `v`, which caps the
        // accumulation at `dim` components exactly like a bounded index loop.
        for (slot, &x) in acc.iter_mut().zip(v) {
            *slot += x;
        }
    }
    for slot in &mut acc {
        *slot /= count;
    }
    acc
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic fixture: n vectors of `dim` strictly increasing floats,
    /// with IDs 0..n matching the insertion order.
    fn make_vectors(n: usize, dim: usize) -> (Vec<Vec<f32>>, Vec<u64>) {
        let vecs: Vec<Vec<f32>> = (0..n)
            .map(|i| (0..dim).map(|d| (i * dim + d) as f32).collect())
            .collect();
        let ids: Vec<u64> = (0..n as u64).collect();
        (vecs, ids)
    }

    /// A stored vector queried against the full graph must come back first.
    #[test]
    fn build_and_search_full() {
        let (vecs, ids) = make_vectors(100, 8);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            // L2 so an exact self-match has distance 0.
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        assert_eq!(adapter.node_count(), 100);

        let results = adapter.search_full(&vecs[50], 5);
        assert!(!results.is_empty());
        assert_eq!(results[0].0, 50);
    }

    /// All three layers should be extractable from a built graph.
    #[test]
    fn extract_layers() {
        let (vecs, ids) = make_vectors(50, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs, ids);

        let layer_a = adapter.extract_layer_a();
        assert!(layer_a.is_some());
        let la = layer_a.unwrap();
        assert!(!la.entry_points.is_empty());
        // The adapter always produces a 2-way partition.
        assert_eq!(la.centroids.len(), 2);

        let layer_b = adapter.extract_layer_b();
        assert!(layer_b.is_some());

        let layer_c = adapter.extract_layer_c();
        assert!(layer_c.is_some());
    }

    /// With all layers loaded, progressive search should match full search.
    #[test]
    fn progressive_search_with_layers() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        // Load all three layers.
        adapter.load_progressive(&[IndexLayer::A, IndexLayer::B, IndexLayer::C]);

        let results = adapter.search(&vecs[25], 5);
        assert!(!results.is_empty());
        // With full Layer C, we should find the exact match.
        assert_eq!(results[0].0, 25);
    }

    /// Layer A alone gives only coarse routing, so just assert non-empty.
    #[test]
    fn progressive_layer_a_only() {
        let (vecs, ids) = make_vectors(100, 4);
        let mut adapter = RvfIndexAdapter::new(IndexAdapterConfig {
            use_cosine: false,
            ..Default::default()
        });
        adapter.build(vecs.clone(), ids);

        adapter.load_progressive(&[IndexLayer::A]);
        let results = adapter.search(&vecs[10], 5);
        // Layer A alone provides coarse results; we just verify non-empty.
        assert!(!results.is_empty());
    }

    /// An adapter with no build must report zero nodes and empty results.
    #[test]
    fn empty_adapter() {
        let adapter = RvfIndexAdapter::new(IndexAdapterConfig::default());
        assert_eq!(adapter.node_count(), 0);
        let results = adapter.search_full(&[0.0; 4], 5);
        assert!(results.is_empty());
    }

    /// Element-wise mean of two vectors.
    #[test]
    fn compute_centroid_basic() {
        let vecs = vec![
            vec![1.0, 2.0, 3.0],
            vec![3.0, 4.0, 5.0],
        ];
        let centroid = compute_centroid(&vecs, 3);
        assert_eq!(centroid, vec![2.0, 3.0, 4.0]);
    }
}
|
||||
18
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/lib.rs
vendored
Normal file
18
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/lib.rs
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
//! AgentDB adapter for the RuVector Format (RVF).
|
||||
//!
|
||||
//! Maps agentdb's vector storage, HNSW index, and memory pattern APIs
|
||||
//! onto the RVF segment model:
|
||||
//!
|
||||
//! - **VEC_SEG**: Raw vector data (episodes, state embeddings)
|
||||
//! - **INDEX_SEG**: HNSW index layers (A/B/C progressive indexing)
|
||||
//! - **META_SEG**: Memory pattern metadata (rewards, critiques, tags)
|
||||
//!
|
||||
//! Uses the RVText domain profile for text/embedding workloads.
|
||||
|
||||
pub mod index_adapter;
|
||||
pub mod pattern_store;
|
||||
pub mod vector_store;
|
||||
|
||||
pub use index_adapter::RvfIndexAdapter;
|
||||
pub use pattern_store::{MemoryPattern, RvfPatternStore};
|
||||
pub use vector_store::RvfVectorStore;
|
||||
456
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/pattern_store.rs
vendored
Normal file
456
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/pattern_store.rs
vendored
Normal file
@@ -0,0 +1,456 @@
|
||||
//! Memory pattern storage using RVF META_SEG.
|
||||
//!
|
||||
//! Stores agentdb memory patterns (task descriptions, rewards, critiques,
|
||||
//! success flags) as metadata alongside their state-embedding vectors.
|
||||
//! Patterns can be searched by similarity and filtered by reward threshold.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use rvf_runtime::options::{MetadataEntry, MetadataValue};
|
||||
use rvf_types::RvfError;
|
||||
|
||||
use crate::vector_store::{AgentDbMetric, RvfVectorStore, VectorStoreConfig};
|
||||
|
||||
/// A memory pattern stored in the agentdb reasoning bank.
#[derive(Clone, Debug)]
pub struct MemoryPattern {
    /// Unique pattern identifier. Pass 0 to have `store_pattern` assign
    /// the next available ID automatically.
    pub id: u64,
    /// Task description that produced this pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0) indicating quality.
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes about the pattern.
    pub critique: String,
    /// State embedding vector for similarity search.
    pub embedding: Vec<f32>,
}
|
||||
|
||||
/// Well-known metadata field IDs for pattern attributes written to META_SEG.
mod field_ids {
    /// Task description (stored as `MetadataValue::String`).
    pub const TASK: u16 = 0;
    /// Reward score (stored as `MetadataValue::F64`).
    pub const REWARD: u16 = 1;
    /// Success flag (stored as `MetadataValue::U64`: 1 = success, 0 = failure).
    pub const SUCCESS: u16 = 2;
    /// Self-critique text (stored as `MetadataValue::String`).
    pub const CRITIQUE: u16 = 3;
}
|
||||
|
||||
/// RVF-backed memory pattern store for agentdb.
///
/// Stores patterns as vectors (embeddings) with metadata (task, reward,
/// critique, success flag). Supports similarity search with reward filtering.
pub struct RvfPatternStore {
    /// Underlying RVF store holding embeddings plus META_SEG entries.
    vector_store: RvfVectorStore,
    /// In-memory metadata mirror keyed by pattern ID; search results are
    /// joined against this map.
    /// NOTE(review): `open` does not rebuild this map from the RVF file,
    /// so metadata written in a previous session is invisible after
    /// reopening — confirm whether open() should hydrate it from META_SEG.
    patterns: HashMap<u64, PatternMetadata>,
    /// Next auto-assigned pattern ID (starts at 1, monotonic).
    /// NOTE(review): also reset by `open`, so reopened stores may reuse IDs.
    next_id: u64,
}
|
||||
|
||||
/// In-memory metadata for a pattern (kept alongside the RVF store).
#[derive(Clone, Debug)]
struct PatternMetadata {
    /// Task description that produced the pattern.
    task: String,
    /// Reward score (0.0 - 1.0).
    reward: f32,
    /// Whether the pattern was successful.
    success: bool,
    /// Self-critique / notes.
    critique: String,
}
|
||||
|
||||
impl RvfPatternStore {
    /// Create a new pattern store at the given path.
    ///
    /// The underlying vector store uses cosine distance and ef_search = 100.
    pub fn create(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::create(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Open an existing pattern store.
    ///
    /// NOTE(review): starts with an empty `patterns` map and `next_id = 1`
    /// even though metadata was persisted by `store_pattern`; previously
    /// stored patterns will therefore be filtered out of search results and
    /// auto-assigned IDs may collide with existing ones. Confirm intended
    /// behavior or hydrate the map from the RVF metadata on open.
    pub fn open(path: &Path, dimension: u16) -> Result<Self, RvfError> {
        let config = VectorStoreConfig {
            dimension,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        };
        let vector_store = RvfVectorStore::open(path, config)?;
        Ok(Self {
            vector_store,
            patterns: HashMap::new(),
            next_id: 1,
        })
    }

    /// Store a memory pattern.
    ///
    /// A pattern with `id == 0` receives the next auto-assigned ID; a
    /// positive `id` is used as-is. Returns the assigned pattern ID.
    pub fn store_pattern(&mut self, pattern: MemoryPattern) -> Result<u64, RvfError> {
        let id = if pattern.id > 0 {
            pattern.id
        } else {
            let id = self.next_id;
            self.next_id += 1;
            id
        };

        // Ensure next_id stays ahead of manually assigned IDs.
        if id >= self.next_id {
            self.next_id = id + 1;
        }

        // Persist the pattern attributes as metadata entries keyed by the
        // well-known field IDs.
        let metadata = vec![
            MetadataEntry {
                field_id: field_ids::TASK,
                value: MetadataValue::String(pattern.task.clone()),
            },
            MetadataEntry {
                field_id: field_ids::REWARD,
                value: MetadataValue::F64(pattern.reward as f64),
            },
            MetadataEntry {
                field_id: field_ids::SUCCESS,
                value: MetadataValue::U64(if pattern.success { 1 } else { 0 }),
            },
            MetadataEntry {
                field_id: field_ids::CRITIQUE,
                value: MetadataValue::String(pattern.critique.clone()),
            },
        ];

        self.vector_store
            .add_vectors(&[pattern.embedding.as_slice()], &[id], Some(&metadata))?;

        // Mirror the metadata in memory for fast joins during search.
        self.patterns.insert(
            id,
            PatternMetadata {
                task: pattern.task,
                reward: pattern.reward,
                success: pattern.success,
                critique: pattern.critique,
            },
        );

        Ok(id)
    }

    /// Search for patterns similar to the given embedding.
    ///
    /// Returns `(pattern_id, distance)` pairs sorted by distance.
    /// Optionally filter by minimum reward score.
    pub fn search_patterns(
        &self,
        query_embedding: &[f32],
        k: usize,
        min_reward: Option<f32>,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        // Over-fetch 3x when filtering so the reward cut still leaves ~k hits.
        let search_k = if min_reward.is_some() { k * 3 } else { k };
        let results = self.vector_store.search(query_embedding, search_k, None)?;

        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                // Hits with no in-memory metadata (e.g. after reopen) are dropped.
                let meta = self.patterns.get(&r.id)?;
                if let Some(threshold) = min_reward {
                    if meta.reward < threshold {
                        return None;
                    }
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: meta.success,
                    critique: meta.critique.clone(),
                })
            })
            .collect();

        filtered.truncate(k);
        Ok(filtered)
    }

    /// Search for patterns that failed (success == false).
    ///
    /// Over-fetches 5x since failures may be a minority of near neighbors.
    pub fn search_failures(
        &self,
        query_embedding: &[f32],
        k: usize,
    ) -> Result<Vec<PatternSearchResult>, RvfError> {
        let results = self.vector_store.search(query_embedding, k * 5, None)?;

        let mut filtered: Vec<PatternSearchResult> = results
            .into_iter()
            .filter_map(|r| {
                let meta = self.patterns.get(&r.id)?;
                if meta.success {
                    return None;
                }
                Some(PatternSearchResult {
                    id: r.id,
                    distance: r.distance,
                    task: meta.task.clone(),
                    reward: meta.reward,
                    success: false,
                    critique: meta.critique.clone(),
                })
            })
            .collect();

        filtered.truncate(k);
        Ok(filtered)
    }

    /// Delete a pattern by ID.
    ///
    /// Returns true if the underlying store actually removed a vector.
    pub fn delete_pattern(&mut self, id: u64) -> Result<bool, RvfError> {
        let deleted = self.vector_store.delete_vectors(&[id])?;
        self.patterns.remove(&id);
        Ok(deleted > 0)
    }

    /// Get pattern metadata by ID (distance is reported as 0.0 since no
    /// query vector is involved).
    pub fn get_pattern(&self, id: u64) -> Option<PatternSearchResult> {
        let meta = self.patterns.get(&id)?;
        Some(PatternSearchResult {
            id,
            distance: 0.0,
            task: meta.task.clone(),
            reward: meta.reward,
            success: meta.success,
            critique: meta.critique.clone(),
        })
    }

    /// Get aggregate statistics about stored patterns.
    ///
    /// `avg_reward` is 0.0 when the store is empty.
    pub fn stats(&self) -> PatternStoreStats {
        let total = self.patterns.len();
        let successful = self.patterns.values().filter(|p| p.success).count();
        let avg_reward = if total > 0 {
            self.patterns.values().map(|p| p.reward as f64).sum::<f64>() / total as f64
        } else {
            0.0
        };

        PatternStoreStats {
            total_patterns: total,
            successful_patterns: successful,
            failed_patterns: total - successful,
            avg_reward,
            vector_count: self.vector_store.len(),
        }
    }

    /// Save the store to disk (flushes and closes the underlying RVF file).
    pub fn save(&mut self) -> Result<(), RvfError> {
        self.vector_store.save()
    }

    /// Get the total number of patterns (in-memory count).
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns true if no patterns are stored.
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}
|
||||
|
||||
/// A pattern search result with full metadata.
#[derive(Clone, Debug)]
pub struct PatternSearchResult {
    /// Pattern identifier.
    pub id: u64,
    /// Distance to the query embedding (0.0 for direct `get_pattern` lookups).
    pub distance: f32,
    /// Task description that produced the pattern.
    pub task: String,
    /// Reward score (0.0 - 1.0).
    pub reward: f32,
    /// Whether the pattern was successful.
    pub success: bool,
    /// Self-critique / notes.
    pub critique: String,
}
|
||||
|
||||
/// Aggregate statistics for the pattern store.
#[derive(Clone, Debug)]
pub struct PatternStoreStats {
    /// Total number of patterns tracked in memory.
    pub total_patterns: usize,
    /// Count of patterns with `success == true`.
    pub successful_patterns: usize,
    /// Count of patterns with `success == false`.
    pub failed_patterns: usize,
    /// Mean reward across all patterns (0.0 when empty).
    pub avg_reward: f64,
    /// Vector count reported by the underlying RVF store.
    pub vector_count: u64,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Deterministic pseudo-random embedding in roughly [-0.5, 0.5],
    /// seeded so the same seed always yields the same vector.
    fn dummy_embedding(dim: usize, seed: u64) -> Vec<f32> {
        let mut v = Vec::with_capacity(dim);
        let mut x = seed;
        for _ in 0..dim {
            x = x.wrapping_mul(6364136223846793005).wrapping_add(1);
            v.push(((x >> 33) as f32) / (u32::MAX as f32) - 0.5);
        }
        v
    }

    /// Basic round-trip: store 10 patterns, search, bounded result size.
    #[test]
    fn store_and_search_patterns() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0, // auto-assign
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: i >= 5,
                critique: format!("critique_{}", i),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        assert_eq!(store.len(), 10);

        let query = dummy_embedding(dim, 7);
        let results = store.search_patterns(&query, 3, None).unwrap();
        assert!(!results.is_empty());
        assert!(results.len() <= 3);
    }

    /// The min_reward filter must exclude every pattern below the threshold.
    #[test]
    fn search_with_min_reward() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_reward.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) / 10.0,
                success: true,
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        let query = dummy_embedding(dim, 5);
        let results = store.search_patterns(&query, 10, Some(0.5)).unwrap();
        assert!(results.iter().all(|r| r.reward >= 0.5));
    }

    /// search_failures must only surface patterns with success == false.
    #[test]
    fn search_failures() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_fail.rvf");

        let dim = 8;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..10u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: 0.5,
                success: i % 2 == 0, // alternate success/failure
                critique: String::new(),
                embedding: dummy_embedding(dim, i),
            };
            store.store_pattern(pattern).unwrap();
        }

        let query = dummy_embedding(dim, 3);
        let results = store.search_failures(&query, 5).unwrap();
        assert!(results.iter().all(|r| !r.success));
    }

    /// Deleting an explicitly-ID'd pattern removes it from vector store
    /// and metadata map alike.
    #[test]
    fn delete_pattern() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_del.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        let pattern = MemoryPattern {
            id: 42, // explicit ID
            task: "test".into(),
            reward: 0.9,
            success: true,
            critique: "good".into(),
            embedding: vec![1.0, 2.0, 3.0, 4.0],
        };
        store.store_pattern(pattern).unwrap();
        assert_eq!(store.len(), 1);

        let deleted = store.delete_pattern(42).unwrap();
        assert!(deleted);
        assert_eq!(store.len(), 0);
        assert!(store.get_pattern(42).is_none());
    }

    /// Aggregate counters: 5 patterns, i in 3..5 marked successful.
    #[test]
    fn stats() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_stats.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        for i in 0..5u64 {
            let pattern = MemoryPattern {
                id: 0,
                task: format!("task_{}", i),
                reward: (i as f32) * 0.2,
                success: i >= 3,
                critique: String::new(),
                embedding: vec![i as f32; dim],
            };
            store.store_pattern(pattern).unwrap();
        }

        let stats = store.stats();
        assert_eq!(stats.total_patterns, 5);
        assert_eq!(stats.successful_patterns, 2);
        assert_eq!(stats.failed_patterns, 3);
        assert!(stats.avg_reward > 0.0);
    }

    /// Direct lookup by ID round-trips all metadata fields; missing IDs
    /// return None.
    #[test]
    fn get_pattern_by_id() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("patterns_get.rvf");

        let dim = 4;
        let mut store = RvfPatternStore::create(&path, dim as u16).unwrap();

        let pattern = MemoryPattern {
            id: 100,
            task: "find_bugs".into(),
            reward: 0.85,
            success: true,
            critique: "good coverage".into(),
            embedding: vec![1.0, 0.0, 0.0, 0.0],
        };
        store.store_pattern(pattern).unwrap();

        let result = store.get_pattern(100).unwrap();
        assert_eq!(result.task, "find_bugs");
        assert_eq!(result.reward, 0.85);
        assert!(result.success);
        assert_eq!(result.critique, "good coverage");

        assert!(store.get_pattern(999).is_none());
    }
}
|
||||
326
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/vector_store.rs
vendored
Normal file
326
vendor/ruvector/crates/rvf/rvf-adapters/agentdb/src/vector_store.rs
vendored
Normal file
@@ -0,0 +1,326 @@
|
||||
//! RVF-backed vector store for agentdb.
|
||||
//!
|
||||
//! Wraps [`RvfStore`] to provide the vector CRUD operations that agentdb
|
||||
//! expects: add, search, delete, get, save, and load.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use rvf_runtime::options::{
|
||||
DistanceMetric, MetadataEntry, QueryOptions, RvfOptions, SearchResult,
|
||||
};
|
||||
use rvf_runtime::RvfStore;
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
|
||||
/// Distance metric selection matching agentdb's API.
#[derive(Clone, Copy, Debug, Default)]
pub enum AgentDbMetric {
    /// Cosine distance (default for text embeddings).
    #[default]
    Cosine,
    /// Euclidean (L2) distance.
    L2,
    /// Inner-product similarity.
    InnerProduct,
}
|
||||
|
||||
/// One-to-one mapping from agentdb's metric enum to the RVF runtime's.
impl From<AgentDbMetric> for DistanceMetric {
    fn from(m: AgentDbMetric) -> Self {
        match m {
            AgentDbMetric::Cosine => DistanceMetric::Cosine,
            AgentDbMetric::L2 => DistanceMetric::L2,
            AgentDbMetric::InnerProduct => DistanceMetric::InnerProduct,
        }
    }
}
|
||||
|
||||
/// Configuration for the RVF vector store.
#[derive(Clone, Debug)]
pub struct VectorStoreConfig {
    /// Vector dimensionality.
    pub dimension: u16,
    /// Distance metric for similarity search.
    pub metric: AgentDbMetric,
    /// HNSW ef_search beam width for queries (per-call override possible
    /// via `RvfVectorStore::search`).
    pub ef_search: u16,
}
|
||||
|
||||
impl Default for VectorStoreConfig {
    /// Default configuration: 128 dimensions, cosine distance, ef_search = 100.
    fn default() -> Self {
        Self {
            dimension: 128,
            metric: AgentDbMetric::Cosine,
            ef_search: 100,
        }
    }
}
|
||||
|
||||
/// RVF-backed vector store that provides the agentdb vector storage interface.
///
/// Maps agentdb operations to RvfStore calls:
/// - `add_vectors` -> `ingest_batch`
/// - `search` -> `query`
/// - `delete_vectors` -> `delete`
/// - `get_vector` -> single-vector query
/// - `save` / `load` -> close / open
pub struct RvfVectorStore {
    /// Open store handle; `None` after `save` closes it (until `load`).
    store: Option<RvfStore>,
    /// File path, retained so `load` can reopen after `save`.
    path: PathBuf,
    /// Store configuration (dimension, metric, default ef_search).
    config: VectorStoreConfig,
}
|
||||
|
||||
impl RvfVectorStore {
|
||||
/// Create a new RVF vector store at the given path.
|
||||
pub fn create(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
|
||||
let rvf_opts = RvfOptions {
|
||||
dimension: config.dimension,
|
||||
metric: config.metric.into(),
|
||||
profile: 1, // RVText profile
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let store = RvfStore::create(path, rvf_opts)?;
|
||||
|
||||
Ok(Self {
|
||||
store: Some(store),
|
||||
path: path.to_path_buf(),
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing RVF vector store.
|
||||
pub fn open(path: &Path, config: VectorStoreConfig) -> Result<Self, RvfError> {
|
||||
let store = RvfStore::open(path)?;
|
||||
Ok(Self {
|
||||
store: Some(store),
|
||||
path: path.to_path_buf(),
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
/// Add vectors with their IDs and optional metadata.
|
||||
///
|
||||
/// `vectors`: slice of float slices, one per vector.
|
||||
/// `ids`: one ID per vector.
|
||||
/// `metadata`: optional metadata entries (flat list, one entry per vector).
|
||||
pub fn add_vectors(
|
||||
&mut self,
|
||||
vectors: &[&[f32]],
|
||||
ids: &[u64],
|
||||
metadata: Option<&[MetadataEntry]>,
|
||||
) -> Result<u64, RvfError> {
|
||||
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
|
||||
let result = store.ingest_batch(vectors, ids, metadata)?;
|
||||
Ok(result.accepted)
|
||||
}
|
||||
|
||||
/// Search for the k nearest neighbors of a query vector.
|
||||
///
|
||||
/// Returns results sorted by distance (ascending).
|
||||
pub fn search(
|
||||
&self,
|
||||
query: &[f32],
|
||||
k: usize,
|
||||
ef_search: Option<u16>,
|
||||
) -> Result<Vec<SearchResult>, RvfError> {
|
||||
let store = self.store.as_ref().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
|
||||
let opts = QueryOptions {
|
||||
ef_search: ef_search.unwrap_or(self.config.ef_search),
|
||||
..Default::default()
|
||||
};
|
||||
store.query(query, k, &opts)
|
||||
}
|
||||
|
||||
/// Delete vectors by their IDs.
|
||||
pub fn delete_vectors(&mut self, ids: &[u64]) -> Result<u64, RvfError> {
|
||||
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
|
||||
let result = store.delete(ids)?;
|
||||
Ok(result.deleted)
|
||||
}
|
||||
|
||||
/// Retrieve a single vector by ID.
|
||||
///
|
||||
/// Uses a zero-distance search trick: queries with each candidate until
|
||||
/// the exact ID is found. For small stores this is acceptable; for large
|
||||
/// stores the caller should maintain an ID index.
|
||||
///
|
||||
/// Returns `None` if the vector is not found or has been deleted.
|
||||
pub fn get_vector(&self, id: u64) -> Option<SearchResult> {
|
||||
let store = self.store.as_ref()?;
|
||||
let status = store.status();
|
||||
if status.total_vectors == 0 {
|
||||
return None;
|
||||
}
|
||||
// Query a large k and find the matching ID in results.
|
||||
// This is O(n) but correct. Production agentdb should cache vectors.
|
||||
let dim = self.config.dimension as usize;
|
||||
let zero_query = vec![0.0f32; dim];
|
||||
let opts = QueryOptions {
|
||||
ef_search: self.config.ef_search,
|
||||
..Default::default()
|
||||
};
|
||||
let results = store.query(&zero_query, status.total_vectors as usize, &opts).ok()?;
|
||||
results.into_iter().find(|r| r.id == id)
|
||||
}
|
||||
|
||||
/// Save the store (flushes and closes the underlying RVF file).
|
||||
pub fn save(&mut self) -> Result<(), RvfError> {
|
||||
if let Some(store) = self.store.take() {
|
||||
store.close()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reload the store from disk.
|
||||
pub fn load(&mut self) -> Result<(), RvfError> {
|
||||
if self.store.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
let store = RvfStore::open(&self.path)?;
|
||||
self.store = Some(store);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the current vector count.
|
||||
pub fn len(&self) -> u64 {
|
||||
self.store.as_ref().map_or(0, |s| s.status().total_vectors)
|
||||
}
|
||||
|
||||
/// Returns true if the store is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Run compaction to reclaim space from deleted vectors.
|
||||
pub fn compact(&mut self) -> Result<u64, RvfError> {
|
||||
let store = self.store.as_mut().ok_or(RvfError::Code(ErrorCode::InvalidManifest))?;
|
||||
let result = store.compact()?;
|
||||
Ok(result.bytes_reclaimed)
|
||||
}
|
||||
|
||||
/// Get the file path of the underlying RVF store.
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.path
|
||||
}
|
||||
|
||||
/// Get the store configuration.
///
/// Returns a borrow of the `VectorStoreConfig` this store was created
/// or opened with; callers needing an owned copy can clone it.
pub fn config(&self) -> &VectorStoreConfig {
    &self.config
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rvf_runtime::options::MetadataValue;
    use tempfile::TempDir;

    // Minimal L2-metric config used by all tests; only the dimension varies.
    fn make_config(dim: u16) -> VectorStoreConfig {
        VectorStoreConfig {
            dimension: dim,
            metric: AgentDbMetric::L2,
            ef_search: 100,
        }
    }

    // End-to-end happy path: create a store, add three axis-aligned
    // vectors, and verify an exact-match query ranks the identical
    // vector first with ~zero distance.
    #[test]
    fn create_add_search() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];
        let v3 = [0.0f32, 0.0, 1.0, 0.0];

        let accepted = store
            .add_vectors(&[&v1, &v2, &v3], &[10, 20, 30], None)
            .unwrap();
        assert_eq!(accepted, 3);

        let results = store.search(&[1.0, 0.0, 0.0, 0.0], 2, None).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].id, 10);
        // Exact match under L2 should be (near-)zero distance.
        assert!(results[0].distance < f32::EPSILON);
    }

    // Deleting vectors shrinks the logical count, and compaction
    // reclaims physical space without changing the remaining count.
    #[test]
    fn delete_and_compact() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_del.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let vecs: Vec<[f32; 4]> = (0..10).map(|i| [i as f32, 0.0, 0.0, 0.0]).collect();
        let refs: Vec<&[f32]> = vecs.iter().map(|v| v.as_slice()).collect();
        let ids: Vec<u64> = (0..10).collect();

        store.add_vectors(&refs, &ids, None).unwrap();

        let deleted = store.delete_vectors(&[0, 2, 4]).unwrap();
        assert_eq!(deleted, 3);
        assert_eq!(store.len(), 7);

        let reclaimed = store.compact().unwrap();
        assert!(reclaimed > 0);
        // Compaction must not drop live vectors.
        assert_eq!(store.len(), 7);
    }

    // Data written before save() (which closes the file) must be
    // readable after reopening the store from the same path.
    #[test]
    fn save_and_load() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_persist.rvf");

        let config = make_config(4);
        {
            let mut store = RvfVectorStore::create(&path, config.clone()).unwrap();
            let v1 = [1.0f32, 2.0, 3.0, 4.0];
            store.add_vectors(&[&v1], &[42], None).unwrap();
            store.save().unwrap();
        }

        {
            let store = RvfVectorStore::open(&path, config).unwrap();
            assert_eq!(store.len(), 1);
            let results = store.search(&[1.0, 2.0, 3.0, 4.0], 1, None).unwrap();
            assert_eq!(results[0].id, 42);
        }
    }

    // Vectors with per-vector metadata entries are accepted; one
    // MetadataEntry is supplied per vector, in order.
    #[test]
    fn add_with_metadata() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_meta.rvf");

        let mut store = RvfVectorStore::create(&path, make_config(4)).unwrap();

        let v1 = [1.0f32, 0.0, 0.0, 0.0];
        let v2 = [0.0f32, 1.0, 0.0, 0.0];

        let metadata = vec![
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_a".into()),
            },
            MetadataEntry {
                field_id: 0,
                value: MetadataValue::String("episode_b".into()),
            },
        ];

        let accepted = store
            .add_vectors(&[&v1, &v2], &[1, 2], Some(&metadata))
            .unwrap();
        assert_eq!(accepted, 2);
    }

    // A freshly created store reports empty and searching it succeeds
    // with no results rather than erroring.
    #[test]
    fn empty_store() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("agentdb_empty.rvf");

        let store = RvfVectorStore::create(&path, make_config(4)).unwrap();
        assert!(store.is_empty());
        assert_eq!(store.len(), 0);

        let results = store.search(&[0.0, 0.0, 0.0, 0.0], 5, None).unwrap();
        assert!(results.is_empty());
    }
}
|
||||
Reference in New Issue
Block a user