Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
17
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/lib.rs
vendored
Normal file
17
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/lib.rs
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
//! OSpipe adapter for the RuVector Format (RVF).
|
||||
//!
|
||||
//! Maps OSpipe's observation-state pipeline onto the RVF segment model:
|
||||
//!
|
||||
//! - **VEC_SEG**: State vector embeddings (screen, audio, UI observations)
|
||||
//! - **META_SEG**: Observation metadata (app name, content type, timestamps)
|
||||
//! - **JOURNAL_SEG**: Deletion records for expired observations
|
||||
//!
|
||||
//! The adapter bridges OSpipe's `StoredEmbedding` / `CapturedFrame` world
|
||||
//! (UUID ids, chrono timestamps, JSON metadata) to RVF's u64-id,
|
||||
//! field-based metadata model.
|
||||
|
||||
pub mod observation_store;
|
||||
pub mod pipeline;
|
||||
|
||||
pub use observation_store::{ObservationMeta, RvfObservationStore};
|
||||
pub use pipeline::{PipelineConfig, RvfPipelineAdapter};
|
||||
636
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/observation_store.rs
vendored
Normal file
636
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/observation_store.rs
vendored
Normal file
@@ -0,0 +1,636 @@
|
||||
//! RVF-backed observation store for OSpipe state vectors.
|
||||
//!
|
||||
//! Maps OSpipe observation embeddings into RVF segments with metadata
|
||||
//! stored via field IDs in META_SEG entries.
|
||||
//!
|
||||
//! # Field layout
|
||||
//!
|
||||
//! | field_id | type | description |
|
||||
//! |----------|--------|------------------------|
|
||||
//! | 0 | String | content_type |
|
||||
//! | 1 | String | app_name |
|
||||
//! | 2 | U64 | timestamp_secs (epoch) |
|
||||
//! | 3 | U64 | monitor_id |
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use rvf_runtime::filter::FilterExpr;
|
||||
use rvf_runtime::options::{
|
||||
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
|
||||
};
|
||||
use rvf_runtime::{IngestResult, RvfStore, SearchResult, StoreStatus};
|
||||
use rvf_types::RvfError;
|
||||
|
||||
/// Metadata field identifiers used when writing OSpipe observations to RVF.
///
/// The numeric values match the "Field layout" table in this module's
/// header documentation.
pub mod fields {
    /// Field carrying the content-type label (ocr, transcription, ui_event).
    pub const CONTENT_TYPE: u16 = 0;

    /// Field carrying the application name.
    pub const APP_NAME: u16 = 1;

    /// Field carrying the observation time in seconds since the UNIX epoch.
    pub const TIMESTAMP_SECS: u16 = 2;

    /// Field carrying the monitor index.
    pub const MONITOR_ID: u16 = 3;
}
|
||||
|
||||
/// Descriptive metadata attached to one observation when it is recorded.
///
/// `content_type` and `timestamp_secs` are always present; `app_name` and
/// `monitor_id` are optional and are simply not written when `None`.
#[derive(Debug, Clone)]
pub struct ObservationMeta {
    /// Label describing what kind of content was observed
    /// (e.g. "ocr", "transcription", "ui_event").
    pub content_type: String,
    /// Name of the application the observation belongs to, when known.
    pub app_name: Option<String>,
    /// Time of the observation, in seconds since the UNIX epoch.
    pub timestamp_secs: u64,
    /// Index of the monitor the observation came from, when applicable.
    pub monitor_id: Option<u32>,
}
|
||||
|
||||
impl ObservationMeta {
|
||||
/// Convert to RVF metadata entries for a single vector.
|
||||
fn to_entries(&self) -> Vec<MetadataEntry> {
|
||||
let mut entries = Vec::with_capacity(4);
|
||||
|
||||
entries.push(MetadataEntry {
|
||||
field_id: fields::CONTENT_TYPE,
|
||||
value: MetadataValue::String(self.content_type.clone()),
|
||||
});
|
||||
|
||||
if let Some(ref app) = self.app_name {
|
||||
entries.push(MetadataEntry {
|
||||
field_id: fields::APP_NAME,
|
||||
value: MetadataValue::String(app.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
entries.push(MetadataEntry {
|
||||
field_id: fields::TIMESTAMP_SECS,
|
||||
value: MetadataValue::U64(self.timestamp_secs),
|
||||
});
|
||||
|
||||
if let Some(monitor) = self.monitor_id {
|
||||
entries.push(MetadataEntry {
|
||||
field_id: fields::MONITOR_ID,
|
||||
value: MetadataValue::U64(monitor as u64),
|
||||
});
|
||||
}
|
||||
|
||||
entries
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for the observation store.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ObservationStoreConfig {
|
||||
/// Directory for RVF data files.
|
||||
pub data_dir: PathBuf,
|
||||
/// Vector embedding dimension.
|
||||
pub dimension: u16,
|
||||
/// Distance metric (defaults to Cosine for OSpipe embeddings).
|
||||
pub metric: DistanceMetric,
|
||||
}
|
||||
|
||||
impl ObservationStoreConfig {
|
||||
/// Create with required parameters, using Cosine metric by default.
|
||||
pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
|
||||
Self {
|
||||
data_dir: data_dir.into(),
|
||||
dimension,
|
||||
metric: DistanceMetric::Cosine,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the distance metric.
|
||||
pub fn with_metric(mut self, metric: DistanceMetric) -> Self {
|
||||
self.metric = metric;
|
||||
self
|
||||
}
|
||||
|
||||
fn store_path(&self) -> PathBuf {
|
||||
self.data_dir.join("observations.rvf")
|
||||
}
|
||||
}
|
||||
|
||||
/// RVF-backed observation store for OSpipe.
|
||||
///
|
||||
/// Wraps an `RvfStore` and provides observation-oriented APIs:
|
||||
/// - `record_observation` -- ingest a state vector with metadata
|
||||
/// - `query_similar_states` -- k-NN search over observation vectors
|
||||
/// - `get_state_history` -- filtered query by time range
|
||||
/// - `compact_history` -- reclaim dead space from deleted observations
|
||||
pub struct RvfObservationStore {
|
||||
store: RvfStore,
|
||||
#[allow(dead_code)]
|
||||
config: ObservationStoreConfig,
|
||||
next_id: u64,
|
||||
}
|
||||
|
||||
impl RvfObservationStore {
|
||||
/// Create a new observation store, creating the RVF file.
|
||||
pub fn create(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
|
||||
if config.dimension == 0 {
|
||||
return Err(OspipeAdapterError::InvalidDimension);
|
||||
}
|
||||
std::fs::create_dir_all(&config.data_dir)
|
||||
.map_err(|e| OspipeAdapterError::Io(e.to_string()))?;
|
||||
|
||||
let options = RvfOptions {
|
||||
dimension: config.dimension,
|
||||
metric: config.metric,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let store = RvfStore::create(&config.store_path(), options)
|
||||
.map_err(OspipeAdapterError::Rvf)?;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
next_id: 1,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing observation store.
|
||||
pub fn open(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
|
||||
let store = RvfStore::open(&config.store_path())
|
||||
.map_err(OspipeAdapterError::Rvf)?;
|
||||
|
||||
let status = store.status();
|
||||
let next_id = status.total_vectors + status.current_epoch as u64 + 1;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
next_id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing store in read-only mode.
|
||||
pub fn open_readonly(config: ObservationStoreConfig) -> Result<Self, OspipeAdapterError> {
|
||||
let store = RvfStore::open_readonly(&config.store_path())
|
||||
.map_err(OspipeAdapterError::Rvf)?;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
next_id: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Record a single observation with its state vector and metadata.
|
||||
///
|
||||
/// Returns the assigned vector ID and the ingest result.
|
||||
pub fn record_observation(
|
||||
&mut self,
|
||||
state_vector: &[f32],
|
||||
meta: &ObservationMeta,
|
||||
) -> Result<(u64, IngestResult), OspipeAdapterError> {
|
||||
let id = self.next_id;
|
||||
self.next_id += 1;
|
||||
|
||||
let entries = meta.to_entries();
|
||||
let result = self.store.ingest_batch(
|
||||
&[state_vector],
|
||||
&[id],
|
||||
Some(&entries),
|
||||
).map_err(OspipeAdapterError::Rvf)?;
|
||||
|
||||
Ok((id, result))
|
||||
}
|
||||
|
||||
/// Record a batch of observations.
|
||||
///
|
||||
/// `vectors` and `metas` must have the same length.
|
||||
/// Returns the assigned IDs and the ingest result.
|
||||
pub fn record_batch(
|
||||
&mut self,
|
||||
vectors: &[&[f32]],
|
||||
metas: &[ObservationMeta],
|
||||
) -> Result<(Vec<u64>, IngestResult), OspipeAdapterError> {
|
||||
if vectors.len() != metas.len() {
|
||||
return Err(OspipeAdapterError::LengthMismatch {
|
||||
vectors: vectors.len(),
|
||||
metas: metas.len(),
|
||||
});
|
||||
}
|
||||
|
||||
let start_id = self.next_id;
|
||||
let ids: Vec<u64> = (start_id..start_id + vectors.len() as u64).collect();
|
||||
self.next_id = start_id + vectors.len() as u64;
|
||||
|
||||
// Flatten metadata entries: each vector gets its own entries.
|
||||
// RvfStore expects entries_per_id to be uniform, so we pad to
|
||||
// a consistent entry count per vector.
|
||||
let entries_per_vec: Vec<Vec<MetadataEntry>> =
|
||||
metas.iter().map(|m| m.to_entries()).collect();
|
||||
|
||||
let max_entries = entries_per_vec.iter().map(|e| e.len()).max().unwrap_or(0);
|
||||
|
||||
let mut flat_entries = Vec::with_capacity(vectors.len() * max_entries);
|
||||
for vec_entries in &entries_per_vec {
|
||||
for entry in vec_entries {
|
||||
flat_entries.push(entry.clone());
|
||||
}
|
||||
// Pad with dummy entries so every vector has the same count.
|
||||
for _ in vec_entries.len()..max_entries {
|
||||
flat_entries.push(MetadataEntry {
|
||||
field_id: u16::MAX,
|
||||
value: MetadataValue::U64(0),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let result = self.store.ingest_batch(
|
||||
vectors,
|
||||
&ids,
|
||||
if flat_entries.is_empty() { None } else { Some(&flat_entries) },
|
||||
).map_err(OspipeAdapterError::Rvf)?;
|
||||
|
||||
Ok((ids, result))
|
||||
}
|
||||
|
||||
/// Query for the k most similar observation states.
|
||||
pub fn query_similar_states(
|
||||
&self,
|
||||
state_vector: &[f32],
|
||||
k: usize,
|
||||
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
|
||||
self.store
|
||||
.query(state_vector, k, &QueryOptions::default())
|
||||
.map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
|
||||
/// Query with a metadata filter expression.
|
||||
pub fn query_filtered(
|
||||
&self,
|
||||
state_vector: &[f32],
|
||||
k: usize,
|
||||
filter: FilterExpr,
|
||||
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
|
||||
let opts = QueryOptions {
|
||||
filter: Some(filter),
|
||||
..Default::default()
|
||||
};
|
||||
self.store
|
||||
.query(state_vector, k, &opts)
|
||||
.map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
|
||||
/// Query for observations within a time range.
|
||||
///
|
||||
/// `start_secs` and `end_secs` are UNIX epoch seconds. The query
|
||||
/// vector is used for similarity ranking among the time-filtered results.
|
||||
pub fn get_state_history(
|
||||
&self,
|
||||
state_vector: &[f32],
|
||||
k: usize,
|
||||
start_secs: u64,
|
||||
end_secs: u64,
|
||||
) -> Result<Vec<SearchResult>, OspipeAdapterError> {
|
||||
use rvf_runtime::filter::FilterValue;
|
||||
|
||||
let filter = FilterExpr::And(vec![
|
||||
FilterExpr::Ge(fields::TIMESTAMP_SECS, FilterValue::U64(start_secs)),
|
||||
FilterExpr::Le(fields::TIMESTAMP_SECS, FilterValue::U64(end_secs)),
|
||||
]);
|
||||
|
||||
self.query_filtered(state_vector, k, filter)
|
||||
}
|
||||
|
||||
/// Run compaction to reclaim space from deleted observations.
|
||||
pub fn compact_history(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
|
||||
self.store.compact().map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
|
||||
/// Delete observations by their IDs.
|
||||
pub fn delete_observations(
|
||||
&mut self,
|
||||
ids: &[u64],
|
||||
) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
|
||||
self.store.delete(ids).map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
|
||||
/// Delete observations matching a filter expression.
|
||||
pub fn delete_by_filter(
|
||||
&mut self,
|
||||
filter: &FilterExpr,
|
||||
) -> Result<rvf_runtime::DeleteResult, OspipeAdapterError> {
|
||||
self.store.delete_by_filter(filter).map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
|
||||
/// Get the current store status.
|
||||
pub fn status(&self) -> StoreStatus {
|
||||
self.store.status()
|
||||
}
|
||||
|
||||
/// Close the store, releasing locks.
|
||||
pub fn close(self) -> Result<(), OspipeAdapterError> {
|
||||
self.store.close().map_err(OspipeAdapterError::Rvf)
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors produced by the OSpipe adapter.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum OspipeAdapterError {
|
||||
/// Underlying RVF error.
|
||||
Rvf(RvfError),
|
||||
/// IO error (directory creation, etc.).
|
||||
Io(String),
|
||||
/// Vector dimension must be > 0.
|
||||
InvalidDimension,
|
||||
/// Batch vectors and metadata have different lengths.
|
||||
LengthMismatch { vectors: usize, metas: usize },
|
||||
}
|
||||
|
||||
impl std::fmt::Display for OspipeAdapterError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Rvf(e) => write!(f, "RVF error: {e}"),
|
||||
Self::Io(msg) => write!(f, "IO error: {msg}"),
|
||||
Self::InvalidDimension => write!(f, "vector dimension must be > 0"),
|
||||
Self::LengthMismatch { vectors, metas } => {
|
||||
write!(f, "vectors ({vectors}) and metas ({metas}) length mismatch")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for OspipeAdapterError {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Produce a deterministic pseudo-random vector of `dim` components.
///
/// A 64-bit linear congruential generator seeded with `seed` is advanced
/// once per component. Only the top 31 bits of the state are used, so every
/// component lies in `[-0.5, 0.0]`.
fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed;
    (0..dim)
        .map(|_| {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ((state >> 33) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
|
||||
|
||||
/// Current wall-clock time in whole seconds since the UNIX epoch.
///
/// Falls back to 0 if the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
/// A freshly created store accepts one observation and assigns it ID 1.
#[test]
fn create_and_record_observation() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 64)).unwrap();

    let embedding = make_vector(64, 42);
    let meta = ObservationMeta {
        content_type: "ocr".into(),
        app_name: Some("VSCode".into()),
        timestamp_secs: now_secs(),
        monitor_id: Some(0),
    };

    let (id, outcome) = store.record_observation(&embedding, &meta).unwrap();
    assert_eq!(id, 1);
    assert_eq!(outcome.accepted, 1);
    assert_eq!(outcome.rejected, 0);

    store.close().unwrap();
}
|
||||
|
||||
/// Querying with a stored vector returns that vector first, at ~zero
/// distance, and results come back ordered by ascending distance.
#[test]
fn query_similar_states() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 32)).unwrap();

    // Ten observations: seed `s` is stored under ID `s + 1`.
    for seed in 0..10u64 {
        let meta = ObservationMeta {
            content_type: "ocr".into(),
            app_name: None,
            timestamp_secs: now_secs() + seed,
            monitor_id: None,
        };
        store
            .record_observation(&make_vector(32, seed), &meta)
            .unwrap();
    }

    let hits = store.query_similar_states(&make_vector(32, 5), 3).unwrap();
    assert_eq!(hits.len(), 3);

    // The best match is the identical vector (seed 5 -> ID 6).
    assert_eq!(hits[0].id, 6);
    assert!(hits[0].distance < 1e-5);

    // Each result is at least as far away as the one before it.
    for pair in hits.windows(2) {
        assert!(pair[1].distance >= pair[0].distance);
    }

    store.close().unwrap();
}
|
||||
|
||||
/// Only observations whose timestamps fall inside the requested range
/// (bounds included) are returned by `get_state_history`.
#[test]
fn get_state_history_filters_by_time() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 16)).unwrap();

    let base_time = 1_700_000_000u64;

    // Five observations spaced 100 seconds apart, stored as IDs 1..=5.
    for step in 0..5u64 {
        let meta = ObservationMeta {
            content_type: "ocr".into(),
            app_name: None,
            timestamp_secs: base_time + step * 100,
            monitor_id: None,
        };
        store
            .record_observation(&make_vector(16, step), &meta)
            .unwrap();
    }

    // Range [base+100, base+300] covers IDs 2, 3 and 4.
    let hits = store
        .get_state_history(&make_vector(16, 0), 10, base_time + 100, base_time + 300)
        .unwrap();

    assert_eq!(hits.len(), 3);
    for expected in [2u64, 3, 4] {
        assert!(hits.iter().any(|hit| hit.id == expected));
    }

    store.close().unwrap();
}
|
||||
|
||||
/// A batch of five observations is accepted in full and is searchable.
#[test]
fn record_batch_and_query() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 16)).unwrap();

    let vectors: Vec<Vec<f32>> = (0..5).map(|seed| make_vector(16, seed)).collect();
    let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    let metas: Vec<ObservationMeta> = (0..5)
        .map(|i| {
            // Alternate the content type between consecutive observations.
            let label = if i % 2 == 0 { "ocr" } else { "transcription" };
            ObservationMeta {
                content_type: label.into(),
                app_name: Some("TestApp".into()),
                timestamp_secs: now_secs() + i,
                monitor_id: None,
            }
        })
        .collect();

    let (ids, outcome) = store.record_batch(&slices, &metas).unwrap();
    assert_eq!(ids.len(), 5);
    assert_eq!(outcome.accepted, 5);

    let hits = store.query_similar_states(&make_vector(16, 2), 1).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].id, 3); // id starts at 1, so seed=2 -> id=3

    store.close().unwrap();
}
|
||||
|
||||
/// Deleting observations lowers the live count, and the survivors stay
/// queryable after compaction.
#[test]
fn delete_and_compact() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 8)).unwrap();

    // Four observations, stored as IDs 1..=4.
    for seed in 0..4u64 {
        let meta = ObservationMeta {
            content_type: "ocr".into(),
            app_name: None,
            timestamp_secs: now_secs(),
            monitor_id: None,
        };
        store
            .record_observation(&make_vector(8, seed), &meta)
            .unwrap();
    }
    assert_eq!(store.status().total_vectors, 4);

    // Remove two of them.
    let removed = store.delete_observations(&[1, 3]).unwrap();
    assert_eq!(removed.deleted, 2);
    assert_eq!(store.status().total_vectors, 2);

    let compaction = store.compact_history().unwrap();
    assert_eq!(compaction.segments_compacted, 2);

    // The remaining vectors (IDs 2 and 4) are still found; seed=1 was id=2.
    let hits = store.query_similar_states(&make_vector(8, 1), 10).unwrap();
    assert_eq!(hits.len(), 2);
    for expected in [2u64, 4] {
        assert!(hits.iter().any(|hit| hit.id == expected));
    }

    store.close().unwrap();
}
|
||||
|
||||
/// Data written before closing is still searchable after reopening.
#[test]
fn open_existing_store() {
    let tmp = TempDir::new().unwrap();
    let config = ObservationStoreConfig::new(tmp.path(), 16);

    // First session: create the store and record one observation.
    let mut writer = RvfObservationStore::create(config.clone()).unwrap();
    let meta = ObservationMeta {
        content_type: "transcription".into(),
        app_name: Some("Zoom".into()),
        timestamp_secs: now_secs(),
        monitor_id: None,
    };
    writer
        .record_observation(&make_vector(16, 99), &meta)
        .unwrap();
    writer.close().unwrap();

    // Second session: reopen and look the observation up again.
    let reopened = RvfObservationStore::open(config).unwrap();
    let hits = reopened
        .query_similar_states(&make_vector(16, 99), 1)
        .unwrap();
    assert_eq!(hits.len(), 1);
    assert!(hits[0].distance < 1e-5);
    reopened.close().unwrap();
}
|
||||
|
||||
/// A store opened read-only reports itself as such and sees existing data.
#[test]
fn readonly_mode() {
    let tmp = TempDir::new().unwrap();
    let config = ObservationStoreConfig::new(tmp.path(), 8);

    // Populate the store with one observation, then close it.
    let mut writer = RvfObservationStore::create(config.clone()).unwrap();
    let meta = ObservationMeta {
        content_type: "ocr".into(),
        app_name: None,
        timestamp_secs: now_secs(),
        monitor_id: None,
    };
    writer
        .record_observation(&make_vector(8, 0), &meta)
        .unwrap();
    writer.close().unwrap();

    let reader = RvfObservationStore::open_readonly(config).unwrap();
    let status = reader.status();
    assert!(status.read_only);
    assert_eq!(status.total_vectors, 1);
}
|
||||
|
||||
/// Creating a store with a zero dimension must fail.
#[test]
fn invalid_dimension_rejected() {
    let tmp = TempDir::new().unwrap();
    let zero_dim = ObservationStoreConfig::new(tmp.path(), 0);
    assert!(RvfObservationStore::create(zero_dim).is_err());
}
|
||||
|
||||
/// A batch with one vector but two metadata records must be rejected.
#[test]
fn batch_length_mismatch_rejected() {
    let tmp = TempDir::new().unwrap();
    let mut store =
        RvfObservationStore::create(ObservationStoreConfig::new(tmp.path(), 8)).unwrap();

    let vectors = [make_vector(8, 0)];
    let slices: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();

    // Two identical metadata records against a single vector.
    let metas = vec![
        ObservationMeta {
            content_type: "ocr".into(),
            app_name: None,
            timestamp_secs: 0,
            monitor_id: None,
        };
        2
    ];

    assert!(store.record_batch(&slices, &metas).is_err());

    store.close().unwrap();
}
|
||||
}
|
||||
267
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/pipeline.rs
vendored
Normal file
267
vendor/ruvector/crates/rvf/rvf-adapters/ospipe/src/pipeline.rs
vendored
Normal file
@@ -0,0 +1,267 @@
|
||||
//! Pipeline integration helpers for OSpipe.
|
||||
//!
|
||||
//! Provides [`RvfPipelineAdapter`] which wraps [`RvfObservationStore`] and
|
||||
//! exposes a simplified interface for OSpipe's ingestion pipeline to push
|
||||
//! captured frames directly into the RVF store.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use rvf_runtime::options::DistanceMetric;
|
||||
|
||||
use crate::observation_store::{
|
||||
ObservationMeta, ObservationStoreConfig, OspipeAdapterError, RvfObservationStore,
|
||||
};
|
||||
|
||||
/// Configuration for the pipeline adapter.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PipelineConfig {
|
||||
/// Directory for RVF data files.
|
||||
pub data_dir: PathBuf,
|
||||
/// Vector embedding dimension.
|
||||
pub dimension: u16,
|
||||
/// Distance metric for similarity search.
|
||||
pub metric: DistanceMetric,
|
||||
/// Automatically compact when dead-space ratio exceeds this threshold.
|
||||
pub auto_compact_threshold: f64,
|
||||
}
|
||||
|
||||
impl PipelineConfig {
|
||||
/// Create a new pipeline config with required parameters.
|
||||
pub fn new(data_dir: impl Into<PathBuf>, dimension: u16) -> Self {
|
||||
Self {
|
||||
data_dir: data_dir.into(),
|
||||
dimension,
|
||||
metric: DistanceMetric::Cosine,
|
||||
auto_compact_threshold: 0.3,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// High-level adapter that OSpipe's ingestion pipeline can use to persist
|
||||
/// observation vectors into an RVF store.
|
||||
///
|
||||
/// Handles store lifecycle, auto-compaction, and provides convenience
|
||||
/// methods that accept OSpipe-domain types directly.
|
||||
pub struct RvfPipelineAdapter {
|
||||
store: RvfObservationStore,
|
||||
config: PipelineConfig,
|
||||
ingest_count: u64,
|
||||
}
|
||||
|
||||
impl RvfPipelineAdapter {
|
||||
/// Create a new pipeline adapter, creating the underlying RVF file.
|
||||
pub fn create(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
|
||||
let store_config = ObservationStoreConfig {
|
||||
data_dir: config.data_dir.clone(),
|
||||
dimension: config.dimension,
|
||||
metric: config.metric,
|
||||
};
|
||||
|
||||
let store = RvfObservationStore::create(store_config)?;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
ingest_count: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open an existing pipeline adapter.
|
||||
pub fn open(config: PipelineConfig) -> Result<Self, OspipeAdapterError> {
|
||||
let store_config = ObservationStoreConfig {
|
||||
data_dir: config.data_dir.clone(),
|
||||
dimension: config.dimension,
|
||||
metric: config.metric,
|
||||
};
|
||||
|
||||
let store = RvfObservationStore::open(store_config)?;
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
ingest_count: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Ingest a single observation from the pipeline.
|
||||
///
|
||||
/// This is the primary entry point for OSpipe's ingestion pipeline.
|
||||
/// After ingestion, may trigger auto-compaction if the dead-space
|
||||
/// ratio exceeds the configured threshold.
|
||||
pub fn ingest(
|
||||
&mut self,
|
||||
embedding: &[f32],
|
||||
content_type: &str,
|
||||
app_name: Option<&str>,
|
||||
timestamp_secs: u64,
|
||||
monitor_id: Option<u32>,
|
||||
) -> Result<u64, OspipeAdapterError> {
|
||||
let meta = ObservationMeta {
|
||||
content_type: content_type.to_string(),
|
||||
app_name: app_name.map(|s| s.to_string()),
|
||||
timestamp_secs,
|
||||
monitor_id,
|
||||
};
|
||||
|
||||
let (id, _result) = self.store.record_observation(embedding, &meta)?;
|
||||
self.ingest_count += 1;
|
||||
|
||||
self.maybe_compact()?;
|
||||
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Search for similar observations.
|
||||
pub fn search(
|
||||
&self,
|
||||
query: &[f32],
|
||||
k: usize,
|
||||
) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
|
||||
self.store.query_similar_states(query, k)
|
||||
}
|
||||
|
||||
/// Search for observations within a time window.
|
||||
pub fn search_time_range(
|
||||
&self,
|
||||
query: &[f32],
|
||||
k: usize,
|
||||
start_secs: u64,
|
||||
end_secs: u64,
|
||||
) -> Result<Vec<rvf_runtime::SearchResult>, OspipeAdapterError> {
|
||||
self.store.get_state_history(query, k, start_secs, end_secs)
|
||||
}
|
||||
|
||||
/// Expire observations older than the given timestamp.
|
||||
///
|
||||
/// Scans for observations with timestamps before `before_secs` and
|
||||
/// soft-deletes them. Returns the number of observations deleted.
|
||||
pub fn expire_before(
|
||||
&mut self,
|
||||
before_secs: u64,
|
||||
) -> Result<u64, OspipeAdapterError> {
|
||||
use rvf_runtime::filter::{FilterExpr, FilterValue};
|
||||
|
||||
let filter = FilterExpr::Lt(
|
||||
crate::observation_store::fields::TIMESTAMP_SECS,
|
||||
FilterValue::U64(before_secs),
|
||||
);
|
||||
|
||||
let result = self.store.delete_by_filter(&filter)?;
|
||||
|
||||
Ok(result.deleted)
|
||||
}
|
||||
|
||||
/// Force a compaction cycle.
|
||||
pub fn compact(&mut self) -> Result<rvf_runtime::CompactionResult, OspipeAdapterError> {
|
||||
self.store.compact_history()
|
||||
}
|
||||
|
||||
/// Get the total number of live observations.
|
||||
pub fn observation_count(&self) -> u64 {
|
||||
self.store.status().total_vectors
|
||||
}
|
||||
|
||||
/// Close the adapter and release resources.
|
||||
pub fn close(self) -> Result<(), OspipeAdapterError> {
|
||||
self.store.close()
|
||||
}
|
||||
|
||||
/// Check if auto-compaction should run, and run it if so.
|
||||
fn maybe_compact(&mut self) -> Result<(), OspipeAdapterError> {
|
||||
let status = self.store.status();
|
||||
if status.dead_space_ratio > self.config.auto_compact_threshold {
|
||||
self.store.compact_history()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Produce a deterministic pseudo-random vector of `dim` components.
///
/// A 64-bit linear congruential generator seeded with `seed` is advanced
/// once per component. Only the top 31 bits of the state are used, so every
/// component lies in `[-0.5, 0.0]`.
fn make_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed;
    (0..dim)
        .map(|_| {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ((state >> 33) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
|
||||
|
||||
/// Current wall-clock time in whole seconds since the UNIX epoch.
///
/// Falls back to 0 if the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
/// Ingested observations are counted and the nearest neighbour of a stored
/// vector is that vector itself.
#[test]
fn pipeline_ingest_and_search() {
    let tmp = TempDir::new().unwrap();
    let mut adapter = RvfPipelineAdapter::create(PipelineConfig::new(tmp.path(), 32)).unwrap();

    let started_at = now_secs();

    // Five observations: seed `s` is stored under ID `s + 1`.
    for seed in 0..5u64 {
        let embedding = make_vector(32, seed);
        adapter
            .ingest(&embedding, "ocr", Some("VSCode"), started_at + seed, Some(0))
            .unwrap();
    }

    assert_eq!(adapter.observation_count(), 5);

    let hits = adapter.search(&make_vector(32, 2), 2).unwrap();
    assert_eq!(hits.len(), 2);
    assert_eq!(hits[0].id, 3); // seed=2 -> id=3 (1-indexed)

    adapter.close().unwrap();
}
|
||||
|
||||
/// A time-range search returns only the observations inside the window.
#[test]
fn pipeline_time_range_search() {
    let tmp = TempDir::new().unwrap();
    let mut adapter = RvfPipelineAdapter::create(PipelineConfig::new(tmp.path(), 16)).unwrap();

    let base = 1_700_000_000u64;

    // Four observations, one hour apart.
    for hour in 0..4u64 {
        let embedding = make_vector(16, hour);
        adapter
            .ingest(&embedding, "transcription", None, base + hour * 3600, None)
            .unwrap();
    }

    let hits = adapter
        .search_time_range(&make_vector(16, 0), 10, base + 3600, base + 7200)
        .unwrap();

    // Expect the observations at base+3600 (id=2) and base+7200 (id=3).
    assert_eq!(hits.len(), 2);
}
|
||||
|
||||
/// An adapter reopened over an existing store sees the data ingested earlier.
#[test]
fn pipeline_open_existing() {
    let tmp = TempDir::new().unwrap();
    let config = PipelineConfig::new(tmp.path(), 16);

    // First session: create, ingest one observation, close.
    let mut writer = RvfPipelineAdapter::create(config.clone()).unwrap();
    writer
        .ingest(&make_vector(16, 0), "ocr", None, now_secs(), None)
        .unwrap();
    writer.close().unwrap();

    // Second session: reopen and check the observation is still counted.
    let reopened = RvfPipelineAdapter::open(config).unwrap();
    assert_eq!(reopened.observation_count(), 1);
    reopened.close().unwrap();
}
|
||||
}
|
||||
Reference in New Issue
Block a user