Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,803 @@
//! File-Based Storage Implementation
//!
//! Persistent file storage with write-ahead logging (WAL) for durability.
//! Supports both JSON and bincode serialization formats.
//!
//! # Security
//!
//! All identifiers used in file paths are sanitized to prevent path traversal attacks.
//! Only alphanumeric characters, dashes, underscores, and dots are allowed.
use super::{GovernanceStorage, GraphStorage, StorageConfig, StorageError};
use parking_lot::{Mutex, RwLock};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs::{self, File, OpenOptions};
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use uuid::Uuid;
/// Maximum allowed identifier length (in bytes, per `str::len`) for security;
/// also keeps derived filenames under common filesystem name limits.
const MAX_ID_LENGTH: usize = 256;
/// Validate and sanitize an identifier for use in file paths.
///
/// # Security
///
/// This function prevents path traversal attacks by:
/// - Rejecting empty identifiers
/// - Rejecting identifiers over MAX_ID_LENGTH
/// - Rejecting path separators (`/`, `\`)
/// - Rejecting "." and ".." path components
/// - Rejecting identifiers starting with a dot (hidden files)
/// - Only allowing alphanumeric, dash, underscore, and dot characters
///
/// # Errors
///
/// Returns `StorageError::Io` with kind `InvalidInput` describing the first
/// rule the identifier violates.
fn validate_path_id(id: &str) -> Result<(), StorageError> {
    if id.is_empty() {
        return Err(StorageError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "Identifier cannot be empty",
        )));
    }
    // `len()` counts bytes, which is the unit filesystem name limits use.
    if id.len() > MAX_ID_LENGTH {
        return Err(StorageError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!("Identifier too long: {} (max: {})", id.len(), MAX_ID_LENGTH),
        )));
    }
    // Check separators before the character whitelist so this dedicated
    // error message is actually reachable. (Previously the whitelist loop
    // rejected '/' and '\\' first, making this branch dead code.)
    if id.contains('/') || id.contains('\\') {
        return Err(StorageError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "Path separators not allowed in identifier",
        )));
    }
    // Reject path traversal attempts
    if id == "." || id == ".." {
        return Err(StorageError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "Path traversal detected",
        )));
    }
    // Reject hidden files (starting with dot)
    if id.starts_with('.') {
        return Err(StorageError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "Identifiers cannot start with '.'",
        )));
    }
    // Whitelist: only ASCII alphanumerics, '-', '_', and '.' are safe.
    for c in id.chars() {
        if !c.is_ascii_alphanumeric() && c != '-' && c != '_' && c != '.' {
            return Err(StorageError::Io(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("Invalid character '{}' in identifier", c),
            )));
        }
    }
    Ok(())
}
/// File storage format for serialization
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StorageFormat {
/// JSON format (human-readable, larger)
Json,
/// Bincode format (compact, faster)
#[default]
Bincode,
}
/// Write-ahead log entry
///
/// One length-prefixed record in a WAL segment. The checksum is a BLAKE3
/// hash of the bincode-encoded operation, used to detect torn or corrupt
/// records during recovery.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalEntry {
/// Monotonically increasing sequence number assigned at write time.
pub sequence: u64,
/// The mutation this entry records.
pub operation: WalOperation,
/// BLAKE3 hash of the bincode-encoded `operation`.
pub checksum: [u8; 32],
/// Creation time, milliseconds since the Unix epoch.
pub timestamp: i64,
/// Commit marker. NOTE(review): nothing in this file ever sets this to
/// `true` or rewrites entries after commit, so recovery replays every
/// valid entry — safe only because replay is idempotent. Confirm intended.
pub committed: bool,
}
/// WAL operation types
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WalOperation {
/// Persist a node's state vector.
StoreNode {
node_id: String,
state: Vec<f32>,
},
/// Remove a node file.
DeleteNode {
node_id: String,
},
/// Persist an edge with its weight.
StoreEdge {
source: String,
target: String,
weight: f32,
},
/// Remove an edge file.
DeleteEdge {
source: String,
target: String,
},
/// Persist an opaque policy bundle.
StorePolicy {
policy_id: String,
data: Vec<u8>,
},
/// Persist an opaque witness record.
StoreWitness {
witness_id: String,
data: Vec<u8>,
},
/// Persist an opaque lineage record.
StoreLineage {
lineage_id: String,
data: Vec<u8>,
},
}
impl WalEntry {
fn new(sequence: u64, operation: WalOperation) -> Self {
let op_bytes = bincode::serde::encode_to_vec(&operation, bincode::config::standard())
.unwrap_or_default();
let checksum = *blake3::hash(&op_bytes).as_bytes();
Self {
sequence,
operation,
checksum,
timestamp: chrono::Utc::now().timestamp_millis(),
committed: false,
}
}
fn verify(&self) -> bool {
match bincode::serde::encode_to_vec(&self.operation, bincode::config::standard()) {
Ok(bytes) => self.checksum == *blake3::hash(&bytes).as_bytes(),
Err(_) => false,
}
}
}
/// Storage metadata persisted to disk
///
/// Serialized as pretty JSON at `<root>/metadata.json`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StorageMetadata {
/// On-disk layout schema version.
pub version: u32,
/// Human-readable name of the serialization format.
pub format: String,
/// Node count at the last metadata save.
pub node_count: u64,
/// Edge count at the last metadata save.
pub edge_count: u64,
/// Highest WAL sequence number at the last metadata save.
pub last_wal_sequence: u64,
/// Creation time, milliseconds since the Unix epoch.
pub created_at: i64,
/// Last modification time, milliseconds since the Unix epoch.
pub modified_at: i64,
}
/// File-based storage implementation with WAL
///
/// Mutations are appended to the WAL first, then applied to on-disk files
/// and the in-memory caches.
#[derive(Debug)]
pub struct FileStorage {
/// Root directory holding nodes/, edges/, policies/, witnesses/,
/// lineages/, wal/ and metadata.json.
root: PathBuf,
/// Serialization format for node and edge files.
format: StorageFormat,
/// Whether mutations are logged to the WAL before being applied.
wal_enabled: bool,
/// Last assigned WAL sequence number.
wal_sequence: Mutex<u64>,
/// Currently open WAL segment writer (`None` when WAL is disabled).
wal_file: Mutex<Option<BufWriter<File>>>,
/// node_id -> state vector cache.
node_cache: RwLock<HashMap<String, Vec<f32>>>,
/// (source, target) -> weight cache.
edge_cache: RwLock<HashMap<(String, String), f32>>,
/// node_id -> neighbor set (undirected view over stored edges).
adjacency_cache: RwLock<HashMap<String, HashSet<String>>>,
/// Set when in-memory state has diverged from persisted metadata.
cache_dirty: RwLock<bool>,
/// Persisted counters and timestamps (see [`StorageMetadata`]).
metadata: RwLock<StorageMetadata>,
}
impl FileStorage {
pub fn new(root: impl AsRef<Path>) -> Result<Self, StorageError> {
Self::with_options(root, StorageFormat::Bincode, true)
}
pub fn with_options(
root: impl AsRef<Path>,
format: StorageFormat,
wal_enabled: bool,
) -> Result<Self, StorageError> {
let root = root.as_ref().to_path_buf();
for dir in ["nodes", "edges", "policies", "witnesses", "lineages", "wal"] {
fs::create_dir_all(root.join(dir))?;
}
let metadata_path = root.join("metadata.json");
let metadata: StorageMetadata = if metadata_path.exists() {
serde_json::from_reader(File::open(&metadata_path)?).unwrap_or_default()
} else {
StorageMetadata::default()
};
let storage = Self {
root,
format,
wal_enabled,
wal_sequence: Mutex::new(metadata.last_wal_sequence),
wal_file: Mutex::new(None),
node_cache: RwLock::new(HashMap::new()),
edge_cache: RwLock::new(HashMap::new()),
adjacency_cache: RwLock::new(HashMap::new()),
cache_dirty: RwLock::new(false),
metadata: RwLock::new(metadata),
};
if wal_enabled {
storage.open_wal_file()?;
storage.recover_from_wal()?;
}
storage.load_cache()?;
Ok(storage)
}
pub fn from_config(config: &StorageConfig) -> Result<Self, StorageError> {
Self::with_options(
&config.graph_path,
StorageFormat::Bincode,
config.enable_wal,
)
}
/// Open (or create) the WAL segment for the current sequence number.
///
/// Segments are named by `sequence / 1000`, zero-padded to six digits.
/// NOTE(review): the segment is chosen once at open time and the writer is
/// never re-opened as the sequence grows past the next multiple of 1000,
/// so a long-lived instance keeps appending to the same segment — confirm
/// whether rotation was intended.
fn open_wal_file(&self) -> Result<(), StorageError> {
let seq = *self.wal_sequence.lock();
let path = self.root.join("wal").join(format!("{:06}.wal", seq / 1000));
let file = OpenOptions::new().create(true).append(true).open(&path)?;
*self.wal_file.lock() = Some(BufWriter::new(file));
Ok(())
}
/// Append `operation` to the WAL as a length-prefixed record and return the
/// sequence number assigned to it (0 when the WAL is disabled).
fn write_wal(&self, operation: WalOperation) -> Result<u64, StorageError> {
if !self.wal_enabled {
return Ok(0);
}
// Atomically take the next sequence number.
let seq = {
let mut g = self.wal_sequence.lock();
*g += 1;
*g
};
let entry = WalEntry::new(seq, operation);
let bytes = bincode::serde::encode_to_vec(&entry, bincode::config::standard())
.map_err(|e| StorageError::Serialization(e.to_string()))?;
// Record framing: little-endian u32 byte length, then the entry bytes.
if let Some(ref mut wal) = *self.wal_file.lock() {
wal.write_all(&(bytes.len() as u32).to_le_bytes())?;
wal.write_all(&bytes)?;
wal.flush()?;
}
Ok(seq)
}
/// Mark a WAL entry committed. Currently this only flushes the writer; the
/// entry's `committed` flag on disk is not rewritten.
fn commit_wal(&self, _seq: u64) -> Result<(), StorageError> {
if let Some(ref mut wal) = *self.wal_file.lock() {
wal.flush()?;
}
Ok(())
}
/// Replay WAL records found under `<root>/wal` in sequence order.
///
/// Records that fail checksum verification (torn writes, corruption) are
/// silently skipped; a short read of the length prefix ends a segment.
fn recover_from_wal(&self) -> Result<(), StorageError> {
let wal_dir = self.root.join("wal");
let mut entries = Vec::new();
for entry in fs::read_dir(&wal_dir)? {
let path = entry?.path();
if path.extension().map_or(false, |e| e == "wal") {
let mut reader = BufReader::new(File::open(&path)?);
loop {
// Read the u32 length prefix; EOF here means end of segment.
let mut len_bytes = [0u8; 4];
if reader.read_exact(&mut len_bytes).is_err() {
break;
}
let mut buf = vec![0u8; u32::from_le_bytes(len_bytes) as usize];
reader.read_exact(&mut buf)?;
if let Ok((e, _)) = bincode::serde::decode_from_slice::<WalEntry, _>(
&buf,
bincode::config::standard(),
) {
// Only replay verified, uncommitted entries (in practice all
// entries, since `committed` is never persisted as true).
if e.verify() && !e.committed {
entries.push(e);
}
}
}
}
}
// Apply strictly in sequence order across all segments.
entries.sort_by_key(|e| e.sequence);
for e in entries {
self.apply_wal_operation(&e.operation)?;
}
Ok(())
}
/// Apply a single recovered WAL operation to files and caches.
///
/// NOTE(review): edge replay updates `edge_cache` but not
/// `adjacency_cache`; that is tolerable only because `load_cache()` runs
/// after recovery and rebuilds adjacency from the edge files.
fn apply_wal_operation(&self, op: &WalOperation) -> Result<(), StorageError> {
match op {
WalOperation::StoreNode { node_id, state } => {
self.write_node_file(node_id, state)?;
self.node_cache
.write()
.insert(node_id.clone(), state.clone());
}
WalOperation::DeleteNode { node_id } => {
self.delete_node_file(node_id)?;
self.node_cache.write().remove(node_id);
}
WalOperation::StoreEdge {
source,
target,
weight,
} => {
self.write_edge_file(source, target, *weight)?;
self.edge_cache
.write()
.insert((source.clone(), target.clone()), *weight);
}
WalOperation::DeleteEdge { source, target } => {
self.delete_edge_file(source, target)?;
self.edge_cache
.write()
.remove(&(source.clone(), target.clone()));
}
WalOperation::StorePolicy { policy_id, data } => {
self.write_data_file("policies", policy_id, data)?;
}
WalOperation::StoreWitness { witness_id, data } => {
self.write_data_file("witnesses", witness_id, data)?;
}
WalOperation::StoreLineage { lineage_id, data } => {
self.write_data_file("lineages", lineage_id, data)?;
}
}
Ok(())
}
/// Warm node/edge/adjacency caches by scanning the nodes/ and edges/
/// directories; unreadable files are skipped silently.
fn load_cache(&self) -> Result<(), StorageError> {
let nodes_dir = self.root.join("nodes");
if nodes_dir.exists() {
for entry in fs::read_dir(&nodes_dir)? {
let path = entry?.path();
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
if let Ok(state) = self.read_node_file(stem) {
self.node_cache.write().insert(stem.to_string(), state);
}
}
}
}
let edges_dir = self.root.join("edges");
if edges_dir.exists() {
for entry in fs::read_dir(&edges_dir)? {
let path = entry?.path();
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
// Edge files are named "<source>_<target>.<ext>"; split at the
// FIRST underscore only.
// NOTE(review): because node ids may themselves contain '_',
// this parse is ambiguous — source "a_b", target "c" produces
// "a_b_c", which re-parses as ("a", "b_c"). Consider a
// separator that validate_path_id forbids in ids.
let parts: Vec<&str> = stem.splitn(2, '_').collect();
if parts.len() == 2 {
if let Ok(weight) = self.read_edge_file(parts[0], parts[1]) {
self.edge_cache
.write()
.insert((parts[0].to_string(), parts[1].to_string()), weight);
// Adjacency is maintained in both directions (undirected view).
let mut adj = self.adjacency_cache.write();
adj.entry(parts[0].to_string())
.or_default()
.insert(parts[1].to_string());
adj.entry(parts[1].to_string())
.or_default()
.insert(parts[0].to_string());
}
}
}
}
}
Ok(())
}
/// Serialize a node state vector to its file in the configured format.
fn write_node_file(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
let path = self.node_path(node_id);
let mut writer = BufWriter::new(File::create(&path)?);
match self.format {
StorageFormat::Json => serde_json::to_writer(&mut writer, state)
.map_err(|e| StorageError::Serialization(e.to_string()))?,
StorageFormat::Bincode => {
let bytes = bincode::serde::encode_to_vec(state, bincode::config::standard())
.map_err(|e| StorageError::Serialization(e.to_string()))?;
writer.write_all(&bytes)?;
}
}
writer.flush()?;
Ok(())
}
/// Deserialize a node state vector from its file in the configured format.
fn read_node_file(&self, node_id: &str) -> Result<Vec<f32>, StorageError> {
let mut reader = BufReader::new(File::open(self.node_path(node_id))?);
match self.format {
StorageFormat::Json => serde_json::from_reader(reader)
.map_err(|e| StorageError::Serialization(e.to_string())),
StorageFormat::Bincode => {
let mut bytes = Vec::new();
reader.read_to_end(&mut bytes)?;
let (result, _) =
bincode::serde::decode_from_slice(&bytes, bincode::config::standard())
.map_err(|e| StorageError::Serialization(e.to_string()))?;
Ok(result)
}
}
}
/// Remove a node file; deleting a nonexistent node is a no-op.
fn delete_node_file(&self, node_id: &str) -> Result<(), StorageError> {
let path = self.node_path(node_id);
if path.exists() {
fs::remove_file(&path)?;
}
Ok(())
}
/// Path of a node file: `<root>/nodes/<node_id>.{json|bin}`.
fn node_path(&self, node_id: &str) -> PathBuf {
// Note: Caller must validate node_id first using validate_path_id()
let ext = if self.format == StorageFormat::Json {
"json"
} else {
"bin"
};
self.root.join("nodes").join(format!("{}.{}", node_id, ext))
}
/// Validate node_id and return the safe path
fn safe_node_path(&self, node_id: &str) -> Result<PathBuf, StorageError> {
validate_path_id(node_id)?;
Ok(self.node_path(node_id))
}
/// Serialize an edge weight to its file in the configured format.
fn write_edge_file(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
let mut writer = BufWriter::new(File::create(self.edge_path(source, target))?);
match self.format {
StorageFormat::Json => serde_json::to_writer(&mut writer, &weight)
.map_err(|e| StorageError::Serialization(e.to_string()))?,
StorageFormat::Bincode => {
let bytes = bincode::serde::encode_to_vec(&weight, bincode::config::standard())
.map_err(|e| StorageError::Serialization(e.to_string()))?;
writer.write_all(&bytes)?;
}
}
writer.flush()?;
Ok(())
}
/// Deserialize an edge weight from its file in the configured format.
fn read_edge_file(&self, source: &str, target: &str) -> Result<f32, StorageError> {
let mut reader = BufReader::new(File::open(self.edge_path(source, target))?);
match self.format {
StorageFormat::Json => serde_json::from_reader(reader)
.map_err(|e| StorageError::Serialization(e.to_string())),
StorageFormat::Bincode => {
let mut bytes = Vec::new();
reader.read_to_end(&mut bytes)?;
let (result, _) =
bincode::serde::decode_from_slice(&bytes, bincode::config::standard())
.map_err(|e| StorageError::Serialization(e.to_string()))?;
Ok(result)
}
}
}
/// Remove an edge file; deleting a nonexistent edge is a no-op.
fn delete_edge_file(&self, source: &str, target: &str) -> Result<(), StorageError> {
let path = self.edge_path(source, target);
if path.exists() {
fs::remove_file(&path)?;
}
Ok(())
}
/// Path of an edge file: `<root>/edges/<source>_<target>.{json|bin}`.
/// See the ambiguity note in `load_cache` about '_' inside ids.
fn edge_path(&self, source: &str, target: &str) -> PathBuf {
// Note: Caller must validate source and target first using validate_path_id()
let ext = if self.format == StorageFormat::Json {
"json"
} else {
"bin"
};
self.root
.join("edges")
.join(format!("{}_{}.{}", source, target, ext))
}
/// Validate edge identifiers and return the safe path
fn safe_edge_path(&self, source: &str, target: &str) -> Result<PathBuf, StorageError> {
validate_path_id(source)?;
validate_path_id(target)?;
Ok(self.edge_path(source, target))
}
fn write_data_file(&self, dir: &str, id: &str, data: &[u8]) -> Result<(), StorageError> {
// Validate both directory name and id to prevent path traversal
validate_path_id(dir)?;
validate_path_id(id)?;
let mut file = File::create(self.root.join(dir).join(format!("{}.bin", id)))?;
file.write_all(data)?;
file.flush()?;
Ok(())
}
fn read_data_file(&self, dir: &str, id: &str) -> Result<Vec<u8>, StorageError> {
// Validate both directory name and id to prevent path traversal
validate_path_id(dir)?;
validate_path_id(id)?;
let mut data = Vec::new();
File::open(self.root.join(dir).join(format!("{}.bin", id)))?.read_to_end(&mut data)?;
Ok(data)
}
/// Persist [`StorageMetadata`] as pretty JSON to `<root>/metadata.json`,
/// refreshing the modified timestamp and last WAL sequence first.
///
/// # Errors
///
/// I/O errors creating the file, or JSON serialization failure.
fn save_metadata(&self) -> Result<(), StorageError> {
    let mut metadata = self.metadata.write();
    metadata.modified_at = chrono::Utc::now().timestamp_millis();
    metadata.last_wal_sequence = *self.wal_sequence.lock();
    serde_json::to_writer_pretty(
        BufWriter::new(File::create(self.root.join("metadata.json"))?),
        &*metadata,
    )
    .map_err(|e| StorageError::Serialization(e.to_string()))?;
    Ok(())
}
/// Flush metadata to disk if any mutation occurred since the last sync.
///
/// # Errors
///
/// Propagates metadata save failures.
pub fn sync(&self) -> Result<(), StorageError> {
    // Fix: hold the write lock across the check-save-clear sequence. The
    // previous read-then-write version could lose a dirty-flag update made
    // by a concurrent mutation between `save_metadata` and the reset.
    let mut dirty = self.cache_dirty.write();
    if *dirty {
        self.save_metadata()?;
        *dirty = false;
    }
    Ok(())
}
/// Compact the WAL. Currently this only persists metadata (which records
/// the last WAL sequence); old segments are not yet removed.
///
/// # Errors
///
/// Propagates metadata save failures.
pub fn compact_wal(&self) -> Result<(), StorageError> {
    self.save_metadata()
}
/// Snapshot current storage statistics.
///
/// Counts come from the in-memory caches (warmed at startup and kept in
/// step by the trait methods), not from re-scanning the filesystem.
#[must_use]
pub fn stats(&self) -> StorageStats {
let metadata = self.metadata.read();
StorageStats {
node_count: self.node_cache.read().len(),
edge_count: self.edge_cache.read().len(),
wal_sequence: *self.wal_sequence.lock(),
root_path: self.root.clone(),
format: self.format,
wal_enabled: self.wal_enabled,
created_at: metadata.created_at,
modified_at: metadata.modified_at,
}
}
/// Cosine similarity of two vectors in [-1, 1].
///
/// Returns 0.0 when the lengths differ, the inputs are empty, or either
/// vector has zero norm.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }
    let mut dot = 0.0f32;
    let mut sq_a = 0.0f32;
    let mut sq_b = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }
    let norm_a = sq_a.sqrt();
    let norm_b = sq_b.sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    dot / (norm_a * norm_b)
}
}
/// Point-in-time snapshot of storage statistics (see `FileStorage::stats`).
#[derive(Debug, Clone)]
pub struct StorageStats {
/// Number of cached nodes.
pub node_count: usize,
/// Number of cached edges.
pub edge_count: usize,
/// Last assigned WAL sequence number.
pub wal_sequence: u64,
/// Root directory of the store.
pub root_path: PathBuf,
/// Serialization format in use.
pub format: StorageFormat,
/// Whether the WAL is enabled.
pub wal_enabled: bool,
/// Creation time (ms since epoch) from persisted metadata.
pub created_at: i64,
/// Last modification time (ms since epoch) from persisted metadata.
pub modified_at: i64,
}
/// Best-effort metadata flush on drop. Errors are deliberately ignored
/// (drop cannot fail); call `FileStorage::sync` explicitly when the result
/// matters.
impl Drop for FileStorage {
fn drop(&mut self) {
let _ = self.sync();
}
}
impl GraphStorage for FileStorage {
/// Persist a node state vector: WAL first, then file, then cache.
fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
// Validate node_id to prevent path traversal
validate_path_id(node_id)?;
let seq = self.write_wal(WalOperation::StoreNode {
node_id: node_id.to_string(),
state: state.to_vec(),
})?;
self.write_node_file(node_id, state)?;
self.node_cache
.write()
.insert(node_id.to_string(), state.to_vec());
{
let mut m = self.metadata.write();
m.node_count = self.node_cache.read().len() as u64;
}
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(())
}
/// Fetch a node state, consulting the cache first and falling back to the
/// on-disk file (populating the cache on a hit).
fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
// Validate node_id to prevent path traversal
validate_path_id(node_id)?;
if let Some(state) = self.node_cache.read().get(node_id) {
return Ok(Some(state.clone()));
}
match self.read_node_file(node_id) {
Ok(state) => {
self.node_cache
.write()
.insert(node_id.to_string(), state.clone());
Ok(Some(state))
}
// A missing file means the node simply does not exist.
Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(e) => Err(e),
}
}
/// Persist an edge: WAL first, then file, then edge/adjacency caches
/// (adjacency is updated in both directions — undirected view).
fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
// Validate identifiers to prevent path traversal
validate_path_id(source)?;
validate_path_id(target)?;
let seq = self.write_wal(WalOperation::StoreEdge {
source: source.to_string(),
target: target.to_string(),
weight,
})?;
self.write_edge_file(source, target, weight)?;
self.edge_cache
.write()
.insert((source.to_string(), target.to_string()), weight);
{
let mut adj = self.adjacency_cache.write();
adj.entry(source.to_string())
.or_default()
.insert(target.to_string());
adj.entry(target.to_string())
.or_default()
.insert(source.to_string());
}
{
let mut m = self.metadata.write();
m.edge_count = self.edge_cache.read().len() as u64;
}
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(())
}
/// Remove an edge: WAL first, then file, then caches; removing a
/// nonexistent edge is a no-op.
fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
// Validate identifiers to prevent path traversal
validate_path_id(source)?;
validate_path_id(target)?;
let seq = self.write_wal(WalOperation::DeleteEdge {
source: source.to_string(),
target: target.to_string(),
})?;
self.delete_edge_file(source, target)?;
self.edge_cache
.write()
.remove(&(source.to_string(), target.to_string()));
{
let mut adj = self.adjacency_cache.write();
if let Some(n) = adj.get_mut(source) {
n.remove(target);
}
if let Some(n) = adj.get_mut(target) {
n.remove(source);
}
}
{
let mut m = self.metadata.write();
m.edge_count = self.edge_cache.read().len() as u64;
}
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(())
}
/// Brute-force k-nearest-neighbor search by cosine similarity over the
/// cached node states (O(n log n) full sort; fine for small stores).
fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
if query.is_empty() {
return Ok(Vec::new());
}
let nodes = self.node_cache.read();
let mut sims: Vec<_> = nodes
.iter()
.map(|(id, s)| (id.clone(), Self::cosine_similarity(query, s)))
.collect();
// Descending by similarity; NaN compares as equal (stays in place).
sims.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
sims.truncate(k);
Ok(sims)
}
}
impl GovernanceStorage for FileStorage {
/// Store an opaque policy bundle under a fresh UUID and return the id.
fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
let seq = self.write_wal(WalOperation::StorePolicy {
policy_id: id.clone(),
data: bundle.to_vec(),
})?;
self.write_data_file("policies", &id, bundle)?;
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(id)
}
/// Fetch a policy bundle by id; `Ok(None)` when the file does not exist.
/// (`read_data_file` validates `id` against path traversal.)
fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
match self.read_data_file("policies", id) {
Ok(d) => Ok(Some(d)),
Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(e) => Err(e),
}
}
/// Store an opaque witness record under a fresh UUID and return the id.
fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
let seq = self.write_wal(WalOperation::StoreWitness {
witness_id: id.clone(),
data: witness.to_vec(),
})?;
self.write_data_file("witnesses", &id, witness)?;
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(id)
}
/// Return all witness records whose raw payload contains `action_id` as a
/// byte substring (linear scan over every witness file — no index).
///
/// # Errors
///
/// `StorageError::Io` if the witnesses directory cannot be read.
fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
    // Fix: `slice::windows(0)` panics, so an empty action id must be
    // rejected up front; it is meaningless as a search key anyway.
    if action_id.is_empty() {
        return Ok(Vec::new());
    }
    let mut results = Vec::new();
    let dir = self.root.join("witnesses");
    if dir.exists() {
        for entry in fs::read_dir(&dir)? {
            let path = entry?.path();
            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
                if let Ok(data) = self.read_data_file("witnesses", stem) {
                    // `windows` yields nothing when the payload is shorter
                    // than the needle, so that case is safely a non-match.
                    if data
                        .windows(action_id.len())
                        .any(|w| w == action_id.as_bytes())
                    {
                        results.push(data);
                    }
                }
            }
        }
    }
    Ok(results)
}
/// Store an opaque lineage record under a fresh UUID and return the id.
fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
let seq = self.write_wal(WalOperation::StoreLineage {
lineage_id: id.clone(),
data: lineage.to_vec(),
})?;
self.write_data_file("lineages", &id, lineage)?;
self.commit_wal(seq)?;
*self.cache_dirty.write() = true;
Ok(id)
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Round-trip a node through the default (bincode + WAL) configuration.
#[test]
fn test_file_storage_nodes() {
let temp_dir = TempDir::new().unwrap();
let storage = FileStorage::new(temp_dir.path()).unwrap();
storage.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
let state = storage.get_node("node-1").unwrap();
assert!(state.is_some());
assert_eq!(state.unwrap(), vec![1.0, 0.0, 0.0]);
}
/// Store then delete an edge; the stats counter must return to zero.
#[test]
fn test_file_storage_edges() {
let temp_dir = TempDir::new().unwrap();
let storage = FileStorage::new(temp_dir.path()).unwrap();
storage.store_edge("a", "b", 1.0).unwrap();
storage.delete_edge("a", "b").unwrap();
assert_eq!(storage.stats().edge_count, 0);
}
/// Round-trip a node using JSON serialization with the WAL disabled.
#[test]
fn test_storage_format_json() {
let temp_dir = TempDir::new().unwrap();
let storage =
FileStorage::with_options(temp_dir.path(), StorageFormat::Json, false).unwrap();
storage.store_node("json-node", &[1.0, 2.0]).unwrap();
let state = storage.get_node("json-node").unwrap();
assert_eq!(state.unwrap(), vec![1.0, 2.0]);
}
}

View File

@@ -0,0 +1,730 @@
//! In-Memory Storage Implementation
//!
//! Thread-safe in-memory storage for testing and development.
//! Uses `parking_lot::RwLock` for high-performance concurrent access.
//!
//! # Usage
//!
//! ```rust,ignore
//! use prime_radiant::storage::{InMemoryStorage, GraphStorage, GovernanceStorage};
//!
//! let storage = InMemoryStorage::new();
//!
//! // Store node states
//! storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
//!
//! // Store edges
//! storage.store_edge("node-1", "node-2", 1.0)?;
//!
//! // Store policies
//! let policy_id = storage.store_policy(b"policy-data")?;
//! ```
use super::{GovernanceStorage, GraphStorage, StorageConfig, StorageError};
use ordered_float::OrderedFloat;
use parking_lot::RwLock;
use std::collections::{BTreeMap, HashMap, HashSet};
use uuid::Uuid;
/// In-memory storage implementation for testing and development.
///
/// This implementation provides:
/// - Thread-safe access via `parking_lot::RwLock`
/// - Efficient KNN search using brute-force (suitable for small datasets)
/// - Full governance storage support
/// - No persistence (data is lost on drop)
#[derive(Debug)]
pub struct InMemoryStorage {
/// Node states: node_id -> state vector
nodes: RwLock<HashMap<String, Vec<f32>>>,
/// Edges: (source, target) -> weight
edges: RwLock<HashMap<(String, String), f32>>,
/// Adjacency list for efficient neighbor lookup: node_id -> set of neighbors
adjacency: RwLock<HashMap<String, HashSet<String>>>,
/// Policy bundles: policy_id -> serialized data
policies: RwLock<HashMap<String, Vec<u8>>>,
/// Witness records: witness_id -> serialized data
witnesses: RwLock<HashMap<String, Vec<u8>>>,
/// Witness records by action: action_id -> list of witness_ids
/// NOTE(review): nothing in this file ever inserts into this index —
/// `store_witness` receives no action id — so `get_witnesses_for_action`
/// finds nothing through it. Confirm who is expected to populate it.
witnesses_by_action: RwLock<HashMap<String, Vec<String>>>,
/// Lineage records: lineage_id -> serialized data
lineages: RwLock<HashMap<String, Vec<u8>>>,
/// Event log for audit trail
event_log: RwLock<Vec<StorageEvent>>,
/// Configuration (currently unused beyond construction)
#[allow(dead_code)]
config: StorageConfig,
}
/// Storage event for audit logging
///
/// Appended to `InMemoryStorage::event_log` by every logged operation.
#[derive(Debug, Clone)]
pub struct StorageEvent {
/// Event timestamp (milliseconds since epoch)
pub timestamp: i64,
/// Event type
pub event_type: StorageEventType,
/// Entity ID involved
pub entity_id: String,
/// Optional details
pub details: Option<String>,
}
/// Type of storage event
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageEventType {
/// Node stored
NodeStored,
/// Node retrieved
NodeRetrieved,
/// Node deleted
NodeDeleted,
/// Edge stored
EdgeStored,
/// Edge deleted
EdgeDeleted,
/// Policy stored
PolicyStored,
/// Policy retrieved
PolicyRetrieved,
/// Witness stored
WitnessStored,
/// Witness retrieved
WitnessRetrieved,
/// Lineage stored
LineageStored,
}
impl InMemoryStorage {
/// Create a new in-memory storage instance.
#[must_use]
pub fn new() -> Self {
Self::with_config(StorageConfig::default())
}
/// Create a new in-memory storage instance with custom configuration.
#[must_use]
pub fn with_config(config: StorageConfig) -> Self {
Self {
nodes: RwLock::new(HashMap::new()),
edges: RwLock::new(HashMap::new()),
adjacency: RwLock::new(HashMap::new()),
policies: RwLock::new(HashMap::new()),
witnesses: RwLock::new(HashMap::new()),
witnesses_by_action: RwLock::new(HashMap::new()),
lineages: RwLock::new(HashMap::new()),
event_log: RwLock::new(Vec::new()),
config,
}
}
/// Get the number of stored nodes.
#[must_use]
pub fn node_count(&self) -> usize {
self.nodes.read().len()
}
/// Get the number of stored edges.
#[must_use]
pub fn edge_count(&self) -> usize {
self.edges.read().len()
}
/// Get all node IDs (unordered snapshot).
#[must_use]
pub fn node_ids(&self) -> Vec<String> {
self.nodes.read().keys().cloned().collect()
}
/// Get all edges as (source, target, weight) tuples (unordered snapshot).
#[must_use]
pub fn all_edges(&self) -> Vec<(String, String, f32)> {
self.edges
.read()
.iter()
.map(|((s, t), w)| (s.clone(), t.clone(), *w))
.collect()
}
/// Get neighbors of a node; empty for unknown nodes.
#[must_use]
pub fn get_neighbors(&self, node_id: &str) -> Vec<String> {
self.adjacency
.read()
.get(node_id)
.map(|set| set.iter().cloned().collect())
.unwrap_or_default()
}
/// Clear all stored data, including the audit event log.
pub fn clear(&self) {
self.nodes.write().clear();
self.edges.write().clear();
self.adjacency.write().clear();
self.policies.write().clear();
self.witnesses.write().clear();
self.witnesses_by_action.write().clear();
self.lineages.write().clear();
self.event_log.write().clear();
}
/// Get the event log for audit purposes (cloned snapshot).
#[must_use]
pub fn get_event_log(&self) -> Vec<StorageEvent> {
self.event_log.read().clone()
}
/// Log a storage event with the current timestamp.
fn log_event(&self, event_type: StorageEventType, entity_id: String, details: Option<String>) {
let event = StorageEvent {
timestamp: chrono::Utc::now().timestamp_millis(),
event_type,
entity_id,
details,
};
self.event_log.write().push(event);
}
/// Compute cosine similarity between two vectors.
///
/// Returns 0.0 for mismatched lengths, empty inputs, or zero-norm vectors.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    // Accumulate dot product and both squared norms in a single pass.
    let (dot, sq_a, sq_b) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (&x, &y)| (d + x * y, na + x * x, nb + y * y),
    );
    if sq_a == 0.0 || sq_b == 0.0 {
        return 0.0;
    }
    dot / (sq_a.sqrt() * sq_b.sqrt())
}
/// Compute L2 (Euclidean) distance between two vectors.
///
/// Returns `f32::INFINITY` when the lengths differ; 0.0 for empty inputs.
fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return f32::INFINITY;
    }
    let mut sum_sq = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        let diff = x - y;
        sum_sq += diff * diff;
    }
    sum_sq.sqrt()
}
}
impl Default for InMemoryStorage {
fn default() -> Self {
Self::new()
}
}
impl GraphStorage for InMemoryStorage {
/// Store (or overwrite) a node state and log the event.
fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
self.nodes
.write()
.insert(node_id.to_string(), state.to_vec());
self.log_event(
StorageEventType::NodeStored,
node_id.to_string(),
Some(format!("dim={}", state.len())),
);
Ok(())
}
/// Fetch a node state; only hits are logged to the audit trail.
fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
let result = self.nodes.read().get(node_id).cloned();
if result.is_some() {
self.log_event(StorageEventType::NodeRetrieved, node_id.to_string(), None);
}
Ok(result)
}
/// Store (or overwrite) an edge and keep the adjacency list in step.
fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
let key = (source.to_string(), target.to_string());
self.edges.write().insert(key, weight);
// Update adjacency list (both directions for undirected graph semantics)
{
let mut adj = self.adjacency.write();
adj.entry(source.to_string())
.or_default()
.insert(target.to_string());
adj.entry(target.to_string())
.or_default()
.insert(source.to_string());
}
self.log_event(
StorageEventType::EdgeStored,
format!("{}->{}", source, target),
Some(format!("weight={}", weight)),
);
Ok(())
}
/// Remove an edge (no-op for unknown edges) and update adjacency.
fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
let key = (source.to_string(), target.to_string());
self.edges.write().remove(&key);
// Update adjacency list
{
let mut adj = self.adjacency.write();
if let Some(neighbors) = adj.get_mut(source) {
neighbors.remove(target);
}
if let Some(neighbors) = adj.get_mut(target) {
neighbors.remove(source);
}
}
self.log_event(
StorageEventType::EdgeDeleted,
format!("{}->{}", source, target),
None,
);
Ok(())
}
/// Brute-force KNN by cosine similarity across all stored nodes.
fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
if query.is_empty() {
return Ok(Vec::new());
}
let nodes = self.nodes.read();
// Use a BTreeMap for efficient top-k extraction (sorted by similarity)
let mut similarities: BTreeMap<OrderedFloat<f32>, Vec<String>> = BTreeMap::new();
for (node_id, state) in nodes.iter() {
let similarity = Self::cosine_similarity(query, state);
similarities
.entry(OrderedFloat(-similarity)) // Negative for descending order
.or_default()
.push(node_id.clone());
}
// Extract top k results
let mut results = Vec::with_capacity(k);
for (neg_sim, node_ids) in similarities {
for node_id in node_ids {
if results.len() >= k {
break;
}
// Undo the negation applied for ordering.
results.push((node_id, -neg_sim.0));
}
if results.len() >= k {
break;
}
}
Ok(results)
}
}
impl GovernanceStorage for InMemoryStorage {
/// Store a policy bundle under a fresh UUID and return the id.
fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
self.policies.write().insert(id.clone(), bundle.to_vec());
self.log_event(
StorageEventType::PolicyStored,
id.clone(),
Some(format!("size={}", bundle.len())),
);
Ok(id)
}
/// Fetch a policy by id; only hits are logged.
fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
let result = self.policies.read().get(id).cloned();
if result.is_some() {
self.log_event(StorageEventType::PolicyRetrieved, id.to_string(), None);
}
Ok(result)
}
/// Store a witness record under a fresh UUID and return the id.
/// NOTE(review): the trait carries no action id here, so the
/// `witnesses_by_action` index is never populated by this method.
fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
self.witnesses.write().insert(id.clone(), witness.to_vec());
self.log_event(
StorageEventType::WitnessStored,
id.clone(),
Some(format!("size={}", witness.len())),
);
Ok(id)
}
/// Return witness records associated with `action_id`.
///
/// Looks up the `witnesses_by_action` index first; because nothing in this
/// module populates that index (`store_witness` receives no action id),
/// this falls back to scanning witness payloads for `action_id` as a byte
/// substring — the same semantics as the file-backed implementation.
///
/// # Errors
///
/// Never fails; always returns `Ok`.
fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
    // An empty action id matches nothing; this also guards the
    // zero-length `windows` panic in the fallback scan below.
    if action_id.is_empty() {
        return Ok(Vec::new());
    }
    let witnesses = self.witnesses.read();
    // Fast path: explicit action_id -> witness_ids index.
    let mut result: Vec<Vec<u8>> = self
        .witnesses_by_action
        .read()
        .get(action_id)
        .map(|ids| {
            ids.iter()
                .filter_map(|id| witnesses.get(id).cloned())
                .collect()
        })
        .unwrap_or_default();
    // Fallback: substring scan over raw payloads for consistency with
    // `FileStorage::get_witnesses_for_action`.
    if result.is_empty() {
        result = witnesses
            .values()
            .filter(|data| {
                data.windows(action_id.len())
                    .any(|w| w == action_id.as_bytes())
            })
            .cloned()
            .collect();
    }
    if !result.is_empty() {
        self.log_event(
            StorageEventType::WitnessRetrieved,
            action_id.to_string(),
            Some(format!("count={}", result.len())),
        );
    }
    Ok(result)
}
/// Store a lineage record under a fresh UUID and return the id.
fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
let id = Uuid::new_v4().to_string();
self.lineages.write().insert(id.clone(), lineage.to_vec());
self.log_event(
StorageEventType::LineageStored,
id.clone(),
Some(format!("size={}", lineage.len())),
);
Ok(id)
}
}
/// Extended in-memory storage with additional indexing capabilities.
///
/// Wraps [`InMemoryStorage`] and adds a tag index over nodes and a
/// name index over policies. Note the indexes are add-only: base-level
/// deletes do not remove index entries.
#[derive(Debug)]
pub struct IndexedInMemoryStorage {
/// Base storage
base: InMemoryStorage,
/// Node metadata index: tag -> set of node_ids
node_tags: RwLock<HashMap<String, HashSet<String>>>,
/// Policy metadata index: name -> policy_id
policy_by_name: RwLock<HashMap<String, String>>,
}
impl IndexedInMemoryStorage {
/// Create a new indexed in-memory storage.
#[must_use]
pub fn new() -> Self {
Self {
base: InMemoryStorage::new(),
node_tags: RwLock::new(HashMap::new()),
policy_by_name: RwLock::new(HashMap::new()),
}
}
/// Store a node with tags for indexing.
///
/// The node is stored first; each tag then maps to the node id.
pub fn store_node_with_tags(
&self,
node_id: &str,
state: &[f32],
tags: &[&str],
) -> Result<(), StorageError> {
self.base.store_node(node_id, state)?;
let mut tag_index = self.node_tags.write();
for tag in tags {
tag_index
.entry((*tag).to_string())
.or_default()
.insert(node_id.to_string());
}
Ok(())
}
/// Find nodes by tag; empty for unknown tags.
#[must_use]
pub fn find_by_tag(&self, tag: &str) -> Vec<String> {
self.node_tags
.read()
.get(tag)
.map(|set| set.iter().cloned().collect())
.unwrap_or_default()
}
/// Store a policy with a name for lookup.
///
/// Re-using a name overwrites the name mapping (the old policy bundle
/// itself remains stored under its UUID).
pub fn store_policy_with_name(
&self,
name: &str,
bundle: &[u8],
) -> Result<String, StorageError> {
let id = self.base.store_policy(bundle)?;
self.policy_by_name
.write()
.insert(name.to_string(), id.clone());
Ok(id)
}
/// Get a policy by name; `Ok(None)` when the name is unknown.
pub fn get_policy_by_name(&self, name: &str) -> Result<Option<Vec<u8>>, StorageError> {
let id = self.policy_by_name.read().get(name).cloned();
match id {
Some(id) => self.base.get_policy(&id),
None => Ok(None),
}
}
/// Get the base storage for direct access.
#[must_use]
pub fn base(&self) -> &InMemoryStorage {
&self.base
}
}
/// `Default` delegates to [`IndexedInMemoryStorage::new`], yielding an
/// empty store with empty indexes.
impl Default for IndexedInMemoryStorage {
    fn default() -> Self {
        Self::new()
    }
}
// Graph operations are pure delegation to the base storage. Note that the
// tag index is only maintained by `store_node_with_tags`; a plain
// `store_node` call bypasses it.
impl GraphStorage for IndexedInMemoryStorage {
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
        self.base.store_node(node_id, state)
    }
    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
        self.base.get_node(node_id)
    }
    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
        self.base.store_edge(source, target, weight)
    }
    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
        self.base.delete_edge(source, target)
    }
    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
        self.base.find_similar(query, k)
    }
}
// Governance operations are pure delegation to the base storage. The name
// index is only maintained by `store_policy_with_name`; a plain
// `store_policy` call bypasses it.
impl GovernanceStorage for IndexedInMemoryStorage {
    fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
        self.base.store_policy(bundle)
    }
    fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
        self.base.get_policy(id)
    }
    fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
        self.base.store_witness(witness)
    }
    fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
        self.base.get_witnesses_for_action(action_id)
    }
    fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
        self.base.store_lineage(lineage)
    }
}
// Unit tests for the in-memory backends. Several tests pin exact orderings
// (event-log sequence, similarity ranking), so assertions are intentionally
// strict.
#[cfg(test)]
mod tests {
    use super::*;
    // Basic CRUD on node states, including the miss path.
    #[test]
    fn test_in_memory_storage_nodes() {
        let storage = InMemoryStorage::new();
        // Store a node
        storage.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
        storage.store_node("node-2", &[0.0, 1.0, 0.0]).unwrap();
        assert_eq!(storage.node_count(), 2);
        // Retrieve node
        let state = storage.get_node("node-1").unwrap();
        assert!(state.is_some());
        assert_eq!(state.unwrap(), vec![1.0, 0.0, 0.0]);
        // Non-existent node
        let missing = storage.get_node("node-999").unwrap();
        assert!(missing.is_none());
    }
    // Edge insertion, adjacency lookup, and deletion. Per the assertions
    // below, `get_neighbors("b")` lists nodes on either end of an edge.
    #[test]
    fn test_in_memory_storage_edges() {
        let storage = InMemoryStorage::new();
        // Store nodes
        storage.store_node("a", &[1.0]).unwrap();
        storage.store_node("b", &[2.0]).unwrap();
        storage.store_node("c", &[3.0]).unwrap();
        // Store edges
        storage.store_edge("a", "b", 1.0).unwrap();
        storage.store_edge("b", "c", 2.0).unwrap();
        assert_eq!(storage.edge_count(), 2);
        // Check adjacency
        let neighbors = storage.get_neighbors("b");
        assert_eq!(neighbors.len(), 2);
        assert!(neighbors.contains(&"a".to_string()));
        assert!(neighbors.contains(&"c".to_string()));
        // Delete edge
        storage.delete_edge("a", "b").unwrap();
        assert_eq!(storage.edge_count(), 1);
        let neighbors = storage.get_neighbors("b");
        assert_eq!(neighbors.len(), 1);
        assert!(!neighbors.contains(&"a".to_string()));
    }
    // Similarity search ranks by cosine similarity, best match first.
    #[test]
    fn test_find_similar() {
        let storage = InMemoryStorage::new();
        // Store nodes with different orientations
        storage.store_node("north", &[0.0, 1.0, 0.0]).unwrap();
        storage.store_node("south", &[0.0, -1.0, 0.0]).unwrap();
        storage.store_node("east", &[1.0, 0.0, 0.0]).unwrap();
        storage
            .store_node("northeast", &[0.707, 0.707, 0.0])
            .unwrap();
        // Query for vectors similar to north
        let query = vec![0.0, 1.0, 0.0];
        let results = storage.find_similar(&query, 2).unwrap();
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, "north");
        assert!((results[0].1 - 1.0).abs() < 0.001); // Perfect match
        assert_eq!(results[1].0, "northeast"); // Second closest
    }
    // Round-trip policies, witnesses, and lineage records.
    #[test]
    fn test_governance_storage() {
        let storage = InMemoryStorage::new();
        // Store policy
        let policy_data = b"test policy data";
        let policy_id = storage.store_policy(policy_data).unwrap();
        // Retrieve policy
        let retrieved = storage.get_policy(&policy_id).unwrap();
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), policy_data.to_vec());
        // Store witness
        let witness_data = b"test witness data";
        let witness_id = storage.store_witness(witness_data).unwrap();
        assert!(!witness_id.is_empty());
        // Store lineage
        let lineage_data = b"test lineage data";
        let lineage_id = storage.store_lineage(lineage_data).unwrap();
        assert!(!lineage_id.is_empty());
    }
    // The event log records one entry per operation, in call order.
    #[test]
    fn test_event_log() {
        let storage = InMemoryStorage::new();
        storage.store_node("test", &[1.0]).unwrap();
        storage.get_node("test").unwrap();
        storage.store_edge("a", "b", 1.0).unwrap();
        let log = storage.get_event_log();
        assert_eq!(log.len(), 3);
        assert_eq!(log[0].event_type, StorageEventType::NodeStored);
        assert_eq!(log[1].event_type, StorageEventType::NodeRetrieved);
        assert_eq!(log[2].event_type, StorageEventType::EdgeStored);
    }
    // `clear` wipes nodes, edges, and the event log.
    #[test]
    fn test_clear() {
        let storage = InMemoryStorage::new();
        storage.store_node("node", &[1.0]).unwrap();
        storage.store_edge("a", "b", 1.0).unwrap();
        storage.store_policy(b"policy").unwrap();
        assert!(storage.node_count() > 0);
        storage.clear();
        assert_eq!(storage.node_count(), 0);
        assert_eq!(storage.edge_count(), 0);
        assert_eq!(storage.get_event_log().len(), 0);
    }
    // Tag index and policy-by-name index on the indexed wrapper.
    #[test]
    fn test_indexed_storage() {
        let storage = IndexedInMemoryStorage::new();
        // Store with tags
        storage
            .store_node_with_tags("node-1", &[1.0, 0.0], &["important", "category-a"])
            .unwrap();
        storage
            .store_node_with_tags("node-2", &[0.0, 1.0], &["important"])
            .unwrap();
        storage
            .store_node_with_tags("node-3", &[1.0, 1.0], &["category-a"])
            .unwrap();
        // Find by tag
        let important = storage.find_by_tag("important");
        assert_eq!(important.len(), 2);
        let category_a = storage.find_by_tag("category-a");
        assert_eq!(category_a.len(), 2);
        // Store and retrieve policy by name
        storage
            .store_policy_with_name("default", b"default policy")
            .unwrap();
        let policy = storage.get_policy_by_name("default").unwrap();
        assert!(policy.is_some());
        assert_eq!(policy.unwrap(), b"default policy".to_vec());
    }
    // Cosine similarity at its three characteristic points: 1, 0, -1.
    #[test]
    fn test_cosine_similarity() {
        // Identical vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]);
        assert!((sim - 1.0).abs() < 0.001);
        // Orthogonal vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]);
        assert!(sim.abs() < 0.001);
        // Opposite vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[-1.0, 0.0]);
        assert!((sim - (-1.0)).abs() < 0.001);
    }
    // Euclidean distance on simple known geometries.
    #[test]
    fn test_l2_distance() {
        // Same point
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[0.0, 0.0]);
        assert!(dist.abs() < 0.001);
        // Unit distance
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[1.0, 0.0]);
        assert!((dist - 1.0).abs() < 0.001);
        // Diagonal
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[1.0, 1.0]);
        assert!((dist - std::f32::consts::SQRT_2).abs() < 0.001);
    }
}

View File

@@ -0,0 +1,575 @@
//! # Storage Layer Module
//!
//! Hybrid storage with PostgreSQL for transactional authority and ruvector for
//! high-performance vector and graph queries.
//!
//! ## Architecture
//!
//! ```text
//! +----------------------------------------------+
//! | Storage Layer |
//! +----------------------------------------------+
//! | |
//! | +------------------+ +------------------+ |
//! | | PostgreSQL | | ruvector | |
//! | | (Authority) | | (Graph/Vector) | |
//! | | | | | |
//! | | - Policy bundles | | - Node states | |
//! | | - Witnesses | | - Edge data | |
//! | | - Lineage | | - HNSW index | |
//! | | - Event log | | - Residual cache | |
//! | +------------------+ +------------------+ |
//! | |
//! +----------------------------------------------+
//! ```
//!
//! ## Storage Backends
//!
//! | Backend | Use Case | Features |
//! |---------|----------|----------|
//! | `InMemoryStorage` | Testing, Development | Thread-safe, fast, no persistence |
//! | `FileStorage` | Embedded, Edge | WAL, JSON/bincode, persistence |
//! | `PostgresStorage` | Production | ACID, indexes, concurrent access |
//!
//! ## Usage
//!
//! ```rust,ignore
//! use prime_radiant::storage::{
//! InMemoryStorage, FileStorage, GraphStorage, GovernanceStorage,
//! };
//!
//! // In-memory for testing
//! let memory_storage = InMemoryStorage::new();
//! memory_storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
//!
//! // File-based for persistence
//! let file_storage = FileStorage::new("./data")?;
//! file_storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
//!
//! // PostgreSQL for production (feature-gated)
//! #[cfg(feature = "postgres")]
//! let pg_storage = PostgresStorage::connect("postgresql://localhost/db").await?;
//! ```
// Module declarations
mod file;
mod memory;
#[cfg(feature = "postgres")]
#[cfg_attr(docsrs, doc(cfg(feature = "postgres")))]
mod postgres;
// Re-exports
pub use file::{FileStorage, StorageFormat, StorageMetadata, StorageStats, WalEntry, WalOperation};
pub use memory::{InMemoryStorage, IndexedInMemoryStorage, StorageEvent, StorageEventType};
#[cfg(feature = "postgres")]
pub use postgres::{
AsyncGraphStorageAdapter, EdgeRow, EventLogEntry, LineageRecordRow, NodeStateRow,
PolicyBundleRow, PostgresConfig, PostgresStats, PostgresStorage, WitnessRecordRow,
};
use serde::{Deserialize, Serialize};
/// Storage configuration.
///
/// Shared by all backends. Prefer the builder-style constructors
/// (`in_memory`, `file_based`, `postgres`) over filling fields by hand.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// PostgreSQL connection string (optional; `None` disables PostgreSQL).
    pub postgres_url: Option<String>,
    /// Path for local graph storage. An empty string selects the in-memory
    /// backend (see `StorageFactory`).
    pub graph_path: String,
    /// Path for event log.
    pub event_log_path: String,
    /// Enable write-ahead logging.
    pub enable_wal: bool,
    /// Cache size in MB.
    pub cache_size_mb: usize,
}
impl Default for StorageConfig {
    /// Default configuration: file-backed paths under `./data`, WAL enabled,
    /// a 256 MB cache, and no PostgreSQL connection.
    fn default() -> Self {
        Self {
            postgres_url: None,
            graph_path: String::from("./data/graph"),
            event_log_path: String::from("./data/events"),
            enable_wal: true,
            cache_size_mb: 256,
        }
    }
}
impl StorageConfig {
    /// Create a configuration for in-memory storage only.
    ///
    /// Paths are empty (which selects the in-memory backend) and WAL is off.
    #[must_use]
    pub fn in_memory() -> Self {
        Self {
            graph_path: String::new(),
            event_log_path: String::new(),
            enable_wal: false,
            ..Self::default()
        }
    }
    /// Create a configuration for file-based storage rooted at `path`.
    ///
    /// The event log lives in an `events` subdirectory of `path`.
    #[must_use]
    pub fn file_based(path: impl Into<String>) -> Self {
        let base = path.into();
        let event_log_path = format!("{}/events", base);
        Self {
            graph_path: base,
            event_log_path,
            enable_wal: true,
            ..Self::default()
        }
    }
    /// Create a configuration for PostgreSQL storage at `url`.
    #[must_use]
    pub fn postgres(url: impl Into<String>) -> Self {
        Self {
            postgres_url: Some(url.into()),
            enable_wal: false,
            ..Self::default()
        }
    }
    /// Set the cache size (builder style).
    #[must_use]
    pub const fn with_cache_size(mut self, size_mb: usize) -> Self {
        self.cache_size_mb = size_mb;
        self
    }
    /// Enable or disable WAL (builder style).
    #[must_use]
    pub const fn with_wal(mut self, enable: bool) -> Self {
        self.enable_wal = enable;
        self
    }
}
/// Storage backend trait for graph operations.
///
/// This trait defines the interface for storing and retrieving graph data
/// including node states and edges. Implementations must be thread-safe
/// (`Send + Sync`), since a single backend may be shared across threads
/// behind a trait object.
pub trait GraphStorage: Send + Sync {
    /// Store a node state.
    ///
    /// # Arguments
    ///
    /// * `node_id` - Unique identifier for the node
    /// * `state` - State vector (typically f32 values representing the node's state)
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError>;
    /// Retrieve a node state.
    ///
    /// # Arguments
    ///
    /// * `node_id` - Unique identifier for the node
    ///
    /// # Returns
    ///
    /// `Some(state)` if the node exists, `None` otherwise.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError>;
    /// Store an edge between two nodes.
    ///
    /// # Arguments
    ///
    /// * `source` - Source node ID
    /// * `target` - Target node ID
    /// * `weight` - Edge weight (typically representing constraint strength)
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError>;
    /// Delete an edge between two nodes.
    ///
    /// NOTE(review): behavior when the edge does not exist is not specified
    /// here and may vary by backend — confirm before relying on it.
    ///
    /// # Arguments
    ///
    /// * `source` - Source node ID
    /// * `target` - Target node ID
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError>;
    /// Find nodes similar to a query vector.
    ///
    /// This method performs approximate nearest neighbor search using cosine similarity.
    /// For production workloads with large datasets, consider using HNSW-indexed storage.
    ///
    /// # Arguments
    ///
    /// * `query` - Query vector to search for similar nodes
    /// * `k` - Maximum number of results to return
    ///
    /// # Returns
    ///
    /// Vector of (node_id, similarity_score) tuples, sorted by similarity descending.
    /// May contain fewer than `k` entries if the store holds fewer nodes.
    ///
    /// # Errors
    ///
    /// Returns error if the search operation fails.
    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError>;
}
/// Storage backend trait for governance data.
///
/// This trait defines the interface for storing and retrieving governance objects
/// including policy bundles, witness records, and lineage records. All payloads
/// are opaque byte slices; serialization is the caller's responsibility.
/// Returned identifiers are generated by the backend (e.g. UUIDs in the
/// in-memory implementation).
pub trait GovernanceStorage: Send + Sync {
    /// Store a policy bundle.
    ///
    /// # Arguments
    ///
    /// * `bundle` - Serialized policy bundle data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored bundle.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError>;
    /// Retrieve a policy bundle.
    ///
    /// # Arguments
    ///
    /// * `id` - Policy bundle identifier
    ///
    /// # Returns
    ///
    /// `Some(data)` if the policy exists, `None` otherwise.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError>;
    /// Store a witness record.
    ///
    /// Witness records provide immutable proof of gate decisions.
    ///
    /// # Arguments
    ///
    /// * `witness` - Serialized witness record data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored witness.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError>;
    /// Retrieve witness records for an action.
    ///
    /// # Arguments
    ///
    /// * `action_id` - Action identifier to search for
    ///
    /// # Returns
    ///
    /// Vector of witness record data for the given action. Empty if none are
    /// associated with `action_id`.
    ///
    /// # Errors
    ///
    /// Returns error if the search operation fails.
    fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError>;
    /// Store a lineage record.
    ///
    /// Lineage records track provenance for authoritative writes.
    ///
    /// # Arguments
    ///
    /// * `lineage` - Serialized lineage record data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored lineage.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError>;
}
/// Storage error type.
///
/// Shared by every backend. `std::io::Error` converts automatically into the
/// `Io` variant via `#[from]`, so file backends can use `?` directly.
#[derive(Debug, thiserror::Error)]
pub enum StorageError {
    /// Connection error (database or file system)
    #[error("Connection error: {0}")]
    Connection(String),
    /// Entity not found
    #[error("Not found: {0}")]
    NotFound(String),
    /// Serialization/deserialization error
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// IO error
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Invalid data format or content
    #[error("Invalid data: {0}")]
    InvalidData(String),
    /// Transaction or operation failed
    #[error("Transaction failed: {0}")]
    Transaction(String),
    /// Integrity constraint violation
    #[error("Integrity violation: {0}")]
    IntegrityViolation(String),
    /// Resource exhausted (e.g., disk space)
    #[error("Resource exhausted: {0}")]
    ResourceExhausted(String),
    /// Permission denied
    #[error("Permission denied: {0}")]
    PermissionDenied(String),
}
/// Hybrid storage that combines multiple backends.
///
/// Uses file storage for graph data and optionally PostgreSQL for governance data.
/// This provides the best of both worlds: fast local access for frequently accessed
/// data and ACID guarantees for critical governance data.
///
/// NOTE(review): in the current implementation all operations (including
/// governance) go through file storage; PostgreSQL delegation is not yet wired.
#[derive(Debug)]
pub struct HybridStorage {
    /// File storage for graph data (and, currently, governance data)
    file_storage: FileStorage,
    /// Configuration this instance was created from
    config: StorageConfig,
}
impl HybridStorage {
    /// Create a new hybrid storage instance from `config`.
    ///
    /// # Errors
    ///
    /// Returns error if file storage cannot be initialized.
    pub fn new(config: StorageConfig) -> Result<Self, StorageError> {
        FileStorage::from_config(&config).map(|file_storage| Self {
            file_storage,
            config,
        })
    }
    /// Borrow the file storage backend.
    #[must_use]
    pub fn file_storage(&self) -> &FileStorage {
        &self.file_storage
    }
    /// Borrow the configuration this instance was built from.
    #[must_use]
    pub fn config(&self) -> &StorageConfig {
        &self.config
    }
    /// Whether a PostgreSQL connection string is configured.
    #[must_use]
    pub fn has_postgres(&self) -> bool {
        self.config.postgres_url.is_some()
    }
    /// Sync all storage backends to durable media.
    ///
    /// # Errors
    ///
    /// Returns error if sync fails.
    pub fn sync(&self) -> Result<(), StorageError> {
        self.file_storage.sync()
    }
}
// Graph operations delegate entirely to the file backend.
impl GraphStorage for HybridStorage {
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
        self.file_storage.store_node(node_id, state)
    }
    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
        self.file_storage.get_node(node_id)
    }
    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
        self.file_storage.store_edge(source, target, weight)
    }
    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
        self.file_storage.delete_edge(source, target)
    }
    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
        self.file_storage.find_similar(query, k)
    }
}
// Governance operations also delegate to the file backend for now; PostgreSQL
// delegation (when configured) is a planned enhancement.
impl GovernanceStorage for HybridStorage {
    fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
        // For now, use file storage. In production, this would delegate to PostgreSQL.
        self.file_storage.store_policy(bundle)
    }
    fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
        self.file_storage.get_policy(id)
    }
    fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
        self.file_storage.store_witness(witness)
    }
    fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
        self.file_storage.get_witnesses_for_action(action_id)
    }
    fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
        self.file_storage.store_lineage(lineage)
    }
}
/// Factory for creating storage instances based on configuration.
///
/// Stateless namespace type: all constructors are associated functions.
pub struct StorageFactory;
impl StorageFactory {
    /// Build a graph-storage backend from `config`.
    ///
    /// An empty `graph_path` selects the in-memory backend; otherwise a
    /// file-based backend rooted at that path is created.
    ///
    /// # Errors
    ///
    /// Returns error if storage cannot be created.
    pub fn create_graph_storage(
        config: &StorageConfig,
    ) -> Result<Box<dyn GraphStorage>, StorageError> {
        if config.graph_path.is_empty() {
            return Ok(Box::new(InMemoryStorage::new()));
        }
        let storage = FileStorage::from_config(config)?;
        Ok(Box::new(storage))
    }
    /// Build a governance-storage backend from `config`.
    ///
    /// Selection mirrors [`Self::create_graph_storage`]: an empty
    /// `graph_path` means in-memory, anything else means file-based.
    ///
    /// # Errors
    ///
    /// Returns error if storage cannot be created.
    pub fn create_governance_storage(
        config: &StorageConfig,
    ) -> Result<Box<dyn GovernanceStorage>, StorageError> {
        if config.graph_path.is_empty() {
            return Ok(Box::new(InMemoryStorage::new()));
        }
        let storage = FileStorage::from_config(config)?;
        Ok(Box::new(storage))
    }
    /// Create an in-memory storage (convenience method).
    #[must_use]
    pub fn in_memory() -> InMemoryStorage {
        InMemoryStorage::new()
    }
    /// Create a file storage rooted at `path` (convenience method).
    ///
    /// # Errors
    ///
    /// Returns error if storage cannot be created.
    pub fn file(path: impl AsRef<std::path::Path>) -> Result<FileStorage, StorageError> {
        FileStorage::new(path)
    }
}
// Integration-style tests for the configuration builders, factory dispatch,
// and the hybrid backend. File-backed cases use a tempdir for isolation.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    // Builder constructors set the fields that distinguish each backend.
    #[test]
    fn test_storage_config_builders() {
        let config = StorageConfig::in_memory();
        assert!(config.graph_path.is_empty());
        assert!(!config.enable_wal);
        let config = StorageConfig::file_based("/tmp/test");
        assert_eq!(config.graph_path, "/tmp/test");
        assert!(config.enable_wal);
        let config = StorageConfig::postgres("postgresql://localhost/db");
        assert!(config.postgres_url.is_some());
    }
    // Empty graph_path routes the factory to the in-memory backend.
    #[test]
    fn test_storage_factory_in_memory() {
        let config = StorageConfig::in_memory();
        let storage = StorageFactory::create_graph_storage(&config).unwrap();
        storage.store_node("test", &[1.0, 2.0]).unwrap();
        let state = storage.get_node("test").unwrap();
        assert!(state.is_some());
    }
    // A non-empty graph_path routes the factory to the file backend.
    #[test]
    fn test_storage_factory_file() {
        let temp_dir = TempDir::new().unwrap();
        let config = StorageConfig::file_based(temp_dir.path().to_str().unwrap());
        let storage = StorageFactory::create_graph_storage(&config).unwrap();
        storage.store_node("test", &[1.0, 2.0]).unwrap();
        let state = storage.get_node("test").unwrap();
        assert!(state.is_some());
    }
    // Hybrid storage serves both graph and governance operations, and syncs.
    #[test]
    fn test_hybrid_storage() {
        let temp_dir = TempDir::new().unwrap();
        let config = StorageConfig::file_based(temp_dir.path().to_str().unwrap());
        let storage = HybridStorage::new(config).unwrap();
        // Graph operations
        storage.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
        let state = storage.get_node("node-1").unwrap();
        assert!(state.is_some());
        // Governance operations
        let policy_id = storage.store_policy(b"test policy").unwrap();
        let policy = storage.get_policy(&policy_id).unwrap();
        assert!(policy.is_some());
        storage.sync().unwrap();
    }
    #[test]
    fn test_trait_object_usage() {
        // Verify that storage types can be used as trait objects
        let memory: Box<dyn GraphStorage> = Box::new(InMemoryStorage::new());
        memory.store_node("test", &[1.0]).unwrap();
        let memory: Box<dyn GovernanceStorage> = Box::new(InMemoryStorage::new());
        let _ = memory.store_policy(b"test").unwrap();
    }
}

File diff suppressed because it is too large Load Diff