Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
803
vendor/ruvector/crates/prime-radiant/src/storage/file.rs
vendored
Normal file
803
vendor/ruvector/crates/prime-radiant/src/storage/file.rs
vendored
Normal file
@@ -0,0 +1,803 @@
|
||||
//! File-Based Storage Implementation
|
||||
//!
|
||||
//! Persistent file storage with write-ahead logging (WAL) for durability.
|
||||
//! Supports both JSON and bincode serialization formats.
|
||||
//!
|
||||
//! # Security
|
||||
//!
|
||||
//! All identifiers used in file paths are sanitized to prevent path traversal attacks.
|
||||
//! Only alphanumeric characters, dashes, underscores, and dots are allowed.
|
||||
|
||||
use super::{GovernanceStorage, GraphStorage, StorageConfig, StorageError};
|
||||
use parking_lot::{Mutex, RwLock};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs::{self, File, OpenOptions};
|
||||
use std::io::{BufReader, BufWriter, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Maximum allowed identifier length for security
|
||||
const MAX_ID_LENGTH: usize = 256;
|
||||
|
||||
/// Validate and sanitize an identifier for use in file paths.
|
||||
///
|
||||
/// # Security
|
||||
///
|
||||
/// This function prevents path traversal attacks by:
|
||||
/// - Rejecting empty identifiers
|
||||
/// - Rejecting identifiers over MAX_ID_LENGTH
|
||||
/// - Only allowing alphanumeric, dash, underscore, and dot characters
|
||||
/// - Rejecting "." and ".." path components
|
||||
/// - Rejecting identifiers starting with a dot (hidden files)
|
||||
fn validate_path_id(id: &str) -> Result<(), StorageError> {
|
||||
if id.is_empty() {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"Identifier cannot be empty",
|
||||
)));
|
||||
}
|
||||
|
||||
if id.len() > MAX_ID_LENGTH {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("Identifier too long: {} (max: {})", id.len(), MAX_ID_LENGTH),
|
||||
)));
|
||||
}
|
||||
|
||||
// Reject path traversal attempts
|
||||
if id == "." || id == ".." {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"Path traversal detected",
|
||||
)));
|
||||
}
|
||||
|
||||
// Reject hidden files (starting with dot)
|
||||
if id.starts_with('.') {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"Identifiers cannot start with '.'",
|
||||
)));
|
||||
}
|
||||
|
||||
// Check each character is safe
|
||||
for c in id.chars() {
|
||||
if !c.is_ascii_alphanumeric() && c != '-' && c != '_' && c != '.' {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("Invalid character '{}' in identifier", c),
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Reject path separators
|
||||
if id.contains('/') || id.contains('\\') {
|
||||
return Err(StorageError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"Path separators not allowed in identifier",
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// File storage format for serialization.
///
/// `Bincode` is the default; `Json` trades size and speed for readability.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StorageFormat {
    /// JSON format (human-readable, larger)
    Json,
    /// Bincode format (compact, faster)
    #[default]
    Bincode,
}
|
||||
|
||||
/// Write-ahead log entry
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalEntry {
    /// Monotonically increasing sequence number assigned by the writer.
    pub sequence: u64,
    /// The logged storage operation.
    pub operation: WalOperation,
    /// blake3 hash of the bincode-encoded operation (see `WalEntry::new`).
    pub checksum: [u8; 32],
    /// Creation time in milliseconds since the Unix epoch.
    pub timestamp: i64,
    /// Commit flag; set to `false` on creation (see `recover_from_wal`).
    pub committed: bool,
}
|
||||
|
||||
/// WAL operation types
///
/// Each variant carries everything needed to replay the operation during
/// crash recovery (see `FileStorage::recover_from_wal`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WalOperation {
    /// Persist a node's state vector.
    StoreNode {
        node_id: String,
        state: Vec<f32>,
    },
    /// Remove a node's state file.
    DeleteNode {
        node_id: String,
    },
    /// Persist a weighted edge between two nodes.
    StoreEdge {
        source: String,
        target: String,
        weight: f32,
    },
    /// Remove an edge file.
    DeleteEdge {
        source: String,
        target: String,
    },
    /// Persist an opaque policy bundle.
    StorePolicy {
        policy_id: String,
        data: Vec<u8>,
    },
    /// Persist an opaque witness record.
    StoreWitness {
        witness_id: String,
        data: Vec<u8>,
    },
    /// Persist an opaque lineage record.
    StoreLineage {
        lineage_id: String,
        data: Vec<u8>,
    },
}
|
||||
|
||||
impl WalEntry {
    /// Build a new, uncommitted entry for `operation` at `sequence`.
    ///
    /// The checksum is the blake3 hash of the bincode encoding of the
    /// operation. NOTE(review): if encoding fails here, `unwrap_or_default()`
    /// silently hashes an empty byte string; `verify()` on such an entry
    /// returns `false` whenever re-encoding later succeeds — confirm that
    /// dropping the entry on recovery is the intended outcome.
    fn new(sequence: u64, operation: WalOperation) -> Self {
        let op_bytes = bincode::serde::encode_to_vec(&operation, bincode::config::standard())
            .unwrap_or_default();
        let checksum = *blake3::hash(&op_bytes).as_bytes();
        Self {
            sequence,
            operation,
            checksum,
            timestamp: chrono::Utc::now().timestamp_millis(),
            committed: false,
        }
    }

    /// Re-encode the operation and compare its blake3 hash against the
    /// stored checksum. Returns `false` if encoding fails or hashes differ.
    fn verify(&self) -> bool {
        match bincode::serde::encode_to_vec(&self.operation, bincode::config::standard()) {
            Ok(bytes) => self.checksum == *blake3::hash(&bytes).as_bytes(),
            Err(_) => false,
        }
    }
}
|
||||
|
||||
/// Storage metadata persisted to disk
///
/// Written as pretty JSON to `<root>/metadata.json` by
/// `FileStorage::save_metadata`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StorageMetadata {
    // Presumably an on-disk schema version; not read in this file.
    pub version: u32,
    // Presumably the serialization format name; not read in this file.
    pub format: String,
    /// Number of stored nodes at last update.
    pub node_count: u64,
    /// Number of stored edges at last update.
    pub edge_count: u64,
    /// Highest WAL sequence number recorded at last save.
    pub last_wal_sequence: u64,
    /// Creation time (milliseconds since the Unix epoch).
    pub created_at: i64,
    /// Last modification time (milliseconds since the Unix epoch).
    pub modified_at: i64,
}
|
||||
|
||||
/// File-based storage implementation with WAL
///
/// On-disk layout under `root`: one file per node/edge/policy/witness/lineage
/// in the corresponding subdirectory, plus `wal/` segments and
/// `metadata.json`.
#[derive(Debug)]
pub struct FileStorage {
    /// Root directory containing all storage subdirectories.
    root: PathBuf,
    /// Serialization format used for node and edge files.
    format: StorageFormat,
    /// Whether mutations are logged to the write-ahead log first.
    wal_enabled: bool,
    /// Last assigned WAL sequence number.
    wal_sequence: Mutex<u64>,
    /// Currently open WAL segment writer (`None` when WAL is disabled).
    wal_file: Mutex<Option<BufWriter<File>>>,
    /// node_id -> state vector.
    node_cache: RwLock<HashMap<String, Vec<f32>>>,
    /// (source, target) -> weight.
    edge_cache: RwLock<HashMap<(String, String), f32>>,
    /// node_id -> neighbor set, maintained symmetrically in both directions.
    adjacency_cache: RwLock<HashMap<String, HashSet<String>>>,
    /// Set after a mutation; cleared when `sync()` persists metadata.
    cache_dirty: RwLock<bool>,
    /// In-memory mirror of the persisted metadata.
    metadata: RwLock<StorageMetadata>,
}
|
||||
|
||||
impl FileStorage {
|
||||
/// Create a storage instance rooted at `root` with the defaults:
/// bincode serialization and WAL enabled.
pub fn new(root: impl AsRef<Path>) -> Result<Self, StorageError> {
    Self::with_options(root, StorageFormat::Bincode, true)
}
|
||||
|
||||
/// Create a storage instance with explicit format and WAL settings.
///
/// Creates the on-disk directory layout, loads (or defaults) persisted
/// metadata, opens and replays the WAL when enabled, then warms the
/// in-memory caches from the node and edge files.
///
/// # Errors
///
/// Returns `StorageError` if directory creation, metadata I/O, WAL
/// replay, or cache loading fails.
pub fn with_options(
    root: impl AsRef<Path>,
    format: StorageFormat,
    wal_enabled: bool,
) -> Result<Self, StorageError> {
    let root = root.as_ref().to_path_buf();
    // Fixed directory layout; create_dir_all is idempotent.
    for dir in ["nodes", "edges", "policies", "witnesses", "lineages", "wal"] {
        fs::create_dir_all(root.join(dir))?;
    }

    let metadata_path = root.join("metadata.json");
    // A corrupt metadata file silently falls back to defaults.
    let metadata: StorageMetadata = if metadata_path.exists() {
        serde_json::from_reader(File::open(&metadata_path)?).unwrap_or_default()
    } else {
        StorageMetadata::default()
    };

    let storage = Self {
        root,
        format,
        wal_enabled,
        // Resume sequence numbering where the previous run left off.
        wal_sequence: Mutex::new(metadata.last_wal_sequence),
        wal_file: Mutex::new(None),
        node_cache: RwLock::new(HashMap::new()),
        edge_cache: RwLock::new(HashMap::new()),
        adjacency_cache: RwLock::new(HashMap::new()),
        cache_dirty: RwLock::new(false),
        metadata: RwLock::new(metadata),
    };

    if wal_enabled {
        storage.open_wal_file()?;
        // Replay entries that were written but not marked committed.
        storage.recover_from_wal()?;
    }
    storage.load_cache()?;
    Ok(storage)
}
|
||||
|
||||
/// Build a storage instance from a `StorageConfig`, always using bincode.
pub fn from_config(config: &StorageConfig) -> Result<Self, StorageError> {
    Self::with_options(
        &config.graph_path,
        StorageFormat::Bincode,
        config.enable_wal,
    )
}
|
||||
|
||||
/// Open (or create) the current WAL segment for appending.
///
/// Segments group 1000 sequence numbers per file, named `NNNNNN.wal`
/// from `sequence / 1000`.
fn open_wal_file(&self) -> Result<(), StorageError> {
    let seq = *self.wal_sequence.lock();
    let path = self.root.join("wal").join(format!("{:06}.wal", seq / 1000));
    let file = OpenOptions::new().create(true).append(true).open(&path)?;
    *self.wal_file.lock() = Some(BufWriter::new(file));
    Ok(())
}
|
||||
|
||||
/// Append an operation to the WAL and return its sequence number.
///
/// Each record is a u32 little-endian length prefix followed by the
/// bincode-encoded `WalEntry`. Returns 0 without writing when the WAL is
/// disabled.
fn write_wal(&self, operation: WalOperation) -> Result<u64, StorageError> {
    if !self.wal_enabled {
        return Ok(0);
    }
    // Reserve the next sequence number under the lock, then release it
    // before the (comparatively slow) encode and write.
    let seq = {
        let mut g = self.wal_sequence.lock();
        *g += 1;
        *g
    };
    let entry = WalEntry::new(seq, operation);
    let bytes = bincode::serde::encode_to_vec(&entry, bincode::config::standard())
        .map_err(|e| StorageError::Serialization(e.to_string()))?;
    if let Some(ref mut wal) = *self.wal_file.lock() {
        // The length prefix lets recovery frame each entry.
        wal.write_all(&(bytes.len() as u32).to_le_bytes())?;
        wal.write_all(&bytes)?;
        wal.flush()?;
    }
    Ok(seq)
}
|
||||
|
||||
/// Mark a WAL sequence as committed.
///
/// NOTE(review): this only flushes the writer; it does not rewrite the
/// entry's `committed` flag on disk, so recovery will replay these
/// entries again — it relies on replay being idempotent. Confirm this is
/// intentional.
fn commit_wal(&self, _seq: u64) -> Result<(), StorageError> {
    if let Some(ref mut wal) = *self.wal_file.lock() {
        wal.flush()?;
    }
    Ok(())
}
|
||||
|
||||
/// Replay verified, uncommitted WAL entries in sequence order.
///
/// Scans every `*.wal` segment; entries that fail checksum verification
/// or cannot be decoded are skipped silently, and a short read on the
/// length prefix ends a segment.
fn recover_from_wal(&self) -> Result<(), StorageError> {
    let wal_dir = self.root.join("wal");
    let mut entries = Vec::new();
    for entry in fs::read_dir(&wal_dir)? {
        let path = entry?.path();
        if path.extension().map_or(false, |e| e == "wal") {
            let mut reader = BufReader::new(File::open(&path)?);
            loop {
                // Frame format: u32 LE length, then the encoded WalEntry.
                let mut len_bytes = [0u8; 4];
                if reader.read_exact(&mut len_bytes).is_err() {
                    break;
                }
                let mut buf = vec![0u8; u32::from_le_bytes(len_bytes) as usize];
                reader.read_exact(&mut buf)?;
                if let Ok((e, _)) = bincode::serde::decode_from_slice::<WalEntry, _>(
                    &buf,
                    bincode::config::standard(),
                ) {
                    // Only checksum-valid, uncommitted entries are replayed.
                    if e.verify() && !e.committed {
                        entries.push(e);
                    }
                }
            }
        }
    }
    // Replay in the original write order across all segments.
    entries.sort_by_key(|e| e.sequence);
    for e in entries {
        self.apply_wal_operation(&e.operation)?;
    }
    Ok(())
}
|
||||
|
||||
/// Apply a single recovered WAL operation to disk and the caches.
///
/// Note: edge replay does not touch `adjacency_cache`; during startup
/// `load_cache` rebuilds adjacency from the edge files afterwards.
fn apply_wal_operation(&self, op: &WalOperation) -> Result<(), StorageError> {
    match op {
        WalOperation::StoreNode { node_id, state } => {
            self.write_node_file(node_id, state)?;
            self.node_cache
                .write()
                .insert(node_id.clone(), state.clone());
        }
        WalOperation::DeleteNode { node_id } => {
            self.delete_node_file(node_id)?;
            self.node_cache.write().remove(node_id);
        }
        WalOperation::StoreEdge {
            source,
            target,
            weight,
        } => {
            self.write_edge_file(source, target, *weight)?;
            self.edge_cache
                .write()
                .insert((source.clone(), target.clone()), *weight);
        }
        WalOperation::DeleteEdge { source, target } => {
            self.delete_edge_file(source, target)?;
            self.edge_cache
                .write()
                .remove(&(source.clone(), target.clone()));
        }
        WalOperation::StorePolicy { policy_id, data } => {
            self.write_data_file("policies", policy_id, data)?;
        }
        WalOperation::StoreWitness { witness_id, data } => {
            self.write_data_file("witnesses", witness_id, data)?;
        }
        WalOperation::StoreLineage { lineage_id, data } => {
            self.write_data_file("lineages", lineage_id, data)?;
        }
    }
    Ok(())
}
|
||||
|
||||
/// Warm the node/edge/adjacency caches from the files on disk.
///
/// Unreadable individual files are skipped rather than failing startup.
///
/// NOTE(review): edge file names are parsed by splitting the stem on the
/// FIRST '_', but identifiers may themselves contain '_' (allowed by
/// validate_path_id), so e.g. source "a_b" + target "c" and source "a" +
/// target "b_c" produce colliding/ambiguous stems — confirm identifiers
/// used in practice avoid underscores.
fn load_cache(&self) -> Result<(), StorageError> {
    let nodes_dir = self.root.join("nodes");
    if nodes_dir.exists() {
        for entry in fs::read_dir(&nodes_dir)? {
            let path = entry?.path();
            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
                if let Ok(state) = self.read_node_file(stem) {
                    self.node_cache.write().insert(stem.to_string(), state);
                }
            }
        }
    }
    let edges_dir = self.root.join("edges");
    if edges_dir.exists() {
        for entry in fs::read_dir(&edges_dir)? {
            let path = entry?.path();
            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
                // Stem format is "<source>_<target>"; split on the first '_'.
                let parts: Vec<&str> = stem.splitn(2, '_').collect();
                if parts.len() == 2 {
                    if let Ok(weight) = self.read_edge_file(parts[0], parts[1]) {
                        self.edge_cache
                            .write()
                            .insert((parts[0].to_string(), parts[1].to_string()), weight);
                        // Adjacency is maintained symmetrically in both directions.
                        let mut adj = self.adjacency_cache.write();
                        adj.entry(parts[0].to_string())
                            .or_default()
                            .insert(parts[1].to_string());
                        adj.entry(parts[1].to_string())
                            .or_default()
                            .insert(parts[0].to_string());
                    }
                }
            }
        }
    }
    Ok(())
}
|
||||
|
||||
/// Serialize a node's state vector to its file in the configured format.
///
/// The caller is responsible for validating `node_id` (see `node_path`).
fn write_node_file(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
    let path = self.node_path(node_id);
    let mut writer = BufWriter::new(File::create(&path)?);
    match self.format {
        StorageFormat::Json => serde_json::to_writer(&mut writer, state)
            .map_err(|e| StorageError::Serialization(e.to_string()))?,
        StorageFormat::Bincode => {
            let bytes = bincode::serde::encode_to_vec(state, bincode::config::standard())
                .map_err(|e| StorageError::Serialization(e.to_string()))?;
            writer.write_all(&bytes)?;
        }
    }
    writer.flush()?;
    Ok(())
}
|
||||
|
||||
/// Read and deserialize a node's state vector in the configured format.
///
/// A missing file surfaces as `StorageError::Io` with `NotFound`
/// (translated to `Ok(None)` by `get_node`).
fn read_node_file(&self, node_id: &str) -> Result<Vec<f32>, StorageError> {
    let mut reader = BufReader::new(File::open(self.node_path(node_id))?);
    match self.format {
        StorageFormat::Json => serde_json::from_reader(reader)
            .map_err(|e| StorageError::Serialization(e.to_string())),
        StorageFormat::Bincode => {
            let mut bytes = Vec::new();
            reader.read_to_end(&mut bytes)?;
            let (result, _) =
                bincode::serde::decode_from_slice(&bytes, bincode::config::standard())
                    .map_err(|e| StorageError::Serialization(e.to_string()))?;
            Ok(result)
        }
    }
}
|
||||
|
||||
/// Remove a node's file if it exists; a missing file is not an error.
fn delete_node_file(&self, node_id: &str) -> Result<(), StorageError> {
    let path = self.node_path(node_id);
    if path.exists() {
        fs::remove_file(&path)?;
    }
    Ok(())
}
|
||||
|
||||
/// Build the on-disk path for `node_id`'s state file.
///
/// Note: Caller must validate node_id first using validate_path_id()
fn node_path(&self, node_id: &str) -> PathBuf {
    // Extension follows the serialization format.
    let ext = match self.format {
        StorageFormat::Json => "json",
        StorageFormat::Bincode => "bin",
    };
    let file_name = format!("{}.{}", node_id, ext);
    self.root.join("nodes").join(file_name)
}
|
||||
|
||||
/// Validate node_id and return the safe path
|
||||
fn safe_node_path(&self, node_id: &str) -> Result<PathBuf, StorageError> {
|
||||
validate_path_id(node_id)?;
|
||||
Ok(self.node_path(node_id))
|
||||
}
|
||||
|
||||
/// Serialize an edge weight to its file in the configured format.
///
/// The caller is responsible for validating both identifiers
/// (see `edge_path`).
fn write_edge_file(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
    let mut writer = BufWriter::new(File::create(self.edge_path(source, target))?);
    match self.format {
        StorageFormat::Json => serde_json::to_writer(&mut writer, &weight)
            .map_err(|e| StorageError::Serialization(e.to_string()))?,
        StorageFormat::Bincode => {
            let bytes = bincode::serde::encode_to_vec(&weight, bincode::config::standard())
                .map_err(|e| StorageError::Serialization(e.to_string()))?;
            writer.write_all(&bytes)?;
        }
    }
    writer.flush()?;
    Ok(())
}
|
||||
|
||||
/// Read and deserialize an edge weight in the configured format.
fn read_edge_file(&self, source: &str, target: &str) -> Result<f32, StorageError> {
    let mut reader = BufReader::new(File::open(self.edge_path(source, target))?);
    match self.format {
        StorageFormat::Json => serde_json::from_reader(reader)
            .map_err(|e| StorageError::Serialization(e.to_string())),
        StorageFormat::Bincode => {
            let mut bytes = Vec::new();
            reader.read_to_end(&mut bytes)?;
            let (result, _) =
                bincode::serde::decode_from_slice(&bytes, bincode::config::standard())
                    .map_err(|e| StorageError::Serialization(e.to_string()))?;
            Ok(result)
        }
    }
}
|
||||
|
||||
/// Remove an edge's file if it exists; a missing file is not an error.
fn delete_edge_file(&self, source: &str, target: &str) -> Result<(), StorageError> {
    let path = self.edge_path(source, target);
    if path.exists() {
        fs::remove_file(&path)?;
    }
    Ok(())
}
|
||||
|
||||
/// Build the on-disk path for the (source, target) edge file.
///
/// Note: Caller must validate source and target first using
/// validate_path_id(). The stem is "<source>_<target>"; since identifiers
/// may themselves contain '_', parsing stems back is ambiguous — see the
/// note on load_cache.
fn edge_path(&self, source: &str, target: &str) -> PathBuf {
    let ext = match self.format {
        StorageFormat::Json => "json",
        StorageFormat::Bincode => "bin",
    };
    let file_name = format!("{}_{}.{}", source, target, ext);
    self.root.join("edges").join(file_name)
}
|
||||
|
||||
/// Validate edge identifiers and return the safe path
|
||||
fn safe_edge_path(&self, source: &str, target: &str) -> Result<PathBuf, StorageError> {
|
||||
validate_path_id(source)?;
|
||||
validate_path_id(target)?;
|
||||
Ok(self.edge_path(source, target))
|
||||
}
|
||||
|
||||
/// Write an opaque blob to `<root>/<dir>/<id>.bin`.
fn write_data_file(&self, dir: &str, id: &str, data: &[u8]) -> Result<(), StorageError> {
    // Validate both directory name and id to prevent path traversal
    validate_path_id(dir)?;
    validate_path_id(id)?;
    let mut file = File::create(self.root.join(dir).join(format!("{}.bin", id)))?;
    file.write_all(data)?;
    file.flush()?;
    Ok(())
}
|
||||
|
||||
/// Read an opaque blob from `<root>/<dir>/<id>.bin`.
fn read_data_file(&self, dir: &str, id: &str) -> Result<Vec<u8>, StorageError> {
    // Validate both directory name and id to prevent path traversal
    validate_path_id(dir)?;
    validate_path_id(id)?;
    let mut data = Vec::new();
    File::open(self.root.join(dir).join(format!("{}.bin", id)))?.read_to_end(&mut data)?;
    Ok(data)
}
|
||||
|
||||
/// Persist metadata as pretty JSON to `<root>/metadata.json`, refreshing
/// `modified_at` and the WAL high-water mark first.
fn save_metadata(&self) -> Result<(), StorageError> {
    let mut metadata = self.metadata.write();
    metadata.modified_at = chrono::Utc::now().timestamp_millis();
    metadata.last_wal_sequence = *self.wal_sequence.lock();
    serde_json::to_writer_pretty(
        BufWriter::new(File::create(self.root.join("metadata.json"))?),
        &*metadata,
    )
    .map_err(|e| StorageError::Serialization(e.to_string()))?;
    Ok(())
}
|
||||
|
||||
/// Flush metadata to disk if anything changed since the last sync.
///
/// NOTE(review): the dirty flag is read and then cleared under two
/// separate lock acquisitions, so a concurrent mutation in between could
/// be marked clean without being persisted — confirm whether this race
/// matters for callers.
pub fn sync(&self) -> Result<(), StorageError> {
    if *self.cache_dirty.read() {
        self.save_metadata()?;
        *self.cache_dirty.write() = false;
    }
    Ok(())
}
|
||||
|
||||
/// Compact the WAL.
///
/// NOTE(review): currently this only persists metadata (recording the
/// WAL high-water mark); no WAL segments are deleted or rewritten.
pub fn compact_wal(&self) -> Result<(), StorageError> {
    self.save_metadata()
}
|
||||
|
||||
/// Snapshot current storage statistics: counts from the in-memory
/// caches, timestamps from the persisted metadata mirror.
#[must_use]
pub fn stats(&self) -> StorageStats {
    let metadata = self.metadata.read();
    StorageStats {
        node_count: self.node_cache.read().len(),
        edge_count: self.edge_cache.read().len(),
        wal_sequence: *self.wal_sequence.lock(),
        root_path: self.root.clone(),
        format: self.format,
        wal_enabled: self.wal_enabled,
        created_at: metadata.created_at,
        modified_at: metadata.modified_at,
    }
}
|
||||
|
||||
/// Cosine similarity of two equal-length vectors.
///
/// Returns 0.0 for mismatched lengths, empty inputs, or when either
/// vector has zero norm.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    // Single pass accumulating the dot product and both squared norms.
    let (mut dot, mut na, mut nb) = (0.0f32, 0.0f32, 0.0f32);
    for (x, y) in a.iter().zip(b.iter()) {
        dot += x * y;
        na += x * x;
        nb += y * y;
    }
    // Squared norm is zero exactly when the norm is zero.
    if na == 0.0 || nb == 0.0 {
        return 0.0;
    }
    dot / (na.sqrt() * nb.sqrt())
}
|
||||
}
|
||||
|
||||
/// Point-in-time view of storage state, produced by `FileStorage::stats`.
#[derive(Debug, Clone)]
pub struct StorageStats {
    /// Nodes currently held in the node cache.
    pub node_count: usize,
    /// Edges currently held in the edge cache.
    pub edge_count: usize,
    /// Last assigned WAL sequence number.
    pub wal_sequence: u64,
    /// Root directory of the storage.
    pub root_path: PathBuf,
    /// Serialization format in use.
    pub format: StorageFormat,
    /// Whether the WAL is enabled.
    pub wal_enabled: bool,
    /// Creation time from metadata (milliseconds since the Unix epoch).
    pub created_at: i64,
    /// Last modification time from metadata (milliseconds since the Unix epoch).
    pub modified_at: i64,
}
|
||||
|
||||
impl Drop for FileStorage {
    /// Best-effort metadata flush on drop; errors are intentionally ignored
    /// because there is no way to report them from a destructor.
    fn drop(&mut self) {
        let _ = self.sync();
    }
}
|
||||
|
||||
impl GraphStorage for FileStorage {
    /// Persist a node state: WAL first, then the file, then the cache.
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
        // Validate node_id to prevent path traversal
        validate_path_id(node_id)?;
        let seq = self.write_wal(WalOperation::StoreNode {
            node_id: node_id.to_string(),
            state: state.to_vec(),
        })?;
        self.write_node_file(node_id, state)?;
        self.node_cache
            .write()
            .insert(node_id.to_string(), state.to_vec());
        {
            // Keep the metadata counter in step with the cache.
            let mut m = self.metadata.write();
            m.node_count = self.node_cache.read().len() as u64;
        }
        self.commit_wal(seq)?;
        *self.cache_dirty.write() = true;
        Ok(())
    }

    /// Fetch a node state, consulting the cache before falling back to
    /// disk. A missing file maps to `Ok(None)`.
    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
        // Validate node_id to prevent path traversal
        validate_path_id(node_id)?;
        if let Some(state) = self.node_cache.read().get(node_id) {
            return Ok(Some(state.clone()));
        }
        match self.read_node_file(node_id) {
            Ok(state) => {
                // Populate the cache for subsequent reads.
                self.node_cache
                    .write()
                    .insert(node_id.to_string(), state.clone());
                Ok(Some(state))
            }
            Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(e) => Err(e),
        }
    }

    /// Persist a weighted edge and keep the symmetric adjacency view in sync.
    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
        // Validate identifiers to prevent path traversal
        validate_path_id(source)?;
        validate_path_id(target)?;
        let seq = self.write_wal(WalOperation::StoreEdge {
            source: source.to_string(),
            target: target.to_string(),
            weight,
        })?;
        self.write_edge_file(source, target, weight)?;
        self.edge_cache
            .write()
            .insert((source.to_string(), target.to_string()), weight);
        {
            // Adjacency is recorded in both directions.
            let mut adj = self.adjacency_cache.write();
            adj.entry(source.to_string())
                .or_default()
                .insert(target.to_string());
            adj.entry(target.to_string())
                .or_default()
                .insert(source.to_string());
        }
        {
            let mut m = self.metadata.write();
            m.edge_count = self.edge_cache.read().len() as u64;
        }
        self.commit_wal(seq)?;
        *self.cache_dirty.write() = true;
        Ok(())
    }

    /// Delete an edge from disk, the edge cache, and both adjacency sets.
    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
        // Validate identifiers to prevent path traversal
        validate_path_id(source)?;
        validate_path_id(target)?;
        let seq = self.write_wal(WalOperation::DeleteEdge {
            source: source.to_string(),
            target: target.to_string(),
        })?;
        self.delete_edge_file(source, target)?;
        self.edge_cache
            .write()
            .remove(&(source.to_string(), target.to_string()));
        {
            let mut adj = self.adjacency_cache.write();
            if let Some(n) = adj.get_mut(source) {
                n.remove(target);
            }
            if let Some(n) = adj.get_mut(target) {
                n.remove(source);
            }
        }
        {
            let mut m = self.metadata.write();
            m.edge_count = self.edge_cache.read().len() as u64;
        }
        self.commit_wal(seq)?;
        *self.cache_dirty.write() = true;
        Ok(())
    }

    /// Brute-force k-nearest-neighbor search by cosine similarity over the
    /// cached node states; NaN similarities compare as equal when sorting.
    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
        if query.is_empty() {
            return Ok(Vec::new());
        }
        let nodes = self.node_cache.read();
        let mut sims: Vec<_> = nodes
            .iter()
            .map(|(id, s)| (id.clone(), Self::cosine_similarity(query, s)))
            .collect();
        // Descending by similarity.
        sims.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        sims.truncate(k);
        Ok(sims)
    }
}
|
||||
|
||||
impl GovernanceStorage for FileStorage {
|
||||
/// Store a policy bundle under a fresh UUID and return that id.
fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
    let id = Uuid::new_v4().to_string();
    let seq = self.write_wal(WalOperation::StorePolicy {
        policy_id: id.clone(),
        data: bundle.to_vec(),
    })?;
    self.write_data_file("policies", &id, bundle)?;
    self.commit_wal(seq)?;
    *self.cache_dirty.write() = true;
    Ok(id)
}
|
||||
|
||||
/// Fetch a policy bundle by id; a missing file maps to `Ok(None)`.
fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
    match self.read_data_file("policies", id) {
        Ok(d) => Ok(Some(d)),
        Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
        Err(e) => Err(e),
    }
}
|
||||
|
||||
/// Store a witness record under a fresh UUID and return that id.
fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
    let id = Uuid::new_v4().to_string();
    let seq = self.write_wal(WalOperation::StoreWitness {
        witness_id: id.clone(),
        data: witness.to_vec(),
    })?;
    self.write_data_file("witnesses", &id, witness)?;
    self.commit_wal(seq)?;
    *self.cache_dirty.write() = true;
    Ok(id)
}
|
||||
|
||||
fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
|
||||
let mut results = Vec::new();
|
||||
let dir = self.root.join("witnesses");
|
||||
if dir.exists() {
|
||||
for entry in fs::read_dir(&dir)? {
|
||||
let path = entry?.path();
|
||||
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
|
||||
if let Ok(data) = self.read_data_file("witnesses", stem) {
|
||||
if data
|
||||
.windows(action_id.len())
|
||||
.any(|w| w == action_id.as_bytes())
|
||||
{
|
||||
results.push(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Store a lineage record under a fresh UUID and return that id.
fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
    let id = Uuid::new_v4().to_string();
    let seq = self.write_wal(WalOperation::StoreLineage {
        lineage_id: id.clone(),
        data: lineage.to_vec(),
    })?;
    self.write_data_file("lineages", &id, lineage)?;
    self.commit_wal(seq)?;
    *self.cache_dirty.write() = true;
    Ok(id)
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Round-trip a node state through store/get with default options.
    #[test]
    fn test_file_storage_nodes() {
        let temp_dir = TempDir::new().unwrap();
        let storage = FileStorage::new(temp_dir.path()).unwrap();
        storage.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
        let state = storage.get_node("node-1").unwrap();
        assert!(state.is_some());
        assert_eq!(state.unwrap(), vec![1.0, 0.0, 0.0]);
    }

    /// Storing then deleting an edge leaves the edge count at zero.
    #[test]
    fn test_file_storage_edges() {
        let temp_dir = TempDir::new().unwrap();
        let storage = FileStorage::new(temp_dir.path()).unwrap();
        storage.store_edge("a", "b", 1.0).unwrap();
        storage.delete_edge("a", "b").unwrap();
        assert_eq!(storage.stats().edge_count, 0);
    }

    /// JSON format round-trips node state too (WAL disabled here).
    #[test]
    fn test_storage_format_json() {
        let temp_dir = TempDir::new().unwrap();
        let storage =
            FileStorage::with_options(temp_dir.path(), StorageFormat::Json, false).unwrap();
        storage.store_node("json-node", &[1.0, 2.0]).unwrap();
        let state = storage.get_node("json-node").unwrap();
        assert_eq!(state.unwrap(), vec![1.0, 2.0]);
    }
}
|
||||
730
vendor/ruvector/crates/prime-radiant/src/storage/memory.rs
vendored
Normal file
730
vendor/ruvector/crates/prime-radiant/src/storage/memory.rs
vendored
Normal file
@@ -0,0 +1,730 @@
|
||||
//! In-Memory Storage Implementation
|
||||
//!
|
||||
//! Thread-safe in-memory storage for testing and development.
|
||||
//! Uses `parking_lot::RwLock` for high-performance concurrent access.
|
||||
//!
|
||||
//! # Usage
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use prime_radiant::storage::{InMemoryStorage, GraphStorage, GovernanceStorage};
|
||||
//!
|
||||
//! let storage = InMemoryStorage::new();
|
||||
//!
|
||||
//! // Store node states
|
||||
//! storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
|
||||
//!
|
||||
//! // Store edges
|
||||
//! storage.store_edge("node-1", "node-2", 1.0)?;
|
||||
//!
|
||||
//! // Store policies
|
||||
//! let policy_id = storage.store_policy(b"policy-data")?;
|
||||
//! ```
|
||||
|
||||
use super::{GovernanceStorage, GraphStorage, StorageConfig, StorageError};
|
||||
use ordered_float::OrderedFloat;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// In-memory storage implementation for testing and development.
///
/// This implementation provides:
/// - Thread-safe access via `parking_lot::RwLock`
/// - Efficient KNN search using brute-force (suitable for small datasets)
/// - Full governance storage support
/// - No persistence (data is lost on drop)
#[derive(Debug)]
pub struct InMemoryStorage {
    /// Node states: node_id -> state vector
    nodes: RwLock<HashMap<String, Vec<f32>>>,

    /// Edges: (source, target) -> weight
    edges: RwLock<HashMap<(String, String), f32>>,

    /// Adjacency list for efficient neighbor lookup: node_id -> set of neighbors
    adjacency: RwLock<HashMap<String, HashSet<String>>>,

    /// Policy bundles: policy_id -> serialized data
    policies: RwLock<HashMap<String, Vec<u8>>>,

    /// Witness records: witness_id -> serialized data
    witnesses: RwLock<HashMap<String, Vec<u8>>>,

    /// Witness records by action: action_id -> list of witness_ids
    witnesses_by_action: RwLock<HashMap<String, Vec<String>>>,

    /// Lineage records: lineage_id -> serialized data
    lineages: RwLock<HashMap<String, Vec<u8>>>,

    /// Event log for audit trail (append-only, unbounded)
    event_log: RwLock<Vec<StorageEvent>>,

    /// Configuration (currently unused beyond construction)
    #[allow(dead_code)]
    config: StorageConfig,
}
|
||||
|
||||
/// Storage event for audit logging
#[derive(Debug, Clone)]
pub struct StorageEvent {
    /// Event timestamp (milliseconds since epoch)
    pub timestamp: i64,
    /// Event type
    pub event_type: StorageEventType,
    /// Entity ID involved
    pub entity_id: String,
    /// Optional details
    pub details: Option<String>,
}
|
||||
|
||||
/// Type of storage event
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageEventType {
    /// Node stored
    NodeStored,
    /// Node retrieved
    NodeRetrieved,
    /// Node deleted
    NodeDeleted,
    /// Edge stored
    EdgeStored,
    /// Edge deleted
    EdgeDeleted,
    /// Policy stored
    PolicyStored,
    /// Policy retrieved
    PolicyRetrieved,
    /// Witness stored
    WitnessStored,
    /// Witness retrieved
    WitnessRetrieved,
    /// Lineage stored
    LineageStored,
}
|
||||
|
||||
impl InMemoryStorage {
|
||||
/// Create a new in-memory storage instance using the default
/// `StorageConfig`.
#[must_use]
pub fn new() -> Self {
    Self::with_config(StorageConfig::default())
}
|
||||
|
||||
/// Create a new in-memory storage instance with custom configuration.
/// All maps and the event log start empty.
#[must_use]
pub fn with_config(config: StorageConfig) -> Self {
    Self {
        nodes: RwLock::new(HashMap::new()),
        edges: RwLock::new(HashMap::new()),
        adjacency: RwLock::new(HashMap::new()),
        policies: RwLock::new(HashMap::new()),
        witnesses: RwLock::new(HashMap::new()),
        witnesses_by_action: RwLock::new(HashMap::new()),
        lineages: RwLock::new(HashMap::new()),
        event_log: RwLock::new(Vec::new()),
        config,
    }
}
|
||||
|
||||
/// Get the number of stored nodes.
#[must_use]
pub fn node_count(&self) -> usize {
    self.nodes.read().len()
}
|
||||
|
||||
/// Get the number of stored edges.
#[must_use]
pub fn edge_count(&self) -> usize {
    self.edges.read().len()
}
|
||||
|
||||
/// Get all node IDs (in arbitrary hash-map order).
#[must_use]
pub fn node_ids(&self) -> Vec<String> {
    self.nodes.read().keys().cloned().collect()
}
|
||||
|
||||
/// Get all edges as (source, target, weight) tuples.
|
||||
#[must_use]
|
||||
pub fn all_edges(&self) -> Vec<(String, String, f32)> {
|
||||
self.edges
|
||||
.read()
|
||||
.iter()
|
||||
.map(|((s, t), w)| (s.clone(), t.clone(), *w))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get neighbors of a node.
|
||||
#[must_use]
|
||||
pub fn get_neighbors(&self, node_id: &str) -> Vec<String> {
|
||||
self.adjacency
|
||||
.read()
|
||||
.get(node_id)
|
||||
.map(|set| set.iter().cloned().collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Clear all stored data.
|
||||
pub fn clear(&self) {
|
||||
self.nodes.write().clear();
|
||||
self.edges.write().clear();
|
||||
self.adjacency.write().clear();
|
||||
self.policies.write().clear();
|
||||
self.witnesses.write().clear();
|
||||
self.witnesses_by_action.write().clear();
|
||||
self.lineages.write().clear();
|
||||
self.event_log.write().clear();
|
||||
}
|
||||
|
||||
/// Get the event log for audit purposes.
|
||||
#[must_use]
|
||||
pub fn get_event_log(&self) -> Vec<StorageEvent> {
|
||||
self.event_log.read().clone()
|
||||
}
|
||||
|
||||
/// Log a storage event.
|
||||
fn log_event(&self, event_type: StorageEventType, entity_id: String, details: Option<String>) {
|
||||
let event = StorageEvent {
|
||||
timestamp: chrono::Utc::now().timestamp_millis(),
|
||||
event_type,
|
||||
entity_id,
|
||||
details,
|
||||
};
|
||||
self.event_log.write().push(event);
|
||||
}
|
||||
|
||||
/// Compute cosine similarity between two vectors.
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() || a.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a == 0.0 || norm_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot / (norm_a * norm_b)
|
||||
}
|
||||
|
||||
/// Compute L2 (Euclidean) distance between two vectors.
|
||||
fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return f32::INFINITY;
|
||||
}
|
||||
|
||||
a.iter()
|
||||
.zip(b.iter())
|
||||
.map(|(x, y)| (x - y).powi(2))
|
||||
.sum::<f32>()
|
||||
.sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for InMemoryStorage {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl GraphStorage for InMemoryStorage {
|
||||
fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
|
||||
self.nodes
|
||||
.write()
|
||||
.insert(node_id.to_string(), state.to_vec());
|
||||
self.log_event(
|
||||
StorageEventType::NodeStored,
|
||||
node_id.to_string(),
|
||||
Some(format!("dim={}", state.len())),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
|
||||
let result = self.nodes.read().get(node_id).cloned();
|
||||
if result.is_some() {
|
||||
self.log_event(StorageEventType::NodeRetrieved, node_id.to_string(), None);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
|
||||
let key = (source.to_string(), target.to_string());
|
||||
self.edges.write().insert(key, weight);
|
||||
|
||||
// Update adjacency list (both directions for undirected graph semantics)
|
||||
{
|
||||
let mut adj = self.adjacency.write();
|
||||
adj.entry(source.to_string())
|
||||
.or_default()
|
||||
.insert(target.to_string());
|
||||
adj.entry(target.to_string())
|
||||
.or_default()
|
||||
.insert(source.to_string());
|
||||
}
|
||||
|
||||
self.log_event(
|
||||
StorageEventType::EdgeStored,
|
||||
format!("{}->{}", source, target),
|
||||
Some(format!("weight={}", weight)),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
|
||||
let key = (source.to_string(), target.to_string());
|
||||
self.edges.write().remove(&key);
|
||||
|
||||
// Update adjacency list
|
||||
{
|
||||
let mut adj = self.adjacency.write();
|
||||
if let Some(neighbors) = adj.get_mut(source) {
|
||||
neighbors.remove(target);
|
||||
}
|
||||
if let Some(neighbors) = adj.get_mut(target) {
|
||||
neighbors.remove(source);
|
||||
}
|
||||
}
|
||||
|
||||
self.log_event(
|
||||
StorageEventType::EdgeDeleted,
|
||||
format!("{}->{}", source, target),
|
||||
None,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
|
||||
if query.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let nodes = self.nodes.read();
|
||||
|
||||
// Use a BTreeMap for efficient top-k extraction (sorted by similarity)
|
||||
let mut similarities: BTreeMap<OrderedFloat<f32>, Vec<String>> = BTreeMap::new();
|
||||
|
||||
for (node_id, state) in nodes.iter() {
|
||||
let similarity = Self::cosine_similarity(query, state);
|
||||
similarities
|
||||
.entry(OrderedFloat(-similarity)) // Negative for descending order
|
||||
.or_default()
|
||||
.push(node_id.clone());
|
||||
}
|
||||
|
||||
// Extract top k results
|
||||
let mut results = Vec::with_capacity(k);
|
||||
for (neg_sim, node_ids) in similarities {
|
||||
for node_id in node_ids {
|
||||
if results.len() >= k {
|
||||
break;
|
||||
}
|
||||
results.push((node_id, -neg_sim.0));
|
||||
}
|
||||
if results.len() >= k {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
impl GovernanceStorage for InMemoryStorage {
|
||||
fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
|
||||
let id = Uuid::new_v4().to_string();
|
||||
self.policies.write().insert(id.clone(), bundle.to_vec());
|
||||
self.log_event(
|
||||
StorageEventType::PolicyStored,
|
||||
id.clone(),
|
||||
Some(format!("size={}", bundle.len())),
|
||||
);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
|
||||
let result = self.policies.read().get(id).cloned();
|
||||
if result.is_some() {
|
||||
self.log_event(StorageEventType::PolicyRetrieved, id.to_string(), None);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
|
||||
let id = Uuid::new_v4().to_string();
|
||||
self.witnesses.write().insert(id.clone(), witness.to_vec());
|
||||
self.log_event(
|
||||
StorageEventType::WitnessStored,
|
||||
id.clone(),
|
||||
Some(format!("size={}", witness.len())),
|
||||
);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
|
||||
let witness_ids = self.witnesses_by_action.read();
|
||||
let witnesses = self.witnesses.read();
|
||||
|
||||
let ids = witness_ids.get(action_id);
|
||||
if ids.is_none() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let result: Vec<Vec<u8>> = ids
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter_map(|id| witnesses.get(id).cloned())
|
||||
.collect();
|
||||
|
||||
if !result.is_empty() {
|
||||
self.log_event(
|
||||
StorageEventType::WitnessRetrieved,
|
||||
action_id.to_string(),
|
||||
Some(format!("count={}", result.len())),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
|
||||
let id = Uuid::new_v4().to_string();
|
||||
self.lineages.write().insert(id.clone(), lineage.to_vec());
|
||||
self.log_event(
|
||||
StorageEventType::LineageStored,
|
||||
id.clone(),
|
||||
Some(format!("size={}", lineage.len())),
|
||||
);
|
||||
Ok(id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extended in-memory storage with additional indexing capabilities.
///
/// Wraps [`InMemoryStorage`] and maintains two secondary indexes on top of it:
/// a tag -> node-id index and a policy-name -> policy-id index.
#[derive(Debug)]
pub struct IndexedInMemoryStorage {
    /// Base storage holding the actual data.
    base: InMemoryStorage,

    /// Node metadata index: tag -> set of node_ids carrying that tag.
    node_tags: RwLock<HashMap<String, HashSet<String>>>,

    /// Policy metadata index: human-readable name -> policy_id.
    policy_by_name: RwLock<HashMap<String, String>>,
}
|
||||
|
||||
impl IndexedInMemoryStorage {
|
||||
/// Create a new indexed in-memory storage.
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
base: InMemoryStorage::new(),
|
||||
node_tags: RwLock::new(HashMap::new()),
|
||||
policy_by_name: RwLock::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Store a node with tags for indexing.
|
||||
pub fn store_node_with_tags(
|
||||
&self,
|
||||
node_id: &str,
|
||||
state: &[f32],
|
||||
tags: &[&str],
|
||||
) -> Result<(), StorageError> {
|
||||
self.base.store_node(node_id, state)?;
|
||||
|
||||
let mut tag_index = self.node_tags.write();
|
||||
for tag in tags {
|
||||
tag_index
|
||||
.entry((*tag).to_string())
|
||||
.or_default()
|
||||
.insert(node_id.to_string());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find nodes by tag.
|
||||
#[must_use]
|
||||
pub fn find_by_tag(&self, tag: &str) -> Vec<String> {
|
||||
self.node_tags
|
||||
.read()
|
||||
.get(tag)
|
||||
.map(|set| set.iter().cloned().collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Store a policy with a name for lookup.
|
||||
pub fn store_policy_with_name(
|
||||
&self,
|
||||
name: &str,
|
||||
bundle: &[u8],
|
||||
) -> Result<String, StorageError> {
|
||||
let id = self.base.store_policy(bundle)?;
|
||||
self.policy_by_name
|
||||
.write()
|
||||
.insert(name.to_string(), id.clone());
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Get a policy by name.
|
||||
pub fn get_policy_by_name(&self, name: &str) -> Result<Option<Vec<u8>>, StorageError> {
|
||||
let id = self.policy_by_name.read().get(name).cloned();
|
||||
match id {
|
||||
Some(id) => self.base.get_policy(&id),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the base storage for direct access.
|
||||
#[must_use]
|
||||
pub fn base(&self) -> &InMemoryStorage {
|
||||
&self.base
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IndexedInMemoryStorage {
    /// Equivalent to [`IndexedInMemoryStorage::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
// GraphStorage is implemented by pure delegation to the wrapped base storage;
// the tag index is only touched via `store_node_with_tags`.
impl GraphStorage for IndexedInMemoryStorage {
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
        self.base.store_node(node_id, state)
    }

    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
        self.base.get_node(node_id)
    }

    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
        self.base.store_edge(source, target, weight)
    }

    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
        self.base.delete_edge(source, target)
    }

    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
        self.base.find_similar(query, k)
    }
}
|
||||
|
||||
// GovernanceStorage is implemented by pure delegation to the wrapped base
// storage; the name index is only touched via `store_policy_with_name`.
impl GovernanceStorage for IndexedInMemoryStorage {
    fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
        self.base.store_policy(bundle)
    }

    fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
        self.base.get_policy(id)
    }

    fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
        self.base.store_witness(witness)
    }

    fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
        self.base.get_witnesses_for_action(action_id)
    }

    fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
        self.base.store_lineage(lineage)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_in_memory_storage_nodes() {
        let storage = InMemoryStorage::new();

        // Store two nodes with distinct state vectors.
        storage.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
        storage.store_node("node-2", &[0.0, 1.0, 0.0]).unwrap();

        assert_eq!(storage.node_count(), 2);

        // Retrieve node: state must round-trip exactly.
        let state = storage.get_node("node-1").unwrap();
        assert!(state.is_some());
        assert_eq!(state.unwrap(), vec![1.0, 0.0, 0.0]);

        // Non-existent node: Ok(None), not an error.
        let missing = storage.get_node("node-999").unwrap();
        assert!(missing.is_none());
    }

    #[test]
    fn test_in_memory_storage_edges() {
        let storage = InMemoryStorage::new();

        // Store nodes
        storage.store_node("a", &[1.0]).unwrap();
        storage.store_node("b", &[2.0]).unwrap();
        storage.store_node("c", &[3.0]).unwrap();

        // Store edges
        storage.store_edge("a", "b", 1.0).unwrap();
        storage.store_edge("b", "c", 2.0).unwrap();

        assert_eq!(storage.edge_count(), 2);

        // Adjacency is undirected: "b" sees both endpoints.
        let neighbors = storage.get_neighbors("b");
        assert_eq!(neighbors.len(), 2);
        assert!(neighbors.contains(&"a".to_string()));
        assert!(neighbors.contains(&"c".to_string()));

        // Delete edge: both the edge map and adjacency must be updated.
        storage.delete_edge("a", "b").unwrap();
        assert_eq!(storage.edge_count(), 1);

        let neighbors = storage.get_neighbors("b");
        assert_eq!(neighbors.len(), 1);
        assert!(!neighbors.contains(&"a".to_string()));
    }

    #[test]
    fn test_find_similar() {
        let storage = InMemoryStorage::new();

        // Store nodes with different orientations (unit-ish vectors).
        storage.store_node("north", &[0.0, 1.0, 0.0]).unwrap();
        storage.store_node("south", &[0.0, -1.0, 0.0]).unwrap();
        storage.store_node("east", &[1.0, 0.0, 0.0]).unwrap();
        storage
            .store_node("northeast", &[0.707, 0.707, 0.0])
            .unwrap();

        // Query for vectors similar to north; expect descending similarity.
        let query = vec![0.0, 1.0, 0.0];
        let results = storage.find_similar(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, "north");
        assert!((results[0].1 - 1.0).abs() < 0.001); // Perfect match
        assert_eq!(results[1].0, "northeast"); // Second closest
    }

    #[test]
    fn test_governance_storage() {
        let storage = InMemoryStorage::new();

        // Store policy; id is a generated UUID string.
        let policy_data = b"test policy data";
        let policy_id = storage.store_policy(policy_data).unwrap();

        // Retrieve policy: bytes must round-trip exactly.
        let retrieved = storage.get_policy(&policy_id).unwrap();
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), policy_data.to_vec());

        // Store witness
        let witness_data = b"test witness data";
        let witness_id = storage.store_witness(witness_data).unwrap();
        assert!(!witness_id.is_empty());

        // Store lineage
        let lineage_data = b"test lineage data";
        let lineage_id = storage.store_lineage(lineage_data).unwrap();
        assert!(!lineage_id.is_empty());
    }

    #[test]
    fn test_event_log() {
        let storage = InMemoryStorage::new();

        storage.store_node("test", &[1.0]).unwrap();
        storage.get_node("test").unwrap();
        storage.store_edge("a", "b", 1.0).unwrap();

        // Events are appended in operation order.
        let log = storage.get_event_log();
        assert_eq!(log.len(), 3);
        assert_eq!(log[0].event_type, StorageEventType::NodeStored);
        assert_eq!(log[1].event_type, StorageEventType::NodeRetrieved);
        assert_eq!(log[2].event_type, StorageEventType::EdgeStored);
    }

    #[test]
    fn test_clear() {
        let storage = InMemoryStorage::new();

        storage.store_node("node", &[1.0]).unwrap();
        storage.store_edge("a", "b", 1.0).unwrap();
        storage.store_policy(b"policy").unwrap();

        assert!(storage.node_count() > 0);

        // clear() wipes data AND the audit log.
        storage.clear();

        assert_eq!(storage.node_count(), 0);
        assert_eq!(storage.edge_count(), 0);
        assert_eq!(storage.get_event_log().len(), 0);
    }

    #[test]
    fn test_indexed_storage() {
        let storage = IndexedInMemoryStorage::new();

        // Store with tags; a node may appear under several tags.
        storage
            .store_node_with_tags("node-1", &[1.0, 0.0], &["important", "category-a"])
            .unwrap();
        storage
            .store_node_with_tags("node-2", &[0.0, 1.0], &["important"])
            .unwrap();
        storage
            .store_node_with_tags("node-3", &[1.0, 1.0], &["category-a"])
            .unwrap();

        // Find by tag
        let important = storage.find_by_tag("important");
        assert_eq!(important.len(), 2);

        let category_a = storage.find_by_tag("category-a");
        assert_eq!(category_a.len(), 2);

        // Store and retrieve policy by name
        storage
            .store_policy_with_name("default", b"default policy")
            .unwrap();

        let policy = storage.get_policy_by_name("default").unwrap();
        assert!(policy.is_some());
        assert_eq!(policy.unwrap(), b"default policy".to_vec());
    }

    #[test]
    fn test_cosine_similarity() {
        // Identical vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]);
        assert!((sim - 1.0).abs() < 0.001);

        // Orthogonal vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]);
        assert!(sim.abs() < 0.001);

        // Opposite vectors
        let sim = InMemoryStorage::cosine_similarity(&[1.0, 0.0], &[-1.0, 0.0]);
        assert!((sim - (-1.0)).abs() < 0.001);
    }

    #[test]
    fn test_l2_distance() {
        // Same point
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[0.0, 0.0]);
        assert!(dist.abs() < 0.001);

        // Unit distance
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[1.0, 0.0]);
        assert!((dist - 1.0).abs() < 0.001);

        // Diagonal
        let dist = InMemoryStorage::l2_distance(&[0.0, 0.0], &[1.0, 1.0]);
        assert!((dist - std::f32::consts::SQRT_2).abs() < 0.001);
    }
}
|
||||
575
vendor/ruvector/crates/prime-radiant/src/storage/mod.rs
vendored
Normal file
575
vendor/ruvector/crates/prime-radiant/src/storage/mod.rs
vendored
Normal file
@@ -0,0 +1,575 @@
|
||||
//! # Storage Layer Module
|
||||
//!
|
||||
//! Hybrid storage with PostgreSQL for transactional authority and ruvector for
|
||||
//! high-performance vector and graph queries.
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! ```text
|
||||
//! +----------------------------------------------+
|
||||
//! | Storage Layer |
|
||||
//! +----------------------------------------------+
|
||||
//! | |
|
||||
//! | +------------------+ +------------------+ |
|
||||
//! | | PostgreSQL | | ruvector | |
|
||||
//! | | (Authority) | | (Graph/Vector) | |
|
||||
//! | | | | | |
|
||||
//! | | - Policy bundles | | - Node states | |
|
||||
//! | | - Witnesses | | - Edge data | |
|
||||
//! | | - Lineage | | - HNSW index | |
|
||||
//! | | - Event log | | - Residual cache | |
|
||||
//! | +------------------+ +------------------+ |
|
||||
//! | |
|
||||
//! +----------------------------------------------+
|
||||
//! ```
|
||||
//!
|
||||
//! ## Storage Backends
|
||||
//!
|
||||
//! | Backend | Use Case | Features |
|
||||
//! |---------|----------|----------|
|
||||
//! | `InMemoryStorage` | Testing, Development | Thread-safe, fast, no persistence |
|
||||
//! | `FileStorage` | Embedded, Edge | WAL, JSON/bincode, persistence |
|
||||
//! | `PostgresStorage` | Production | ACID, indexes, concurrent access |
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use prime_radiant::storage::{
|
||||
//! InMemoryStorage, FileStorage, GraphStorage, GovernanceStorage,
|
||||
//! };
|
||||
//!
|
||||
//! // In-memory for testing
|
||||
//! let memory_storage = InMemoryStorage::new();
|
||||
//! memory_storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
|
||||
//!
|
||||
//! // File-based for persistence
|
||||
//! let file_storage = FileStorage::new("./data")?;
|
||||
//! file_storage.store_node("node-1", &[1.0, 0.0, 0.0])?;
|
||||
//!
|
||||
//! // PostgreSQL for production (feature-gated)
|
||||
//! #[cfg(feature = "postgres")]
|
||||
//! let pg_storage = PostgresStorage::connect("postgresql://localhost/db").await?;
|
||||
//! ```
|
||||
|
||||
// Module declarations
|
||||
mod file;
|
||||
mod memory;
|
||||
|
||||
#[cfg(feature = "postgres")]
|
||||
#[cfg_attr(docsrs, doc(cfg(feature = "postgres")))]
|
||||
mod postgres;
|
||||
|
||||
// Re-exports
|
||||
pub use file::{FileStorage, StorageFormat, StorageMetadata, StorageStats, WalEntry, WalOperation};
|
||||
pub use memory::{InMemoryStorage, IndexedInMemoryStorage, StorageEvent, StorageEventType};
|
||||
|
||||
#[cfg(feature = "postgres")]
|
||||
pub use postgres::{
|
||||
AsyncGraphStorageAdapter, EdgeRow, EventLogEntry, LineageRecordRow, NodeStateRow,
|
||||
PolicyBundleRow, PostgresConfig, PostgresStats, PostgresStorage, WitnessRecordRow,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Storage configuration shared by all backends.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// PostgreSQL connection string; `None` disables the PostgreSQL backend.
    pub postgres_url: Option<String>,
    /// Path for local graph storage (empty selects in-memory storage — see
    /// `StorageFactory::create_graph_storage`).
    pub graph_path: String,
    /// Path for the event log.
    pub event_log_path: String,
    /// Enable write-ahead logging for file-based storage.
    pub enable_wal: bool,
    /// Cache size in MB.
    pub cache_size_mb: usize,
}
|
||||
|
||||
impl Default for StorageConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
postgres_url: None,
|
||||
graph_path: "./data/graph".to_string(),
|
||||
event_log_path: "./data/events".to_string(),
|
||||
enable_wal: true,
|
||||
cache_size_mb: 256,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageConfig {
|
||||
/// Create a configuration for in-memory storage only.
|
||||
#[must_use]
|
||||
pub fn in_memory() -> Self {
|
||||
Self {
|
||||
postgres_url: None,
|
||||
graph_path: String::new(),
|
||||
event_log_path: String::new(),
|
||||
enable_wal: false,
|
||||
cache_size_mb: 256,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a configuration for file-based storage.
|
||||
#[must_use]
|
||||
pub fn file_based(path: impl Into<String>) -> Self {
|
||||
let path = path.into();
|
||||
Self {
|
||||
postgres_url: None,
|
||||
graph_path: path.clone(),
|
||||
event_log_path: format!("{}/events", path),
|
||||
enable_wal: true,
|
||||
cache_size_mb: 256,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a configuration for PostgreSQL storage.
|
||||
#[must_use]
|
||||
pub fn postgres(url: impl Into<String>) -> Self {
|
||||
Self {
|
||||
postgres_url: Some(url.into()),
|
||||
graph_path: "./data/graph".to_string(),
|
||||
event_log_path: "./data/events".to_string(),
|
||||
enable_wal: false,
|
||||
cache_size_mb: 256,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the cache size.
|
||||
#[must_use]
|
||||
pub const fn with_cache_size(mut self, size_mb: usize) -> Self {
|
||||
self.cache_size_mb = size_mb;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable or disable WAL.
|
||||
#[must_use]
|
||||
pub const fn with_wal(mut self, enable: bool) -> Self {
|
||||
self.enable_wal = enable;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage backend trait for graph operations.
///
/// This trait defines the interface for storing and retrieving graph data
/// including node states and edges. Implementations must be thread-safe
/// (`Send + Sync`), since storage handles are shared across threads.
pub trait GraphStorage: Send + Sync {
    /// Store a node state.
    ///
    /// # Arguments
    ///
    /// * `node_id` - Unique identifier for the node
    /// * `state` - State vector (typically f32 values representing the node's state)
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError>;

    /// Retrieve a node state.
    ///
    /// # Arguments
    ///
    /// * `node_id` - Unique identifier for the node
    ///
    /// # Returns
    ///
    /// `Some(state)` if the node exists, `None` otherwise.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError>;

    /// Store an edge between two nodes.
    ///
    /// # Arguments
    ///
    /// * `source` - Source node ID
    /// * `target` - Target node ID
    /// * `weight` - Edge weight (typically representing constraint strength)
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError>;

    /// Delete an edge between two nodes.
    ///
    /// # Arguments
    ///
    /// * `source` - Source node ID
    /// * `target` - Target node ID
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError>;

    /// Find nodes similar to a query vector.
    ///
    /// This method performs approximate nearest neighbor search using cosine similarity.
    /// For production workloads with large datasets, consider using HNSW-indexed storage.
    ///
    /// # Arguments
    ///
    /// * `query` - Query vector to search for similar nodes
    /// * `k` - Maximum number of results to return
    ///
    /// # Returns
    ///
    /// Vector of `(node_id, similarity_score)` tuples, sorted by similarity descending.
    ///
    /// # Errors
    ///
    /// Returns error if the search operation fails.
    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError>;
}
|
||||
|
||||
/// Storage backend trait for governance data.
///
/// This trait defines the interface for storing and retrieving governance objects
/// including policy bundles, witness records, and lineage records. Implementations
/// must be thread-safe (`Send + Sync`). Payloads are opaque byte slices; callers
/// own serialization.
pub trait GovernanceStorage: Send + Sync {
    /// Store a policy bundle.
    ///
    /// # Arguments
    ///
    /// * `bundle` - Serialized policy bundle data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored bundle.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError>;

    /// Retrieve a policy bundle.
    ///
    /// # Arguments
    ///
    /// * `id` - Policy bundle identifier
    ///
    /// # Returns
    ///
    /// `Some(data)` if the policy exists, `None` otherwise.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError>;

    /// Store a witness record.
    ///
    /// Witness records provide immutable proof of gate decisions.
    ///
    /// # Arguments
    ///
    /// * `witness` - Serialized witness record data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored witness.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError>;

    /// Retrieve witness records for an action.
    ///
    /// # Arguments
    ///
    /// * `action_id` - Action identifier to search for
    ///
    /// # Returns
    ///
    /// Vector of witness record data for the given action (empty if none).
    ///
    /// # Errors
    ///
    /// Returns error if the search operation fails.
    fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError>;

    /// Store a lineage record.
    ///
    /// Lineage records track provenance for authoritative writes.
    ///
    /// # Arguments
    ///
    /// * `lineage` - Serialized lineage record data
    ///
    /// # Returns
    ///
    /// Unique identifier for the stored lineage.
    ///
    /// # Errors
    ///
    /// Returns error if the storage operation fails.
    fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError>;
}
|
||||
|
||||
/// Storage error type shared by all backends (in-memory, file, PostgreSQL).
#[derive(Debug, thiserror::Error)]
pub enum StorageError {
    /// Connection error (database or file system)
    #[error("Connection error: {0}")]
    Connection(String),

    /// Entity not found
    #[error("Not found: {0}")]
    NotFound(String),

    /// Serialization/deserialization error
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// IO error (auto-converted from `std::io::Error` via `#[from]`)
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Invalid data format or content
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Transaction or operation failed
    #[error("Transaction failed: {0}")]
    Transaction(String),

    /// Integrity constraint violation
    #[error("Integrity violation: {0}")]
    IntegrityViolation(String),

    /// Resource exhausted (e.g., disk space)
    #[error("Resource exhausted: {0}")]
    ResourceExhausted(String),

    /// Permission denied
    #[error("Permission denied: {0}")]
    PermissionDenied(String),
}
|
||||
|
||||
/// Hybrid storage that combines multiple backends.
///
/// Uses file storage for graph data and optionally PostgreSQL for governance data.
/// This provides the best of both worlds: fast local access for frequently accessed
/// data and ACID guarantees for critical governance data.
#[derive(Debug)]
pub struct HybridStorage {
    /// File storage for graph data (currently also serves governance data —
    /// see the `GovernanceStorage` impl below).
    file_storage: FileStorage,
    /// Configuration the instance was created with.
    config: StorageConfig,
}
|
||||
|
||||
impl HybridStorage {
    /// Create a new hybrid storage instance backed by file storage built
    /// from `config`.
    ///
    /// # Errors
    ///
    /// Returns error if file storage cannot be initialized.
    pub fn new(config: StorageConfig) -> Result<Self, StorageError> {
        let file_storage = FileStorage::from_config(&config)?;

        Ok(Self {
            file_storage,
            config,
        })
    }

    /// Get the file storage backend.
    #[must_use]
    pub fn file_storage(&self) -> &FileStorage {
        &self.file_storage
    }

    /// Get the configuration this instance was created with.
    #[must_use]
    pub fn config(&self) -> &StorageConfig {
        &self.config
    }

    /// Check if PostgreSQL is configured (a connection string is present).
    #[must_use]
    pub fn has_postgres(&self) -> bool {
        self.config.postgres_url.is_some()
    }

    /// Sync all storage backends (currently just the file backend).
    ///
    /// # Errors
    ///
    /// Returns error if sync fails.
    pub fn sync(&self) -> Result<(), StorageError> {
        self.file_storage.sync()
    }
}
|
||||
|
||||
// Graph operations delegate entirely to the file storage backend.
impl GraphStorage for HybridStorage {
    fn store_node(&self, node_id: &str, state: &[f32]) -> Result<(), StorageError> {
        self.file_storage.store_node(node_id, state)
    }

    fn get_node(&self, node_id: &str) -> Result<Option<Vec<f32>>, StorageError> {
        self.file_storage.get_node(node_id)
    }

    fn store_edge(&self, source: &str, target: &str, weight: f32) -> Result<(), StorageError> {
        self.file_storage.store_edge(source, target, weight)
    }

    fn delete_edge(&self, source: &str, target: &str) -> Result<(), StorageError> {
        self.file_storage.delete_edge(source, target)
    }

    fn find_similar(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, StorageError> {
        self.file_storage.find_similar(query, k)
    }
}
|
||||
|
||||
impl GovernanceStorage for HybridStorage {
|
||||
fn store_policy(&self, bundle: &[u8]) -> Result<String, StorageError> {
|
||||
// For now, use file storage. In production, this would delegate to PostgreSQL.
|
||||
self.file_storage.store_policy(bundle)
|
||||
}
|
||||
|
||||
fn get_policy(&self, id: &str) -> Result<Option<Vec<u8>>, StorageError> {
|
||||
self.file_storage.get_policy(id)
|
||||
}
|
||||
|
||||
fn store_witness(&self, witness: &[u8]) -> Result<String, StorageError> {
|
||||
self.file_storage.store_witness(witness)
|
||||
}
|
||||
|
||||
fn get_witnesses_for_action(&self, action_id: &str) -> Result<Vec<Vec<u8>>, StorageError> {
|
||||
self.file_storage.get_witnesses_for_action(action_id)
|
||||
}
|
||||
|
||||
fn store_lineage(&self, lineage: &[u8]) -> Result<String, StorageError> {
|
||||
self.file_storage.store_lineage(lineage)
|
||||
}
|
||||
}
|
||||
|
||||
/// Factory for creating storage instances based on configuration.
///
/// A stateless unit struct: all constructors are associated functions and
/// no instance is ever needed.
pub struct StorageFactory;
|
||||
|
||||
impl StorageFactory {
|
||||
/// Create a storage instance based on configuration.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns error if storage cannot be created.
|
||||
pub fn create_graph_storage(
|
||||
config: &StorageConfig,
|
||||
) -> Result<Box<dyn GraphStorage>, StorageError> {
|
||||
if config.graph_path.is_empty() {
|
||||
Ok(Box::new(InMemoryStorage::new()))
|
||||
} else {
|
||||
Ok(Box::new(FileStorage::from_config(config)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a governance storage instance.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns error if storage cannot be created.
|
||||
pub fn create_governance_storage(
|
||||
config: &StorageConfig,
|
||||
) -> Result<Box<dyn GovernanceStorage>, StorageError> {
|
||||
if config.graph_path.is_empty() {
|
||||
Ok(Box::new(InMemoryStorage::new()))
|
||||
} else {
|
||||
Ok(Box::new(FileStorage::from_config(config)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an in-memory storage (convenience method).
|
||||
#[must_use]
|
||||
pub fn in_memory() -> InMemoryStorage {
|
||||
InMemoryStorage::new()
|
||||
}
|
||||
|
||||
/// Create a file storage (convenience method).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns error if storage cannot be created.
|
||||
pub fn file(path: impl AsRef<std::path::Path>) -> Result<FileStorage, StorageError> {
|
||||
FileStorage::new(path)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// The three config builders should set their distinguishing fields.
    #[test]
    fn test_storage_config_builders() {
        let mem_cfg = StorageConfig::in_memory();
        assert!(mem_cfg.graph_path.is_empty());
        assert!(!mem_cfg.enable_wal);

        let file_cfg = StorageConfig::file_based("/tmp/test");
        assert_eq!(file_cfg.graph_path, "/tmp/test");
        assert!(file_cfg.enable_wal);

        let pg_cfg = StorageConfig::postgres("postgresql://localhost/db");
        assert!(pg_cfg.postgres_url.is_some());
    }

    /// An empty graph path must produce a working in-memory backend.
    #[test]
    fn test_storage_factory_in_memory() {
        let cfg = StorageConfig::in_memory();
        let store = StorageFactory::create_graph_storage(&cfg).unwrap();

        store.store_node("test", &[1.0, 2.0]).unwrap();
        assert!(store.get_node("test").unwrap().is_some());
    }

    /// A non-empty graph path must produce a working file backend.
    #[test]
    fn test_storage_factory_file() {
        let dir = TempDir::new().unwrap();
        let cfg = StorageConfig::file_based(dir.path().to_str().unwrap());
        let store = StorageFactory::create_graph_storage(&cfg).unwrap();

        store.store_node("test", &[1.0, 2.0]).unwrap();
        assert!(store.get_node("test").unwrap().is_some());
    }

    /// Exercise graph + governance round trips and a sync on HybridStorage.
    #[test]
    fn test_hybrid_storage() {
        let dir = TempDir::new().unwrap();
        let cfg = StorageConfig::file_based(dir.path().to_str().unwrap());
        let store = HybridStorage::new(cfg).unwrap();

        // Graph round trip.
        store.store_node("node-1", &[1.0, 0.0, 0.0]).unwrap();
        assert!(store.get_node("node-1").unwrap().is_some());

        // Governance round trip.
        let policy_id = store.store_policy(b"test policy").unwrap();
        assert!(store.get_policy(&policy_id).unwrap().is_some());

        store.sync().unwrap();
    }

    /// Storage types must remain usable behind trait objects.
    #[test]
    fn test_trait_object_usage() {
        let graph: Box<dyn GraphStorage> = Box::new(InMemoryStorage::new());
        graph.store_node("test", &[1.0]).unwrap();

        let gov: Box<dyn GovernanceStorage> = Box::new(InMemoryStorage::new());
        let _ = gov.store_policy(b"test").unwrap();
    }
}
|
||||
1081
vendor/ruvector/crates/prime-radiant/src/storage/postgres.rs
vendored
Normal file
1081
vendor/ruvector/crates/prime-radiant/src/storage/postgres.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user