Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
429
vendor/ruvector/crates/rvlite/src/storage/epoch.rs
vendored
Normal file
429
vendor/ruvector/crates/rvlite/src/storage/epoch.rs
vendored
Normal file
@@ -0,0 +1,429 @@
|
||||
//! Epoch-based reconciliation for hybrid RVF + IndexedDB persistence.
|
||||
//!
|
||||
//! RVF is the source of truth for vectors. IndexedDB is a rebuildable
|
||||
//! cache for metadata. Both stores share a monotonic epoch counter.
|
||||
//!
|
||||
//! Write order:
|
||||
//! 1. Write vectors to RVF (append-only, crash-safe)
|
||||
//! 2. Write metadata to IndexedDB
|
||||
//! 3. Commit shared epoch in both stores
|
||||
//!
|
||||
//! On startup: compare epochs and rebuild the lagging side.
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Monotonic epoch counter shared between RVF and metadata stores.
///
/// A thin newtype over `u64`; equality and ordering follow the raw
/// counter value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Epoch(pub u64);

impl Epoch {
    /// The starting epoch, before any commit has happened.
    pub const ZERO: Self = Self(0);

    /// Return the successor epoch.
    ///
    /// # Panics
    /// Panics with "epoch overflow" if the counter would exceed `u64::MAX`.
    pub fn next(self) -> Self {
        match self.0.checked_add(1) {
            Some(bumped) => Self(bumped),
            None => panic!("epoch overflow"),
        }
    }

    /// Raw counter value.
    pub fn value(self) -> u64 {
        self.0
    }
}
|
||||
|
||||
/// State describing the relationship between RVF and metadata epochs.
///
/// Produced by [`compare_epochs`] and consumed by [`reconcile_action`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EpochState {
    /// Both stores agree on the current epoch.
    Synchronized,
    /// RVF store is ahead of metadata by the given delta (the normal
    /// crash case: RVF write committed, metadata write was lost).
    RvfAhead(u64),
    /// Metadata store is ahead of RVF by the given delta (anomalous,
    /// since the write protocol commits RVF first).
    MetadataAhead(u64),
}
|
||||
|
||||
/// Action to take after comparing epochs.
///
/// Returned by [`reconcile_action`] for a given [`EpochState`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconcileAction {
    /// No reconciliation needed -- both stores are in sync.
    None,
    /// Metadata is stale; rebuild it from the authoritative RVF store.
    RebuildMetadata,
    /// RVF is somehow behind metadata; rebuild vectors from RVF file.
    /// This should not normally happen and indicates a prior incomplete write.
    RebuildFromRvf,
    /// Metadata is ahead which should never happen under correct operation.
    /// Log a warning and trust RVF as the source of truth.
    LogWarningTrustRvf,
}
|
||||
|
||||
/// Result of comparing epochs between RVF and metadata stores.
///
/// Kept for backward compatibility with existing callers; the newer
/// [`EpochState`] / [`ReconcileAction`] pair covers the same decisions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconciliationAction {
    /// Both stores are in sync -- no action needed.
    InSync,
    /// RVF is ahead -- rebuild metadata from RVF vectors.
    RebuildMetadata {
        /// Epoch recorded in the RVF store (authoritative).
        rvf_epoch: Epoch,
        /// Stale epoch recorded in the metadata store.
        metadata_epoch: Epoch,
    },
    /// Metadata is ahead (should not happen) -- log warning, trust RVF.
    TrustRvf {
        /// Epoch recorded in the RVF store (authoritative).
        rvf_epoch: Epoch,
        /// Anomalously newer epoch recorded in the metadata store.
        metadata_epoch: Epoch,
    },
}
|
||||
|
||||
/// Compare raw epoch values and return the relationship state.
|
||||
pub fn compare_epochs(rvf_epoch: u64, metadata_epoch: u64) -> EpochState {
|
||||
if rvf_epoch == metadata_epoch {
|
||||
EpochState::Synchronized
|
||||
} else if rvf_epoch > metadata_epoch {
|
||||
EpochState::RvfAhead(rvf_epoch - metadata_epoch)
|
||||
} else {
|
||||
EpochState::MetadataAhead(metadata_epoch - rvf_epoch)
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the reconciliation action for a given epoch state.
|
||||
pub fn reconcile_action(state: &EpochState) -> ReconcileAction {
|
||||
match state {
|
||||
EpochState::Synchronized => ReconcileAction::None,
|
||||
EpochState::RvfAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Common case: a single write committed to RVF but metadata
|
||||
// update was lost (e.g. crash between step 1 and step 2).
|
||||
ReconcileAction::RebuildMetadata
|
||||
} else {
|
||||
// Multiple epochs behind -- still rebuild metadata, but the
|
||||
// gap is larger so more data must be replayed.
|
||||
ReconcileAction::RebuildMetadata
|
||||
}
|
||||
}
|
||||
EpochState::MetadataAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Metadata committed but RVF write was lost. This means the
|
||||
// RVF file is still valid at its own epoch -- rebuild from it.
|
||||
ReconcileAction::RebuildFromRvf
|
||||
} else {
|
||||
// Large gap with metadata ahead is anomalous. Trust RVF.
|
||||
ReconcileAction::LogWarningTrustRvf
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare epochs and determine reconciliation action (legacy API).
|
||||
pub fn reconcile(rvf_epoch: Epoch, metadata_epoch: Epoch) -> ReconciliationAction {
|
||||
match rvf_epoch.cmp(&metadata_epoch) {
|
||||
std::cmp::Ordering::Equal => ReconciliationAction::InSync,
|
||||
std::cmp::Ordering::Greater => ReconciliationAction::RebuildMetadata {
|
||||
rvf_epoch,
|
||||
metadata_epoch,
|
||||
},
|
||||
std::cmp::Ordering::Less => ReconciliationAction::TrustRvf {
|
||||
rvf_epoch,
|
||||
metadata_epoch,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Thread-safe monotonic epoch tracker.
|
||||
///
|
||||
/// Uses `AtomicU64` internally so it can be shared across threads without
|
||||
/// a mutex. The counter is strictly monotonic: it can only move forward.
|
||||
///
|
||||
/// # Write protocol
|
||||
///
|
||||
/// Callers must follow the three-phase commit:
|
||||
/// 1. Call `begin_write()` to get the next epoch value.
|
||||
/// 2. Write vectors to RVF with that epoch.
|
||||
/// 3. Write metadata to IndexedDB with that epoch.
|
||||
/// 4. Call `commit(epoch)` to advance the tracker.
|
||||
///
|
||||
/// If step 2 or 3 fails, do NOT call `commit` -- the tracker stays at the
|
||||
/// previous epoch so that the next startup triggers reconciliation.
|
||||
pub struct EpochTracker {
|
||||
/// Current committed epoch.
|
||||
current: AtomicU64,
|
||||
}
|
||||
|
||||
impl EpochTracker {
|
||||
/// Create a new tracker starting at the given epoch.
|
||||
pub fn new(initial: u64) -> Self {
|
||||
Self {
|
||||
current: AtomicU64::new(initial),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a tracker starting at epoch zero.
|
||||
pub fn zero() -> Self {
|
||||
Self::new(0)
|
||||
}
|
||||
|
||||
/// Read the current committed epoch.
|
||||
pub fn current(&self) -> u64 {
|
||||
self.current.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Return the next epoch value for a pending write.
|
||||
///
|
||||
/// This does NOT advance the tracker. The caller must call `commit`
|
||||
/// after both RVF and metadata writes succeed.
|
||||
pub fn begin_write(&self) -> u64 {
|
||||
self.current
|
||||
.load(Ordering::Acquire)
|
||||
.checked_add(1)
|
||||
.expect("epoch overflow")
|
||||
}
|
||||
|
||||
/// Commit the given epoch, advancing the tracker.
|
||||
///
|
||||
/// Returns `true` if the commit succeeded (epoch was exactly current + 1).
|
||||
/// Returns `false` if the epoch was stale or out of order, which means
|
||||
/// another writer committed first or the caller passed a wrong value.
|
||||
pub fn commit(&self, epoch: u64) -> bool {
|
||||
let expected = epoch.checked_sub(1).unwrap_or(0);
|
||||
self.current
|
||||
.compare_exchange(expected, epoch, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Force-set the epoch to a specific value.
|
||||
///
|
||||
/// Used during recovery/reconciliation when we need to align the
|
||||
/// tracker with a known-good state read from disk.
|
||||
pub fn force_set(&self, epoch: u64) {
|
||||
self.current.store(epoch, Ordering::Release);
|
||||
}
|
||||
|
||||
/// Check the relationship between the RVF epoch stored on disk and the
|
||||
/// metadata epoch, then return the appropriate reconciliation action.
|
||||
pub fn check_and_reconcile(&self, rvf_epoch: u64, metadata_epoch: u64) -> ReconcileAction {
|
||||
let state = compare_epochs(rvf_epoch, metadata_epoch);
|
||||
let action = reconcile_action(&state);
|
||||
|
||||
// After reconciliation, align the tracker to the authoritative epoch.
|
||||
match &action {
|
||||
ReconcileAction::None => {
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::RebuildMetadata | ReconcileAction::RebuildFromRvf => {
|
||||
// After rebuild, both sides will match the RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::LogWarningTrustRvf => {
|
||||
// Trust RVF -- set tracker to RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
}
|
||||
|
||||
action
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for EpochTracker {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("EpochTracker")
|
||||
.field("current", &self.current.load(Ordering::Relaxed))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Legacy API tests (preserved) ----

    #[test]
    fn in_sync() {
        let e = Epoch(5);
        assert_eq!(reconcile(e, e), ReconciliationAction::InSync);
    }

    #[test]
    fn rvf_ahead_rebuilds_metadata() {
        let action = reconcile(Epoch(3), Epoch(2));
        assert_eq!(
            action,
            ReconciliationAction::RebuildMetadata {
                rvf_epoch: Epoch(3),
                metadata_epoch: Epoch(2),
            }
        );
    }

    #[test]
    fn metadata_ahead_trusts_rvf() {
        let action = reconcile(Epoch(1), Epoch(3));
        assert_eq!(
            action,
            ReconciliationAction::TrustRvf {
                rvf_epoch: Epoch(1),
                metadata_epoch: Epoch(3),
            }
        );
    }

    #[test]
    fn epoch_increment() {
        assert_eq!(Epoch::ZERO.next(), Epoch(1));
        assert_eq!(Epoch(99).next(), Epoch(100));
    }

    // ---- New epoch state / reconcile tests ----

    #[test]
    fn compare_epochs_synchronized() {
        assert_eq!(compare_epochs(5, 5), EpochState::Synchronized);
        assert_eq!(compare_epochs(0, 0), EpochState::Synchronized);
    }

    #[test]
    fn compare_epochs_rvf_ahead() {
        assert_eq!(compare_epochs(10, 7), EpochState::RvfAhead(3));
        assert_eq!(compare_epochs(1, 0), EpochState::RvfAhead(1));
    }

    #[test]
    fn compare_epochs_metadata_ahead() {
        assert_eq!(compare_epochs(3, 8), EpochState::MetadataAhead(5));
        assert_eq!(compare_epochs(0, 1), EpochState::MetadataAhead(1));
    }

    #[test]
    fn reconcile_action_none_when_synchronized() {
        let state = EpochState::Synchronized;
        assert_eq!(reconcile_action(&state), ReconcileAction::None);
    }

    #[test]
    fn reconcile_action_rebuild_metadata_when_rvf_ahead() {
        // Any RVF lead, small or large, rebuilds metadata.
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(1)),
            ReconcileAction::RebuildMetadata
        );
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(5)),
            ReconcileAction::RebuildMetadata
        );
    }

    #[test]
    fn reconcile_action_rebuild_from_rvf_when_metadata_ahead_by_one() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(1)),
            ReconcileAction::RebuildFromRvf
        );
    }

    #[test]
    fn reconcile_action_log_warning_when_metadata_far_ahead() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(3)),
            ReconcileAction::LogWarningTrustRvf
        );
    }

    // ---- EpochTracker tests ----

    #[test]
    fn tracker_zero_starts_at_zero() {
        let tracker = EpochTracker::zero();
        assert_eq!(tracker.current(), 0);
    }

    #[test]
    fn tracker_new_starts_at_initial() {
        let tracker = EpochTracker::new(42);
        assert_eq!(tracker.current(), 42);
    }

    #[test]
    fn tracker_begin_write_returns_next() {
        let tracker = EpochTracker::new(10);
        assert_eq!(tracker.begin_write(), 11);
        // begin_write is idempotent until commit
        assert_eq!(tracker.begin_write(), 11);
    }

    #[test]
    fn tracker_commit_advances_epoch() {
        let tracker = EpochTracker::zero();
        let next = tracker.begin_write();
        assert_eq!(next, 1);
        assert!(tracker.commit(next));
        assert_eq!(tracker.current(), 1);

        let next2 = tracker.begin_write();
        assert_eq!(next2, 2);
        assert!(tracker.commit(next2));
        assert_eq!(tracker.current(), 2);
    }

    #[test]
    fn tracker_commit_rejects_stale_epoch() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 3 which is behind current
        assert!(!tracker.commit(3));
        assert_eq!(tracker.current(), 5);
    }

    #[test]
    fn tracker_commit_rejects_skip() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 8, skipping 6 and 7
        assert!(!tracker.commit(8));
        assert_eq!(tracker.current(), 5);
    }

    #[test]
    fn tracker_force_set() {
        let tracker = EpochTracker::new(10);
        tracker.force_set(100);
        assert_eq!(tracker.current(), 100);
        // Can also go backward with force_set (recovery scenario)
        tracker.force_set(5);
        assert_eq!(tracker.current(), 5);
    }

    #[test]
    fn tracker_check_and_reconcile_in_sync() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(7, 7);
        assert_eq!(action, ReconcileAction::None);
        // Tracker aligns to the authoritative RVF epoch.
        assert_eq!(tracker.current(), 7);
    }

    #[test]
    fn tracker_check_and_reconcile_rvf_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(10, 8);
        assert_eq!(action, ReconcileAction::RebuildMetadata);
        assert_eq!(tracker.current(), 10);
    }

    #[test]
    fn tracker_check_and_reconcile_metadata_far_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(3, 8);
        assert_eq!(action, ReconcileAction::LogWarningTrustRvf);
        // RVF remains the source of truth even when metadata is ahead.
        assert_eq!(tracker.current(), 3);
    }

    #[test]
    fn tracker_debug_format() {
        let tracker = EpochTracker::new(42);
        let debug = format!("{:?}", tracker);
        assert!(debug.contains("EpochTracker"));
        assert!(debug.contains("42"));
    }

    // ---- Thread safety (basic) ----

    #[test]
    fn tracker_is_send_and_sync() {
        // Compile-time proof: EpochTracker contains only an AtomicU64.
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<EpochTracker>();
    }
}
|
||||
296
vendor/ruvector/crates/rvlite/src/storage/id_map.rs
vendored
Normal file
296
vendor/ruvector/crates/rvlite/src/storage/id_map.rs
vendored
Normal file
@@ -0,0 +1,296 @@
|
||||
//! Direct mapping between RVF vector IDs and SQL primary keys.
|
||||
//!
|
||||
//! In rvlite the mapping is identity: RVF u64 IDs are the same as SQL
|
||||
//! primary keys. This zero-cost design avoids an extra lookup table and
|
||||
//! keeps memory usage minimal.
|
||||
//!
|
||||
//! The [`IdMapping`] trait exists for future extensibility -- if a
|
||||
//! non-identity mapping is ever needed (e.g. hashed IDs, composite keys),
|
||||
//! a new implementation can be swapped in without changing call sites.
|
||||
|
||||
/// Trait for converting between RVF vector IDs and SQL primary keys.
///
/// Implementors define how the two ID spaces relate to each other.
/// The default implementation ([`DirectIdMap`]) uses identity mapping.
pub trait IdMapping {
    /// Convert a SQL primary key to an RVF vector ID.
    fn to_rvf_id(&self, sql_pk: u64) -> u64;

    /// Convert an RVF vector ID back to a SQL primary key.
    fn to_sql_pk(&self, rvf_id: u64) -> u64;

    /// Validate that every RVF ID in the slice has a corresponding SQL PK
    /// in the other slice, and vice versa. Both slices must contain the
    /// same multiset of values (same elements with matching multiplicity,
    /// possibly in different order) for the mapping to be considered valid.
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool;
}
|
||||
|
||||
/// Zero-cost identity mapping where RVF u64 IDs equal SQL primary keys.
///
/// This is the default and recommended mapping for rvlite. Because
/// both ID spaces use `u64`, no conversion is needed and the mapping
/// functions compile down to no-ops.
///
/// # Example
///
/// ```
/// # use rvlite::storage::id_map::{DirectIdMap, IdMapping};
/// let map = DirectIdMap;
/// assert_eq!(map.to_rvf_id(42), 42);
/// assert_eq!(map.to_sql_pk(42), 42);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct DirectIdMap;

impl DirectIdMap {
    /// Create a new direct (identity) ID map.
    pub fn new() -> Self {
        Self
    }

    /// Convert a SQL primary key to an RVF vector ID (identity).
    ///
    /// Free-function alternative to the trait method, useful when the
    /// concrete type is known and dynamic dispatch should be avoided.
    #[inline(always)]
    pub fn to_rvf_id(sql_pk: u64) -> u64 {
        sql_pk
    }

    /// Convert an RVF vector ID to a SQL primary key (identity).
    #[inline(always)]
    pub fn to_sql_pk(rvf_id: u64) -> u64 {
        rvf_id
    }

    /// Validate that the two slices contain the same multiset of IDs.
    ///
    /// Under identity mapping, `rvf_ids` and `sql_pks` must hold the same
    /// elements with the same multiplicities, in any order.
    pub fn validate_mapping(rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }
        // Compare as sorted copies so ordering differences are ignored
        // while duplicate counts still matter.
        let sorted = |ids: &[u64]| -> Vec<u64> {
            let mut copy = ids.to_vec();
            copy.sort_unstable();
            copy
        };
        sorted(rvf_ids) == sorted(sql_pks)
    }
}
|
||||
|
||||
impl IdMapping for DirectIdMap {
    // Identity: the RVF ID is the SQL primary key.
    #[inline(always)]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        sql_pk
    }

    // Identity: the SQL primary key is the RVF ID.
    #[inline(always)]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        rvf_id
    }

    // Delegates to the inherent associated function of the same name.
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        DirectIdMap::validate_mapping(rvf_ids, sql_pks)
    }
}
|
||||
|
||||
/// An offset-based ID mapping where SQL PKs start from a different base.
///
/// Useful when the SQL table uses auto-increment starting at 1 but
/// the RVF store is zero-indexed (or vice versa).
///
/// `rvf_id = sql_pk + offset`
#[derive(Debug, Clone, Copy)]
pub struct OffsetIdMap {
    /// Offset added to SQL PK to produce the RVF ID.
    /// Can be negative via wrapping arithmetic on u64.
    offset: i64,
}

impl OffsetIdMap {
    /// Create an offset mapping.
    ///
    /// `offset` is added to SQL PKs to produce RVF IDs.
    /// Use a negative offset if RVF IDs are smaller than SQL PKs.
    ///
    /// NOTE(review): the trait impl converts via `i64` wrapping casts, so
    /// IDs above `i64::MAX` rely on two's-complement wrapping -- confirm
    /// such large IDs never occur with a non-zero offset.
    pub fn new(offset: i64) -> Self {
        Self { offset }
    }
}
|
||||
|
||||
impl IdMapping for OffsetIdMap {
    // rvf_id = sql_pk + offset, using two's-complement wrapping via i64
    // casts so negative offsets work on unsigned IDs.
    #[inline]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        (sql_pk as i64).wrapping_add(self.offset) as u64
    }

    // Inverse of `to_rvf_id`: sql_pk = rvf_id - offset (same wrapping).
    #[inline]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        (rvf_id as i64).wrapping_sub(self.offset) as u64
    }

    // Valid when mapping every SQL PK through `to_rvf_id` yields the same
    // multiset as the supplied RVF IDs (order-insensitive, duplicate
    // counts must match).
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }
        let mut expected: Vec<u64> = sql_pks.iter().map(|&pk| self.to_rvf_id(pk)).collect();
        let mut actual: Vec<u64> = rvf_ids.to_vec();
        expected.sort_unstable();
        actual.sort_unstable();
        expected == actual
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- DirectIdMap tests ----

    #[test]
    fn direct_to_rvf_id_is_identity() {
        assert_eq!(DirectIdMap::to_rvf_id(0), 0);
        assert_eq!(DirectIdMap::to_rvf_id(42), 42);
        assert_eq!(DirectIdMap::to_rvf_id(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_to_sql_pk_is_identity() {
        assert_eq!(DirectIdMap::to_sql_pk(0), 0);
        assert_eq!(DirectIdMap::to_sql_pk(42), 42);
        assert_eq!(DirectIdMap::to_sql_pk(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_roundtrip() {
        for id in [0, 1, 100, u64::MAX / 2, u64::MAX] {
            assert_eq!(DirectIdMap::to_sql_pk(DirectIdMap::to_rvf_id(id)), id);
            assert_eq!(DirectIdMap::to_rvf_id(DirectIdMap::to_sql_pk(id)), id);
        }
    }

    #[test]
    fn direct_validate_same_elements() {
        let rvf = vec![1, 2, 3];
        let sql = vec![3, 1, 2];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_empty() {
        assert!(DirectIdMap::validate_mapping(&[], &[]));
    }

    #[test]
    fn direct_validate_different_length_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_different_elements_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2, 4];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_duplicates_match() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 1];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_duplicates_mismatch() {
        // Same length and same distinct values, but different multiplicity.
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    // ---- IdMapping trait via DirectIdMap ----

    #[test]
    fn trait_direct_to_rvf_id() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_rvf_id(&map, 99), 99);
    }

    #[test]
    fn trait_direct_to_sql_pk() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_sql_pk(&map, 99), 99);
    }

    #[test]
    fn trait_direct_validate() {
        let map = DirectIdMap;
        assert!(IdMapping::validate_mapping(&map, &[1, 2], &[2, 1]));
        assert!(!IdMapping::validate_mapping(&map, &[1, 2], &[2, 3]));
    }

    // ---- OffsetIdMap tests ----

    #[test]
    fn offset_positive() {
        let map = OffsetIdMap::new(10);
        assert_eq!(map.to_rvf_id(0), 10);
        assert_eq!(map.to_rvf_id(5), 15);
        assert_eq!(map.to_sql_pk(10), 0);
        assert_eq!(map.to_sql_pk(15), 5);
    }

    #[test]
    fn offset_negative() {
        let map = OffsetIdMap::new(-1);
        // SQL PK 1 -> RVF ID 0
        assert_eq!(map.to_rvf_id(1), 0);
        assert_eq!(map.to_sql_pk(0), 1);
    }

    #[test]
    fn offset_zero_is_identity() {
        let map = OffsetIdMap::new(0);
        for id in [0, 1, 42, 1000] {
            assert_eq!(map.to_rvf_id(id), id);
            assert_eq!(map.to_sql_pk(id), id);
        }
    }

    #[test]
    fn offset_roundtrip() {
        let map = OffsetIdMap::new(7);
        for pk in [0, 1, 100, 999] {
            assert_eq!(map.to_sql_pk(map.to_rvf_id(pk)), pk);
        }
    }

    #[test]
    fn offset_validate() {
        let map = OffsetIdMap::new(10);
        // SQL PKs [0, 1, 2] -> RVF IDs [10, 11, 12]
        assert!(map.validate_mapping(&[12, 10, 11], &[2, 0, 1]));
        assert!(!map.validate_mapping(&[10, 11, 12], &[0, 1, 3]));
    }

    // ---- Dynamic dispatch ----

    #[test]
    fn trait_object_works() {
        let direct: Box<dyn IdMapping> = Box::new(DirectIdMap);
        assert_eq!(direct.to_rvf_id(5), 5);

        let offset: Box<dyn IdMapping> = Box::new(OffsetIdMap::new(100));
        assert_eq!(offset.to_rvf_id(5), 105);
    }

    // ---- Default impl ----

    #[test]
    fn direct_default() {
        let map: DirectIdMap = Default::default();
        assert_eq!(map.to_rvf_id(7), 7);
    }
}
|
||||
243
vendor/ruvector/crates/rvlite/src/storage/indexeddb.rs
vendored
Normal file
243
vendor/ruvector/crates/rvlite/src/storage/indexeddb.rs
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
//! IndexedDB storage implementation for WASM
|
||||
//!
|
||||
//! Uses web-sys bindings to interact with the browser's IndexedDB API
|
||||
//! for persistent storage of RvLite state.
|
||||
|
||||
use super::state::RvLiteState;
|
||||
use js_sys::{Object, Reflect};
|
||||
use wasm_bindgen::prelude::*;
|
||||
use wasm_bindgen::JsCast;
|
||||
use wasm_bindgen_futures::JsFuture;
|
||||
use web_sys::{IdbDatabase, IdbObjectStore, IdbRequest, IdbTransaction, IdbTransactionMode};
|
||||
|
||||
/// Name of the IndexedDB database holding all rvlite state.
const DB_NAME: &str = "rvlite_db";
/// Schema version; bump when the object-store layout changes.
const DB_VERSION: u32 = 1;
/// Single object store holding the serialized state blob.
const STORE_NAME: &str = "state";
/// Fixed key under which the one state record is stored.
const STATE_KEY: &str = "main";
|
||||
|
||||
/// IndexedDB storage backend for RvLite persistence
///
/// Holds an open database handle after [`IndexedDBStorage::init`] succeeds;
/// every other async method returns an error until then.
pub struct IndexedDBStorage {
    // `None` until `init()` succeeds; `close()` takes it back to `None`.
    db: Option<IdbDatabase>,
}
|
||||
|
||||
impl IndexedDBStorage {
    /// Create a new IndexedDB storage instance.
    ///
    /// No browser calls happen here; the database is opened by `init()`.
    pub fn new() -> Self {
        Self { db: None }
    }

    /// Initialize and open the IndexedDB database.
    ///
    /// # Errors
    /// Fails when no `window` is available, when IndexedDB is unavailable,
    /// or when the open request errors (reported as "IndexedDB error" by
    /// `wait_for_request`).
    pub async fn init(&mut self) -> Result<(), JsValue> {
        let window = web_sys::window().ok_or_else(|| JsValue::from_str("No window"))?;
        let indexed_db = window
            .indexed_db()?
            .ok_or_else(|| JsValue::from_str("IndexedDB not available"))?;

        let open_request = indexed_db.open_with_u32(DB_NAME, DB_VERSION)?;

        // Handle database upgrade (create object store if needed).
        // Runs only when the browser's stored version is < DB_VERSION.
        let onupgradeneeded = Closure::once(Box::new(move |event: web_sys::Event| {
            let target = event.target().unwrap();
            let request: IdbRequest = target.unchecked_into();
            let db: IdbDatabase = request.result().unwrap().unchecked_into();

            // Create object store if it doesn't exist
            if !db.object_store_names().contains(STORE_NAME) {
                db.create_object_store(STORE_NAME).unwrap();
            }
        }) as Box<dyn FnOnce(_)>);

        open_request.set_onupgradeneeded(Some(onupgradeneeded.as_ref().unchecked_ref()));
        // Prevent closure from being dropped before the browser invokes it.
        // NOTE(review): `forget()` leaks the closure; acceptable for a
        // one-shot open, but confirm init() is not called repeatedly.
        onupgradeneeded.forget();

        // Wait for database to open using JsFuture
        let db_result = wait_for_request(&open_request).await?;
        let db: IdbDatabase = db_result.unchecked_into();

        self.db = Some(db);
        Ok(())
    }

    /// Check if IndexedDB is available in the current environment.
    pub fn is_available() -> bool {
        web_sys::window()
            .and_then(|w| w.indexed_db().ok().flatten())
            .is_some()
    }

    /// Save state to IndexedDB.
    ///
    /// Serializes `state` via `serde_wasm_bindgen` and stores it under the
    /// fixed `STATE_KEY`, overwriting any previous record.
    ///
    /// # Errors
    /// Fails if `init()` has not been called, serialization fails, or the
    /// put request errors.
    pub async fn save(&self, state: &RvLiteState) -> Result<(), JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        // Convert state to JsValue
        let js_state = serde_wasm_bindgen::to_value(state)?;

        // Start a readwrite transaction scoped to the single store.
        let store_names = js_sys::Array::new();
        store_names.push(&JsValue::from_str(STORE_NAME));

        let transaction =
            db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;

        let store = transaction.object_store(STORE_NAME)?;

        // Put state with key
        let request = store.put_with_key(&js_state, &JsValue::from_str(STATE_KEY))?;

        // Wait for completion
        wait_for_request(&request).await?;

        Ok(())
    }

    /// Load state from IndexedDB.
    ///
    /// Returns `Ok(None)` when no record has been saved yet.
    ///
    /// # Errors
    /// Fails if `init()` has not been called, the get request errors, or
    /// the stored value cannot be deserialized into `RvLiteState`.
    pub async fn load(&self) -> Result<Option<RvLiteState>, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        // Start read transaction
        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        // Get state by key
        let request = store.get(&JsValue::from_str(STATE_KEY))?;

        // Wait for result
        let result = wait_for_request(&request).await?;

        // IndexedDB reports a missing key as `undefined`.
        if result.is_undefined() || result.is_null() {
            return Ok(None);
        }

        // Deserialize state
        let state: RvLiteState = serde_wasm_bindgen::from_value(result)?;
        Ok(Some(state))
    }

    /// Delete all stored state.
    ///
    /// # Errors
    /// Fails if `init()` has not been called or the clear request errors.
    pub async fn clear(&self) -> Result<(), JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let store_names = js_sys::Array::new();
        store_names.push(&JsValue::from_str(STORE_NAME));

        let transaction =
            db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;

        let store = transaction.object_store(STORE_NAME)?;
        let request = store.clear()?;

        wait_for_request(&request).await?;
        Ok(())
    }

    /// Check if state exists in storage.
    ///
    /// Counts records under `STATE_KEY`; any positive count means a saved
    /// state is present.
    pub async fn exists(&self) -> Result<bool, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        let request = store.count_with_key(&JsValue::from_str(STATE_KEY))?;
        let result = wait_for_request(&request).await?;

        // The count arrives as a JS number; a non-numeric result is
        // treated as 0 (no state).
        let count = result.as_f64().unwrap_or(0.0) as u32;
        Ok(count > 0)
    }

    /// Get storage info (for debugging).
    ///
    /// Returns a plain JS object `{ database, store, entries }`.
    pub async fn get_info(&self) -> Result<JsValue, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        let count_request = store.count()?;
        let count = wait_for_request(&count_request).await?;

        let info = Object::new();
        Reflect::set(&info, &"database".into(), &DB_NAME.into())?;
        Reflect::set(&info, &"store".into(), &STORE_NAME.into())?;
        Reflect::set(&info, &"entries".into(), &count)?;

        Ok(info.into())
    }

    /// Close the database connection.
    ///
    /// Safe to call more than once; subsequent calls are no-ops.
    pub fn close(&mut self) {
        if let Some(db) = self.db.take() {
            db.close();
        }
    }
}
|
||||
|
||||
impl Default for IndexedDBStorage {
    /// Equivalent to [`IndexedDBStorage::new`]; the database is not
    /// opened until `init()` is called.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl Drop for IndexedDBStorage {
    fn drop(&mut self) {
        // Release the browser-side connection if one is still open.
        self.close();
    }
}
|
||||
|
||||
/// Wait for an IdbRequest to complete and return the result
///
/// Bridges the callback-based IndexedDB request into a future: a JS
/// promise resolves on `onsuccess` / rejects on `onerror`, and the
/// request's `result()` is read only after the await completes.
///
/// # Errors
/// Rejections surface as the fixed string "IndexedDB error"; the
/// browser's specific error object is not propagated.
///
/// NOTE(review): both closures are `forget()`-leaked per call --
/// acceptable for one-shot requests, but worth confirming for
/// long-running sessions with many requests.
async fn wait_for_request(request: &IdbRequest) -> Result<JsValue, JsValue> {
    let promise = js_sys::Promise::new(&mut |resolve, reject| {
        // Success handler
        let resolve_clone = resolve.clone();
        let onsuccess = Closure::once(Box::new(move |_event: web_sys::Event| {
            // Note: We can't access request here due to lifetime issues
            // The result is read from `request` after the await instead.
            resolve_clone.call0(&JsValue::NULL).unwrap();
        }) as Box<dyn FnOnce(_)>);

        // Error handler
        let onerror = Closure::once(Box::new(move |_event: web_sys::Event| {
            reject
                .call1(&JsValue::NULL, &JsValue::from_str("IndexedDB error"))
                .unwrap();
        }) as Box<dyn FnOnce(_)>);

        request.set_onsuccess(Some(onsuccess.as_ref().unchecked_ref()));
        request.set_onerror(Some(onerror.as_ref().unchecked_ref()));

        // Keep both closures alive until the browser fires one of them.
        onsuccess.forget();
        onerror.forget();
    });

    JsFuture::from(promise).await?;

    // Get the result after the request completes
    request.result()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Note: IndexedDB tests require a browser environment
    // These are placeholder tests for compilation verification

    /// A freshly constructed storage has no database handle until `init()`
    /// is called.
    #[test]
    fn test_storage_new() {
        let storage = IndexedDBStorage::new();
        assert!(storage.db.is_none());
    }
}
|
||||
21
vendor/ruvector/crates/rvlite/src/storage/mod.rs
vendored
Normal file
21
vendor/ruvector/crates/rvlite/src/storage/mod.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
//! IndexedDB storage backend for WASM persistence
//!
//! Provides async-compatible persistence using IndexedDB for:
//! - Vector database state
//! - Cypher graph state
//! - SPARQL triple store state

pub mod indexeddb;
pub mod state;

// Epoch reconciliation, writer leasing, and ID mapping are only needed
// when the native RVF file backend is compiled in.
#[cfg(feature = "rvf-backend")]
pub mod epoch;

#[cfg(feature = "rvf-backend")]
pub mod writer_lease;

#[cfg(feature = "rvf-backend")]
pub mod id_map;

pub use indexeddb::IndexedDBStorage;
pub use state::{GraphState, RvLiteState, TripleStoreState, VectorState};
|
||||
158
vendor/ruvector/crates/rvlite/src/storage/state.rs
vendored
Normal file
158
vendor/ruvector/crates/rvlite/src/storage/state.rs
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
//! Serializable state structures for RvLite persistence
|
||||
//!
|
||||
//! These structures represent the complete state of the RvLite database
|
||||
//! in a format that can be serialized to/from IndexedDB.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Complete serializable state for RvLite
///
/// Top-level document persisted to storage; all engine sub-states are
/// embedded so a single read restores the whole database.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RvLiteState {
    /// Version for schema migration
    pub version: u32,
    /// Timestamp of last save
    pub saved_at: u64,
    /// Vector database state
    pub vectors: VectorState,
    /// Cypher graph state
    pub graph: GraphState,
    /// SPARQL triple store state
    pub triples: TripleStoreState,
    /// SQL engine schemas
    pub sql_schemas: Vec<SqlTableState>,
}
|
||||
|
||||
impl Default for RvLiteState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
version: 1,
|
||||
saved_at: 0,
|
||||
vectors: VectorState::default(),
|
||||
graph: GraphState::default(),
|
||||
triples: TripleStoreState::default(),
|
||||
sql_schemas: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializable vector database state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VectorState {
    /// Vector entries: id -> (vector, metadata)
    pub entries: Vec<VectorEntry>,
    /// Database dimensions
    pub dimensions: usize,
    /// Distance metric name (stored as a string so it round-trips
    /// through serialization)
    pub distance_metric: String,
    /// Next auto-generated ID counter; restored so new inserts do not
    /// collide with persisted entries
    pub next_id: u64,
}
|
||||
|
||||
/// Single vector entry for serialization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorEntry {
    /// Vector ID.
    pub id: String,
    /// Raw embedding values.
    pub vector: Vec<f32>,
    /// Optional per-vector metadata, stored as arbitrary JSON values.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
|
||||
|
||||
/// Serializable Cypher graph state
///
/// ID counters are persisted alongside nodes/edges so that new elements
/// created after a restore do not collide with persisted IDs.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphState {
    /// All nodes
    pub nodes: Vec<NodeState>,
    /// All edges
    pub edges: Vec<EdgeState>,
    /// Next node ID counter
    pub next_node_id: usize,
    /// Next edge ID counter
    pub next_edge_id: usize,
}
|
||||
|
||||
/// Serializable node
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeState {
    /// Node ID in string form (presumably referenced by `EdgeState::from`
    /// / `EdgeState::to` -- confirm against the graph engine).
    pub id: String,
    /// Labels attached to the node.
    pub labels: Vec<String>,
    /// Node properties.
    pub properties: HashMap<String, PropertyValue>,
}
|
||||
|
||||
/// Serializable edge
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeState {
    /// Edge ID.
    pub id: String,
    /// Source node ID.
    pub from: String,
    /// Target node ID.
    pub to: String,
    /// Relationship type label.
    pub edge_type: String,
    /// Edge properties.
    pub properties: HashMap<String, PropertyValue>,
}
|
||||
|
||||
/// Property value for serialization (mirrors cypher::Value)
///
/// Adjacently tagged: each value serializes as
/// `{"type": "...", "value": ...}` (unit variant `Null` carries no payload).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum PropertyValue {
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
    /// Nested list of values.
    List(Vec<PropertyValue>),
    /// String-keyed map of nested values.
    Map(HashMap<String, PropertyValue>),
}
|
||||
|
||||
/// Serializable SPARQL triple store state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TripleStoreState {
    /// All triples
    pub triples: Vec<TripleState>,
    /// Named graphs: graph name -> triple IDs (the IDs presumably index
    /// into `triples` by `TripleState::id` -- confirm against the engine)
    pub named_graphs: HashMap<String, Vec<u64>>,
    /// Default graph triple IDs
    pub default_graph: Vec<u64>,
    /// Next triple ID counter
    pub next_id: u64,
}
|
||||
|
||||
/// Serializable RDF triple
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TripleState {
    /// Triple ID (graph membership lists in `TripleStoreState` store IDs).
    pub id: u64,
    /// Subject term.
    pub subject: RdfTermState,
    /// Predicate, stored as a plain string rather than an `RdfTermState`.
    pub predicate: String,
    /// Object term.
    pub object: RdfTermState,
}
|
||||
|
||||
/// Serializable RDF term
///
/// Internally tagged: the variant name is stored under a `"type"` key
/// alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RdfTermState {
    /// An IRI reference.
    Iri {
        value: String,
    },
    /// A literal with its datatype and optional language tag.
    Literal {
        value: String,
        datatype: String,
        language: Option<String>,
    },
    /// An anonymous (blank) node identified by a local label.
    BlankNode {
        id: String,
    },
}
|
||||
|
||||
/// Serializable SQL table schema state
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlTableState {
    /// Table name.
    pub name: String,
    /// Column definitions.
    pub columns: Vec<SqlColumnState>,
    /// Name of the vector-typed column, if the table has one.
    pub vector_column: Option<String>,
    /// Dimensionality of the vector column, if any.
    pub vector_dimensions: Option<usize>,
}
|
||||
|
||||
/// Serializable SQL column
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlColumnState {
    /// Column name.
    pub name: String,
    /// SQL data type, stored by its textual name.
    pub data_type: String,
    /// Vector dimensions -- presumably set only for vector-typed columns;
    /// confirm against the SQL engine.
    pub dimensions: Option<usize>,
}
|
||||
555
vendor/ruvector/crates/rvlite/src/storage/writer_lease.rs
vendored
Normal file
555
vendor/ruvector/crates/rvlite/src/storage/writer_lease.rs
vendored
Normal file
@@ -0,0 +1,555 @@
|
||||
//! File-based writer lease for single-writer concurrency in rvlite.
|
||||
//!
|
||||
//! Provides a cooperative lock mechanism using a lock file with PID and
|
||||
//! timestamp. Only one writer may hold the lease at a time. The lease
|
||||
//! includes a heartbeat timestamp that is checked for staleness so that
|
||||
//! crashed processes do not permanently block new writers.
|
||||
//!
|
||||
//! Lock file location: `{store_path}.lock`
|
||||
//! Lock file contents: JSON with `pid`, `timestamp_secs`, `hostname`.
|
||||
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Default staleness threshold -- if the heartbeat is older than this
/// duration, the lease is considered abandoned and may be force-acquired.
/// Long-running writers should call `WriterLease::refresh_heartbeat` well
/// within this window.
const DEFAULT_STALE_THRESHOLD: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Contents written to the lock file.
///
/// Serialized as JSON so the lock file is human-inspectable when
/// debugging a stuck writer.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LeaseMeta {
    /// Process ID of the lock holder.
    pid: u32,
    /// Unix timestamp in seconds when the lease was last refreshed.
    timestamp_secs: u64,
    /// Hostname of the lock holder; PID liveness is only checked when it
    /// matches the local hostname.
    hostname: String,
}
|
||||
|
||||
/// A writer lease backed by a lock file on disk.
///
/// While this struct is alive, the lease is held. Dropping it releases
/// the lock file automatically via the `Drop` implementation.
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use std::time::Duration;
/// # // This is a doc-test stub; actual usage requires the rvf-backend feature.
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// // let lease = WriterLease::acquire(Path::new("/data/store.rvf"), Duration::from_secs(5))?;
/// // ... perform writes ...
/// // lease.release()?; // or just let it drop
/// # Ok(())
/// # }
/// ```
pub struct WriterLease {
    /// Path to the lock file.
    lock_path: PathBuf,
    /// Our PID, used to verify ownership on release.
    pid: u32,
    /// Whether the lease has been explicitly released.
    /// Set by `release()`; checked by `Drop` to keep release idempotent.
    released: bool,
}
|
||||
|
||||
impl WriterLease {
    /// Attempt to acquire the writer lease for the given store path.
    ///
    /// The lock file is created at `{path}.lock`. If another process holds
    /// the lease, this function will retry until `timeout` elapses. If the
    /// existing lease is stale (heartbeat older than the threshold, OR the
    /// holder PID no longer alive on this host -- see [`Self::is_stale`]),
    /// the stale lock is broken and acquisition proceeds.
    ///
    /// # Errors
    ///
    /// Returns `io::Error` with `WouldBlock` if the timeout expires without
    /// acquiring the lease, or propagates any underlying I/O errors.
    pub fn acquire(path: &Path, timeout: Duration) -> io::Result<Self> {
        let lock_path = lock_path_for(path);
        let pid = std::process::id();
        let deadline = Instant::now() + timeout;

        loop {
            // Try to create the lock file exclusively.
            match try_create_lock(&lock_path, pid) {
                Ok(()) => {
                    return Ok(WriterLease {
                        lock_path,
                        pid,
                        released: false,
                    });
                }
                Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                    // Lock file exists -- check if it is stale.
                    if Self::is_stale(&lock_path, DEFAULT_STALE_THRESHOLD) {
                        // Force-remove the stale lock and retry.
                        // NOTE(review): two waiters can race here; the loser
                        // of the subsequent exclusive create just loops again.
                        let _ = fs::remove_file(&lock_path);
                        continue;
                    }

                    // Lock is active. Check timeout.
                    if Instant::now() >= deadline {
                        return Err(io::Error::new(
                            io::ErrorKind::WouldBlock,
                            format!(
                                "writer lease acquisition timed out after {:?} for {:?}",
                                timeout, lock_path
                            ),
                        ));
                    }

                    // Brief sleep before retrying.
                    std::thread::sleep(Duration::from_millis(50));
                }
                Err(e) => return Err(e),
            }
        }
    }

    /// Explicitly release the writer lease.
    ///
    /// Verifies that the lock file still belongs to this process before
    /// removing it to avoid deleting a lock acquired by another process
    /// after a stale break. Idempotent: a second call is a no-op.
    pub fn release(&mut self) -> io::Result<()> {
        if self.released {
            return Ok(());
        }
        self.do_release();
        self.released = true;
        Ok(())
    }

    /// Refresh the heartbeat timestamp in the lock file.
    ///
    /// Writers performing long operations should call this periodically
    /// (e.g. every 10 seconds) to prevent the lease from appearing stale.
    pub fn refresh_heartbeat(&self) -> io::Result<()> {
        if self.released {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "cannot refresh a released lease",
            ));
        }
        // Verify we still own the lock: another process may have broken a
        // lease it judged stale and re-acquired under its own PID.
        if !self.owns_lock() {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "lease was taken over by another process",
            ));
        }
        write_lock_file(&self.lock_path, self.pid)
    }

    /// Check whether the lock file at the given path is stale.
    ///
    /// A lock is stale if ANY of the following hold:
    /// - The lock file does not exist (vacuously stale).
    /// - The lock file cannot be parsed.
    /// - The heartbeat timestamp is older than `threshold`.
    /// - The PID in the lock file is not alive on the current host.
    pub fn is_stale(path: &Path, threshold: Duration) -> bool {
        // Accept either a store path or the lock path itself.
        let lock_path = if path.extension().map_or(false, |e| e == "lock") {
            path.to_path_buf()
        } else {
            lock_path_for(path)
        };

        let content = match fs::read_to_string(&lock_path) {
            Ok(c) => c,
            Err(_) => return true, // Missing or unreadable = stale.
        };

        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return true, // Corrupt = stale.
        };

        // Check age. saturating_sub guards against clock skew making the
        // recorded timestamp appear to be in the future.
        let now_secs = current_unix_secs();
        let age_secs = now_secs.saturating_sub(meta.timestamp_secs);
        if age_secs > threshold.as_secs() {
            return true;
        }

        // Check if PID is alive (only meaningful on same host).
        let our_hostname = get_hostname();
        if meta.hostname == our_hostname && !is_pid_alive(meta.pid) {
            return true;
        }

        false
    }

    /// Return the path to the lock file.
    pub fn lock_path(&self) -> &Path {
        &self.lock_path
    }

    /// Check whether this lease still owns the lock file.
    ///
    /// Ownership is judged by the PID recorded in the file matching ours;
    /// any read/parse failure counts as "not owned".
    fn owns_lock(&self) -> bool {
        let content = match fs::read_to_string(&self.lock_path) {
            Ok(c) => c,
            Err(_) => return false,
        };
        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return false,
        };
        meta.pid == self.pid
    }

    /// Internal release logic: remove the lock file only if it is still
    /// ours; removal errors are deliberately ignored (best effort).
    fn do_release(&self) {
        if self.owns_lock() {
            let _ = fs::remove_file(&self.lock_path);
        }
    }
}
|
||||
|
||||
impl Drop for WriterLease {
    /// Best-effort release on drop; removal errors are silently ignored
    /// because `drop` cannot report failure.
    fn drop(&mut self) {
        if !self.released {
            self.do_release();
            self.released = true;
        }
    }
}
|
||||
|
||||
impl std::fmt::Debug for WriterLease {
    // Manual impl equivalent to what `#[derive(Debug)]` would emit for
    // these three fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("WriterLease")
            .field("lock_path", &self.lock_path)
            .field("pid", &self.pid)
            .field("released", &self.released)
            .finish()
    }
}
|
||||
|
||||
// ---- Helper functions ----
|
||||
|
||||
/// Compute the lock file path for a store path.
///
/// Appends the literal suffix `.lock` to the complete file name, so
/// `store.rvf` maps to `store.rvf.lock` (the original extension is kept).
fn lock_path_for(store_path: &Path) -> PathBuf {
    let mut name = store_path.as_os_str().to_os_string();
    name.push(".lock");
    name.into()
}
|
||||
|
||||
/// Try to atomically create the lock file. Fails with `AlreadyExists` if
|
||||
/// another process holds the lock.
|
||||
fn try_create_lock(lock_path: &Path, pid: u32) -> io::Result<()> {
|
||||
// Ensure parent directory exists.
|
||||
if let Some(parent) = lock_path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
// Use create_new for O_CREAT | O_EXCL semantics.
|
||||
let meta = LeaseMeta {
|
||||
pid,
|
||||
timestamp_secs: current_unix_secs(),
|
||||
hostname: get_hostname(),
|
||||
};
|
||||
let content = serde_json::to_string(&meta)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;
|
||||
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(lock_path)?;
|
||||
file.write_all(content.as_bytes())?;
|
||||
file.sync_all()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Overwrite an existing lock file with a fresh timestamp.
|
||||
fn write_lock_file(lock_path: &Path, pid: u32) -> io::Result<()> {
|
||||
let meta = LeaseMeta {
|
||||
pid,
|
||||
timestamp_secs: current_unix_secs(),
|
||||
hostname: get_hostname(),
|
||||
};
|
||||
let content = serde_json::to_string(&meta)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;
|
||||
fs::write(lock_path, content.as_bytes())
|
||||
}
|
||||
|
||||
/// Get the current Unix timestamp in seconds.
///
/// Returns 0 if the system clock reports a time before the Unix epoch.
fn current_unix_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
/// Best-effort hostname retrieval.
///
/// Prefers the `HOSTNAME` environment variable (returned verbatim); falls
/// back to `/etc/hostname` (trimmed), then the literal `"unknown"`.
fn get_hostname() -> String {
    match std::env::var("HOSTNAME") {
        Ok(name) => name,
        Err(_) => {
            let file_contents =
                fs::read_to_string("/etc/hostname").unwrap_or_else(|_| "unknown".into());
            file_contents.trim().to_string()
        }
    }
}
|
||||
|
||||
/// Check whether a process with the given PID is alive.
///
/// On Unix this probes with `kill(pid, 0)`, which checks existence and
/// permissions without delivering a signal. On non-Unix targets it
/// conservatively reports `true`, so locks are never broken on PID
/// grounds there.
fn is_pid_alive(pid: u32) -> bool {
    #[cfg(unix)]
    {
        // kill(pid, 0) checks existence without sending a signal.
        // SAFETY: kill(2) accepts any pid/signal pair; signal 0 performs no
        // action beyond the existence/permission check.
        let ret = unsafe { libc_kill(pid as i32, 0) };
        if ret == 0 {
            return true;
        }
        // EPERM means the process exists but belongs to another user.
        // SAFETY: errno_location() yields a valid thread-local pointer; it is
        // read immediately after the failing call, so the value is ours.
        let errno = unsafe { *errno_location() };
        errno == 1 // EPERM
    }
    #[cfg(not(unix))]
    {
        let _ = pid;
        true // Conservatively assume alive on non-Unix.
    }
}
|
||||
|
||||
// Minimal hand-rolled libc bindings: declaring these two symbols directly
// avoids pulling in the `libc` crate for a single existence probe.

#[cfg(unix)]
extern "C" {
    fn kill(pid: i32, sig: i32) -> i32;
}

#[cfg(any(target_os = "linux", target_os = "android"))]
extern "C" {
    // glibc/musl/bionic expose errno through __errno_location().
    fn __errno_location() -> *mut i32;
}

#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
extern "C" {
    // Darwin and FreeBSD libc expose errno through __error().
    fn __error() -> *mut i32;
}

/// Thin wrapper so callers use one name for the raw `kill` syscall binding.
#[cfg(unix)]
unsafe fn libc_kill(pid: i32, sig: i32) -> i32 {
    unsafe { kill(pid, sig) }
}

/// Platform-neutral accessor for the thread-local errno pointer.
#[cfg(any(target_os = "linux", target_os = "android"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __errno_location() }
}

/// Platform-neutral accessor for the thread-local errno pointer.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __error() }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};

    /// Counter to generate unique directory names for each test, avoiding
    /// cross-test interference when running in parallel.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a fresh temp directory unique to this process, counter value,
    /// and test name.
    fn unique_dir(name: &str) -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
        let dir = std::env::temp_dir().join(format!(
            "rvlite_lease_{}_{}_{}",
            std::process::id(),
            id,
            name
        ));
        let _ = fs::create_dir_all(&dir);
        dir
    }

    /// Best-effort removal of a test directory.
    fn cleanup(dir: &Path) {
        let _ = fs::remove_dir_all(dir);
    }

    #[test]
    fn lock_path_computation() {
        let p = Path::new("/tmp/store.rvf");
        assert_eq!(lock_path_for(p), PathBuf::from("/tmp/store.rvf.lock"));
    }

    #[test]
    fn acquire_and_release() {
        let dir = unique_dir("acquire_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert!(lease.lock_path().exists());

        lease.release().unwrap();
        assert!(!lease.lock_path().exists());

        cleanup(&dir);
    }

    #[test]
    fn double_acquire_fails_within_timeout() {
        let dir = unique_dir("double_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // Second acquire should time out quickly. The lock is held by our own
        // PID and is fresh, so it cannot be broken as stale.
        let result = WriterLease::acquire(&store_path, Duration::from_millis(150));
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::WouldBlock);

        cleanup(&dir);
    }

    #[test]
    fn drop_releases_lease() {
        let dir = unique_dir("drop_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lock_file = lock_path_for(&store_path);

        {
            let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
            assert!(lock_file.exists());
        }
        // After drop, lock file should be gone.
        assert!(!lock_file.exists());

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_is_detected() {
        let dir = unique_dir("stale_detect");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Write a lock file with a very old timestamp and dead PID.
        let meta = LeaseMeta {
            pid: 999_999_999, // Almost certainly not alive.
            timestamp_secs: current_unix_secs().saturating_sub(120),
            hostname: get_hostname(),
        };
        let content = serde_json::to_string(&meta).unwrap();
        fs::write(&lock_path, content).unwrap();

        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn fresh_lease_is_not_stale() {
        let dir = unique_dir("fresh_lease");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        assert!(!WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn missing_lock_file_is_stale() {
        // A nonexistent lock file is vacuously stale.
        let path = Path::new("/tmp/nonexistent_rvlite_test_12345.rvf");
        assert!(WriterLease::is_stale(path, DEFAULT_STALE_THRESHOLD));
    }

    #[test]
    fn corrupt_lock_file_is_stale() {
        let dir = unique_dir("corrupt");
        let store_path = dir.join("test.rvf");
        let lock_path = lock_path_for(&store_path);

        let _ = fs::create_dir_all(&dir);
        // Unparseable contents must be treated as stale, not as held.
        fs::write(&lock_path, b"not json").unwrap();
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn refresh_heartbeat_updates_timestamp() {
        let dir = unique_dir("heartbeat");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // refresh_heartbeat overwrites the lock file with a new timestamp.
        lease.refresh_heartbeat().unwrap();

        // Read back and verify timestamp is recent.
        let content = fs::read_to_string(lease.lock_path()).unwrap();
        let meta: LeaseMeta = serde_json::from_str(&content).unwrap();
        let age = current_unix_secs().saturating_sub(meta.timestamp_secs);
        assert!(age < 5, "heartbeat should be very recent, got age={age}s");

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_force_acquire() {
        let dir = unique_dir("force_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Simulate a stale lock from a dead process.
        let meta = LeaseMeta {
            pid: 999_999_999,
            timestamp_secs: current_unix_secs().saturating_sub(60),
            hostname: get_hostname(),
        };
        fs::write(&lock_path, serde_json::to_string(&meta).unwrap()).unwrap();

        // Should succeed because the existing lock is stale.
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert_eq!(lease.pid, std::process::id());

        lease.release().unwrap();
        cleanup(&dir);
    }

    #[test]
    fn release_is_idempotent() {
        let dir = unique_dir("idempotent");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        lease.release().unwrap();
        // Second release should be a no-op.
        lease.release().unwrap();

        cleanup(&dir);
    }

    #[test]
    fn debug_format() {
        let dir = unique_dir("debug_fmt");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        let debug = format!("{:?}", lease);
        assert!(debug.contains("WriterLease"));
        assert!(debug.contains("lock_path"));

        cleanup(&dir);
    }
}
|
||||
Reference in New Issue
Block a user