Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,738 @@
//! Contracted Graph Module for Integrity Control Plane
//!
//! This module implements the contracted operational graph - a fixed-size
//! meta-graph of ~1000 nodes representing partitions, centroids, shards, and
//! dependencies. This is NOT the full similarity graph.
//!
//! The contracted graph enables efficient mincut computation for integrity gating.
use std::collections::HashMap;
use std::fmt;
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
/// Node types in the contracted graph
///
/// The serde `snake_case` rename means serialized names match the strings
/// produced by `Display` and accepted by `from_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeType {
    /// Data partition/segment
    Partition,
    /// IVFFlat centroid
    Centroid,
    /// Distributed shard
    Shard,
    /// External dependency (backup, compaction, etc.)
    ExternalDependency,
    /// Hybrid index node
    HybridIndex,
}
impl fmt::Display for NodeType {
    /// Formats the node type as its snake_case wire name (the same
    /// strings `from_str` accepts).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            NodeType::Partition => "partition",
            NodeType::Centroid => "centroid",
            NodeType::Shard => "shard",
            NodeType::ExternalDependency => "external_dependency",
            NodeType::HybridIndex => "hybrid_index",
        };
        f.write_str(name)
    }
}
impl NodeType {
    /// Parse a node type from its snake_case name, case-insensitively.
    ///
    /// Returns `None` for unrecognized strings.
    pub fn from_str(s: &str) -> Option<Self> {
        let lowered = s.to_lowercase();
        if lowered == "partition" {
            Some(NodeType::Partition)
        } else if lowered == "centroid" {
            Some(NodeType::Centroid)
        } else if lowered == "shard" {
            Some(NodeType::Shard)
        } else if lowered == "external_dependency" {
            Some(NodeType::ExternalDependency)
        } else if lowered == "hybrid_index" {
            Some(NodeType::HybridIndex)
        } else {
            None
        }
    }
}
/// Edge types representing data flow between components
///
/// Serialized names are snake_case, matching `Display` and `from_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeType {
    /// Data flow between partitions
    PartitionLink,
    /// Query routing paths
    RoutingLink,
    /// Operational dependency
    Dependency,
    /// Replication stream
    Replication,
}
impl fmt::Display for EdgeType {
    /// Formats the edge type as its snake_case wire name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            EdgeType::PartitionLink => "partition_link",
            EdgeType::RoutingLink => "routing_link",
            EdgeType::Dependency => "dependency",
            EdgeType::Replication => "replication",
        };
        f.write_str(name)
    }
}
impl EdgeType {
    /// Parse an edge type from its snake_case name, case-insensitively.
    ///
    /// Returns `None` for unrecognized strings.
    pub fn from_str(s: &str) -> Option<Self> {
        let lowered = s.to_lowercase();
        if lowered == "partition_link" {
            Some(EdgeType::PartitionLink)
        } else if lowered == "routing_link" {
            Some(EdgeType::RoutingLink)
        } else if lowered == "dependency" {
            Some(EdgeType::Dependency)
        } else if lowered == "replication" {
            Some(EdgeType::Replication)
        } else {
            None
        }
    }
}
/// A node in the contracted graph
///
/// Uniquely identified within a collection by `(node_type, node_id)`
/// (see `key`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedNode {
    /// Collection this node belongs to
    pub collection_id: i32,
    /// Type of the node
    pub node_type: NodeType,
    /// Unique identifier within the type
    pub node_id: i64,
    /// Human-readable name
    pub node_name: Option<String>,
    /// Additional metadata (free-form JSON; builder methods store an object)
    pub node_data: serde_json::Value,
    /// Health score (0.0 = failed, 1.0 = healthy); `with_health` clamps
    /// to this range, but the field is public so direct writes are not
    /// validated
    pub health_score: f32,
}
impl ContractedNode {
/// Create a new contracted node
pub fn new(collection_id: i32, node_type: NodeType, node_id: i64) -> Self {
Self {
collection_id,
node_type,
node_id,
node_name: None,
node_data: serde_json::json!({}),
health_score: 1.0,
}
}
/// Set the node name
pub fn with_name(mut self, name: impl Into<String>) -> Self {
self.node_name = Some(name.into());
self
}
/// Set node data
pub fn with_data(mut self, data: serde_json::Value) -> Self {
self.node_data = data;
self
}
/// Set health score
pub fn with_health(mut self, health: f32) -> Self {
self.health_score = health.clamp(0.0, 1.0);
self
}
/// Get the unique key for this node
pub fn key(&self) -> (NodeType, i64) {
(self.node_type, self.node_id)
}
}
/// An edge in the contracted graph
///
/// Endpoints are identified by `(NodeType, i64)` keys into the node set.
/// Treated as undirected when building the capacity matrix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedEdge {
    /// Collection this edge belongs to
    pub collection_id: i32,
    /// Source node type
    pub source_type: NodeType,
    /// Source node ID
    pub source_id: i64,
    /// Target node type
    pub target_type: NodeType,
    /// Target node ID
    pub target_id: i64,
    /// Type of the edge
    pub edge_type: EdgeType,
    /// Max-flow capacity (builder floors at 0; units are relative weights)
    pub capacity: f32,
    /// Current utilization (builder floors at 0)
    pub current_flow: f32,
    /// Edge latency in milliseconds
    pub latency_ms: Option<f32>,
    /// Recent error rate (0.0-1.0; `with_error_rate` clamps, but the
    /// field is public so direct writes are not validated)
    pub error_rate: f32,
}
impl ContractedEdge {
    /// Create a new contracted edge with default capacity 1.0, zero flow,
    /// unknown latency, and zero error rate.
    pub fn new(
        collection_id: i32,
        source_type: NodeType,
        source_id: i64,
        target_type: NodeType,
        target_id: i64,
        edge_type: EdgeType,
    ) -> Self {
        Self {
            collection_id,
            source_type,
            source_id,
            target_type,
            target_id,
            edge_type,
            capacity: 1.0,
            current_flow: 0.0,
            latency_ms: None,
            error_rate: 0.0,
        }
    }
    /// Set capacity (negative values are floored at 0).
    pub fn with_capacity(mut self, capacity: f32) -> Self {
        self.capacity = capacity.max(0.0);
        self
    }
    /// Set current flow (negative values are floored at 0).
    pub fn with_flow(mut self, flow: f32) -> Self {
        self.current_flow = flow.max(0.0);
        self
    }
    /// Set latency in milliseconds.
    pub fn with_latency(mut self, latency_ms: f32) -> Self {
        self.latency_ms = Some(latency_ms);
        self
    }
    /// Set error rate, clamped to [0.0, 1.0].
    pub fn with_error_rate(mut self, error_rate: f32) -> Self {
        self.error_rate = error_rate.clamp(0.0, 1.0);
        self
    }
    /// Get effective capacity: `capacity` scaled down by the error rate.
    ///
    /// Because the fields are public, `error_rate` may have been mutated
    /// outside the clamping builder methods; re-clamp it here so the
    /// result can never go negative and corrupt a downstream capacity
    /// matrix used for mincut.
    pub fn effective_capacity(&self) -> f64 {
        let error_rate = (self.error_rate as f64).clamp(0.0, 1.0);
        (self.capacity as f64) * (1.0 - error_rate)
    }
    /// Get the source (type, id) key.
    pub fn source_key(&self) -> (NodeType, i64) {
        (self.source_type, self.source_id)
    }
    /// Get the target (type, id) key.
    pub fn target_key(&self) -> (NodeType, i64) {
        (self.target_type, self.target_id)
    }
}
/// The contracted graph structure
///
/// A small meta-graph (nodes + edges stored as flat vectors); lookups go
/// through `build_node_index`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedGraph {
    /// Collection ID this graph belongs to
    pub collection_id: i32,
    /// All nodes in the graph
    pub nodes: Vec<ContractedNode>,
    /// All edges in the graph
    pub edges: Vec<ContractedEdge>,
    /// When the graph was last updated (stamped at construction)
    pub last_updated: std::time::SystemTime,
}
impl ContractedGraph {
    /// Create a new empty contracted graph for `collection_id`.
    pub fn new(collection_id: i32) -> Self {
        Self {
            collection_id,
            nodes: Vec::new(),
            edges: Vec::new(),
            last_updated: std::time::SystemTime::now(),
        }
    }
    /// Add a node to the graph and refresh `last_updated`.
    pub fn add_node(&mut self, node: ContractedNode) {
        self.nodes.push(node);
        // Fix: mutation previously left last_updated stale.
        self.last_updated = std::time::SystemTime::now();
    }
    /// Add an edge to the graph and refresh `last_updated`.
    pub fn add_edge(&mut self, edge: ContractedEdge) {
        self.edges.push(edge);
        // Fix: mutation previously left last_updated stale.
        self.last_updated = std::time::SystemTime::now();
    }
    /// Get node count.
    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }
    /// Get edge count.
    pub fn edge_count(&self) -> usize {
        self.edges.len()
    }
    /// Build a (type, id) -> vector-index map for quick node lookups.
    ///
    /// If duplicate keys exist, the later node wins.
    pub fn build_node_index(&self) -> HashMap<(NodeType, i64), usize> {
        self.nodes
            .iter()
            .enumerate()
            .map(|(i, n)| (n.key(), i))
            .collect()
    }
    /// Build a symmetric (undirected) capacity matrix for mincut
    /// computation, plus the node index used to map keys to rows.
    ///
    /// Edges whose endpoints are not in the node index are skipped.
    /// Fix: parallel edges between the same pair now accumulate (`+=`)
    /// instead of overwriting each other, and self-loops (which carry no
    /// cut capacity) are ignored.
    pub fn build_capacity_matrix(&self) -> (Vec<Vec<f64>>, HashMap<(NodeType, i64), usize>) {
        let n = self.nodes.len();
        let node_index = self.build_node_index();
        let mut capacity = vec![vec![0.0f64; n]; n];
        for edge in &self.edges {
            if let (Some(&i), Some(&j)) = (
                node_index.get(&edge.source_key()),
                node_index.get(&edge.target_key()),
            ) {
                if i == j {
                    continue; // self-loops are irrelevant to any cut
                }
                let cap = edge.effective_capacity();
                capacity[i][j] += cap;
                capacity[j][i] += cap; // undirected graph
            }
        }
        (capacity, node_index)
    }
    /// Get graph statistics: counts by type plus averages.
    ///
    /// Empty-graph defaults: avg health 1.0, avg capacity 1.0, avg error
    /// rate 0.0 ("assume healthy until populated").
    pub fn stats(&self) -> ContractedGraphStats {
        let mut node_counts: HashMap<NodeType, usize> = HashMap::new();
        let mut edge_counts: HashMap<EdgeType, usize> = HashMap::new();
        let mut total_health = 0.0f32;
        let mut total_capacity = 0.0f32;
        let mut total_error_rate = 0.0f32;
        for node in &self.nodes {
            *node_counts.entry(node.node_type).or_insert(0) += 1;
            total_health += node.health_score;
        }
        for edge in &self.edges {
            *edge_counts.entry(edge.edge_type).or_insert(0) += 1;
            total_capacity += edge.capacity;
            total_error_rate += edge.error_rate;
        }
        let avg_health = if self.nodes.is_empty() {
            1.0
        } else {
            total_health / self.nodes.len() as f32
        };
        let avg_capacity = if self.edges.is_empty() {
            1.0
        } else {
            total_capacity / self.edges.len() as f32
        };
        let avg_error_rate = if self.edges.is_empty() {
            0.0
        } else {
            total_error_rate / self.edges.len() as f32
        };
        ContractedGraphStats {
            node_count: self.nodes.len(),
            edge_count: self.edges.len(),
            node_counts,
            edge_counts,
            avg_node_health: avg_health,
            avg_edge_capacity: avg_capacity,
            avg_error_rate,
        }
    }
}
/// Statistics about the contracted graph
///
/// Produced by `ContractedGraph::stats`; averages default to 1.0 / 1.0 /
/// 0.0 for an empty graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedGraphStats {
    /// Total node count
    pub node_count: usize,
    /// Total edge count
    pub edge_count: usize,
    /// Nodes by type
    pub node_counts: HashMap<NodeType, usize>,
    /// Edges by type
    pub edge_counts: HashMap<EdgeType, usize>,
    /// Average node health
    pub avg_node_health: f32,
    /// Average edge capacity
    pub avg_edge_capacity: f32,
    /// Average error rate
    pub avg_error_rate: f32,
}
/// Builder for constructing contracted graphs
///
/// Accumulates nodes and edges, then `build()` stamps `last_updated` and
/// produces the graph.
pub struct ContractedGraphBuilder {
    // Collection the built graph will belong to.
    collection_id: i32,
    // Nodes accumulated so far.
    nodes: Vec<ContractedNode>,
    // Edges accumulated so far.
    edges: Vec<ContractedEdge>,
}
impl ContractedGraphBuilder {
/// Create a new builder for a collection
pub fn new(collection_id: i32) -> Self {
Self {
collection_id,
nodes: Vec::new(),
edges: Vec::new(),
}
}
/// Add partition nodes
pub fn add_partition_nodes(&mut self, count: usize, health_scores: Option<&[f32]>) {
for i in 0..count {
let health = health_scores.and_then(|h| h.get(i).copied()).unwrap_or(1.0);
let node = ContractedNode::new(self.collection_id, NodeType::Partition, i as i64)
.with_name(format!("partition_{}", i))
.with_data(serde_json::json!({"index": i}))
.with_health(health);
self.nodes.push(node);
}
}
/// Add centroid nodes (for IVFFlat)
pub fn add_centroid_nodes(&mut self, count: usize, health_scores: Option<&[f32]>) {
for i in 0..count {
let health = health_scores.and_then(|h| h.get(i).copied()).unwrap_or(1.0);
let node = ContractedNode::new(self.collection_id, NodeType::Centroid, i as i64)
.with_name(format!("centroid_{}", i))
.with_data(serde_json::json!({"list_id": i}))
.with_health(health);
self.nodes.push(node);
}
}
/// Add shard nodes
pub fn add_shard_nodes(&mut self, count: usize, primary_index: usize) {
for i in 0..count {
let is_primary = i == primary_index;
let node = ContractedNode::new(self.collection_id, NodeType::Shard, i as i64)
.with_name(if is_primary {
format!("primary_shard_{}", i)
} else {
format!("replica_shard_{}", i)
})
.with_data(serde_json::json!({
"type": if is_primary { "primary" } else { "replica" },
"index": i
}))
.with_health(1.0);
self.nodes.push(node);
}
}
/// Add external dependency nodes
pub fn add_dependency_nodes(&mut self, dependencies: &[(&str, f32)]) {
for (i, (name, health)) in dependencies.iter().enumerate() {
let node =
ContractedNode::new(self.collection_id, NodeType::ExternalDependency, i as i64)
.with_name(*name)
.with_data(serde_json::json!({"service": name}))
.with_health(*health);
self.nodes.push(node);
}
}
/// Add partition-to-partition edges (data flow)
pub fn add_partition_links(&mut self) {
let partition_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::Partition)
.collect();
for i in 0..partition_nodes.len() {
for j in (i + 1)..partition_nodes.len() {
let edge = ContractedEdge::new(
self.collection_id,
NodeType::Partition,
partition_nodes[i].node_id,
NodeType::Partition,
partition_nodes[j].node_id,
EdgeType::PartitionLink,
)
.with_capacity(1.0);
self.edges.push(edge);
}
}
}
/// Add centroid-to-shard edges (routing)
pub fn add_routing_links(&mut self) {
let centroid_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::Centroid)
.collect();
let shard_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::Shard)
.collect();
for centroid in &centroid_nodes {
for shard in &shard_nodes {
let edge = ContractedEdge::new(
self.collection_id,
NodeType::Centroid,
centroid.node_id,
NodeType::Shard,
shard.node_id,
EdgeType::RoutingLink,
)
.with_capacity(centroid.health_score);
self.edges.push(edge);
}
}
}
/// Add shard-to-dependency edges
pub fn add_dependency_links(&mut self) {
let shard_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::Shard)
.collect();
let dep_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::ExternalDependency)
.collect();
for shard in &shard_nodes {
for dep in &dep_nodes {
let edge = ContractedEdge::new(
self.collection_id,
NodeType::Shard,
shard.node_id,
NodeType::ExternalDependency,
dep.node_id,
EdgeType::Dependency,
)
.with_capacity(dep.health_score);
self.edges.push(edge);
}
}
}
/// Add replication edges between shards
pub fn add_replication_links(&mut self) {
let shard_nodes: Vec<_> = self
.nodes
.iter()
.filter(|n| n.node_type == NodeType::Shard)
.collect();
// Connect primary to replicas
if shard_nodes.len() > 1 {
let primary = &shard_nodes[0];
for replica in shard_nodes.iter().skip(1) {
let edge = ContractedEdge::new(
self.collection_id,
NodeType::Shard,
primary.node_id,
NodeType::Shard,
replica.node_id,
EdgeType::Replication,
)
.with_capacity(1.0);
self.edges.push(edge);
}
}
}
/// Build the contracted graph
pub fn build(self) -> ContractedGraph {
ContractedGraph {
collection_id: self.collection_id,
nodes: self.nodes,
edges: self.edges,
last_updated: std::time::SystemTime::now(),
}
}
/// Build a default graph structure
pub fn build_default(
collection_id: i32,
num_partitions: usize,
num_centroids: usize,
num_shards: usize,
) -> ContractedGraph {
let mut builder = Self::new(collection_id);
// Add nodes
builder.add_partition_nodes(num_partitions.min(100), None);
builder.add_centroid_nodes(num_centroids.min(500), None);
builder.add_shard_nodes(num_shards.min(10), 0);
builder.add_dependency_nodes(&[
("backup_service", 1.0),
("compaction_service", 1.0),
("gnn_trainer", 1.0),
]);
// Add edges
builder.add_partition_links();
builder.add_routing_links();
builder.add_dependency_links();
builder.add_replication_links();
builder.build()
}
}
/// Global registry for contracted graphs, keyed by collection ID.
///
/// Lazily initialized on first access; `DashMap` provides a concurrent
/// map so multiple workers can touch different collections safely.
static GRAPH_REGISTRY: once_cell::sync::Lazy<DashMap<i32, ContractedGraph>> =
    once_cell::sync::Lazy::new(DashMap::new);
/// Get the contracted graph for a collection, building a default one
/// (10 partitions, 100 centroids, 1 shard) on first access.
///
/// Returns an owned clone; mutations must be written back via
/// `store_graph`.
pub fn get_or_create_graph(collection_id: i32) -> ContractedGraph {
    let entry = GRAPH_REGISTRY
        .entry(collection_id)
        .or_insert_with(|| ContractedGraphBuilder::build_default(collection_id, 10, 100, 1));
    entry.clone()
}
/// Get a clone of an existing contracted graph, or `None` if the
/// collection has no registered graph.
pub fn get_graph(collection_id: i32) -> Option<ContractedGraph> {
    let guard = GRAPH_REGISTRY.get(&collection_id)?;
    Some(guard.value().clone())
}
/// Store or update a contracted graph, replacing any previous graph for
/// the same collection.
pub fn store_graph(graph: ContractedGraph) {
    let key = graph.collection_id;
    GRAPH_REGISTRY.insert(key, graph);
}
/// Remove a contracted graph from the registry, returning it if present.
pub fn remove_graph(collection_id: i32) -> Option<ContractedGraph> {
    match GRAPH_REGISTRY.remove(&collection_id) {
        Some((_, graph)) => Some(graph),
        None => None,
    }
}
/// List all collection IDs with contracted graphs
pub fn list_graph_collections() -> Vec<i32> {
GRAPH_REGISTRY.iter().map(|e| *e.key()).collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Builder setters populate every field; health is stored as given
    // (0.95 is within the clamped [0, 1] range).
    #[test]
    fn test_contracted_node_creation() {
        let node = ContractedNode::new(1, NodeType::Partition, 42)
            .with_name("partition_42")
            .with_data(serde_json::json!({"size": 1000}))
            .with_health(0.95);
        assert_eq!(node.collection_id, 1);
        assert_eq!(node.node_type, NodeType::Partition);
        assert_eq!(node.node_id, 42);
        assert_eq!(node.node_name, Some("partition_42".to_string()));
        assert!((node.health_score - 0.95).abs() < 0.001);
    }

    // effective_capacity = capacity * (1 - error_rate): 2.0 * 0.9 = 1.8.
    #[test]
    fn test_contracted_edge_creation() {
        let edge = ContractedEdge::new(
            1,
            NodeType::Partition,
            1,
            NodeType::Partition,
            2,
            EdgeType::PartitionLink,
        )
        .with_capacity(2.0)
        .with_error_rate(0.1);
        assert_eq!(edge.capacity, 2.0);
        assert!((edge.effective_capacity() - 1.8).abs() < 0.001);
    }

    // build_default always adds partitions, centroids, shards, and edges.
    #[test]
    fn test_graph_builder() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 10, 2);
        assert_eq!(graph.collection_id, 1);
        assert!(graph.node_count() > 0);
        assert!(graph.edge_count() > 0);
        let stats = graph.stats();
        assert!(stats.node_counts.contains_key(&NodeType::Partition));
        assert!(stats.node_counts.contains_key(&NodeType::Centroid));
        assert!(stats.node_counts.contains_key(&NodeType::Shard));
    }

    // The capacity matrix is square with one row per node, and the index
    // covers every node.
    #[test]
    fn test_capacity_matrix() {
        let graph = ContractedGraphBuilder::build_default(1, 3, 0, 1);
        let (matrix, index) = graph.build_capacity_matrix();
        assert_eq!(matrix.len(), graph.node_count());
        assert_eq!(index.len(), graph.node_count());
    }

    #[test]
    fn test_node_type_display() {
        assert_eq!(NodeType::Partition.to_string(), "partition");
        assert_eq!(NodeType::Centroid.to_string(), "centroid");
        assert_eq!(
            NodeType::ExternalDependency.to_string(),
            "external_dependency"
        );
    }

    #[test]
    fn test_edge_type_parsing() {
        assert_eq!(
            EdgeType::from_str("partition_link"),
            Some(EdgeType::PartitionLink)
        );
        assert_eq!(
            EdgeType::from_str("routing_link"),
            Some(EdgeType::RoutingLink)
        );
        assert_eq!(EdgeType::from_str("invalid"), None);
    }

    // NOTE(review): exercises the shared global registry; uses a unique
    // collection id (999) to avoid clashing with tests run in parallel.
    #[test]
    fn test_graph_registry() {
        let graph = ContractedGraphBuilder::build_default(999, 2, 2, 1);
        store_graph(graph.clone());
        let retrieved = get_graph(999);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().collection_id, 999);
        remove_graph(999);
        assert!(get_graph(999).is_none());
    }
}

View File

@@ -0,0 +1,726 @@
//! Integrity Events Module
//!
//! Defines integrity event types that trigger contracted graph updates
//! and state transitions. Events support delta updates for efficiency.
use std::collections::VecDeque;
use std::sync::{Arc, RwLock};
use std::time::SystemTime;
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
use super::gating::IntegrityState;
use super::mincut::WitnessEdge;
/// Types of integrity events
///
/// Serialized names are snake_case (matching `Display`). Each type
/// classifies whether it needs a graph update and/or mincut
/// recomputation, and carries a severity (see the impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityEventType {
    /// A new partition was created
    PartitionCreated,
    /// A partition was deleted
    PartitionDeleted,
    /// A partition's health changed
    PartitionHealthChanged,
    /// An IVFFlat centroid was moved/updated
    CentroidMoved,
    /// Centroids were rebalanced
    CentroidRebalanced,
    /// A shard was rebalanced
    ShardRebalanced,
    /// A new shard was added
    ShardAdded,
    /// A shard was removed
    ShardRemoved,
    /// An external dependency became unavailable
    DependencyDown,
    /// An external dependency recovered
    DependencyUp,
    /// Integrity state changed
    StateChanged,
    /// Lambda cut was sampled
    LambdaSampled,
    /// Graph was rebuilt
    GraphRebuilt,
    /// Edge capacity changed significantly
    EdgeCapacityChanged,
    /// Error rate threshold exceeded
    ErrorRateExceeded,
    /// Manual intervention
    ManualOverride,
}
impl std::fmt::Display for IntegrityEventType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = serde_json::to_string(self).unwrap_or_else(|_| "unknown".to_string());
// Remove quotes from JSON string
write!(f, "{}", s.trim_matches('"'))
}
}
impl IntegrityEventType {
/// Whether this event requires graph update
pub fn requires_graph_update(&self) -> bool {
matches!(
self,
IntegrityEventType::PartitionCreated
| IntegrityEventType::PartitionDeleted
| IntegrityEventType::CentroidMoved
| IntegrityEventType::CentroidRebalanced
| IntegrityEventType::ShardRebalanced
| IntegrityEventType::ShardAdded
| IntegrityEventType::ShardRemoved
| IntegrityEventType::DependencyDown
| IntegrityEventType::DependencyUp
)
}
/// Whether this event requires mincut recomputation
pub fn requires_mincut_recomputation(&self) -> bool {
matches!(
self,
IntegrityEventType::PartitionCreated
| IntegrityEventType::PartitionDeleted
| IntegrityEventType::PartitionHealthChanged
| IntegrityEventType::ShardRebalanced
| IntegrityEventType::ShardAdded
| IntegrityEventType::ShardRemoved
| IntegrityEventType::DependencyDown
| IntegrityEventType::DependencyUp
| IntegrityEventType::EdgeCapacityChanged
| IntegrityEventType::GraphRebuilt
)
}
/// Event severity level (0 = info, 1 = warning, 2 = critical)
pub fn severity(&self) -> u8 {
match self {
IntegrityEventType::LambdaSampled => 0,
IntegrityEventType::GraphRebuilt => 0,
IntegrityEventType::PartitionCreated => 0,
IntegrityEventType::CentroidMoved => 0,
IntegrityEventType::CentroidRebalanced => 1,
IntegrityEventType::PartitionDeleted => 1,
IntegrityEventType::PartitionHealthChanged => 1,
IntegrityEventType::ShardRebalanced => 1,
IntegrityEventType::ShardAdded => 1,
IntegrityEventType::EdgeCapacityChanged => 1,
IntegrityEventType::StateChanged => 2,
IntegrityEventType::ShardRemoved => 2,
IntegrityEventType::DependencyDown => 2,
IntegrityEventType::DependencyUp => 1,
IntegrityEventType::ErrorRateExceeded => 2,
IntegrityEventType::ManualOverride => 2,
}
}
}
/// Content of an integrity event
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityEventContent {
    /// Event ID (unique within collection); 0 until assigned by the
    /// event store on `record`
    pub event_id: u64,
    /// Collection this event belongs to
    pub collection_id: i32,
    /// Type of event
    pub event_type: IntegrityEventType,
    /// Previous state (for state changes)
    pub previous_state: Option<IntegrityState>,
    /// New state (for state changes)
    pub new_state: Option<IntegrityState>,
    /// Lambda cut value at event time
    pub lambda_cut: Option<f32>,
    /// Witness edges (for mincut events)
    pub witness_edges: Option<Vec<WitnessEdge>>,
    /// Additional metadata (expected to be a JSON object; see
    /// `with_metadata_field`)
    pub metadata: serde_json::Value,
    /// Event timestamp (set at construction)
    pub created_at: SystemTime,
    /// Source of the event (free-form label, e.g. a worker name)
    pub source: String,
}
impl IntegrityEventContent {
/// Create a new event
pub fn new(
collection_id: i32,
event_type: IntegrityEventType,
source: impl Into<String>,
) -> Self {
Self {
event_id: 0, // Assigned by event store
collection_id,
event_type,
previous_state: None,
new_state: None,
lambda_cut: None,
witness_edges: None,
metadata: serde_json::json!({}),
created_at: SystemTime::now(),
source: source.into(),
}
}
/// Create a state change event
pub fn state_change(
collection_id: i32,
previous: IntegrityState,
new: IntegrityState,
lambda_cut: f32,
witness_edges: Vec<WitnessEdge>,
source: impl Into<String>,
) -> Self {
Self {
event_id: 0,
collection_id,
event_type: IntegrityEventType::StateChanged,
previous_state: Some(previous),
new_state: Some(new),
lambda_cut: Some(lambda_cut),
witness_edges: Some(witness_edges),
metadata: serde_json::json!({
"direction": if new > previous { "degrading" } else { "improving" }
}),
created_at: SystemTime::now(),
source: source.into(),
}
}
/// Create a lambda sampled event
pub fn lambda_sampled(
collection_id: i32,
lambda_cut: f32,
state: IntegrityState,
source: impl Into<String>,
) -> Self {
Self {
event_id: 0,
collection_id,
event_type: IntegrityEventType::LambdaSampled,
previous_state: None,
new_state: Some(state),
lambda_cut: Some(lambda_cut),
witness_edges: None,
metadata: serde_json::json!({}),
created_at: SystemTime::now(),
source: source.into(),
}
}
/// Set metadata
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
self.metadata = metadata;
self
}
/// Add metadata field
pub fn with_metadata_field(mut self, key: &str, value: serde_json::Value) -> Self {
if let serde_json::Value::Object(ref mut map) = self.metadata {
map.insert(key.to_string(), value);
}
self
}
}
/// Delta update for contracted graph
///
/// Node types are referenced by their snake_case string names (e.g.
/// "partition"), matching `NodeType`'s serialized/`Display` names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphDelta {
    /// Collection ID
    pub collection_id: i32,
    /// Nodes to add
    pub add_nodes: Vec<DeltaNode>,
    /// Nodes to remove (by type name and id)
    pub remove_nodes: Vec<(String, i64)>,
    /// Nodes to update
    pub update_nodes: Vec<DeltaNode>,
    /// Edges to add
    pub add_edges: Vec<DeltaEdge>,
    /// Edges to remove (by source and target (type name, id) keys)
    pub remove_edges: Vec<((String, i64), (String, i64))>,
    /// Edges to update
    pub update_edges: Vec<DeltaEdge>,
}
/// Node delta for graph updates
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaNode {
    // Node type name in snake_case (e.g. "partition").
    pub node_type: String,
    // Node ID within the type.
    pub node_id: i64,
    // New node name, if any. NOTE(review): presumably None means "leave
    // unchanged" — the apply code lives elsewhere; confirm there.
    pub node_name: Option<String>,
    // New health score, if any.
    pub health_score: Option<f32>,
    // New metadata payload, if any.
    pub metadata: Option<serde_json::Value>,
}
/// Edge delta for graph updates
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaEdge {
    // Source node type name in snake_case.
    pub source_type: String,
    // Source node ID.
    pub source_id: i64,
    // Target node type name in snake_case.
    pub target_type: String,
    // Target node ID.
    pub target_id: i64,
    // Edge type name in snake_case (e.g. "routing_link").
    pub edge_type: String,
    // New capacity, if any. NOTE(review): presumably None means "leave
    // unchanged" — confirm against the apply code.
    pub capacity: Option<f32>,
    // New current flow, if any.
    pub current_flow: Option<f32>,
    // New error rate, if any.
    pub error_rate: Option<f32>,
}
impl GraphDelta {
/// Create an empty delta
pub fn new(collection_id: i32) -> Self {
Self {
collection_id,
add_nodes: Vec::new(),
remove_nodes: Vec::new(),
update_nodes: Vec::new(),
add_edges: Vec::new(),
remove_edges: Vec::new(),
update_edges: Vec::new(),
}
}
/// Check if delta is empty
pub fn is_empty(&self) -> bool {
self.add_nodes.is_empty()
&& self.remove_nodes.is_empty()
&& self.update_nodes.is_empty()
&& self.add_edges.is_empty()
&& self.remove_edges.is_empty()
&& self.update_edges.is_empty()
}
/// Count total changes
pub fn change_count(&self) -> usize {
self.add_nodes.len()
+ self.remove_nodes.len()
+ self.update_nodes.len()
+ self.add_edges.len()
+ self.remove_edges.len()
+ self.update_edges.len()
}
}
/// Event store for persisting integrity events
///
/// Thread-safe: the buffer and listener list are each behind an RwLock,
/// and IDs come from an atomic counter.
pub struct IntegrityEventStore {
    /// Collection ID
    // NOTE(review): set at construction but not read elsewhere in this
    // module — presumably kept for diagnostics; confirm before removing.
    collection_id: i32,
    /// Maximum events to keep in memory (oldest evicted first)
    max_events: usize,
    /// Event counter for IDs (starts at 1; 0 means "not yet recorded")
    next_event_id: std::sync::atomic::AtomicU64,
    /// In-memory event buffer (front = oldest)
    events: RwLock<VecDeque<IntegrityEventContent>>,
    /// Event listeners, invoked synchronously on each `record`
    listeners: RwLock<Vec<Box<dyn Fn(&IntegrityEventContent) + Send + Sync>>>,
}
impl IntegrityEventStore {
    /// Create a new event store for `collection_id`, retaining at most
    /// `max_events` events in memory.
    pub fn new(collection_id: i32, max_events: usize) -> Self {
        Self {
            collection_id,
            max_events,
            // IDs start at 1 so 0 can mean "not yet recorded".
            next_event_id: std::sync::atomic::AtomicU64::new(1),
            events: RwLock::new(VecDeque::with_capacity(max_events)),
            listeners: RwLock::new(Vec::new()),
        }
    }
    /// Record an event: assign it a fresh ID, append it to the buffer
    /// (evicting the oldest entry once `max_events` is reached), then
    /// invoke all listeners synchronously. Returns the assigned ID.
    ///
    /// NOTE(review): listeners run while the listener list is
    /// read-locked, so a listener that calls `add_listener` on the same
    /// store will deadlock. The `unwrap()`s panic if a lock is poisoned.
    pub fn record(&self, mut event: IntegrityEventContent) -> u64 {
        // Assign event ID
        let event_id = self
            .next_event_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
        event.event_id = event_id;
        // Add to buffer (scoped so the write lock is released before
        // listeners run)
        {
            let mut events = self.events.write().unwrap();
            if events.len() >= self.max_events {
                events.pop_front();
            }
            events.push_back(event.clone());
        }
        // Notify listeners
        {
            let listeners = self.listeners.read().unwrap();
            for listener in listeners.iter() {
                listener(&event);
            }
        }
        event_id
    }
    /// Get up to `count` of the most recent events, newest first.
    pub fn get_recent(&self, count: usize) -> Vec<IntegrityEventContent> {
        let events = self.events.read().unwrap();
        events.iter().rev().take(count).cloned().collect()
    }
    /// Get up to `count` of the most recent events of `event_type`,
    /// newest first.
    pub fn get_by_type(
        &self,
        event_type: IntegrityEventType,
        count: usize,
    ) -> Vec<IntegrityEventContent> {
        let events = self.events.read().unwrap();
        events
            .iter()
            .rev()
            .filter(|e| e.event_type == event_type)
            .take(count)
            .cloned()
            .collect()
    }
    /// Get all buffered events created at or after `since`, oldest first.
    pub fn get_since(&self, since: SystemTime) -> Vec<IntegrityEventContent> {
        let events = self.events.read().unwrap();
        events
            .iter()
            .filter(|e| e.created_at >= since)
            .cloned()
            .collect()
    }
    /// Get the most recent state change events, newest first.
    pub fn get_state_changes(&self, count: usize) -> Vec<IntegrityEventContent> {
        self.get_by_type(IntegrityEventType::StateChanged, count)
    }
    /// Add an event listener, invoked synchronously for every future
    /// `record` call.
    pub fn add_listener<F>(&self, listener: F)
    where
        F: Fn(&IntegrityEventContent) + Send + Sync + 'static,
    {
        let mut listeners = self.listeners.write().unwrap();
        listeners.push(Box::new(listener));
    }
    /// Get the number of currently buffered events.
    pub fn event_count(&self) -> usize {
        self.events.read().unwrap().len()
    }
    /// Clear all buffered events (the ID counter is not reset).
    pub fn clear(&self) {
        self.events.write().unwrap().clear();
    }
    /// Get statistics over the currently buffered events; events already
    /// evicted from the buffer are not counted.
    pub fn stats(&self) -> EventStoreStats {
        let events = self.events.read().unwrap();
        let mut by_type: std::collections::HashMap<IntegrityEventType, usize> =
            std::collections::HashMap::new();
        // Buckets indexed by severity: 0 = info, 1 = warning, 2 = critical.
        let mut by_severity = [0usize; 3];
        for event in events.iter() {
            *by_type.entry(event.event_type).or_insert(0) += 1;
            let severity = event.event_type.severity() as usize;
            if severity < 3 {
                by_severity[severity] += 1;
            }
        }
        EventStoreStats {
            total_events: events.len(),
            by_type,
            info_count: by_severity[0],
            warning_count: by_severity[1],
            critical_count: by_severity[2],
        }
    }
}
/// Statistics about the event store
///
/// Computed over the in-memory buffer only (see
/// `IntegrityEventStore::stats`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventStoreStats {
    // Number of currently buffered events.
    pub total_events: usize,
    // Event counts keyed by event type.
    pub by_type: std::collections::HashMap<IntegrityEventType, usize>,
    // Count of severity-0 (info) events.
    pub info_count: usize,
    // Count of severity-1 (warning) events.
    pub warning_count: usize,
    // Count of severity-2 (critical) events.
    pub critical_count: usize,
}
/// Global registry for event stores, keyed by collection ID.
///
/// Stores are shared via `Arc` so callers can hold a handle while the
/// registry keeps its own.
static EVENT_REGISTRY: once_cell::sync::Lazy<DashMap<i32, Arc<IntegrityEventStore>>> =
    once_cell::sync::Lazy::new(DashMap::new);
/// Get the event store for a collection, creating one (capacity 10000
/// events) on first access. Returns a shared `Arc` handle.
pub fn get_or_create_event_store(collection_id: i32) -> Arc<IntegrityEventStore> {
    let entry = EVENT_REGISTRY
        .entry(collection_id)
        .or_insert_with(|| Arc::new(IntegrityEventStore::new(collection_id, 10000)));
    entry.value().clone()
}
/// Get a handle to an existing event store, or `None` if the collection
/// has no store registered.
pub fn get_event_store(collection_id: i32) -> Option<Arc<IntegrityEventStore>> {
    let guard = EVENT_REGISTRY.get(&collection_id)?;
    Some(Arc::clone(guard.value()))
}
/// Record an integrity event in its collection's store (creating the
/// store if needed) and return the assigned event ID.
pub fn record_event(event: IntegrityEventContent) -> u64 {
    let collection_id = event.collection_id;
    get_or_create_event_store(collection_id).record(event)
}
/// Create a graph delta from an event, or `None` when the event type
/// does not affect the graph or the required metadata field is missing.
///
/// Handles partition create/delete (keyed by metadata "partition_id")
/// and dependency up/down (keyed by "dependency_id", setting health to
/// 1.0 or 0.0 respectively). Other graph-affecting events are handled
/// elsewhere or require full graph information.
pub fn event_to_delta(event: &IntegrityEventContent) -> Option<GraphDelta> {
    if !event.event_type.requires_graph_update() {
        return None;
    }
    let mut delta = GraphDelta::new(event.collection_id);
    match event.event_type {
        IntegrityEventType::PartitionCreated => {
            if let Some(partition_id) =
                event.metadata.get("partition_id").and_then(|v| v.as_i64())
            {
                delta.add_nodes.push(DeltaNode {
                    node_type: "partition".to_string(),
                    node_id: partition_id,
                    node_name: Some(format!("partition_{}", partition_id)),
                    health_score: Some(1.0),
                    metadata: None,
                });
            }
        }
        IntegrityEventType::PartitionDeleted => {
            if let Some(partition_id) =
                event.metadata.get("partition_id").and_then(|v| v.as_i64())
            {
                delta.remove_nodes.push(("partition".to_string(), partition_id));
            }
        }
        // Down and up differ only in the health score they set.
        IntegrityEventType::DependencyDown | IntegrityEventType::DependencyUp => {
            if let Some(dep_id) = event.metadata.get("dependency_id").and_then(|v| v.as_i64()) {
                let health = if event.event_type == IntegrityEventType::DependencyDown {
                    0.0
                } else {
                    1.0
                };
                delta.update_nodes.push(DeltaNode {
                    node_type: "external_dependency".to_string(),
                    node_id: dep_id,
                    node_name: None,
                    health_score: Some(health),
                    metadata: None,
                });
            }
        }
        _ => {
            // Other events handled elsewhere or require full graph info.
        }
    }
    if delta.is_empty() {
        None
    } else {
        Some(delta)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_event_type_display() {
        // Display labels are the snake_case serde names.
        assert_eq!("state_changed", IntegrityEventType::StateChanged.to_string());
        assert_eq!("lambda_sampled", IntegrityEventType::LambdaSampled.to_string());
    }

    #[test]
    fn test_event_type_properties() {
        // Topology events must force a graph update; pure lambda samples must not.
        assert!(IntegrityEventType::PartitionCreated.requires_graph_update());
        assert!(!IntegrityEventType::LambdaSampled.requires_graph_update());
        // Rebuilds invalidate the cached mincut; manual overrides do not.
        assert!(IntegrityEventType::GraphRebuilt.requires_mincut_recomputation());
        assert!(!IntegrityEventType::ManualOverride.requires_mincut_recomputation());
    }

    #[test]
    fn test_event_creation() {
        let created = IntegrityEventContent::new(1, IntegrityEventType::GraphRebuilt, "test");
        assert_eq!(1, created.collection_id);
        assert_eq!(IntegrityEventType::GraphRebuilt, created.event_type);
        assert_eq!("test", created.source);
    }

    #[test]
    fn test_state_change_event() {
        let transition = IntegrityEventContent::state_change(
            1,
            IntegrityState::Normal,
            IntegrityState::Stress,
            0.65,
            vec![],
            "integrity_worker",
        );
        // A state-change event records both endpoints and the triggering lambda.
        assert_eq!(IntegrityEventType::StateChanged, transition.event_type);
        assert_eq!(Some(IntegrityState::Normal), transition.previous_state);
        assert_eq!(Some(IntegrityState::Stress), transition.new_state);
        assert_eq!(Some(0.65), transition.lambda_cut);
    }

    #[test]
    fn test_event_store() {
        let store = IntegrityEventStore::new(1, 100);
        // Event IDs are assigned sequentially starting at 1.
        let first = store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::GraphRebuilt,
            "test",
        ));
        let second = store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::LambdaSampled,
            "test",
        ));
        assert_eq!(1, first);
        assert_eq!(2, second);
        assert_eq!(2, store.event_count());
        // get_recent returns newest-first.
        let recent = store.get_recent(10);
        assert_eq!(2, recent.len());
        assert_eq!(2, recent[0].event_id);
    }

    #[test]
    fn test_event_store_overflow() {
        let store = IntegrityEventStore::new(1, 5);
        // Push twice the capacity; the store must retain only the newest 5.
        for i in 0..10 {
            store.record(IntegrityEventContent::new(
                1,
                IntegrityEventType::LambdaSampled,
                format!("test_{}", i),
            ));
        }
        assert_eq!(5, store.event_count());
        let kept = store.get_recent(10);
        assert_eq!(5, kept.len());
        // Newest entry first; the earliest events were evicted.
        assert!(kept[0].source.contains("test_9"));
    }

    #[test]
    fn test_get_by_type() {
        let store = IntegrityEventStore::new(1, 100);
        for event_type in [
            IntegrityEventType::GraphRebuilt,
            IntegrityEventType::LambdaSampled,
            IntegrityEventType::LambdaSampled,
        ] {
            store.record(IntegrityEventContent::new(1, event_type, "test"));
        }
        // Only the two LambdaSampled events should match the filter.
        let sampled = store.get_by_type(IntegrityEventType::LambdaSampled, 10);
        assert_eq!(2, sampled.len());
    }

    #[test]
    fn test_graph_delta() {
        let mut delta = GraphDelta::new(1);
        assert!(delta.is_empty());
        let node = DeltaNode {
            node_type: "partition".to_string(),
            node_id: 1,
            node_name: None,
            health_score: Some(1.0),
            metadata: None,
        };
        delta.add_nodes.push(node);
        assert!(!delta.is_empty());
        assert_eq!(1, delta.change_count());
    }

    #[test]
    fn test_event_to_delta() {
        let event = IntegrityEventContent::new(1, IntegrityEventType::PartitionCreated, "test")
            .with_metadata_field("partition_id", serde_json::json!(42));
        // A partition-created event maps to a single add-node delta carrying
        // the partition_id from the event metadata.
        let delta = event_to_delta(&event).expect("partition events must yield a delta");
        assert_eq!(1, delta.add_nodes.len());
        assert_eq!(42, delta.add_nodes[0].node_id);
    }

    #[test]
    fn test_event_store_stats() {
        let store = IntegrityEventStore::new(1, 100);
        for event_type in [
            IntegrityEventType::LambdaSampled,
            IntegrityEventType::StateChanged,
            IntegrityEventType::DependencyDown,
        ] {
            store.record(IntegrityEventContent::new(1, event_type, "test"));
        }
        let stats = store.stats();
        assert_eq!(3, stats.total_events);
        // LambdaSampled counts as info; the other two count as critical.
        assert_eq!(1, stats.info_count);
        assert_eq!(2, stats.critical_count);
    }

    #[test]
    fn test_global_event_registry() {
        let store = get_or_create_event_store(12345);
        let recorded_id = record_event(IntegrityEventContent::new(
            12345,
            IntegrityEventType::GraphRebuilt,
            "test",
        ));
        assert!(recorded_id > 0);
        // The store fetched from the registry sees the recorded event.
        let fetched = get_event_store(12345);
        assert!(fetched.is_some());
        assert_eq!(1, fetched.unwrap().event_count());
    }
}

View File

@@ -0,0 +1,755 @@
//! Integrity Gating Module
//!
//! Implements the integrity gate check system with hysteresis-based state
//! transitions. Operations are allowed, throttled, or blocked based on the
//! current integrity state.
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
/// Integrity states representing system health levels
///
/// The derived `Ord` gives a severity ordering (Normal < Stress < Critical <
/// Emergency), and the explicit discriminants back the `as_u32`/`from_u32`
/// round trip used for lock-free atomic storage of the state.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityState {
    /// System is healthy, all operations allowed
    Normal = 0,
    /// System under stress, some operations throttled
    Stress = 1,
    /// Critical state, many operations blocked
    Critical = 2,
    /// Emergency state, only essential operations allowed
    Emergency = 3,
}
impl std::fmt::Display for IntegrityState {
    /// Render the state as its lowercase wire name (same form as serde uses).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            IntegrityState::Normal => "normal",
            IntegrityState::Stress => "stress",
            IntegrityState::Critical => "critical",
            IntegrityState::Emergency => "emergency",
        };
        f.write_str(label)
    }
}
impl IntegrityState {
    /// Parse a state from its (case-insensitive) textual name.
    pub fn from_str(s: &str) -> Option<Self> {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "normal" => Some(IntegrityState::Normal),
            "stress" => Some(IntegrityState::Stress),
            "critical" => Some(IntegrityState::Critical),
            "emergency" => Some(IntegrityState::Emergency),
            _ => None,
        }
    }

    /// Classify a lambda-cut sample against three descending thresholds.
    /// A higher lambda_cut means better connectivity, hence a healthier state.
    pub fn from_lambda(
        lambda_cut: f64,
        threshold_high: f64,
        threshold_low: f64,
        threshold_critical: f64,
    ) -> Self {
        match lambda_cut {
            v if v >= threshold_high => IntegrityState::Normal,
            v if v >= threshold_low => IntegrityState::Stress,
            v if v >= threshold_critical => IntegrityState::Critical,
            _ => IntegrityState::Emergency,
        }
    }

    /// Numeric code for lock-free storage in an `AtomicU32`.
    pub fn as_u32(&self) -> u32 {
        *self as u32
    }

    /// Inverse of `as_u32`; out-of-range codes collapse to Emergency
    /// (the fail-safe interpretation).
    pub fn from_u32(v: u32) -> Self {
        match v {
            0 => IntegrityState::Normal,
            1 => IntegrityState::Stress,
            2 => IntegrityState::Critical,
            _ => IntegrityState::Emergency,
        }
    }
}
/// Hysteresis thresholds for state transitions
///
/// Each adjacent state pair has a rising threshold above its falling
/// threshold; the gap between them is the dead band that keeps the state
/// machine from flapping on a noisy lambda-cut signal.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HysteresisThresholds {
    /// Rising threshold to enter Normal from Stress
    pub normal_rising: f64,
    /// Falling threshold to enter Stress from Normal
    pub normal_falling: f64,
    /// Rising threshold to enter Stress from Critical
    pub stress_rising: f64,
    /// Falling threshold to enter Critical from Stress
    pub stress_falling: f64,
    /// Rising threshold to enter Critical from Emergency
    pub critical_rising: f64,
    /// Falling threshold to enter Emergency from Critical
    pub critical_falling: f64,
}
impl Default for HysteresisThresholds {
    fn default() -> Self {
        // Dead bands: Normal/Stress [0.7, 0.8), Stress/Critical [0.4, 0.5),
        // Critical/Emergency [0.1, 0.2).
        Self {
            normal_rising: 0.8,
            normal_falling: 0.7,
            stress_rising: 0.5,
            stress_falling: 0.4,
            critical_rising: 0.2,
            critical_falling: 0.1,
        }
    }
}
impl HysteresisThresholds {
/// Compute next state with hysteresis
pub fn compute_next_state(&self, current: IntegrityState, lambda_cut: f64) -> IntegrityState {
match current {
IntegrityState::Normal => {
if lambda_cut < self.normal_falling {
IntegrityState::Stress
} else {
IntegrityState::Normal
}
}
IntegrityState::Stress => {
if lambda_cut >= self.normal_rising {
IntegrityState::Normal
} else if lambda_cut < self.stress_falling {
IntegrityState::Critical
} else {
IntegrityState::Stress
}
}
IntegrityState::Critical => {
if lambda_cut >= self.stress_rising {
IntegrityState::Stress
} else if lambda_cut < self.critical_falling {
IntegrityState::Emergency
} else {
IntegrityState::Critical
}
}
IntegrityState::Emergency => {
if lambda_cut >= self.critical_rising {
IntegrityState::Critical
} else {
IntegrityState::Emergency
}
}
}
}
}
/// Operation permissions for each state
///
/// A profile is a plain data snapshot consulted by the gate on every
/// `check_operation` call: hard allow/deny flags plus soft throttle
/// percentages (0-100) that callers apply probabilistically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatePermissions {
    /// Allow read operations
    pub allow_reads: bool,
    /// Allow single inserts
    pub allow_single_insert: bool,
    /// Allow bulk inserts
    pub allow_bulk_insert: bool,
    /// Allow deletes
    pub allow_delete: bool,
    /// Allow updates
    pub allow_update: bool,
    /// Allow index rewiring
    pub allow_index_rewire: bool,
    /// Allow compression/compaction
    pub allow_compression: bool,
    /// Allow replication
    pub allow_replication: bool,
    /// Allow backups
    pub allow_backup: bool,
    /// Throttle percentage for inserts (0-100)
    pub throttle_inserts_pct: u8,
    /// Throttle percentage for searches (0-100)
    pub throttle_searches_pct: u8,
    /// Maximum concurrent searches (None = unlimited)
    pub max_concurrent_searches: Option<u32>,
    /// Pause GNN training
    pub pause_gnn_training: bool,
    /// Pause tier management
    pub pause_tier_management: bool,
}
impl Default for StatePermissions {
    fn default() -> Self {
        // Default to the most permissive profile (Normal state).
        Self::normal()
    }
}
impl StatePermissions {
    /// Fully permissive profile used in the Normal state: every operation
    /// allowed, no throttling, no concurrency cap, background jobs running.
    pub fn normal() -> Self {
        Self {
            allow_reads: true,
            allow_single_insert: true,
            allow_bulk_insert: true,
            allow_delete: true,
            allow_update: true,
            allow_index_rewire: true,
            allow_compression: true,
            allow_replication: true,
            allow_backup: true,
            throttle_inserts_pct: 0,
            throttle_searches_pct: 0,
            max_concurrent_searches: None,
            pause_gnn_training: false,
            pause_tier_management: false,
        }
    }

    /// Stress profile: heavy maintenance work (bulk loads, index rewiring,
    /// compression, GNN training) is shed and inserts are half-throttled
    /// with a search concurrency cap, but regular CRUD keeps flowing.
    pub fn stress() -> Self {
        Self {
            allow_bulk_insert: false,
            allow_index_rewire: false,
            allow_compression: false,
            throttle_inserts_pct: 50,
            max_concurrent_searches: Some(100),
            pause_gnn_training: true,
            ..Self::normal()
        }
    }

    /// Critical profile: on top of Stress, deletes and updates are blocked,
    /// inserts trickle through a 90% throttle, searches are throttled and
    /// capped tighter, and tier management pauses — while replication and
    /// backups stay on to protect the data.
    pub fn critical() -> Self {
        Self {
            allow_delete: false,
            allow_update: false,
            throttle_inserts_pct: 90,
            throttle_searches_pct: 25,
            max_concurrent_searches: Some(50),
            pause_tier_management: true,
            ..Self::stress()
        }
    }

    /// Emergency profile: effectively read-only. All writes and replication
    /// stop; only heavily-capped reads and recovery backups are permitted.
    pub fn emergency() -> Self {
        Self {
            allow_single_insert: false,
            allow_replication: false,
            throttle_inserts_pct: 100,
            throttle_searches_pct: 50,
            max_concurrent_searches: Some(20),
            ..Self::critical()
        }
    }

    /// Look up the canonical permission profile for a state.
    pub fn for_state(state: IntegrityState) -> Self {
        match state {
            IntegrityState::Normal => Self::normal(),
            IntegrityState::Stress => Self::stress(),
            IntegrityState::Critical => Self::critical(),
            IntegrityState::Emergency => Self::emergency(),
        }
    }
}
/// Result of a gate check
///
/// Returned by `IntegrityGate::check_operation`; a result can be a clean
/// pass, a pass with a throttle percentage the caller must apply, or a
/// hard block with a reason and a retry hint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Whether the operation is allowed
    pub allowed: bool,
    /// Throttle percentage (0-100)
    pub throttle_pct: u8,
    /// Current integrity state
    pub state: IntegrityState,
    /// Reason for rejection (if any)
    pub reason: Option<String>,
    /// Suggested retry delay in milliseconds
    pub retry_delay_ms: Option<u64>,
}
impl GateResult {
    /// Unthrottled pass in the given state.
    pub fn allow(state: IntegrityState) -> Self {
        Self {
            allowed: true,
            throttle_pct: 0,
            state,
            reason: None,
            retry_delay_ms: None,
        }
    }

    /// Pass, but with a probabilistic throttle the caller must apply.
    pub fn throttle(state: IntegrityState, throttle_pct: u8) -> Self {
        Self {
            throttle_pct,
            ..Self::allow(state)
        }
    }

    /// Hard rejection carrying an explanatory message and a default
    /// 5-second retry hint.
    pub fn block(state: IntegrityState, reason: impl Into<String>) -> Self {
        Self {
            allowed: false,
            throttle_pct: 100,
            state,
            reason: Some(reason.into()),
            retry_delay_ms: Some(5000),
        }
    }

    /// True when the caller must roll the throttle dice before proceeding.
    pub fn should_throttle(&self) -> bool {
        self.throttle_pct > 0
    }
}
/// Integrity gate for a collection
///
/// Holds the hysteresis state machine plus the knobs the gate check reads.
/// All hot-path fields are atomics so gate checks never take a lock.
pub struct IntegrityGate {
    /// Collection ID
    collection_id: i32,
    /// Current state (atomic for lock-free reads)
    state: AtomicU32,
    /// Current lambda cut value (scaled by 1000 for atomic storage,
    /// so ~3 decimal digits of precision are retained)
    lambda_cut_scaled: AtomicU32,
    /// Hysteresis thresholds
    thresholds: HysteresisThresholds,
    /// Custom permissions (override defaults)
    custom_permissions: Option<HashMap<IntegrityState, StatePermissions>>,
    /// Concurrent search counter
    concurrent_searches: AtomicU32,
    /// Last state change time (epoch millis)
    last_state_change_ms: AtomicU64,
}
impl IntegrityGate {
    /// Current wall-clock time in milliseconds since the Unix epoch.
    /// Returns 0 if the system clock reports a time before the epoch.
    /// (Extracted helper: this snippet was previously duplicated at three
    /// call sites.)
    fn now_epoch_ms() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis() as u64
    }

    /// Create a new integrity gate starting in `Normal` state with
    /// lambda_cut = 1.0 and default hysteresis thresholds.
    pub fn new(collection_id: i32) -> Self {
        Self {
            collection_id,
            state: AtomicU32::new(IntegrityState::Normal.as_u32()),
            lambda_cut_scaled: AtomicU32::new(1000), // 1.0 scaled by 1000
            thresholds: HysteresisThresholds::default(),
            custom_permissions: None,
            concurrent_searches: AtomicU32::new(0),
            last_state_change_ms: AtomicU64::new(Self::now_epoch_ms()),
        }
    }

    /// Create with custom thresholds (builder style).
    pub fn with_thresholds(mut self, thresholds: HysteresisThresholds) -> Self {
        self.thresholds = thresholds;
        self
    }

    /// Set custom permissions overriding the per-state defaults (builder style).
    pub fn with_permissions(
        mut self,
        permissions: HashMap<IntegrityState, StatePermissions>,
    ) -> Self {
        self.custom_permissions = Some(permissions);
        self
    }

    /// Get current state (lock-free read).
    pub fn current_state(&self) -> IntegrityState {
        IntegrityState::from_u32(self.state.load(Ordering::Relaxed))
    }

    /// Get current lambda cut value, unscaled back from atomic storage.
    pub fn current_lambda_cut(&self) -> f64 {
        self.lambda_cut_scaled.load(Ordering::Relaxed) as f64 / 1000.0
    }

    /// Update state based on a new lambda cut sample.
    ///
    /// Returns `Some(new_state)` if the hysteresis state machine
    /// transitioned, `None` if the state is unchanged. The raw sample is
    /// stored either way.
    pub fn update_lambda(&self, lambda_cut: f64) -> Option<IntegrityState> {
        let current = self.current_state();
        let new_state = self.thresholds.compute_next_state(current, lambda_cut);
        // Store the sample scaled by 1000. The float-to-u32 cast saturates,
        // so a negative sample stores as 0 rather than wrapping.
        let scaled = (lambda_cut * 1000.0).round() as u32;
        self.lambda_cut_scaled.store(scaled, Ordering::Relaxed);
        if new_state != current {
            // Release ordering so readers that observe the new state also
            // see writes made before this transition.
            self.state.store(new_state.as_u32(), Ordering::Release);
            self.last_state_change_ms
                .store(Self::now_epoch_ms(), Ordering::Relaxed);
            Some(new_state)
        } else {
            None
        }
    }

    /// Force set state (for testing or admin override).
    pub fn force_state(&self, state: IntegrityState) {
        self.state.store(state.as_u32(), Ordering::Release);
        self.last_state_change_ms
            .store(Self::now_epoch_ms(), Ordering::Relaxed);
    }

    /// Get the effective permissions for the current state, honoring any
    /// custom override before falling back to the built-in profile.
    pub fn current_permissions(&self) -> StatePermissions {
        let state = self.current_state();
        self.custom_permissions
            .as_ref()
            .and_then(|p| p.get(&state).cloned())
            .unwrap_or_else(|| StatePermissions::for_state(state))
    }

    /// Check if an operation is allowed under the current state.
    ///
    /// Operation names are matched case-insensitively; unknown names are
    /// allowed by default (the gate fails open for operations it does not
    /// recognize).
    pub fn check_operation(&self, operation: &str) -> GateResult {
        let state = self.current_state();
        let permissions = self.current_permissions();
        // Map the operation name to its (allowed, throttle) pair.
        let (allowed, throttle_pct) = match operation.to_lowercase().as_str() {
            "search" | "read" | "query" => {
                // Reads are additionally subject to the concurrency cap.
                let within_limit = permissions.max_concurrent_searches.map_or(true, |max| {
                    self.concurrent_searches.load(Ordering::Relaxed) < max
                });
                (
                    permissions.allow_reads && within_limit,
                    permissions.throttle_searches_pct,
                )
            }
            "insert" => (
                permissions.allow_single_insert,
                permissions.throttle_inserts_pct,
            ),
            "bulk_insert" => (
                permissions.allow_bulk_insert,
                permissions.throttle_inserts_pct,
            ),
            "delete" => (permissions.allow_delete, 0),
            "update" => (permissions.allow_update, 0),
            "index_build" | "index_rewire" => (permissions.allow_index_rewire, 0),
            "compression" | "compact" => (permissions.allow_compression, 0),
            "replication" | "replicate" => (permissions.allow_replication, 0),
            "backup" => (permissions.allow_backup, 0),
            "gnn_train" | "gnn_training" => (!permissions.pause_gnn_training, 0),
            "tier_manage" | "tier_management" => (!permissions.pause_tier_management, 0),
            _ => {
                // Unknown operations allowed by default
                (true, 0)
            }
        };
        if !allowed {
            GateResult::block(
                state,
                format!(
                    "Operation '{}' blocked: system in {} state",
                    operation, state
                ),
            )
        } else if throttle_pct > 0 {
            GateResult::throttle(state, throttle_pct)
        } else {
            GateResult::allow(state)
        }
    }

    /// Try to admit a search; returns false when the concurrency cap is hit.
    ///
    /// The increment-then-check is optimistic: a racing caller may briefly
    /// push the counter past the cap, but the compensating decrement keeps
    /// the steady-state count bounded by the limit.
    pub fn begin_search(&self) -> bool {
        let permissions = self.current_permissions();
        if let Some(max) = permissions.max_concurrent_searches {
            let current = self.concurrent_searches.fetch_add(1, Ordering::AcqRel);
            if current >= max {
                self.concurrent_searches.fetch_sub(1, Ordering::AcqRel);
                return false;
            }
        } else {
            self.concurrent_searches.fetch_add(1, Ordering::AcqRel);
        }
        true
    }

    /// Release a search slot acquired via `begin_search`.
    pub fn end_search(&self) {
        let prev = self.concurrent_searches.fetch_sub(1, Ordering::AcqRel);
        if prev == 0 {
            // Unbalanced end_search: the counter just wrapped to u32::MAX.
            // Clamp it back to zero so the underflow does not stick.
            self.concurrent_searches.store(0, Ordering::Relaxed);
        }
    }

    /// Get gate status as JSON: state, lambda, search count, and the
    /// effective permission flags.
    pub fn status(&self) -> serde_json::Value {
        let state = self.current_state();
        let permissions = self.current_permissions();
        serde_json::json!({
            "collection_id": self.collection_id,
            "state": state.to_string(),
            "lambda_cut": self.current_lambda_cut(),
            "concurrent_searches": self.concurrent_searches.load(Ordering::Relaxed),
            "permissions": {
                "allow_reads": permissions.allow_reads,
                "allow_single_insert": permissions.allow_single_insert,
                "allow_bulk_insert": permissions.allow_bulk_insert,
                "allow_delete": permissions.allow_delete,
                "allow_update": permissions.allow_update,
                "allow_index_rewire": permissions.allow_index_rewire,
                "throttle_inserts_pct": permissions.throttle_inserts_pct,
                "throttle_searches_pct": permissions.throttle_searches_pct,
            }
        })
    }
}
/// Apply throttling based on percentage.
/// Returns true if the operation should proceed, false if throttled.
///
/// Intermediate percentages are decided by a cheap pseudo-random draw
/// derived from the sub-second nanoseconds of the wall clock.
pub fn apply_throttle(throttle_pct: u8) -> bool {
    match throttle_pct {
        // Not throttled at all.
        0 => true,
        // Fully throttled (values above 100 are treated the same).
        100..=u8::MAX => false,
        pct => {
            let draw = (std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .subsec_nanos()
                % 100) as u8;
            // A draw in [pct, 100) lets the operation through, so on
            // average (100 - pct)% of calls proceed.
            draw >= pct
        }
    }
}
/// Global registry for integrity gates
///
/// One gate per collection, created lazily on first access; `DashMap`
/// provides sharded locking so concurrent gate checks do not contend.
static GATE_REGISTRY: once_cell::sync::Lazy<DashMap<i32, IntegrityGate>> =
    once_cell::sync::Lazy::new(DashMap::new);
/// Get or create an integrity gate for a collection
///
/// Uses the `DashMap` entry API so a concurrent caller can never overwrite
/// an existing gate. (The previous `contains_key` followed by `insert` was
/// a check-then-act race: two threads could both miss the key and the
/// second `insert` would clobber a gate that had already accumulated state.)
pub fn get_or_create_gate(
    collection_id: i32,
) -> dashmap::mapref::one::Ref<'static, i32, IntegrityGate> {
    // Insert only if absent; the write guard returned by `or_insert_with`
    // is a temporary dropped at the end of this statement.
    GATE_REGISTRY
        .entry(collection_id)
        .or_insert_with(|| IntegrityGate::new(collection_id));
    GATE_REGISTRY
        .get(&collection_id)
        .expect("gate was just inserted and gates are never removed")
}
/// Get an existing integrity gate
///
/// Returns `None` when no gate has ever been created for the collection;
/// unlike `get_or_create_gate`, this never inserts.
pub fn get_gate(
    collection_id: i32,
) -> Option<dashmap::mapref::one::Ref<'static, i32, IntegrityGate>> {
    GATE_REGISTRY.get(&collection_id)
}
/// Check integrity gate for an operation, creating the gate on first use.
pub fn check_integrity_gate(collection_id: i32, operation: &str) -> GateResult {
    get_or_create_gate(collection_id).check_operation(operation)
}
/// Update lambda cut value for a collection, creating the gate on first use.
/// Returns the new state if the sample caused a transition.
pub fn update_lambda_cut(collection_id: i32, lambda_cut: f64) -> Option<IntegrityState> {
    get_or_create_gate(collection_id).update_lambda(lambda_cut)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_integrity_state_display() {
        // Display strings are the snake_case names used on the wire.
        for (state, label) in [
            (IntegrityState::Normal, "normal"),
            (IntegrityState::Stress, "stress"),
            (IntegrityState::Critical, "critical"),
            (IntegrityState::Emergency, "emergency"),
        ] {
            assert_eq!(state.to_string(), label);
        }
    }

    #[test]
    fn test_state_parsing() {
        // Parsing is case-insensitive and rejects unknown names.
        assert_eq!(Some(IntegrityState::Normal), IntegrityState::from_str("normal"));
        assert_eq!(Some(IntegrityState::Stress), IntegrityState::from_str("STRESS"));
        assert_eq!(None, IntegrityState::from_str("invalid"));
    }

    #[test]
    fn test_hysteresis_transitions() {
        let thresholds = HysteresisThresholds::default();
        // Falling below 0.7 demotes Normal -> Stress.
        assert_eq!(
            IntegrityState::Stress,
            thresholds.compute_next_state(IntegrityState::Normal, 0.6)
        );
        // Rising above 0.8 promotes Stress -> Normal.
        assert_eq!(
            IntegrityState::Normal,
            thresholds.compute_next_state(IntegrityState::Stress, 0.85)
        );
        // Inside the hysteresis band the state is sticky.
        assert_eq!(
            IntegrityState::Stress,
            thresholds.compute_next_state(IntegrityState::Stress, 0.6)
        );
    }

    #[test]
    fn test_gate_operations() {
        let gate = IntegrityGate::new(1);
        // In Normal, everything passes unthrottled.
        let verdict = gate.check_operation("insert");
        assert!(verdict.allowed);
        assert_eq!(0, verdict.throttle_pct);
        gate.force_state(IntegrityState::Stress);
        // Stress sheds bulk loads entirely...
        assert!(!gate.check_operation("bulk_insert").allowed);
        // ...but single inserts pass at a 50% throttle.
        let verdict = gate.check_operation("insert");
        assert!(verdict.allowed);
        assert_eq!(50, verdict.throttle_pct);
    }

    #[test]
    fn test_emergency_permissions() {
        let gate = IntegrityGate::new(1);
        gate.force_state(IntegrityState::Emergency);
        // Emergency is read-only plus backups.
        assert!(gate.check_operation("search").allowed);
        assert!(!gate.check_operation("insert").allowed);
        assert!(!gate.check_operation("delete").allowed);
        assert!(gate.check_operation("backup").allowed);
    }

    #[test]
    fn test_lambda_update() {
        let gate = IntegrityGate::new(1);
        assert_eq!(IntegrityState::Normal, gate.current_state());
        // 0.5 is below the Normal falling threshold -> transition to Stress.
        assert_eq!(Some(IntegrityState::Stress), gate.update_lambda(0.5));
        assert_eq!(IntegrityState::Stress, gate.current_state());
        // The raw sample is retained (scaled storage loses <0.001 precision).
        assert!((gate.current_lambda_cut() - 0.5).abs() < 0.01);
    }

    #[test]
    fn test_concurrent_search_limit() {
        let gate = IntegrityGate::new(1);
        // Critical caps concurrent searches at 50.
        gate.force_state(IntegrityState::Critical);
        for _ in 0..50 {
            assert!(gate.begin_search());
        }
        // The 51st admission is refused...
        assert!(!gate.begin_search());
        // ...until a slot frees up.
        gate.end_search();
        assert!(gate.begin_search());
    }

    #[test]
    fn test_throttle_function() {
        // The boundary percentages are deterministic:
        // 0 never blocks, 100 always blocks.
        for _ in 0..100 {
            assert!(apply_throttle(0));
            assert!(!apply_throttle(100));
        }
    }

    #[test]
    fn test_gate_registry() {
        let created = get_or_create_gate(9001);
        assert_eq!(9001, created.collection_id);
        // The same gate is visible through the non-creating lookup.
        assert!(get_gate(9001).is_some());
        // A collection that was never gated stays absent.
        assert!(get_gate(9999).is_none());
    }

    #[test]
    fn test_gate_status() {
        let status = IntegrityGate::new(42).status();
        assert_eq!(42, status["collection_id"]);
        assert_eq!("normal", status["state"]);
        assert!(status["permissions"]["allow_reads"].as_bool().unwrap());
    }
}

View File

@@ -0,0 +1,547 @@
//! Mincut Computation Module
//!
//! Implements the Stoer-Wagner algorithm for computing the global minimum cut
//! on the contracted graph. This is the PRIMARY integrity metric.
//!
//! Complexity: O(VE + V^2 log V) where V is number of nodes and E is edges.
//!
//! IMPORTANT: This computes lambda_cut (minimum cut value), NOT lambda2
//! (algebraic connectivity). These are different concepts:
//! - lambda_cut: Minimum number of edges to disconnect the graph
//! - lambda2: Second smallest eigenvalue of the Laplacian (spectral stress)
use std::collections::{HashMap, HashSet};
use serde::{Deserialize, Serialize};
use super::contracted_graph::{ContractedGraph, NodeType};
/// Configuration for mincut computation
///
/// lambda2 is off by default because the spectral iteration is O(V^2) per
/// step on the dense capacity matrix; enable it only when the drift signal
/// is needed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MincutConfig {
    /// Whether to compute lambda2 (algebraic connectivity) as well
    pub compute_lambda2: bool,
    /// Maximum iterations for power iteration (lambda2)
    pub max_iterations: usize,
    /// Convergence tolerance for power iteration
    pub tolerance: f64,
}
impl Default for MincutConfig {
    fn default() -> Self {
        Self {
            // lambda2 is opt-in; mincut alone is the primary metric.
            compute_lambda2: false,
            max_iterations: 100,
            tolerance: 1e-8,
        }
    }
}
/// Result of mincut computation
///
/// `lambda_cut` is the global minimum cut value of the contracted graph;
/// `witness_edges` are the edges crossing that cut and `cut_partition`
/// lists the node indices on one side of it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MincutResult {
    /// Minimum cut value (PRIMARY METRIC)
    pub lambda_cut: f32,
    /// Algebraic connectivity (OPTIONAL DRIFT SIGNAL)
    pub lambda2: Option<f32>,
    /// Edges participating in the minimum cut
    pub witness_edges: Vec<WitnessEdge>,
    /// Partition of nodes on one side of the cut
    pub cut_partition: Vec<usize>,
    /// Computation time in milliseconds
    pub computation_time_ms: u64,
}
impl MincutResult {
    /// Check if the graph is well-connected
    /// (i.e. the mincut value meets or exceeds the threshold).
    pub fn is_well_connected(&self, threshold: f32) -> bool {
        self.lambda_cut >= threshold
    }
    /// Get the number of witness edges crossing the minimum cut.
    pub fn witness_count(&self) -> usize {
        self.witness_edges.len()
    }
}
/// An edge that participates in the minimum cut
///
/// Endpoints are identified by their (type, id) pair, matching the node
/// keys of the contracted graph; types are serialized as strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WitnessEdge {
    /// Source node type
    pub source_type: String,
    /// Source node ID
    pub source_id: i64,
    /// Target node type
    pub target_type: String,
    /// Target node ID
    pub target_id: i64,
    /// Edge type
    pub edge_type: String,
    /// Edge capacity
    pub capacity: f32,
    /// Current flow on the edge
    pub flow: f32,
}
/// Mincut computer using Stoer-Wagner algorithm
///
/// Stateless apart from its configuration; safe to construct per call.
pub struct MincutComputer {
    config: MincutConfig,
}
impl MincutComputer {
    /// Create a new mincut computer with the default configuration.
    pub fn new() -> Self {
        Self {
            config: MincutConfig::default(),
        }
    }

    /// Create with custom configuration.
    pub fn with_config(config: MincutConfig) -> Self {
        Self { config }
    }

    /// Compute the minimum cut on a contracted graph.
    ///
    /// Returns lambda_cut (the global mincut value), the witness edges
    /// crossing that cut, one side of the cut partition, and optionally
    /// lambda2 (algebraic connectivity) when enabled in the config.
    pub fn compute(&self, graph: &ContractedGraph) -> MincutResult {
        let n = graph.nodes.len();
        // A graph with fewer than two nodes cannot be cut.
        if n < 2 {
            return MincutResult {
                lambda_cut: 0.0,
                lambda2: None,
                witness_edges: vec![],
                cut_partition: vec![],
                computation_time_ms: 0,
            };
        }
        let start = std::time::Instant::now();
        // Build capacity matrix
        let (capacity, node_index) = graph.build_capacity_matrix();
        // Compute global mincut using Stoer-Wagner
        let (lambda_cut, cut_partition) = self.stoer_wagner_mincut(&capacity);
        // Find witness edges (edges crossing the cut)
        let witness_edges = self.find_witness_edges(graph, &node_index, &cut_partition);
        // Optionally compute lambda2 (algebraic connectivity)
        let lambda2 = if self.config.compute_lambda2 {
            Some(self.compute_algebraic_connectivity(&capacity, n) as f32)
        } else {
            None
        };
        let computation_time_ms = start.elapsed().as_millis() as u64;
        MincutResult {
            lambda_cut: lambda_cut as f32,
            lambda2,
            witness_edges,
            cut_partition,
            computation_time_ms,
        }
    }

    /// Stoer-Wagner algorithm for global minimum cut.
    /// Returns (mincut_value, partition of nodes on one side).
    fn stoer_wagner_mincut(&self, capacity: &[Vec<f64>]) -> (f64, Vec<usize>) {
        let n = capacity.len();
        if n == 0 {
            return (0.0, vec![]);
        }
        if n == 1 {
            return (0.0, vec![0]);
        }
        let mut best_cut = f64::MAX;
        let mut best_partition = vec![];
        // Working copies: the active vertex list, the set of original
        // vertices each active vertex represents, and a mutable capacity
        // matrix that accumulates contractions.
        let mut vertices: Vec<usize> = (0..n).collect();
        let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
        let mut cap = capacity.to_vec();
        while vertices.len() > 1 {
            // Maximum adjacency search yields the s-t cut-of-the-phase.
            let (s_idx, t_idx, cut_of_phase) = self.minimum_cut_phase(&vertices, &cap);
            if cut_of_phase < best_cut {
                best_cut = cut_of_phase;
                // The phase cut separates t (with everything merged into it)
                // from the rest of the graph.
                best_partition = merged[vertices[t_idx]].clone();
            }
            // Get actual vertex indices
            let t_vertex = vertices[t_idx];
            let s_vertex = vertices[s_idx];
            // Contract t into s: sum their capacities to every other vertex.
            for &v in &vertices {
                if v != s_vertex && v != t_vertex {
                    cap[s_vertex][v] += cap[t_vertex][v];
                    cap[v][s_vertex] += cap[v][t_vertex];
                }
            }
            // Merge vertex sets
            let t_merged = merged[t_vertex].clone();
            merged[s_vertex].extend(t_merged);
            // Remove t from active vertices
            vertices.remove(t_idx);
        }
        (best_cut, best_partition)
    }

    /// One phase of Stoer-Wagner: find a minimum s-t cut using maximum
    /// adjacency search.
    ///
    /// Returns (s_index, t_index, cut_of_phase) where the indices point
    /// into `vertices`; s is the second-to-last and t the last vertex added.
    fn minimum_cut_phase(&self, vertices: &[usize], cap: &[Vec<f64>]) -> (usize, usize, f64) {
        let n = cap.len();
        let num_vertices = vertices.len();
        if num_vertices < 2 {
            return (0, 0, 0.0);
        }
        let mut in_a = vec![false; n];
        let mut cut_weight = vec![0.0f64; n];
        let mut last_idx = 0;
        let mut before_last_idx = 0;
        for _round in 0..num_vertices {
            // Find the most tightly connected vertex not yet in A.
            let mut max_weight = -1.0;
            let mut max_idx = 0;
            for (idx, &v) in vertices.iter().enumerate() {
                if !in_a[v] && (max_weight < 0.0 || cut_weight[v] > max_weight) {
                    max_weight = cut_weight[v];
                    max_idx = idx;
                }
            }
            let max_v = vertices[max_idx];
            in_a[max_v] = true;
            before_last_idx = last_idx;
            last_idx = max_idx;
            // Tighten the connectivity weights of the remaining vertices.
            for &v in vertices {
                if !in_a[v] {
                    cut_weight[v] += cap[max_v][v];
                }
            }
        }
        // The cut of the phase is the weight of t (the last vertex added)
        let t_vertex = vertices[last_idx];
        (before_last_idx, last_idx, cut_weight[t_vertex])
    }

    /// Find edges crossing the minimum cut (witness edges).
    ///
    /// An edge is a witness iff exactly one of its endpoints lies in the
    /// cut partition; edges whose endpoints are unknown to the index are
    /// silently skipped.
    fn find_witness_edges(
        &self,
        graph: &ContractedGraph,
        node_index: &HashMap<(NodeType, i64), usize>,
        partition: &[usize],
    ) -> Vec<WitnessEdge> {
        let partition_set: HashSet<_> = partition.iter().copied().collect();
        graph
            .edges
            .iter()
            .filter_map(|edge| {
                let i = node_index.get(&edge.source_key())?;
                let j = node_index.get(&edge.target_key())?;
                // Edge crosses cut if exactly one endpoint is in the partition
                let i_in = partition_set.contains(i);
                let j_in = partition_set.contains(j);
                if i_in != j_in {
                    Some(WitnessEdge {
                        source_type: edge.source_type.to_string(),
                        source_id: edge.source_id,
                        target_type: edge.target_type.to_string(),
                        target_id: edge.target_id,
                        edge_type: edge.edge_type.to_string(),
                        capacity: edge.capacity,
                        flow: edge.current_flow,
                    })
                } else {
                    None
                }
            })
            .collect()
    }

    /// Compute algebraic connectivity (lambda2) as optional drift signal.
    /// This is DIFFERENT from mincut - it is the second-smallest eigenvalue
    /// of the graph Laplacian and provides spectral stress insight.
    ///
    /// Method: power iteration on the spectrally shifted matrix M = c*I - L,
    /// restricted to the orthogonal complement of the constant vector (the
    /// Laplacian's kernel). By the Gershgorin bound every Laplacian
    /// eigenvalue lies in [0, 2*max_degree], so with c > 2*max_degree the
    /// dominant eigenvalue of M on that subspace corresponds to the
    /// SMALLEST nonzero eigenvalue of L — i.e. lambda2 and its Fiedler
    /// vector. (Forward power iteration on L itself, as previously done,
    /// converges to the LARGEST eigenvalue instead.) The final Rayleigh
    /// quotient is taken against the original Laplacian.
    fn compute_algebraic_connectivity(&self, capacity: &[Vec<f64>], n: usize) -> f64 {
        if n < 2 {
            return 0.0;
        }
        // Build Laplacian: L = D - A, tracking the maximum degree for the
        // spectral shift.
        let mut laplacian = vec![vec![0.0f64; n]; n];
        let mut max_degree = 0.0f64;
        for i in 0..n {
            let degree: f64 = capacity[i].iter().sum();
            max_degree = max_degree.max(degree);
            laplacian[i][i] = degree;
            for j in 0..n {
                laplacian[i][j] -= capacity[i][j];
            }
        }
        // c > lambda_max guarantees c - lambda is positive and largest for
        // the smallest lambda, so the iteration cannot flip sign.
        let c = 2.0 * max_degree + 1.0;
        // Deterministic start vector, projected off the constant vector
        // and normalized.
        let mut v: Vec<f64> = (0..n).map(|i| (i as f64 * 0.7).sin()).collect();
        let mean: f64 = v.iter().sum::<f64>() / n as f64;
        for x in &mut v {
            *x -= mean;
        }
        let norm: f64 = v.iter().map(|x| x * x).sum::<f64>().sqrt();
        if norm > 1e-10 {
            for x in &mut v {
                *x /= norm;
            }
        }
        for _ in 0..self.config.max_iterations {
            // Compute M*v = c*v - L*v.
            let mut mv = vec![0.0f64; n];
            for i in 0..n {
                let mut lv_i = 0.0f64;
                for j in 0..n {
                    lv_i += laplacian[i][j] * v[j];
                }
                mv[i] = c * v[i] - lv_i;
            }
            // Re-project off the constant vector each step to suppress
            // numerical drift back into the kernel.
            let mean: f64 = mv.iter().sum::<f64>() / n as f64;
            for x in &mut mv {
                *x -= mean;
            }
            // Normalize; a vanishing iterate means no usable direction.
            let norm: f64 = mv.iter().map(|x| x * x).sum::<f64>().sqrt();
            if norm < 1e-10 {
                break;
            }
            for x in &mut mv {
                *x /= norm;
            }
            // Check convergence
            let diff: f64 = v.iter().zip(mv.iter()).map(|(a, b)| (a - b).abs()).sum();
            v = mv;
            if diff < self.config.tolerance {
                break;
            }
        }
        // Rayleigh quotient on the ORIGINAL Laplacian:
        // lambda2 ≈ (v^T L v) / (v^T v).
        let mut vtlv = 0.0f64;
        for i in 0..n {
            for j in 0..n {
                vtlv += v[i] * laplacian[i][j] * v[j];
            }
        }
        let vtv: f64 = v.iter().map(|x| x * x).sum();
        if vtv > 1e-10 {
            (vtlv / vtv).max(0.0)
        } else {
            0.0
        }
    }
}
impl Default for MincutComputer {
    // Equivalent to `new()`: default configuration, lambda2 disabled.
    fn default() -> Self {
        Self::new()
    }
}
/// Compute mincut for a given collection using the default configuration
/// (lambda2 disabled).
pub fn compute_mincut(graph: &ContractedGraph) -> MincutResult {
    MincutComputer::default().compute(graph)
}
/// Compute mincut with lambda2
pub fn compute_mincut_with_lambda2(graph: &ContractedGraph) -> MincutResult {
MincutComputer::with_config(MincutConfig {
compute_lambda2: true,
..Default::default()
})
.compute(graph)
}
#[cfg(test)]
mod tests {
    //! Unit tests for mincut computation over small, hand-built contracted graphs.
    use super::super::contracted_graph::{ContractedGraphBuilder, EdgeType};
    use super::*;
    // With no nodes there is nothing to separate: the cut value is 0 and
    // no witness edges can exist.
    #[test]
    fn test_mincut_empty_graph() {
        let graph = ContractedGraph::new(1);
        let result = compute_mincut(&graph);
        assert_eq!(result.lambda_cut, 0.0);
        assert!(result.witness_edges.is_empty());
    }
    // A single isolated node also yields a zero cut (there are no edges).
    #[test]
    fn test_mincut_single_node() {
        let mut graph = ContractedGraph::new(1);
        graph.add_node(super::super::contracted_graph::ContractedNode::new(
            1,
            NodeType::Partition,
            0,
        ));
        let result = compute_mincut(&graph);
        assert_eq!(result.lambda_cut, 0.0);
    }
    // Two nodes joined by a single capacity-5 edge: the only possible cut is
    // that edge, so lambda_cut == 5 and the edge is reported as the witness.
    #[test]
    fn test_mincut_two_connected_nodes() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};
        let mut graph = ContractedGraph::new(1);
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 0));
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 1));
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(5.0),
        );
        let result = compute_mincut(&graph);
        assert!((result.lambda_cut - 5.0).abs() < 0.01);
        assert_eq!(result.witness_edges.len(), 1);
    }
    // A unit-capacity triangle: isolating any one vertex cuts exactly two
    // edges, so the minimum cut value is 2.
    #[test]
    fn test_mincut_triangle() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};
        let mut graph = ContractedGraph::new(1);
        for i in 0..3 {
            graph.add_node(ContractedNode::new(1, NodeType::Partition, i));
        }
        // Create triangle with edges of capacity 1.0
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                1,
                NodeType::Partition,
                2,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                2,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );
        let result = compute_mincut(&graph);
        // Mincut of a triangle is 2 (cut one node from the other two)
        assert!((result.lambda_cut - 2.0).abs() < 0.01);
    }
    // lambda2 is only computed when explicitly requested; it must be present
    // and non-negative (the Laplacian is positive semi-definite).
    #[test]
    fn test_mincut_with_lambda2() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 0, 1);
        let result = compute_mincut_with_lambda2(&graph);
        assert!(result.lambda2.is_some());
        assert!(result.lambda2.unwrap() >= 0.0);
    }
    // Smoke test on a larger builder-generated graph: the cut value is
    // well-defined (non-negative) and the computation stays fast.
    #[test]
    fn test_mincut_default_graph() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 10, 2);
        let result = compute_mincut(&graph);
        assert!(result.lambda_cut >= 0.0);
        assert!(result.computation_time_ms < 10000); // Should complete quickly
    }
    // Witness edges must carry the metadata of the cut edge: endpoint type,
    // edge type, and the capacity/flow values the edge was created with.
    #[test]
    fn test_witness_edges() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};
        let mut graph = ContractedGraph::new(1);
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 0));
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 1));
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0)
            .with_flow(0.5),
        );
        let result = compute_mincut(&graph);
        assert_eq!(result.witness_edges.len(), 1);
        let witness = &result.witness_edges[0];
        assert_eq!(witness.source_type, "partition");
        assert_eq!(witness.edge_type, "partition_link");
        assert_eq!(witness.capacity, 1.0);
        assert_eq!(witness.flow, 0.5);
    }
}

// ===========================================================================
// Vendored file boundary: integrity control plane module begins below.
// ===========================================================================
//! # Integrity Control Plane
//!
//! Stoer-Wagner mincut gating for vector search integrity.
use pgrx::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
/// Declarative quality thresholds that measured metrics must satisfy to pass
/// integrity gating. Registered with and checked by [`IntegrityManager`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityContract {
    /// Unique identifier; also the registry key used for lookups.
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// Minimum acceptable recall.
    pub min_recall: f64,
    /// Maximum acceptable latency in milliseconds.
    pub max_latency_ms: u64,
    /// Minimum acceptable mincut value of the contracted graph.
    pub min_mincut: f64,
    /// Set to true on creation. NOTE(review): `validate` does not currently
    /// consult this flag — confirm whether inactive contracts should be skipped.
    pub active: bool,
}
impl Default for IntegrityContract {
fn default() -> Self {
Self {
id: "default".to_string(),
name: "Default Contract".to_string(),
min_recall: 0.95,
max_latency_ms: 100,
min_mincut: 0.1,
active: true,
}
}
}
/// Outcome of checking measured metrics against an [`IntegrityContract`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
    /// True when no threshold was violated (i.e. `failures` is empty).
    pub passed: bool,
    /// The measured recall that was checked.
    pub recall: f64,
    /// The measured latency (milliseconds) that was checked.
    pub latency_ms: u64,
    /// The measured mincut value that was checked.
    pub mincut: f64,
    /// One human-readable message per violated threshold.
    pub failures: Vec<String>,
}
/// In-memory registry of integrity contracts, keyed by contract id.
pub struct IntegrityManager {
    // Contract id -> contract; always seeded with the "default" contract.
    contracts: HashMap<String, IntegrityContract>,
}
impl IntegrityManager {
    /// Creates a manager pre-seeded with the built-in "default" contract.
    pub fn new() -> Self {
        let default_contract = IntegrityContract::default();
        let contracts = HashMap::from([(default_contract.id.clone(), default_contract)]);
        Self { contracts }
    }
    /// Registers `contract` under its id, replacing any existing entry.
    pub fn register_contract(&mut self, contract: IntegrityContract) {
        let key = contract.id.clone();
        self.contracts.insert(key, contract);
    }
    /// Looks up a registered contract by id.
    pub fn get_contract(&self, id: &str) -> Option<&IntegrityContract> {
        self.contracts.get(id)
    }
    /// Checks the measured metrics against the thresholds of `contract_id`.
    ///
    /// Returns a [`ValidationResult`] whose `failures` contain one message
    /// per violated threshold; `passed` is true iff that list is empty.
    pub fn validate(
        &self,
        contract_id: &str,
        recall: f64,
        latency_ms: u64,
        mincut: f64,
    ) -> ValidationResult {
        // NOTE(review): an unknown contract id silently falls back to the
        // built-in default thresholds — confirm this is intended rather than
        // an error condition.
        let contract = match self.contracts.get(contract_id) {
            Some(existing) => existing.clone(),
            None => IntegrityContract::default(),
        };
        let mut failures = Vec::new();
        if recall < contract.min_recall {
            failures.push(format!("Recall {:.3} < {:.3}", recall, contract.min_recall));
        }
        if latency_ms > contract.max_latency_ms {
            failures.push(format!(
                "Latency {}ms > {}ms",
                latency_ms, contract.max_latency_ms
            ));
        }
        if mincut < contract.min_mincut {
            failures.push(format!("Mincut {:.3} < {:.3}", mincut, contract.min_mincut));
        }
        let passed = failures.is_empty();
        ValidationResult {
            passed,
            recall,
            latency_ms,
            mincut,
            failures,
        }
    }
    /// Returns references to all registered contracts (iteration order is
    /// the hash map's, i.e. unspecified).
    pub fn list_contracts(&self) -> Vec<&IntegrityContract> {
        self.contracts.values().collect()
    }
}
impl Default for IntegrityManager {
fn default() -> Self {
Self::new()
}
}
/// Process-wide integrity manager singleton, initialized lazily on first access.
static INTEGRITY_MANAGER: std::sync::OnceLock<Arc<RwLock<IntegrityManager>>> =
    std::sync::OnceLock::new();
/// Returns a shared handle to the global [`IntegrityManager`], creating it
/// on first call. Cloning the `Arc` is a cheap refcount bump.
pub fn get_integrity_manager() -> Arc<RwLock<IntegrityManager>> {
    let manager = INTEGRITY_MANAGER.get_or_init(|| Arc::new(RwLock::new(IntegrityManager::new())));
    Arc::clone(manager)
}
// Submodule exports
pub mod contracted_graph;
pub mod events;
pub mod gating;
pub mod mincut;
pub use mincut::{MincutConfig, MincutResult, WitnessEdge};
/// Get current mincut for an index (used by gated_transformer module).
///
/// Currently a stub: the index name is ignored and a fixed result is
/// returned. The placeholder lambda_cut of 10.0 is above the default
/// contract's `min_mincut` of 0.1, so gating passes by default.
pub fn get_current_mincut(_index_name: &str) -> Result<MincutResult, String> {
    // TODO: Implement actual index mincut lookup.
    let placeholder = MincutResult {
        lambda_cut: 10.0,
        lambda2: None,
        witness_edges: Vec::new(),
        cut_partition: Vec::new(),
        computation_time_ms: 0,
    };
    Ok(placeholder)
}
/// Global minimum cut of an undirected weighted graph via Stoer-Wagner.
///
/// `edges` is a list of `(u, v, weight)` triples; parallel edges accumulate
/// and out-of-range endpoints (`>= n`) are ignored. Returns 0.0 for graphs
/// with fewer than two vertices, no edges, or disconnected components.
/// Runs in O(n^3) time and O(n^2) memory over a dense adjacency matrix.
pub fn stoer_wagner_mincut(n: usize, edges: &[(usize, usize, f64)]) -> f64 {
    if n <= 1 || edges.is_empty() {
        return 0.0;
    }
    // Dense symmetric adjacency matrix; repeated edges sum their weights.
    let mut w = vec![vec![0.0f64; n]; n];
    for &(a, b, weight) in edges {
        if a < n && b < n {
            w[a][b] += weight;
            w[b][a] += weight;
        }
    }
    let mut best_cut = f64::MAX;
    let mut alive = vec![true; n];
    // Each phase finds a minimum s-t cut for some pair, then contracts it.
    for phase in 0..n - 1 {
        let mut conn = vec![0.0f64; n]; // connectivity of each vertex to set A
        let mut in_set = vec![false; n];
        let mut prev = 0usize;
        let mut cur = 0usize;
        for _ in 0..n - phase {
            // Add the most tightly connected remaining vertex to A
            // (ties break toward the lowest index, matching scan order).
            let mut top = -1.0;
            let mut pick = 0usize;
            for v in 0..n {
                if alive[v] && !in_set[v] && conn[v] > top {
                    top = conn[v];
                    pick = v;
                }
            }
            prev = cur;
            cur = pick;
            in_set[pick] = true;
            for v in 0..n {
                if alive[v] && !in_set[v] {
                    conn[v] += w[pick][v];
                }
            }
        }
        // The "cut of the phase" separates the last-added vertex from the rest.
        best_cut = best_cut.min(conn[cur]);
        // Contract: merge the last vertex into the second-to-last.
        alive[cur] = false;
        for v in 0..n {
            w[prev][v] += w[cur][v];
            w[v][prev] += w[v][cur];
        }
    }
    best_cut
}
/// SQL entry point: reports integrity-plane status as a JSON document with
/// the `enabled` flag, the number of registered contracts, and their ids.
#[pg_extern]
fn ruvector_integrity_status() -> pgrx::JsonB {
    let manager = get_integrity_manager();
    let guard = manager.read().unwrap();
    let ids: Vec<String> = guard
        .list_contracts()
        .into_iter()
        .map(|c| c.id.clone())
        .collect();
    pgrx::JsonB(serde_json::json!({
        "enabled": true,
        "active_contracts": ids.len(),
        "contracts": ids,
    }))
}
/// SQL entry point: registers (or replaces) an integrity contract and
/// returns it as JSON alongside a `success` flag.
///
/// `max_latency_ms` arrives as a signed SQL integer; negative values are
/// clamped to 0 rather than being converted with a raw `as u64` cast, which
/// would wrap them to ~u64::MAX and create an effectively unlimited latency
/// budget.
#[pg_extern]
fn ruvector_integrity_create_contract(
    id: &str,
    name: &str,
    min_recall: f64,
    max_latency_ms: i64,
    min_mincut: f64,
) -> pgrx::JsonB {
    let contract = IntegrityContract {
        id: id.to_string(),
        name: name.to_string(),
        min_recall,
        // Clamp before converting: `-1 as u64` is u64::MAX.
        max_latency_ms: max_latency_ms.max(0) as u64,
        min_mincut,
        active: true,
    };
    let manager = get_integrity_manager();
    manager.write().unwrap().register_contract(contract.clone());
    pgrx::JsonB(serde_json::json!({ "success": true, "contract": contract }))
}
/// SQL entry point: validates measured metrics against a registered contract
/// and returns the [`ValidationResult`] as JSON.
///
/// `latency_ms` arrives as a signed SQL integer; negative values are clamped
/// to 0 rather than being converted with a raw `as u64` cast, which would
/// wrap them to a huge value and spuriously fail the latency check.
#[pg_extern]
fn ruvector_integrity_validate(
    contract_id: &str,
    recall: f64,
    latency_ms: i64,
    mincut: f64,
) -> pgrx::JsonB {
    // Clamp before the unsigned conversion: `-1 as u64` is u64::MAX.
    let latency = latency_ms.max(0) as u64;
    let manager = get_integrity_manager();
    let result = manager
        .read()
        .unwrap()
        .validate(contract_id, recall, latency, mincut);
    pgrx::JsonB(serde_json::json!(result))
}
/// SQL entry point: Stoer-Wagner minimum cut over an edge list supplied as
/// JSON (`[[u, v, weight], ...]`). Malformed JSON is treated as an empty
/// edge list, yielding 0.0.
#[pg_extern]
fn ruvector_mincut(n: i32, edges_json: pgrx::JsonB) -> f64 {
    // Guard the signed->unsigned conversion: a negative `n` cast with `as`
    // would wrap to an enormous usize, and the O(n^2) adjacency allocation
    // inside `stoer_wagner_mincut` would then abort the backend.
    if n <= 0 {
        return 0.0;
    }
    let edges: Vec<(usize, usize, f64)> =
        serde_json::from_value(edges_json.0).unwrap_or_default();
    stoer_wagner_mincut(n as usize, &edges)
}
#[cfg(feature = "pg_test")]
#[pg_schema]
mod tests {
    //! In-database smoke tests (executed via `cargo pgrx test`).
    use super::*;
    // The status document must always expose the `enabled` flag.
    #[pg_test]
    fn test_integrity_status() {
        let status = ruvector_integrity_status();
        assert!(status.0.get("enabled").is_some());
    }
    // Path graph 0-1-2 with unit weights: the mincut must be well-defined
    // (non-negative).
    #[pg_test]
    fn test_mincut_simple() {
        let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];
        let mincut = stoer_wagner_mincut(3, &edges);
        assert!(mincut >= 0.0);
    }
}