Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
738
vendor/ruvector/crates/ruvector-postgres/src/integrity/contracted_graph.rs
vendored
Normal file
738
vendor/ruvector/crates/ruvector-postgres/src/integrity/contracted_graph.rs
vendored
Normal file
@@ -0,0 +1,738 @@
|
||||
//! Contracted Graph Module for Integrity Control Plane
|
||||
//!
|
||||
//! This module implements the contracted operational graph - a fixed-size
|
||||
//! meta-graph of ~1000 nodes representing partitions, centroids, shards, and
|
||||
//! dependencies. This is NOT the full similarity graph.
|
||||
//!
|
||||
//! The contracted graph enables efficient mincut computation for integrity gating.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Node types in the contracted graph.
///
/// Each variant identifies one kind of operational component modeled by the
/// contracted meta-graph; serialized in snake_case via serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeType {
    /// Data partition/segment
    Partition,
    /// IVFFlat centroid
    Centroid,
    /// Distributed shard
    Shard,
    /// External dependency (backup, compaction, etc.)
    ExternalDependency,
    /// Hybrid index node
    HybridIndex,
}
|
||||
|
||||
impl fmt::Display for NodeType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
NodeType::Partition => write!(f, "partition"),
|
||||
NodeType::Centroid => write!(f, "centroid"),
|
||||
NodeType::Shard => write!(f, "shard"),
|
||||
NodeType::ExternalDependency => write!(f, "external_dependency"),
|
||||
NodeType::HybridIndex => write!(f, "hybrid_index"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NodeType {
|
||||
/// Parse node type from string
|
||||
pub fn from_str(s: &str) -> Option<Self> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"partition" => Some(NodeType::Partition),
|
||||
"centroid" => Some(NodeType::Centroid),
|
||||
"shard" => Some(NodeType::Shard),
|
||||
"external_dependency" => Some(NodeType::ExternalDependency),
|
||||
"hybrid_index" => Some(NodeType::HybridIndex),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Edge types representing data flow between components.
///
/// Serialized in snake_case via serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeType {
    /// Data flow between partitions
    PartitionLink,
    /// Query routing paths
    RoutingLink,
    /// Operational dependency
    Dependency,
    /// Replication stream
    Replication,
}
|
||||
|
||||
impl fmt::Display for EdgeType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
EdgeType::PartitionLink => write!(f, "partition_link"),
|
||||
EdgeType::RoutingLink => write!(f, "routing_link"),
|
||||
EdgeType::Dependency => write!(f, "dependency"),
|
||||
EdgeType::Replication => write!(f, "replication"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EdgeType {
|
||||
/// Parse edge type from string
|
||||
pub fn from_str(s: &str) -> Option<Self> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"partition_link" => Some(EdgeType::PartitionLink),
|
||||
"routing_link" => Some(EdgeType::RoutingLink),
|
||||
"dependency" => Some(EdgeType::Dependency),
|
||||
"replication" => Some(EdgeType::Replication),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A node in the contracted graph.
///
/// Represents one operational component (partition, centroid, shard, …),
/// not an individual vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedNode {
    /// Collection this node belongs to
    pub collection_id: i32,
    /// Type of the node
    pub node_type: NodeType,
    /// Unique identifier within the type (uniqueness is per type, not global)
    pub node_id: i64,
    /// Human-readable name
    pub node_name: Option<String>,
    /// Additional metadata (arbitrary JSON)
    pub node_data: serde_json::Value,
    /// Health score (0.0 = failed, 1.0 = healthy)
    pub health_score: f32,
}
|
||||
|
||||
impl ContractedNode {
|
||||
/// Create a new contracted node
|
||||
pub fn new(collection_id: i32, node_type: NodeType, node_id: i64) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
node_type,
|
||||
node_id,
|
||||
node_name: None,
|
||||
node_data: serde_json::json!({}),
|
||||
health_score: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the node name
|
||||
pub fn with_name(mut self, name: impl Into<String>) -> Self {
|
||||
self.node_name = Some(name.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set node data
|
||||
pub fn with_data(mut self, data: serde_json::Value) -> Self {
|
||||
self.node_data = data;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set health score
|
||||
pub fn with_health(mut self, health: f32) -> Self {
|
||||
self.health_score = health.clamp(0.0, 1.0);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the unique key for this node
|
||||
pub fn key(&self) -> (NodeType, i64) {
|
||||
(self.node_type, self.node_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// An edge in the contracted graph.
///
/// Connects two contracted nodes (identified by type + id) and carries
/// flow/health attributes used for mincut computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedEdge {
    /// Collection this edge belongs to
    pub collection_id: i32,
    /// Source node type
    pub source_type: NodeType,
    /// Source node ID
    pub source_id: i64,
    /// Target node type
    pub target_type: NodeType,
    /// Target node ID
    pub target_id: i64,
    /// Type of the edge
    pub edge_type: EdgeType,
    /// Max-flow capacity
    pub capacity: f32,
    /// Current utilization
    pub current_flow: f32,
    /// Edge latency in milliseconds (None when unmeasured)
    pub latency_ms: Option<f32>,
    /// Recent error rate (0.0-1.0)
    pub error_rate: f32,
}
|
||||
|
||||
impl ContractedEdge {
|
||||
/// Create a new contracted edge
|
||||
pub fn new(
|
||||
collection_id: i32,
|
||||
source_type: NodeType,
|
||||
source_id: i64,
|
||||
target_type: NodeType,
|
||||
target_id: i64,
|
||||
edge_type: EdgeType,
|
||||
) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
source_type,
|
||||
source_id,
|
||||
target_type,
|
||||
target_id,
|
||||
edge_type,
|
||||
capacity: 1.0,
|
||||
current_flow: 0.0,
|
||||
latency_ms: None,
|
||||
error_rate: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set capacity
|
||||
pub fn with_capacity(mut self, capacity: f32) -> Self {
|
||||
self.capacity = capacity.max(0.0);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set current flow
|
||||
pub fn with_flow(mut self, flow: f32) -> Self {
|
||||
self.current_flow = flow.max(0.0);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set latency
|
||||
pub fn with_latency(mut self, latency_ms: f32) -> Self {
|
||||
self.latency_ms = Some(latency_ms);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set error rate
|
||||
pub fn with_error_rate(mut self, error_rate: f32) -> Self {
|
||||
self.error_rate = error_rate.clamp(0.0, 1.0);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get effective capacity (adjusted for error rate)
|
||||
pub fn effective_capacity(&self) -> f64 {
|
||||
(self.capacity as f64) * (1.0 - self.error_rate as f64)
|
||||
}
|
||||
|
||||
/// Get source key
|
||||
pub fn source_key(&self) -> (NodeType, i64) {
|
||||
(self.source_type, self.source_id)
|
||||
}
|
||||
|
||||
/// Get target key
|
||||
pub fn target_key(&self) -> (NodeType, i64) {
|
||||
(self.target_type, self.target_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// The contracted graph structure.
///
/// Holds the full node and edge sets for one collection's operational
/// meta-graph plus the timestamp of the last rebuild/update.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedGraph {
    /// Collection ID this graph belongs to
    pub collection_id: i32,
    /// All nodes in the graph
    pub nodes: Vec<ContractedNode>,
    /// All edges in the graph
    pub edges: Vec<ContractedEdge>,
    /// When the graph was last updated
    pub last_updated: std::time::SystemTime,
}
|
||||
|
||||
impl ContractedGraph {
|
||||
/// Create a new empty contracted graph
|
||||
pub fn new(collection_id: i32) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
last_updated: std::time::SystemTime::now(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a node to the graph
|
||||
pub fn add_node(&mut self, node: ContractedNode) {
|
||||
self.nodes.push(node);
|
||||
}
|
||||
|
||||
/// Add an edge to the graph
|
||||
pub fn add_edge(&mut self, edge: ContractedEdge) {
|
||||
self.edges.push(edge);
|
||||
}
|
||||
|
||||
/// Get node count
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
/// Get edge count
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
|
||||
/// Build a node index for quick lookups
|
||||
pub fn build_node_index(&self) -> HashMap<(NodeType, i64), usize> {
|
||||
self.nodes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, n)| (n.key(), i))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Build adjacency matrix for mincut computation
|
||||
pub fn build_capacity_matrix(&self) -> (Vec<Vec<f64>>, HashMap<(NodeType, i64), usize>) {
|
||||
let n = self.nodes.len();
|
||||
let node_index = self.build_node_index();
|
||||
let mut capacity = vec![vec![0.0f64; n]; n];
|
||||
|
||||
for edge in &self.edges {
|
||||
if let (Some(&i), Some(&j)) = (
|
||||
node_index.get(&edge.source_key()),
|
||||
node_index.get(&edge.target_key()),
|
||||
) {
|
||||
let cap = edge.effective_capacity();
|
||||
capacity[i][j] = cap;
|
||||
capacity[j][i] = cap; // Undirected graph
|
||||
}
|
||||
}
|
||||
|
||||
(capacity, node_index)
|
||||
}
|
||||
|
||||
/// Get graph statistics
|
||||
pub fn stats(&self) -> ContractedGraphStats {
|
||||
let mut node_counts: HashMap<NodeType, usize> = HashMap::new();
|
||||
let mut edge_counts: HashMap<EdgeType, usize> = HashMap::new();
|
||||
let mut total_health = 0.0f32;
|
||||
let mut total_capacity = 0.0f32;
|
||||
let mut total_error_rate = 0.0f32;
|
||||
|
||||
for node in &self.nodes {
|
||||
*node_counts.entry(node.node_type).or_insert(0) += 1;
|
||||
total_health += node.health_score;
|
||||
}
|
||||
|
||||
for edge in &self.edges {
|
||||
*edge_counts.entry(edge.edge_type).or_insert(0) += 1;
|
||||
total_capacity += edge.capacity;
|
||||
total_error_rate += edge.error_rate;
|
||||
}
|
||||
|
||||
let avg_health = if self.nodes.is_empty() {
|
||||
1.0
|
||||
} else {
|
||||
total_health / self.nodes.len() as f32
|
||||
};
|
||||
|
||||
let avg_capacity = if self.edges.is_empty() {
|
||||
1.0
|
||||
} else {
|
||||
total_capacity / self.edges.len() as f32
|
||||
};
|
||||
|
||||
let avg_error_rate = if self.edges.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
total_error_rate / self.edges.len() as f32
|
||||
};
|
||||
|
||||
ContractedGraphStats {
|
||||
node_count: self.nodes.len(),
|
||||
edge_count: self.edges.len(),
|
||||
node_counts,
|
||||
edge_counts,
|
||||
avg_node_health: avg_health,
|
||||
avg_edge_capacity: avg_capacity,
|
||||
avg_error_rate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about the contracted graph.
///
/// Produced by [`ContractedGraph::stats`]; averages are simple arithmetic
/// means over nodes/edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContractedGraphStats {
    /// Total node count
    pub node_count: usize,
    /// Total edge count
    pub edge_count: usize,
    /// Nodes by type
    pub node_counts: HashMap<NodeType, usize>,
    /// Edges by type
    pub edge_counts: HashMap<EdgeType, usize>,
    /// Average node health
    pub avg_node_health: f32,
    /// Average edge capacity
    pub avg_edge_capacity: f32,
    /// Average error rate
    pub avg_error_rate: f32,
}
|
||||
|
||||
/// Builder for constructing contracted graphs.
pub struct ContractedGraphBuilder {
    // Collection the resulting graph will belong to.
    collection_id: i32,
    // Nodes accumulated so far.
    nodes: Vec<ContractedNode>,
    // Edges accumulated so far.
    edges: Vec<ContractedEdge>,
}
|
||||
|
||||
impl ContractedGraphBuilder {
|
||||
/// Create a new builder for a collection
|
||||
pub fn new(collection_id: i32) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add partition nodes
|
||||
pub fn add_partition_nodes(&mut self, count: usize, health_scores: Option<&[f32]>) {
|
||||
for i in 0..count {
|
||||
let health = health_scores.and_then(|h| h.get(i).copied()).unwrap_or(1.0);
|
||||
|
||||
let node = ContractedNode::new(self.collection_id, NodeType::Partition, i as i64)
|
||||
.with_name(format!("partition_{}", i))
|
||||
.with_data(serde_json::json!({"index": i}))
|
||||
.with_health(health);
|
||||
|
||||
self.nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Add centroid nodes (for IVFFlat)
|
||||
pub fn add_centroid_nodes(&mut self, count: usize, health_scores: Option<&[f32]>) {
|
||||
for i in 0..count {
|
||||
let health = health_scores.and_then(|h| h.get(i).copied()).unwrap_or(1.0);
|
||||
|
||||
let node = ContractedNode::new(self.collection_id, NodeType::Centroid, i as i64)
|
||||
.with_name(format!("centroid_{}", i))
|
||||
.with_data(serde_json::json!({"list_id": i}))
|
||||
.with_health(health);
|
||||
|
||||
self.nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Add shard nodes
|
||||
pub fn add_shard_nodes(&mut self, count: usize, primary_index: usize) {
|
||||
for i in 0..count {
|
||||
let is_primary = i == primary_index;
|
||||
let node = ContractedNode::new(self.collection_id, NodeType::Shard, i as i64)
|
||||
.with_name(if is_primary {
|
||||
format!("primary_shard_{}", i)
|
||||
} else {
|
||||
format!("replica_shard_{}", i)
|
||||
})
|
||||
.with_data(serde_json::json!({
|
||||
"type": if is_primary { "primary" } else { "replica" },
|
||||
"index": i
|
||||
}))
|
||||
.with_health(1.0);
|
||||
|
||||
self.nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Add external dependency nodes
|
||||
pub fn add_dependency_nodes(&mut self, dependencies: &[(&str, f32)]) {
|
||||
for (i, (name, health)) in dependencies.iter().enumerate() {
|
||||
let node =
|
||||
ContractedNode::new(self.collection_id, NodeType::ExternalDependency, i as i64)
|
||||
.with_name(*name)
|
||||
.with_data(serde_json::json!({"service": name}))
|
||||
.with_health(*health);
|
||||
|
||||
self.nodes.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Add partition-to-partition edges (data flow)
|
||||
pub fn add_partition_links(&mut self) {
|
||||
let partition_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::Partition)
|
||||
.collect();
|
||||
|
||||
for i in 0..partition_nodes.len() {
|
||||
for j in (i + 1)..partition_nodes.len() {
|
||||
let edge = ContractedEdge::new(
|
||||
self.collection_id,
|
||||
NodeType::Partition,
|
||||
partition_nodes[i].node_id,
|
||||
NodeType::Partition,
|
||||
partition_nodes[j].node_id,
|
||||
EdgeType::PartitionLink,
|
||||
)
|
||||
.with_capacity(1.0);
|
||||
|
||||
self.edges.push(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add centroid-to-shard edges (routing)
|
||||
pub fn add_routing_links(&mut self) {
|
||||
let centroid_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::Centroid)
|
||||
.collect();
|
||||
|
||||
let shard_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::Shard)
|
||||
.collect();
|
||||
|
||||
for centroid in ¢roid_nodes {
|
||||
for shard in &shard_nodes {
|
||||
let edge = ContractedEdge::new(
|
||||
self.collection_id,
|
||||
NodeType::Centroid,
|
||||
centroid.node_id,
|
||||
NodeType::Shard,
|
||||
shard.node_id,
|
||||
EdgeType::RoutingLink,
|
||||
)
|
||||
.with_capacity(centroid.health_score);
|
||||
|
||||
self.edges.push(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add shard-to-dependency edges
|
||||
pub fn add_dependency_links(&mut self) {
|
||||
let shard_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::Shard)
|
||||
.collect();
|
||||
|
||||
let dep_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::ExternalDependency)
|
||||
.collect();
|
||||
|
||||
for shard in &shard_nodes {
|
||||
for dep in &dep_nodes {
|
||||
let edge = ContractedEdge::new(
|
||||
self.collection_id,
|
||||
NodeType::Shard,
|
||||
shard.node_id,
|
||||
NodeType::ExternalDependency,
|
||||
dep.node_id,
|
||||
EdgeType::Dependency,
|
||||
)
|
||||
.with_capacity(dep.health_score);
|
||||
|
||||
self.edges.push(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add replication edges between shards
|
||||
pub fn add_replication_links(&mut self) {
|
||||
let shard_nodes: Vec<_> = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type == NodeType::Shard)
|
||||
.collect();
|
||||
|
||||
// Connect primary to replicas
|
||||
if shard_nodes.len() > 1 {
|
||||
let primary = &shard_nodes[0];
|
||||
for replica in shard_nodes.iter().skip(1) {
|
||||
let edge = ContractedEdge::new(
|
||||
self.collection_id,
|
||||
NodeType::Shard,
|
||||
primary.node_id,
|
||||
NodeType::Shard,
|
||||
replica.node_id,
|
||||
EdgeType::Replication,
|
||||
)
|
||||
.with_capacity(1.0);
|
||||
|
||||
self.edges.push(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the contracted graph
|
||||
pub fn build(self) -> ContractedGraph {
|
||||
ContractedGraph {
|
||||
collection_id: self.collection_id,
|
||||
nodes: self.nodes,
|
||||
edges: self.edges,
|
||||
last_updated: std::time::SystemTime::now(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a default graph structure
|
||||
pub fn build_default(
|
||||
collection_id: i32,
|
||||
num_partitions: usize,
|
||||
num_centroids: usize,
|
||||
num_shards: usize,
|
||||
) -> ContractedGraph {
|
||||
let mut builder = Self::new(collection_id);
|
||||
|
||||
// Add nodes
|
||||
builder.add_partition_nodes(num_partitions.min(100), None);
|
||||
builder.add_centroid_nodes(num_centroids.min(500), None);
|
||||
builder.add_shard_nodes(num_shards.min(10), 0);
|
||||
builder.add_dependency_nodes(&[
|
||||
("backup_service", 1.0),
|
||||
("compaction_service", 1.0),
|
||||
("gnn_trainer", 1.0),
|
||||
]);
|
||||
|
||||
// Add edges
|
||||
builder.add_partition_links();
|
||||
builder.add_routing_links();
|
||||
builder.add_dependency_links();
|
||||
builder.add_replication_links();
|
||||
|
||||
builder.build()
|
||||
}
|
||||
}
|
||||
|
||||
/// Global registry for contracted graphs
|
||||
static GRAPH_REGISTRY: once_cell::sync::Lazy<DashMap<i32, ContractedGraph>> =
|
||||
once_cell::sync::Lazy::new(DashMap::new);
|
||||
|
||||
/// Get or create a contracted graph for a collection
|
||||
pub fn get_or_create_graph(collection_id: i32) -> ContractedGraph {
|
||||
GRAPH_REGISTRY
|
||||
.entry(collection_id)
|
||||
.or_insert_with(|| {
|
||||
// Default: 10 partitions, 100 centroids, 1 shard
|
||||
ContractedGraphBuilder::build_default(collection_id, 10, 100, 1)
|
||||
})
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Get an existing contracted graph
|
||||
pub fn get_graph(collection_id: i32) -> Option<ContractedGraph> {
|
||||
GRAPH_REGISTRY.get(&collection_id).map(|g| g.clone())
|
||||
}
|
||||
|
||||
/// Store or update a contracted graph
|
||||
pub fn store_graph(graph: ContractedGraph) {
|
||||
GRAPH_REGISTRY.insert(graph.collection_id, graph);
|
||||
}
|
||||
|
||||
/// Remove a contracted graph
|
||||
pub fn remove_graph(collection_id: i32) -> Option<ContractedGraph> {
|
||||
GRAPH_REGISTRY.remove(&collection_id).map(|(_, g)| g)
|
||||
}
|
||||
|
||||
/// List all collection IDs with contracted graphs
|
||||
pub fn list_graph_collections() -> Vec<i32> {
|
||||
GRAPH_REGISTRY.iter().map(|e| *e.key()).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_contracted_node_creation() {
        // Exercises the builder-style setters on ContractedNode.
        let node = ContractedNode::new(1, NodeType::Partition, 42)
            .with_name("partition_42")
            .with_data(serde_json::json!({"size": 1000}))
            .with_health(0.95);

        assert_eq!(node.collection_id, 1);
        assert_eq!(node.node_type, NodeType::Partition);
        assert_eq!(node.node_id, 42);
        assert_eq!(node.node_name, Some("partition_42".to_string()));
        assert!((node.health_score - 0.95).abs() < 0.001);
    }

    #[test]
    fn test_contracted_edge_creation() {
        // effective_capacity = capacity * (1 - error_rate) = 2.0 * 0.9 = 1.8.
        let edge = ContractedEdge::new(
            1,
            NodeType::Partition,
            1,
            NodeType::Partition,
            2,
            EdgeType::PartitionLink,
        )
        .with_capacity(2.0)
        .with_error_rate(0.1);

        assert_eq!(edge.capacity, 2.0);
        assert!((edge.effective_capacity() - 1.8).abs() < 0.001);
    }

    #[test]
    fn test_graph_builder() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 10, 2);

        assert_eq!(graph.collection_id, 1);
        assert!(graph.node_count() > 0);
        assert!(graph.edge_count() > 0);

        // The default build should produce at least one node of each
        // structural type.
        let stats = graph.stats();
        assert!(stats.node_counts.contains_key(&NodeType::Partition));
        assert!(stats.node_counts.contains_key(&NodeType::Centroid));
        assert!(stats.node_counts.contains_key(&NodeType::Shard));
    }

    #[test]
    fn test_capacity_matrix() {
        // The capacity matrix must be square with one row per node.
        let graph = ContractedGraphBuilder::build_default(1, 3, 0, 1);
        let (matrix, index) = graph.build_capacity_matrix();

        assert_eq!(matrix.len(), graph.node_count());
        assert_eq!(index.len(), graph.node_count());
    }

    #[test]
    fn test_node_type_display() {
        assert_eq!(NodeType::Partition.to_string(), "partition");
        assert_eq!(NodeType::Centroid.to_string(), "centroid");
        assert_eq!(
            NodeType::ExternalDependency.to_string(),
            "external_dependency"
        );
    }

    #[test]
    fn test_edge_type_parsing() {
        assert_eq!(
            EdgeType::from_str("partition_link"),
            Some(EdgeType::PartitionLink)
        );
        assert_eq!(
            EdgeType::from_str("routing_link"),
            Some(EdgeType::RoutingLink)
        );
        assert_eq!(EdgeType::from_str("invalid"), None);
    }

    #[test]
    fn test_graph_registry() {
        // Uses a distinctive collection ID since the registry is a
        // process-wide global shared across tests.
        let graph = ContractedGraphBuilder::build_default(999, 2, 2, 1);
        store_graph(graph.clone());

        let retrieved = get_graph(999);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().collection_id, 999);

        remove_graph(999);
        assert!(get_graph(999).is_none());
    }
}
|
||||
726
vendor/ruvector/crates/ruvector-postgres/src/integrity/events.rs
vendored
Normal file
726
vendor/ruvector/crates/ruvector-postgres/src/integrity/events.rs
vendored
Normal file
@@ -0,0 +1,726 @@
|
||||
//! Integrity Events Module
|
||||
//!
|
||||
//! Defines integrity event types that trigger contracted graph updates
|
||||
//! and state transitions. Events support delta updates for efficiency.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::SystemTime;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::gating::IntegrityState;
|
||||
use super::mincut::WitnessEdge;
|
||||
|
||||
/// Types of integrity events.
///
/// Serialized in snake_case via serde; see `requires_graph_update`,
/// `requires_mincut_recomputation` and `severity` for how each type is
/// handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityEventType {
    /// A new partition was created
    PartitionCreated,
    /// A partition was deleted
    PartitionDeleted,
    /// A partition's health changed
    PartitionHealthChanged,
    /// An IVFFlat centroid was moved/updated
    CentroidMoved,
    /// Centroids were rebalanced
    CentroidRebalanced,
    /// A shard was rebalanced
    ShardRebalanced,
    /// A new shard was added
    ShardAdded,
    /// A shard was removed
    ShardRemoved,
    /// An external dependency became unavailable
    DependencyDown,
    /// An external dependency recovered
    DependencyUp,
    /// Integrity state changed
    StateChanged,
    /// Lambda cut was sampled
    LambdaSampled,
    /// Graph was rebuilt
    GraphRebuilt,
    /// Edge capacity changed significantly
    EdgeCapacityChanged,
    /// Error rate threshold exceeded
    ErrorRateExceeded,
    /// Manual intervention
    ManualOverride,
}
|
||||
|
||||
impl std::fmt::Display for IntegrityEventType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = serde_json::to_string(self).unwrap_or_else(|_| "unknown".to_string());
|
||||
// Remove quotes from JSON string
|
||||
write!(f, "{}", s.trim_matches('"'))
|
||||
}
|
||||
}
|
||||
|
||||
impl IntegrityEventType {
|
||||
/// Whether this event requires graph update
|
||||
pub fn requires_graph_update(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
IntegrityEventType::PartitionCreated
|
||||
| IntegrityEventType::PartitionDeleted
|
||||
| IntegrityEventType::CentroidMoved
|
||||
| IntegrityEventType::CentroidRebalanced
|
||||
| IntegrityEventType::ShardRebalanced
|
||||
| IntegrityEventType::ShardAdded
|
||||
| IntegrityEventType::ShardRemoved
|
||||
| IntegrityEventType::DependencyDown
|
||||
| IntegrityEventType::DependencyUp
|
||||
)
|
||||
}
|
||||
|
||||
/// Whether this event requires mincut recomputation
|
||||
pub fn requires_mincut_recomputation(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
IntegrityEventType::PartitionCreated
|
||||
| IntegrityEventType::PartitionDeleted
|
||||
| IntegrityEventType::PartitionHealthChanged
|
||||
| IntegrityEventType::ShardRebalanced
|
||||
| IntegrityEventType::ShardAdded
|
||||
| IntegrityEventType::ShardRemoved
|
||||
| IntegrityEventType::DependencyDown
|
||||
| IntegrityEventType::DependencyUp
|
||||
| IntegrityEventType::EdgeCapacityChanged
|
||||
| IntegrityEventType::GraphRebuilt
|
||||
)
|
||||
}
|
||||
|
||||
/// Event severity level (0 = info, 1 = warning, 2 = critical)
|
||||
pub fn severity(&self) -> u8 {
|
||||
match self {
|
||||
IntegrityEventType::LambdaSampled => 0,
|
||||
IntegrityEventType::GraphRebuilt => 0,
|
||||
IntegrityEventType::PartitionCreated => 0,
|
||||
IntegrityEventType::CentroidMoved => 0,
|
||||
IntegrityEventType::CentroidRebalanced => 1,
|
||||
IntegrityEventType::PartitionDeleted => 1,
|
||||
IntegrityEventType::PartitionHealthChanged => 1,
|
||||
IntegrityEventType::ShardRebalanced => 1,
|
||||
IntegrityEventType::ShardAdded => 1,
|
||||
IntegrityEventType::EdgeCapacityChanged => 1,
|
||||
IntegrityEventType::StateChanged => 2,
|
||||
IntegrityEventType::ShardRemoved => 2,
|
||||
IntegrityEventType::DependencyDown => 2,
|
||||
IntegrityEventType::DependencyUp => 1,
|
||||
IntegrityEventType::ErrorRateExceeded => 2,
|
||||
IntegrityEventType::ManualOverride => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Content of an integrity event.
///
/// Optional fields are populated depending on the event type (e.g. state
/// transitions carry both states and witness edges).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityEventContent {
    /// Event ID (unique within collection; 0 until assigned by the store)
    pub event_id: u64,
    /// Collection this event belongs to
    pub collection_id: i32,
    /// Type of event
    pub event_type: IntegrityEventType,
    /// Previous state (for state changes)
    pub previous_state: Option<IntegrityState>,
    /// New state (for state changes)
    pub new_state: Option<IntegrityState>,
    /// Lambda cut value at event time
    pub lambda_cut: Option<f32>,
    /// Witness edges (for mincut events)
    pub witness_edges: Option<Vec<WitnessEdge>>,
    /// Additional metadata (arbitrary JSON; defaults to an empty object)
    pub metadata: serde_json::Value,
    /// Event timestamp
    pub created_at: SystemTime,
    /// Source of the event
    pub source: String,
}
|
||||
|
||||
impl IntegrityEventContent {
|
||||
/// Create a new event
|
||||
pub fn new(
|
||||
collection_id: i32,
|
||||
event_type: IntegrityEventType,
|
||||
source: impl Into<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
event_id: 0, // Assigned by event store
|
||||
collection_id,
|
||||
event_type,
|
||||
previous_state: None,
|
||||
new_state: None,
|
||||
lambda_cut: None,
|
||||
witness_edges: None,
|
||||
metadata: serde_json::json!({}),
|
||||
created_at: SystemTime::now(),
|
||||
source: source.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a state change event
|
||||
pub fn state_change(
|
||||
collection_id: i32,
|
||||
previous: IntegrityState,
|
||||
new: IntegrityState,
|
||||
lambda_cut: f32,
|
||||
witness_edges: Vec<WitnessEdge>,
|
||||
source: impl Into<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
event_id: 0,
|
||||
collection_id,
|
||||
event_type: IntegrityEventType::StateChanged,
|
||||
previous_state: Some(previous),
|
||||
new_state: Some(new),
|
||||
lambda_cut: Some(lambda_cut),
|
||||
witness_edges: Some(witness_edges),
|
||||
metadata: serde_json::json!({
|
||||
"direction": if new > previous { "degrading" } else { "improving" }
|
||||
}),
|
||||
created_at: SystemTime::now(),
|
||||
source: source.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a lambda sampled event
|
||||
pub fn lambda_sampled(
|
||||
collection_id: i32,
|
||||
lambda_cut: f32,
|
||||
state: IntegrityState,
|
||||
source: impl Into<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
event_id: 0,
|
||||
collection_id,
|
||||
event_type: IntegrityEventType::LambdaSampled,
|
||||
previous_state: None,
|
||||
new_state: Some(state),
|
||||
lambda_cut: Some(lambda_cut),
|
||||
witness_edges: None,
|
||||
metadata: serde_json::json!({}),
|
||||
created_at: SystemTime::now(),
|
||||
source: source.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set metadata
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add metadata field
|
||||
pub fn with_metadata_field(mut self, key: &str, value: serde_json::Value) -> Self {
|
||||
if let serde_json::Value::Object(ref mut map) = self.metadata {
|
||||
map.insert(key.to_string(), value);
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Delta update for the contracted graph.
///
/// Batches node/edge additions, removals and updates so the graph can be
/// patched incrementally instead of rebuilt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphDelta {
    /// Collection ID
    pub collection_id: i32,
    /// Nodes to add
    pub add_nodes: Vec<DeltaNode>,
    /// Nodes to remove (by type name and id)
    pub remove_nodes: Vec<(String, i64)>,
    /// Nodes to update
    pub update_nodes: Vec<DeltaNode>,
    /// Edges to add
    pub add_edges: Vec<DeltaEdge>,
    /// Edges to remove (by source and target (type name, id) keys)
    pub remove_edges: Vec<((String, i64), (String, i64))>,
    /// Edges to update
    pub update_edges: Vec<DeltaEdge>,
}
|
||||
|
||||
/// Node delta for graph updates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaNode {
    /// Node type name (presumably the snake_case form of `NodeType` — confirm at apply site)
    pub node_type: String,
    /// Node identifier within the type
    pub node_id: i64,
    /// New human-readable name, when set
    pub node_name: Option<String>,
    /// New health score, when set
    pub health_score: Option<f32>,
    /// New metadata, when set
    pub metadata: Option<serde_json::Value>,
}
|
||||
|
||||
/// Edge delta for graph updates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaEdge {
    /// Source node type name
    pub source_type: String,
    /// Source node ID
    pub source_id: i64,
    /// Target node type name
    pub target_type: String,
    /// Target node ID
    pub target_id: i64,
    /// Edge type name (presumably the snake_case form of `EdgeType` — confirm at apply site)
    pub edge_type: String,
    /// New capacity, when set
    pub capacity: Option<f32>,
    /// New current flow, when set
    pub current_flow: Option<f32>,
    /// New error rate, when set
    pub error_rate: Option<f32>,
}
|
||||
|
||||
impl GraphDelta {
|
||||
/// Create an empty delta
|
||||
pub fn new(collection_id: i32) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
add_nodes: Vec::new(),
|
||||
remove_nodes: Vec::new(),
|
||||
update_nodes: Vec::new(),
|
||||
add_edges: Vec::new(),
|
||||
remove_edges: Vec::new(),
|
||||
update_edges: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if delta is empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.add_nodes.is_empty()
|
||||
&& self.remove_nodes.is_empty()
|
||||
&& self.update_nodes.is_empty()
|
||||
&& self.add_edges.is_empty()
|
||||
&& self.remove_edges.is_empty()
|
||||
&& self.update_edges.is_empty()
|
||||
}
|
||||
|
||||
/// Count total changes
|
||||
pub fn change_count(&self) -> usize {
|
||||
self.add_nodes.len()
|
||||
+ self.remove_nodes.len()
|
||||
+ self.update_nodes.len()
|
||||
+ self.add_edges.len()
|
||||
+ self.remove_edges.len()
|
||||
+ self.update_edges.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Event store for persisting integrity events
///
/// A bounded, in-memory ring buffer of events plus a set of synchronous
/// listener callbacks. All methods take `&self`; interior mutability is via
/// `RwLock` plus an atomic ID counter, so the store can be shared across
/// threads (e.g. behind an `Arc` in the global registry).
pub struct IntegrityEventStore {
    /// Collection ID this store belongs to (not read by the visible impl)
    collection_id: i32,
    /// Maximum events to keep in memory; the oldest are evicted first
    max_events: usize,
    /// Monotonic counter for assigning event IDs (starts at 1)
    next_event_id: std::sync::atomic::AtomicU64,
    /// In-memory event buffer, oldest event at the front
    events: RwLock<VecDeque<IntegrityEventContent>>,
    /// Listener callbacks invoked synchronously on every `record`
    listeners: RwLock<Vec<Box<dyn Fn(&IntegrityEventContent) + Send + Sync>>>,
}
|
||||
|
||||
impl IntegrityEventStore {
|
||||
/// Create a new event store
|
||||
pub fn new(collection_id: i32, max_events: usize) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
max_events,
|
||||
next_event_id: std::sync::atomic::AtomicU64::new(1),
|
||||
events: RwLock::new(VecDeque::with_capacity(max_events)),
|
||||
listeners: RwLock::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record an event
|
||||
pub fn record(&self, mut event: IntegrityEventContent) -> u64 {
|
||||
// Assign event ID
|
||||
let event_id = self
|
||||
.next_event_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
event.event_id = event_id;
|
||||
|
||||
// Add to buffer
|
||||
{
|
||||
let mut events = self.events.write().unwrap();
|
||||
if events.len() >= self.max_events {
|
||||
events.pop_front();
|
||||
}
|
||||
events.push_back(event.clone());
|
||||
}
|
||||
|
||||
// Notify listeners
|
||||
{
|
||||
let listeners = self.listeners.read().unwrap();
|
||||
for listener in listeners.iter() {
|
||||
listener(&event);
|
||||
}
|
||||
}
|
||||
|
||||
event_id
|
||||
}
|
||||
|
||||
/// Get recent events
|
||||
pub fn get_recent(&self, count: usize) -> Vec<IntegrityEventContent> {
|
||||
let events = self.events.read().unwrap();
|
||||
events.iter().rev().take(count).cloned().collect()
|
||||
}
|
||||
|
||||
/// Get events by type
|
||||
pub fn get_by_type(
|
||||
&self,
|
||||
event_type: IntegrityEventType,
|
||||
count: usize,
|
||||
) -> Vec<IntegrityEventContent> {
|
||||
let events = self.events.read().unwrap();
|
||||
events
|
||||
.iter()
|
||||
.rev()
|
||||
.filter(|e| e.event_type == event_type)
|
||||
.take(count)
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get events since a timestamp
|
||||
pub fn get_since(&self, since: SystemTime) -> Vec<IntegrityEventContent> {
|
||||
let events = self.events.read().unwrap();
|
||||
events
|
||||
.iter()
|
||||
.filter(|e| e.created_at >= since)
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get state change events
|
||||
pub fn get_state_changes(&self, count: usize) -> Vec<IntegrityEventContent> {
|
||||
self.get_by_type(IntegrityEventType::StateChanged, count)
|
||||
}
|
||||
|
||||
/// Add an event listener
|
||||
pub fn add_listener<F>(&self, listener: F)
|
||||
where
|
||||
F: Fn(&IntegrityEventContent) + Send + Sync + 'static,
|
||||
{
|
||||
let mut listeners = self.listeners.write().unwrap();
|
||||
listeners.push(Box::new(listener));
|
||||
}
|
||||
|
||||
/// Get event count
|
||||
pub fn event_count(&self) -> usize {
|
||||
self.events.read().unwrap().len()
|
||||
}
|
||||
|
||||
/// Clear all events
|
||||
pub fn clear(&self) {
|
||||
self.events.write().unwrap().clear();
|
||||
}
|
||||
|
||||
/// Get statistics
|
||||
pub fn stats(&self) -> EventStoreStats {
|
||||
let events = self.events.read().unwrap();
|
||||
let mut by_type: std::collections::HashMap<IntegrityEventType, usize> =
|
||||
std::collections::HashMap::new();
|
||||
let mut by_severity = [0usize; 3];
|
||||
|
||||
for event in events.iter() {
|
||||
*by_type.entry(event.event_type).or_insert(0) += 1;
|
||||
let severity = event.event_type.severity() as usize;
|
||||
if severity < 3 {
|
||||
by_severity[severity] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
EventStoreStats {
|
||||
total_events: events.len(),
|
||||
by_type,
|
||||
info_count: by_severity[0],
|
||||
warning_count: by_severity[1],
|
||||
critical_count: by_severity[2],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about the event store
///
/// Snapshot produced by `IntegrityEventStore::stats`. The severity counts
/// partition `total_events` by `IntegrityEventType::severity()` buckets
/// (severities outside 0..3 are not counted).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventStoreStats {
    /// Number of events buffered at snapshot time
    pub total_events: usize,
    /// Per-event-type counts
    pub by_type: std::collections::HashMap<IntegrityEventType, usize>,
    /// Events with severity 0
    pub info_count: usize,
    /// Events with severity 1
    pub warning_count: usize,
    /// Events with severity 2
    pub critical_count: usize,
}
|
||||
|
||||
/// Global registry for event stores
|
||||
static EVENT_REGISTRY: once_cell::sync::Lazy<DashMap<i32, Arc<IntegrityEventStore>>> =
|
||||
once_cell::sync::Lazy::new(DashMap::new);
|
||||
|
||||
/// Get or create an event store for a collection
|
||||
pub fn get_or_create_event_store(collection_id: i32) -> Arc<IntegrityEventStore> {
|
||||
EVENT_REGISTRY
|
||||
.entry(collection_id)
|
||||
.or_insert_with(|| Arc::new(IntegrityEventStore::new(collection_id, 10000)))
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Get an existing event store
|
||||
pub fn get_event_store(collection_id: i32) -> Option<Arc<IntegrityEventStore>> {
|
||||
EVENT_REGISTRY.get(&collection_id).map(|e| e.clone())
|
||||
}
|
||||
|
||||
/// Record an integrity event
|
||||
pub fn record_event(event: IntegrityEventContent) -> u64 {
|
||||
let store = get_or_create_event_store(event.collection_id);
|
||||
store.record(event)
|
||||
}
|
||||
|
||||
/// Create a graph delta from an event
|
||||
pub fn event_to_delta(event: &IntegrityEventContent) -> Option<GraphDelta> {
|
||||
if !event.event_type.requires_graph_update() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut delta = GraphDelta::new(event.collection_id);
|
||||
|
||||
match event.event_type {
|
||||
IntegrityEventType::PartitionCreated => {
|
||||
if let Some(partition_id) = event.metadata.get("partition_id").and_then(|v| v.as_i64())
|
||||
{
|
||||
delta.add_nodes.push(DeltaNode {
|
||||
node_type: "partition".to_string(),
|
||||
node_id: partition_id,
|
||||
node_name: Some(format!("partition_{}", partition_id)),
|
||||
health_score: Some(1.0),
|
||||
metadata: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
IntegrityEventType::PartitionDeleted => {
|
||||
if let Some(partition_id) = event.metadata.get("partition_id").and_then(|v| v.as_i64())
|
||||
{
|
||||
delta
|
||||
.remove_nodes
|
||||
.push(("partition".to_string(), partition_id));
|
||||
}
|
||||
}
|
||||
IntegrityEventType::DependencyDown => {
|
||||
if let Some(dep_id) = event.metadata.get("dependency_id").and_then(|v| v.as_i64()) {
|
||||
delta.update_nodes.push(DeltaNode {
|
||||
node_type: "external_dependency".to_string(),
|
||||
node_id: dep_id,
|
||||
node_name: None,
|
||||
health_score: Some(0.0),
|
||||
metadata: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
IntegrityEventType::DependencyUp => {
|
||||
if let Some(dep_id) = event.metadata.get("dependency_id").and_then(|v| v.as_i64()) {
|
||||
delta.update_nodes.push(DeltaNode {
|
||||
node_type: "external_dependency".to_string(),
|
||||
node_id: dep_id,
|
||||
node_name: None,
|
||||
health_score: Some(1.0),
|
||||
metadata: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Other events handled elsewhere or require full graph info
|
||||
}
|
||||
}
|
||||
|
||||
if delta.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(delta)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Display must produce the snake_case names used on external surfaces.
    #[test]
    fn test_event_type_display() {
        assert_eq!(
            IntegrityEventType::StateChanged.to_string(),
            "state_changed"
        );
        assert_eq!(
            IntegrityEventType::LambdaSampled.to_string(),
            "lambda_sampled"
        );
    }

    // Spot-check the per-type flags that drive graph and mincut maintenance.
    #[test]
    fn test_event_type_properties() {
        assert!(IntegrityEventType::PartitionCreated.requires_graph_update());
        assert!(!IntegrityEventType::LambdaSampled.requires_graph_update());

        assert!(IntegrityEventType::GraphRebuilt.requires_mincut_recomputation());
        assert!(!IntegrityEventType::ManualOverride.requires_mincut_recomputation());
    }

    // `new` must store collection id, event type, and source verbatim.
    #[test]
    fn test_event_creation() {
        let event = IntegrityEventContent::new(1, IntegrityEventType::GraphRebuilt, "test");
        assert_eq!(event.collection_id, 1);
        assert_eq!(event.event_type, IntegrityEventType::GraphRebuilt);
        assert_eq!(event.source, "test");
    }

    // The state_change constructor should populate the transition fields
    // (previous/new state and the lambda sample that triggered it).
    #[test]
    fn test_state_change_event() {
        let event = IntegrityEventContent::state_change(
            1,
            IntegrityState::Normal,
            IntegrityState::Stress,
            0.65,
            vec![],
            "integrity_worker",
        );

        assert_eq!(event.event_type, IntegrityEventType::StateChanged);
        assert_eq!(event.previous_state, Some(IntegrityState::Normal));
        assert_eq!(event.new_state, Some(IntegrityState::Stress));
        assert_eq!(event.lambda_cut, Some(0.65));
    }

    // IDs are assigned sequentially from 1; get_recent returns newest first.
    #[test]
    fn test_event_store() {
        let store = IntegrityEventStore::new(1, 100);

        // Record events
        let id1 = store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::GraphRebuilt,
            "test",
        ));
        let id2 = store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::LambdaSampled,
            "test",
        ));

        assert_eq!(id1, 1);
        assert_eq!(id2, 2);
        assert_eq!(store.event_count(), 2);

        // Get recent
        let recent = store.get_recent(10);
        assert_eq!(recent.len(), 2);
        assert_eq!(recent[0].event_id, 2); // Most recent first
    }

    // The buffer is bounded at max_events; overflow evicts the oldest.
    #[test]
    fn test_event_store_overflow() {
        let store = IntegrityEventStore::new(1, 5);

        // Record more than max
        for i in 0..10 {
            store.record(IntegrityEventContent::new(
                1,
                IntegrityEventType::LambdaSampled,
                format!("test_{}", i),
            ));
        }

        assert_eq!(store.event_count(), 5);

        // Oldest events should be removed
        let events = store.get_recent(10);
        assert_eq!(events.len(), 5);
        assert!(events[0].source.contains("test_9")); // Most recent
    }

    // Type filtering must return only matching events.
    #[test]
    fn test_get_by_type() {
        let store = IntegrityEventStore::new(1, 100);

        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::GraphRebuilt,
            "test",
        ));
        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::LambdaSampled,
            "test",
        ));
        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::LambdaSampled,
            "test",
        ));

        let sampled = store.get_by_type(IntegrityEventType::LambdaSampled, 10);
        assert_eq!(sampled.len(), 2);
    }

    // A fresh delta is empty; adding any change flips is_empty and bumps
    // change_count.
    #[test]
    fn test_graph_delta() {
        let mut delta = GraphDelta::new(1);
        assert!(delta.is_empty());

        delta.add_nodes.push(DeltaNode {
            node_type: "partition".to_string(),
            node_id: 1,
            node_name: None,
            health_score: Some(1.0),
            metadata: None,
        });

        assert!(!delta.is_empty());
        assert_eq!(delta.change_count(), 1);
    }

    // PartitionCreated + a partition_id in metadata must yield an add-node
    // delta carrying that id.
    #[test]
    fn test_event_to_delta() {
        let event = IntegrityEventContent::new(1, IntegrityEventType::PartitionCreated, "test")
            .with_metadata_field("partition_id", serde_json::json!(42));

        let delta = event_to_delta(&event);
        assert!(delta.is_some());

        let delta = delta.unwrap();
        assert_eq!(delta.add_nodes.len(), 1);
        assert_eq!(delta.add_nodes[0].node_id, 42);
    }

    // NOTE(review): expected counts assume LambdaSampled has severity 0
    // (info) while StateChanged and DependencyDown have severity 2
    // (critical) — severity() is defined outside this view; confirm there.
    #[test]
    fn test_event_store_stats() {
        let store = IntegrityEventStore::new(1, 100);

        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::LambdaSampled,
            "test",
        ));
        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::StateChanged,
            "test",
        ));
        store.record(IntegrityEventContent::new(
            1,
            IntegrityEventType::DependencyDown,
            "test",
        ));

        let stats = store.stats();
        assert_eq!(stats.total_events, 3);
        assert_eq!(stats.info_count, 1);
        assert_eq!(stats.critical_count, 2);
    }

    // The global registry creates on demand and returns the same store on
    // subsequent lookups.
    #[test]
    fn test_global_event_registry() {
        let store = get_or_create_event_store(12345);
        let event_id = record_event(IntegrityEventContent::new(
            12345,
            IntegrityEventType::GraphRebuilt,
            "test",
        ));

        assert!(event_id > 0);

        let retrieved = get_event_store(12345);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().event_count(), 1);
    }
}
|
||||
755
vendor/ruvector/crates/ruvector-postgres/src/integrity/gating.rs
vendored
Normal file
755
vendor/ruvector/crates/ruvector-postgres/src/integrity/gating.rs
vendored
Normal file
@@ -0,0 +1,755 @@
|
||||
//! Integrity Gating Module
|
||||
//!
|
||||
//! Implements the integrity gate check system with hysteresis-based state
|
||||
//! transitions. Operations are allowed, throttled, or blocked based on the
|
||||
//! current integrity state.
|
||||
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
||||
|
||||
/// Integrity states representing system health levels
///
/// Ordered by severity (`Normal < Stress < Critical < Emergency`); the
/// explicit discriminants double as the numeric encoding used by the gate's
/// atomics (see `as_u32` / `from_u32`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityState {
    /// System is healthy, all operations allowed
    Normal = 0,
    /// System under stress, some operations throttled
    Stress = 1,
    /// Critical state, many operations blocked
    Critical = 2,
    /// Emergency state, only essential operations allowed
    Emergency = 3,
}
|
||||
|
||||
impl std::fmt::Display for IntegrityState {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
IntegrityState::Normal => write!(f, "normal"),
|
||||
IntegrityState::Stress => write!(f, "stress"),
|
||||
IntegrityState::Critical => write!(f, "critical"),
|
||||
IntegrityState::Emergency => write!(f, "emergency"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntegrityState {
|
||||
/// Parse from string
|
||||
pub fn from_str(s: &str) -> Option<Self> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"normal" => Some(IntegrityState::Normal),
|
||||
"stress" => Some(IntegrityState::Stress),
|
||||
"critical" => Some(IntegrityState::Critical),
|
||||
"emergency" => Some(IntegrityState::Emergency),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine state from lambda cut value using thresholds
|
||||
pub fn from_lambda(
|
||||
lambda_cut: f64,
|
||||
threshold_high: f64,
|
||||
threshold_low: f64,
|
||||
threshold_critical: f64,
|
||||
) -> Self {
|
||||
if lambda_cut >= threshold_high {
|
||||
IntegrityState::Normal
|
||||
} else if lambda_cut >= threshold_low {
|
||||
IntegrityState::Stress
|
||||
} else if lambda_cut >= threshold_critical {
|
||||
IntegrityState::Critical
|
||||
} else {
|
||||
IntegrityState::Emergency
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert to numeric value for atomic operations
|
||||
pub fn as_u32(&self) -> u32 {
|
||||
*self as u32
|
||||
}
|
||||
|
||||
/// Convert from numeric value
|
||||
pub fn from_u32(v: u32) -> Self {
|
||||
match v {
|
||||
0 => IntegrityState::Normal,
|
||||
1 => IntegrityState::Stress,
|
||||
2 => IntegrityState::Critical,
|
||||
_ => IntegrityState::Emergency,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hysteresis thresholds for state transitions
///
/// Each adjacent pair of states has a rising threshold (to recover upward)
/// and a lower falling threshold (to degrade downward). Keeping the two
/// apart creates a dead band that prevents rapid state flapping when the
/// lambda cut hovers near a boundary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HysteresisThresholds {
    /// Rising threshold to enter Normal from Stress
    pub normal_rising: f64,
    /// Falling threshold to enter Stress from Normal
    pub normal_falling: f64,
    /// Rising threshold to enter Stress from Critical
    pub stress_rising: f64,
    /// Falling threshold to enter Critical from Stress
    pub stress_falling: f64,
    /// Rising threshold to enter Critical from Emergency
    pub critical_rising: f64,
    /// Falling threshold to enter Emergency from Critical
    pub critical_falling: f64,
}
|
||||
|
||||
impl Default for HysteresisThresholds {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
normal_rising: 0.8,
|
||||
normal_falling: 0.7,
|
||||
stress_rising: 0.5,
|
||||
stress_falling: 0.4,
|
||||
critical_rising: 0.2,
|
||||
critical_falling: 0.1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HysteresisThresholds {
|
||||
/// Compute next state with hysteresis
|
||||
pub fn compute_next_state(&self, current: IntegrityState, lambda_cut: f64) -> IntegrityState {
|
||||
match current {
|
||||
IntegrityState::Normal => {
|
||||
if lambda_cut < self.normal_falling {
|
||||
IntegrityState::Stress
|
||||
} else {
|
||||
IntegrityState::Normal
|
||||
}
|
||||
}
|
||||
IntegrityState::Stress => {
|
||||
if lambda_cut >= self.normal_rising {
|
||||
IntegrityState::Normal
|
||||
} else if lambda_cut < self.stress_falling {
|
||||
IntegrityState::Critical
|
||||
} else {
|
||||
IntegrityState::Stress
|
||||
}
|
||||
}
|
||||
IntegrityState::Critical => {
|
||||
if lambda_cut >= self.stress_rising {
|
||||
IntegrityState::Stress
|
||||
} else if lambda_cut < self.critical_falling {
|
||||
IntegrityState::Emergency
|
||||
} else {
|
||||
IntegrityState::Critical
|
||||
}
|
||||
}
|
||||
IntegrityState::Emergency => {
|
||||
if lambda_cut >= self.critical_rising {
|
||||
IntegrityState::Critical
|
||||
} else {
|
||||
IntegrityState::Emergency
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Operation permissions for each state
///
/// A flat policy record consulted by `IntegrityGate::check_operation`.
/// Boolean flags gate operation classes outright; the throttle percentages
/// probabilistically shed load (see `apply_throttle`); the pause flags stop
/// background subsystems.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatePermissions {
    /// Allow read operations
    pub allow_reads: bool,
    /// Allow single inserts
    pub allow_single_insert: bool,
    /// Allow bulk inserts
    pub allow_bulk_insert: bool,
    /// Allow deletes
    pub allow_delete: bool,
    /// Allow updates
    pub allow_update: bool,
    /// Allow index rewiring
    pub allow_index_rewire: bool,
    /// Allow compression/compaction
    pub allow_compression: bool,
    /// Allow replication
    pub allow_replication: bool,
    /// Allow backups
    pub allow_backup: bool,
    /// Throttle percentage for inserts (0-100; 100 effectively blocks)
    pub throttle_inserts_pct: u8,
    /// Throttle percentage for searches (0-100)
    pub throttle_searches_pct: u8,
    /// Maximum concurrent searches (None = unlimited)
    pub max_concurrent_searches: Option<u32>,
    /// Pause GNN training
    pub pause_gnn_training: bool,
    /// Pause tier management
    pub pause_tier_management: bool,
}
|
||||
|
||||
impl Default for StatePermissions {
|
||||
fn default() -> Self {
|
||||
Self::normal()
|
||||
}
|
||||
}
|
||||
|
||||
impl StatePermissions {
|
||||
/// Normal state permissions - all operations allowed
|
||||
pub fn normal() -> Self {
|
||||
Self {
|
||||
allow_reads: true,
|
||||
allow_single_insert: true,
|
||||
allow_bulk_insert: true,
|
||||
allow_delete: true,
|
||||
allow_update: true,
|
||||
allow_index_rewire: true,
|
||||
allow_compression: true,
|
||||
allow_replication: true,
|
||||
allow_backup: true,
|
||||
throttle_inserts_pct: 0,
|
||||
throttle_searches_pct: 0,
|
||||
max_concurrent_searches: None,
|
||||
pause_gnn_training: false,
|
||||
pause_tier_management: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Stress state permissions - throttled operations
|
||||
pub fn stress() -> Self {
|
||||
Self {
|
||||
allow_reads: true,
|
||||
allow_single_insert: true,
|
||||
allow_bulk_insert: false, // No bulk inserts
|
||||
allow_delete: true,
|
||||
allow_update: true,
|
||||
allow_index_rewire: false, // No index rewiring
|
||||
allow_compression: false, // No compression
|
||||
allow_replication: true,
|
||||
allow_backup: true,
|
||||
throttle_inserts_pct: 50, // 50% throttle
|
||||
throttle_searches_pct: 0,
|
||||
max_concurrent_searches: Some(100),
|
||||
pause_gnn_training: true, // Pause training
|
||||
pause_tier_management: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Critical state permissions - limited operations
|
||||
pub fn critical() -> Self {
|
||||
Self {
|
||||
allow_reads: true,
|
||||
allow_single_insert: true,
|
||||
allow_bulk_insert: false,
|
||||
allow_delete: false, // No deletes
|
||||
allow_update: false, // No updates
|
||||
allow_index_rewire: false,
|
||||
allow_compression: false,
|
||||
allow_replication: true, // Keep replication
|
||||
allow_backup: true, // Keep backups
|
||||
throttle_inserts_pct: 90, // Heavy throttle
|
||||
throttle_searches_pct: 25, // Some search throttle
|
||||
max_concurrent_searches: Some(50),
|
||||
pause_gnn_training: true,
|
||||
pause_tier_management: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emergency state permissions - read-only mode
|
||||
pub fn emergency() -> Self {
|
||||
Self {
|
||||
allow_reads: true,
|
||||
allow_single_insert: false, // No writes
|
||||
allow_bulk_insert: false,
|
||||
allow_delete: false,
|
||||
allow_update: false,
|
||||
allow_index_rewire: false,
|
||||
allow_compression: false,
|
||||
allow_replication: false, // Stop replication
|
||||
allow_backup: true, // Allow backup for recovery
|
||||
throttle_inserts_pct: 100, // Block all inserts
|
||||
throttle_searches_pct: 50, // Heavy search throttle
|
||||
max_concurrent_searches: Some(20),
|
||||
pause_gnn_training: true,
|
||||
pause_tier_management: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get permissions for a given state
|
||||
pub fn for_state(state: IntegrityState) -> Self {
|
||||
match state {
|
||||
IntegrityState::Normal => Self::normal(),
|
||||
IntegrityState::Stress => Self::stress(),
|
||||
IntegrityState::Critical => Self::critical(),
|
||||
IntegrityState::Emergency => Self::emergency(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a gate check
///
/// Returned by `IntegrityGate::check_operation`. An operation may be fully
/// allowed, allowed-but-throttled (`allowed == true`, `throttle_pct > 0`),
/// or blocked (`allowed == false`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Whether the operation is allowed
    pub allowed: bool,
    /// Throttle percentage (0-100); 100 is set when blocked
    pub throttle_pct: u8,
    /// Current integrity state at check time
    pub state: IntegrityState,
    /// Reason for rejection (only populated when blocked)
    pub reason: Option<String>,
    /// Suggested retry delay in milliseconds (only populated when blocked)
    pub retry_delay_ms: Option<u64>,
}
|
||||
|
||||
impl GateResult {
|
||||
/// Create an allowed result
|
||||
pub fn allow(state: IntegrityState) -> Self {
|
||||
Self {
|
||||
allowed: true,
|
||||
throttle_pct: 0,
|
||||
state,
|
||||
reason: None,
|
||||
retry_delay_ms: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a throttled result
|
||||
pub fn throttle(state: IntegrityState, throttle_pct: u8) -> Self {
|
||||
Self {
|
||||
allowed: true,
|
||||
throttle_pct,
|
||||
state,
|
||||
reason: None,
|
||||
retry_delay_ms: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a blocked result
|
||||
pub fn block(state: IntegrityState, reason: impl Into<String>) -> Self {
|
||||
Self {
|
||||
allowed: false,
|
||||
throttle_pct: 100,
|
||||
state,
|
||||
reason: Some(reason.into()),
|
||||
retry_delay_ms: Some(5000), // 5 second default retry
|
||||
}
|
||||
}
|
||||
|
||||
/// Should apply throttling
|
||||
pub fn should_throttle(&self) -> bool {
|
||||
self.throttle_pct > 0
|
||||
}
|
||||
}
|
||||
|
||||
/// Integrity gate for a collection
///
/// A lock-free snapshot of one collection's integrity state plus the knobs
/// needed to gate operations. All mutable fields are atomics, so gate
/// checks on hot request paths never take a lock.
pub struct IntegrityGate {
    /// Collection ID
    collection_id: i32,
    /// Current state (atomic for lock-free reads; holds IntegrityState::as_u32)
    state: AtomicU32,
    /// Current lambda cut value (scaled by 1000 for atomic storage)
    lambda_cut_scaled: AtomicU32,
    /// Hysteresis thresholds used by `update_lambda`
    thresholds: HysteresisThresholds,
    /// Custom permissions (override the per-state defaults when present)
    custom_permissions: Option<HashMap<IntegrityState, StatePermissions>>,
    /// Concurrent search counter, bounded by the state's search cap
    concurrent_searches: AtomicU32,
    /// Last state change time (epoch millis)
    last_state_change_ms: AtomicU64,
}
|
||||
|
||||
impl IntegrityGate {
|
||||
/// Create a new integrity gate
|
||||
pub fn new(collection_id: i32) -> Self {
|
||||
Self {
|
||||
collection_id,
|
||||
state: AtomicU32::new(IntegrityState::Normal.as_u32()),
|
||||
lambda_cut_scaled: AtomicU32::new(1000), // 1.0 scaled
|
||||
thresholds: HysteresisThresholds::default(),
|
||||
custom_permissions: None,
|
||||
concurrent_searches: AtomicU32::new(0),
|
||||
last_state_change_ms: AtomicU64::new(
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with custom thresholds
|
||||
pub fn with_thresholds(mut self, thresholds: HysteresisThresholds) -> Self {
|
||||
self.thresholds = thresholds;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set custom permissions
|
||||
pub fn with_permissions(
|
||||
mut self,
|
||||
permissions: HashMap<IntegrityState, StatePermissions>,
|
||||
) -> Self {
|
||||
self.custom_permissions = Some(permissions);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get current state
|
||||
pub fn current_state(&self) -> IntegrityState {
|
||||
IntegrityState::from_u32(self.state.load(Ordering::Relaxed))
|
||||
}
|
||||
|
||||
/// Get current lambda cut value
|
||||
pub fn current_lambda_cut(&self) -> f64 {
|
||||
self.lambda_cut_scaled.load(Ordering::Relaxed) as f64 / 1000.0
|
||||
}
|
||||
|
||||
/// Update state based on new lambda cut value
|
||||
pub fn update_lambda(&self, lambda_cut: f64) -> Option<IntegrityState> {
|
||||
let current = self.current_state();
|
||||
let new_state = self.thresholds.compute_next_state(current, lambda_cut);
|
||||
|
||||
// Store lambda cut (scaled)
|
||||
let scaled = (lambda_cut * 1000.0).round() as u32;
|
||||
self.lambda_cut_scaled.store(scaled, Ordering::Relaxed);
|
||||
|
||||
// Update state if changed
|
||||
if new_state != current {
|
||||
self.state.store(new_state.as_u32(), Ordering::Release);
|
||||
self.last_state_change_ms.store(
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
Some(new_state)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Force set state (for testing or admin override)
|
||||
pub fn force_state(&self, state: IntegrityState) {
|
||||
self.state.store(state.as_u32(), Ordering::Release);
|
||||
self.last_state_change_ms.store(
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
}
|
||||
|
||||
/// Get permissions for current state
|
||||
pub fn current_permissions(&self) -> StatePermissions {
|
||||
let state = self.current_state();
|
||||
self.custom_permissions
|
||||
.as_ref()
|
||||
.and_then(|p| p.get(&state).cloned())
|
||||
.unwrap_or_else(|| StatePermissions::for_state(state))
|
||||
}
|
||||
|
||||
/// Check if an operation is allowed
|
||||
pub fn check_operation(&self, operation: &str) -> GateResult {
|
||||
let state = self.current_state();
|
||||
let permissions = self.current_permissions();
|
||||
|
||||
// Map operation to permission
|
||||
let (allowed, throttle_pct) = match operation.to_lowercase().as_str() {
|
||||
"search" | "read" | "query" => {
|
||||
let within_limit = permissions.max_concurrent_searches.map_or(true, |max| {
|
||||
self.concurrent_searches.load(Ordering::Relaxed) < max
|
||||
});
|
||||
(
|
||||
permissions.allow_reads && within_limit,
|
||||
permissions.throttle_searches_pct,
|
||||
)
|
||||
}
|
||||
"insert" => (
|
||||
permissions.allow_single_insert,
|
||||
permissions.throttle_inserts_pct,
|
||||
),
|
||||
"bulk_insert" => (
|
||||
permissions.allow_bulk_insert,
|
||||
permissions.throttle_inserts_pct,
|
||||
),
|
||||
"delete" => (permissions.allow_delete, 0),
|
||||
"update" => (permissions.allow_update, 0),
|
||||
"index_build" | "index_rewire" => (permissions.allow_index_rewire, 0),
|
||||
"compression" | "compact" => (permissions.allow_compression, 0),
|
||||
"replication" | "replicate" => (permissions.allow_replication, 0),
|
||||
"backup" => (permissions.allow_backup, 0),
|
||||
"gnn_train" | "gnn_training" => (!permissions.pause_gnn_training, 0),
|
||||
"tier_manage" | "tier_management" => (!permissions.pause_tier_management, 0),
|
||||
_ => {
|
||||
// Unknown operations allowed by default
|
||||
(true, 0)
|
||||
}
|
||||
};
|
||||
|
||||
if !allowed {
|
||||
GateResult::block(
|
||||
state,
|
||||
format!(
|
||||
"Operation '{}' blocked: system in {} state",
|
||||
operation, state
|
||||
),
|
||||
)
|
||||
} else if throttle_pct > 0 {
|
||||
GateResult::throttle(state, throttle_pct)
|
||||
} else {
|
||||
GateResult::allow(state)
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment concurrent search counter
|
||||
pub fn begin_search(&self) -> bool {
|
||||
let permissions = self.current_permissions();
|
||||
if let Some(max) = permissions.max_concurrent_searches {
|
||||
let current = self.concurrent_searches.fetch_add(1, Ordering::AcqRel);
|
||||
if current >= max {
|
||||
self.concurrent_searches.fetch_sub(1, Ordering::AcqRel);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
self.concurrent_searches.fetch_add(1, Ordering::AcqRel);
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Decrement concurrent search counter
|
||||
pub fn end_search(&self) {
|
||||
let prev = self.concurrent_searches.fetch_sub(1, Ordering::AcqRel);
|
||||
if prev == 0 {
|
||||
// Shouldn't happen, but prevent underflow
|
||||
self.concurrent_searches.store(0, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get gate status as JSON
|
||||
pub fn status(&self) -> serde_json::Value {
|
||||
let state = self.current_state();
|
||||
let permissions = self.current_permissions();
|
||||
|
||||
serde_json::json!({
|
||||
"collection_id": self.collection_id,
|
||||
"state": state.to_string(),
|
||||
"lambda_cut": self.current_lambda_cut(),
|
||||
"concurrent_searches": self.concurrent_searches.load(Ordering::Relaxed),
|
||||
"permissions": {
|
||||
"allow_reads": permissions.allow_reads,
|
||||
"allow_single_insert": permissions.allow_single_insert,
|
||||
"allow_bulk_insert": permissions.allow_bulk_insert,
|
||||
"allow_delete": permissions.allow_delete,
|
||||
"allow_update": permissions.allow_update,
|
||||
"allow_index_rewire": permissions.allow_index_rewire,
|
||||
"throttle_inserts_pct": permissions.throttle_inserts_pct,
|
||||
"throttle_searches_pct": permissions.throttle_searches_pct,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply throttling based on percentage.
///
/// Returns true if the operation should proceed, false if it was shed.
/// 0 always proceeds; >= 100 never does; anything in between sheds
/// roughly `throttle_pct` percent of calls.
pub fn apply_throttle(throttle_pct: u8) -> bool {
    match throttle_pct {
        0 => true,           // not throttled
        100..=255 => false,  // fully throttled
        pct => {
            // Cheap pseudo-randomness from the sub-second clock.
            // NOTE(review): clock granularity may bias the distribution;
            // assumed acceptable for coarse load shedding — confirm.
            let sample = (std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .subsec_nanos()
                % 100) as u8;
            sample >= pct
        }
    }
}
|
||||
|
||||
/// Global registry of per-collection integrity gates.
///
/// Keyed by collection id. Gates are created lazily (see `get_or_create_gate`)
/// and are never evicted for the lifetime of the process.
static GATE_REGISTRY: once_cell::sync::Lazy<DashMap<i32, IntegrityGate>> =
    once_cell::sync::Lazy::new(DashMap::new);
|
||||
|
||||
/// Get or create an integrity gate for a collection
|
||||
pub fn get_or_create_gate(
|
||||
collection_id: i32,
|
||||
) -> dashmap::mapref::one::Ref<'static, i32, IntegrityGate> {
|
||||
if !GATE_REGISTRY.contains_key(&collection_id) {
|
||||
GATE_REGISTRY.insert(collection_id, IntegrityGate::new(collection_id));
|
||||
}
|
||||
GATE_REGISTRY.get(&collection_id).unwrap()
|
||||
}
|
||||
|
||||
/// Get an existing integrity gate.
///
/// Returns `None` if no gate has been created for `collection_id` yet;
/// unlike `get_or_create_gate`, this never inserts.
pub fn get_gate(
    collection_id: i32,
) -> Option<dashmap::mapref::one::Ref<'static, i32, IntegrityGate>> {
    GATE_REGISTRY.get(&collection_id)
}
|
||||
|
||||
/// Check the integrity gate for an operation.
///
/// Creates the gate on first use (starting in its default state), so this is
/// safe to call for collections that have never been scored.
pub fn check_integrity_gate(collection_id: i32, operation: &str) -> GateResult {
    let gate = get_or_create_gate(collection_id);
    gate.check_operation(operation)
}
|
||||
|
||||
/// Update the lambda cut value for a collection.
///
/// Returns `Some(new_state)` when the update triggered a state transition,
/// `None` otherwise (forwarded from `IntegrityGate::update_lambda`; see
/// `test_lambda_update` for the expected contract).
pub fn update_lambda_cut(collection_id: i32, lambda_cut: f64) -> Option<IntegrityState> {
    let gate = get_or_create_gate(collection_id);
    gate.update_lambda(lambda_cut)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Display output is the lowercase snake_case wire format used by status().
    #[test]
    fn test_integrity_state_display() {
        assert_eq!(IntegrityState::Normal.to_string(), "normal");
        assert_eq!(IntegrityState::Stress.to_string(), "stress");
        assert_eq!(IntegrityState::Critical.to_string(), "critical");
        assert_eq!(IntegrityState::Emergency.to_string(), "emergency");
    }

    // Parsing is case-insensitive; unknown names yield None.
    #[test]
    fn test_state_parsing() {
        assert_eq!(
            IntegrityState::from_str("normal"),
            Some(IntegrityState::Normal)
        );
        assert_eq!(
            IntegrityState::from_str("STRESS"),
            Some(IntegrityState::Stress)
        );
        assert_eq!(IntegrityState::from_str("invalid"), None);
    }

    // Hysteresis: the exit threshold (0.8) is above the entry threshold (0.7),
    // so values in the dead band keep the current state.
    #[test]
    fn test_hysteresis_transitions() {
        let thresholds = HysteresisThresholds::default();

        // Normal -> Stress when lambda drops below 0.7
        let state = thresholds.compute_next_state(IntegrityState::Normal, 0.6);
        assert_eq!(state, IntegrityState::Stress);

        // Stress -> Normal when lambda rises above 0.8
        let state = thresholds.compute_next_state(IntegrityState::Stress, 0.85);
        assert_eq!(state, IntegrityState::Normal);

        // Stress stays Stress between 0.5 and 0.8
        let state = thresholds.compute_next_state(IntegrityState::Stress, 0.6);
        assert_eq!(state, IntegrityState::Stress);
    }

    // Per-operation gating: Normal allows everything; Stress blocks bulk
    // inserts and throttles single inserts at 50%.
    #[test]
    fn test_gate_operations() {
        let gate = IntegrityGate::new(1);

        // Normal state - all allowed
        let result = gate.check_operation("insert");
        assert!(result.allowed);
        assert_eq!(result.throttle_pct, 0);

        // Force stress state
        gate.force_state(IntegrityState::Stress);

        // Bulk insert blocked in stress
        let result = gate.check_operation("bulk_insert");
        assert!(!result.allowed);

        // Single insert throttled in stress
        let result = gate.check_operation("insert");
        assert!(result.allowed);
        assert_eq!(result.throttle_pct, 50);
    }

    // Emergency is read-only plus backups: queries and backups pass, all
    // mutations are refused.
    #[test]
    fn test_emergency_permissions() {
        let gate = IntegrityGate::new(1);
        gate.force_state(IntegrityState::Emergency);

        // Reads still allowed
        let result = gate.check_operation("search");
        assert!(result.allowed);

        // Writes blocked
        let result = gate.check_operation("insert");
        assert!(!result.allowed);

        let result = gate.check_operation("delete");
        assert!(!result.allowed);

        // Backups still allowed
        let result = gate.check_operation("backup");
        assert!(result.allowed);
    }

    // update_lambda returns Some(state) only when a transition occurred, and
    // the raw lambda value is retained for status reporting.
    #[test]
    fn test_lambda_update() {
        let gate = IntegrityGate::new(1);

        // Initially normal
        assert_eq!(gate.current_state(), IntegrityState::Normal);

        // Drop lambda to trigger stress
        let new_state = gate.update_lambda(0.5);
        assert_eq!(new_state, Some(IntegrityState::Stress));
        assert_eq!(gate.current_state(), IntegrityState::Stress);

        // Lambda cut stored correctly
        assert!((gate.current_lambda_cut() - 0.5).abs() < 0.01);
    }

    // begin_search/end_search enforce the per-state concurrency cap.
    #[test]
    fn test_concurrent_search_limit() {
        let gate = IntegrityGate::new(1);
        gate.force_state(IntegrityState::Critical); // max 50 searches

        // Start many searches
        for _ in 0..50 {
            assert!(gate.begin_search());
        }

        // 51st should fail
        assert!(!gate.begin_search());

        // End one, then can start another
        gate.end_search();
        assert!(gate.begin_search());
    }

    // Only the deterministic endpoints of apply_throttle are asserted; the
    // 1..=99 range is probabilistic by design.
    #[test]
    fn test_throttle_function() {
        // 0% throttle always passes
        for _ in 0..100 {
            assert!(apply_throttle(0));
        }

        // 100% throttle always blocks
        for _ in 0..100 {
            assert!(!apply_throttle(100));
        }
    }

    // Registry lookups: creation is lazy, plain get never inserts.
    #[test]
    fn test_gate_registry() {
        let gate1 = get_or_create_gate(9001);
        assert_eq!(gate1.collection_id, 9001);

        let gate2 = get_gate(9001);
        assert!(gate2.is_some());

        // Check non-existent
        let gate3 = get_gate(9999);
        assert!(gate3.is_none());
    }

    // JSON status surfaces id, state string, and the permission flags.
    #[test]
    fn test_gate_status() {
        let gate = IntegrityGate::new(42);
        let status = gate.status();

        assert_eq!(status["collection_id"], 42);
        assert_eq!(status["state"], "normal");
        assert!(status["permissions"]["allow_reads"].as_bool().unwrap());
    }
}
|
||||
547
vendor/ruvector/crates/ruvector-postgres/src/integrity/mincut.rs
vendored
Normal file
547
vendor/ruvector/crates/ruvector-postgres/src/integrity/mincut.rs
vendored
Normal file
@@ -0,0 +1,547 @@
|
||||
//! Mincut Computation Module
|
||||
//!
|
||||
//! Implements the Stoer-Wagner algorithm for computing the global minimum cut
|
||||
//! on the contracted graph. This is the PRIMARY integrity metric.
|
||||
//!
|
||||
//! Complexity: O(VE + V^2 log V) where V is number of nodes and E is edges.
|
||||
//!
|
||||
//! IMPORTANT: This computes lambda_cut (minimum cut value), NOT lambda2
|
||||
//! (algebraic connectivity). These are different concepts:
|
||||
//! - lambda_cut: Minimum number of edges to disconnect the graph
|
||||
//! - lambda2: Second smallest eigenvalue of the Laplacian (spectral stress)
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::contracted_graph::{ContractedGraph, NodeType};
|
||||
|
||||
/// Configuration for mincut computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MincutConfig {
    /// Whether to compute lambda2 (algebraic connectivity) as well.
    /// Off by default: it adds O(iterations * V^2) work on top of the mincut.
    pub compute_lambda2: bool,
    /// Maximum iterations for power iteration (lambda2 only).
    pub max_iterations: usize,
    /// Convergence tolerance for power iteration (lambda2 only).
    pub tolerance: f64,
}

impl Default for MincutConfig {
    /// Defaults: lambda2 disabled, 100 iterations max, 1e-8 tolerance.
    fn default() -> Self {
        Self {
            compute_lambda2: false,
            max_iterations: 100,
            tolerance: 1e-8,
        }
    }
}
|
||||
|
||||
/// Result of mincut computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MincutResult {
    /// Minimum cut value (PRIMARY METRIC).
    pub lambda_cut: f32,
    /// Algebraic connectivity (OPTIONAL DRIFT SIGNAL); `None` unless
    /// `MincutConfig::compute_lambda2` was set.
    pub lambda2: Option<f32>,
    /// Edges participating in the minimum cut.
    pub witness_edges: Vec<WitnessEdge>,
    /// Node indices (into the capacity matrix) on one side of the cut.
    pub cut_partition: Vec<usize>,
    /// Computation time in milliseconds.
    pub computation_time_ms: u64,
}

impl MincutResult {
    /// Check if the graph is well-connected, i.e. the cut value meets
    /// `threshold`.
    pub fn is_well_connected(&self, threshold: f32) -> bool {
        self.lambda_cut >= threshold
    }

    /// Get the number of witness edges (edges crossing the minimum cut).
    pub fn witness_count(&self) -> usize {
        self.witness_edges.len()
    }
}
|
||||
|
||||
/// An edge that participates in the minimum cut.
///
/// Types are stored as display strings (e.g. "partition", "partition_link")
/// so the struct serializes without referencing the graph enums.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WitnessEdge {
    /// Source node type (string form of `NodeType`).
    pub source_type: String,
    /// Source node ID.
    pub source_id: i64,
    /// Target node type (string form of `NodeType`).
    pub target_type: String,
    /// Target node ID.
    pub target_id: i64,
    /// Edge type (string form of `EdgeType`).
    pub edge_type: String,
    /// Edge capacity.
    pub capacity: f32,
    /// Current flow on the edge at computation time.
    pub flow: f32,
}
|
||||
|
||||
/// Mincut computer using the Stoer-Wagner algorithm.
///
/// Stateless apart from its configuration; safe to reuse across graphs.
pub struct MincutComputer {
    // Controls lambda2 computation and power-iteration limits.
    config: MincutConfig,
}
|
||||
|
||||
impl MincutComputer {
|
||||
    /// Create a new mincut computer with the default configuration
    /// (lambda2 disabled).
    pub fn new() -> Self {
        Self {
            config: MincutConfig::default(),
        }
    }

    /// Create with a custom configuration.
    pub fn with_config(config: MincutConfig) -> Self {
        Self { config }
    }
|
||||
|
||||
    /// Compute the minimum cut on a contracted graph.
    ///
    /// Pipeline: build a dense capacity matrix, run Stoer-Wagner for the
    /// global mincut, collect witness edges (edges crossing the cut), and
    /// optionally estimate lambda2. Graphs with fewer than two nodes
    /// trivially report a cut of 0 with no witnesses.
    pub fn compute(&self, graph: &ContractedGraph) -> MincutResult {
        let n = graph.nodes.len();

        // Handle trivial cases
        if n < 2 {
            return MincutResult {
                lambda_cut: 0.0,
                lambda2: None,
                witness_edges: vec![],
                cut_partition: vec![],
                computation_time_ms: 0,
            };
        }

        let start = std::time::Instant::now();

        // Build capacity matrix; node_index maps (type, id) -> matrix row.
        let (capacity, node_index) = graph.build_capacity_matrix();

        // Compute global mincut using Stoer-Wagner
        let (lambda_cut, cut_partition) = self.stoer_wagner_mincut(&capacity);

        // Find witness edges (edges crossing the cut)
        let witness_edges = self.find_witness_edges(graph, &node_index, &cut_partition);

        // Optionally compute lambda2 (algebraic connectivity)
        let lambda2 = if self.config.compute_lambda2 {
            Some(self.compute_algebraic_connectivity(&capacity, n) as f32)
        } else {
            None
        };

        let computation_time_ms = start.elapsed().as_millis() as u64;

        MincutResult {
            lambda_cut: lambda_cut as f32,
            lambda2,
            witness_edges,
            cut_partition,
            computation_time_ms,
        }
    }
|
||||
|
||||
    /// Stoer-Wagner algorithm for global minimum cut.
    ///
    /// Returns `(mincut_value, partition)` where `partition` holds the
    /// original matrix indices of the nodes on one side of the best cut.
    ///
    /// The algorithm runs n-1 phases; each phase finds a minimum s-t cut via
    /// maximum adjacency search and then contracts t into s. The best
    /// cut-of-phase over all phases is the global minimum cut.
    fn stoer_wagner_mincut(&self, capacity: &[Vec<f64>]) -> (f64, Vec<usize>) {
        let n = capacity.len();

        if n == 0 {
            return (0.0, vec![]);
        }

        if n == 1 {
            return (0.0, vec![0]);
        }

        let mut best_cut = f64::MAX;
        let mut best_partition = vec![];

        // Working copies: `vertices` is the active (uncontracted) set,
        // `merged[v]` tracks which original nodes have been contracted into v,
        // `cap` is mutated in place as vertices merge.
        let mut vertices: Vec<usize> = (0..n).collect();
        let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
        let mut cap = capacity.to_vec();

        while vertices.len() > 1 {
            // Maximum adjacency search to find s-t cut
            let (s_idx, t_idx, cut_of_phase) = self.minimum_cut_phase(&vertices, &cap);

            // The cut isolating t's merged set may be the global minimum.
            if cut_of_phase < best_cut {
                best_cut = cut_of_phase;
                best_partition = merged[vertices[t_idx]].clone();
            }

            // Get actual vertex indices
            let t_vertex = vertices[t_idx];
            let s_vertex = vertices[s_idx];

            // Update capacities - merge t into s (sum parallel edges)
            for &v in &vertices {
                if v != s_vertex && v != t_vertex {
                    cap[s_vertex][v] += cap[t_vertex][v];
                    cap[v][s_vertex] += cap[v][t_vertex];
                }
            }

            // Merge vertex sets
            let t_merged = merged[t_vertex].clone();
            merged[s_vertex].extend(t_merged);

            // Remove t from active vertices
            vertices.remove(t_idx);
        }

        (best_cut, best_partition)
    }
|
||||
|
||||
/// One phase of Stoer-Wagner: find minimum s-t cut using maximum adjacency search
|
||||
/// Returns (s_index, t_index, cut_of_phase) where indices are into vertices array
|
||||
fn minimum_cut_phase(&self, vertices: &[usize], cap: &[Vec<f64>]) -> (usize, usize, f64) {
|
||||
let n = cap.len();
|
||||
let num_vertices = vertices.len();
|
||||
|
||||
if num_vertices < 2 {
|
||||
return (0, 0, 0.0);
|
||||
}
|
||||
|
||||
let mut in_a = vec![false; n];
|
||||
let mut cut_weight = vec![0.0f64; n];
|
||||
|
||||
let mut last_idx = 0;
|
||||
let mut before_last_idx = 0;
|
||||
|
||||
for _round in 0..num_vertices {
|
||||
// Find most tightly connected vertex not yet in A
|
||||
let mut max_weight = -1.0;
|
||||
let mut max_idx = 0;
|
||||
|
||||
for (idx, &v) in vertices.iter().enumerate() {
|
||||
if !in_a[v] && (max_weight < 0.0 || cut_weight[v] > max_weight) {
|
||||
max_weight = cut_weight[v];
|
||||
max_idx = idx;
|
||||
}
|
||||
}
|
||||
|
||||
let max_v = vertices[max_idx];
|
||||
in_a[max_v] = true;
|
||||
before_last_idx = last_idx;
|
||||
last_idx = max_idx;
|
||||
|
||||
// Update cut weights for remaining vertices
|
||||
for (_, &v) in vertices.iter().enumerate() {
|
||||
if !in_a[v] {
|
||||
cut_weight[v] += cap[max_v][v];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The cut of the phase is the weight of t (the last vertex added)
|
||||
let t_vertex = vertices[last_idx];
|
||||
(before_last_idx, last_idx, cut_weight[t_vertex])
|
||||
}
|
||||
|
||||
    /// Find edges crossing the minimum cut (witness edges).
    ///
    /// An edge is a witness iff exactly one endpoint lies in `partition`
    /// (matrix indices of one side of the cut). Edges whose endpoints are
    /// missing from `node_index` are silently skipped via `?`.
    fn find_witness_edges(
        &self,
        graph: &ContractedGraph,
        node_index: &HashMap<(NodeType, i64), usize>,
        partition: &[usize],
    ) -> Vec<WitnessEdge> {
        // HashSet for O(1) membership tests per edge endpoint.
        let partition_set: HashSet<_> = partition.iter().copied().collect();

        graph
            .edges
            .iter()
            .filter_map(|edge| {
                let i = node_index.get(&edge.source_key())?;
                let j = node_index.get(&edge.target_key())?;

                // Edge crosses cut if exactly one endpoint is in the partition
                let i_in = partition_set.contains(i);
                let j_in = partition_set.contains(j);

                if i_in != j_in {
                    Some(WitnessEdge {
                        source_type: edge.source_type.to_string(),
                        source_id: edge.source_id,
                        target_type: edge.target_type.to_string(),
                        target_id: edge.target_id,
                        edge_type: edge.edge_type.to_string(),
                        capacity: edge.capacity,
                        flow: edge.current_flow,
                    })
                } else {
                    None
                }
            })
            .collect()
    }
|
||||
|
||||
/// Compute algebraic connectivity (lambda2) as optional drift signal
|
||||
/// This is DIFFERENT from mincut - provides spectral stress insight
|
||||
fn compute_algebraic_connectivity(&self, capacity: &[Vec<f64>], n: usize) -> f64 {
|
||||
if n < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Build Laplacian: L = D - A
|
||||
let mut laplacian = vec![vec![0.0f64; n]; n];
|
||||
for i in 0..n {
|
||||
let degree: f64 = capacity[i].iter().sum();
|
||||
laplacian[i][i] = degree;
|
||||
for j in 0..n {
|
||||
laplacian[i][j] -= capacity[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
// Power iteration to find second smallest eigenvalue
|
||||
// Start with random vector orthogonal to constant vector
|
||||
let mut v: Vec<f64> = (0..n).map(|i| (i as f64 * 0.7).sin()).collect();
|
||||
|
||||
// Orthogonalize against constant vector (normalize)
|
||||
let mean: f64 = v.iter().sum::<f64>() / n as f64;
|
||||
for x in &mut v {
|
||||
*x -= mean;
|
||||
}
|
||||
|
||||
// Normalize
|
||||
let norm: f64 = v.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
if norm > 1e-10 {
|
||||
for x in &mut v {
|
||||
*x /= norm;
|
||||
}
|
||||
}
|
||||
|
||||
// Shifted inverse power iteration for second smallest eigenvalue
|
||||
// We want to find λ₂, so we shift to find smallest non-zero eigenvalue
|
||||
let shift = 0.001; // Small shift to avoid singular matrix
|
||||
|
||||
for _ in 0..self.config.max_iterations {
|
||||
// Compute L*v
|
||||
let mut lv = vec![0.0f64; n];
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
lv[i] += laplacian[i][j] * v[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Apply shift: (L + shift*I) * v
|
||||
for i in 0..n {
|
||||
lv[i] += shift * v[i];
|
||||
}
|
||||
|
||||
// Orthogonalize against constant vector
|
||||
let mean: f64 = lv.iter().sum::<f64>() / n as f64;
|
||||
for x in &mut lv {
|
||||
*x -= mean;
|
||||
}
|
||||
|
||||
// Normalize
|
||||
let norm: f64 = lv.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
if norm < 1e-10 {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut new_v = lv;
|
||||
for x in &mut new_v {
|
||||
*x /= norm;
|
||||
}
|
||||
|
||||
// Check convergence
|
||||
let diff: f64 = v.iter().zip(new_v.iter()).map(|(a, b)| (a - b).abs()).sum();
|
||||
|
||||
v = new_v;
|
||||
|
||||
if diff < self.config.tolerance {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Rayleigh quotient: λ = (v^T L v) / (v^T v)
|
||||
let mut vtlv = 0.0f64;
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
vtlv += v[i] * laplacian[i][j] * v[j];
|
||||
}
|
||||
}
|
||||
let vtv: f64 = v.iter().map(|x| x * x).sum();
|
||||
|
||||
if vtv > 1e-10 {
|
||||
(vtlv / vtv).max(0.0)
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MincutComputer {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the mincut for a contracted graph using the default configuration
/// (lambda2 disabled).
pub fn compute_mincut(graph: &ContractedGraph) -> MincutResult {
    MincutComputer::new().compute(graph)
}

/// Compute the mincut and additionally estimate lambda2 (algebraic
/// connectivity); otherwise identical to [`compute_mincut`].
pub fn compute_mincut_with_lambda2(graph: &ContractedGraph) -> MincutResult {
    MincutComputer::with_config(MincutConfig {
        compute_lambda2: true,
        ..Default::default()
    })
    .compute(graph)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::super::contracted_graph::{ContractedGraphBuilder, EdgeType};
    use super::*;

    // Degenerate input: no nodes -> trivial zero cut, no witnesses.
    #[test]
    fn test_mincut_empty_graph() {
        let graph = ContractedGraph::new(1);
        let result = compute_mincut(&graph);
        assert_eq!(result.lambda_cut, 0.0);
        assert!(result.witness_edges.is_empty());
    }

    // A single node cannot be cut.
    #[test]
    fn test_mincut_single_node() {
        let mut graph = ContractedGraph::new(1);
        graph.add_node(super::super::contracted_graph::ContractedNode::new(
            1,
            NodeType::Partition,
            0,
        ));

        let result = compute_mincut(&graph);
        assert_eq!(result.lambda_cut, 0.0);
    }

    // Two nodes joined by one edge: the cut equals that edge's capacity and
    // the edge itself is the sole witness.
    #[test]
    fn test_mincut_two_connected_nodes() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};

        let mut graph = ContractedGraph::new(1);
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 0));
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 1));
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(5.0),
        );

        let result = compute_mincut(&graph);
        assert!((result.lambda_cut - 5.0).abs() < 0.01);
        assert_eq!(result.witness_edges.len(), 1);
    }

    // Unit-capacity triangle: isolating any vertex severs two edges.
    #[test]
    fn test_mincut_triangle() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};

        let mut graph = ContractedGraph::new(1);
        for i in 0..3 {
            graph.add_node(ContractedNode::new(1, NodeType::Partition, i));
        }

        // Create triangle with edges of capacity 1.0
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                1,
                NodeType::Partition,
                2,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                2,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0),
        );

        let result = compute_mincut(&graph);
        // Mincut of a triangle is 2 (cut one node from the other two)
        assert!((result.lambda_cut - 2.0).abs() < 0.01);
    }

    // lambda2 is populated (and non-negative, as eigenvalues of a Laplacian
    // must be) when requested via the config.
    #[test]
    fn test_mincut_with_lambda2() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 0, 1);
        let result = compute_mincut_with_lambda2(&graph);

        assert!(result.lambda2.is_some());
        assert!(result.lambda2.unwrap() >= 0.0);
    }

    // Smoke test on a builder-produced graph: sane value, bounded runtime.
    #[test]
    fn test_mincut_default_graph() {
        let graph = ContractedGraphBuilder::build_default(1, 5, 10, 2);
        let result = compute_mincut(&graph);

        assert!(result.lambda_cut >= 0.0);
        assert!(result.computation_time_ms < 10000); // Should complete quickly
    }

    // Witness edges carry the string-typed metadata plus capacity and flow.
    #[test]
    fn test_witness_edges() {
        use super::super::contracted_graph::{ContractedEdge, ContractedNode};

        let mut graph = ContractedGraph::new(1);
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 0));
        graph.add_node(ContractedNode::new(1, NodeType::Partition, 1));
        graph.add_edge(
            ContractedEdge::new(
                1,
                NodeType::Partition,
                0,
                NodeType::Partition,
                1,
                EdgeType::PartitionLink,
            )
            .with_capacity(1.0)
            .with_flow(0.5),
        );

        let result = compute_mincut(&graph);
        assert_eq!(result.witness_edges.len(), 1);

        let witness = &result.witness_edges[0];
        assert_eq!(witness.source_type, "partition");
        assert_eq!(witness.edge_type, "partition_link");
        assert_eq!(witness.capacity, 1.0);
        assert_eq!(witness.flow, 0.5);
    }
}
|
||||
261
vendor/ruvector/crates/ruvector-postgres/src/integrity/mod.rs
vendored
Normal file
261
vendor/ruvector/crates/ruvector-postgres/src/integrity/mod.rs
vendored
Normal file
@@ -0,0 +1,261 @@
|
||||
//! # Integrity Control Plane
|
||||
//!
|
||||
//! Stoer-Wagner mincut gating for vector search integrity.
|
||||
|
||||
use pgrx::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
/// Declarative SLO-style contract an index must satisfy to pass validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityContract {
    /// Unique identifier; also the registry key in `IntegrityManager`.
    pub id: String,
    /// Human-readable name.
    pub name: String,
    /// Minimum acceptable recall (0.0..=1.0).
    pub min_recall: f64,
    /// Maximum acceptable latency in milliseconds.
    pub max_latency_ms: u64,
    /// Minimum acceptable mincut value.
    pub min_mincut: f64,
    /// Whether the contract is currently in force.
    pub active: bool,
}

impl Default for IntegrityContract {
    /// The built-in "default" contract: recall >= 0.95, latency <= 100ms,
    /// mincut >= 0.1. Also used as the fallback when validating against an
    /// unknown contract id.
    fn default() -> Self {
        Self {
            id: "default".to_string(),
            name: "Default Contract".to_string(),
            min_recall: 0.95,
            max_latency_ms: 100,
            min_mincut: 0.1,
            active: true,
        }
    }
}
|
||||
|
||||
/// Outcome of validating measured metrics against a contract.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
    /// True iff no threshold was violated.
    pub passed: bool,
    /// Measured recall that was checked.
    pub recall: f64,
    /// Measured latency (ms) that was checked.
    pub latency_ms: u64,
    /// Measured mincut that was checked.
    pub mincut: f64,
    /// Human-readable description of each violated threshold; empty on pass.
    pub failures: Vec<String>,
}
|
||||
|
||||
pub struct IntegrityManager {
|
||||
contracts: HashMap<String, IntegrityContract>,
|
||||
}
|
||||
|
||||
impl IntegrityManager {
|
||||
pub fn new() -> Self {
|
||||
let mut contracts = HashMap::new();
|
||||
contracts.insert("default".to_string(), IntegrityContract::default());
|
||||
Self { contracts }
|
||||
}
|
||||
|
||||
pub fn register_contract(&mut self, contract: IntegrityContract) {
|
||||
self.contracts.insert(contract.id.clone(), contract);
|
||||
}
|
||||
|
||||
pub fn get_contract(&self, id: &str) -> Option<&IntegrityContract> {
|
||||
self.contracts.get(id)
|
||||
}
|
||||
|
||||
pub fn validate(
|
||||
&self,
|
||||
contract_id: &str,
|
||||
recall: f64,
|
||||
latency_ms: u64,
|
||||
mincut: f64,
|
||||
) -> ValidationResult {
|
||||
let contract = self.contracts.get(contract_id).cloned().unwrap_or_default();
|
||||
let mut failures = Vec::new();
|
||||
|
||||
if recall < contract.min_recall {
|
||||
failures.push(format!("Recall {:.3} < {:.3}", recall, contract.min_recall));
|
||||
}
|
||||
if latency_ms > contract.max_latency_ms {
|
||||
failures.push(format!(
|
||||
"Latency {}ms > {}ms",
|
||||
latency_ms, contract.max_latency_ms
|
||||
));
|
||||
}
|
||||
if mincut < contract.min_mincut {
|
||||
failures.push(format!("Mincut {:.3} < {:.3}", mincut, contract.min_mincut));
|
||||
}
|
||||
|
||||
ValidationResult {
|
||||
passed: failures.is_empty(),
|
||||
recall,
|
||||
latency_ms,
|
||||
mincut,
|
||||
failures,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_contracts(&self) -> Vec<&IntegrityContract> {
|
||||
self.contracts.values().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IntegrityManager {
    /// Same as [`IntegrityManager::new`]: includes the "default" contract.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
// Process-wide singleton holding the contract registry; initialized lazily.
static INTEGRITY_MANAGER: std::sync::OnceLock<Arc<RwLock<IntegrityManager>>> =
    std::sync::OnceLock::new();

/// Get the global integrity manager, initializing it on first call.
///
/// Cloning the `Arc` is cheap; callers take the read/write lock as needed.
pub fn get_integrity_manager() -> Arc<RwLock<IntegrityManager>> {
    INTEGRITY_MANAGER
        .get_or_init(|| Arc::new(RwLock::new(IntegrityManager::new())))
        .clone()
}
|
||||
|
||||
// Submodule exports
|
||||
pub mod contracted_graph;
|
||||
pub mod events;
|
||||
pub mod gating;
|
||||
pub mod mincut;
|
||||
|
||||
pub use mincut::{MincutConfig, MincutResult, WitnessEdge};
|
||||
|
||||
/// Get the current mincut for an index (used by the gated_transformer module).
///
/// STUB: always returns a fixed, healthy-looking result (lambda_cut = 10.0,
/// no witnesses); the `Result` error arm is currently never taken.
pub fn get_current_mincut(_index_name: &str) -> Result<MincutResult, String> {
    // TODO: Implement actual index mincut lookup
    // For now, return a default result
    Ok(MincutResult {
        lambda_cut: 10.0,
        lambda2: None,
        witness_edges: vec![],
        cut_partition: vec![],
        computation_time_ms: 0,
    })
}
|
||||
|
||||
/// Global minimum cut of an undirected weighted graph via Stoer-Wagner.
///
/// `edges` are `(u, v, weight)` triples; parallel edges accumulate and edges
/// with an out-of-range endpoint are ignored. Graphs with at most one vertex
/// or no edges have a cut of 0.
pub fn stoer_wagner_mincut(n: usize, edges: &[(usize, usize, f64)]) -> f64 {
    if n <= 1 || edges.is_empty() {
        return 0.0;
    }

    // Dense symmetric adjacency matrix.
    let mut adj = vec![vec![0.0f64; n]; n];
    for &(u, v, w) in edges {
        if u < n && v < n {
            adj[u][v] += w;
            adj[v][u] += w;
        }
    }

    let mut best = f64::MAX;
    let mut alive = vec![true; n];

    // n-1 phases; each contracts one vertex pair.
    for phase in 0..n - 1 {
        let mut weight = vec![0.0f64; n];
        let mut added = vec![false; n];
        let mut t = 0usize; // last vertex added in this phase
        let mut s = 0usize; // second-to-last

        for _ in 0..n - phase {
            // Most tightly connected vertex not yet added.
            let mut pick = 0usize;
            let mut pick_w = -1.0f64;
            for v in 0..n {
                if alive[v] && !added[v] && weight[v] > pick_w {
                    pick_w = weight[v];
                    pick = v;
                }
            }

            s = t;
            t = pick;
            added[pick] = true;

            for v in 0..n {
                if alive[v] && !added[v] {
                    weight[v] += adj[pick][v];
                }
            }
        }

        // The cut of this phase isolates t; keep the best seen.
        best = best.min(weight[t]);

        // Contract t into s.
        alive[t] = false;
        for v in 0..n {
            adj[s][v] += adj[t][v];
            adj[v][s] += adj[v][t];
        }
    }

    best
}
|
||||
|
||||
// SQL: report whether the integrity plane is enabled plus the ids of the
// registered contracts. NOTE(review): `.unwrap()` on the RwLock will panic
// (aborting the backend) if the lock was poisoned by an earlier panic.
#[pg_extern]
fn ruvector_integrity_status() -> pgrx::JsonB {
    let manager = get_integrity_manager();
    let reader = manager.read().unwrap();
    let contracts: Vec<_> = reader
        .list_contracts()
        .iter()
        .map(|c| c.id.clone())
        .collect();
    pgrx::JsonB(serde_json::json!({
        "enabled": true,
        "active_contracts": contracts.len(),
        "contracts": contracts,
    }))
}

// SQL: register (or overwrite) a contract; echoes the stored contract back.
// `max_latency_ms` arrives as SQL bigint; negative values wrap via `as u64`.
#[pg_extern]
fn ruvector_integrity_create_contract(
    id: &str,
    name: &str,
    min_recall: f64,
    max_latency_ms: i64,
    min_mincut: f64,
) -> pgrx::JsonB {
    let contract = IntegrityContract {
        id: id.to_string(),
        name: name.to_string(),
        min_recall,
        max_latency_ms: max_latency_ms as u64,
        min_mincut,
        active: true,
    };
    let manager = get_integrity_manager();
    manager.write().unwrap().register_contract(contract.clone());
    pgrx::JsonB(serde_json::json!({ "success": true, "contract": contract }))
}

// SQL: validate measured metrics against a contract; unknown ids fall back
// to the default contract (see IntegrityManager::validate).
#[pg_extern]
fn ruvector_integrity_validate(
    contract_id: &str,
    recall: f64,
    latency_ms: i64,
    mincut: f64,
) -> pgrx::JsonB {
    let manager = get_integrity_manager();
    let result = manager
        .read()
        .unwrap()
        .validate(contract_id, recall, latency_ms as u64, mincut);
    pgrx::JsonB(serde_json::json!(result))
}

// SQL: compute a global mincut from a JSON array of [u, v, weight] triples.
// Malformed JSON silently becomes an empty edge list (result 0.0) via
// `unwrap_or_default` - callers cannot distinguish that from a real 0 cut.
#[pg_extern]
fn ruvector_mincut(n: i32, edges_json: pgrx::JsonB) -> f64 {
    let edges: Vec<(usize, usize, f64)> = serde_json::from_value(edges_json.0).unwrap_or_default();
    stoer_wagner_mincut(n as usize, &edges)
}
|
||||
|
||||
#[cfg(feature = "pg_test")]
#[pg_schema]
mod tests {
    use super::*;

    // Status JSON must at least carry the "enabled" flag.
    #[pg_test]
    fn test_integrity_status() {
        let status = ruvector_integrity_status();
        assert!(status.0.get("enabled").is_some());
    }

    // Smoke test: mincut of a small path graph is non-negative.
    #[pg_test]
    fn test_mincut_simple() {
        let edges = vec![(0, 1, 1.0), (1, 2, 1.0)];
        let mincut = stoer_wagner_mincut(3, &edges);
        assert!(mincut >= 0.0);
    }
}
|
||||
Reference in New Issue
Block a user