Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
104
vendor/ruvector/crates/ruvector-dag/src/mincut/bottleneck.rs
vendored
Normal file
104
vendor/ruvector/crates/ruvector-dag/src/mincut/bottleneck.rs
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
//! Bottleneck Detection
|
||||
|
||||
use crate::dag::{OperatorType, QueryDag};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// A detected bottleneck in the DAG
#[derive(Debug, Clone)]
pub struct Bottleneck {
    /// Identifier of the DAG node flagged as a bottleneck.
    pub node_id: usize,
    /// Criticality score for this node (higher means more critical).
    pub score: f64,
    /// Estimated impact: the node's estimated cost weighted by its score.
    pub impact_estimate: f64,
    /// Human-readable optimization hint for this operator.
    pub suggested_action: String,
}
|
||||
|
||||
/// Analysis of bottlenecks in a DAG
#[derive(Debug)]
pub struct BottleneckAnalysis {
    /// Bottlenecks found, sorted by score in descending order.
    pub bottlenecks: Vec<Bottleneck>,
    /// Sum of estimated costs over all nodes in the DAG.
    pub total_cost: f64,
    /// Cost of the most expensive path through the DAG (longest path by cost).
    pub critical_path_cost: f64,
    /// Fraction of total work that lies off the critical path:
    /// `1 - critical_path_cost / max(total_cost, 1)`.
    pub parallelization_potential: f64,
}
|
||||
|
||||
impl BottleneckAnalysis {
|
||||
pub fn analyze(dag: &QueryDag, criticality: &HashMap<usize, f64>) -> Self {
|
||||
let mut bottlenecks = Vec::new();
|
||||
|
||||
for (&node_id, &score) in criticality {
|
||||
if score > 0.5 {
|
||||
// Threshold for bottleneck
|
||||
let node = dag.get_node(node_id).unwrap();
|
||||
let action = Self::suggest_action(&node.op_type);
|
||||
|
||||
bottlenecks.push(Bottleneck {
|
||||
node_id,
|
||||
score,
|
||||
impact_estimate: node.estimated_cost * score,
|
||||
suggested_action: action,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by score descending
|
||||
bottlenecks.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
|
||||
// Calculate total cost by iterating over all node IDs
|
||||
let total_cost: f64 = (0..dag.node_count())
|
||||
.filter_map(|id| dag.get_node(id))
|
||||
.map(|n| n.estimated_cost)
|
||||
.sum();
|
||||
|
||||
let critical_path_cost = Self::compute_critical_path_cost(dag);
|
||||
let parallelization_potential = 1.0 - (critical_path_cost / total_cost.max(1.0));
|
||||
|
||||
Self {
|
||||
bottlenecks,
|
||||
total_cost,
|
||||
critical_path_cost,
|
||||
parallelization_potential,
|
||||
}
|
||||
}
|
||||
|
||||
fn suggest_action(op_type: &OperatorType) -> String {
|
||||
match op_type {
|
||||
OperatorType::SeqScan { table } => {
|
||||
format!("Consider adding index on {}", table)
|
||||
}
|
||||
OperatorType::NestedLoopJoin => "Consider using hash join instead".to_string(),
|
||||
OperatorType::Sort { .. } => "Consider adding sorted index".to_string(),
|
||||
OperatorType::HnswScan { .. } => "Consider increasing ef_search parameter".to_string(),
|
||||
_ => "Review operator parameters".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_critical_path_cost(dag: &QueryDag) -> f64 {
|
||||
// Longest path by cost
|
||||
let mut max_cost: HashMap<usize, f64> = HashMap::new();
|
||||
|
||||
// Get topological sort, return 0 if there's a cycle
|
||||
let sorted = match dag.topological_sort() {
|
||||
Ok(s) => s,
|
||||
Err(_) => return 0.0,
|
||||
};
|
||||
|
||||
for node_id in sorted {
|
||||
let node = dag.get_node(node_id).unwrap();
|
||||
let parent_max = dag
|
||||
.parents(node_id)
|
||||
.iter()
|
||||
.filter_map(|&p| max_cost.get(&p))
|
||||
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||
.copied()
|
||||
.unwrap_or(0.0);
|
||||
|
||||
max_cost.insert(node_id, parent_max + node.estimated_cost);
|
||||
}
|
||||
|
||||
max_cost
|
||||
.values()
|
||||
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||
.copied()
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
}
|
||||
47
vendor/ruvector/crates/ruvector-dag/src/mincut/dynamic_updates.rs
vendored
Normal file
47
vendor/ruvector/crates/ruvector-dag/src/mincut/dynamic_updates.rs
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
//! Dynamic Updates: O(n^0.12) amortized update algorithms
|
||||
|
||||
use super::engine::FlowEdge;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Maintains hierarchical decomposition for fast updates
#[allow(dead_code)]
pub struct HierarchicalDecomposition {
    // One partition map per level: node id -> grouped node ids.
    // NOTE(review): in the visible code levels are only ever created empty
    // and cleared on rebuild; nothing populates the partitions here.
    levels: Vec<HashMap<usize, Vec<usize>>>,
    // Number of levels, fixed at construction to ceil(log2(node_count)).
    level_count: usize,
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl HierarchicalDecomposition {
|
||||
pub fn new(node_count: usize) -> Self {
|
||||
// Number of levels = O(log n)
|
||||
let level_count = (node_count as f64).log2().ceil() as usize;
|
||||
|
||||
Self {
|
||||
levels: vec![HashMap::new(); level_count],
|
||||
level_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update decomposition after edge change
|
||||
/// Amortized O(n^0.12) by only updating affected levels
|
||||
pub fn update(&mut self, from: usize, to: usize, _graph: &HashMap<usize, Vec<FlowEdge>>) {
|
||||
// Find affected level based on edge criticality
|
||||
let affected_level = self.find_affected_level(from, to);
|
||||
|
||||
// Only rebuild affected level and above
|
||||
for level in affected_level..self.level_count {
|
||||
self.rebuild_level(level);
|
||||
}
|
||||
}
|
||||
|
||||
fn find_affected_level(&self, _from: usize, _to: usize) -> usize {
|
||||
// Heuristic: lower levels for local changes
|
||||
0
|
||||
}
|
||||
|
||||
fn rebuild_level(&mut self, level: usize) {
|
||||
// Rebuild partition at this level
|
||||
// Cost: O(n / 2^level)
|
||||
self.levels[level].clear();
|
||||
}
|
||||
}
|
||||
196
vendor/ruvector/crates/ruvector-dag/src/mincut/engine.rs
vendored
Normal file
196
vendor/ruvector/crates/ruvector-dag/src/mincut/engine.rs
vendored
Normal file
@@ -0,0 +1,196 @@
|
||||
//! DagMinCutEngine: Main min-cut computation engine
|
||||
|
||||
use super::local_kcut::LocalKCut;
|
||||
use crate::dag::QueryDag;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Tuning options for the min-cut engine.
#[derive(Debug, Clone)]
pub struct MinCutConfig {
    // Approximation factor.
    // NOTE(review): not read anywhere in the visible code — confirm intended use.
    pub epsilon: f32,
    // Depth limit handed to the local k-cut BFS search.
    pub local_search_depth: usize,
    // Whether to memoize (source, sink) -> MinCutResult; cache is
    // invalidated whenever an edge is added or updated.
    pub cache_cuts: bool,
}
|
||||
|
||||
impl Default for MinCutConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
epsilon: 0.1,
|
||||
local_search_depth: 3,
|
||||
cache_cuts: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Edge in the flow graph
#[derive(Debug, Clone)]
pub struct FlowEdge {
    /// Source node id.
    pub from: usize,
    /// Destination node id.
    pub to: usize,
    /// Maximum flow the edge can carry (0.0 for residual reverse edges).
    pub capacity: f64,
    /// Current flow routed through the edge.
    pub flow: f64,
}
|
||||
|
||||
/// Result of min-cut computation
#[derive(Debug, Clone)]
pub struct MinCutResult {
    /// Total capacity crossing the cut.
    pub cut_value: f64,
    /// Nodes reachable from the source (source side of the cut).
    pub source_side: HashSet<usize>,
    /// Nodes reachable from the sink (sink side of the cut).
    pub sink_side: HashSet<usize>,
    /// Edges `(from, to)` that leave the source side.
    pub cut_edges: Vec<(usize, usize)>,
}
|
||||
|
||||
/// Main min-cut computation engine over a query DAG's flow graph.
pub struct DagMinCutEngine {
    // Engine tuning knobs (approximation, search depth, caching).
    config: MinCutConfig,
    // Adjacency list: node id -> outgoing flow edges (forward + residual).
    adjacency: HashMap<usize, Vec<FlowEdge>>,
    // One past the highest node id seen so far.
    node_count: usize,
    // Local k-cut oracle used for the approximate computation.
    local_kcut: LocalKCut,
    // Memoized results keyed by (source, sink); cleared on edge changes.
    cached_cuts: HashMap<(usize, usize), MinCutResult>,
}
|
||||
|
||||
impl DagMinCutEngine {
|
||||
pub fn new(config: MinCutConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
adjacency: HashMap::new(),
|
||||
node_count: 0,
|
||||
local_kcut: LocalKCut::new(),
|
||||
cached_cuts: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build flow graph from DAG
|
||||
pub fn build_from_dag(&mut self, dag: &QueryDag) {
|
||||
self.adjacency.clear();
|
||||
self.node_count = dag.node_count();
|
||||
|
||||
// Iterate over all possible node IDs
|
||||
for node_id in 0..dag.node_count() {
|
||||
if let Some(node) = dag.get_node(node_id) {
|
||||
let capacity = node.estimated_cost.max(1.0);
|
||||
|
||||
for &child_id in dag.children(node_id) {
|
||||
self.add_edge(node_id, child_id, capacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_edge(&mut self, from: usize, to: usize, capacity: f64) {
|
||||
self.adjacency.entry(from).or_default().push(FlowEdge {
|
||||
from,
|
||||
to,
|
||||
capacity,
|
||||
flow: 0.0,
|
||||
});
|
||||
// Add reverse edge for residual graph
|
||||
self.adjacency.entry(to).or_default().push(FlowEdge {
|
||||
from: to,
|
||||
to: from,
|
||||
capacity: 0.0,
|
||||
flow: 0.0,
|
||||
});
|
||||
|
||||
self.node_count = self.node_count.max(from + 1).max(to + 1);
|
||||
|
||||
// Invalidate cache
|
||||
self.cached_cuts.clear();
|
||||
}
|
||||
|
||||
/// Compute min-cut between source and sink
|
||||
pub fn compute_mincut(&mut self, source: usize, sink: usize) -> MinCutResult {
|
||||
// Check cache
|
||||
if self.config.cache_cuts {
|
||||
if let Some(cached) = self.cached_cuts.get(&(source, sink)) {
|
||||
return cached.clone();
|
||||
}
|
||||
}
|
||||
|
||||
// Use local k-cut for approximate but fast computation
|
||||
let result = self.local_kcut.compute(
|
||||
&self.adjacency,
|
||||
source,
|
||||
sink,
|
||||
self.config.local_search_depth,
|
||||
);
|
||||
|
||||
if self.config.cache_cuts {
|
||||
self.cached_cuts.insert((source, sink), result.clone());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Dynamic update after edge weight change - O(n^0.12) amortized
|
||||
pub fn update_edge(&mut self, from: usize, to: usize, new_capacity: f64) {
|
||||
if let Some(edges) = self.adjacency.get_mut(&from) {
|
||||
for edge in edges.iter_mut() {
|
||||
if edge.to == to {
|
||||
edge.capacity = new_capacity;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate affected cached cuts
|
||||
// Extract keys to avoid borrowing issues
|
||||
let keys_to_remove: Vec<(usize, usize)> = self
|
||||
.cached_cuts
|
||||
.keys()
|
||||
.filter(|(s, t)| self.cut_involves_edge(*s, *t, from, to))
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
for key in keys_to_remove {
|
||||
self.cached_cuts.remove(&key);
|
||||
}
|
||||
}
|
||||
|
||||
fn cut_involves_edge(&self, _source: usize, _sink: usize, _from: usize, _to: usize) -> bool {
|
||||
// Conservative: invalidate if edge is on any path from source to sink
|
||||
// This is a simplified check
|
||||
true
|
||||
}
|
||||
|
||||
/// Compute criticality scores for all nodes
|
||||
pub fn compute_criticality(&mut self, dag: &QueryDag) -> HashMap<usize, f64> {
|
||||
let mut criticality = HashMap::new();
|
||||
|
||||
let leaves = dag.leaves();
|
||||
let root = dag.root();
|
||||
|
||||
if leaves.is_empty() || root.is_none() {
|
||||
return criticality;
|
||||
}
|
||||
|
||||
let root = root.unwrap();
|
||||
|
||||
// For each node, compute how much it affects the min-cut
|
||||
for node_id in 0..dag.node_count() {
|
||||
if dag.get_node(node_id).is_none() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compute min-cut with node vs without
|
||||
let cut_with = self.compute_mincut(leaves[0], root);
|
||||
|
||||
// Temporarily increase node capacity
|
||||
for &child in dag.children(node_id) {
|
||||
self.update_edge(node_id, child, f64::INFINITY);
|
||||
}
|
||||
|
||||
let cut_without = self.compute_mincut(leaves[0], root);
|
||||
|
||||
// Restore capacity
|
||||
let node = dag.get_node(node_id).unwrap();
|
||||
for &child in dag.children(node_id) {
|
||||
self.update_edge(node_id, child, node.estimated_cost);
|
||||
}
|
||||
|
||||
// Criticality = how much the cut increases without the node
|
||||
let crit = (cut_without.cut_value - cut_with.cut_value) / cut_with.cut_value.max(1.0);
|
||||
criticality.insert(node_id, crit.max(0.0));
|
||||
}
|
||||
|
||||
criticality
|
||||
}
|
||||
}
|
||||
90
vendor/ruvector/crates/ruvector-dag/src/mincut/local_kcut.rs
vendored
Normal file
90
vendor/ruvector/crates/ruvector-dag/src/mincut/local_kcut.rs
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
//! Local K-Cut: Sublinear min-cut approximation
|
||||
|
||||
use super::engine::{FlowEdge, MinCutResult};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Local K-Cut oracle for approximate min-cut
pub struct LocalKCut {
    // Scratch state.
    // NOTE(review): both fields are cleared at the start of `compute` but
    // never otherwise read or written in the visible code — likely vestigial.
    visited: HashSet<usize>,
    distance: HashMap<usize, usize>,
}
|
||||
|
||||
impl LocalKCut {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
visited: HashSet::new(),
|
||||
distance: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute approximate min-cut using local search
|
||||
/// Time complexity: O(k * local_depth) where k << n
|
||||
pub fn compute(
|
||||
&mut self,
|
||||
graph: &HashMap<usize, Vec<FlowEdge>>,
|
||||
source: usize,
|
||||
sink: usize,
|
||||
depth: usize,
|
||||
) -> MinCutResult {
|
||||
self.visited.clear();
|
||||
self.distance.clear();
|
||||
|
||||
// BFS from source with limited depth
|
||||
let source_reachable = self.limited_bfs(graph, source, depth);
|
||||
|
||||
// BFS from sink with limited depth
|
||||
let sink_reachable = self.limited_bfs(graph, sink, depth);
|
||||
|
||||
// Find cut edges
|
||||
let mut cut_edges = Vec::new();
|
||||
let mut cut_value = 0.0;
|
||||
|
||||
for &node in &source_reachable {
|
||||
if let Some(edges) = graph.get(&node) {
|
||||
for edge in edges {
|
||||
if !source_reachable.contains(&edge.to) && edge.capacity > 0.0 {
|
||||
cut_edges.push((edge.from, edge.to));
|
||||
cut_value += edge.capacity;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MinCutResult {
|
||||
cut_value,
|
||||
source_side: source_reachable,
|
||||
sink_side: sink_reachable,
|
||||
cut_edges,
|
||||
}
|
||||
}
|
||||
|
||||
fn limited_bfs(
|
||||
&mut self,
|
||||
graph: &HashMap<usize, Vec<FlowEdge>>,
|
||||
start: usize,
|
||||
max_depth: usize,
|
||||
) -> HashSet<usize> {
|
||||
let mut reachable = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
|
||||
queue.push_back((start, 0));
|
||||
reachable.insert(start);
|
||||
|
||||
while let Some((node, depth)) = queue.pop_front() {
|
||||
if depth >= max_depth {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(edges) = graph.get(&node) {
|
||||
for edge in edges {
|
||||
if edge.capacity > edge.flow && !reachable.contains(&edge.to) {
|
||||
reachable.insert(edge.to);
|
||||
queue.push_back((edge.to, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reachable
|
||||
}
|
||||
}
|
||||
12
vendor/ruvector/crates/ruvector-dag/src/mincut/mod.rs
vendored
Normal file
12
vendor/ruvector/crates/ruvector-dag/src/mincut/mod.rs
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
//! MinCut Optimization: Subpolynomial bottleneck detection

// Internal submodules; the public surface is re-exported below.
mod bottleneck;
mod dynamic_updates;
mod engine;
mod local_kcut;
mod redundancy;

// Public API of the mincut module.
pub use bottleneck::{Bottleneck, BottleneckAnalysis};
pub use engine::{DagMinCutEngine, FlowEdge, MinCutConfig, MinCutResult};
pub use local_kcut::LocalKCut;
pub use redundancy::{RedundancyStrategy, RedundancySuggestion};
|
||||
57
vendor/ruvector/crates/ruvector-dag/src/mincut/redundancy.rs
vendored
Normal file
57
vendor/ruvector/crates/ruvector-dag/src/mincut/redundancy.rs
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
//! Redundancy Suggestions for reliability
|
||||
|
||||
use super::bottleneck::Bottleneck;
|
||||
use crate::dag::{OperatorType, QueryDag};
|
||||
|
||||
/// Suggestion for adding redundancy
#[derive(Debug, Clone)]
pub struct RedundancySuggestion {
    /// Id of the bottleneck node this suggestion targets.
    pub target_node: usize,
    /// How redundancy should be added for this node.
    pub strategy: RedundancyStrategy,
    /// Estimated benefit (30% of the bottleneck's impact estimate).
    pub expected_improvement: f64,
    /// Estimated extra cost (10% of the node's estimated cost).
    pub cost_increase: f64,
}
|
||||
|
||||
/// Ways of adding redundancy around a bottleneck node.
#[derive(Debug, Clone)]
pub enum RedundancyStrategy {
    /// Duplicate the node's computation
    Replicate,
    /// Add alternative path
    AlternativePath,
    /// Cache intermediate results
    Materialize,
    /// Pre-compute during idle time
    Prefetch,
}
|
||||
|
||||
impl RedundancySuggestion {
|
||||
pub fn generate(dag: &QueryDag, bottlenecks: &[Bottleneck]) -> Vec<Self> {
|
||||
let mut suggestions = Vec::new();
|
||||
|
||||
for bottleneck in bottlenecks {
|
||||
let node = dag.get_node(bottleneck.node_id);
|
||||
if node.is_none() {
|
||||
continue;
|
||||
}
|
||||
let node = node.unwrap();
|
||||
|
||||
// Determine best strategy based on operator type
|
||||
let strategy = match &node.op_type {
|
||||
OperatorType::SeqScan { .. }
|
||||
| OperatorType::IndexScan { .. }
|
||||
| OperatorType::IvfFlatScan { .. } => RedundancyStrategy::Materialize,
|
||||
OperatorType::HnswScan { .. } => RedundancyStrategy::Prefetch,
|
||||
_ => RedundancyStrategy::Replicate,
|
||||
};
|
||||
|
||||
suggestions.push(RedundancySuggestion {
|
||||
target_node: bottleneck.node_id,
|
||||
strategy,
|
||||
expected_improvement: bottleneck.impact_estimate * 0.3,
|
||||
cost_increase: node.estimated_cost * 0.1,
|
||||
});
|
||||
}
|
||||
|
||||
suggestions
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user