Files
wifi-densepose/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

29 KiB
Raw Blame History

Tiny Dancer Routing Integration Plan

Overview

Integrate AI agent routing capabilities from ruvector-tiny-dancer into PostgreSQL, enabling intelligent request routing, model selection, and cost optimization directly in SQL.

Architecture

┌─────────────────────────────────────────────────────────────────┐
│                     PostgreSQL Extension                         │
├─────────────────────────────────────────────────────────────────┤
│  ┌─────────────────────────────────────────────────────────┐    │
│  │                  Tiny Dancer Router                      │    │
│  │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐   │    │
│  │  │   FastGRNN   │  │    Route     │  │    Cost      │   │    │
│  │  │   Inference  │  │   Classifier │  │   Optimizer  │   │    │
│  │  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘   │    │
│  └─────────┼─────────────────┼─────────────────┼───────────┘    │
│            └─────────────────┴─────────────────┘                │
│                              ▼                                   │
│              ┌───────────────────────────┐                       │
│              │   Agent Registry & Pool   │                       │
│              │   (LLMs, Tools, APIs)     │                       │
│              └───────────────────────────┘                       │
└─────────────────────────────────────────────────────────────────┘

Module Structure

src/
├── routing/
│   ├── mod.rs              # Module exports
│   ├── fastgrnn.rs         # FastGRNN neural inference
│   ├── router.rs           # Main routing engine
│   ├── classifier.rs       # Route classification
│   ├── cost_optimizer.rs   # Cost/latency optimization
│   ├── semantic_routes.rs  # Semantic route matching (Phase 5)
│   ├── agents/
│   │   ├── registry.rs     # Agent registration
│   │   ├── pool.rs         # Agent pool management
│   │   └── capabilities.rs # Capability matching
│   ├── policies/
│   │   ├── cost.rs         # Cost-based routing
│   │   ├── latency.rs      # Latency-based routing
│   │   ├── quality.rs      # Quality-based routing
│   │   └── hybrid.rs       # Multi-objective routing
│   └── operators.rs        # SQL operators

SQL Interface

Agent Registration

-- Register AI agents/models
SELECT ruvector_register_agent(
    name := 'gpt-4',
    agent_type := 'llm',
    capabilities := ARRAY['reasoning', 'code', 'analysis', 'creative'],
    cost_per_1k_tokens := 0.03,
    avg_latency_ms := 2500,
    quality_score := 0.95,
    metadata := '{"provider": "openai", "context_window": 128000}'
);

SELECT ruvector_register_agent(
    name := 'claude-3-haiku',
    agent_type := 'llm',
    capabilities := ARRAY['fast-response', 'simple-tasks', 'classification'],
    cost_per_1k_tokens := 0.00025,
    avg_latency_ms := 400,
    quality_score := 0.80,
    metadata := '{"provider": "anthropic", "context_window": 200000}'
);

SELECT ruvector_register_agent(
    name := 'code-specialist',
    agent_type := 'tool',
    capabilities := ARRAY['code-execution', 'debugging', 'testing'],
    cost_per_call := 0.001,
    avg_latency_ms := 100,
    quality_score := 0.90
);

-- List registered agents
SELECT * FROM ruvector_list_agents();

Basic Routing

-- Route a request to the best agent
SELECT * FROM ruvector_route(
    request := 'Write a Python function to calculate Fibonacci numbers',
    optimize_for := 'cost'  -- or 'latency', 'quality', 'balanced'
);

-- Result:
-- | agent_name | confidence | estimated_cost | estimated_latency |
-- |------------|------------|----------------|-------------------|
-- | claude-3-haiku | 0.85 | 0.001 | 400ms |

-- Route with constraints
SELECT * FROM ruvector_route(
    request := 'Analyze this complex legal document',
    required_capabilities := ARRAY['reasoning', 'analysis'],
    max_cost := 0.10,
    max_latency_ms := 5000,
    min_quality := 0.90
);

-- Multi-agent routing (for complex tasks)
SELECT * FROM ruvector_route_multi(
    request := 'Build and deploy a web application',
    num_agents := 3,
    strategy := 'pipeline'  -- or 'parallel', 'ensemble'
);

Semantic Routing

-- Create semantic routes (like function calling)
SELECT ruvector_create_route(
    name := 'customer_support',
    description := 'Handle customer support inquiries, complaints, and feedback',
    embedding := ruvector_embed('Customer support and help requests'),
    target_agent := 'support-agent',
    priority := 1
);

SELECT ruvector_create_route(
    name := 'technical_docs',
    description := 'Answer questions about technical documentation and APIs',
    embedding := ruvector_embed('Technical documentation and API reference'),
    target_agent := 'docs-agent',
    priority := 2
);

-- Semantic route matching
SELECT * FROM ruvector_semantic_route(
    query := 'How do I reset my password?',
    top_k := 3
);

-- Result:
-- | route_name | similarity | target_agent | confidence |
-- |------------|------------|--------------|------------|
-- | customer_support | 0.92 | support-agent | 0.95 |

Cost Optimization

-- Analyze routing costs
SELECT * FROM ruvector_routing_analytics(
    time_range := '7 days',
    group_by := 'agent'
);

-- Result:
-- | agent | total_requests | total_cost | avg_latency | success_rate |
-- |-------|----------------|------------|-------------|--------------|
-- | gpt-4 | 1000 | $30.00 | 2.5s | 99.2% |
-- | haiku | 5000 | $1.25 | 0.4s | 98.5% |

-- Optimize budget allocation
SELECT * FROM ruvector_optimize_budget(
    monthly_budget := 100.00,
    quality_threshold := 0.85,
    latency_threshold_ms := 2000
);

-- Auto-route with budget awareness
-- (planned: the Phase 4 ruvector_route signature does not yet accept budget_remaining)
SELECT * FROM ruvector_route(
    request := 'Summarize this article',
    budget_remaining := 10.00,
    optimize_for := 'quality_per_dollar'
);

Batch Routing

-- Route multiple requests efficiently
SELECT * FROM ruvector_batch_route(
    requests := ARRAY[
        'Simple question 1',
        'Complex analysis task',
        'Code generation request'
    ],
    optimize_for := 'total_cost'
);

-- Classify requests in batch (for preprocessing)
SELECT request_id, ruvector_classify_request(content) AS classification
FROM pending_requests;

Implementation Phases

Phase 1: FastGRNN Core (Week 1-3)

// src/routing/fastgrnn.rs

use simsimd::SpatialSimilarity;

/// FastGRNN (Fast Gated Recurrent Neural Network)
/// Lightweight neural network for fast inference.
///
/// All weight matrices are stored row-major in flat buffers; the shape of
/// each buffer is noted next to the field.
pub struct FastGRNN {
    // Gate weights
    w_gate: Vec<f32>,   // [hidden, input]
    u_gate: Vec<f32>,   // [hidden, hidden]
    b_gate: Vec<f32>,   // [hidden]

    // Update weights
    w_update: Vec<f32>, // [hidden, input]
    u_update: Vec<f32>, // [hidden, hidden]
    b_update: Vec<f32>, // [hidden]

    // Hyperparameters
    zeta: f32,          // Gate sparsity
    nu: f32,            // Update sparsity

    input_dim: usize,
    hidden_dim: usize,
}

impl FastGRNN {
    /// Builds a cell for `input_dim`-wide inputs and a `hidden_dim`-wide
    /// state, with randomly initialised weight matrices, zero biases and
    /// `zeta = nu = 1.0`.
    pub fn new(input_dim: usize, hidden_dim: usize) -> Self {
        Self {
            w_gate: Self::init_weights(hidden_dim, input_dim),
            u_gate: Self::init_weights(hidden_dim, hidden_dim),
            b_gate: vec![0.0; hidden_dim],
            w_update: Self::init_weights(hidden_dim, input_dim),
            u_update: Self::init_weights(hidden_dim, hidden_dim),
            b_update: vec![0.0; hidden_dim],
            zeta: 1.0,
            nu: 1.0,
            input_dim,
            hidden_dim,
        }
    }

    /// Xavier/Glorot-uniform initialiser for a row-major `[rows, cols]` matrix.
    ///
    /// Values are drawn from `[-limit, limit)` with
    /// `limit = sqrt(6 / (rows + cols))`. The generator is a xorshift64
    /// stream seeded from `RandomState`, so no extra crate is required and
    /// two matrices of the same shape do not start out identical.
    fn init_weights(rows: usize, cols: usize) -> Vec<f32> {
        let limit = (6.0 / (rows + cols).max(1) as f32).sqrt();

        let hasher = std::hash::BuildHasher::build_hasher(
            &std::collections::hash_map::RandomState::new(),
        );
        // xorshift must never be seeded with zero; forcing the low bit avoids that.
        let mut state = std::hash::Hasher::finish(&hasher) | 1;

        (0..rows * cols)
            .map(|_| {
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;
                // The top 24 bits fit an f32 mantissa exactly, giving a value in [0, 1).
                let unit = (state >> 40) as f32 / (1u64 << 24) as f32;
                (2.0 * unit - 1.0) * limit
            })
            .collect()
    }

    /// Single step forward pass
    /// h_t = (ζ * (1 - z_t) + ν) ⊙ tanh(Wx_t + Uh_{t-1} + b_h) + z_t ⊙ h_{t-1}
    ///
    /// Only the first `input_dim` values of `input` and the first
    /// `hidden_dim` values of `hidden` are read.
    ///
    /// # Panics
    /// Panics if `input` is shorter than `input_dim` or `hidden` is shorter
    /// than `hidden_dim`.
    pub fn step(&self, input: &[f32], hidden: &[f32]) -> Vec<f32> {
        assert!(
            input.len() >= self.input_dim,
            "FastGRNN::step: input has {} values, expected at least {}",
            input.len(),
            self.input_dim
        );
        assert!(
            hidden.len() >= self.hidden_dim,
            "FastGRNN::step: hidden state has {} values, expected at least {}",
            hidden.len(),
            self.hidden_dim
        );

        // Gate: z = σ(W_z x + U_z h + b_z)
        let gate = self.sigmoid(&self.linear_combine(
            input, hidden,
            &self.w_gate, &self.u_gate, &self.b_gate
        ));

        // Update: h̃ = tanh(W_h x + U_h h + b_h)
        let update = self.tanh(&self.linear_combine(
            input, hidden,
            &self.w_update, &self.u_update, &self.b_update
        ));

        // New hidden: h = (ζ(1-z) + ν) ⊙ h̃ + z ⊙ h
        gate.iter()
            .zip(&update)
            .zip(hidden)
            .map(|((&z, &candidate), &previous)| {
                (self.zeta * (1.0 - z) + self.nu) * candidate + z * previous
            })
            .collect()
    }

    /// Process sequence: starts from a zero state and folds every element of
    /// `sequence` through [`FastGRNN::step`], returning the final state.
    pub fn forward(&self, sequence: &[Vec<f32>]) -> Vec<f32> {
        sequence
            .iter()
            .fold(vec![0.0; self.hidden_dim], |hidden, input| self.step(input, &hidden))
    }

    /// Process single input (common case for routing): one step from a zero state.
    pub fn forward_single(&self, input: &[f32]) -> Vec<f32> {
        let hidden = vec![0.0; self.hidden_dim];
        self.step(input, &hidden)
    }

    /// Computes `b + W @ x + U @ h` one output row at a time.
    ///
    /// Terms are accumulated in the order bias, then W·x, then U·h.
    #[inline]
    fn linear_combine(
        &self,
        input: &[f32],
        hidden: &[f32],
        w: &[f32],
        u: &[f32],
        b: &[f32],
    ) -> Vec<f32> {
        // Slicing once up front turns a short buffer into a single early panic.
        let x = &input[..self.input_dim];
        let h = &hidden[..self.hidden_dim];

        (0..self.hidden_dim)
            .map(|row| {
                let w_row = &w[row * self.input_dim..(row + 1) * self.input_dim];
                let u_row = &u[row * self.hidden_dim..(row + 1) * self.hidden_dim];

                let with_input = w_row.iter().zip(x).fold(b[row], |acc, (wi, xi)| acc + wi * xi);
                u_row.iter().zip(h).fold(with_input, |acc, (ui, hi)| acc + ui * hi)
            })
            .collect()
    }

    /// Element-wise logistic function.
    #[inline]
    fn sigmoid(&self, x: &[f32]) -> Vec<f32> {
        x.iter().map(|&v| 1.0 / (1.0 + (-v).exp())).collect()
    }

    /// Element-wise hyperbolic tangent.
    #[inline]
    fn tanh(&self, x: &[f32]) -> Vec<f32> {
        x.iter().map(|&v| v.tanh()).collect()
    }
}

Phase 2: Route Classifier (Week 4-5)

// src/routing/classifier.rs

/// Route classifier using FastGRNN + linear head
///
/// The embedding is encoded by `fastgrnn`, then projected to one logit per
/// class by `classifier_head`.
pub struct RouteClassifier {
    // Encoder applied to the request embedding before the linear head.
    fastgrnn: FastGRNN,
    // Flat row-major weights; row `i` holds the weights for class `i`.
    classifier_head: Vec<f32>,  // [num_classes, hidden_dim]
    // Number of logits computed per request.
    num_classes: usize,
    // Label for each class, indexed in the same order as the head's rows.
    class_names: Vec<String>,
}

impl RouteClassifier {
    /// Classify request to route category.
    ///
    /// Encodes `embedding` with the FastGRNN cell, applies the linear head
    /// and normalises the logits with `softmax`.
    ///
    /// Returns each class name paired with its probability, highest first.
    /// Sorting uses `total_cmp`, so a NaN probability no longer panics; it
    /// simply takes whatever position IEEE total ordering gives it.
    pub fn classify(&self, embedding: &[f32]) -> Vec<(String, f32)> {
        // FastGRNN encoding
        let hidden = self.fastgrnn.forward_single(embedding);

        // Linear classifier
        let logits: Vec<f32> = (0..self.num_classes)
            .map(|class| self.logit(class, &hidden))
            .collect();

        // Softmax
        let probs = softmax(&logits);

        // Return sorted by probability
        let mut results: Vec<_> = self.class_names.iter()
            .zip(probs.iter())
            .map(|(name, &prob)| (name.clone(), prob))
            .collect();

        results.sort_by(|(_, a), (_, b)| b.total_cmp(a));
        results
    }

    /// Multi-label classification (request may need multiple capabilities).
    ///
    /// Each class gets an independent sigmoid probability; only classes with
    /// probability strictly above 0.5 are returned, highest first.
    pub fn classify_capabilities(&self, embedding: &[f32]) -> Vec<(String, f32)> {
        let hidden = self.fastgrnn.forward_single(embedding);

        // Sigmoid for multi-label
        let mut results: Vec<(String, f32)> = (0..self.num_classes)
            .filter_map(|class| {
                let prob = 1.0 / (1.0 + (-self.logit(class, &hidden)).exp());
                (prob > 0.5).then(|| (self.class_names[class].clone(), prob))
            })
            .collect();

        results.sort_by(|(_, a), (_, b)| b.total_cmp(a));
        results
    }

    /// Dot product of row `class` of the linear head with `hidden`.
    ///
    /// The row stride is `hidden.len()`, matching the head's
    /// `[num_classes, hidden_dim]` layout.
    fn logit(&self, class: usize, hidden: &[f32]) -> f32 {
        let width = hidden.len();
        self.classifier_head[class * width..(class + 1) * width]
            .iter()
            .zip(hidden)
            .fold(0.0, |acc, (w, h)| acc + w * h)
    }
}

/// SQL entry point: embeds `request`, classifies it, and returns a JSON
/// object with the full ranked list (`classifications`) plus the name
/// (`top_category`) and probability (`confidence`) of its first entry.
/// Both are `null` when the list is empty.
#[pg_extern]
fn ruvector_classify_request(request: &str) -> pgrx::JsonB {
    let embedding = get_embedding(request);
    let ranked = get_route_classifier().classify(&embedding);

    // Look up the leading entry once and project both summary fields from it.
    let best = ranked.first();
    let top_category = best.map(|(name, _)| name);
    let confidence = best.map(|(_, prob)| prob);

    pgrx::JsonB(serde_json::json!({
        "classifications": ranked,
        "top_category": top_category,
        "confidence": confidence,
    }))
}

Phase 3: Agent Registry (Week 6-7)

// src/routing/agents/registry.rs

use dashmap::DashMap;

/// A routable agent together with its declared capabilities, pricing and
/// performance figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
    /// Name of the agent; used as the key in `AgentRegistry`.
    pub name: String,
    /// Kind of agent (see `AgentType`).
    pub agent_type: AgentType,
    /// Capability labels; compared by exact string equality when a route
    /// specifies required capabilities.
    pub capabilities: Vec<String>,
    pub capability_embedding: Vec<f32>,  // Embedding of capabilities for semantic matching
    /// Pricing information used for filtering, scoring and cost estimates.
    pub cost_model: CostModel,
    /// Latency, quality and reliability figures.
    pub performance: AgentPerformance,
    /// Free-form JSON supplied at registration time.
    pub metadata: serde_json::Value,
    /// Only agents with `active == true` are returned by `list_active`.
    pub active: bool,
}

/// Category of a registered agent.
///
/// NOTE(review): `ruvector_register_agent` converts its `agent_type` string
/// with `str::parse`, so a `FromStr` impl is presumably defined elsewhere —
/// confirm it accepts the lowercase names used in the SQL examples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AgentType {
    LLM,
    Tool,
    API,
    Human,
    Ensemble,
}

/// Pricing for an agent. Every rate is optional.
///
/// `Router::estimate_cost` prefers `cost_per_1k_tokens`, then falls back to
/// `cost_per_call`, and otherwise estimates zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostModel {
    /// Price per 1,000 tokens, if the agent is billed by tokens.
    pub cost_per_1k_tokens: Option<f64>,
    /// Flat price per invocation, if the agent is billed per call.
    pub cost_per_call: Option<f64>,
    /// Price per second; `ruvector_register_agent` always leaves this unset.
    pub cost_per_second: Option<f64>,
}

/// Performance figures for an agent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentPerformance {
    /// Average latency in milliseconds.
    pub avg_latency_ms: f64,
    /// 99th-percentile latency in milliseconds; seeded as twice the average
    /// by `ruvector_register_agent`.
    pub p99_latency_ms: f64,
    /// Quality score; compared against `min_quality` / `quality_threshold`.
    pub quality_score: f64,
    /// Success rate; initialised to 1.0 at registration.
    pub success_rate: f64,
    /// Request counter; initialised to 0 at registration.
    pub total_requests: u64,
}

/// Global agent registry
///
/// Agents are stored by name in a concurrent map and their capability
/// embeddings are mirrored into a vector index.
pub struct AgentRegistry {
    // Agent records keyed by `Agent::name`.
    agents: DashMap<String, Agent>,
    capability_index: HnswIndex,  // For semantic capability matching
}

impl AgentRegistry {
    /// Registers `agent`: indexes its capability embedding under its name,
    /// then stores the record, replacing any record with the same name.
    ///
    /// NOTE(review): whether `HnswIndex::insert` replaces or duplicates an
    /// existing key is not visible here — confirm before relying on
    /// re-registration.
    pub fn register(&self, agent: Agent) -> Result<(), RegistryError> {
        // Index capability embedding
        self.capability_index.insert(&agent.name, &agent.capability_embedding);

        self.agents.insert(agent.name.clone(), agent);
        Ok(())
    }

    /// Returns a copy of the agent registered under `name`, if any.
    pub fn get(&self, name: &str) -> Option<Agent> {
        self.agents.get(name).map(|entry| entry.value().clone())
    }

    /// Returns up to `k` agents whose capability embedding is closest to the
    /// embedding of `capability`, in index order.
    ///
    /// Agents are returned as owned clones, like [`AgentRegistry::get`] and
    /// [`AgentRegistry::list_active`]. A reference obtained from
    /// `DashMap::get` borrows from a guard that is dropped at the end of the
    /// closure, so references cannot be handed back to the caller. Inactive
    /// agents are not filtered out.
    pub fn find_by_capability(&self, capability: &str, k: usize) -> Vec<Agent> {
        let embedding = get_embedding(capability);
        let results = self.capability_index.search(&embedding, k);

        results.iter()
            .filter_map(|(name, _)| self.get(name.as_str()))
            .collect()
    }

    /// Returns copies of all agents whose `active` flag is set.
    pub fn list_active(&self) -> Vec<Agent> {
        self.agents.iter()
            .filter(|entry| entry.active)
            .map(|entry| entry.value().clone())
            .collect()
    }
}

/// SQL entry point that registers an agent in the global registry.
///
/// Required arguments are declared before the optional ones: PostgreSQL
/// rejects a function in which a parameter without a default follows one
/// with a default. Calls using named notation (`name := ...`), as in the
/// SQL examples, are unaffected by the ordering.
///
/// * `capabilities` is joined with `", "` and embedded to build the
///   capability vector used for semantic matching.
/// * `agent_type` is parsed into `AgentType`; an unrecognised value falls
///   back to `AgentType::LLM`.
/// * `p99_latency_ms` is seeded as twice `avg_latency_ms`, `success_rate`
///   as 1.0 and `total_requests` as 0.
///
/// Returns `true` when the registry accepted the agent.
#[pg_extern]
fn ruvector_register_agent(
    name: &str,
    agent_type: &str,
    capabilities: Vec<String>,
    avg_latency_ms: f64,
    quality_score: f64,
    cost_per_1k_tokens: default!(Option<f64>, "NULL"),
    cost_per_call: default!(Option<f64>, "NULL"),
    metadata: default!(Option<pgrx::JsonB>, "NULL"),
) -> bool {
    let registry = get_agent_registry();

    // Create capability embedding
    let capability_text = capabilities.join(", ");
    let capability_embedding = get_embedding(&capability_text);

    let agent = Agent {
        name: name.to_string(),
        agent_type: agent_type.parse().unwrap_or(AgentType::LLM),
        capabilities,
        capability_embedding,
        cost_model: CostModel {
            cost_per_1k_tokens,
            cost_per_call,
            cost_per_second: None,
        },
        performance: AgentPerformance {
            avg_latency_ms,
            p99_latency_ms: avg_latency_ms * 2.0,
            quality_score,
            success_rate: 1.0,
            total_requests: 0,
        },
        // Build the empty-object default only when no metadata was supplied.
        metadata: metadata.map(|m| m.0).unwrap_or_else(|| serde_json::json!({})),
        active: true,
    };

    registry.register(agent).is_ok()
}

Phase 4: Routing Engine (Week 8-9)

// src/routing/router.rs

/// Routing engine: holds shared handles to the components it routes with.
pub struct Router {
    // Source of candidate agents.
    registry: Arc<AgentRegistry>,
    // Request classifier (not referenced by the methods shown in this file section).
    classifier: Arc<RouteClassifier>,
    // Budget/cost component (not referenced by the methods shown in this file section).
    optimizer: Arc<CostOptimizer>,
    // Semantic route table (not referenced by the methods shown in this file section).
    semantic_routes: Arc<SemanticRoutes>,
}

/// Outcome of `Router::route`.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
    /// Copy of the selected agent.
    pub agent: Agent,
    /// Score of the selected agent as computed by `Router::score_agent`;
    /// it is not normalised to any particular range.
    pub confidence: f64,
    /// Cost estimate for the request from `Router::estimate_cost`.
    pub estimated_cost: f64,
    /// The selected agent's average latency in milliseconds.
    pub estimated_latency_ms: f64,
    /// Human-readable explanation of the selection.
    pub reasoning: String,
}

/// Optional filters applied to candidate agents; a `None` field disables
/// the corresponding filter.
#[derive(Debug, Clone)]
pub struct RoutingConstraints {
    /// Capabilities an agent must list, matched by exact string equality.
    pub required_capabilities: Option<Vec<String>>,
    /// Upper bound compared against the agent's declared cost rate.
    pub max_cost: Option<f64>,
    /// Upper bound on the agent's average latency in milliseconds.
    pub max_latency_ms: Option<f64>,
    /// Lower bound on the agent's quality score.
    pub min_quality: Option<f64>,
    /// Agent names that must not be selected.
    pub excluded_agents: Option<Vec<String>>,
}

impl Router {
    /// Route request to best agent.
    ///
    /// Embeds `request`, filters the active agents by `constraints`, scores
    /// the survivors for `optimize_for`, and returns the highest-scoring one.
    ///
    /// # Errors
    /// Returns `RoutingError::NoSuitableAgent` when no agent passes the
    /// constraints or when every candidate's score is NaN (previously a NaN
    /// score caused a panic during comparison).
    pub fn route(
        &self,
        request: &str,
        constraints: &RoutingConstraints,
        optimize_for: OptimizationTarget,
    ) -> Result<RoutingDecision, RoutingError> {
        let embedding = get_embedding(request);

        // Get candidate agents
        let candidates = self.get_candidates(&embedding, constraints)?;

        // Score candidates and select the best; NaN scores cannot be ranked
        // meaningfully, so they are dropped before the comparison.
        let (best_agent, confidence) = candidates.iter()
            .map(|agent| (agent, self.score_agent(agent, &embedding, optimize_for)))
            .filter(|(_, score)| !score.is_nan())
            .max_by(|(_, a), (_, b)| a.total_cmp(b))
            .ok_or(RoutingError::NoSuitableAgent)?;

        Ok(RoutingDecision {
            agent: best_agent.clone(),
            confidence,
            estimated_cost: self.estimate_cost(best_agent, request),
            estimated_latency_ms: best_agent.performance.avg_latency_ms,
            reasoning: format!("Selected {} based on {:?} optimization", best_agent.name, optimize_for),
        })
    }

    /// Returns the active agents that satisfy every constraint that is set.
    ///
    /// The request embedding is accepted for future semantic pre-filtering
    /// but is not used by the current filters.
    fn get_candidates(
        &self,
        _embedding: &[f32],
        constraints: &RoutingConstraints,
    ) -> Result<Vec<Agent>, RoutingError> {
        let mut candidates: Vec<_> = self.registry.list_active();

        // Filter by required capabilities
        if let Some(required) = &constraints.required_capabilities {
            candidates.retain(|a| {
                required.iter().all(|cap| a.capabilities.contains(cap))
            });
        }

        // Filter by cost. Compare the rate the agent is actually billed at.
        // The previous `unwrap_or(0.0) || unwrap_or(0.0)` form let every agent
        // with one unset rate through, regardless of its real price.
        if let Some(max_cost) = constraints.max_cost {
            candidates.retain(|a| Self::unit_cost(a).unwrap_or(0.0) <= max_cost);
        }

        // Filter by latency
        if let Some(max_latency) = constraints.max_latency_ms {
            candidates.retain(|a| a.performance.avg_latency_ms <= max_latency);
        }

        // Filter by quality
        if let Some(min_quality) = constraints.min_quality {
            candidates.retain(|a| a.performance.quality_score >= min_quality);
        }

        // Filter excluded
        if let Some(excluded) = &constraints.excluded_agents {
            candidates.retain(|a| !excluded.contains(&a.name));
        }

        Ok(candidates)
    }

    /// The rate an agent is billed at, using the same precedence as
    /// `estimate_cost`: per-1k-token price first, then per-call price.
    /// `None` when the agent declares neither.
    fn unit_cost(agent: &Agent) -> Option<f64> {
        agent.cost_model.cost_per_1k_tokens.or(agent.cost_model.cost_per_call)
    }

    /// Scores `agent` for the request: cosine similarity between the request
    /// and the agent's capability embedding, weighted by the chosen objective.
    ///
    /// Agents that declare no cost are scored as if their rate were 0.01.
    fn score_agent(
        &self,
        agent: &Agent,
        request_embedding: &[f32],
        optimize_for: OptimizationTarget,
    ) -> f64 {
        // Capability match score
        let capability_sim = cosine_similarity(request_embedding, &agent.capability_embedding);

        let cost = Self::unit_cost(agent).unwrap_or(0.01);
        let cost_factor = 1.0 / (1.0 + cost);
        let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0);
        let quality = agent.performance.quality_score;

        match optimize_for {
            OptimizationTarget::Cost => capability_sim * cost_factor,
            OptimizationTarget::Latency => capability_sim * latency_factor,
            OptimizationTarget::Quality => capability_sim * quality,
            OptimizationTarget::Balanced => {
                capability_sim * (0.3 * cost_factor + 0.3 * latency_factor + 0.4 * quality)
            }
            // The small constant keeps the ratio finite for free agents.
            OptimizationTarget::QualityPerDollar => capability_sim * quality / (cost + 0.001),
        }
    }

    /// Estimates the cost of sending `request` to `agent`.
    ///
    /// The token count is approximated as one token per four bytes of the
    /// request. Token pricing takes precedence over per-call pricing; an
    /// agent with neither is estimated at zero.
    fn estimate_cost(&self, agent: &Agent, request: &str) -> f64 {
        let estimated_tokens = (request.len() / 4) as f64;  // Rough estimate

        if let Some(cost_per_1k) = agent.cost_model.cost_per_1k_tokens {
            cost_per_1k * estimated_tokens / 1000.0
        } else if let Some(cost_per_call) = agent.cost_model.cost_per_call {
            cost_per_call
        } else {
            0.0
        }
    }
}

/// Objective that `Router::score_agent` weights capability similarity by.
#[derive(Debug, Clone, Copy)]
pub enum OptimizationTarget {
    /// Favour cheaper agents.
    Cost,
    /// Favour agents with lower average latency.
    Latency,
    /// Favour agents with a higher quality score.
    Quality,
    /// Weighted mix: 0.3 cost, 0.3 latency, 0.4 quality.
    Balanced,
    /// Favour quality score divided by cost.
    QualityPerDollar,
}

/// SQL entry point for routing a single request.
///
/// `optimize_for` accepts `cost`, `latency`, `quality` and
/// `quality_per_dollar`; any other value selects the balanced objective.
/// The optional arguments become routing constraints (no agents are
/// excluded by name).
///
/// Returns a JSON object describing the decision, or `{"error": ...}` with
/// the debug rendering of the routing error.
#[pg_extern]
fn ruvector_route(
    request: &str,
    optimize_for: default!(&str, "'balanced'"),
    required_capabilities: default!(Option<Vec<String>>, "NULL"),
    max_cost: default!(Option<f64>, "NULL"),
    max_latency_ms: default!(Option<f64>, "NULL"),
    min_quality: default!(Option<f64>, "NULL"),
) -> pgrx::JsonB {
    let router = get_router();

    let target = match optimize_for {
        "cost" => OptimizationTarget::Cost,
        "latency" => OptimizationTarget::Latency,
        "quality" => OptimizationTarget::Quality,
        "quality_per_dollar" => OptimizationTarget::QualityPerDollar,
        _ => OptimizationTarget::Balanced,
    };

    let constraints = RoutingConstraints {
        required_capabilities,
        max_cost,
        max_latency_ms,
        min_quality,
        excluded_agents: None,
    };

    // Render either outcome to JSON, then wrap once.
    let payload = router
        .route(request, &constraints, target)
        .map(|decision| {
            serde_json::json!({
                "agent_name": decision.agent.name,
                "confidence": decision.confidence,
                "estimated_cost": decision.estimated_cost,
                "estimated_latency_ms": decision.estimated_latency_ms,
                "reasoning": decision.reasoning,
            })
        })
        .unwrap_or_else(|e| {
            serde_json::json!({
                "error": format!("{:?}", e),
            })
        });

    pgrx::JsonB(payload)
}

Phase 5: Semantic Routes (Week 10-11)

// src/routing/semantic_routes.rs

/// Table of semantic routes with a vector index over their embeddings.
pub struct SemanticRoutes {
    // Route definitions keyed by route name.
    routes: DashMap<String, SemanticRoute>,
    // Index of route embeddings, keyed by the same route names.
    index: HnswIndex,
}

/// A named route that maps semantically similar queries to a target agent.
#[derive(Debug, Clone)]
pub struct SemanticRoute {
    /// Route name; used as the key in `SemanticRoutes`.
    pub name: String,
    /// Human-readable description; embedded when no explicit embedding is
    /// supplied to `ruvector_create_route`.
    pub description: String,
    /// Vector that queries are matched against.
    pub embedding: Vec<f32>,
    /// Name of the agent that matching queries are sent to.
    pub target_agent: String,
    /// Priority; scales the confidence reported by `ruvector_semantic_route`.
    pub priority: i32,
    /// Optional extra conditions; `ruvector_create_route` always sets `None`.
    pub conditions: Option<RouteConditions>,
}

/// Optional conditions attached to a semantic route.
///
/// NOTE(review): nothing in this file section evaluates these fields; the
/// meanings below are inferred from the field names — confirm against the
/// matching code.
#[derive(Debug, Clone)]
pub struct RouteConditions {
    /// Presumably a (start, end) time-of-day window for the route.
    pub time_range: Option<(chrono::NaiveTime, chrono::NaiveTime)>,
    /// Presumably the user tiers the route applies to.
    pub user_tier: Option<Vec<String>>,
    /// Presumably a request-rate cap; units are not specified here.
    pub rate_limit: Option<u32>,
}

impl SemanticRoutes {
    /// Adds `route`: its embedding is indexed under the route name first,
    /// then the definition is stored under the same name.
    pub fn add_route(&self, route: SemanticRoute) {
        self.index.insert(&route.name, &route.embedding);
        self.routes.insert(route.name.clone(), route);
    }

    /// Searches the index for the `k` routes nearest to `query_embedding`
    /// and pairs each stored route with the score the index reported.
    /// Hits whose name has no stored definition are skipped.
    pub fn match_route(&self, query_embedding: &[f32], k: usize) -> Vec<(SemanticRoute, f32)> {
        let hits = self.index.search(query_embedding, k);

        let mut matched: Vec<(SemanticRoute, f32)> = Vec::new();
        for (name, score) in hits.iter() {
            if let Some(route) = self.routes.get(name.as_str()) {
                matched.push((route.clone(), *score));
            }
        }
        matched
    }
}

#[pg_extern]
fn ruvector_create_route(
    name: &str,
    description: &str,
    target_agent: &str,
    priority: default!(i32, 0),
    embedding: default!(Option<Vec<f32>>, "NULL"),
) -> bool {
    let routes = get_semantic_routes();

    let embedding = embedding.unwrap_or_else(|| get_embedding(description));

    let route = SemanticRoute {
        name: name.to_string(),
        description: description.to_string(),
        embedding,
        target_agent: target_agent.to_string(),
        priority,
        conditions: None,
    };

    routes.add_route(route);
    true
}

#[pg_extern]
fn ruvector_semantic_route(
    query: &str,
    top_k: default!(i32, 3),
) -> TableIterator<'static, (
    name!(route_name, String),
    name!(similarity, f32),
    name!(target_agent, String),
    name!(confidence, f32),
)> {
    let routes = get_semantic_routes();
    let embedding = get_embedding(query);

    let matches = routes.match_route(&embedding, top_k as usize);

    let results: Vec<_> = matches.into_iter()
        .map(|(route, similarity)| {
            let confidence = similarity * (route.priority as f32 + 1.0) / 10.0;
            (route.name, similarity, route.target_agent, confidence.min(1.0))
        })
        .collect();

    TableIterator::new(results)
}

Phase 6: Cost Optimizer (Week 12)

// src/routing/cost_optimizer.rs

/// Budget-aware cost component: tracks remaining budget per agent and keeps
/// a history of recorded usage.
pub struct CostOptimizer {
    // Queried by `check_budget` and debited by `record_usage`.
    budget_tracker: BudgetTracker,
    // Appended to by `record_usage`; read by `optimize_budget` and analytics.
    usage_history: UsageHistory,
}

/// Result of `CostOptimizer::optimize_budget`.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
    /// Budget assigned to each eligible agent, keyed by agent name.
    pub agent_budgets: HashMap<String, f64>,
    /// The total budget that was distributed.
    pub total_budget: f64,
    /// Length of the budgeting period.
    pub period: chrono::Duration,
}

impl CostOptimizer {
    /// Optimize budget allocation across agents.
    ///
    /// Active agents meeting `quality_threshold` and `latency_threshold`
    /// (milliseconds) each receive a value score of
    /// `quality * cost_efficiency * ln(1 + requests)`, and `total_budget`
    /// is split in proportion to those scores.
    ///
    /// `ln(1 + n)` is used instead of `ln(n)` so that a request count of 1
    /// (the default for agents with no history) no longer scores zero and a
    /// count of 0 no longer scores negative infinity. When the scores do not
    /// sum to a positive finite number, the budget is split evenly rather
    /// than dividing by zero.
    pub fn optimize_budget(
        &self,
        total_budget: f64,
        quality_threshold: f64,
        latency_threshold: f64,
        period_days: i64,
    ) -> BudgetAllocation {
        let agents = get_agent_registry().list_active();
        let history = self.usage_history.get_period(period_days);

        // Calculate value score for each agent
        let agent_values: HashMap<String, f64> = agents.iter()
            .filter(|a| {
                a.performance.quality_score >= quality_threshold &&
                a.performance.avg_latency_ms <= latency_threshold
            })
            .map(|a| {
                let historical_usage = history.get(&a.name).map(|h| h.request_count).unwrap_or(1);
                let quality = a.performance.quality_score;
                // The small constant keeps the ratio finite for free agents.
                let cost_efficiency = 1.0 / (a.cost_model.cost_per_1k_tokens.unwrap_or(0.01) + 0.001);

                let value = quality * cost_efficiency * (historical_usage as f64).ln_1p();
                (a.name.clone(), value)
            })
            .collect();

        // Allocate budget proportionally to value
        let total_value: f64 = agent_values.values().sum();
        let proportional = total_value.is_finite() && total_value > 0.0;
        let even_share = if agent_values.is_empty() {
            0.0
        } else {
            total_budget / agent_values.len() as f64
        };

        let agent_budgets: HashMap<String, f64> = agent_values.iter()
            .map(|(name, value)| {
                let allocation = if proportional {
                    (value / total_value) * total_budget
                } else {
                    even_share
                };
                (name.clone(), allocation)
            })
            .collect();

        BudgetAllocation {
            agent_budgets,
            total_budget,
            period: chrono::Duration::days(period_days),
        }
    }

    /// Check if request fits within budget: true when the agent's remaining
    /// budget is at least `estimated_cost`.
    pub fn check_budget(&self, agent: &str, estimated_cost: f64) -> bool {
        self.budget_tracker.remaining(agent) >= estimated_cost
    }

    /// Record usage: debits `actual_cost` from the agent's budget and appends
    /// the cost, outcome and latency to the usage history.
    pub fn record_usage(&self, agent: &str, actual_cost: f64, success: bool, latency_ms: f64) {
        self.budget_tracker.deduct(agent, actual_cost);
        self.usage_history.record(agent, actual_cost, success, latency_ms);
    }
}

/// SQL entry point that splits a monthly budget across eligible agents.
///
/// The period is fixed at 30 days. Returns a JSON object with the per-agent
/// `allocations`, the `total_budget`, and `period_days`.
#[pg_extern]
fn ruvector_optimize_budget(
    monthly_budget: f64,
    quality_threshold: default!(f64, 0.8),
    latency_threshold_ms: default!(f64, 5000.0),
) -> pgrx::JsonB {
    // One definition of the period for both the optimiser call and the report.
    const PERIOD_DAYS: i64 = 30;

    let plan = get_cost_optimizer().optimize_budget(
        monthly_budget,
        quality_threshold,
        latency_threshold_ms,
        PERIOD_DAYS,
    );

    let report = serde_json::json!({
        "allocations": plan.agent_budgets,
        "total_budget": plan.total_budget,
        "period_days": PERIOD_DAYS,
    });

    pgrx::JsonB(report)
}

/// SQL entry point that reports aggregated routing statistics.
///
/// `time_range` is converted with `parse_time_range` and passed, together
/// with `group_by`, to the usage history's `aggregate`; the resulting rows
/// are returned as a table.
#[pg_extern]
fn ruvector_routing_analytics(
    time_range: default!(&str, "'7 days'"),
    group_by: default!(&str, "'agent'"),
) -> TableIterator<'static, (
    name!(agent, String),
    name!(total_requests, i64),
    name!(total_cost, f64),
    name!(avg_latency_ms, f64),
    name!(success_rate, f64),
)> {
    let cost_optimizer = get_cost_optimizer();
    let window = parse_time_range(time_range);

    let rows = cost_optimizer.usage_history.aggregate(window, group_by);
    TableIterator::new(rows)
}

Benchmarks

| Operation | Input Size | Time (μs) | Memory |
|-----------|------------|-----------|--------|
| FastGRNN step | 768-dim | 45 | 1KB |
| Route classification | 768-dim | 120 | 4KB |
| Semantic route match (1K routes) | 768-dim | 250 | 8KB |
| Full routing decision | 768-dim | 500 | 16KB |

Dependencies

[dependencies]
# Link to ruvector-tiny-dancer
ruvector-tiny-dancer-core = { path = "../ruvector-tiny-dancer-core", optional = true }

# SIMD
simsimd = "5.9"

# Time handling
chrono = "0.4"

# Concurrent collections
dashmap = "6.0"

Feature Flags

[features]
routing = []
routing-fastgrnn = ["routing"]
routing-semantic = ["routing", "index-hnsw"]
routing-optimizer = ["routing"]
routing-all = ["routing-fastgrnn", "routing-semantic", "routing-optimizer"]