Files
wifi-densepose/crates/ruvector-postgres/docs/integration-plans/07-tiny-dancer-routing.md
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

986 lines
29 KiB
Markdown
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Tiny Dancer Routing Integration Plan
## Overview
Integrate AI agent routing capabilities from `ruvector-tiny-dancer` into PostgreSQL, enabling intelligent request routing, model selection, and cost optimization directly in SQL.
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│                      PostgreSQL Extension                       │
├─────────────────────────────────────────────────────────────────┤
│   ┌─────────────────────────────────────────────────────────┐   │
│   │                   Tiny Dancer Router                    │   │
│   │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐   │   │
│   │  │   FastGRNN   │  │    Route     │  │     Cost     │   │   │
│   │  │  Inference   │  │  Classifier  │  │  Optimizer   │   │   │
│   │  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘   │   │
│   └─────────┼─────────────────┼─────────────────┼───────────┘   │
│             └─────────────────┴─────────────────┘               │
│                               ▼                                 │
│                 ┌───────────────────────────┐                   │
│                 │   Agent Registry & Pool   │                   │
│                 │    (LLMs, Tools, APIs)    │                   │
│                 └───────────────────────────┘                   │
└─────────────────────────────────────────────────────────────────┘
```
## Module Structure
```
src/
├── routing/
│   ├── mod.rs              # Module exports
│   ├── fastgrnn.rs         # FastGRNN neural inference
│   ├── router.rs           # Main routing engine
│   ├── classifier.rs       # Route classification
│   ├── cost_optimizer.rs   # Cost/latency optimization
│   ├── semantic_routes.rs  # Semantic route matching
│   ├── agents/
│   │   ├── registry.rs     # Agent registration
│   │   ├── pool.rs         # Agent pool management
│   │   └── capabilities.rs # Capability matching
│   ├── policies/
│   │   ├── cost.rs         # Cost-based routing
│   │   ├── latency.rs      # Latency-based routing
│   │   ├── quality.rs      # Quality-based routing
│   │   └── hybrid.rs       # Multi-objective routing
│   └── operators.rs        # SQL operators
```
## SQL Interface
### Agent Registration
```sql
-- Register AI agents/models
SELECT ruvector_register_agent(
name := 'gpt-4',
agent_type := 'llm',
capabilities := ARRAY['reasoning', 'code', 'analysis', 'creative'],
cost_per_1k_tokens := 0.03,
avg_latency_ms := 2500,
quality_score := 0.95,
metadata := '{"provider": "openai", "context_window": 128000}'
);
SELECT ruvector_register_agent(
name := 'claude-3-haiku',
agent_type := 'llm',
capabilities := ARRAY['fast-response', 'simple-tasks', 'classification'],
cost_per_1k_tokens := 0.00025,
avg_latency_ms := 400,
quality_score := 0.80,
metadata := '{"provider": "anthropic", "context_window": 200000}'
);
SELECT ruvector_register_agent(
name := 'code-specialist',
agent_type := 'tool',
capabilities := ARRAY['code-execution', 'debugging', 'testing'],
cost_per_call := 0.001,
avg_latency_ms := 100,
quality_score := 0.90
);
-- List registered agents
SELECT * FROM ruvector_list_agents();
```
### Basic Routing
```sql
-- Route a request to the best agent
SELECT * FROM ruvector_route(
request := 'Write a Python function to calculate Fibonacci numbers',
optimize_for := 'cost' -- or 'latency', 'quality', 'quality_per_dollar', 'balanced' (default)
);
-- Result:
-- | agent_name | confidence | estimated_cost | estimated_latency |
-- |------------|------------|----------------|-------------------|
-- | claude-3-haiku | 0.85 | 0.001 | 400ms |
-- Route with constraints
SELECT * FROM ruvector_route(
request := 'Analyze this complex legal document',
required_capabilities := ARRAY['reasoning', 'analysis'],
max_cost := 0.10,
max_latency_ms := 5000,
min_quality := 0.90
);
-- Multi-agent routing (for complex tasks)
SELECT * FROM ruvector_route_multi(
request := 'Build and deploy a web application',
num_agents := 3,
strategy := 'pipeline' -- or 'parallel', 'ensemble'
);
```
### Semantic Routing
```sql
-- Create semantic routes (like function calling)
SELECT ruvector_create_route(
name := 'customer_support',
description := 'Handle customer support inquiries, complaints, and feedback',
embedding := ruvector_embed('Customer support and help requests'),
target_agent := 'support-agent',
priority := 1
);
SELECT ruvector_create_route(
name := 'technical_docs',
description := 'Answer questions about technical documentation and APIs',
embedding := ruvector_embed('Technical documentation and API reference'),
target_agent := 'docs-agent',
priority := 2
);
-- Semantic route matching
SELECT * FROM ruvector_semantic_route(
query := 'How do I reset my password?',
top_k := 3
);
-- Result:
-- | route_name | similarity | target_agent | confidence |
-- |------------|------------|--------------|------------|
-- | customer_support | 0.92 | support-agent | 0.95 |
```
### Cost Optimization
```sql
-- Analyze routing costs
SELECT * FROM ruvector_routing_analytics(
time_range := '7 days',
group_by := 'agent'
);
-- Result:
-- | agent | total_requests | total_cost | avg_latency | success_rate |
-- |-------|----------------|------------|-------------|--------------|
-- | gpt-4 | 1000 | $30.00 | 2.5s | 99.2% |
-- | haiku | 5000 | $1.25 | 0.4s | 98.5% |
-- Optimize budget allocation
SELECT * FROM ruvector_optimize_budget(
monthly_budget := 100.00,
quality_threshold := 0.85,
latency_threshold_ms := 2000
);
-- Auto-route with budget awareness
-- (planned: budget_remaining is not yet a parameter of the Phase 4 ruvector_route sketch)
SELECT * FROM ruvector_route(
request := 'Summarize this article',
budget_remaining := 10.00,
optimize_for := 'quality_per_dollar'
);
```
### Batch Routing
```sql
-- Route multiple requests efficiently
SELECT * FROM ruvector_batch_route(
requests := ARRAY[
'Simple question 1',
'Complex analysis task',
'Code generation request'
],
optimize_for := 'total_cost'
);
-- Classify requests in batch (for preprocessing)
SELECT request_id, ruvector_classify_request(content) AS classification
FROM pending_requests;
```
## Implementation Phases
### Phase 1: FastGRNN Core (Week 1-3)
```rust
// src/routing/fastgrnn.rs
use simsimd::SpatialSimilarity;
/// FastGRNN (Fast Gated Recurrent Neural Network)
/// Lightweight neural network for fast inference
///
/// Weight matrices are stored flattened in row-major order, so element
/// `(i, j)` of a `[rows, cols]` matrix is read as `m[i * cols + j]`.
pub struct FastGRNN {
// Gate weights
w_gate: Vec<f32>, // [hidden, input]
u_gate: Vec<f32>, // [hidden, hidden]
b_gate: Vec<f32>, // [hidden]
// Update weights
w_update: Vec<f32>, // [hidden, input]
u_update: Vec<f32>, // [hidden, hidden]
b_update: Vec<f32>, // [hidden]
// Hyperparameters
// `zeta` scales the `(1 - gate)` term of the hidden-state update.
zeta: f32, // Gate sparsity
// `nu` is the constant added to that scaled term.
nu: f32, // Update sparsity
// Number of input features read per step.
input_dim: usize,
// Length of the hidden state vector.
hidden_dim: usize,
}
impl FastGRNN {
    /// Builds a cell for `input_dim`-wide inputs and a `hidden_dim`-wide state.
    ///
    /// The four weight matrices come from `Self::init_weights`, which is not
    /// part of this block. Both bias vectors start at zero and `zeta`/`nu`
    /// start at 1.0.
    pub fn new(input_dim: usize, hidden_dim: usize) -> Self {
        let w_gate = Self::init_weights(hidden_dim, input_dim);
        let u_gate = Self::init_weights(hidden_dim, hidden_dim);
        let w_update = Self::init_weights(hidden_dim, input_dim);
        let u_update = Self::init_weights(hidden_dim, hidden_dim);
        Self {
            w_gate,
            u_gate,
            b_gate: vec![0.0; hidden_dim],
            w_update,
            u_update,
            b_update: vec![0.0; hidden_dim],
            zeta: 1.0,
            nu: 1.0,
            input_dim,
            hidden_dim,
        }
    }

    /// Advances the cell by one time step and returns the new hidden state.
    ///
    /// With `z = σ(W_z x + U_z h + b_z)` and `h̃ = tanh(W_h x + U_h h + b_h)`,
    /// the result is `(ζ(1 - z) + ν) ⊙ h̃ + z ⊙ h`.
    ///
    /// # Panics
    /// Panics if `input` has fewer than `input_dim` values or `hidden` has
    /// fewer than `hidden_dim` values.
    pub fn step(&self, input: &[f32], hidden: &[f32]) -> Vec<f32> {
        let gate_pre =
            self.linear_combine(input, hidden, &self.w_gate, &self.u_gate, &self.b_gate);
        let z = self.sigmoid(&gate_pre);

        let update_pre =
            self.linear_combine(input, hidden, &self.w_update, &self.u_update, &self.b_update);
        let candidate = self.tanh(&update_pre);

        (0..self.hidden_dim)
            .map(|i| {
                let blend = self.zeta * (1.0 - z[i]) + self.nu;
                blend * candidate[i] + z[i] * hidden[i]
            })
            .collect()
    }

    /// Runs `step` over every frame of `sequence`, starting from a zero state,
    /// and returns the final hidden state (all zeros for an empty sequence).
    pub fn forward(&self, sequence: &[Vec<f32>]) -> Vec<f32> {
        sequence
            .iter()
            .fold(vec![0.0; self.hidden_dim], |state, frame| self.step(frame, &state))
    }

    /// Runs exactly one `step` from a zero hidden state (common case for routing).
    pub fn forward_single(&self, input: &[f32]) -> Vec<f32> {
        let zero_state = vec![0.0; self.hidden_dim];
        self.step(input, &zero_state)
    }

    /// Computes `W·x + U·h + b` for row-major `w` (`[hidden, input]`) and
    /// `u` (`[hidden, hidden]`).
    ///
    /// Each output starts from its bias, then adds the `W` terms, then the `U`
    /// terms, so the floating-point summation order is fixed.
    #[inline]
    fn linear_combine(
        &self,
        input: &[f32],
        hidden: &[f32],
        w: &[f32],
        u: &[f32],
        b: &[f32],
    ) -> Vec<f32> {
        let (n_in, n_hid) = (self.input_dim, self.hidden_dim);
        let mut out = b.to_vec();
        for (row, acc) in out[..n_hid].iter_mut().enumerate() {
            let w_row = &w[row * n_in..(row + 1) * n_in];
            for (weight, x) in w_row.iter().zip(&input[..n_in]) {
                *acc += weight * x;
            }
            let u_row = &u[row * n_hid..(row + 1) * n_hid];
            for (weight, h) in u_row.iter().zip(&hidden[..n_hid]) {
                *acc += weight * h;
            }
        }
        out
    }

    /// Element-wise logistic function `1 / (1 + e^-v)`.
    #[inline]
    fn sigmoid(&self, x: &[f32]) -> Vec<f32> {
        let mut out = Vec::with_capacity(x.len());
        for &v in x {
            out.push(1.0 / (1.0 + (-v).exp()));
        }
        out
    }

    /// Element-wise hyperbolic tangent.
    #[inline]
    fn tanh(&self, x: &[f32]) -> Vec<f32> {
        x.iter().copied().map(f32::tanh).collect()
    }
}
```
### Phase 2: Route Classifier (Week 4-5)
```rust
// src/routing/classifier.rs
/// Route classifier using FastGRNN + linear head
///
/// The embedding is encoded with a single FastGRNN step and each class score
/// is the dot product of one `classifier_head` row with that hidden state.
pub struct RouteClassifier {
// Encoder that turns a request embedding into a hidden state.
fastgrnn: FastGRNN,
// Flattened row-major weights; row `i` spans `hidden.len()` consecutive values.
classifier_head: Vec<f32>, // [num_classes, hidden_dim]
// Number of rows read from `classifier_head`.
num_classes: usize,
// Label for each class; `class_names[i]` names row `i` of the head.
class_names: Vec<String>,
}
impl RouteClassifier {
    /// Classify request to route category.
    ///
    /// Encodes `embedding` with one FastGRNN step, applies the linear head and
    /// a softmax, and returns every `(class_name, probability)` pair sorted by
    /// descending probability.
    ///
    /// Sorting uses `f32::total_cmp`, so a NaN probability (for example from a
    /// NaN embedding) can no longer panic the sort.
    pub fn classify(&self, embedding: &[f32]) -> Vec<(String, f32)> {
        // FastGRNN encoding
        let hidden = self.fastgrnn.forward_single(embedding);

        // Linear classifier
        let logits: Vec<f32> = (0..self.num_classes)
            .map(|class| self.logit(class, &hidden))
            .collect();

        // Softmax
        let probs = softmax(&logits);

        // Return sorted by probability
        let mut results: Vec<(String, f32)> = self
            .class_names
            .iter()
            .zip(probs.iter())
            .map(|(name, &prob)| (name.clone(), prob))
            .collect();
        results.sort_by(|(_, a), (_, b)| b.total_cmp(a));
        results
    }

    /// Multi-label classification (request may need multiple capabilities).
    ///
    /// Each class gets an independent sigmoid score; only classes scoring
    /// above 0.5 are returned, sorted by descending score.
    pub fn classify_capabilities(&self, embedding: &[f32]) -> Vec<(String, f32)> {
        let hidden = self.fastgrnn.forward_single(embedding);

        // Sigmoid for multi-label
        let mut results: Vec<(String, f32)> = Vec::new();
        for class in 0..self.num_classes {
            let prob = 1.0 / (1.0 + (-self.logit(class, &hidden)).exp());
            // A NaN score fails this comparison and is dropped here.
            if prob > 0.5 {
                results.push((self.class_names[class].clone(), prob));
            }
        }
        results.sort_by(|(_, a), (_, b)| b.total_cmp(a));
        results
    }

    /// Dot product of row `class` of the flattened `classifier_head` with `hidden`.
    fn logit(&self, class: usize, hidden: &[f32]) -> f32 {
        let width = hidden.len();
        let row = &self.classifier_head[class * width..(class + 1) * width];
        row.iter().zip(hidden).fold(0.0, |acc, (w, h)| acc + w * h)
    }
}
/// SQL entry point: embeds `request`, runs the route classifier, and returns
/// a JSON object with the full ranking (`classifications`) plus the first
/// entry's name (`top_category`) and probability (`confidence`).
///
/// Both summary fields are JSON `null` when the ranking is empty.
#[pg_extern]
fn ruvector_classify_request(request: &str) -> pgrx::JsonB {
    let embedding = get_embedding(request);
    let classifier = get_route_classifier();
    let ranked = classifier.classify(&embedding);

    // The ranking is already sorted, so the head is the best match.
    let best = ranked.first();
    let top_category = best.map(|(label, _)| label);
    let confidence = best.map(|(_, probability)| probability);

    let payload = serde_json::json!({
        "classifications": ranked,
        "top_category": top_category,
        "confidence": confidence,
    });
    pgrx::JsonB(payload)
}
```
### Phase 3: Agent Registry (Week 6-7)
```rust
// src/routing/agents/registry.rs
use dashmap::DashMap;
/// A routable agent (model, tool, API, ...) together with its pricing and
/// performance figures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
// Used as the key in the registry map and in the capability index.
pub name: String,
pub agent_type: AgentType,
// Capability labels; routing's `required_capabilities` filter matches these exactly.
pub capabilities: Vec<String>,
pub capability_embedding: Vec<f32>, // Embedding of capabilities for semantic matching
pub cost_model: CostModel,
pub performance: AgentPerformance,
// Free-form JSON supplied at registration (an empty object when omitted).
pub metadata: serde_json::Value,
// Only agents with `active == true` are returned by `AgentRegistry::list_active`.
pub active: bool,
}
/// Kind of agent being registered.
///
/// NOTE(review): `ruvector_register_agent` builds this with `agent_type.parse()`,
/// which needs a `FromStr` impl that is not shown in this snippet; unparseable
/// text falls back to `LLM` there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AgentType {
LLM,
Tool,
API,
Human,
Ensemble,
}
/// Pricing for an agent; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostModel {
// Read by routing scores, the cost estimate and the budget optimizer.
pub cost_per_1k_tokens: Option<f64>,
// Used by the cost estimate when no per-token price is set.
pub cost_per_call: Option<f64>,
// Always `None` when registered through `ruvector_register_agent`;
// not read by any code shown in this document.
pub cost_per_second: Option<f64>,
}
/// Latency and quality figures for an agent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentPerformance {
// Also reported as the estimated latency of a routing decision.
pub avg_latency_ms: f64,
// Initialised to twice `avg_latency_ms` by `ruvector_register_agent`.
pub p99_latency_ms: f64,
pub quality_score: f64,
// Initialised to 1.0 by `ruvector_register_agent`.
pub success_rate: f64,
// Initialised to 0 by `ruvector_register_agent`.
pub total_requests: u64,
}
/// Global agent registry
///
/// Agents are stored by name; their capability embeddings are additionally
/// inserted into `capability_index` under the same name.
pub struct AgentRegistry {
// Agent name -> agent record.
agents: DashMap<String, Agent>,
capability_index: HnswIndex, // For semantic capability matching
}
impl AgentRegistry {
    /// Adds `agent` to the registry.
    ///
    /// The capability embedding is inserted into the capability index under
    /// the agent's name, then the agent is stored in the name-keyed map,
    /// replacing any previous entry with the same name. The visible code
    /// always returns `Ok(())`.
    pub fn register(&self, agent: Agent) -> Result<(), RegistryError> {
        // Index capability embedding
        self.capability_index
            .insert(&agent.name, &agent.capability_embedding);
        self.agents.insert(agent.name.clone(), agent);
        Ok(())
    }

    /// Returns a clone of the agent registered as `name`, if any.
    pub fn get(&self, name: &str) -> Option<Agent> {
        self.agents.get(name).map(|entry| entry.value().clone())
    }

    /// Returns up to `k` agents whose capability embedding is closest to the
    /// embedding of `capability`, in the order reported by the index.
    ///
    /// Agents are returned by value. The previous `Vec<&Agent>` signature
    /// handed out references into map guards that are dropped before the
    /// function returns, which the borrow checker rejects. Owned clones also
    /// match `get` and `list_active`.
    pub fn find_by_capability(&self, capability: &str, k: usize) -> Vec<Agent> {
        let embedding = get_embedding(capability);
        let hits = self.capability_index.search(&embedding, k);
        hits.iter()
            // Index hits whose agent is no longer in the map are skipped.
            .filter_map(|(name, _)| self.get(name.as_str()))
            .collect()
    }

    /// Returns clones of all agents whose `active` flag is set.
    pub fn list_active(&self) -> Vec<Agent> {
        self.agents
            .iter()
            .filter(|entry| entry.active)
            .map(|entry| entry.value().clone())
            .collect()
    }
}
/// SQL entry point: registers (or replaces) an agent and returns whether the
/// registry accepted it.
///
/// The required `avg_latency_ms` and `quality_score` come before the
/// defaulted parameters: PostgreSQL rejects a function declaration in which a
/// parameter without a default follows one with a default. Calls using named
/// notation (`name := ...`) are unaffected by the order.
///
/// * `capabilities` are joined with `", "` and embedded for semantic matching.
/// * `agent_type` text that fails to parse falls back to `AgentType::LLM`.
/// * `p99_latency_ms` starts at twice `avg_latency_ms`, `success_rate` at 1.0
///   and `total_requests` at 0.
/// * `metadata` defaults to an empty JSON object.
#[pg_extern]
fn ruvector_register_agent(
    name: &str,
    agent_type: &str,
    capabilities: Vec<String>,
    avg_latency_ms: f64,
    quality_score: f64,
    cost_per_1k_tokens: default!(Option<f64>, "NULL"),
    cost_per_call: default!(Option<f64>, "NULL"),
    metadata: default!(Option<pgrx::JsonB>, "NULL"),
) -> bool {
    let registry = get_agent_registry();

    // Create capability embedding
    let capability_text = capabilities.join(", ");
    let capability_embedding = get_embedding(&capability_text);

    let agent = Agent {
        name: name.to_string(),
        agent_type: agent_type.parse().unwrap_or(AgentType::LLM),
        capabilities,
        capability_embedding,
        cost_model: CostModel {
            cost_per_1k_tokens,
            cost_per_call,
            cost_per_second: None,
        },
        performance: AgentPerformance {
            avg_latency_ms,
            p99_latency_ms: avg_latency_ms * 2.0,
            quality_score,
            success_rate: 1.0,
            total_requests: 0,
        },
        metadata: metadata
            .map(|m| m.0)
            .unwrap_or_else(|| serde_json::json!({})),
        active: true,
    };

    registry.register(agent).is_ok()
}
```
### Phase 4: Routing Engine (Week 8-9)
```rust
// src/routing/router.rs
/// Routing engine that picks an agent for a request.
///
/// Of these fields, only `registry` is read by the `impl Router` methods shown
/// in this document.
pub struct Router {
// Source of candidate agents (`list_active`).
registry: Arc<AgentRegistry>,
classifier: Arc<RouteClassifier>,
optimizer: Arc<CostOptimizer>,
semantic_routes: Arc<SemanticRoutes>,
}
/// Outcome of `Router::route`.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
// Clone of the selected agent.
pub agent: Agent,
// The winning agent's raw score from `score_agent`; it is not normalised.
pub confidence: f64,
// Output of `Router::estimate_cost` for the selected agent and request.
pub estimated_cost: f64,
// Copied from the agent's `avg_latency_ms`.
pub estimated_latency_ms: f64,
// Human-readable note naming the agent and the optimization target.
pub reasoning: String,
}
/// Optional hard filters applied to candidate agents before scoring.
/// A `None` field disables that filter.
#[derive(Debug, Clone)]
pub struct RoutingConstraints {
// Every listed capability must appear verbatim in the agent's `capabilities`.
pub required_capabilities: Option<Vec<String>>,
// Compared against the agent's unit prices in `get_candidates`.
pub max_cost: Option<f64>,
// Upper bound on the agent's `avg_latency_ms`.
pub max_latency_ms: Option<f64>,
// Lower bound on the agent's `quality_score`.
pub min_quality: Option<f64>,
// Agent names to drop; always `None` when built by `ruvector_route`.
pub excluded_agents: Option<Vec<String>>,
}
impl Router {
    /// Route request to best agent.
    ///
    /// Embeds `request`, filters the active agents through `constraints`,
    /// scores the survivors for `optimize_for`, and returns the highest
    /// scoring agent. The winner's raw score is reported as `confidence`.
    ///
    /// A NaN score ranks below every real score instead of panicking the
    /// comparison. On equal scores the later candidate wins.
    ///
    /// # Errors
    /// Returns `RoutingError::NoSuitableAgent` when no agent passes the filters.
    pub fn route(
        &self,
        request: &str,
        constraints: &RoutingConstraints,
        optimize_for: OptimizationTarget,
    ) -> Result<RoutingDecision, RoutingError> {
        let embedding = get_embedding(request);

        // Get candidate agents
        let candidates = self.get_candidates(&embedding, constraints)?;

        // Score candidates and select the best
        let rank = |score: f64| if score.is_nan() { f64::NEG_INFINITY } else { score };
        let best = candidates
            .iter()
            .map(|agent| (agent, self.score_agent(agent, &embedding, optimize_for)))
            .max_by(|(_, a), (_, b)| rank(*a).total_cmp(&rank(*b)));

        let (best_agent, confidence) = match best {
            Some(pick) => pick,
            None => return Err(RoutingError::NoSuitableAgent),
        };

        Ok(RoutingDecision {
            agent: best_agent.clone(),
            confidence,
            estimated_cost: self.estimate_cost(best_agent, request),
            estimated_latency_ms: best_agent.performance.avg_latency_ms,
            reasoning: format!("Selected {} based on {:?} optimization", best_agent.name, optimize_for),
        })
    }

    /// Returns the active agents that satisfy every constraint that is set.
    ///
    /// The request embedding is accepted for signature compatibility but is
    /// not used for filtering.
    fn get_candidates(
        &self,
        _embedding: &[f32],
        constraints: &RoutingConstraints,
    ) -> Result<Vec<Agent>, RoutingError> {
        let mut candidates = self.registry.list_active();

        // Filter by required capabilities
        if let Some(required) = &constraints.required_capabilities {
            candidates.retain(|a| required.iter().all(|cap| a.capabilities.contains(cap)));
        }

        // Filter by cost: every price the agent declares must be within the
        // limit. A missing price places no restriction; it must not count as
        // a free option that lets an over-budget agent through.
        if let Some(max_cost) = constraints.max_cost {
            let within = |price: Option<f64>| price.map_or(true, |p| p <= max_cost);
            candidates.retain(|a| {
                within(a.cost_model.cost_per_1k_tokens) && within(a.cost_model.cost_per_call)
            });
        }

        // Filter by latency
        if let Some(max_latency) = constraints.max_latency_ms {
            candidates.retain(|a| a.performance.avg_latency_ms <= max_latency);
        }

        // Filter by quality
        if let Some(min_quality) = constraints.min_quality {
            candidates.retain(|a| a.performance.quality_score >= min_quality);
        }

        // Filter excluded
        if let Some(excluded) = &constraints.excluded_agents {
            candidates.retain(|a| !excluded.contains(&a.name));
        }

        Ok(candidates)
    }

    /// Scores `agent` for the request: capability similarity multiplied by a
    /// factor that depends on `optimize_for`.
    ///
    /// Agents without a per-token price are scored as if it were 0.01;
    /// `cost_per_call` is not consulted here.
    fn score_agent(
        &self,
        agent: &Agent,
        request_embedding: &[f32],
        optimize_for: OptimizationTarget,
    ) -> f64 {
        // Capability match score
        let capability_sim = cosine_similarity(request_embedding, &agent.capability_embedding);

        match optimize_for {
            OptimizationTarget::Cost => {
                let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
                capability_sim * (1.0 / (1.0 + cost))
            }
            OptimizationTarget::Latency => {
                let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0);
                capability_sim * latency_factor
            }
            OptimizationTarget::Quality => capability_sim * agent.performance.quality_score,
            OptimizationTarget::Balanced => {
                let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
                let cost_factor = 1.0 / (1.0 + cost);
                let latency_factor = 1.0 / (1.0 + agent.performance.avg_latency_ms / 1000.0);
                let quality = agent.performance.quality_score;
                capability_sim * (0.3 * cost_factor + 0.3 * latency_factor + 0.4 * quality)
            }
            OptimizationTarget::QualityPerDollar => {
                let cost = agent.cost_model.cost_per_1k_tokens.unwrap_or(0.01);
                // The 0.001 keeps the division finite for free agents.
                capability_sim * agent.performance.quality_score / (cost + 0.001)
            }
        }
    }

    /// Estimates the cost of sending `request` to `agent`.
    ///
    /// Tokens are approximated as a quarter of the request's byte length.
    /// The per-token price is preferred, then the per-call price, else 0.0.
    fn estimate_cost(&self, agent: &Agent, request: &str) -> f64 {
        let estimated_tokens = (request.len() / 4) as f64; // Rough estimate

        if let Some(cost_per_1k) = agent.cost_model.cost_per_1k_tokens {
            cost_per_1k * estimated_tokens / 1000.0
        } else if let Some(cost_per_call) = agent.cost_model.cost_per_call {
            cost_per_call
        } else {
            0.0
        }
    }
}
/// Objective used by `Router::score_agent` to weight a candidate.
///
/// `ruvector_route` maps the SQL strings `'cost'`, `'latency'`, `'quality'`
/// and `'quality_per_dollar'` to the matching variant; any other string
/// selects `Balanced`.
#[derive(Debug, Clone, Copy)]
pub enum OptimizationTarget {
Cost,
Latency,
Quality,
// Weighted mix: 0.3 cost factor + 0.3 latency factor + 0.4 quality.
Balanced,
// Quality divided by (per-token cost + 0.001).
QualityPerDollar,
}
#[pg_extern]
fn ruvector_route(
request: &str,
optimize_for: default!(&str, "'balanced'"),
required_capabilities: default!(Option<Vec<String>>, "NULL"),
max_cost: default!(Option<f64>, "NULL"),
max_latency_ms: default!(Option<f64>, "NULL"),
min_quality: default!(Option<f64>, "NULL"),
) -> pgrx::JsonB {
let router = get_router();
let constraints = RoutingConstraints {
required_capabilities,
max_cost,
max_latency_ms,
min_quality,
excluded_agents: None,
};
let target = match optimize_for {
"cost" => OptimizationTarget::Cost,
"latency" => OptimizationTarget::Latency,
"quality" => OptimizationTarget::Quality,
"quality_per_dollar" => OptimizationTarget::QualityPerDollar,
_ => OptimizationTarget::Balanced,
};
match router.route(request, &constraints, target) {
Ok(decision) => pgrx::JsonB(serde_json::json!({
"agent_name": decision.agent.name,
"confidence": decision.confidence,
"estimated_cost": decision.estimated_cost,
"estimated_latency_ms": decision.estimated_latency_ms,
"reasoning": decision.reasoning,
})),
Err(e) => pgrx::JsonB(serde_json::json!({
"error": format!("{:?}", e),
})),
}
}
```
### Phase 5: Semantic Routes (Week 10-11)
```rust
// src/routing/semantic_routes.rs
/// Collection of named semantic routes with a vector index over their embeddings.
pub struct SemanticRoutes {
// Route name -> route definition.
routes: DashMap<String, SemanticRoute>,
// Route embeddings, inserted under the route name.
index: HnswIndex,
}
/// One semantic route: an embedding that, when matched, points at a target agent.
#[derive(Debug, Clone)]
pub struct SemanticRoute {
// Key used in both the route map and the index.
pub name: String,
// Embedded by `ruvector_create_route` when no explicit embedding is passed.
pub description: String,
pub embedding: Vec<f32>,
// Name of the agent this route resolves to.
pub target_agent: String,
// Scales the reported confidence in `ruvector_semantic_route`.
pub priority: i32,
// Always `None` for routes created through `ruvector_create_route`.
pub conditions: Option<RouteConditions>,
}
/// Optional restrictions attached to a semantic route.
///
/// NOTE(review): none of these fields is read by the code shown in this
/// document, so their exact semantics still need to be specified.
#[derive(Debug, Clone)]
pub struct RouteConditions {
// presumably a (start, end) time-of-day window — TODO confirm
pub time_range: Option<(chrono::NaiveTime, chrono::NaiveTime)>,
pub user_tier: Option<Vec<String>>,
pub rate_limit: Option<u32>,
}
impl SemanticRoutes {
    /// Registers `route`: its embedding goes into the index under the route
    /// name, then the route itself is stored in the name-keyed map.
    pub fn add_route(&self, route: SemanticRoute) {
        let key = route.name.clone();
        self.index.insert(&route.name, &route.embedding);
        self.routes.insert(key, route);
    }

    /// Returns up to `k` routes nearest to `query_embedding`, each paired with
    /// the score reported by the index, in index order.
    ///
    /// Index hits that have no entry in the route map are skipped.
    pub fn match_route(&self, query_embedding: &[f32], k: usize) -> Vec<(SemanticRoute, f32)> {
        let hits = self.index.search(query_embedding, k);
        let mut matched = Vec::new();
        for (name, score) in hits.iter() {
            if let Some(entry) = self.routes.get(name.as_str()) {
                matched.push((entry.clone(), *score));
            }
        }
        matched
    }
}
#[pg_extern]
fn ruvector_create_route(
name: &str,
description: &str,
target_agent: &str,
priority: default!(i32, 0),
embedding: default!(Option<Vec<f32>>, "NULL"),
) -> bool {
let routes = get_semantic_routes();
let embedding = embedding.unwrap_or_else(|| get_embedding(description));
let route = SemanticRoute {
name: name.to_string(),
description: description.to_string(),
embedding,
target_agent: target_agent.to_string(),
priority,
conditions: None,
};
routes.add_route(route);
true
}
#[pg_extern]
fn ruvector_semantic_route(
query: &str,
top_k: default!(i32, 3),
) -> TableIterator<'static, (
name!(route_name, String),
name!(similarity, f32),
name!(target_agent, String),
name!(confidence, f32),
)> {
let routes = get_semantic_routes();
let embedding = get_embedding(query);
let matches = routes.match_route(&embedding, top_k as usize);
let results: Vec<_> = matches.into_iter()
.map(|(route, similarity)| {
let confidence = similarity * (route.priority as f32 + 1.0) / 10.0;
(route.name, similarity, route.target_agent, confidence.min(1.0))
})
.collect();
TableIterator::new(results)
}
```
### Phase 6: Cost Optimizer (Week 12)
```rust
// src/routing/cost_optimizer.rs
/// Tracks spending and usage, and derives per-agent budget allocations.
pub struct CostOptimizer {
// Queried by `check_budget` and debited by `record_usage`.
budget_tracker: BudgetTracker,
// Per-agent usage records; read by `optimize_budget` and the analytics function.
usage_history: UsageHistory,
}
/// Result of `CostOptimizer::optimize_budget`.
#[derive(Debug, Clone)]
pub struct BudgetAllocation {
// Agent name -> share of `total_budget` assigned to that agent.
pub agent_budgets: HashMap<String, f64>,
// The budget that was distributed.
pub total_budget: f64,
// Length of the budgeting period (built from `period_days`).
pub period: chrono::Duration,
}
impl CostOptimizer {
    /// Optimize budget allocation across agents.
    ///
    /// Active agents that meet `quality_threshold` and `latency_threshold` get
    /// a value of `quality * cost_efficiency * ln(1 + usage)`, where
    /// `cost_efficiency = 1 / (cost_per_1k_tokens + 0.001)` (0.01 when no
    /// per-token price is set) and `usage` is the request count over the last
    /// `period_days` (1 when the agent has no history). `total_budget` is then
    /// split in proportion to those values.
    ///
    /// `ln(1 + usage)` replaces a plain `ln(usage)`, which scored agents
    /// without history at zero and made every share `0 / 0 = NaN` when that
    /// applied to all of them. If the values still sum to a non-positive or
    /// non-finite total, the budget is split evenly among eligible agents.
    pub fn optimize_budget(
        &self,
        total_budget: f64,
        quality_threshold: f64,
        latency_threshold: f64,
        period_days: i64,
    ) -> BudgetAllocation {
        let agents = get_agent_registry().list_active();
        let history = self.usage_history.get_period(period_days);

        // Calculate value score for each agent
        let agent_values: HashMap<String, f64> = agents
            .iter()
            .filter(|a| {
                a.performance.quality_score >= quality_threshold
                    && a.performance.avg_latency_ms <= latency_threshold
            })
            .map(|a| {
                let historical_usage = history.get(&a.name).map(|h| h.request_count).unwrap_or(1);
                let quality = a.performance.quality_score;
                let cost_efficiency =
                    1.0 / (a.cost_model.cost_per_1k_tokens.unwrap_or(0.01) + 0.001);
                let value = quality * cost_efficiency * (historical_usage as f64).ln_1p();
                (a.name.clone(), value)
            })
            .collect();

        // Allocate budget proportionally to value
        let total_value: f64 = agent_values.values().sum();
        let proportional = total_value > 0.0 && total_value.is_finite();
        let eligible = agent_values.len();
        let agent_budgets: HashMap<String, f64> = agent_values
            .into_iter()
            .map(|(name, value)| {
                let share = if proportional {
                    value / total_value
                } else {
                    // The closure only runs when `eligible >= 1`.
                    1.0 / eligible as f64
                };
                (name, share * total_budget)
            })
            .collect();

        BudgetAllocation {
            agent_budgets,
            total_budget,
            period: chrono::Duration::days(period_days),
        }
    }

    /// Check if request fits within budget: true when the tracker's remaining
    /// budget for `agent` is at least `estimated_cost`.
    pub fn check_budget(&self, agent: &str, estimated_cost: f64) -> bool {
        self.budget_tracker.remaining(agent) >= estimated_cost
    }

    /// Record usage: debits `actual_cost` from the agent's budget and appends
    /// the outcome to the usage history.
    pub fn record_usage(&self, agent: &str, actual_cost: f64, success: bool, latency_ms: f64) {
        self.budget_tracker.deduct(agent, actual_cost);
        self.usage_history.record(agent, actual_cost, success, latency_ms);
    }
}
/// SQL entry point: distributes `monthly_budget` across agents that meet the
/// quality and latency thresholds, over a fixed 30-day period.
///
/// Returns JSON with `allocations` (agent name -> amount), `total_budget`
/// and `period_days`.
#[pg_extern]
fn ruvector_optimize_budget(
    monthly_budget: f64,
    quality_threshold: default!(f64, 0.8),
    latency_threshold_ms: default!(f64, 5000.0),
) -> pgrx::JsonB {
    // One "month" for budgeting purposes.
    const PERIOD_DAYS: i64 = 30;

    let optimizer = get_cost_optimizer();
    let allocation = optimizer.optimize_budget(
        monthly_budget,
        quality_threshold,
        latency_threshold_ms,
        PERIOD_DAYS,
    );

    let payload = serde_json::json!({
        "allocations": allocation.agent_budgets,
        "total_budget": allocation.total_budget,
        "period_days": PERIOD_DAYS,
    });
    pgrx::JsonB(payload)
}
/// SQL entry point: returns aggregated usage rows
/// `(agent, total_requests, total_cost, avg_latency_ms, success_rate)`.
///
/// `time_range` is converted by `parse_time_range` and passed, together with
/// `group_by`, to the usage history's `aggregate`.
#[pg_extern]
fn ruvector_routing_analytics(
    time_range: default!(&str, "'7 days'"),
    group_by: default!(&str, "'agent'"),
) -> TableIterator<'static, (
    name!(agent, String),
    name!(total_requests, i64),
    name!(total_cost, f64),
    name!(avg_latency_ms, f64),
    name!(success_rate, f64),
)> {
    let optimizer = get_cost_optimizer();
    let window = parse_time_range(time_range);
    let rows = optimizer.usage_history.aggregate(window, group_by);
    TableIterator::new(rows)
}
```
## Benchmarks
| Operation | Input Size | Time (μs) | Memory |
|-----------|------------|-----------|--------|
| FastGRNN step | 768-dim | 45 | 1KB |
| Route classification | 768-dim | 120 | 4KB |
| Semantic route match (1K routes) | 768-dim | 250 | 8KB |
| Full routing decision | 768-dim | 500 | 16KB |
## Dependencies
```toml
[dependencies]
# Link to ruvector-tiny-dancer
ruvector-tiny-dancer-core = { path = "../ruvector-tiny-dancer-core", optional = true }
# SIMD
simsimd = "5.9"
# Time handling
chrono = "0.4"
# Concurrent collections
dashmap = "6.0"
```
## Feature Flags
```toml
[features]
routing = []
routing-fastgrnn = ["routing"]
routing-semantic = ["routing", "index-hnsw"]
routing-optimizer = ["routing"]
routing-all = ["routing-fastgrnn", "routing-semantic", "routing-optimizer"]
```