Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,183 @@
# Tiny Dancer Observability Examples

This directory contains examples demonstrating the observability features of Tiny Dancer.

## Examples
|
||||
|
||||
### 1. Metrics Example (`metrics_example.rs`)
|
||||
|
||||
**Purpose**: Demonstrates Prometheus metrics collection
|
||||
|
||||
**Features**:
|
||||
- Request counting
|
||||
- Latency tracking
|
||||
- Circuit breaker monitoring
|
||||
- Routing decision metrics
|
||||
- Prometheus format export
|
||||
|
||||
**Run**:
|
||||
```bash
|
||||
cargo run --example metrics_example
|
||||
```
|
||||
|
||||
**Output**: Shows metrics in Prometheus text format
|
||||
|
||||
### 2. Tracing Example (`tracing_example.rs`)
|
||||
|
||||
**Purpose**: Shows distributed tracing with OpenTelemetry
|
||||
|
||||
**Features**:
|
||||
- Jaeger integration
|
||||
- Span creation
|
||||
- Trace context propagation
|
||||
- W3C Trace Context format
|
||||
|
||||
**Prerequisites**:
|
||||
```bash
|
||||
# Start Jaeger
|
||||
docker run -d -p6831:6831/udp -p16686:16686 jaegertracing/all-in-one:latest
|
||||
```
|
||||
|
||||
**Run**:
|
||||
```bash
|
||||
cargo run --example tracing_example
|
||||
```
|
||||
|
||||
**View Traces**: http://localhost:16686
|
||||
|
||||
### 3. Full Observability Example (`full_observability.rs`)
|
||||
|
||||
**Purpose**: Comprehensive example combining all observability features
|
||||
|
||||
**Features**:
|
||||
- Prometheus metrics
|
||||
- Distributed tracing
|
||||
- Structured logging
|
||||
- Multiple scenarios (normal load, high load)
|
||||
- Performance statistics
|
||||
|
||||
**Run**:
|
||||
```bash
|
||||
cargo run --example full_observability
|
||||
```
|
||||
|
||||
**Output**: Complete observability stack demonstration
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Basic Metrics** (no dependencies):
|
||||
```bash
|
||||
cargo run --example metrics_example
|
||||
```
|
||||
|
||||
2. **With Tracing** (requires Jaeger):
|
||||
```bash
|
||||
# Terminal 1: Start Jaeger
|
||||
docker run -p6831:6831/udp -p16686:16686 jaegertracing/all-in-one:latest
|
||||
|
||||
# Terminal 2: Run example
|
||||
cargo run --example tracing_example
|
||||
|
||||
# Browser: Open http://localhost:16686
|
||||
```
|
||||
|
||||
3. **Full Stack**:
|
||||
```bash
|
||||
cargo run --example full_observability
|
||||
```
|
||||
|
||||
## Metrics Available
|
||||
|
||||
- `tiny_dancer_routing_requests_total` - Request counter
|
||||
- `tiny_dancer_routing_latency_seconds` - Latency histogram
|
||||
- `tiny_dancer_circuit_breaker_state` - Circuit breaker gauge
|
||||
- `tiny_dancer_routing_decisions_total` - Decision counter
|
||||
- `tiny_dancer_confidence_scores` - Confidence histogram
|
||||
- `tiny_dancer_uncertainty_estimates` - Uncertainty histogram
|
||||
- `tiny_dancer_candidates_processed_total` - Candidates counter
|
||||
- `tiny_dancer_errors_total` - Error counter
|
||||
- `tiny_dancer_feature_engineering_duration_seconds` - Feature time
|
||||
- `tiny_dancer_model_inference_duration_seconds` - Inference time
|
||||
|
||||
## Tracing Spans
|
||||
|
||||
Automatically created spans:
|
||||
- `routing_request` - Full routing operation
|
||||
- `circuit_breaker_check` - Circuit breaker validation
|
||||
- `feature_engineering` - Feature extraction
|
||||
- `model_inference` - Model inference (per candidate)
|
||||
- `uncertainty_estimation` - Uncertainty calculation
|
||||
|
||||
## Production Setup
|
||||
|
||||
### Prometheus
|
||||
|
||||
```yaml
|
||||
# prometheus.yml
|
||||
scrape_configs:
|
||||
- job_name: 'tiny-dancer'
|
||||
scrape_interval: 15s
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
```
|
||||
|
||||
### Jaeger
|
||||
|
||||
```bash
|
||||
# Production deployment
|
||||
docker run -d \
|
||||
--name jaeger \
|
||||
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||
-p 5775:5775/udp \
|
||||
-p 6831:6831/udp \
|
||||
-p 6832:6832/udp \
|
||||
-p 5778:5778 \
|
||||
-p 16686:16686 \
|
||||
-p 14268:14268 \
|
||||
-p 14250:14250 \
|
||||
-p 9411:9411 \
|
||||
jaegertracing/all-in-one:latest
|
||||
```
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
1. Add Prometheus data source
|
||||
2. Import dashboard from `docs/OBSERVABILITY.md`
|
||||
3. Create alerts:
|
||||
- Circuit breaker open
|
||||
- High error rate
|
||||
- High latency
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Metrics not showing
|
||||
|
||||
```rust
|
||||
// Ensure router is processing requests
|
||||
let response = router.route(request)?;
|
||||
|
||||
// Export and check metrics
|
||||
let metrics = router.export_metrics()?;
|
||||
println!("{}", metrics);
|
||||
```
|
||||
|
||||
### Traces not in Jaeger
|
||||
|
||||
1. Check Jaeger is running: `docker ps`
|
||||
2. Verify endpoint in config
|
||||
3. Ensure sampling_ratio > 0
|
||||
4. Call `tracing_system.shutdown()` to flush
|
||||
|
||||
### High memory usage
|
||||
|
||||
- Reduce sampling ratio to 0.01 (1%)
|
||||
- Set log level to INFO
|
||||
- Use appropriate histogram buckets
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- Full documentation: `../docs/OBSERVABILITY.md`
|
||||
- Implementation summary: `../docs/OBSERVABILITY_SUMMARY.md`
|
||||
- Prometheus docs: https://prometheus.io/docs/
|
||||
- OpenTelemetry docs: https://opentelemetry.io/docs/
|
||||
- Jaeger docs: https://www.jaegertracing.io/docs/
|
||||
120
crates/ruvector-tiny-dancer-core/examples/README.md
Normal file
120
crates/ruvector-tiny-dancer-core/examples/README.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# Tiny Dancer Examples
|
||||
|
||||
This directory contains example applications demonstrating how to use Tiny Dancer.
|
||||
|
||||
## Admin Server Example
|
||||
|
||||
**File:** `admin-server.rs`
|
||||
|
||||
A production-ready admin API server with health checks, metrics, and administration endpoints.
|
||||
|
||||
### Features
|
||||
|
||||
- Health check endpoints (K8s liveness & readiness probes)
|
||||
- Prometheus metrics export
|
||||
- Hot model reloading
|
||||
- Configuration management
|
||||
- Circuit breaker monitoring
|
||||
- Optional bearer token authentication
|
||||
|
||||
### Running
|
||||
|
||||
```bash
|
||||
cargo run --example admin-server --features admin-api
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
Once running, test the endpoints:
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
curl http://localhost:8080/health
|
||||
|
||||
# Readiness check
|
||||
curl http://localhost:8080/health/ready
|
||||
|
||||
# Prometheus metrics
|
||||
curl http://localhost:8080/metrics
|
||||
|
||||
# System information
|
||||
curl http://localhost:8080/info
|
||||
```
|
||||
|
||||
### Admin Endpoints
|
||||
|
||||
Admin endpoints support optional authentication:
|
||||
|
||||
```bash
|
||||
# Reload model (if auth enabled)
|
||||
curl -X POST http://localhost:8080/admin/reload \
|
||||
-H "Authorization: Bearer your-token-here"
|
||||
|
||||
# Get configuration
|
||||
curl http://localhost:8080/admin/config \
|
||||
-H "Authorization: Bearer your-token-here"
|
||||
|
||||
# Circuit breaker status
|
||||
curl http://localhost:8080/admin/circuit-breaker \
|
||||
-H "Authorization: Bearer your-token-here"
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Edit the example to configure:
|
||||
- Bind address and port
|
||||
- Authentication token
|
||||
- CORS settings
|
||||
- Router configuration
|
||||
|
||||
### Production Deployment
|
||||
|
||||
For production use:
|
||||
|
||||
1. **Enable authentication:**
|
||||
```rust
|
||||
auth_token: Some("your-secret-token".to_string())
|
||||
```
|
||||
|
||||
2. **Use environment variables:**
|
||||
```rust
|
||||
let token = std::env::var("ADMIN_AUTH_TOKEN").ok();
|
||||
```
|
||||
|
||||
3. **Deploy behind HTTPS proxy** (nginx, Envoy, etc.)
|
||||
|
||||
4. **Set up Prometheus scraping:**
|
||||
```yaml
|
||||
scrape_configs:
|
||||
- job_name: 'tiny-dancer'
|
||||
static_configs:
|
||||
- targets: ['localhost:8080']
|
||||
```
|
||||
|
||||
5. **Configure Kubernetes probes:**
|
||||
```yaml
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health/ready
|
||||
port: 8080
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
- [Admin API Full Documentation](../docs/API.md)
|
||||
- [Quick Start Guide](../docs/ADMIN_API_QUICKSTART.md)
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. Integrate with your application
|
||||
2. Set up monitoring (Prometheus + Grafana)
|
||||
3. Configure alerts
|
||||
4. Deploy to production
|
||||
|
||||
## Support
|
||||
|
||||
For issues or questions, see the main repository documentation.
|
||||
135
crates/ruvector-tiny-dancer-core/examples/admin-server.rs
Normal file
135
crates/ruvector-tiny-dancer-core/examples/admin-server.rs
Normal file
@@ -0,0 +1,135 @@
|
||||
//! Admin and health check example for Tiny Dancer
|
||||
//!
|
||||
//! This example demonstrates how to implement health checks and
|
||||
//! administrative functionality for the Tiny Dancer routing system.
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```bash
|
||||
//! cargo run --example admin-server
|
||||
//! ```
|
||||
//!
|
||||
//! This example shows:
|
||||
//! - Health check implementations
|
||||
//! - Configuration inspection
|
||||
//! - Circuit breaker status monitoring
|
||||
//! - Hot model reloading
|
||||
//!
|
||||
//! For a full HTTP admin server implementation, see the `api` module
|
||||
//! documentation which requires additional dependencies (axum, tokio).
|
||||
|
||||
use ruvector_tiny_dancer_core::{Candidate, Router, RouterConfig, RoutingRequest};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Entry point: walks through the admin surface of the router —
/// health/readiness probes, config inspection, circuit-breaker status,
/// a smoke-test routing call, and a hot model reload.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== Tiny Dancer Admin Example ===\n");

    // Create router with default configuration.
    // NOTE(review): the model file is not required to exist for router
    // construction to succeed here — TODO confirm against Router::new.
    let router_config = RouterConfig {
        model_path: "./models/fastgrnn.safetensors".to_string(),
        confidence_threshold: 0.85,
        max_uncertainty: 0.15,
        enable_circuit_breaker: true,
        circuit_breaker_threshold: 5,
        enable_quantization: true,
        database_path: None,
    };

    println!("Creating router with config:");
    println!(" Model path: {}", router_config.model_path);
    println!(
        " Confidence threshold: {}",
        router_config.confidence_threshold
    );
    println!(" Max uncertainty: {}", router_config.max_uncertainty);
    println!(
        " Circuit breaker: {}",
        router_config.enable_circuit_breaker
    );

    // Clone so the local `router_config` stays printable above.
    let router = Router::new(router_config.clone())?;

    // Health check implementation (liveness-probe analogue).
    println!("\n--- Health Check ---");
    let health = check_health(&router);
    println!("Status: {}", if health { "healthy" } else { "unhealthy" });

    // Readiness check (readiness-probe analogue; gated on the breaker).
    println!("\n--- Readiness Check ---");
    let ready = check_readiness(&router);
    println!("Ready: {}", ready);

    // Configuration info
    println!("\n--- Configuration ---");
    let config = router.config();
    println!("Current configuration: {:?}", config);

    // Circuit breaker status: Some(true) = closed, Some(false) = open,
    // None = the breaker feature is disabled.
    println!("\n--- Circuit Breaker Status ---");
    match router.circuit_breaker_status() {
        Some(true) => println!("State: Closed (accepting requests)"),
        Some(false) => println!("State: Open (rejecting requests)"),
        None => println!("State: Disabled"),
    }

    // Test routing to verify system works end-to-end with one candidate.
    println!("\n--- Test Routing ---");
    let candidates = vec![Candidate {
        id: "test-1".to_string(),
        embedding: vec![0.5; 384],
        metadata: HashMap::new(),
        created_at: chrono::Utc::now().timestamp(),
        access_count: 10,
        success_rate: 0.95,
    }];

    let request = RoutingRequest {
        query_embedding: vec![0.5; 384],
        candidates,
        metadata: None,
    };

    match router.route(request) {
        Ok(response) => {
            println!(
                "Test routing successful: {} candidates in {}μs",
                response.candidates_processed, response.inference_time_us
            );
        }
        Err(e) => {
            // Routing failure is expected when no model file is present;
            // the example keeps going rather than aborting.
            println!("Test routing failed: {}", e);
        }
    }

    // Model reload demonstration (hot reload without recreating router).
    println!("\n--- Model Reload ---");
    println!("Attempting model reload...");
    match router.reload_model() {
        Ok(_) => println!("Model reload: Success"),
        Err(e) => println!("Model reload: {} (expected if model file doesn't exist)", e),
    }

    println!("\n=== Admin Example Complete ===");
    println!("\nFor a full HTTP admin server, you would need:");
    println!("1. Add axum and tokio dependencies");
    println!("2. Enable the admin-api feature");
    println!("3. Use the AdminServer from the api module");

    Ok(())
}
|
||||
|
||||
/// Basic health check - returns true if the router is operational
|
||||
fn check_health(router: &Router) -> bool {
|
||||
// A simple health check just verifies the router exists
|
||||
// In production, you might also check model availability
|
||||
router.config().model_path.len() > 0
|
||||
}
|
||||
|
||||
/// Readiness check - returns true if ready to accept traffic
|
||||
fn check_readiness(router: &Router) -> bool {
|
||||
// Check circuit breaker status
|
||||
match router.circuit_breaker_status() {
|
||||
Some(is_closed) => is_closed, // Ready only if circuit breaker is closed
|
||||
None => true, // Ready if circuit breaker is disabled
|
||||
}
|
||||
}
|
||||
204
crates/ruvector-tiny-dancer-core/examples/full_observability.rs
Normal file
204
crates/ruvector-tiny-dancer-core/examples/full_observability.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
//! Comprehensive observability example demonstrating routing performance
|
||||
//!
|
||||
//! This example demonstrates:
|
||||
//! - Circuit breaker monitoring
|
||||
//! - Performance tracking
|
||||
//! - Response statistics
|
||||
//! - Different load scenarios
|
||||
//!
|
||||
//! Run with: cargo run --example full_observability
|
||||
|
||||
use ruvector_tiny_dancer_core::{Candidate, Router, RouterConfig, RoutingRequest, RoutingResponse};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Entry point: runs two load scenarios through the router while
/// hand-tracking request counts, latency, and routing-decision splits,
/// then prints a performance summary.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== Tiny Dancer Full Observability Example ===\n");

    // Create router with full configuration; a low breaker threshold (3)
    // makes circuit-breaker behavior easier to trigger in a demo.
    let config = RouterConfig {
        model_path: "./models/fastgrnn.safetensors".to_string(),
        confidence_threshold: 0.85,
        max_uncertainty: 0.15,
        enable_circuit_breaker: true,
        circuit_breaker_threshold: 3,
        enable_quantization: true,
        database_path: None,
    };

    let router = Router::new(config)?;

    // Track metrics manually (an external system like Prometheus would
    // normally aggregate these).
    let mut total_requests = 0u64;
    let mut successful_requests = 0u64;
    let mut total_latency_us = 0u64;
    let mut lightweight_routes = 0usize;
    let mut powerful_routes = 0usize;

    println!("\n=== Scenario 1: Normal Operations ===\n");

    // Process normal requests: 5 small batches of 3 candidates each.
    for i in 0..5 {
        let candidates = create_candidates(i, 3);
        let request = RoutingRequest {
            // Vary the query embedding slightly per request.
            query_embedding: vec![0.5 + (i as f32 * 0.05); 384],
            candidates,
            metadata: Some(HashMap::from([(
                "scenario".to_string(),
                serde_json::json!("normal_operations"),
            )])),
        };

        total_requests += 1;
        match router.route(request) {
            Ok(response) => {
                successful_requests += 1;
                total_latency_us += response.inference_time_us;
                let (lw, pw) = count_routes(&response);
                lightweight_routes += lw;
                powerful_routes += pw;
                print_response_summary(i + 1, &response);
            }
            Err(e) => {
                eprintln!("Request {} failed: {}", i + 1, e);
            }
        }

        // Demo pacing between requests.
        std::thread::sleep(Duration::from_millis(100));
    }

    println!("\n=== Scenario 2: High Load ===\n");

    // Simulate high load with many candidates per request, no pacing.
    for i in 0..3 {
        let candidates = create_candidates(i, 20); // More candidates
        let request = RoutingRequest {
            query_embedding: vec![0.6; 384],
            candidates,
            metadata: Some(HashMap::from([(
                "scenario".to_string(),
                serde_json::json!("high_load"),
            )])),
        };

        total_requests += 1;
        match router.route(request) {
            Ok(response) => {
                successful_requests += 1;
                total_latency_us += response.inference_time_us;
                let (lw, pw) = count_routes(&response);
                lightweight_routes += lw;
                powerful_routes += pw;
                print_response_summary(i + 1, &response);
            }
            Err(e) => {
                eprintln!("Request {} failed: {}", i + 1, e);
            }
        }
    }

    // Display statistics accumulated across both scenarios.
    println!("\n=== Performance Statistics ===\n");
    display_statistics(
        total_requests,
        successful_requests,
        total_latency_us,
        lightweight_routes,
        powerful_routes,
        &router,
    );

    println!("\n=== Full Observability Example Complete ===");
    println!("\nMetrics Summary:");
    println!("- Total requests processed");
    println!("- Success/failure rates tracked");
    println!("- Latency statistics computed");
    println!("- Routing decisions categorized");
    println!("- Circuit breaker state monitored");

    Ok(())
}
|
||||
|
||||
fn create_candidates(offset: i32, count: usize) -> Vec<Candidate> {
|
||||
(0..count)
|
||||
.map(|i| {
|
||||
let base_score = 0.7 + ((i + offset as usize) as f32 * 0.02) % 0.3;
|
||||
Candidate {
|
||||
id: format!("candidate-{}-{}", offset, i),
|
||||
embedding: vec![base_score; 384],
|
||||
metadata: HashMap::new(),
|
||||
created_at: chrono::Utc::now().timestamp(),
|
||||
access_count: 10 + i as u64,
|
||||
success_rate: 0.85 + (base_score * 0.15),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn count_routes(response: &RoutingResponse) -> (usize, usize) {
|
||||
let lightweight = response
|
||||
.decisions
|
||||
.iter()
|
||||
.filter(|d| d.use_lightweight)
|
||||
.count();
|
||||
let powerful = response.decisions.len() - lightweight;
|
||||
(lightweight, powerful)
|
||||
}
|
||||
|
||||
fn print_response_summary(request_num: i32, response: &RoutingResponse) {
|
||||
let (lightweight_count, powerful_count) = count_routes(response);
|
||||
|
||||
println!(
|
||||
"Request {}: {}μs total, {}μs features, {} candidates",
|
||||
request_num,
|
||||
response.inference_time_us,
|
||||
response.feature_time_us,
|
||||
response.candidates_processed
|
||||
);
|
||||
println!(
|
||||
" Routing: {} lightweight, {} powerful",
|
||||
lightweight_count, powerful_count
|
||||
);
|
||||
|
||||
if let Some(top_decision) = response.decisions.first() {
|
||||
println!(
|
||||
" Top: {} (confidence: {:.3}, uncertainty: {:.3})",
|
||||
top_decision.candidate_id, top_decision.confidence, top_decision.uncertainty
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn display_statistics(
|
||||
total_requests: u64,
|
||||
successful_requests: u64,
|
||||
total_latency_us: u64,
|
||||
lightweight_routes: usize,
|
||||
powerful_routes: usize,
|
||||
router: &Router,
|
||||
) {
|
||||
let cb_state = match router.circuit_breaker_status() {
|
||||
Some(true) => "Closed",
|
||||
Some(false) => "Open",
|
||||
None => "Disabled",
|
||||
};
|
||||
|
||||
let success_rate = if total_requests > 0 {
|
||||
(successful_requests as f64 / total_requests as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let avg_latency = if successful_requests > 0 {
|
||||
total_latency_us / successful_requests
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
println!("Circuit Breaker: {}", cb_state);
|
||||
println!("Total Requests: {}", total_requests);
|
||||
println!("Successful Requests: {}", successful_requests);
|
||||
println!("Success Rate: {:.1}%", success_rate);
|
||||
println!("Avg Latency: {}μs", avg_latency);
|
||||
println!("Lightweight Routes: {}", lightweight_routes);
|
||||
println!("Powerful Routes: {}", powerful_routes);
|
||||
}
|
||||
144
crates/ruvector-tiny-dancer-core/examples/metrics_example.rs
Normal file
144
crates/ruvector-tiny-dancer-core/examples/metrics_example.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
//! Example demonstrating metrics collection with Tiny Dancer
|
||||
//!
|
||||
//! This example shows how to:
|
||||
//! - Collect routing metrics manually
|
||||
//! - Monitor circuit breaker state
|
||||
//! - Track routing decisions and latencies
|
||||
//!
|
||||
//! Run with: cargo run --example metrics_example
|
||||
|
||||
use ruvector_tiny_dancer_core::{Candidate, Router, RouterConfig, RoutingRequest};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Entry point: routes ten synthetic requests, accumulates counters by
/// hand, and prints them in Prometheus text-exposition style.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== Tiny Dancer Metrics Example ===\n");

    // Create router with metrics enabled; unspecified fields fall back
    // to RouterConfig's Default impl.
    let config = RouterConfig {
        model_path: "./models/fastgrnn.safetensors".to_string(),
        confidence_threshold: 0.85,
        max_uncertainty: 0.15,
        enable_circuit_breaker: true,
        circuit_breaker_threshold: 5,
        ..Default::default()
    };

    let router = Router::new(config)?;

    // Track metrics manually (counters a real exporter would own).
    let mut total_requests = 0u64;
    let mut total_candidates = 0u64;
    let mut total_latency_us = 0u64;
    let mut lightweight_count = 0u64;
    let mut powerful_count = 0u64;

    // Process multiple routing requests
    println!("Processing routing requests...\n");

    for i in 0..10 {
        // Three candidates per request with slightly drifting scores so
        // each iteration exercises different inputs.
        let candidates = vec![
            Candidate {
                id: format!("candidate-{}-1", i),
                embedding: vec![0.5 + (i as f32 * 0.01); 384],
                metadata: HashMap::new(),
                created_at: chrono::Utc::now().timestamp(),
                access_count: 10 + i as u64,
                success_rate: 0.95 - (i as f32 * 0.01),
            },
            Candidate {
                id: format!("candidate-{}-2", i),
                embedding: vec![0.3 + (i as f32 * 0.01); 384],
                metadata: HashMap::new(),
                created_at: chrono::Utc::now().timestamp(),
                access_count: 5 + i as u64,
                success_rate: 0.85 - (i as f32 * 0.01),
            },
            Candidate {
                id: format!("candidate-{}-3", i),
                embedding: vec![0.7 + (i as f32 * 0.01); 384],
                metadata: HashMap::new(),
                created_at: chrono::Utc::now().timestamp(),
                access_count: 15 + i as u64,
                success_rate: 0.98 - (i as f32 * 0.01),
            },
        ];

        let request = RoutingRequest {
            query_embedding: vec![0.5; 384],
            candidates,
            metadata: None,
        };

        match router.route(request) {
            Ok(response) => {
                // Note: only successful requests are counted here, so
                // total_requests is really a success counter.
                total_requests += 1;
                total_candidates += response.candidates_processed as u64;
                total_latency_us += response.inference_time_us;

                // Count routing decisions by target model class.
                for decision in &response.decisions {
                    if decision.use_lightweight {
                        lightweight_count += 1;
                    } else {
                        powerful_count += 1;
                    }
                }

                println!(
                    "Request {}: Processed {} candidates in {}μs",
                    i + 1,
                    response.candidates_processed,
                    response.inference_time_us
                );
                if let Some(top) = response.decisions.first() {
                    println!(
                        " Top decision: {} (confidence: {:.3}, lightweight: {})",
                        top.candidate_id, top.confidence, top.use_lightweight
                    );
                }
            }
            Err(e) => {
                eprintln!("Error processing request {}: {}", i + 1, e);
            }
        }
    }

    // Display collected metrics in Prometheus-like exposition format.
    println!("\n=== Collected Metrics ===\n");

    let cb_state = match router.circuit_breaker_status() {
        Some(true) => "closed",
        Some(false) => "open",
        None => "disabled",
    };

    // Integer-division average; guarded against zero requests.
    let avg_latency = if total_requests > 0 {
        total_latency_us / total_requests
    } else {
        0
    };

    println!("tiny_dancer_routing_requests_total {}", total_requests);
    println!(
        "tiny_dancer_candidates_processed_total {}",
        total_candidates
    );
    println!(
        "tiny_dancer_routing_decisions_total{{model_type=\"lightweight\"}} {}",
        lightweight_count
    );
    println!(
        "tiny_dancer_routing_decisions_total{{model_type=\"powerful\"}} {}",
        powerful_count
    );
    println!("tiny_dancer_avg_latency_us {}", avg_latency);
    println!("tiny_dancer_circuit_breaker_state {}", cb_state);

    println!("\n=== Metrics Collection Complete ===");
    println!("\nThese metrics can be exported to monitoring systems:");
    println!("- Prometheus for time-series collection");
    println!("- Grafana for visualization");
    println!("- Custom dashboards for real-time monitoring");

    Ok(())
}
|
||||
96
crates/ruvector-tiny-dancer-core/examples/tracing_example.rs
Normal file
96
crates/ruvector-tiny-dancer-core/examples/tracing_example.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
//! Example demonstrating basic tracing with the Tiny Dancer routing system
|
||||
//!
|
||||
//! This example shows how to:
|
||||
//! - Create and configure a router
|
||||
//! - Process routing requests
|
||||
//! - Monitor timing and performance
|
||||
//!
|
||||
//! Run with: cargo run --example tracing_example
|
||||
|
||||
use ruvector_tiny_dancer_core::{Candidate, Router, RouterConfig, RoutingRequest};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Tiny Dancer Routing Example with Timing ===\n");
|
||||
|
||||
// Create router with configuration
|
||||
let config = RouterConfig {
|
||||
model_path: "./models/fastgrnn.safetensors".to_string(),
|
||||
confidence_threshold: 0.85,
|
||||
max_uncertainty: 0.15,
|
||||
enable_circuit_breaker: true,
|
||||
circuit_breaker_threshold: 5,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let router = Router::new(config)?;
|
||||
|
||||
// Process requests with timing
|
||||
println!("Processing requests with timing information...\n");
|
||||
|
||||
for i in 0..3 {
|
||||
let request_start = Instant::now();
|
||||
println!("Request {} - Processing", i + 1);
|
||||
|
||||
// Create candidates
|
||||
let candidates = vec![
|
||||
Candidate {
|
||||
id: format!("candidate-{}-1", i),
|
||||
embedding: vec![0.5; 384],
|
||||
metadata: HashMap::new(),
|
||||
created_at: chrono::Utc::now().timestamp(),
|
||||
access_count: 10,
|
||||
success_rate: 0.95,
|
||||
},
|
||||
Candidate {
|
||||
id: format!("candidate-{}-2", i),
|
||||
embedding: vec![0.3; 384],
|
||||
metadata: HashMap::new(),
|
||||
created_at: chrono::Utc::now().timestamp(),
|
||||
access_count: 5,
|
||||
success_rate: 0.85,
|
||||
},
|
||||
];
|
||||
|
||||
let request = RoutingRequest {
|
||||
query_embedding: vec![0.5; 384],
|
||||
candidates: candidates.clone(),
|
||||
metadata: None,
|
||||
};
|
||||
|
||||
// Route request
|
||||
match router.route(request) {
|
||||
Ok(response) => {
|
||||
let total_time = request_start.elapsed();
|
||||
println!(
|
||||
"\nRequest {}: Processed {} candidates in {}μs (total: {:?})",
|
||||
i + 1,
|
||||
response.candidates_processed,
|
||||
response.inference_time_us,
|
||||
total_time
|
||||
);
|
||||
|
||||
for decision in response.decisions.iter().take(2) {
|
||||
println!(
|
||||
" - {} (confidence: {:.2}, lightweight: {})",
|
||||
decision.candidate_id, decision.confidence, decision.use_lightweight
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
println!("\n=== Routing Example Complete ===");
|
||||
println!("\nTiming breakdown available in each response:");
|
||||
println!("- inference_time_us: Total inference time");
|
||||
println!("- feature_time_us: Feature engineering time");
|
||||
println!("- candidates_processed: Number of candidates evaluated");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
313
crates/ruvector-tiny-dancer-core/examples/train-model.rs
Normal file
313
crates/ruvector-tiny-dancer-core/examples/train-model.rs
Normal file
@@ -0,0 +1,313 @@
|
||||
//! Example: Training a FastGRNN model for routing decisions
|
||||
//!
|
||||
//! This example demonstrates:
|
||||
//! - Synthetic data generation for routing tasks
|
||||
//! - Training a FastGRNN model with validation
|
||||
//! - Knowledge distillation from a teacher model
|
||||
//! - Early stopping and learning rate scheduling
|
||||
//! - Model evaluation and saving
|
||||
|
||||
use rand::Rng;
|
||||
use ruvector_tiny_dancer_core::{
|
||||
model::{FastGRNN, FastGRNNConfig},
|
||||
training::{generate_teacher_predictions, Trainer, TrainingConfig, TrainingDataset},
|
||||
Result,
|
||||
};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Entry point: full FastGRNN training walkthrough — synthetic data,
/// normalization, distillation from a teacher, training with early
/// stopping, evaluation, saving, and post-training quantization.
fn main() -> Result<()> {
    println!("=== FastGRNN Training Example ===\n");

    // 1. Generate synthetic training data (1000 samples, 5 features).
    println!("Generating synthetic training data...");
    let (features, labels) = generate_synthetic_data(1000);
    let mut dataset = TrainingDataset::new(features, labels)?;

    // Normalize features in place; normalize() returns the per-feature
    // means/stds so they can be reused at inference time.
    println!("Normalizing features...");
    let (means, stds) = dataset.normalize()?;
    println!("Feature means: {:?}", means);
    println!("Feature stds: {:?}\n", stds);

    // 2. Create model configuration. input_dim must match the 5
    // synthetic features; rank enables a low-rank factorization.
    let model_config = FastGRNNConfig {
        input_dim: 5,
        hidden_dim: 16,
        output_dim: 1,
        nu: 0.8,
        zeta: 1.2,
        rank: Some(8),
    };

    // 3. Create and initialize model
    println!("Creating FastGRNN model...");
    let mut model = FastGRNN::new(model_config.clone())?;
    println!("Model size: {} bytes\n", model.size_bytes());

    // 4. Optional: Knowledge distillation setup — soften teacher logits
    // at `temperature` and attach them to the dataset as soft targets.
    println!("Setting up knowledge distillation...");
    let teacher_model = create_pretrained_teacher(&model_config)?;
    let temperature = 3.0;
    let soft_targets =
        generate_teacher_predictions(&teacher_model, &dataset.features, temperature)?;
    dataset = dataset.with_soft_targets(soft_targets)?;
    println!("Generated soft targets from teacher model\n");

    // 5. Configure training: Adam with step LR decay, gradient
    // clipping, L2 regularization, and distillation loss blended at
    // alpha = 0.7 (70% distillation / 30% hard labels — TODO confirm
    // the blend direction against TrainingConfig docs).
    let training_config = TrainingConfig {
        learning_rate: 0.01,
        batch_size: 32,
        epochs: 50,
        validation_split: 0.2,
        early_stopping_patience: Some(5),
        lr_decay: 0.8,
        lr_decay_step: 10,
        grad_clip: 5.0,
        adam_beta1: 0.9,
        adam_beta2: 0.999,
        adam_epsilon: 1e-8,
        l2_reg: 1e-4,
        enable_distillation: true,
        distillation_temperature: temperature,
        distillation_alpha: 0.7,
    };

    // 6. Create trainer and train model; returns per-epoch metrics.
    println!("Starting training...\n");
    let mut trainer = Trainer::new(&model_config, training_config);
    let metrics = trainer.train(&mut model, &dataset)?;

    // 7. Print training summary from the final epoch.
    println!("\n=== Training Summary ===");
    println!("Total epochs: {}", metrics.len());
    if let Some(last_metrics) = metrics.last() {
        println!("Final train loss: {:.4}", last_metrics.train_loss);
        println!("Final val loss: {:.4}", last_metrics.val_loss);
        println!(
            "Final train accuracy: {:.2}%",
            last_metrics.train_accuracy * 100.0
        );
        println!(
            "Final val accuracy: {:.2}%",
            last_metrics.val_accuracy * 100.0
        );
    }

    // 8. Find best epoch by minimum validation loss.
    // NOTE(review): partial_cmp().unwrap() panics if a val_loss is NaN.
    if let Some(best) = metrics
        .iter()
        .min_by(|a, b| a.val_loss.partial_cmp(&b.val_loss).unwrap())
    {
        println!(
            "\nBest validation loss: {:.4} at epoch {}",
            best.val_loss,
            best.epoch + 1
        );
        println!(
            "Best validation accuracy: {:.2}%",
            best.val_accuracy * 100.0
        );
    }

    // 9. Test inference on sample data
    println!("\n=== Testing Inference ===");
    test_inference(&model)?;

    // 10. Save model and metrics
    println!("\n=== Saving Model ===");
    let model_path = PathBuf::from("models/fastgrnn_trained.safetensors");
    let metrics_path = PathBuf::from("models/training_metrics.json");

    // Create models directory if it doesn't exist; errors (e.g. it
    // already exists) are deliberately ignored — save() will surface
    // any real I/O problem.
    std::fs::create_dir_all("models").ok();

    model.save(&model_path)?;
    trainer.save_metrics(&metrics_path)?;

    println!("Model saved to: {:?}", model_path);
    println!("Metrics saved to: {:?}", metrics_path);

    // 11. Demonstrate model optimization via in-place quantization.
    println!("\n=== Model Optimization ===");
    let original_size = model.size_bytes();
    println!("Original model size: {} bytes", original_size);

    model.quantize()?;
    let quantized_size = model.size_bytes();
    println!("Quantized model size: {} bytes", quantized_size);
    println!(
        "Size reduction: {:.1}%",
        (1.0 - quantized_size as f32 / original_size as f32) * 100.0
    );

    println!("\n=== Training Complete ===");

    Ok(())
}
|
||||
|
||||
/// Generate synthetic training data for routing decisions
|
||||
///
|
||||
/// Features represent:
|
||||
/// - [0]: Semantic similarity (0.0 to 1.0)
|
||||
/// - [1]: Recency score (0.0 to 1.0)
|
||||
/// - [2]: Popularity score (0.0 to 1.0)
|
||||
/// - [3]: Historical success rate (0.0 to 1.0)
|
||||
/// - [4]: Query complexity (0.0 to 1.0)
|
||||
///
|
||||
/// Label: 1.0 = route to lightweight model, 0.0 = route to powerful model
|
||||
fn generate_synthetic_data(n_samples: usize) -> (Vec<Vec<f32>>, Vec<f32>) {
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut features = Vec::with_capacity(n_samples);
|
||||
let mut labels = Vec::with_capacity(n_samples);
|
||||
|
||||
for _ in 0..n_samples {
|
||||
// Generate random features
|
||||
let similarity: f32 = rng.gen();
|
||||
let recency: f32 = rng.gen();
|
||||
let popularity: f32 = rng.gen();
|
||||
let success_rate: f32 = rng.gen();
|
||||
let complexity: f32 = rng.gen();
|
||||
|
||||
let feature_vec = vec![similarity, recency, popularity, success_rate, complexity];
|
||||
|
||||
// Generate label based on heuristic rules
|
||||
// High similarity + high success rate + low complexity -> lightweight (1.0)
|
||||
// Low similarity + low success rate + high complexity -> powerful (0.0)
|
||||
let lightweight_score = similarity * 0.4 + success_rate * 0.3 + (1.0 - complexity) * 0.3;
|
||||
|
||||
// Add some noise and threshold
|
||||
let noise: f32 = rng.gen_range(-0.1..0.1);
|
||||
let label = if lightweight_score + noise > 0.6 {
|
||||
1.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
features.push(feature_vec);
|
||||
labels.push(label);
|
||||
}
|
||||
|
||||
(features, labels)
|
||||
}
|
||||
|
||||
/// Create a pretrained teacher model (simulated)
|
||||
///
|
||||
/// In practice, this would be a larger, more accurate model
|
||||
/// For this example, we create a model with similar architecture
|
||||
/// but pretend it's been trained to high accuracy
|
||||
fn create_pretrained_teacher(config: &FastGRNNConfig) -> Result<FastGRNN> {
|
||||
// Create a teacher model with larger capacity
|
||||
let teacher_config = FastGRNNConfig {
|
||||
input_dim: config.input_dim,
|
||||
hidden_dim: config.hidden_dim * 2, // Larger model
|
||||
output_dim: config.output_dim,
|
||||
nu: config.nu,
|
||||
zeta: config.zeta,
|
||||
rank: config.rank.map(|r| r * 2),
|
||||
};
|
||||
|
||||
let teacher = FastGRNN::new(teacher_config)?;
|
||||
// In practice, you would load pretrained weights here:
|
||||
// teacher.load("path/to/teacher/model.safetensors")?;
|
||||
|
||||
Ok(teacher)
|
||||
}
|
||||
|
||||
/// Test model inference on sample inputs
|
||||
fn test_inference(model: &FastGRNN) -> Result<()> {
|
||||
// Test case 1: High confidence -> lightweight
|
||||
let high_confidence = vec![0.9, 0.8, 0.7, 0.9, 0.2]; // high sim, low complexity
|
||||
let pred1 = model.forward(&high_confidence, None)?;
|
||||
println!("High confidence case: prediction = {:.4}", pred1);
|
||||
|
||||
// Test case 2: Low confidence -> powerful
|
||||
let low_confidence = vec![0.3, 0.2, 0.1, 0.4, 0.9]; // low sim, high complexity
|
||||
let pred2 = model.forward(&low_confidence, None)?;
|
||||
println!("Low confidence case: prediction = {:.4}", pred2);
|
||||
|
||||
// Test case 3: Medium confidence
|
||||
let medium_confidence = vec![0.5, 0.5, 0.5, 0.5, 0.5];
|
||||
let pred3 = model.forward(&medium_confidence, None)?;
|
||||
println!("Medium confidence case: prediction = {:.4}", pred3);
|
||||
|
||||
// Batch inference
|
||||
let batch = vec![high_confidence, low_confidence, medium_confidence];
|
||||
let batch_preds = model.forward_batch(&batch)?;
|
||||
println!("\nBatch predictions: {:?}", batch_preds);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Example: Custom training loop with manual control
|
||||
#[allow(dead_code)]
|
||||
fn example_custom_training_loop() -> Result<()> {
|
||||
println!("=== Custom Training Loop Example ===\n");
|
||||
|
||||
// Setup
|
||||
let (features, labels) = generate_synthetic_data(500);
|
||||
let dataset = TrainingDataset::new(features, labels)?;
|
||||
let (train_dataset, val_dataset) = dataset.split(0.2)?;
|
||||
|
||||
let config = FastGRNNConfig::default();
|
||||
let mut model = FastGRNN::new(config.clone())?;
|
||||
|
||||
let training_config = TrainingConfig {
|
||||
batch_size: 16,
|
||||
learning_rate: 0.005,
|
||||
epochs: 20,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut trainer = Trainer::new(&config, training_config);
|
||||
|
||||
// Custom training with per-epoch callbacks
|
||||
println!("Training with custom callbacks...");
|
||||
for epoch in 0..10 {
|
||||
// You could implement custom logic here
|
||||
// For example: dynamic batch size, custom metrics, etc.
|
||||
|
||||
println!("Epoch {}: Custom preprocessing...", epoch + 1);
|
||||
|
||||
// Train for one epoch
|
||||
// In practice, you'd call trainer.train_epoch() here
|
||||
// This is just to demonstrate the pattern
|
||||
}
|
||||
|
||||
println!("Custom training complete!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Example: Continual learning scenario
|
||||
#[allow(dead_code)]
|
||||
fn example_continual_learning() -> Result<()> {
|
||||
println!("=== Continual Learning Example ===\n");
|
||||
|
||||
let config = FastGRNNConfig::default();
|
||||
let mut model = FastGRNN::new(config.clone())?;
|
||||
|
||||
// Train on initial dataset
|
||||
println!("Phase 1: Training on initial data...");
|
||||
let (features1, labels1) = generate_synthetic_data(500);
|
||||
let dataset1 = TrainingDataset::new(features1, labels1)?;
|
||||
|
||||
let training_config = TrainingConfig {
|
||||
epochs: 20,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut trainer = Trainer::new(&config, training_config.clone());
|
||||
trainer.train(&mut model, &dataset1)?;
|
||||
|
||||
// Continue training on new data
|
||||
println!("\nPhase 2: Continual learning on new data...");
|
||||
let (features2, labels2) = generate_synthetic_data(300);
|
||||
let dataset2 = TrainingDataset::new(features2, labels2)?;
|
||||
|
||||
let mut trainer2 = Trainer::new(&config, training_config);
|
||||
trainer2.train(&mut model, &dataset2)?;
|
||||
|
||||
println!("\nContinual learning complete!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user