//! Intelligence Metrics Module //! //! Measures cognitive capabilities, reasoning quality, and learning indicators //! for agent evaluation based on established AI benchmarking methodologies. //! //! Key metrics tracked: //! - Reasoning quality (logical coherence, constraint satisfaction) //! - Learning efficiency (regret curves, sample efficiency) //! - Working memory (context utilization, information integration) //! - Tool use proficiency (appropriate selection, effective utilization) //! - Meta-cognitive awareness (self-correction, uncertainty estimation) use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// Intelligence assessment result #[derive(Clone, Debug, Serialize, Deserialize)] pub struct IntelligenceAssessment { /// Overall intelligence score (0-100) pub overall_score: f64, /// Individual capability scores pub capabilities: CapabilityScores, /// Reasoning quality metrics pub reasoning: ReasoningMetrics, /// Learning efficiency metrics pub learning: LearningMetrics, /// Tool use proficiency pub tool_use: ToolUseMetrics, /// Meta-cognitive indicators pub meta_cognition: MetaCognitiveMetrics, /// Cost efficiency metrics pub cost: CostMetrics, /// Robustness under noise pub robustness: RobustnessMetrics, /// Raw performance data pub raw_data: RawMetrics, } /// Capability scores across dimensions #[derive(Clone, Debug, Serialize, Deserialize)] pub struct CapabilityScores { /// Temporal reasoning (date inference, calendar math) pub temporal_reasoning: f64, /// Constraint satisfaction (multi-constraint solving) pub constraint_satisfaction: f64, /// Information retrieval (semantic search, recall) pub information_retrieval: f64, /// Pattern recognition (learning from examples) pub pattern_recognition: f64, /// Planning and sequencing pub planning: f64, /// Error recovery and adaptation pub adaptation: f64, } impl Default for CapabilityScores { fn default() -> Self { Self { temporal_reasoning: 0.0, constraint_satisfaction: 0.0, information_retrieval: 0.0, pattern_recognition: 0.0, planning: 0.0, adaptation: 0.0, } } } impl CapabilityScores { /// Compute weighted average pub fn weighted_average(&self, weights: &[f64; 6]) -> f64 { let scores = [ self.temporal_reasoning, self.constraint_satisfaction, self.information_retrieval, self.pattern_recognition, self.planning, self.adaptation, ]; let total_weight: f64 = weights.iter().sum(); if total_weight == 0.0 { return 0.0; } scores .iter() .zip(weights.iter()) .map(|(s, w)| s * w) .sum::() / total_weight } } /// Reasoning quality metrics #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ReasoningMetrics { /// Logical coherence (steps follow logically) pub logical_coherence: f64, /// Constraint satisfaction rate pub constraint_satisfaction_rate: f64, /// Solution optimality (vs. best possible) pub solution_optimality: f64, /// Reasoning efficiency (steps to solution) pub reasoning_efficiency: f64, /// Error rate in logical steps pub error_rate: f64, } impl Default for ReasoningMetrics { fn default() -> Self { Self { logical_coherence: 0.0, constraint_satisfaction_rate: 0.0, solution_optimality: 0.0, reasoning_efficiency: 0.0, error_rate: 0.0, } } } /// Learning efficiency metrics #[derive(Clone, Debug, Serialize, Deserialize)] pub struct LearningMetrics { /// Sample efficiency (performance vs. examples seen) pub sample_efficiency: f64, /// Regret trajectory (sublinear indicator) pub regret_sublinearity: f64, /// Transfer learning capability pub transfer_capability: f64, /// Learning rate (improvement per episode) pub learning_rate: f64, /// Generalization ability pub generalization: f64, } impl Default for LearningMetrics { fn default() -> Self { Self { sample_efficiency: 0.0, regret_sublinearity: 0.0, transfer_capability: 0.0, learning_rate: 0.0, generalization: 0.0, } } } /// Tool use proficiency metrics #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ToolUseMetrics { /// Tool selection appropriateness pub selection_appropriateness: f64, /// Tool utilization effectiveness pub utilization_effectiveness: f64, /// Tool composition (combining tools) pub composition_ability: f64, /// Tool discovery (finding needed tools) pub discovery_ability: f64, } impl Default for ToolUseMetrics { fn default() -> Self { Self { selection_appropriateness: 0.0, utilization_effectiveness: 0.0, composition_ability: 0.0, discovery_ability: 0.0, } } } /// Meta-cognitive metrics #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MetaCognitiveMetrics { /// Self-correction rate pub self_correction_rate: f64, /// Uncertainty calibration (confidence vs. accuracy) pub uncertainty_calibration: f64, /// Strategy adaptation pub strategy_adaptation: f64, /// Progress monitoring accuracy pub progress_monitoring: f64, } impl Default for MetaCognitiveMetrics { fn default() -> Self { Self { self_correction_rate: 0.0, uncertainty_calibration: 0.0, strategy_adaptation: 0.0, progress_monitoring: 0.0, } } } /// Cost efficiency metrics — first-class IQ dimension #[derive(Clone, Debug, Serialize, Deserialize)] pub struct CostMetrics { /// Steps per correct solve (lower = better) pub steps_per_solve: f64, /// Tool calls per correct solve (lower = better) pub tools_per_solve: f64, /// Cost efficiency score (0-1, higher = cheaper) pub cost_efficiency: f64, /// Cost trend over episodes (positive = improving) pub cost_trend: f64, } impl Default for CostMetrics { fn default() -> Self { Self { steps_per_solve: 100.0, tools_per_solve: 10.0, cost_efficiency: 0.0, cost_trend: 0.0, } } } /// Robustness under adversarial conditions — first-class IQ dimension #[derive(Clone, Debug, Serialize, Deserialize)] pub struct RobustnessMetrics { /// Accuracy on noise-injected tasks pub noise_accuracy: f64, /// Accuracy drop from clean to noisy (lower = more robust) pub noise_degradation: f64, /// Per-episode accuracy consistency (higher = steadier) pub consistency: f64, /// Composite robustness score (0-1) pub robustness_score: f64, } impl Default for RobustnessMetrics { fn default() -> Self { Self { noise_accuracy: 0.0, noise_degradation: 1.0, consistency: 0.0, robustness_score: 0.0, } } } /// Raw metrics from benchmarks #[derive(Clone, Debug, Serialize, Deserialize)] pub struct RawMetrics { /// Total tasks attempted pub tasks_attempted: usize, /// Tasks completed successfully pub tasks_completed: usize, /// Tasks with correct solutions pub tasks_correct: usize, /// Total steps taken pub total_steps: usize, /// Total tool calls pub total_tool_calls: usize, /// Total latency in ms pub total_latency_ms: u64, /// Performance by difficulty pub by_difficulty: HashMap, /// Episode-level metrics pub episodes: Vec, /// Tasks attempted under noise injection pub noise_tasks_attempted: usize, /// Tasks correct under noise injection pub noise_tasks_correct: usize, /// Policy violations (contradictions, budget overruns) pub policy_violations: usize, /// Solved-but-incorrect count (contradiction rate numerator) pub contradictions: usize, /// Successful rollbacks from noisy to clean pub rollback_successes: usize, /// Attempted rollbacks from noisy to clean pub rollback_attempts: usize, } impl Default for RawMetrics { fn default() -> Self { Self { tasks_attempted: 0, tasks_completed: 0, tasks_correct: 0, total_steps: 0, total_tool_calls: 0, total_latency_ms: 0, by_difficulty: HashMap::new(), episodes: Vec::new(), noise_tasks_attempted: 0, noise_tasks_correct: 0, policy_violations: 0, contradictions: 0, rollback_successes: 0, rollback_attempts: 0, } } } /// Stats per difficulty level #[derive(Clone, Debug, Serialize, Deserialize)] pub struct DifficultyStats { pub attempted: usize, pub completed: usize, pub correct: usize, pub avg_steps: f64, } /// Per-episode metrics #[derive(Clone, Debug, Serialize, Deserialize)] pub struct EpisodeMetrics { pub episode: usize, pub accuracy: f64, pub reward: f64, pub regret: f64, pub cumulative_regret: f64, } /// Intelligence metrics calculator pub struct IntelligenceCalculator { /// Weights for capability scoring pub capability_weights: [f64; 6], /// Baseline for comparison pub baseline_accuracy: f64, /// Oracle performance for regret calculation pub oracle_reward: f64, } impl Default for IntelligenceCalculator { fn default() -> Self { Self { capability_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], baseline_accuracy: 0.5, oracle_reward: 100.0, } } } impl IntelligenceCalculator { /// Calculate intelligence assessment from raw metrics pub fn calculate(&self, raw: &RawMetrics) -> IntelligenceAssessment { let capabilities = self.calculate_capabilities(raw); let reasoning = self.calculate_reasoning(raw); let learning = self.calculate_learning(raw); let tool_use = self.calculate_tool_use(raw); let meta_cognition = self.calculate_meta_cognition(raw); let cost = self.calculate_cost(raw); let robustness = self.calculate_robustness(raw); // Overall score: three equal pillars — graded outcomes, cost, robustness let overall_score = self.calculate_overall_score( &capabilities, &reasoning, &learning, &tool_use, &meta_cognition, &cost, &robustness, ); IntelligenceAssessment { overall_score, capabilities, reasoning, learning, tool_use, meta_cognition, cost, robustness, raw_data: raw.clone(), } } fn calculate_capabilities(&self, raw: &RawMetrics) -> CapabilityScores { let base_accuracy = if raw.tasks_attempted > 0 { raw.tasks_correct as f64 / raw.tasks_attempted as f64 } else { 0.0 }; // Temporal reasoning: accuracy on time-based tasks let temporal_reasoning = base_accuracy * 100.0; // Constraint satisfaction: correct solutions let constraint_satisfaction = base_accuracy * 100.0; // Information retrieval: based on steps to solution let avg_steps = if raw.tasks_attempted > 0 { raw.total_steps as f64 / raw.tasks_attempted as f64 } else { 100.0 }; let information_retrieval = (100.0 - avg_steps).max(0.0).min(100.0); // Pattern recognition: performance improvement across difficulties let pattern_recognition = self.calculate_pattern_recognition(raw); // Planning: efficiency of tool use let avg_tools = if raw.tasks_attempted > 0 { raw.total_tool_calls as f64 / raw.tasks_attempted as f64 } else { 0.0 }; let planning = if avg_tools > 0.0 && avg_tools <= 2.0 { 100.0 * (1.0 - (avg_tools - 1.0).abs() / 2.0) } else { 50.0 }; // Adaptation: improvement over episodes let adaptation = self.calculate_adaptation(raw); CapabilityScores { temporal_reasoning, constraint_satisfaction, information_retrieval, pattern_recognition, planning, adaptation, } } fn calculate_pattern_recognition(&self, raw: &RawMetrics) -> f64 { if raw.by_difficulty.len() < 2 { return 50.0; } // Check if harder problems are still solvable let mut difficulties: Vec<_> = raw.by_difficulty.keys().copied().collect(); difficulties.sort(); let mut scores = Vec::new(); for d in &difficulties { if let Some(stats) = raw.by_difficulty.get(d) { if stats.attempted > 0 { scores.push(stats.correct as f64 / stats.attempted as f64); } } } if scores.is_empty() { return 50.0; } // Average accuracy across difficulties let avg: f64 = scores.iter().sum::() / scores.len() as f64; avg * 100.0 } fn calculate_adaptation(&self, raw: &RawMetrics) -> f64 { if raw.episodes.len() < 3 { return 50.0; } // Check if accuracy improves over episodes let first_half: f64 = raw.episodes[..raw.episodes.len() / 2] .iter() .map(|e| e.accuracy) .sum::() / (raw.episodes.len() / 2) as f64; let second_half: f64 = raw.episodes[raw.episodes.len() / 2..] .iter() .map(|e| e.accuracy) .sum::() / (raw.episodes.len() - raw.episodes.len() / 2) as f64; let improvement = second_half - first_half; // Scale: -0.2 to +0.2 improvement maps to 0-100 ((improvement + 0.2) / 0.4 * 100.0).max(0.0).min(100.0) } fn calculate_reasoning(&self, raw: &RawMetrics) -> ReasoningMetrics { let constraint_satisfaction_rate = if raw.tasks_attempted > 0 { raw.tasks_correct as f64 / raw.tasks_attempted as f64 } else { 0.0 }; let avg_steps = if raw.tasks_attempted > 0 { raw.total_steps as f64 / raw.tasks_attempted as f64 } else { 100.0 }; // Reasoning efficiency: inverse of steps (normalized) let reasoning_efficiency = (100.0 - avg_steps).max(0.0).min(100.0) / 100.0; // Logical coherence: based on completion rate vs correct rate let completion_rate = if raw.tasks_attempted > 0 { raw.tasks_completed as f64 / raw.tasks_attempted as f64 } else { 0.0 }; let logical_coherence = if completion_rate > 0.0 { constraint_satisfaction_rate / completion_rate } else { 0.0 }; ReasoningMetrics { logical_coherence, constraint_satisfaction_rate, solution_optimality: constraint_satisfaction_rate, reasoning_efficiency, error_rate: 1.0 - constraint_satisfaction_rate, } } fn calculate_learning(&self, raw: &RawMetrics) -> LearningMetrics { let mut learning = LearningMetrics::default(); if raw.episodes.is_empty() { return learning; } // Sample efficiency: accuracy per episode learning.sample_efficiency = raw.episodes.iter().map(|e| e.accuracy).sum::() / raw.episodes.len() as f64; // Regret sublinearity: check if cumulative regret grows sublinearly // True sublinearity means R_k/k → 0 as k → ∞ (regret per episode decreasing) if raw.episodes.len() >= 5 { // Calculate regret trend using linear regression let n = raw.episodes.len() as f64; let mut sum_x = 0.0; let mut sum_y = 0.0; let mut sum_xy = 0.0; let mut sum_xx = 0.0; for (i, ep) in raw.episodes.iter().enumerate() { let x = (i + 1) as f64; let y = ep.regret; sum_x += x; sum_y += y; sum_xy += x * y; sum_xx += x * x; } let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x); // Negative slope = decreasing regret = sublinear // Transform: slope < 0 → sublinearity > 0 if slope < 0.0 { // Stronger negative slope = better sublinearity (cap at 1.0) learning.regret_sublinearity = (-slope / 10.0).min(1.0); } // Also check cumulative average let last = raw.episodes.last().unwrap(); let avg_regret = last.cumulative_regret / n; let first_half_avg = raw .episodes .iter() .take(raw.episodes.len() / 2) .map(|e| e.regret) .sum::() / (n / 2.0); // If second half has lower per-episode regret, that's sublinear if avg_regret < first_half_avg && learning.regret_sublinearity == 0.0 { learning.regret_sublinearity = ((first_half_avg - avg_regret) / first_half_avg).max(0.0); } } // Learning rate: improvement in accuracy over episodes if raw.episodes.len() >= 2 { let first_acc = raw.episodes[0].accuracy; let last_acc = raw.episodes.last().unwrap().accuracy; learning.learning_rate = (last_acc - first_acc + 1.0) / 2.0; } // Generalization: consistency across difficulties if raw.by_difficulty.len() >= 2 { let accuracies: Vec = raw .by_difficulty .values() .filter(|s| s.attempted > 0) .map(|s| s.correct as f64 / s.attempted as f64) .collect(); if !accuracies.is_empty() { let mean = accuracies.iter().sum::() / accuracies.len() as f64; let variance = accuracies.iter().map(|a| (a - mean).powi(2)).sum::() / accuracies.len() as f64; let std_dev = variance.sqrt(); // Lower variance = better generalization learning.generalization = (1.0 - std_dev).max(0.0); } } learning } fn calculate_tool_use(&self, raw: &RawMetrics) -> ToolUseMetrics { let avg_tools = if raw.tasks_attempted > 0 { raw.total_tool_calls as f64 / raw.tasks_attempted as f64 } else { 0.0 }; // Selection appropriateness: using tools when helpful let accuracy = if raw.tasks_attempted > 0 { raw.tasks_correct as f64 / raw.tasks_attempted as f64 } else { 0.0 }; // Effectiveness: accuracy when tools are used let utilization_effectiveness = accuracy; // Appropriateness: not overusing tools let selection_appropriateness = if avg_tools > 0.0 { (accuracy / avg_tools.min(2.0)).min(1.0) } else { 0.5 }; ToolUseMetrics { selection_appropriateness, utilization_effectiveness, composition_ability: avg_tools.min(1.0), // Using multiple tools discovery_ability: accuracy, // Finding solutions } } fn calculate_meta_cognition(&self, raw: &RawMetrics) -> MetaCognitiveMetrics { // Self-correction: completed but not correct -> corrected let completed_but_wrong = raw.tasks_completed.saturating_sub(raw.tasks_correct); let self_correction_rate = if completed_but_wrong > 0 { 0.0 // No self-correction if still wrong } else if raw.tasks_completed > 0 { 1.0 // All completed are correct } else { 0.5 }; // Strategy adaptation: improvement over episodes let strategy_adaptation = if raw.episodes.len() >= 3 { let trend: f64 = raw .episodes .windows(2) .map(|w| { if w[1].accuracy > w[0].accuracy { 1.0 } else { 0.0 } }) .sum::(); trend / (raw.episodes.len() - 1) as f64 } else { 0.5 }; MetaCognitiveMetrics { self_correction_rate, uncertainty_calibration: 0.5, // Would need confidence scores strategy_adaptation, progress_monitoring: strategy_adaptation, // Similar metric } } fn calculate_cost(&self, raw: &RawMetrics) -> CostMetrics { let steps_per_solve = if raw.tasks_correct > 0 { raw.total_steps as f64 / raw.tasks_correct as f64 } else if raw.tasks_attempted > 0 { raw.total_steps as f64 } else { 100.0 }; let tools_per_solve = if raw.tasks_correct > 0 { raw.total_tool_calls as f64 / raw.tasks_correct as f64 } else { 10.0 }; // Efficiency: 1.0 at <=5 steps/solve, 0.0 at >=100 steps/solve let cost_efficiency = (1.0 - (steps_per_solve - 5.0) / 95.0).clamp(0.0, 1.0); // Cost trend: compare early vs late episode accuracy per step let cost_trend = if raw.episodes.len() >= 4 { let half = raw.episodes.len() / 2; let early_acc: f64 = raw.episodes[..half].iter().map(|e| e.accuracy).sum::() / half as f64; let late_acc: f64 = raw.episodes[half..].iter().map(|e| e.accuracy).sum::() / (raw.episodes.len() - half) as f64; // If accuracy improves, effective cost per solve drops if early_acc > 0.01 { (late_acc - early_acc) / early_acc } else { 0.0 } } else { 0.0 }; CostMetrics { steps_per_solve, tools_per_solve, cost_efficiency, cost_trend, } } fn calculate_robustness(&self, raw: &RawMetrics) -> RobustnessMetrics { let noise_accuracy = if raw.noise_tasks_attempted > 0 { raw.noise_tasks_correct as f64 / raw.noise_tasks_attempted as f64 } else { 0.5 // no noise data -> neutral prior }; let clean_attempted = raw .tasks_attempted .saturating_sub(raw.noise_tasks_attempted); let clean_correct = raw.tasks_correct.saturating_sub(raw.noise_tasks_correct); let clean_accuracy = if clean_attempted > 0 { clean_correct as f64 / clean_attempted as f64 } else { 0.0 }; let noise_degradation = (clean_accuracy - noise_accuracy).max(0.0); let consistency = if raw.episodes.len() >= 2 { let mean = raw.episodes.iter().map(|e| e.accuracy).sum::() / raw.episodes.len() as f64; let variance = raw .episodes .iter() .map(|e| (e.accuracy - mean).powi(2)) .sum::() / raw.episodes.len() as f64; (1.0 - variance.sqrt()).max(0.0) } else { 0.5 }; let robustness_score = noise_accuracy * 0.4 + (1.0 - noise_degradation.min(1.0)) * 0.3 + consistency * 0.3; RobustnessMetrics { noise_accuracy, noise_degradation, consistency, robustness_score, } } fn calculate_overall_score( &self, capabilities: &CapabilityScores, reasoning: &ReasoningMetrics, learning: &LearningMetrics, tool_use: &ToolUseMetrics, meta_cognition: &MetaCognitiveMetrics, cost: &CostMetrics, robustness: &RobustnessMetrics, ) -> f64 { // Sub-scores (0-100 scale) let cap_score = capabilities.weighted_average(&self.capability_weights); let reasoning_score = (reasoning.logical_coherence + reasoning.constraint_satisfaction_rate + reasoning.solution_optimality + reasoning.reasoning_efficiency) / 4.0 * 100.0; let learning_score = (learning.sample_efficiency + learning.regret_sublinearity + learning.learning_rate + learning.generalization) / 4.0 * 100.0; let tool_score = (tool_use.selection_appropriateness + tool_use.utilization_effectiveness + tool_use.composition_ability + tool_use.discovery_ability) / 4.0 * 100.0; let meta_score = (meta_cognition.self_correction_rate + meta_cognition.strategy_adaptation + meta_cognition.progress_monitoring) / 3.0 * 100.0; let cost_score = cost.cost_efficiency * 100.0; let robustness_score = robustness.robustness_score * 100.0; // Three equal pillars: graded outcomes (~0.34), cost (~0.33), robustness (~0.33) // Graded outcomes = capabilities + reasoning + learning + tool + meta cap_score * 0.12 + reasoning_score * 0.10 + learning_score * 0.06 + tool_score * 0.03 + meta_score * 0.03 + cost_score * 0.33 + robustness_score * 0.33 } } /// Print a formatted intelligence report pub fn print_intelligence_report(assessment: &IntelligenceAssessment) { println!("╔══════════════════════════════════════════════════════════════╗"); println!("║ Intelligence Assessment Report ║"); println!("╚══════════════════════════════════════════════════════════════╝"); println!(); println!( "🧠 Overall Intelligence Score: {:.1}/100", assessment.overall_score ); println!(); println!("📊 Capability Scores:"); println!( " Temporal Reasoning: {:5.1}", assessment.capabilities.temporal_reasoning ); println!( " Constraint Satisfaction:{:5.1}", assessment.capabilities.constraint_satisfaction ); println!( " Information Retrieval: {:5.1}", assessment.capabilities.information_retrieval ); println!( " Pattern Recognition: {:5.1}", assessment.capabilities.pattern_recognition ); println!( " Planning: {:5.1}", assessment.capabilities.planning ); println!( " Adaptation: {:5.1}", assessment.capabilities.adaptation ); println!(); println!("🔍 Reasoning Quality:"); println!( " Logical Coherence: {:.2}", assessment.reasoning.logical_coherence ); println!( " Constraint Satisfaction:{:.2}", assessment.reasoning.constraint_satisfaction_rate ); println!( " Solution Optimality: {:.2}", assessment.reasoning.solution_optimality ); println!( " Reasoning Efficiency: {:.2}", assessment.reasoning.reasoning_efficiency ); println!( " Error Rate: {:.2}", assessment.reasoning.error_rate ); println!(); println!("📈 Learning Metrics:"); println!( " Sample Efficiency: {:.2}", assessment.learning.sample_efficiency ); println!( " Regret Sublinearity: {:.2}", assessment.learning.regret_sublinearity ); println!( " Learning Rate: {:.2}", assessment.learning.learning_rate ); println!( " Generalization: {:.2}", assessment.learning.generalization ); println!(); println!("🔧 Tool Use Proficiency:"); println!( " Selection: {:.2}", assessment.tool_use.selection_appropriateness ); println!( " Effectiveness: {:.2}", assessment.tool_use.utilization_effectiveness ); println!( " Composition: {:.2}", assessment.tool_use.composition_ability ); println!(); println!("🪞 Meta-Cognitive Indicators:"); println!( " Self-Correction: {:.2}", assessment.meta_cognition.self_correction_rate ); println!( " Strategy Adaptation: {:.2}", assessment.meta_cognition.strategy_adaptation ); println!( " Progress Monitoring: {:.2}", assessment.meta_cognition.progress_monitoring ); } #[cfg(test)] mod tests { use super::*; #[test] fn test_intelligence_calculation() { let mut raw = RawMetrics::default(); raw.tasks_attempted = 100; raw.tasks_completed = 90; raw.tasks_correct = 80; raw.total_steps = 500; raw.total_tool_calls = 100; let calculator = IntelligenceCalculator::default(); let assessment = calculator.calculate(&raw); assert!(assessment.overall_score > 0.0); assert!(assessment.capabilities.temporal_reasoning > 0.0); } #[test] fn test_learning_metrics() { let mut raw = RawMetrics::default(); raw.tasks_attempted = 50; raw.tasks_correct = 40; // Add episodes showing improvement for i in 0..10 { raw.episodes.push(EpisodeMetrics { episode: i + 1, accuracy: 0.5 + 0.04 * i as f64, reward: 50.0 + 4.0 * i as f64, regret: 50.0 - 4.0 * i as f64, cumulative_regret: (0..=i).map(|j| 50.0 - 4.0 * j as f64).sum(), }); } let calculator = IntelligenceCalculator::default(); let assessment = calculator.calculate(&raw); // Should show learning (improvement over time) assert!(assessment.learning.learning_rate > 0.5); } }