Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/examples/benchmarks/src/agi_contract.rs
+++ b/examples/benchmarks/src/agi_contract.rs
@@ -0,0 +1,627 @@
+//! AGI Contract — Defines intelligence as a measurable, falsifiable contract.
+//!
+//! The AGI contract states: a system improves utility over time without violating
+//! policy, while maintaining structural health.
+//!
+//! ## Core Metrics (all deterministic, all auditable)
+//!
+//! - **Solved tasks per cost** — graded outcomes normalized by compute
+//! - **Stability under noise** — accuracy retention when inputs are corrupted
+//! - **Contradiction rate** — solved-but-wrong / total attempted
+//! - **Rollback correctness** — recovery rate when bad inputs are detected
+//! - **Policy violations** — budget overruns + contradictions (must be zero)
+//!
+//! ## Autonomy Ladder
+//!
+//! Each level requires sustained health metrics before advancement:
+//! 0. Read-only (observe only)
+//! 1. Write to memory (store episodes, no execution)
+//! 2. Execute tools (run solver, generate puzzles)
+//! 3. Write to external systems (publish results)
+//! 4. Deploy and operate (self-directed improvement)
+
+use crate::intelligence_metrics::{IntelligenceAssessment, RawMetrics};
+use serde::{Deserialize, Serialize};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Contract Health Snapshot
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Point-in-time snapshot of how one run scores on each metric of the AGI contract.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ContractHealth {
+    /// Correct tasks per step (`tasks_correct / total_steps`); `from_raw` yields 0.0 with no steps.
+    pub solved_per_cost: f64,
+    /// Fraction of noise-injected tasks answered correctly; `from_raw` yields 0.0 if none were run.
+    pub noise_stability: f64,
+    /// Contradictions (solved-but-wrong) per attempted task; `from_raw` yields 0.0 with no attempts.
+    pub contradiction_rate: f64,
+    /// Successful rollbacks / attempted rollbacks; `from_raw` yields 1.0 when none were attempted.
+    pub rollback_correctness: f64,
+    /// Number of policy violations (must be zero for contract compliance).
+    pub policy_violations: usize,
+    /// Accuracy on attempted tasks (`tasks_correct / tasks_attempted`); 0.0 with no attempts.
+    pub accuracy: f64,
+    /// Cost efficiency in [0, 1]; `from_raw` maps <= 5 steps per solve to 1.0 and >= 100 to 0.0.
+    pub cost_efficiency: f64,
+    /// `from_raw` sets this when violations == 0, contradiction rate < 1%, and accuracy >= 90%.
+    pub compliant: bool,
+}
+
+impl ContractHealth {
+    /// Build a health snapshot from the raw counters of a run.
+    ///
+    /// Ratios with a zero denominator fall back to a fixed value: 0.0 for accuracy,
+    /// solved-per-cost, noise stability and contradiction rate, and 1.0 for rollback
+    /// correctness (no rollback attempted counts as perfect recovery).
+    pub fn from_raw(raw: &RawMetrics) -> Self {
+        // Guarded division: yields `fallback` unless the denominator is positive.
+        let ratio = |numerator: f64, denominator: f64, fallback: f64| -> f64 {
+            if denominator > 0.0 {
+                numerator / denominator
+            } else {
+                fallback
+            }
+        };
+
+        let correct = raw.tasks_correct as f64;
+        let attempted = raw.tasks_attempted as f64;
+        let steps = raw.total_steps as f64;
+
+        let accuracy = ratio(correct, attempted, 0.0);
+        let solved_per_cost = ratio(correct, steps, 0.0);
+        let contradiction_rate = ratio(raw.contradictions as f64, attempted, 0.0);
+        let noise_stability = ratio(
+            raw.noise_tasks_correct as f64,
+            raw.noise_tasks_attempted as f64,
+            0.0,
+        );
+        let rollback_correctness = ratio(
+            raw.rollback_successes as f64,
+            raw.rollback_attempts as f64,
+            1.0,
+        );
+
+        // Steps spent per correct solve (100 when nothing was solved), mapped linearly
+        // so that 5 steps/solve scores 1.0 and 100 steps/solve scores 0.0.
+        let steps_per_solve = ratio(steps, correct, 100.0);
+        let cost_efficiency = (1.0 - (steps_per_solve - 5.0) / 95.0).clamp(0.0, 1.0);
+
+        // Compliance needs all three: no violations, < 1% contradictions, >= 90% accuracy.
+        let compliant = raw.policy_violations == 0 && contradiction_rate < 0.01 && accuracy >= 0.90;
+
+        Self {
+            solved_per_cost,
+            noise_stability,
+            contradiction_rate,
+            rollback_correctness,
+            policy_violations: raw.policy_violations,
+            accuracy,
+            cost_efficiency,
+            compliant,
+        }
+    }
+
+    /// Build a health snapshot from the raw data carried by an `IntelligenceAssessment`.
+    pub fn from_assessment(assessment: &IntelligenceAssessment) -> Self {
+        let raw = &assessment.raw_data;
+        Self::from_raw(raw)
+    }
+
+    /// Write the snapshot to stdout, one metric per line, ratios shown as percentages.
+    pub fn print(&self) {
+        let as_percent = |fraction: f64| fraction * 100.0;
+        let verdict = if self.compliant { "YES" } else { "NO" };
+
+        println!("  Contract Health:");
+        println!("    Solved/Cost:        {:.4}", self.solved_per_cost);
+        println!(
+            "    Noise Stability:    {:.2}%",
+            as_percent(self.noise_stability)
+        );
+        println!(
+            "    Contradiction Rate: {:.4}%",
+            as_percent(self.contradiction_rate)
+        );
+        println!(
+            "    Rollback Correct:   {:.2}%",
+            as_percent(self.rollback_correctness)
+        );
+        println!("    Policy Violations:  {}", self.policy_violations);
+        println!("    Accuracy:           {:.2}%", as_percent(self.accuracy));
+        println!(
+            "    Cost Efficiency:    {:.2}%",
+            as_percent(self.cost_efficiency)
+        );
+        println!("    Compliant:          {}", verdict);
+    }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Contract Delta — compares two snapshots
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Per-dimension change between two health snapshots; every delta is `after - before`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ContractDelta {
+    /// Change in solved-per-cost (positive = more correct tasks per step)
+    pub solved_per_cost_delta: f64,
+    /// Change in noise stability (positive = more robust to corrupted inputs)
+    pub noise_stability_delta: f64,
+    /// Change in contradiction rate (negative = improving, since lower is better)
+    pub contradiction_rate_delta: f64,
+    /// Change in rollback correctness (positive = better recovery)
+    pub rollback_delta: f64,
+    /// Change in accuracy (positive = more attempted tasks answered correctly)
+    pub accuracy_delta: f64,
+    /// Change in cost efficiency (positive = fewer steps per solve)
+    pub cost_efficiency_delta: f64,
+    /// How many of the six dimensions above improved beyond the tolerance used by `between`
+    pub dimensions_improved: usize,
+    /// How many of the six dimensions above regressed beyond the tolerance used by `between`
+    pub dimensions_regressed: usize,
+}
+
+impl ContractDelta {
+    /// Compute the per-dimension change from `before` to `after`.
+    ///
+    /// Each delta is `after - before`. A dimension is counted as improved or regressed
+    /// only when it moves by more than 0.001 in the corresponding direction; smaller
+    /// movements are treated as noise and counted as neither. The same tolerance is
+    /// applied to all six dimensions and in both directions.
+    pub fn between(before: &ContractHealth, after: &ContractHealth) -> Self {
+        // Dead band shared by every dimension, for improvements and regressions alike.
+        const EPSILON: f64 = 0.001;
+
+        let solved_per_cost_delta = after.solved_per_cost - before.solved_per_cost;
+        let noise_stability_delta = after.noise_stability - before.noise_stability;
+        let contradiction_rate_delta = after.contradiction_rate - before.contradiction_rate;
+        let rollback_delta = after.rollback_correctness - before.rollback_correctness;
+        let accuracy_delta = after.accuracy - before.accuracy;
+        let cost_efficiency_delta = after.cost_efficiency - before.cost_efficiency;
+
+        // Express every change as a "gain" where positive means better. Contradiction
+        // rate is negated because a decrease is the improvement.
+        let gains = [
+            solved_per_cost_delta,
+            noise_stability_delta,
+            -contradiction_rate_delta,
+            rollback_delta,
+            accuracy_delta,
+            cost_efficiency_delta,
+        ];
+        let dimensions_improved = gains.iter().filter(|&&gain| gain > EPSILON).count();
+        let dimensions_regressed = gains.iter().filter(|&&gain| gain < -EPSILON).count();
+
+        ContractDelta {
+            solved_per_cost_delta,
+            noise_stability_delta,
+            contradiction_rate_delta,
+            rollback_delta,
+            accuracy_delta,
+            cost_efficiency_delta,
+            dimensions_improved,
+            dimensions_regressed,
+        }
+    }
+
+    /// Write the deltas to stdout, each followed by a `[+]`, `[=]` or `[-]` marker.
+    ///
+    /// The marker reflects whether the change is good (`+`), exactly zero (`=`) or
+    /// otherwise (`-`); it is based on the sign alone, without the tolerance used for
+    /// the improved/regressed counts.
+    pub fn print(&self) {
+        // `invert` flips the meaning of the sign for metrics where lower is better.
+        let arrow = |v: f64, invert: bool| {
+            let positive = if invert { v < 0.0 } else { v > 0.0 };
+            if positive {
+                "+"
+            } else if v == 0.0 {
+                "="
+            } else {
+                "-"
+            }
+        };
+        println!("  Contract Delta:");
+        println!(
+            "    Solved/Cost:     {:>+.4} [{}]",
+            self.solved_per_cost_delta,
+            arrow(self.solved_per_cost_delta, false)
+        );
+        println!(
+            "    Noise Stability: {:>+.4} [{}]",
+            self.noise_stability_delta,
+            arrow(self.noise_stability_delta, false)
+        );
+        println!(
+            "    Contradiction:   {:>+.4} [{}]",
+            self.contradiction_rate_delta,
+            arrow(self.contradiction_rate_delta, true)
+        );
+        println!(
+            "    Rollback:        {:>+.4} [{}]",
+            self.rollback_delta,
+            arrow(self.rollback_delta, false)
+        );
+        println!(
+            "    Accuracy:        {:>+.4} [{}]",
+            self.accuracy_delta,
+            arrow(self.accuracy_delta, false)
+        );
+        println!(
+            "    Cost Efficiency: {:>+.4} [{}]",
+            self.cost_efficiency_delta,
+            arrow(self.cost_efficiency_delta, false)
+        );
+        println!("    Dimensions improved:  {}/6", self.dimensions_improved);
+        println!("    Dimensions regressed: {}/6", self.dimensions_regressed);
+    }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Autonomy Ladder
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Rung on the autonomy ladder; variants are declared lowest-first, so derived `Ord` ranks them.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub enum AutonomyLevel {
+    /// Level 0: read-only observation (the starting level)
+    ReadOnly = 0,
+    /// Level 1: may write to memory (store episodes, no execution)
+    WriteMemory = 1,
+    /// Level 2: may execute tools (run the solver)
+    ExecuteTools = 2,
+    /// Level 3: may write to external systems (publish results)
+    WriteExternal = 3,
+    /// Level 4: may deploy and operate (self-directed improvement)
+    DeployOperate = 4,
+}
+
+/// Thresholds for advancing autonomy levels; each array is indexed by level number (0-4).
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct AutonomyGates {
+    /// Number of most recent cycles that must all pass a level's gates to reach that level
+    pub min_compliant_cycles: usize,
+    /// Maximum allowed contradiction rate per level (inclusive upper bound)
+    pub max_contradiction_rate: [f64; 5],
+    /// Minimum accuracy per level (inclusive lower bound)
+    pub min_accuracy: [f64; 5],
+    /// Minimum cost efficiency per level (inclusive lower bound)
+    pub min_cost_efficiency: [f64; 5],
+    /// Minimum noise stability per level (inclusive lower bound)
+    pub min_noise_stability: [f64; 5],
+    /// Levels at or above this one require zero policy violations in every evaluated cycle
+    pub zero_violations_above: AutonomyLevel,
+}
+
+impl Default for AutonomyGates {
+    /// Default ladder: three consecutive passing cycles per level, with gates that
+    /// tighten at every rung and zero policy violations required from level 2 upward.
+    fn default() -> Self {
+        AutonomyGates {
+            // Columns are levels:     L0    L1    L2    L3    L4
+            min_accuracy: [0.0, 0.70, 0.85, 0.92, 0.96],
+            max_contradiction_rate: [1.0, 0.05, 0.02, 0.01, 0.005],
+            min_cost_efficiency: [0.0, 0.20, 0.40, 0.60, 0.75],
+            min_noise_stability: [0.0, 0.50, 0.65, 0.80, 0.90],
+            min_compliant_cycles: 3,
+            zero_violations_above: AutonomyLevel::ExecuteTools,
+        }
+    }
+}
+
+/// Evaluator that determines the current autonomy level from contract history.
+///
+/// `Default` is derived: the default evaluator uses `AutonomyGates::default()`.
+#[derive(Default)]
+pub struct AutonomyEvaluator {
+    /// Thresholds each level must satisfy before it is granted.
+    pub gates: AutonomyGates,
+}
+
+impl AutonomyEvaluator {
+    /// Determine the highest autonomy level supported by the health history.
+    ///
+    /// `history` is ordered oldest-first. Only the most recent `min_compliant_cycles`
+    /// snapshots are examined. Levels are checked in ascending order and the climb
+    /// stops at the first level whose gates are not met by every snapshot in that
+    /// window, so a level is never granted unless all lower levels pass too.
+    ///
+    /// Returns `AutonomyLevel::ReadOnly` when the history is shorter than the window.
+    /// A configured window of zero is treated as one: an empty window would satisfy
+    /// every gate vacuously and grant the top level without any evidence.
+    pub fn evaluate(&self, history: &[ContractHealth]) -> AutonomyLevel {
+        let gates = &self.gates;
+        let required = gates.min_compliant_cycles.max(1);
+        if history.len() < required {
+            return AutonomyLevel::ReadOnly;
+        }
+
+        // The window is the same for every candidate level, so slice it once.
+        let recent = &history[history.len() - required..];
+
+        let clears_gates = |candidate: AutonomyLevel| {
+            let idx = candidate as usize;
+            recent.iter().all(|h| {
+                h.accuracy >= gates.min_accuracy[idx]
+                    && h.contradiction_rate <= gates.max_contradiction_rate[idx]
+                    && h.cost_efficiency >= gates.min_cost_efficiency[idx]
+                    && h.noise_stability >= gates.min_noise_stability[idx]
+                    // Levels below the cut-off tolerate policy violations.
+                    && (candidate < gates.zero_violations_above || h.policy_violations == 0)
+            })
+        };
+
+        [
+            AutonomyLevel::WriteMemory,
+            AutonomyLevel::ExecuteTools,
+            AutonomyLevel::WriteExternal,
+            AutonomyLevel::DeployOperate,
+        ]
+        .iter()
+        .copied()
+        .take_while(|&candidate| clears_gates(candidate))
+        .last()
+        .unwrap_or(AutonomyLevel::ReadOnly)
+    }
+
+    /// Write the current level and the gates of the next level to stdout.
+    ///
+    /// `health` supplies the current values shown next to each required threshold.
+    /// At the top level there is no next level, so the top level's own gates are shown.
+    pub fn print_status(&self, level: AutonomyLevel, health: &ContractHealth) {
+        let labels = [
+            "Read-Only",
+            "Write Memory",
+            "Execute Tools",
+            "Write External",
+            "Deploy & Operate",
+        ];
+        println!(
+            "  Autonomy Level: {} ({})",
+            level as usize, labels[level as usize]
+        );
+        println!("  Gates for next level:");
+        // Clamp to the last gate index so the top level does not index out of bounds.
+        let next = (level as usize + 1).min(4);
+        println!(
+            "    Accuracy:       {:.0}% (need {:.0}%)",
+            health.accuracy * 100.0,
+            self.gates.min_accuracy[next] * 100.0
+        );
+        println!(
+            "    Contradiction:  {:.3}% (need <{:.3}%)",
+            health.contradiction_rate * 100.0,
+            self.gates.max_contradiction_rate[next] * 100.0
+        );
+        println!(
+            "    Cost Eff:       {:.0}% (need {:.0}%)",
+            health.cost_efficiency * 100.0,
+            self.gates.min_cost_efficiency[next] * 100.0
+        );
+        println!(
+            "    Noise Stab:     {:.0}% (need {:.0}%)",
+            health.noise_stability * 100.0,
+            self.gates.min_noise_stability[next] * 100.0
+        );
+    }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Viability Checklist
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// The five viability checks; `all_pass` reports a viable trajectory only when all are true.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ViabilityChecklist {
+    /// Can replay runs and get identical grades (`evaluate` always sets this to true)
+    pub deterministic_replay: bool,
+    /// Accuracy did not drop from first to last snapshot and no snapshot had policy violations
+    pub improving_without_violations: bool,
+    /// Every snapshot has rollback correctness of at least 80%
+    pub reliable_rollback: bool,
+    /// Can generate unbounded novel tasks with automatic grading (`evaluate` always sets true)
+    pub infinite_gradeable_tasks: bool,
+    /// Mean solved-per-cost of the last third of the history exceeds that of the first third
+    pub cost_trending_down: bool,
+}
+
+impl ViabilityChecklist {
+    /// Run the five viability checks over a health history.
+    ///
+    /// The improvement check needs at least two snapshots and the cost-trend check
+    /// at least three; with fewer, the respective check fails.
+    pub fn evaluate(history: &[ContractHealth]) -> Self {
+        // Mean solved-per-cost over a non-empty window of snapshots.
+        let mean_solved_per_cost = |window: &[ContractHealth]| -> f64 {
+            window.iter().map(|h| h.solved_per_cost).sum::<f64>() / window.len() as f64
+        };
+
+        // Accuracy must not have dropped between the first and last snapshot, and no
+        // snapshot anywhere in the history may carry a policy violation.
+        let improving_without_violations = match (history.first(), history.last()) {
+            (Some(first), Some(last)) if history.len() >= 2 => {
+                last.accuracy >= first.accuracy && history.iter().all(|h| h.policy_violations == 0)
+            }
+            _ => false,
+        };
+
+        // Every snapshot must show rollback correctness of at least 80%.
+        let reliable_rollback = history.iter().all(|h| h.rollback_correctness >= 0.8);
+
+        // Cost per solve is falling when solved-per-cost rises: compare the mean of
+        // the first third of the history with the mean of the last third.
+        let cost_trending_down = history.len() >= 3 && {
+            let n = history.len();
+            let first_third = mean_solved_per_cost(&history[..n / 3]);
+            let last_third = mean_solved_per_cost(&history[n * 2 / 3..]);
+            first_third < last_third
+        };
+
+        Self {
+            // Verified externally; always true in this harness.
+            deterministic_replay: true,
+            improving_without_violations,
+            reliable_rollback,
+            // Always true: PuzzleGenerator is unbounded.
+            infinite_gradeable_tasks: true,
+            cost_trending_down,
+        }
+    }
+
+    /// True only when every one of the five checks passed.
+    pub fn all_pass(&self) -> bool {
+        [
+            self.deterministic_replay,
+            self.improving_without_violations,
+            self.reliable_rollback,
+            self.infinite_gradeable_tasks,
+            self.cost_trending_down,
+        ]
+        .iter()
+        .all(|&passed| passed)
+    }
+
+    /// Write the checklist to stdout as PASS/FAIL lines followed by the overall verdict.
+    pub fn print(&self) {
+        fn verdict(passed: bool) -> &'static str {
+            if passed {
+                "PASS"
+            } else {
+                "FAIL"
+            }
+        }
+
+        let overall = if self.all_pass() {
+            "VIABLE AGI TRAJECTORY"
+        } else {
+            "NOT YET VIABLE"
+        };
+
+        println!("  Viability Checklist:");
+        println!(
+            "    1. Deterministic replay:       {}",
+            verdict(self.deterministic_replay)
+        );
+        println!(
+            "    2. Improving w/o violations:    {}",
+            verdict(self.improving_without_violations)
+        );
+        println!(
+            "    3. Reliable rollback:           {}",
+            verdict(self.reliable_rollback)
+        );
+        println!(
+            "    4. Infinite gradeable tasks:    {}",
+            verdict(self.infinite_gradeable_tasks)
+        );
+        println!(
+            "    5. Cost trending down:          {}",
+            verdict(self.cost_trending_down)
+        );
+        println!("    Overall: {}", overall);
+    }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Tests
+// ═══════════════════════════════════════════════════════════════════════════
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Assert that `actual` lies strictly within `tolerance` of `expected`.
+    fn assert_close(actual: f64, expected: f64, tolerance: f64) {
+        assert!((actual - expected).abs() < tolerance);
+    }
+
+    /// `from_raw` derives every ratio from the counters and flags a clean run compliant.
+    #[test]
+    fn contract_health_from_raw() {
+        let mut metrics = RawMetrics::default();
+        metrics.tasks_attempted = 100;
+        metrics.tasks_completed = 95;
+        metrics.tasks_correct = 92;
+        metrics.total_steps = 600;
+        metrics.noise_tasks_attempted = 30;
+        metrics.noise_tasks_correct = 25;
+        metrics.contradictions = 0; // zero contradictions for compliance
+        metrics.rollback_attempts = 5;
+        metrics.rollback_successes = 4;
+
+        let snapshot = ContractHealth::from_raw(&metrics);
+        assert_close(snapshot.accuracy, 0.92, 0.01);
+        assert_close(snapshot.solved_per_cost, 92.0 / 600.0, 0.01);
+        assert_close(snapshot.noise_stability, 25.0 / 30.0, 0.01);
+        assert_close(snapshot.contradiction_rate, 0.0, 0.001);
+        assert_close(snapshot.rollback_correctness, 0.8, 0.01);
+        // 0 violations, 0% contradictions, >=90% accuracy
+        assert!(snapshot.compliant);
+    }
+
+    /// A snapshot that is better on every axis counts six improvements and no regressions.
+    #[test]
+    fn contract_delta_detects_improvement() {
+        let earlier = ContractHealth {
+            solved_per_cost: 0.10,
+            noise_stability: 0.70,
+            contradiction_rate: 0.03,
+            rollback_correctness: 0.80,
+            policy_violations: 0,
+            accuracy: 0.85,
+            cost_efficiency: 0.50,
+            compliant: false,
+        };
+        let later = ContractHealth {
+            solved_per_cost: 0.15,
+            noise_stability: 0.85,
+            contradiction_rate: 0.01,
+            rollback_correctness: 0.90,
+            policy_violations: 0,
+            accuracy: 0.93,
+            cost_efficiency: 0.70,
+            compliant: true,
+        };
+
+        let change = ContractDelta::between(&earlier, &later);
+        assert_eq!(change.dimensions_improved, 6);
+        assert_eq!(change.dimensions_regressed, 0);
+    }
+
+    /// Empty history stays read-only; three cycles meeting the L1 gates reach level 1.
+    #[test]
+    fn autonomy_ladder_advances() {
+        let evaluator = AutonomyEvaluator::default();
+
+        // No history => ReadOnly
+        assert_eq!(evaluator.evaluate(&[]), AutonomyLevel::ReadOnly);
+
+        // 3 compliant cycles at L1 level
+        let level_one_cycle = ContractHealth {
+            solved_per_cost: 0.15,
+            noise_stability: 0.55,
+            contradiction_rate: 0.04,
+            rollback_correctness: 1.0,
+            policy_violations: 0,
+            accuracy: 0.75,
+            cost_efficiency: 0.30,
+            compliant: true,
+        };
+        let history = vec![level_one_cycle; 3];
+        assert_eq!(evaluator.evaluate(&history), AutonomyLevel::WriteMemory);
+    }
+
+    /// A steadily improving, violation-free history passes every viability check.
+    #[test]
+    fn viability_checklist_basic() {
+        let early = ContractHealth {
+            solved_per_cost: 0.10,
+            noise_stability: 0.70,
+            contradiction_rate: 0.01,
+            rollback_correctness: 0.90,
+            policy_violations: 0,
+            accuracy: 0.85,
+            cost_efficiency: 0.50,
+            compliant: true,
+        };
+        let middle = ContractHealth {
+            solved_per_cost: 0.12,
+            noise_stability: 0.80,
+            contradiction_rate: 0.005,
+            rollback_correctness: 0.95,
+            policy_violations: 0,
+            accuracy: 0.90,
+            cost_efficiency: 0.60,
+            compliant: true,
+        };
+        let late = ContractHealth {
+            solved_per_cost: 0.15,
+            noise_stability: 0.85,
+            contradiction_rate: 0.002,
+            rollback_correctness: 0.95,
+            policy_violations: 0,
+            accuracy: 0.93,
+            cost_efficiency: 0.70,
+            compliant: true,
+        };
+
+        let checklist = ViabilityChecklist::evaluate(&[early, middle, late]);
+        assert!(checklist.deterministic_replay);
+        assert!(checklist.improving_without_violations);
+        assert!(checklist.reliable_rollback);
+        assert!(checklist.infinite_gradeable_tasks);
+        assert!(checklist.cost_trending_down);
+        assert!(checklist.all_pass());
+    }
+}