//! End-to-end integration tests and benchmarks for the AGI Cognitive Container //! system (ADR-036). Covers the full build -> serialize -> parse -> validate //! cycle, signed container tamper detection, execution mode validation matrix, //! authority levels, resource budgets, coherence thresholds, and perf benchmarks. use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest}; use rvf_runtime::seed_crypto; use rvf_types::agi_container::*; const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes"; const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#; const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#; const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#; const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#; const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#; /// Build a fully-populated container with every optional section. fn build_full_container() -> (Vec, AgiContainerHeader) { AgiContainerBuilder::new([0x01; 16], [0x02; 16]) .with_model_id("claude-opus-4-6") .with_policy(b"autonomous", [0xAA; 8]) .with_orchestrator(ORCH_JSON) .with_tool_registry(TOOLS_JSON) .with_agent_prompts(b"You are a coder agent.") .with_eval_tasks(TASKS_JSON) .with_eval_graders(GRADERS_JSON) .with_skill_library(b"[]") .with_replay_script(b"#!/bin/sh\nrvf replay $1") .with_kernel_config(b"console=ttyS0") .with_network_config(b"{\"port\":8080}") .with_coherence_config(COHER_JSON) .with_project_instructions(b"# CLAUDE.md") .with_dependency_snapshot(b"sha256:abc123") .with_authority_config(b"{\"level\":\"WriteMemory\"}") .with_domain_profile(b"coding") .offline_capable() .with_segments(ContainerSegments { kernel_present: true, kernel_size: 5_000_000, wasm_count: 2, wasm_total_size: 60_000, vec_segment_count: 4, index_segment_count: 2, witness_count: 100, crypto_present: false, manifest_present: true, orchestrator_present: true, world_model_present: true, domain_expansion_present: false, total_size: 0, }) 
.build() .unwrap() } // -- 1. Full Container Lifecycle -- #[test] fn full_container_lifecycle() { let (payload, header) = build_full_container(); assert!(header.is_valid_magic()); assert_eq!(header.version, 1); assert!(header.has_kernel()); assert!(header.has_orchestrator()); assert!(header.has_world_model()); assert!(header.is_replay_capable()); assert!(header.is_offline_capable()); assert!( header.created_ns > 0, "created_ns should be a real timestamp" ); // Header round-trip. let header_rt = AgiContainerHeader::from_bytes(&header.to_bytes()).unwrap(); assert_eq!(header_rt, header); // Parse manifest and verify every section. let p = ParsedAgiManifest::parse(&payload).unwrap(); assert_eq!(p.model_id_str(), Some("claude-opus-4-6")); assert_eq!(p.orchestrator_config.unwrap(), ORCH_JSON); assert_eq!(p.tool_registry.unwrap(), TOOLS_JSON); assert_eq!(p.eval_tasks.unwrap(), TASKS_JSON); assert_eq!(p.eval_graders.unwrap(), GRADERS_JSON); assert_eq!(p.coherence_config.unwrap(), COHER_JSON); assert!(p.policy.is_some()); assert!(p.agent_prompts.is_some()); assert!(p.skill_library.is_some()); assert!(p.replay_script.is_some()); assert!(p.kernel_config.is_some()); assert!(p.network_config.is_some()); assert!(p.project_instructions.is_some()); assert!(p.dependency_snapshot.is_some()); assert!(p.authority_config.is_some()); assert!(p.domain_profile.is_some()); assert!(p.is_autonomous_capable()); // Segment-derived flags should all be present in the header. let seg_flags = ContainerSegments { kernel_present: true, wasm_count: 2, witness_count: 100, orchestrator_present: true, world_model_present: true, ..Default::default() } .to_flags(); assert_eq!(header.flags & seg_flags, seg_flags); } // -- 2. 
// Signed Container Tamper Detection --

/// Signs a container, confirms the trailing signature verifies, then checks
/// that flipping bits in either the TLV area or the header breaks verification.
#[test]
fn signed_container_tamper_detection() {
    let segments = ContainerSegments {
        kernel_present: true,
        manifest_present: true,
        world_model_present: true,
        ..Default::default()
    };
    let (payload, header) = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(segments)
        .build_and_sign(SIGNING_KEY)
        .unwrap();
    assert!(header.is_signed());

    // The last 32 bytes are treated as the signature over everything before them.
    let unsigned_len = payload.len() - 32;
    let (signed_region, sig) = payload.split_at(unsigned_len);
    assert!(seed_crypto::verify_seed(SIGNING_KEY, signed_region, sig));

    // Produces a copy of the payload with `mask` XOR-ed into one byte.
    let flip = |index: usize, mask: u8| {
        let mut copy = payload.clone();
        copy[index] ^= mask;
        copy
    };

    // Tamper with one byte in the TLV payload area.
    let tampered = flip(AGI_HEADER_SIZE + 10, 0xFF);
    assert!(
        !seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig),
        "tampered payload must fail verification"
    );

    // Tamper with header byte.
    let tampered_hdr = flip(7, 0x01);
    assert!(
        !seed_crypto::verify_seed(SIGNING_KEY, &tampered_hdr[..unsigned_len], sig),
        "tampered header must fail verification"
    );
}

// -- 3.
// Execution Mode Validation Matrix --

/// Checks `ContainerSegments::validate` against a table of segment layouts
/// and execution modes, each paired with whether validation should succeed.
#[test]
fn execution_mode_validation_matrix() {
    // Shorthand constructor; all fields not listed stay at their defaults.
    let segments = |manifest, kernel, wasm, world_model, vec_segs, index_segs, witnesses| {
        ContainerSegments {
            manifest_present: manifest,
            kernel_present: kernel,
            wasm_count: wasm,
            world_model_present: world_model,
            vec_segment_count: vec_segs,
            index_segment_count: index_segs,
            witness_count: witnesses,
            ..Default::default()
        }
    };

    let cases = [
        (
            "Replay + no witness -> fail",
            segments(true, false, 0, false, 0, 0, 0),
            ExecutionMode::Replay,
            false,
        ),
        (
            "Replay + witness -> pass",
            segments(true, false, 0, false, 0, 0, 10),
            ExecutionMode::Replay,
            true,
        ),
        (
            "Verify + no runtime -> fail",
            segments(true, false, 0, false, 0, 0, 0),
            ExecutionMode::Verify,
            false,
        ),
        (
            "Verify + kernel + world_model -> pass",
            segments(true, true, 0, true, 0, 0, 0),
            ExecutionMode::Verify,
            true,
        ),
        (
            "Verify + wasm + vec -> pass",
            segments(true, false, 1, false, 2, 0, 0),
            ExecutionMode::Verify,
            true,
        ),
        (
            "Live + kernel only (no world model) -> fail",
            segments(true, true, 0, false, 0, 0, 0),
            ExecutionMode::Live,
            false,
        ),
        (
            "Live + kernel + world model -> pass",
            segments(true, true, 0, true, 0, 0, 0),
            ExecutionMode::Live,
            true,
        ),
    ];

    for (label, layout, mode, should_pass) in cases {
        assert_eq!(layout.validate(mode).is_ok(), should_pass, "{label}");
    }
}

// -- 4. Authority Level Tests --

/// Each execution mode maps to its expected default authority level.
#[test]
fn authority_level_defaults_per_mode() {
    let expected_defaults = [
        (ExecutionMode::Replay, AuthorityLevel::ReadOnly),
        (ExecutionMode::Verify, AuthorityLevel::ExecuteTools),
        (ExecutionMode::Live, AuthorityLevel::WriteMemory),
    ];

    for (mode, expected) in expected_defaults {
        assert_eq!(AuthorityLevel::default_for_mode(mode), expected);
    }
}

/// `permits` answers as expected for the highest, a middle, and the lowest
/// authority level against every requested level.
#[test]
fn authority_level_hierarchy() {
    let expectations = [
        // WriteExternal permits all.
        (
            "WriteExternal -> ReadOnly",
            AuthorityLevel::WriteExternal,
            AuthorityLevel::ReadOnly,
            true,
        ),
        (
            "WriteExternal -> WriteMemory",
            AuthorityLevel::WriteExternal,
            AuthorityLevel::WriteMemory,
            true,
        ),
        (
            "WriteExternal -> ExecuteTools",
            AuthorityLevel::WriteExternal,
            AuthorityLevel::ExecuteTools,
            true,
        ),
        (
            "WriteExternal -> WriteExternal",
            AuthorityLevel::WriteExternal,
            AuthorityLevel::WriteExternal,
            true,
        ),
        // ExecuteTools permits itself and below.
        (
            "ExecuteTools -> ReadOnly",
            AuthorityLevel::ExecuteTools,
            AuthorityLevel::ReadOnly,
            true,
        ),
        (
            "ExecuteTools -> WriteMemory",
            AuthorityLevel::ExecuteTools,
            AuthorityLevel::WriteMemory,
            true,
        ),
        (
            "ExecuteTools -> ExecuteTools",
            AuthorityLevel::ExecuteTools,
            AuthorityLevel::ExecuteTools,
            true,
        ),
        (
            "ExecuteTools -> WriteExternal",
            AuthorityLevel::ExecuteTools,
            AuthorityLevel::WriteExternal,
            false,
        ),
        // ReadOnly permits nothing above itself.
        (
            "ReadOnly -> ReadOnly",
            AuthorityLevel::ReadOnly,
            AuthorityLevel::ReadOnly,
            true,
        ),
        (
            "ReadOnly -> WriteMemory",
            AuthorityLevel::ReadOnly,
            AuthorityLevel::WriteMemory,
            false,
        ),
        (
            "ReadOnly -> ExecuteTools",
            AuthorityLevel::ReadOnly,
            AuthorityLevel::ExecuteTools,
            false,
        ),
        (
            "ReadOnly -> WriteExternal",
            AuthorityLevel::ReadOnly,
            AuthorityLevel::WriteExternal,
            false,
        ),
    ];

    for (label, holder, requested, allowed) in expectations {
        assert_eq!(holder.permits(requested), allowed, "{label}");
    }
}

// -- 5. Resource Budget Tests --

/// A budget with every field far above its cap is reduced to the caps.
#[test]
fn resource_budget_clamping() {
    let oversized = ResourceBudget {
        max_time_secs: 99_999,
        max_tokens: 99_999_999,
        max_cost_microdollars: 99_999_999,
        max_tool_calls: 65_535,
        max_external_writes: 65_535,
    };

    let capped = oversized.clamped();

    assert_eq!(capped.max_time_secs, 3600);
    assert_eq!(capped.max_tokens, 1_000_000);
    assert_eq!(capped.max_cost_microdollars, 10_000_000);
    assert_eq!(capped.max_tool_calls, 500);
    assert_eq!(capped.max_external_writes, 50);
}

/// Clamping the default budget leaves it unchanged.
#[test]
fn resource_budget_within_max_unchanged() {
    let defaults = ResourceBudget::DEFAULT;
    assert_eq!(defaults.clamped(), ResourceBudget::DEFAULT);
}

// -- 6.
// Coherence Threshold Validation --

/// The built-in threshold presets validate, out-of-range values are rejected,
/// and all-zero thresholds are accepted.
#[test]
fn coherence_threshold_validation() {
    for preset in [CoherenceThresholds::DEFAULT, CoherenceThresholds::STRICT] {
        assert!(preset.validate().is_ok());
    }

    let invalid = [
        // Invalid: score > 1.0
        CoherenceThresholds {
            min_coherence_score: 1.5,
            ..CoherenceThresholds::DEFAULT
        },
        // Invalid: negative rate
        CoherenceThresholds {
            max_contradiction_rate: -1.0,
            ..CoherenceThresholds::DEFAULT
        },
        // Invalid: rollback ratio > 1.0
        CoherenceThresholds {
            max_rollback_ratio: 2.0,
            ..CoherenceThresholds::DEFAULT
        },
    ];
    for thresholds in invalid {
        assert!(thresholds.validate().is_err());
    }

    // Edge: zero values are valid
    let all_zero = CoherenceThresholds {
        min_coherence_score: 0.0,
        max_contradiction_rate: 0.0,
        max_rollback_ratio: 0.0,
    };
    assert!(all_zero.validate().is_ok());
}

// -- 7. Container Size Limit --

/// A container one byte over `AGI_MAX_CONTAINER_SIZE` is rejected with
/// `ContainerError::TooLarge` carrying the offending size.
#[test]
fn container_size_limit_enforced() {
    let over_limit = AGI_MAX_CONTAINER_SIZE + 1;
    let segments = ContainerSegments {
        manifest_present: true,
        total_size: over_limit,
        ..Default::default()
    };

    let outcome = segments.validate(ExecutionMode::Replay);

    assert_eq!(outcome, Err(ContainerError::TooLarge { size: over_limit }));
}

// -- 8.
// Performance Benchmarks (using std::time) --

/// Runs `op` `iterations` times and returns the total wall-clock time together
/// with the mean cost per call in whole nanoseconds.
///
/// An `iterations` of zero is treated as one for the division so the helper
/// never divides by zero.
fn time_per_op(iterations: u128, mut op: impl FnMut()) -> (std::time::Duration, u128) {
    let start = std::time::Instant::now();
    for _ in 0..iterations {
        op();
    }
    let elapsed = start.elapsed();
    (elapsed, elapsed.as_nanos() / iterations.max(1))
}

/// Measures header serialization and deserialization and asserts that each
/// stays under 1000 ns per operation.
///
/// NOTE(review): the thresholds are wall-clock based, so results depend on the
/// build profile and machine load.
#[test]
fn bench_header_serialize_deserialize() {
    let header = AgiContainerHeader {
        magic: AGI_MAGIC,
        version: 1,
        flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED,
        container_id: [0x42; 16],
        build_id: [0x43; 16],
        created_ns: 1_700_000_000_000_000_000,
        model_id_hash: [0xAA; 8],
        policy_hash: [0xBB; 8],
    };
    let n: u128 = 100_000;

    // Inputs go through `black_box` as well as outputs: the header never
    // changes, so otherwise the optimizer may hoist the call out of the loop.
    let (ser, ser_ns) = time_per_op(n, || {
        std::hint::black_box(std::hint::black_box(&header).to_bytes());
    });

    let bytes = header.to_bytes();
    let (deser, deser_ns) = time_per_op(n, || {
        std::hint::black_box(
            AgiContainerHeader::from_bytes(std::hint::black_box(&bytes)).unwrap(),
        );
    });

    eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})");
    eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})");
    assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op");
    assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op");
}

/// Measures building a container and parsing its manifest, asserting
/// build < 10_000 ns/op and parse < 5_000 ns/op.
///
/// NOTE(review): the thresholds are wall-clock based, so results depend on the
/// build profile and machine load.
#[test]
fn bench_container_build_parse() {
    let n: u128 = 10_000;
    let segs = || ContainerSegments {
        kernel_present: true,
        manifest_present: true,
        world_model_present: true,
        ..Default::default()
    };

    let (build_elapsed, build_ns) = time_per_op(n, || {
        let builder = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
            .with_model_id("claude-opus-4-6")
            .with_policy(b"autonomous", [0xAA; 8])
            .with_orchestrator(ORCH_JSON)
            .with_eval_tasks(TASKS_JSON)
            .with_eval_graders(GRADERS_JSON)
            .with_segments(segs());
        std::hint::black_box(builder.build().unwrap());
    });

    // The parse benchmark reuses one pre-built payload.
    let (payload, _) = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(segs())
        .build()
        .unwrap();

    let (parse_elapsed, parse_ns) = time_per_op(n, || {
        // The payload is loop-invariant; hide it from the optimizer.
        std::hint::black_box(
            ParsedAgiManifest::parse(std::hint::black_box(payload.as_slice())).unwrap(),
        );
    });

    eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})");
    eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})");
    assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op");
    assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op");
}

/// Measures `ContainerSegments::to_flags` and asserts it stays under
/// 100 ns per operation.
///
/// NOTE(review): the threshold is wall-clock based, so results depend on the
/// build profile and machine load.
#[test]
fn bench_flags_computation() {
    let n: u128 = 1_000_000;
    let segs = ContainerSegments {
        kernel_present: true,
        wasm_count: 2,
        witness_count: 100,
        crypto_present: true,
        orchestrator_present: true,
        world_model_present: true,
        vec_segment_count: 4,
        index_segment_count: 2,
        ..Default::default()
    };

    let (elapsed, ns) = time_per_op(n, || {
        // `segs` is constant; without the input `black_box` the flags could be
        // computed once at compile time.
        std::hint::black_box(std::hint::black_box(&segs).to_flags());
    });

    eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})");
    assert!(ns < 100, "flags computation too slow: {ns} ns/op");
}