Files
wifi-densepose/crates/rvf/rvf-runtime/tests/agi_e2e.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

423 lines
14 KiB
Rust

//! End-to-end integration tests and benchmarks for the AGI Cognitive Container
//! system (ADR-036). Covers the full build -> serialize -> parse -> validate
//! cycle, signed container tamper detection, execution mode validation matrix,
//! authority levels, resource budgets, coherence thresholds, and perf benchmarks.
use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest};
use rvf_runtime::seed_crypto;
use rvf_types::agi_container::*;
/// 32-byte key handed to `build_and_sign` and `seed_crypto::verify_seed` in the
/// signing test.
const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes";
/// Orchestrator configuration fixture (passed to `with_orchestrator`).
const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#;
/// Evaluation task list fixture (passed to `with_eval_tasks`).
const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#;
/// Evaluation grader list fixture (passed to `with_eval_graders`).
const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#;
/// Tool registry fixture (passed to `with_tool_registry`).
const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#;
/// Coherence configuration fixture (passed to `with_coherence_config`).
const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#;
/// Assemble a container in which every optional manifest section is set.
///
/// Returns the serialized bytes together with the header, exactly as produced
/// by `AgiContainerBuilder::build`. Panics if the build reports an error.
fn build_full_container() -> (Vec<u8>, AgiContainerHeader) {
    // Segment inventory attached to the container. `total_size` is left at 0.
    let segments = ContainerSegments {
        manifest_present: true,
        orchestrator_present: true,
        world_model_present: true,
        domain_expansion_present: false,
        crypto_present: false,
        kernel_present: true,
        kernel_size: 5_000_000,
        wasm_count: 2,
        wasm_total_size: 60_000,
        vec_segment_count: 4,
        index_segment_count: 2,
        witness_count: 100,
        total_size: 0,
    };

    // Builder calls are kept in their original order.
    let builder = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
        .with_model_id("claude-opus-4-6")
        .with_policy(b"autonomous", [0xAA; 8])
        .with_orchestrator(ORCH_JSON)
        .with_tool_registry(TOOLS_JSON)
        .with_agent_prompts(b"You are a coder agent.")
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_skill_library(b"[]")
        .with_replay_script(b"#!/bin/sh\nrvf replay $1")
        .with_kernel_config(b"console=ttyS0")
        .with_network_config(b"{\"port\":8080}")
        .with_coherence_config(COHER_JSON)
        .with_project_instructions(b"# CLAUDE.md")
        .with_dependency_snapshot(b"sha256:abc123")
        .with_authority_config(b"{\"level\":\"WriteMemory\"}")
        .with_domain_profile(b"coding")
        .offline_capable()
        .with_segments(segments);

    builder.build().unwrap()
}
// -- 1. Full Container Lifecycle --
/// Builds the fully-populated container, then checks the header accessors, the
/// header byte round-trip, and that every manifest section parses back out.
#[test]
fn full_container_lifecycle() {
    let (bytes, hdr) = build_full_container();

    // Header-level expectations.
    assert!(hdr.is_valid_magic());
    assert_eq!(hdr.version, 1);
    assert!(hdr.has_kernel());
    assert!(hdr.has_orchestrator());
    assert!(hdr.has_world_model());
    assert!(hdr.is_replay_capable());
    assert!(hdr.is_offline_capable());
    assert!(
        hdr.created_ns > 0,
        "created_ns should be a real timestamp"
    );

    // Encoding the header and decoding it again must reproduce it exactly.
    let encoded = hdr.to_bytes();
    let decoded = AgiContainerHeader::from_bytes(&encoded).unwrap();
    assert_eq!(decoded, hdr);

    // Sections with a known fixture are compared byte-for-byte.
    let manifest = ParsedAgiManifest::parse(&bytes).unwrap();
    assert_eq!(manifest.model_id_str(), Some("claude-opus-4-6"));
    assert_eq!(manifest.orchestrator_config.unwrap(), ORCH_JSON);
    assert_eq!(manifest.tool_registry.unwrap(), TOOLS_JSON);
    assert_eq!(manifest.eval_tasks.unwrap(), TASKS_JSON);
    assert_eq!(manifest.eval_graders.unwrap(), GRADERS_JSON);
    assert_eq!(manifest.coherence_config.unwrap(), COHER_JSON);

    // The remaining sections only need to be present.
    let presence = [
        ("policy", manifest.policy.is_some()),
        ("agent_prompts", manifest.agent_prompts.is_some()),
        ("skill_library", manifest.skill_library.is_some()),
        ("replay_script", manifest.replay_script.is_some()),
        ("kernel_config", manifest.kernel_config.is_some()),
        ("network_config", manifest.network_config.is_some()),
        ("project_instructions", manifest.project_instructions.is_some()),
        ("dependency_snapshot", manifest.dependency_snapshot.is_some()),
        ("authority_config", manifest.authority_config.is_some()),
        ("domain_profile", manifest.domain_profile.is_some()),
    ];
    for (section, present) in presence {
        assert!(present, "manifest section `{section}` is missing");
    }
    assert!(manifest.is_autonomous_capable());

    // Every flag derived from these segments must be set in the header.
    let expected_flags = ContainerSegments {
        world_model_present: true,
        orchestrator_present: true,
        kernel_present: true,
        witness_count: 100,
        wasm_count: 2,
        ..Default::default()
    }
    .to_flags();
    assert_eq!(hdr.flags & expected_flags, expected_flags);
}
// -- 2. Signed Container Tamper Detection --
/// Signs a container, checks that the trailing signature verifies over the
/// bytes that precede it, then checks that corrupting one byte in the TLV area
/// or in the header makes verification fail.
///
/// The payload length and both corruption offsets are validated up front, so a
/// layout change produces a message naming the real problem instead of an
/// arithmetic/slice panic or a misleading "must fail verification" failure.
#[test]
fn signed_container_tamper_detection() {
    // This test treats the final 32 bytes of the signed payload as the signature.
    const SIGNATURE_LEN: usize = 32;

    let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(ContainerSegments {
            kernel_present: true,
            manifest_present: true,
            world_model_present: true,
            ..Default::default()
        });
    let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
    assert!(header.is_signed());

    // Split into the signed region and the trailing signature.
    let unsigned_len = payload
        .len()
        .checked_sub(SIGNATURE_LEN)
        .expect("signed payload is shorter than the signature itself");
    let (signed_bytes, sig) = payload.split_at(unsigned_len);
    assert!(seed_crypto::verify_seed(SIGNING_KEY, signed_bytes, sig));

    // (offset, XOR mask, failure message): one byte in the TLV payload area
    // with every bit flipped, and header byte 7 with its lowest bit flipped.
    let corruptions = [
        (
            AGI_HEADER_SIZE + 10,
            0xFF_u8,
            "tampered payload must fail verification",
        ),
        (7, 0x01_u8, "tampered header must fail verification"),
    ];
    for (offset, mask, failure_msg) in corruptions {
        // A byte outside the signed region would leave the verified slice
        // untouched, so the check below could not mean anything.
        assert!(
            offset < unsigned_len,
            "tamper offset {offset} lies outside the {unsigned_len} signed bytes"
        );
        let mut tampered = payload.clone();
        tampered[offset] ^= mask;
        assert!(
            !seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig),
            "{failure_msg}"
        );
    }
}
// -- 3. Execution Mode Validation Matrix --
/// Checks `ContainerSegments::validate` against a table of segment layouts and
/// execution modes, each with the expected accept/reject outcome.
#[test]
fn execution_mode_validation_matrix() {
    // Positional shorthand for the fields that vary between cases; everything
    // else comes from `Default`.
    let layout = |manifest, kernel, wasm, world_model, vecs, indexes, witnesses| {
        ContainerSegments {
            manifest_present: manifest,
            kernel_present: kernel,
            wasm_count: wasm,
            world_model_present: world_model,
            vec_segment_count: vecs,
            index_segment_count: indexes,
            witness_count: witnesses,
            ..Default::default()
        }
    };

    // (description, segments, mode, should validate)
    let cases = [
        (
            "replay without witnesses",
            layout(true, false, 0, false, 0, 0, 0),
            ExecutionMode::Replay,
            false,
        ),
        (
            "replay with witnesses",
            layout(true, false, 0, false, 0, 0, 10),
            ExecutionMode::Replay,
            true,
        ),
        (
            "verify without a runtime",
            layout(true, false, 0, false, 0, 0, 0),
            ExecutionMode::Verify,
            false,
        ),
        (
            "verify with kernel and world model",
            layout(true, true, 0, true, 0, 0, 0),
            ExecutionMode::Verify,
            true,
        ),
        (
            "verify with wasm and vec segments",
            layout(true, false, 1, false, 2, 0, 0),
            ExecutionMode::Verify,
            true,
        ),
        (
            "live with kernel but no world model",
            layout(true, true, 0, false, 0, 0, 0),
            ExecutionMode::Live,
            false,
        ),
        (
            "live with kernel and world model",
            layout(true, true, 0, true, 0, 0, 0),
            ExecutionMode::Live,
            true,
        ),
    ];

    for (label, segments, mode, should_pass) in cases {
        assert_eq!(segments.validate(mode).is_ok(), should_pass, "{label}");
    }
}
// -- 4. Authority Level Tests --
/// Checks the authority level that `AuthorityLevel::default_for_mode` returns
/// for each execution mode.
#[test]
fn authority_level_defaults_per_mode() {
    // (mode, expected default authority)
    let expectations = [
        (ExecutionMode::Replay, AuthorityLevel::ReadOnly),
        (ExecutionMode::Verify, AuthorityLevel::ExecuteTools),
        (ExecutionMode::Live, AuthorityLevel::WriteMemory),
    ];
    for (mode, expected) in expectations {
        assert_eq!(AuthorityLevel::default_for_mode(mode), expected);
    }
}
/// Checks `AuthorityLevel::permits` for three holder levels against each of
/// the four requested levels.
#[test]
fn authority_level_hierarchy() {
    // (holder, requested, holder permits requested)
    let table = [
        // WriteExternal permits all.
        (AuthorityLevel::WriteExternal, AuthorityLevel::ReadOnly, true),
        (AuthorityLevel::WriteExternal, AuthorityLevel::WriteMemory, true),
        (AuthorityLevel::WriteExternal, AuthorityLevel::ExecuteTools, true),
        (AuthorityLevel::WriteExternal, AuthorityLevel::WriteExternal, true),
        // ExecuteTools permits itself and below.
        (AuthorityLevel::ExecuteTools, AuthorityLevel::ReadOnly, true),
        (AuthorityLevel::ExecuteTools, AuthorityLevel::WriteMemory, true),
        (AuthorityLevel::ExecuteTools, AuthorityLevel::ExecuteTools, true),
        (AuthorityLevel::ExecuteTools, AuthorityLevel::WriteExternal, false),
        // ReadOnly permits nothing above itself.
        (AuthorityLevel::ReadOnly, AuthorityLevel::ReadOnly, true),
        (AuthorityLevel::ReadOnly, AuthorityLevel::WriteMemory, false),
        (AuthorityLevel::ReadOnly, AuthorityLevel::ExecuteTools, false),
        (AuthorityLevel::ReadOnly, AuthorityLevel::WriteExternal, false),
    ];

    for (holder, requested, allowed) in table {
        // Rendered before the call so the case can be named on failure.
        let case = format!("{holder:?} permits {requested:?}");
        assert_eq!(holder.permits(requested), allowed, "{case}");
    }
}
// -- 5. Resource Budget Tests --
/// Checks that `ResourceBudget::clamped` caps each over-limit field at the
/// expected value.
#[test]
fn resource_budget_clamping() {
    let oversized = ResourceBudget {
        max_time_secs: 99999,
        max_tokens: 99999999,
        max_cost_microdollars: 99999999,
        max_tool_calls: 65535,
        max_external_writes: 65535,
    };

    // Pull the clamped fields apart so each one is compared on its own.
    let ResourceBudget {
        max_time_secs,
        max_tokens,
        max_cost_microdollars,
        max_tool_calls,
        max_external_writes,
    } = oversized.clamped();

    assert_eq!(max_time_secs, 3600);
    assert_eq!(max_tokens, 1_000_000);
    assert_eq!(max_cost_microdollars, 10_000_000);
    assert_eq!(max_tool_calls, 500);
    assert_eq!(max_external_writes, 50);
}
/// Checks that clamping `ResourceBudget::DEFAULT` returns it unchanged.
#[test]
fn resource_budget_within_max_unchanged() {
    let default_budget = ResourceBudget::DEFAULT;
    let after_clamp = default_budget.clamped();
    assert_eq!(after_clamp, ResourceBudget::DEFAULT);
}
// -- 6. Coherence Threshold Validation --
/// Checks `CoherenceThresholds::validate`: both presets and an all-zero
/// configuration are accepted; out-of-range values are rejected.
#[test]
fn coherence_threshold_validation() {
    // Built-in presets must validate.
    for preset in [CoherenceThresholds::DEFAULT, CoherenceThresholds::STRICT] {
        assert!(preset.validate().is_ok());
    }

    // Each case changes exactly one field of DEFAULT to an invalid value.
    let rejected = [
        (
            "score > 1.0",
            CoherenceThresholds {
                min_coherence_score: 1.5,
                ..CoherenceThresholds::DEFAULT
            },
        ),
        (
            "negative rate",
            CoherenceThresholds {
                max_contradiction_rate: -1.0,
                ..CoherenceThresholds::DEFAULT
            },
        ),
        (
            "rollback ratio > 1.0",
            CoherenceThresholds {
                max_rollback_ratio: 2.0,
                ..CoherenceThresholds::DEFAULT
            },
        ),
    ];
    for (reason, thresholds) in rejected {
        assert!(thresholds.validate().is_err(), "{reason}");
    }

    // Edge: zero values are valid.
    let all_zero = CoherenceThresholds {
        min_coherence_score: 0.0,
        max_contradiction_rate: 0.0,
        max_rollback_ratio: 0.0,
    };
    assert!(all_zero.validate().is_ok());
}
// -- 7. Container Size Limit --
/// Checks that a container one byte over `AGI_MAX_CONTAINER_SIZE` is rejected
/// with `ContainerError::TooLarge` carrying the offending size.
#[test]
fn container_size_limit_enforced() {
    let too_big = AGI_MAX_CONTAINER_SIZE + 1;
    let segments = ContainerSegments {
        manifest_present: true,
        total_size: too_big,
        ..Default::default()
    };

    let outcome = segments.validate(ExecutionMode::Replay);
    assert_eq!(outcome, Err(ContainerError::TooLarge { size: too_big }));
}
// -- 8. Performance Benchmarks (using std::time) --
/// Times 100,000 header `to_bytes` calls and 100,000 `from_bytes` calls,
/// prints the per-operation cost, and fails if either exceeds 1000 ns/op.
///
/// NOTE(review): the limits are wall-clock and apply in every build profile;
/// presumably they were tuned for the project's CI — confirm before tightening.
#[test]
fn bench_header_serialize_deserialize() {
    use std::hint::black_box;
    use std::time::Instant;

    let n: u128 = 100_000;
    let header = AgiContainerHeader {
        magic: AGI_MAGIC,
        version: 1,
        flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED,
        container_id: [0x42; 16],
        build_id: [0x43; 16],
        created_ns: 1_700_000_000_000_000_000,
        model_id_hash: [0xAA; 8],
        policy_hash: [0xBB; 8],
    };

    // Serialization pass.
    let ser_clock = Instant::now();
    for _ in 0..n {
        let _ = black_box(header.to_bytes());
    }
    let ser = ser_clock.elapsed();

    // Deserialization pass over a single pre-encoded header.
    let encoded = header.to_bytes();
    let deser_clock = Instant::now();
    for _ in 0..n {
        let _ = black_box(AgiContainerHeader::from_bytes(&encoded).unwrap());
    }
    let deser = deser_clock.elapsed();

    // Integer division: per-op figures are truncated to whole nanoseconds.
    let (ser_ns, deser_ns) = (ser.as_nanos() / n, deser.as_nanos() / n);
    eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})");
    eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})");
    assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op");
    assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op");
}
/// Times 10,000 container builds and 10,000 manifest parses, prints the
/// per-operation cost, and fails if a build exceeds 10,000 ns/op or a parse
/// exceeds 5,000 ns/op.
///
/// NOTE(review): the limits are wall-clock and apply in every build profile;
/// presumably they were tuned for the project's CI — confirm before tightening.
#[test]
fn bench_container_build_parse() {
    use std::hint::black_box;
    use std::time::Instant;

    let n: u128 = 10_000;
    // Fresh segment description for every builder.
    let make_segments = || ContainerSegments {
        world_model_present: true,
        manifest_present: true,
        kernel_present: true,
        ..Default::default()
    };

    // Build pass: the builder is constructed inside the timed loop.
    let build_clock = Instant::now();
    for _ in 0..n {
        let builder = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
            .with_model_id("claude-opus-4-6")
            .with_policy(b"autonomous", [0xAA; 8])
            .with_orchestrator(ORCH_JSON)
            .with_eval_tasks(TASKS_JSON)
            .with_eval_graders(GRADERS_JSON)
            .with_segments(make_segments());
        let _ = black_box(builder.build().unwrap());
    }
    let build_elapsed = build_clock.elapsed();

    // Parse pass: one payload is built up front (without a policy section,
    // unlike the build pass) and parsed repeatedly.
    let parse_input = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(make_segments())
        .build()
        .unwrap()
        .0;
    let parse_clock = Instant::now();
    for _ in 0..n {
        let _ = black_box(ParsedAgiManifest::parse(&parse_input).unwrap());
    }
    let parse_elapsed = parse_clock.elapsed();

    // Integer division: per-op figures are truncated to whole nanoseconds.
    let (build_ns, parse_ns) = (build_elapsed.as_nanos() / n, parse_elapsed.as_nanos() / n);
    eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})");
    eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})");
    assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op");
    assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op");
}
/// Times 1,000,000 `ContainerSegments::to_flags` calls, prints the
/// per-operation cost, and fails if it exceeds 100 ns/op.
///
/// NOTE(review): the limit is wall-clock and applies in every build profile;
/// presumably it was tuned for the project's CI — confirm before tightening.
#[test]
fn bench_flags_computation() {
    use std::hint::black_box;
    use std::time::Instant;

    let n: u128 = 1_000_000;
    let segs = ContainerSegments {
        world_model_present: true,
        orchestrator_present: true,
        crypto_present: true,
        kernel_present: true,
        index_segment_count: 2,
        vec_segment_count: 4,
        witness_count: 100,
        wasm_count: 2,
        ..Default::default()
    };

    let clock = Instant::now();
    for _ in 0..n {
        let _ = black_box(segs.to_flags());
    }
    let elapsed = clock.elapsed();

    // Integer division: the per-op figure is truncated to whole nanoseconds.
    let ns = elapsed.as_nanos() / n;
    eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})");
    assert!(ns < 100, "flags computation too slow: {ns} ns/op");
}