Files
wifi-densepose/crates/rvf/rvf-runtime/tests/witness_e2e.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

289 lines
10 KiB
Rust

//! End-to-end integration tests for ADR-035 capability reports.
//!
//! Tests the full witness → scorecard → governance pipeline with real
//! HMAC-SHA256 signatures, policy enforcement, and deterministic replay.
use rvf_runtime::seed_crypto;
use rvf_runtime::witness::{
GovernancePolicy, ParsedWitness, ScorecardBuilder, WitnessBuilder, WitnessError,
};
use rvf_types::witness::*;
const KEY: &[u8] = b"e2e-test-key-for-witness-bundle!";
/// Builds a [`ToolCallEntry`] fixture for the tool named `tool`.
///
/// The tool name's bytes become the `action` and are also fed to
/// `seed_crypto::seed_content_hash` to produce `args_hash`. The result hash is
/// all zeroes and the entry starts out marked `PolicyCheck::Allowed`.
///
/// * `latency` - stored as `latency_ms`.
/// * `cost` - stored as `cost_microdollars`.
/// * `tokens` - stored as `tokens`.
fn make_entry(tool: &str, latency: u32, cost: u32, tokens: u32) -> ToolCallEntry {
    let name = tool.as_bytes();
    ToolCallEntry {
        action: name.to_vec(),
        args_hash: seed_crypto::seed_content_hash(name),
        result_hash: [0; 8],
        latency_ms: latency,
        cost_microdollars: cost,
        tokens,
        policy_check: PolicyCheck::Allowed,
    }
}
/// Builds, signs, parses and verifies five witnesses under the autonomous
/// policy, then checks the counters and ratios of the aggregated scorecard.
#[test]
fn full_capability_report_pipeline() {
    let mut scorecard = ScorecardBuilder::new();
    let policy = GovernancePolicy::autonomous();

    // Five simulated tasks: (task id, outcome, attach plan/diff/test-log evidence?).
    let scenarios: Vec<([u8; 16], TaskOutcome, bool)> = vec![
        ([0x01; 16], TaskOutcome::Solved, true),
        ([0x02; 16], TaskOutcome::Solved, true),
        ([0x03; 16], TaskOutcome::Solved, false), // solved but no full evidence
        ([0x04; 16], TaskOutcome::Failed, true),
        ([0x05; 16], TaskOutcome::Errored, false),
    ];

    for (task_id, outcome, full_evidence) in scenarios {
        let base = WitnessBuilder::new(task_id, policy.clone())
            .with_spec(b"fix issue #123")
            .with_outcome(outcome);
        let mut builder = if full_evidence {
            base.with_plan(b"1. read\n2. fix\n3. test")
                .with_diff(b"--- a/file.rs\n+++ b/file.rs")
                .with_test_log(b"test ... ok")
        } else {
            base
        };

        // Every task records the same three tool calls: (tool, latency, cost, tokens).
        for (tool, latency, cost, tokens) in [
            ("Read", 50, 100, 500),
            ("Edit", 100, 200, 1000),
            ("Bash", 3000, 0, 0),
        ] {
            builder.record_tool_call(make_entry(tool, latency, cost, tokens));
        }

        let (payload, header) = builder.build_and_sign(KEY).unwrap();

        // Round-trip the signed payload and verify it with the signing key.
        let parsed = ParsedWitness::parse(&payload).unwrap();
        parsed.verify_all(KEY, &payload).unwrap();

        // Three calls costing 100 + 200 + 0.
        assert_eq!(header.tool_call_count, 3);
        assert_eq!(header.total_cost_microdollars, 300);

        scorecard.add_witness(&parsed, 0, 0);
    }

    let card = scorecard.finish();
    assert_eq!(card.total_tasks, 5);
    assert_eq!(card.solved, 3);
    assert_eq!(card.failed, 1);
    assert_eq!(card.errors, 1);
    // 3 solved out of 5 tasks.
    assert!((card.solve_rate - 0.6).abs() < 0.01);
    // 2 out of 3 solved have full evidence.
    assert!((card.evidence_coverage - 0.6667).abs() < 0.01);
    // Total cost = 5 tasks * 300 each = 1500. Solved = 3. 1500/3 = 500.
    assert_eq!(card.cost_per_solve_microdollars, 500);
}
/// Under the restricted policy a `Read` call is allowed while `Write`, `Edit`
/// and `Bash` are denied; the denials must show up both as recorded violations
/// and in the trace parsed back from the signed payload.
#[test]
fn governance_restricted_mode_blocks_writes() {
    let mut builder = WitnessBuilder::new([0x10; 16], GovernancePolicy::restricted())
        .with_spec(b"audit code")
        .with_outcome(TaskOutcome::Solved);

    // Read is allowed.
    let verdict = builder.record_tool_call(make_entry("Read", 50, 100, 500));
    assert_eq!(verdict, PolicyCheck::Allowed);

    // Write, Edit and Bash are each denied: (tool, cost, tokens), latency fixed at 100.
    for (tool, cost, tokens) in [("Write", 200, 1000), ("Edit", 200, 1000), ("Bash", 0, 0)] {
        let verdict = builder.record_tool_call(make_entry(tool, 100, cost, tokens));
        assert_eq!(verdict, PolicyCheck::Denied);
    }
    assert_eq!(builder.policy_violations.len(), 3);

    let (payload, _) = builder.build_and_sign(KEY).unwrap();
    let parsed = ParsedWitness::parse(&payload).unwrap();
    let entries = parsed.parse_trace();

    // The trace keeps all four calls, in order, with the verdict each one received.
    let expected = [
        PolicyCheck::Allowed,
        PolicyCheck::Denied,
        PolicyCheck::Denied,
        PolicyCheck::Denied,
    ];
    assert_eq!(entries.len(), 4);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(entries[idx].policy_check, *want);
    }
}
/// Under the approved policy every recorded tool call comes back as
/// `PolicyCheck::Confirmed`, and none of them is logged as a violation.
#[test]
fn governance_approved_mode_gates_all() {
    let mut builder = WitnessBuilder::new([0x20; 16], GovernancePolicy::approved())
        .with_outcome(TaskOutcome::Solved);

    for (tool, latency, cost, tokens) in [("Read", 50, 100, 500), ("Bash", 100, 0, 0)] {
        let verdict = builder.record_tool_call(make_entry(tool, latency, cost, tokens));
        assert_eq!(verdict, PolicyCheck::Confirmed);
    }

    // No policy violations — confirmed is not a violation.
    assert!(builder.policy_violations.is_empty());
}
/// With the autonomous policy capped at 500 microdollars, a 400-cost call
/// records no violation and a following 200-cost call records exactly one,
/// whose text mentions "cost budget".
#[test]
fn governance_autonomous_with_cost_cap() {
    let capped = {
        let mut p = GovernancePolicy::autonomous();
        p.max_cost_microdollars = 500;
        p
    };
    let mut builder = WitnessBuilder::new([0x30; 16], capped).with_outcome(TaskOutcome::Solved);

    // 400 is below the 500 cap.
    builder.record_tool_call(make_entry("Read", 50, 400, 500));
    assert!(builder.policy_violations.is_empty());

    // NOTE(review): presumably the running total (400 + 200) is what trips the
    // cap — confirm against the builder implementation.
    builder.record_tool_call(make_entry("Edit", 50, 200, 500));
    assert_eq!(builder.policy_violations.len(), 1);
    let violation = &builder.policy_violations[0];
    assert!(violation.contains("cost budget"));
}
/// Signs a witness carrying spec, plan, diff and test log plus three tool
/// calls, then checks that parsing the payload yields the same section bytes,
/// the same trace entries, and a bundle that reports complete evidence.
#[test]
fn deterministic_replay_same_bytes() {
    // Section contents fed into the builder and expected back from the parser.
    let spec = b"fix bug #42";
    let plan = b"1. read auth.rs\n2. fix validation";
    let diff = b"@@ -10,3 +10,5 @@\n+ validate(input);";
    let test_log = b"test auth::validate ... ok\n3 passed";

    let mut builder = WitnessBuilder::new([0x40; 16], GovernancePolicy::autonomous())
        .with_spec(spec)
        .with_plan(plan)
        .with_diff(diff)
        .with_test_log(test_log)
        .with_outcome(TaskOutcome::Solved);

    // (tool, latency, cost, tokens)
    for (tool, latency, cost, tokens) in [
        ("Read", 50, 100, 500),
        ("Edit", 100, 200, 1000),
        ("Bash", 2000, 0, 0),
    ] {
        builder.record_tool_call(make_entry(tool, latency, cost, tokens));
    }
    let (payload, _) = builder.build_and_sign(KEY).unwrap();

    // Parse and extract all sections.
    let parsed = ParsedWitness::parse(&payload).unwrap();
    parsed.verify_all(KEY, &payload).unwrap();
    assert_eq!(parsed.spec.unwrap(), spec);
    assert_eq!(parsed.plan.unwrap(), plan);
    assert_eq!(parsed.diff.unwrap(), diff);
    assert_eq!(parsed.test_log.unwrap(), test_log);

    // Trace entries come back in recording order.
    let entries = parsed.parse_trace();
    assert_eq!(entries.len(), 3);
    for (idx, tool) in ["Read", "Edit", "Bash"].iter().enumerate() {
        assert_eq!(entries[idx].action, tool.as_bytes());
    }
    // Spot-check one numeric field per entry.
    assert_eq!(entries[0].latency_ms, 50);
    assert_eq!(entries[1].cost_microdollars, 200);
    assert_eq!(entries[2].tokens, 0);

    // The bundle is self-contained evidence.
    assert!(parsed.evidence_complete());
}
/// Flips one byte of a signed payload and expects signature verification of
/// the re-parsed bundle to fail.
#[test]
fn tampered_bundle_detected() {
    let mut builder = WitnessBuilder::new([0x50; 16], GovernancePolicy::autonomous())
        .with_spec(b"original spec")
        .with_outcome(TaskOutcome::Solved);
    builder.record_tool_call(make_entry("Bash", 100, 0, 0));
    let (mut payload, _) = builder.build_and_sign(KEY).unwrap();

    // Tamper: invert every bit of the byte 10 positions after WITNESS_HEADER_SIZE.
    let tamper_at = WITNESS_HEADER_SIZE + 10;
    payload[tamper_at] ^= 0xFF;

    // The test expects parsing to still succeed; only the signature check must reject.
    let parsed = ParsedWitness::parse(&payload).unwrap();
    let verdict = parsed.verify_signature(KEY, &payload);
    assert!(verdict.is_err());
}
/// A failed task's witness carries its postmortem: the header records the
/// `Failed` outcome and the parsed bundle returns the postmortem bytes intact.
#[test]
fn postmortem_on_failure() {
    let postmortem = b"Root cause: missing null check in parser";

    let builder = WitnessBuilder::new([0x60; 16], GovernancePolicy::autonomous())
        .with_spec(b"implement feature X")
        .with_diff(b"partial diff")
        .with_test_log(b"test feature_x ... FAILED\n0 passed, 1 failed")
        .with_postmortem(postmortem)
        .with_outcome(TaskOutcome::Failed);
    let (payload, header) = builder.build_and_sign(KEY).unwrap();

    // The header stores the outcome as its raw `u8` discriminant.
    assert_eq!(header.outcome, TaskOutcome::Failed as u8);

    let parsed = ParsedWitness::parse(&payload).unwrap();
    parsed.verify_all(KEY, &payload).unwrap();
    assert_eq!(parsed.postmortem.unwrap(), postmortem);
}
/// Feeds twenty solved, unsigned witnesses with latencies 100..=2000 ms into a
/// scorecard and checks the totals plus the median and p95 latency bounds.
#[test]
fn scorecard_percentiles() {
    let policy = GovernancePolicy::autonomous();
    let mut sc = ScorecardBuilder::new();

    // Create 20 tasks with varying latencies.
    for step in 1..=20u8 {
        // Task ids are filled with 0..=19; latency is 100, 200, ..., 2000 ms.
        let task_id = [step - 1; 16];
        let latency_ms = u32::from(step) * 100;

        let mut builder = WitnessBuilder::new(task_id, policy.clone())
            .with_spec(b"task")
            .with_diff(b"diff")
            .with_test_log(b"ok")
            .with_outcome(TaskOutcome::Solved);
        builder.record_tool_call(make_entry("Bash", latency_ms, 100, 100));

        // `build()` rather than `build_and_sign`: no key is supplied here.
        let (payload, _) = builder.build().unwrap();
        let parsed = ParsedWitness::parse(&payload).unwrap();
        sc.add_witness(&parsed, 0, 0);
    }

    let card = sc.finish();
    assert_eq!(card.total_tasks, 20);
    assert_eq!(card.solved, 20);
    assert!((card.solve_rate - 1.0).abs() < 0.01);
    assert!((card.evidence_coverage - 1.0).abs() < 0.01);
    // Median of 100..2000 (step 100) = ~1000
    assert!((900..=1100).contains(&card.median_latency_ms));
    // p95 should be near 1900-2000.
    assert!(card.p95_latency_ms >= 1800);
}
/// Two recorded rollbacks are counted on the builder, and a rollback count of
/// two passed to the scorecard is reported back by the finished card.
#[test]
fn rollback_tracking() {
    let mut builder = WitnessBuilder::new([0x70; 16], GovernancePolicy::autonomous())
        .with_outcome(TaskOutcome::Solved);
    for _ in 0..2 {
        builder.record_rollback();
    }
    assert_eq!(builder.rollback_count, 2);

    let (payload, _) = builder.build().unwrap();
    let parsed = ParsedWitness::parse(&payload).unwrap();

    // The rollback count is handed to the scorecard explicitly as the third argument.
    let mut sc = ScorecardBuilder::new();
    sc.add_witness(&parsed, 0, 2);
    let card = sc.finish();
    assert_eq!(card.rollback_count, 2);
}
/// Runs 100 autonomous-policy builders through the same Read/Edit/Bash calls
/// and expects no policy violations to be recorded across all of them.
#[test]
fn zero_policy_violations_in_autonomous() {
    let policy = GovernancePolicy::autonomous();

    // Sum the per-run violation counts instead of mutating an accumulator.
    let total_violations: u32 = (0..100u8)
        .map(|i| {
            let mut builder =
                WitnessBuilder::new([i; 16], policy.clone()).with_outcome(TaskOutcome::Solved);
            for tool in ["Read", "Edit", "Bash"] {
                builder.record_tool_call(make_entry(tool, 10, 10, 10));
            }
            builder.policy_violations.len() as u32
        })
        .sum();

    assert_eq!(total_violations, 0, "zero policy violations in 100 runs");
}