// git-subtree-dir: vendor/ruvector
// git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
// File-viewer metadata: 208 lines, 8.6 KiB, Rust.
//! Capability Report — "Prove It or It Didn't Happen"
//!
//! Demonstrates the full proof pipeline:
//!
//! 1. Define governance policy (restricted / approved / autonomous)
//! 2. Execute tasks with policy enforcement and tool call tracing
//! 3. Build signed witness bundles per task
//! 4. Verify signatures and evidence completeness
//! 5. Aggregate into a capability scorecard
//!
//! Zero external dependencies. Real HMAC-SHA256 signatures.
//!
//! Run: cargo run --example capability_report -p rvf-runtime
|
|
use rvf_runtime::seed_crypto;
|
|
use rvf_runtime::witness::{GovernancePolicy, ParsedWitness, ScorecardBuilder, WitnessBuilder};
|
|
use rvf_types::witness::*;
|
|
|
|
/// HMAC-SHA256 signing key passed to both the signing and the verification
/// step of this example.
///
/// Hard-coded for demonstration only; in production, load the key from secure
/// storage.
const SIGNING_KEY: &[u8] = b"capability-report-signing-key-ok";
|
|
|
|
fn main() {
|
|
println!("=== Capability Report: Prove It or It Didn't Happen ===\n");
|
|
|
|
// --- Define the task suite ---
|
|
let tasks = vec![
|
|
Task {
|
|
id: [0x01; 16],
|
|
spec: "Fix authentication bypass in /api/login endpoint",
|
|
plan: "1. Read auth.rs 2. Add input validation 3. Run tests",
|
|
diff: "--- a/src/auth.rs\n+++ b/src/auth.rs\n@@ -42,6 +42,8 @@\n+ if !validate_token(token) {\n+ return Err(AuthError::InvalidToken);\n+ }",
|
|
test_log: "test auth::login_valid ... ok\ntest auth::login_invalid ... ok\ntest auth::login_bypass ... ok\n3 passed, 0 failed",
|
|
outcome: TaskOutcome::Solved,
|
|
tools: vec![
|
|
("Read", 50, 100, 500),
|
|
("Read", 30, 80, 400),
|
|
("Edit", 100, 200, 1000),
|
|
("Bash", 3000, 0, 0),
|
|
],
|
|
},
|
|
Task {
|
|
id: [0x02; 16],
|
|
spec: "Add rate limiting to /api/search endpoint",
|
|
plan: "1. Read search.rs 2. Add rate limiter 3. Test",
|
|
diff: "--- a/src/search.rs\n+++ b/src/search.rs\n@@ -10,3 +10,8 @@\n+ let limiter = RateLimiter::new(100, Duration::from_secs(60));\n+ limiter.check(client_ip)?;",
|
|
test_log: "test search::rate_limit ... ok\ntest search::normal_query ... ok\n2 passed, 0 failed",
|
|
outcome: TaskOutcome::Solved,
|
|
tools: vec![
|
|
("Read", 40, 90, 450),
|
|
("Edit", 80, 180, 900),
|
|
("Bash", 2500, 0, 0),
|
|
],
|
|
},
|
|
Task {
|
|
id: [0x03; 16],
|
|
spec: "Implement WebSocket reconnection with exponential backoff",
|
|
plan: "1. Read ws.rs 2. Add reconnect logic 3. Test",
|
|
diff: "--- a/src/ws.rs\n+++ b/src/ws.rs\n@@ -50,2 +50,15 @@\n+ async fn reconnect(&mut self) -> Result<()> {\n+ for attempt in 0..5 {\n+ let delay = Duration::from_millis(100 * 2u64.pow(attempt));\n+ tokio::time::sleep(delay).await;\n+ // ...reconnect logic...\n+ }\n+ }",
|
|
test_log: "test ws::reconnect_success ... ok\ntest ws::reconnect_max_retries ... ok\ntest ws::reconnect_backoff_timing ... FAILED\n2 passed, 1 failed",
|
|
outcome: TaskOutcome::Failed,
|
|
tools: vec![
|
|
("Read", 60, 120, 600),
|
|
("Edit", 150, 300, 1500),
|
|
("Bash", 5000, 0, 0),
|
|
("Edit", 100, 200, 1000),
|
|
("Bash", 4000, 0, 0),
|
|
],
|
|
},
|
|
Task {
|
|
id: [0x04; 16],
|
|
spec: "Refactor error handling to use thiserror derive",
|
|
plan: "1. Read error.rs 2. Replace manual impls 3. Test",
|
|
diff: "--- a/src/error.rs\n+++ b/src/error.rs\n@@ -1,30 +1,15 @@\n+#[derive(Debug, thiserror::Error)]\n+pub enum AppError {\n+ #[error(\"IO error: {0}\")]\n+ Io(#[from] std::io::Error),\n+}",
|
|
test_log: "test error::display ... ok\ntest error::from_io ... ok\n2 passed, 0 failed",
|
|
outcome: TaskOutcome::Solved,
|
|
tools: vec![
|
|
("Read", 30, 70, 350),
|
|
("Edit", 90, 190, 950),
|
|
("Bash", 1500, 0, 0),
|
|
],
|
|
},
|
|
Task {
|
|
id: [0x05; 16],
|
|
spec: "Add CORS headers to all API responses",
|
|
plan: "1. Read middleware.rs 2. Add CORS middleware 3. Test",
|
|
diff: "--- a/src/middleware.rs\n+++ b/src/middleware.rs\n@@ -5,2 +5,8 @@\n+ .allow_origin(Any)\n+ .allow_methods([GET, POST, PUT, DELETE])\n+ .allow_headers(Any)",
|
|
test_log: "test middleware::cors_preflight ... ok\ntest middleware::cors_headers ... ok\n2 passed, 0 failed",
|
|
outcome: TaskOutcome::Solved,
|
|
tools: vec![
|
|
("Read", 25, 60, 300),
|
|
("Edit", 70, 150, 750),
|
|
("Bash", 1200, 0, 0),
|
|
],
|
|
},
|
|
];
|
|
|
|
// --- Run all three governance modes ---
|
|
let modes = vec![
|
|
("RESTRICTED", GovernancePolicy::restricted()),
|
|
("APPROVED", GovernancePolicy::approved()),
|
|
("AUTONOMOUS", GovernancePolicy::autonomous()),
|
|
];
|
|
|
|
for (mode_name, policy) in &modes {
|
|
println!("--- Governance Mode: {mode_name} ---");
|
|
println!(" Policy hash: {:02x?}", policy.hash());
|
|
|
|
let mut scorecard = ScorecardBuilder::new();
|
|
|
|
for task in &tasks {
|
|
let mut builder = WitnessBuilder::new(task.id, policy.clone())
|
|
.with_spec(task.spec.as_bytes())
|
|
.with_plan(task.plan.as_bytes())
|
|
.with_diff(task.diff.as_bytes())
|
|
.with_test_log(task.test_log.as_bytes())
|
|
.with_outcome(task.outcome);
|
|
|
|
if task.outcome == TaskOutcome::Failed {
|
|
builder = builder.with_postmortem(
|
|
b"Timing-dependent test failed due to non-deterministic backoff",
|
|
);
|
|
}
|
|
|
|
let mut denials = 0u32;
|
|
for &(tool, latency, cost, tokens) in &task.tools {
|
|
let entry = ToolCallEntry {
|
|
action: tool.as_bytes().to_vec(),
|
|
args_hash: seed_crypto::seed_content_hash(tool.as_bytes()),
|
|
result_hash: [0x00; 8],
|
|
latency_ms: latency,
|
|
cost_microdollars: cost,
|
|
tokens,
|
|
policy_check: PolicyCheck::Allowed,
|
|
};
|
|
let check = builder.record_tool_call(entry);
|
|
if check == PolicyCheck::Denied {
|
|
denials += 1;
|
|
}
|
|
}
|
|
|
|
let violations = builder.policy_violations.len() as u32;
|
|
|
|
let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
|
|
|
|
// Verify.
|
|
let parsed = ParsedWitness::parse(&payload).unwrap();
|
|
parsed.verify_all(SIGNING_KEY, &payload).unwrap();
|
|
|
|
let outcome_str = match task.outcome {
|
|
TaskOutcome::Solved => "SOLVED",
|
|
TaskOutcome::Failed => "FAILED",
|
|
_ => "OTHER",
|
|
};
|
|
|
|
println!(
|
|
" [{outcome_str}] {} | {} bytes | {}ms | ${:.4} | {} tools | {} denied | {} violations",
|
|
&task.spec[..40.min(task.spec.len())],
|
|
header.total_bundle_size,
|
|
header.total_latency_ms,
|
|
header.total_cost_microdollars as f64 / 1_000_000.0,
|
|
header.tool_call_count,
|
|
denials,
|
|
violations,
|
|
);
|
|
|
|
scorecard.add_witness(&parsed, violations, 0);
|
|
}
|
|
|
|
let card = scorecard.finish();
|
|
println!();
|
|
println!(" Scorecard:");
|
|
println!(" Tasks: {}", card.total_tasks);
|
|
println!(
|
|
" Solved: {} ({:.0}%)",
|
|
card.solved,
|
|
card.solve_rate * 100.0
|
|
);
|
|
println!(" Failed: {}", card.failed);
|
|
println!(" Errors: {}", card.errors);
|
|
println!(" Violations: {}", card.policy_violations);
|
|
println!(" Rollbacks: {}", card.rollback_count);
|
|
println!(
|
|
" Cost/solve: ${:.4}",
|
|
card.cost_per_solve_microdollars as f64 / 1_000_000.0
|
|
);
|
|
println!(" Median latency: {} ms", card.median_latency_ms);
|
|
println!(" P95 latency: {} ms", card.p95_latency_ms);
|
|
println!(" Total tokens: {}", card.total_tokens);
|
|
println!(
|
|
" Evidence: {:.0}%",
|
|
card.evidence_coverage * 100.0
|
|
);
|
|
println!();
|
|
}
|
|
|
|
println!("=== Report complete. Evidence > claims. ===");
|
|
}
|
|
|
|
struct Task {
|
|
id: [u8; 16],
|
|
spec: &'static str,
|
|
plan: &'static str,
|
|
diff: &'static str,
|
|
test_log: &'static str,
|
|
outcome: TaskOutcome,
|
|
tools: Vec<(&'static str, u32, u32, u32)>,
|
|
}
|