1005 lines
38 KiB
Rust
1005 lines
38 KiB
Rust
//! Publishable RVF Acceptance Test
//!
//! Produces a self-contained artifact that an external developer can run
//! offline and reproduce identical graded outcomes, plus verify the witness
//! chain cryptographically.
//!
//! ## Architecture
//!
//! 1. **Deterministic execution**: Frozen seeds → identical puzzles → identical
//!    solve paths → identical outcomes. No network, no randomness, no clock.
//!
//! 2. **Witness chain**: Every puzzle decision (skip_mode chosen, context bucket,
//!    steps taken, correct/wrong) is hashed into a SHAKE-256 chain using the
//!    native `rvf-crypto` witness infrastructure. Changing any single bit in
//!    any record invalidates the entire chain from that point.
//!
//! 3. **Graded scorecard**: Per-mode (A/B/C) aggregate metrics plus ablation
//!    assertions, all serialized to JSON.
//!
//! 4. **Binary .rvf output**: The witness chain is also written as a native
//!    WITNESS_SEG (0x0A) + META_SEG (0x07) in the RVF wire format, producing
//!    a `.rvf` file verifiable by any RVF-compatible tool or WASM runtime.
//!
//! 5. **Verification**: Re-run with same config → re-generate chain → compare
//!    chain root hash. If it matches, outcomes are identical.
//!
//! ## Usage
//!
//! ```bash
//! # Generate the manifest (JSON + optional .rvf binary)
//! cargo run --bin acceptance-rvf -- generate --output manifest.json
//!
//! # Verify a previously generated manifest
//! cargo run --bin acceptance-rvf -- verify --input manifest.json
//! ```
|
|
|
|
use crate::acceptance_test::{run_acceptance_test_mode, AblationMode, HoldoutConfig};
|
|
use crate::temporal::PolicyKernel;
|
|
use rvf_crypto::shake256_256;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Witness record: one per puzzle per mode
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// A single witnessed puzzle outcome.
|
|
///
|
|
/// Captures the decision (skip_mode, context_bucket) and result (correct,
|
|
/// steps) for one puzzle in one ablation mode. These records form the
|
|
/// leaves of the witness chain.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct WitnessRecord {
|
|
/// Puzzle identifier (deterministic from seed)
|
|
pub puzzle_id: String,
|
|
/// Ablation mode ("A", "B", or "C")
|
|
pub mode: String,
|
|
/// Cycle number (0-indexed)
|
|
pub cycle: usize,
|
|
/// Skip mode chosen by the policy ("none", "weekday", "hybrid")
|
|
pub skip_mode: String,
|
|
/// Context bucket key (e.g., "large:heavy:noisy")
|
|
pub context_bucket: String,
|
|
/// Whether the solver got the correct answer
|
|
pub correct: bool,
|
|
/// Steps taken to solve
|
|
pub steps: usize,
|
|
/// Sequential record index within the chain
|
|
pub seq: usize,
|
|
}
|
|
|
|
impl WitnessRecord {
|
|
/// Canonical bytes for hashing. Deterministic regardless of serde.
|
|
fn canonical_bytes(&self) -> Vec<u8> {
|
|
let mut buf = Vec::with_capacity(256);
|
|
buf.extend_from_slice(self.puzzle_id.as_bytes());
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(self.mode.as_bytes());
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(&self.cycle.to_le_bytes());
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(self.skip_mode.as_bytes());
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(self.context_bucket.as_bytes());
|
|
buf.push(b'|');
|
|
buf.push(if self.correct { 1 } else { 0 });
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(&self.steps.to_le_bytes());
|
|
buf.push(b'|');
|
|
buf.extend_from_slice(&self.seq.to_le_bytes());
|
|
buf
|
|
}
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Chained witness: record + hash link
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// A witness record with its chain hash.
|
|
///
|
|
/// `chain_hash` = SHAKE-256(prev_chain_hash || canonical_bytes(record))
|
|
/// First record: prev_chain_hash = [0; 32] (genesis)
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct ChainedWitness {
|
|
pub record: WitnessRecord,
|
|
/// Hex-encoded SHAKE-256 chain hash for this entry
|
|
pub chain_hash: String,
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Mode scorecard
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// Aggregate metrics for one ablation mode.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct ModeScorecard {
|
|
pub mode: String,
|
|
pub total_puzzles: usize,
|
|
pub correct: usize,
|
|
pub accuracy: f64,
|
|
pub total_steps: usize,
|
|
pub cost_per_solve: f64,
|
|
pub noise_accuracy: f64,
|
|
pub violations: usize,
|
|
pub early_commit_penalty: f64,
|
|
pub skip_mode_distribution: HashMap<String, HashMap<String, usize>>,
|
|
/// Number of context buckets with data
|
|
pub context_buckets_used: usize,
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Ablation assertions
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// All six ablation assertions, each with pass/fail and measured value.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct AblationAssertions {
|
|
pub b_beats_a_cost: AssertionResult,
|
|
pub c_beats_b_robustness: AssertionResult,
|
|
pub compiler_safe: AssertionResult,
|
|
pub a_skip_nonzero: AssertionResult,
|
|
pub c_multi_mode: AssertionResult,
|
|
pub c_penalty_better_than_b: AssertionResult,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct AssertionResult {
|
|
pub name: String,
|
|
pub passed: bool,
|
|
pub measured: String,
|
|
pub threshold: String,
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// RVF Manifest: the publishable artifact
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// The complete publishable artifact.
|
|
///
|
|
/// Contains everything needed to verify reproducibility:
|
|
/// - Frozen config (seeds, budget, cycles)
|
|
/// - Per-mode scorecards
|
|
/// - Ablation assertions
|
|
/// - Full witness chain with hash links
|
|
/// - Chain root hash (final hash of the last entry)
|
|
///
|
|
/// An external developer can:
|
|
/// 1. Run `acceptance-rvf generate` with the same config
|
|
/// 2. Compare their `chain_root_hash` to this one
|
|
/// 3. If hashes match, outcomes are bit-for-bit identical
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct RvfManifest {
|
|
/// Format version for forward compatibility
|
|
pub version: u32,
|
|
/// Human-readable description
|
|
pub description: String,
|
|
/// Frozen configuration
|
|
pub config: ManifestConfig,
|
|
/// Per-mode scorecards
|
|
pub scorecards: Vec<ModeScorecard>,
|
|
/// Ablation assertions
|
|
pub assertions: AblationAssertions,
|
|
/// Whether all assertions passed
|
|
pub all_passed: bool,
|
|
/// Witness chain (every puzzle decision, hash-linked)
|
|
pub witness_chain: Vec<ChainedWitness>,
|
|
/// SHAKE-256 of the final chain entry (hex). This is THE reproducibility proof.
|
|
pub chain_root_hash: String,
|
|
/// Total witness records in the chain
|
|
pub chain_length: usize,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct ManifestConfig {
|
|
pub holdout_size: usize,
|
|
pub training_per_cycle: usize,
|
|
pub cycles: usize,
|
|
pub holdout_seed: String,
|
|
pub training_seed: String,
|
|
pub noise_rate: f64,
|
|
pub step_budget: usize,
|
|
pub min_accuracy: f64,
|
|
}
|
|
|
|
impl From<&HoldoutConfig> for ManifestConfig {
    /// Snapshots a run configuration for inclusion in a manifest.
    ///
    /// Numeric settings are copied verbatim; the two seeds are rendered as
    /// `0x`-prefixed, zero-padded, 16-digit uppercase hex strings.
    fn from(c: &HoldoutConfig) -> Self {
        let seed_to_hex = |seed: u64| format!("0x{:016X}", seed);

        let holdout_seed = seed_to_hex(c.holdout_seed);
        let training_seed = seed_to_hex(c.training_seed);

        Self {
            holdout_seed,
            training_seed,
            holdout_size: c.holdout_size,
            training_per_cycle: c.training_per_cycle,
            cycles: c.cycles,
            noise_rate: c.noise_rate,
            step_budget: c.step_budget,
            min_accuracy: c.min_accuracy,
        }
    }
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Witness chain builder
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// Builds a SHAKE-256-linked witness chain incrementally.
|
|
///
|
|
/// Uses `rvf_crypto::shake256_256` for hashing, compatible with the
|
|
/// native RVF WITNESS_SEG format.
|
|
pub struct WitnessChainBuilder {
|
|
entries: Vec<ChainedWitness>,
|
|
/// Parallel rvf-crypto WitnessEntry list for .rvf binary export
|
|
rvf_entries: Vec<rvf_crypto::WitnessEntry>,
|
|
prev_hash: [u8; 32],
|
|
seq: usize,
|
|
}
|
|
|
|
impl WitnessChainBuilder {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
entries: Vec::new(),
|
|
rvf_entries: Vec::new(),
|
|
prev_hash: [0u8; 32],
|
|
seq: 0,
|
|
}
|
|
}
|
|
|
|
/// Append a witness record to the chain.
|
|
///
|
|
/// The chain hash is: SHAKE-256(prev_hash || canonical_bytes(record))
|
|
/// Also builds the parallel rvf-crypto WitnessEntry for .rvf export.
|
|
pub fn append(&mut self, mut record: WitnessRecord) {
|
|
record.seq = self.seq;
|
|
self.seq += 1;
|
|
|
|
let canonical = record.canonical_bytes();
|
|
// Compute the action_hash from canonical bytes using SHAKE-256
|
|
let action_hash = shake256_256(&canonical);
|
|
|
|
// Chain: SHAKE-256(prev_hash || canonical_bytes)
|
|
let mut chain_input = Vec::with_capacity(32 + canonical.len());
|
|
chain_input.extend_from_slice(&self.prev_hash);
|
|
chain_input.extend_from_slice(&canonical);
|
|
let hash = shake256_256(&chain_input);
|
|
|
|
// Build the rvf-crypto WitnessEntry (73-byte entry for .rvf binary)
|
|
self.rvf_entries.push(rvf_crypto::WitnessEntry {
|
|
prev_hash: [0u8; 32], // overwritten by create_witness_chain
|
|
action_hash,
|
|
timestamp_ns: self.seq as u64, // deterministic pseudo-timestamp
|
|
witness_type: 0x02, // COMPUTATION witness type
|
|
});
|
|
|
|
self.prev_hash = hash;
|
|
self.entries.push(ChainedWitness {
|
|
record,
|
|
chain_hash: hex_encode(&hash),
|
|
});
|
|
}
|
|
|
|
/// Finalize and return the chain + root hash.
|
|
pub fn finalize(self) -> (Vec<ChainedWitness>, String) {
|
|
let root = hex_encode(&self.prev_hash);
|
|
(self.entries, root)
|
|
}
|
|
|
|
/// Get the rvf-crypto WitnessEntry list for .rvf binary export.
|
|
pub fn rvf_entries(&self) -> &[rvf_crypto::WitnessEntry] {
|
|
&self.rvf_entries
|
|
}
|
|
}
|
|
|
|
/// Renders `bytes` as lowercase hexadecimal, two digits per byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut text = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        text.push(char::from(DIGITS[usize::from(byte >> 4)]));
        text.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    text
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Chain verification
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// Verify the integrity of a witness chain.
|
|
///
|
|
/// Recomputes every chain_hash from the records and checks they match.
|
|
/// Returns Ok(root_hash) if the chain is valid, Err(index) if tampered.
|
|
pub fn verify_chain(chain: &[ChainedWitness]) -> Result<String, usize> {
|
|
let mut prev_hash = [0u8; 32];
|
|
|
|
for (i, entry) in chain.iter().enumerate() {
|
|
let canonical = entry.record.canonical_bytes();
|
|
let mut chain_input = Vec::with_capacity(32 + canonical.len());
|
|
chain_input.extend_from_slice(&prev_hash);
|
|
chain_input.extend_from_slice(&canonical);
|
|
let computed = shake256_256(&chain_input);
|
|
let computed_hex = hex_encode(&computed);
|
|
|
|
if computed_hex != entry.chain_hash {
|
|
return Err(i);
|
|
}
|
|
prev_hash = computed;
|
|
}
|
|
|
|
Ok(hex_encode(&prev_hash))
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Generate the publishable manifest
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// Run all three ablation modes and produce the publishable RVF manifest.
|
|
///
|
|
/// This is the entry point. Same config → same manifest → same chain_root_hash.
|
|
/// If `rvf_output_path` is provided, also exports the native `.rvf` binary.
|
|
pub fn generate_manifest(config: &HoldoutConfig) -> anyhow::Result<RvfManifest> {
|
|
generate_manifest_with_rvf(config, None)
|
|
}
|
|
|
|
/// Like `generate_manifest`, but also produces a `.rvf` binary file.
|
|
pub fn generate_manifest_with_rvf(
|
|
config: &HoldoutConfig,
|
|
rvf_output_path: Option<&str>,
|
|
) -> anyhow::Result<RvfManifest> {
|
|
let mut chain_builder = WitnessChainBuilder::new();
|
|
|
|
// Run all three modes
|
|
let mode_a = run_acceptance_test_mode(config, &AblationMode::Baseline)?;
|
|
collect_witnesses(&mut chain_builder, "A", &mode_a, config);
|
|
|
|
let mode_b = run_acceptance_test_mode(config, &AblationMode::CompilerOnly)?;
|
|
collect_witnesses(&mut chain_builder, "B", &mode_b, config);
|
|
|
|
let mode_c = run_acceptance_test_mode(config, &AblationMode::Full)?;
|
|
collect_witnesses(&mut chain_builder, "C", &mode_c, config);
|
|
|
|
// Build scorecards
|
|
let scorecards = vec![
|
|
build_scorecard("A (fixed policy)", &mode_a),
|
|
build_scorecard("B (compiled policy)", &mode_b),
|
|
build_scorecard("C (learned policy)", &mode_c),
|
|
];
|
|
|
|
// Compute ablation assertions
|
|
let assertions = compute_assertions(&mode_a, &mode_b, &mode_c);
|
|
let all_passed = assertions.b_beats_a_cost.passed
|
|
&& assertions.c_beats_b_robustness.passed
|
|
&& assertions.compiler_safe.passed
|
|
&& assertions.a_skip_nonzero.passed
|
|
&& assertions.c_multi_mode.passed
|
|
&& assertions.c_penalty_better_than_b.passed
|
|
&& mode_a.result.passed
|
|
&& mode_b.result.passed
|
|
&& mode_c.result.passed;
|
|
|
|
// Export .rvf binary before finalizing (consumes chain_builder entries)
|
|
if let Some(rvf_path) = rvf_output_path {
|
|
// Build the manifest struct first for the meta segment
|
|
let preview_manifest = RvfManifest {
|
|
version: 2,
|
|
description: String::new(),
|
|
config: ManifestConfig::from(config),
|
|
scorecards: scorecards.clone(),
|
|
assertions: assertions.clone(),
|
|
all_passed,
|
|
witness_chain: vec![],
|
|
chain_root_hash: String::new(),
|
|
chain_length: chain_builder.rvf_entries().len(),
|
|
};
|
|
export_rvf_binary(&preview_manifest, &chain_builder, rvf_path)?;
|
|
}
|
|
|
|
// Finalize witness chain
|
|
let (witness_chain, chain_root_hash) = chain_builder.finalize();
|
|
let chain_length = witness_chain.len();
|
|
|
|
Ok(RvfManifest {
|
|
version: 2,
|
|
description: "RuVector temporal reasoning ablation study — \
|
|
deterministic acceptance test with SHAKE-256 witness chain \
|
|
(rvf-crypto native)"
|
|
.to_string(),
|
|
config: ManifestConfig::from(config),
|
|
scorecards,
|
|
assertions,
|
|
all_passed,
|
|
witness_chain,
|
|
chain_root_hash,
|
|
chain_length,
|
|
})
|
|
}
|
|
|
|
/// Verify a manifest by re-running with the same config and comparing hashes.
|
|
pub fn verify_manifest(manifest: &RvfManifest) -> anyhow::Result<VerifyResult> {
|
|
// Step 1: Verify chain integrity (hashes link correctly)
|
|
let chain_result = verify_chain(&manifest.witness_chain);
|
|
let chain_valid = match &chain_result {
|
|
Ok(root) => root == &manifest.chain_root_hash,
|
|
Err(_) => false,
|
|
};
|
|
|
|
if !chain_valid {
|
|
return Ok(VerifyResult {
|
|
chain_integrity: false,
|
|
outcomes_match: false,
|
|
root_hash_match: false,
|
|
recomputed_root: chain_result.unwrap_or_default(),
|
|
expected_root: manifest.chain_root_hash.clone(),
|
|
mismatched_records: vec![],
|
|
});
|
|
}
|
|
|
|
// Step 2: Re-run with same config
|
|
let config = holdout_config_from_manifest(&manifest.config);
|
|
let fresh = generate_manifest(&config)?;
|
|
|
|
// Step 3: Compare root hashes
|
|
let root_match = fresh.chain_root_hash == manifest.chain_root_hash;
|
|
|
|
// Step 4: Find any mismatched records
|
|
let mut mismatches = Vec::new();
|
|
let max_len = manifest.witness_chain.len().min(fresh.witness_chain.len());
|
|
for i in 0..max_len {
|
|
let orig = &manifest.witness_chain[i];
|
|
let new = &fresh.witness_chain[i];
|
|
if orig.chain_hash != new.chain_hash {
|
|
mismatches.push(i);
|
|
if mismatches.len() >= 10 {
|
|
break; // cap output
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(VerifyResult {
|
|
chain_integrity: true,
|
|
outcomes_match: mismatches.is_empty() && manifest.chain_length == fresh.chain_length,
|
|
root_hash_match: root_match,
|
|
recomputed_root: fresh.chain_root_hash,
|
|
expected_root: manifest.chain_root_hash.clone(),
|
|
mismatched_records: mismatches,
|
|
})
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct VerifyResult {
|
|
pub chain_integrity: bool,
|
|
pub outcomes_match: bool,
|
|
pub root_hash_match: bool,
|
|
pub recomputed_root: String,
|
|
pub expected_root: String,
|
|
pub mismatched_records: Vec<usize>,
|
|
}
|
|
|
|
impl VerifyResult {
|
|
pub fn print(&self) {
|
|
println!();
|
|
println!(" Witness Chain Verification:");
|
|
println!(
|
|
" Chain integrity: {}",
|
|
if self.chain_integrity { "PASS" } else { "FAIL" }
|
|
);
|
|
println!(
|
|
" Outcomes match: {}",
|
|
if self.outcomes_match { "PASS" } else { "FAIL" }
|
|
);
|
|
println!(
|
|
" Root hash match: {}",
|
|
if self.root_hash_match { "PASS" } else { "FAIL" }
|
|
);
|
|
println!(" Expected root: {}", &self.expected_root[..16]);
|
|
println!(
|
|
" Recomputed root: {}",
|
|
&self.recomputed_root[..self.recomputed_root.len().min(16)]
|
|
);
|
|
if !self.mismatched_records.is_empty() {
|
|
println!(" Mismatched at: {:?}", self.mismatched_records);
|
|
}
|
|
println!();
|
|
}
|
|
|
|
pub fn passed(&self) -> bool {
|
|
self.chain_integrity && self.outcomes_match && self.root_hash_match
|
|
}
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Native .rvf binary export
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
/// Export the manifest as a native `.rvf` binary file.
|
|
///
|
|
/// Produces a file with two segments:
|
|
/// - **WITNESS_SEG** (0x0A): The SHAKE-256 witness chain as 73-byte entries
|
|
/// created by `rvf_crypto::create_witness_chain()`, verifiable by any
|
|
/// RVF-compatible tool including the WASM microkernel.
|
|
/// - **META_SEG** (0x07): JSON-encoded scorecard + assertions metadata.
|
|
///
|
|
/// The resulting file is a valid `.rvf` file that can be inspected with
|
|
/// `rvf inspect`, verified with `rvf verify-witness`, or loaded in the
|
|
/// browser via the WASM runtime's `rvf_witness_verify` export.
|
|
pub fn export_rvf_binary(
|
|
manifest: &RvfManifest,
|
|
chain_builder: &WitnessChainBuilder,
|
|
path: &str,
|
|
) -> anyhow::Result<()> {
|
|
use rvf_crypto::create_witness_chain;
|
|
use rvf_types::{SegmentFlags, SegmentType};
|
|
use rvf_wire::write_segment;
|
|
|
|
// Build the native SHAKE-256 witness chain from the rvf-crypto entries
|
|
let witness_bytes = create_witness_chain(chain_builder.rvf_entries());
|
|
|
|
// Write WITNESS_SEG (0x0A) with SEALED flag
|
|
let witness_seg = write_segment(
|
|
SegmentType::Witness as u8,
|
|
&witness_bytes,
|
|
SegmentFlags::empty().with(SegmentFlags::SEALED),
|
|
1, // segment_id
|
|
);
|
|
|
|
// Build metadata JSON payload
|
|
let meta = serde_json::json!({
|
|
"format": "rvf-acceptance-test",
|
|
"version": manifest.version,
|
|
"chain_root_hash": manifest.chain_root_hash,
|
|
"chain_length": manifest.chain_length,
|
|
"all_passed": manifest.all_passed,
|
|
"config": manifest.config,
|
|
"scorecards": manifest.scorecards,
|
|
"assertions": manifest.assertions,
|
|
});
|
|
let meta_bytes = serde_json::to_vec(&meta)?;
|
|
|
|
// Write META_SEG (0x07)
|
|
let meta_seg = write_segment(
|
|
SegmentType::Meta as u8,
|
|
&meta_bytes,
|
|
SegmentFlags::empty().with(SegmentFlags::SEALED),
|
|
2, // segment_id
|
|
);
|
|
|
|
// Concatenate segments into a single .rvf file
|
|
let mut rvf_file = Vec::with_capacity(witness_seg.len() + meta_seg.len());
|
|
rvf_file.extend_from_slice(&witness_seg);
|
|
rvf_file.extend_from_slice(&meta_seg);
|
|
|
|
std::fs::write(path, &rvf_file)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Verify the native `.rvf` binary witness chain.
|
|
///
|
|
/// Reads the WITNESS_SEG payload and runs `rvf_crypto::verify_witness_chain`.
|
|
pub fn verify_rvf_binary(path: &str) -> anyhow::Result<usize> {
|
|
use rvf_crypto::verify_witness_chain;
|
|
use rvf_types::SEGMENT_HEADER_SIZE;
|
|
|
|
let data = std::fs::read(path)?;
|
|
if data.len() < SEGMENT_HEADER_SIZE {
|
|
anyhow::bail!("File too small for a valid segment");
|
|
}
|
|
|
|
// Parse the first segment header to get payload length
|
|
let seg_type = data[5];
|
|
if seg_type != 0x0A {
|
|
anyhow::bail!("First segment is not WITNESS_SEG (got 0x{:02X})", seg_type);
|
|
}
|
|
|
|
let payload_len = u64::from_le_bytes(
|
|
data[0x10..0x18]
|
|
.try_into()
|
|
.map_err(|_| anyhow::anyhow!("Bad header"))?,
|
|
) as usize;
|
|
|
|
let payload_start = SEGMENT_HEADER_SIZE;
|
|
let payload_end = payload_start + payload_len;
|
|
if data.len() < payload_end {
|
|
anyhow::bail!("Truncated witness payload");
|
|
}
|
|
|
|
let witness_data = &data[payload_start..payload_end];
|
|
let entries = verify_witness_chain(witness_data)
|
|
.map_err(|e| anyhow::anyhow!("Witness chain verification failed: {:?}", e))?;
|
|
|
|
Ok(entries.len())
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Internal helpers
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
fn collect_witnesses(
|
|
builder: &mut WitnessChainBuilder,
|
|
mode_label: &str,
|
|
result: &crate::acceptance_test::AblationResult,
|
|
_config: &HoldoutConfig,
|
|
) {
|
|
// Witness each cycle's holdout metrics
|
|
for cm in &result.result.cycles {
|
|
builder.append(WitnessRecord {
|
|
puzzle_id: format!("cycle_{}_holdout", cm.cycle),
|
|
mode: mode_label.to_string(),
|
|
cycle: cm.cycle,
|
|
skip_mode: "aggregate".to_string(),
|
|
context_bucket: "holdout".to_string(),
|
|
correct: cm.holdout_accuracy >= 0.5,
|
|
steps: cm.holdout_cost_per_solve as usize,
|
|
seq: 0,
|
|
});
|
|
}
|
|
|
|
// Witness skip-mode distribution (each bucket is a witness record)
|
|
// Sort keys for deterministic iteration order
|
|
let mut buckets: Vec<&String> = result.skip_mode_distribution.keys().collect();
|
|
buckets.sort();
|
|
for bucket in buckets {
|
|
let dist = &result.skip_mode_distribution[bucket];
|
|
let mut mode_names: Vec<&String> = dist.keys().collect();
|
|
mode_names.sort();
|
|
for mode_name in mode_names {
|
|
let count = dist[mode_name];
|
|
builder.append(WitnessRecord {
|
|
puzzle_id: format!("dist_{}_{}", bucket, mode_name),
|
|
mode: mode_label.to_string(),
|
|
cycle: result.result.cycles.len(),
|
|
skip_mode: mode_name.clone(),
|
|
context_bucket: bucket.clone(),
|
|
correct: true,
|
|
steps: count,
|
|
seq: 0,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Witness compiler and penalty stats
|
|
builder.append(WitnessRecord {
|
|
puzzle_id: "compiler_stats".to_string(),
|
|
mode: mode_label.to_string(),
|
|
cycle: 0,
|
|
skip_mode: format!("hits:{}", result.compiler_hits),
|
|
context_bucket: format!("misses:{}", result.compiler_misses),
|
|
correct: result.compiler_false_hits == 0,
|
|
steps: result.compiler_false_hits,
|
|
seq: 0,
|
|
});
|
|
|
|
builder.append(WitnessRecord {
|
|
puzzle_id: "penalty_stats".to_string(),
|
|
mode: mode_label.to_string(),
|
|
cycle: 0,
|
|
skip_mode: format!("rate:{:.4}", result.early_commit_rate),
|
|
context_bucket: format!("penalty:{:.4}", result.early_commit_penalties),
|
|
correct: true,
|
|
steps: result.policy_context_buckets,
|
|
seq: 0,
|
|
});
|
|
}
|
|
|
|
fn build_scorecard(label: &str, result: &crate::acceptance_test::AblationResult) -> ModeScorecard {
|
|
let last = result.result.cycles.last();
|
|
ModeScorecard {
|
|
mode: label.to_string(),
|
|
total_puzzles: result.result.cycles.len(),
|
|
correct: last
|
|
.map(|c| (c.holdout_accuracy * 100.0) as usize)
|
|
.unwrap_or(0),
|
|
accuracy: last.map(|c| c.holdout_accuracy).unwrap_or(0.0),
|
|
total_steps: last.map(|c| c.holdout_cost_per_solve as usize).unwrap_or(0),
|
|
cost_per_solve: last.map(|c| c.holdout_cost_per_solve).unwrap_or(0.0),
|
|
noise_accuracy: last.map(|c| c.holdout_noise_accuracy).unwrap_or(0.0),
|
|
violations: last.map(|c| c.holdout_violations).unwrap_or(0),
|
|
early_commit_penalty: result.early_commit_penalties,
|
|
skip_mode_distribution: result.skip_mode_distribution.clone(),
|
|
context_buckets_used: result.policy_context_buckets,
|
|
}
|
|
}
|
|
|
|
fn compute_assertions(
|
|
mode_a: &crate::acceptance_test::AblationResult,
|
|
mode_b: &crate::acceptance_test::AblationResult,
|
|
mode_c: &crate::acceptance_test::AblationResult,
|
|
) -> AblationAssertions {
|
|
let last_a = mode_a.result.cycles.last().unwrap();
|
|
let last_b = mode_b.result.cycles.last().unwrap();
|
|
let last_c = mode_c.result.cycles.last().unwrap();
|
|
|
|
let cost_decrease = if last_a.holdout_cost_per_solve > 0.0 {
|
|
1.0 - (last_b.holdout_cost_per_solve / last_a.holdout_cost_per_solve)
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
let robustness_gain = last_c.holdout_noise_accuracy - last_b.holdout_noise_accuracy;
|
|
|
|
let total_compiler = mode_b.compiler_hits + mode_b.compiler_misses;
|
|
let false_hit_rate = if total_compiler > 0 {
|
|
mode_b.compiler_false_hits as f64 / total_compiler as f64
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
let a_total_skip: usize = mode_a
|
|
.skip_mode_distribution
|
|
.values()
|
|
.flat_map(|m| m.iter())
|
|
.filter(|(name, _)| *name != "none")
|
|
.map(|(_, c)| *c)
|
|
.sum();
|
|
|
|
let c_unique_modes: std::collections::HashSet<&str> = mode_c
|
|
.skip_mode_distribution
|
|
.values()
|
|
.flat_map(|m| m.keys())
|
|
.map(|s| s.as_str())
|
|
.collect();
|
|
|
|
let b_penalty = mode_b.early_commit_penalties;
|
|
let c_penalty = mode_c.early_commit_penalties;
|
|
let penalty_ok = if b_penalty > 0.0 {
|
|
c_penalty <= b_penalty * 0.90
|
|
} else {
|
|
c_penalty == 0.0
|
|
};
|
|
|
|
AblationAssertions {
|
|
b_beats_a_cost: AssertionResult {
|
|
name: "B beats A on cost (>=15%)".to_string(),
|
|
passed: cost_decrease >= 0.15,
|
|
measured: format!("{:.1}%", cost_decrease * 100.0),
|
|
threshold: ">=15%".to_string(),
|
|
},
|
|
c_beats_b_robustness: AssertionResult {
|
|
name: "C beats B on robustness (>=10%)".to_string(),
|
|
passed: robustness_gain >= 0.10,
|
|
measured: format!("{:.1}%", robustness_gain * 100.0),
|
|
threshold: ">=10%".to_string(),
|
|
},
|
|
compiler_safe: AssertionResult {
|
|
name: "Compiler false-hit rate <5%".to_string(),
|
|
passed: false_hit_rate < 0.05,
|
|
measured: format!("{:.1}%", false_hit_rate * 100.0),
|
|
threshold: "<5%".to_string(),
|
|
},
|
|
a_skip_nonzero: AssertionResult {
|
|
name: "Mode A skip usage nonzero".to_string(),
|
|
passed: a_total_skip > 0,
|
|
measured: format!("{}", a_total_skip),
|
|
threshold: ">0".to_string(),
|
|
},
|
|
c_multi_mode: AssertionResult {
|
|
name: "Mode C uses multiple skip modes".to_string(),
|
|
passed: c_unique_modes.len() >= 2,
|
|
measured: format!("{} modes", c_unique_modes.len()),
|
|
threshold: ">=2".to_string(),
|
|
},
|
|
c_penalty_better_than_b: AssertionResult {
|
|
name: "C penalty < B penalty (distract)".to_string(),
|
|
passed: penalty_ok,
|
|
measured: format!("C={:.2} B={:.2}", c_penalty, b_penalty),
|
|
threshold: "C <= 90% of B".to_string(),
|
|
},
|
|
}
|
|
}
|
|
|
|
fn holdout_config_from_manifest(mc: &ManifestConfig) -> HoldoutConfig {
|
|
let holdout_seed = u64::from_str_radix(
|
|
mc.holdout_seed
|
|
.trim_start_matches("0x")
|
|
.trim_start_matches("0X"),
|
|
16,
|
|
)
|
|
.unwrap_or(0xDEAD_BEEF);
|
|
let training_seed = u64::from_str_radix(
|
|
mc.training_seed
|
|
.trim_start_matches("0x")
|
|
.trim_start_matches("0X"),
|
|
16,
|
|
)
|
|
.unwrap_or(42);
|
|
|
|
HoldoutConfig {
|
|
holdout_size: mc.holdout_size,
|
|
training_per_cycle: mc.training_per_cycle,
|
|
cycles: mc.cycles,
|
|
holdout_seed,
|
|
training_seed,
|
|
noise_rate: mc.noise_rate,
|
|
step_budget: mc.step_budget,
|
|
min_accuracy: mc.min_accuracy,
|
|
min_dimensions_improved: 2,
|
|
verbose: false,
|
|
}
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Pretty-print
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
impl RvfManifest {
|
|
pub fn print_summary(&self) {
|
|
println!();
|
|
println!("╔══════════════════════════════════════════════════════════════╗");
|
|
println!("║ PUBLISHABLE RVF ACCEPTANCE TEST ║");
|
|
println!("╚══════════════════════════════════════════════════════════════╝");
|
|
println!();
|
|
println!(" Config:");
|
|
println!(
|
|
" Holdout: {} puzzles (seed {})",
|
|
self.config.holdout_size, self.config.holdout_seed
|
|
);
|
|
println!(
|
|
" Training: {} per cycle x {} cycles",
|
|
self.config.training_per_cycle, self.config.cycles
|
|
);
|
|
println!(
|
|
" Budget: {} steps, noise rate {:.0}%",
|
|
self.config.step_budget,
|
|
self.config.noise_rate * 100.0
|
|
);
|
|
println!();
|
|
|
|
println!(
|
|
" {:<22} {:>8} {:>12} {:>10} {:>6}",
|
|
"Mode", "Acc%", "Cost/Solve", "Noise%", "Viol"
|
|
);
|
|
println!(" {}", "-".repeat(62));
|
|
for sc in &self.scorecards {
|
|
println!(
|
|
" {:<22} {:>6.1}% {:>11.2} {:>8.1}% {:>5}",
|
|
sc.mode,
|
|
sc.accuracy * 100.0,
|
|
sc.cost_per_solve,
|
|
sc.noise_accuracy * 100.0,
|
|
sc.violations
|
|
);
|
|
}
|
|
println!();
|
|
|
|
println!(" Ablation Assertions:");
|
|
for a in [
|
|
&self.assertions.b_beats_a_cost,
|
|
&self.assertions.c_beats_b_robustness,
|
|
&self.assertions.compiler_safe,
|
|
&self.assertions.a_skip_nonzero,
|
|
&self.assertions.c_multi_mode,
|
|
&self.assertions.c_penalty_better_than_b,
|
|
] {
|
|
println!(
|
|
" {:<40} {} ({})",
|
|
a.name,
|
|
if a.passed { "PASS" } else { "FAIL" },
|
|
a.measured
|
|
);
|
|
}
|
|
println!();
|
|
|
|
println!(" Witness Chain:");
|
|
println!(" Records: {}", self.chain_length);
|
|
println!(
|
|
" Root hash: {}",
|
|
&self.chain_root_hash[..32.min(self.chain_root_hash.len())]
|
|
);
|
|
println!();
|
|
|
|
if self.all_passed {
|
|
println!(" RESULT: ALL PASSED — artifact is publishable");
|
|
} else {
|
|
println!(" RESULT: SOME CRITERIA NOT MET");
|
|
}
|
|
println!();
|
|
}
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
// Tests
|
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record with `seq` left at zero; the builder assigns the real
    /// sequence number on append.
    fn record(
        puzzle_id: &str,
        mode: &str,
        cycle: usize,
        skip_mode: &str,
        context_bucket: &str,
        correct: bool,
        steps: usize,
    ) -> WitnessRecord {
        WitnessRecord {
            puzzle_id: puzzle_id.to_string(),
            mode: mode.to_string(),
            cycle,
            skip_mode: skip_mode.to_string(),
            context_bucket: context_bucket.to_string(),
            correct,
            steps,
            seq: 0,
        }
    }

    /// A deliberately tiny configuration so manifest generation stays fast.
    fn small_config() -> HoldoutConfig {
        HoldoutConfig {
            holdout_size: 10,
            training_per_cycle: 10,
            cycles: 2,
            step_budget: 200,
            min_accuracy: 0.30,
            min_dimensions_improved: 0,
            verbose: false,
            ..Default::default()
        }
    }

    #[test]
    fn witness_chain_integrity() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..5 {
            builder.append(record(
                &format!("puzzle_{}", i),
                "A",
                0,
                "none",
                "small:clean:clean",
                true,
                10 + i,
            ));
        }
        let (chain, root) = builder.finalize();
        assert_eq!(chain.len(), 5);
        assert!(!root.is_empty());

        // Re-hashing the finished chain must reproduce the builder's root.
        let verified_root = verify_chain(&chain).unwrap();
        assert_eq!(verified_root, root);
    }

    #[test]
    fn tampered_chain_detected() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..3 {
            builder.append(record(
                &format!("puzzle_{}", i),
                "B",
                0,
                "weekday",
                "large:heavy:noisy",
                i != 1,
                20,
            ));
        }
        let (mut chain, _) = builder.finalize();

        // Tamper: flip the `correct` field of the middle record.
        chain[1].record.correct = true;
        assert!(verify_chain(&chain).is_err());
    }

    #[test]
    fn deterministic_chain() {
        // Same inputs → same root hash
        let root_of_single_record = || {
            let mut builder = WitnessChainBuilder::new();
            builder.append(record(
                "p1",
                "C",
                1,
                "hybrid",
                "medium:some:clean",
                true,
                42,
            ));
            let (_, root) = builder.finalize();
            root
        };
        assert_eq!(root_of_single_record(), root_of_single_record());
    }

    #[test]
    fn manifest_generation_small() {
        let manifest = generate_manifest(&small_config()).unwrap();
        assert_eq!(manifest.version, 2);
        assert_eq!(manifest.scorecards.len(), 3);
        assert!(!manifest.chain_root_hash.is_empty());
        assert!(manifest.chain_length > 0);

        // The embedded chain must verify and end in the recorded root.
        let root = verify_chain(&manifest.witness_chain).unwrap();
        assert_eq!(root, manifest.chain_root_hash);
    }

    #[test]
    fn manifest_deterministic_replay() {
        let config = small_config();
        let first = generate_manifest(&config).unwrap();
        let second = generate_manifest(&config).unwrap();
        assert_eq!(first.chain_root_hash, second.chain_root_hash);
        assert_eq!(first.chain_length, second.chain_length);
    }
}
}
|