Files
wifi-densepose/vendor/ruvector/examples/benchmarks/src/publishable_rvf.rs

1005 lines
38 KiB
Rust

//! Publishable RVF Acceptance Test
//!
//! Produces a self-contained artifact that an external developer can run
//! offline and reproduce identical graded outcomes, plus verify the witness
//! chain cryptographically.
//!
//! ## Architecture
//!
//! 1. **Deterministic execution**: Frozen seeds → identical puzzles → identical
//! solve paths → identical outcomes. No network, no randomness, no clock.
//!
//! 2. **Witness chain**: Every puzzle decision (skip_mode chosen, context bucket,
//! steps taken, correct/wrong) is hashed into a SHAKE-256 chain using the
//! native `rvf-crypto` witness infrastructure. Changing any single bit in
//! any record invalidates the entire chain from that point.
//!
//! 3. **Graded scorecard**: Per-mode (A/B/C) aggregate metrics plus ablation
//! assertions, all serialized to JSON.
//!
//! 4. **Binary .rvf output**: The witness chain is also written as a native
//! WITNESS_SEG (0x0A) + META_SEG (0x07) in the RVF wire format, producing
//! a `.rvf` file verifiable by any RVF-compatible tool or WASM runtime.
//!
//! 5. **Verification**: Re-run with same config → re-generate chain → compare
//! chain root hash. If it matches, outcomes are identical.
//!
//! ## Usage
//!
//! ```bash
//! # Generate the manifest (JSON + optional .rvf binary)
//! cargo run --bin acceptance-rvf -- generate --output manifest.json
//!
//! # Verify a previously generated manifest
//! cargo run --bin acceptance-rvf -- verify --input manifest.json
//! ```
use crate::acceptance_test::{run_acceptance_test_mode, AblationMode, HoldoutConfig};
use crate::temporal::PolicyKernel;
use rvf_crypto::shake256_256;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
// ═══════════════════════════════════════════════════════════════════════════
// Witness record: one per puzzle per mode
// ═══════════════════════════════════════════════════════════════════════════
/// A single witnessed puzzle outcome.
///
/// Captures the decision (skip_mode, context_bucket) and result (correct,
/// steps) for one puzzle in one ablation mode. These records form the
/// leaves of the witness chain.
///
/// Every field is folded into the chain hash by `canonical_bytes`, so changing
/// any of them changes the `chain_hash` of the entry that carries the record.
///
/// `collect_witnesses` in this file also reuses the record for per-cycle
/// aggregates and for statistics entries; in those records `skip_mode`,
/// `context_bucket` and `steps` carry other values (for example formatted
/// hit/miss counters) rather than a literal skip mode or step count.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct WitnessRecord {
/// Puzzle identifier (deterministic from seed)
pub puzzle_id: String,
/// Ablation mode ("A", "B", or "C")
pub mode: String,
/// Cycle number (0-indexed)
pub cycle: usize,
/// Skip mode chosen by the policy ("none", "weekday", "hybrid")
pub skip_mode: String,
/// Context bucket key (e.g., "large:heavy:noisy")
pub context_bucket: String,
/// Whether the solver got the correct answer
pub correct: bool,
/// Steps taken to solve
pub steps: usize,
/// Sequential record index within the chain.
/// `WitnessChainBuilder::append` overwrites this with its own counter, so
/// callers can pass any placeholder (the code in this file passes 0).
pub seq: usize,
}
impl WitnessRecord {
    /// Canonical bytes for hashing. Deterministic regardless of serde.
    ///
    /// Layout (fields separated by a single `|` byte):
    /// `puzzle_id | mode | cycle | skip_mode | context_bucket | correct | steps | seq`
    ///
    /// * strings are emitted as their raw UTF-8 bytes;
    /// * `correct` is one byte, `1` for true and `0` for false;
    /// * `cycle`, `steps` and `seq` are emitted as 8-byte little-endian values.
    ///
    /// The integers are widened to `u64` before encoding. `usize::to_le_bytes`
    /// yields 4 bytes on 32-bit targets (including wasm32) and 8 bytes on
    /// 64-bit targets, which would make the chain hash depend on the platform
    /// that produced it. The widened form is byte-identical to what 64-bit
    /// builds already emitted.
    fn canonical_bytes(&self) -> Vec<u8> {
        const SEP: u8 = b'|';

        // `usize` is never wider than 64 bits on supported targets, so these
        // casts are lossless.
        let cycle = (self.cycle as u64).to_le_bytes();
        let steps = (self.steps as u64).to_le_bytes();
        let seq = (self.seq as u64).to_le_bytes();

        let mut buf = Vec::with_capacity(256);
        buf.extend_from_slice(self.puzzle_id.as_bytes());
        buf.push(SEP);
        buf.extend_from_slice(self.mode.as_bytes());
        buf.push(SEP);
        buf.extend_from_slice(&cycle);
        buf.push(SEP);
        buf.extend_from_slice(self.skip_mode.as_bytes());
        buf.push(SEP);
        buf.extend_from_slice(self.context_bucket.as_bytes());
        buf.push(SEP);
        buf.push(u8::from(self.correct));
        buf.push(SEP);
        buf.extend_from_slice(&steps);
        buf.push(SEP);
        buf.extend_from_slice(&seq);
        buf
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// Chained witness: record + hash link
// ═══════════════════════════════════════════════════════════════════════════
/// A witness record with its chain hash.
///
/// `chain_hash` = SHAKE-256(prev_chain_hash || canonical_bytes(record))
/// First record: prev_chain_hash = [0; 32] (genesis)
///
/// `verify_chain` recomputes this value for every entry, in order, and
/// reports the index of the first entry whose stored hash does not match.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChainedWitness {
/// The witnessed outcome this link commits to
pub record: WitnessRecord,
/// Hex-encoded SHAKE-256 chain hash for this entry
/// (lower-case, two hex digits per byte of the 32-byte digest)
pub chain_hash: String,
}
// ═══════════════════════════════════════════════════════════════════════════
// Mode scorecard
// ═══════════════════════════════════════════════════════════════════════════
/// Aggregate metrics for one ablation mode.
///
/// Populated by `build_scorecard`. The per-cycle values are taken from the
/// last cycle of the run and are zero when the run reported no cycles.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ModeScorecard {
/// Display label of the mode, e.g. "A (fixed policy)"
pub mode: String,
/// NOTE(review): `build_scorecard` stores the number of cycles here, not a
/// number of puzzles — confirm which one consumers expect.
pub total_puzzles: usize,
/// NOTE(review): `build_scorecard` stores the last cycle's holdout accuracy
/// as a truncated percentage (`accuracy * 100`), not a count of correct
/// puzzles — confirm which one consumers expect.
pub correct: usize,
/// Holdout accuracy of the last cycle
pub accuracy: f64,
/// Last cycle's holdout cost per solve, truncated to an integer
pub total_steps: usize,
/// Last cycle's holdout cost per solve
pub cost_per_solve: f64,
/// Last cycle's holdout noise accuracy
pub noise_accuracy: f64,
/// Last cycle's holdout violation count
pub violations: usize,
/// The run's `early_commit_penalties` value
pub early_commit_penalty: f64,
/// Copy of the run's skip-mode distribution. `collect_witnesses` treats the
/// outer key as a context bucket, the inner key as a skip-mode name and the
/// value as a count.
pub skip_mode_distribution: HashMap<String, HashMap<String, usize>>,
/// Number of context buckets with data
pub context_buckets_used: usize,
}
// ═══════════════════════════════════════════════════════════════════════════
// Ablation assertions
// ═══════════════════════════════════════════════════════════════════════════
/// All six ablation assertions, each with pass/fail and measured value.
///
/// Filled in by `compute_assertions` from the results of modes A, B and C.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AblationAssertions {
/// Mode B's final cost per solve is at least 15% below mode A's
pub b_beats_a_cost: AssertionResult,
/// Mode C's final noise accuracy exceeds mode B's by at least 0.10
pub c_beats_b_robustness: AssertionResult,
/// Mode B's compiler false-hit rate is below 5%
pub compiler_safe: AssertionResult,
/// Mode A recorded at least one skip whose mode name is not "none"
pub a_skip_nonzero: AssertionResult,
/// Mode C's skip-mode distribution contains at least two distinct mode names
pub c_multi_mode: AssertionResult,
/// Mode C's early-commit penalty is at most 90% of mode B's
/// (or exactly zero when B's penalty is not positive)
pub c_penalty_better_than_b: AssertionResult,
}
/// Outcome of one ablation assertion, in a form suitable for display.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AssertionResult {
/// Human-readable title of the assertion
pub name: String,
/// Whether the measured value satisfied the threshold
pub passed: bool,
/// The measured value, already formatted for display (e.g. "17.3%")
pub measured: String,
/// The threshold the value was compared against, formatted for display (e.g. ">=15%")
pub threshold: String,
}
// ═══════════════════════════════════════════════════════════════════════════
// RVF Manifest: the publishable artifact
// ═══════════════════════════════════════════════════════════════════════════
/// The complete publishable artifact.
///
/// Contains everything needed to verify reproducibility:
/// - Frozen config (seeds, budget, cycles)
/// - Per-mode scorecards
/// - Ablation assertions
/// - Full witness chain with hash links
/// - Chain root hash (final hash of the last entry)
///
/// An external developer can:
/// 1. Run `acceptance-rvf generate` with the same config
/// 2. Compare their `chain_root_hash` to this one
/// 3. If hashes match, outcomes are bit-for-bit identical
///
/// `verify_manifest` automates these steps: it first checks that the stored
/// chain links hash correctly, then re-runs the test from `config` and
/// compares the two chains.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RvfManifest {
/// Format version for forward compatibility
/// (`generate_manifest_with_rvf` currently writes 2)
pub version: u32,
/// Human-readable description
pub description: String,
/// Frozen configuration
pub config: ManifestConfig,
/// Per-mode scorecards
pub scorecards: Vec<ModeScorecard>,
/// Ablation assertions
pub assertions: AblationAssertions,
/// Whether all assertions passed
/// (the generator also requires each mode's own result to have passed)
pub all_passed: bool,
/// Witness chain (every puzzle decision, hash-linked)
pub witness_chain: Vec<ChainedWitness>,
/// SHAKE-256 of the final chain entry (hex). This is THE reproducibility proof.
pub chain_root_hash: String,
/// Total witness records in the chain
pub chain_length: usize,
}
/// Serializable snapshot of the `HoldoutConfig` a manifest was generated with.
///
/// Built by `From<&HoldoutConfig>` and turned back into a `HoldoutConfig` by
/// `holdout_config_from_manifest` when a manifest is verified. All fields
/// other than the two seeds are copied verbatim from `HoldoutConfig`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ManifestConfig {
pub holdout_size: usize,
pub training_per_cycle: usize,
pub cycles: usize,
/// Holdout seed as a `0x`-prefixed, zero-padded, 16-digit upper-case hex string
pub holdout_seed: String,
/// Training seed, in the same hex format as `holdout_seed`
pub training_seed: String,
pub noise_rate: f64,
pub step_budget: usize,
pub min_accuracy: f64,
}
impl From<&HoldoutConfig> for ManifestConfig {
fn from(c: &HoldoutConfig) -> Self {
Self {
holdout_size: c.holdout_size,
training_per_cycle: c.training_per_cycle,
cycles: c.cycles,
holdout_seed: format!("0x{:016X}", c.holdout_seed),
training_seed: format!("0x{:016X}", c.training_seed),
noise_rate: c.noise_rate,
step_budget: c.step_budget,
min_accuracy: c.min_accuracy,
}
}
}
// ═══════════════════════════════════════════════════════════════════════════
// Witness chain builder
// ═══════════════════════════════════════════════════════════════════════════
/// Builds a SHAKE-256-linked witness chain incrementally.
///
/// Uses `rvf_crypto::shake256_256` for hashing, compatible with the
/// native RVF WITNESS_SEG format.
///
/// Two parallel lists are maintained: the JSON-facing `ChainedWitness`
/// entries and the `rvf_crypto::WitnessEntry` values used when the chain is
/// exported as a `.rvf` binary.
pub struct WitnessChainBuilder {
/// Hash-linked records in append order
entries: Vec<ChainedWitness>,
/// Parallel rvf-crypto WitnessEntry list for .rvf binary export
rvf_entries: Vec<rvf_crypto::WitnessEntry>,
/// Chain hash of the most recently appended record; all zeros before the first append
prev_hash: [u8; 32],
/// Sequence number that will be assigned to the next appended record
seq: usize,
}
impl WitnessChainBuilder {
    /// Create an empty builder.
    ///
    /// The first appended record chains from the all-zero genesis hash and
    /// receives sequence number 0.
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
            rvf_entries: Vec::new(),
            prev_hash: [0u8; 32],
            seq: 0,
        }
    }

    /// Append a witness record to the chain.
    ///
    /// The record's `seq` is overwritten with the builder's running counter
    /// before hashing, so the caller-supplied value is ignored.
    ///
    /// The chain hash is: SHAKE-256(prev_hash || canonical_bytes(record))
    /// Also builds the parallel rvf-crypto WitnessEntry for .rvf export.
    pub fn append(&mut self, mut record: WitnessRecord) {
        record.seq = self.seq;
        self.seq += 1;

        let canonical = record.canonical_bytes();

        // Action hash for the binary entry: SHAKE-256 over the record alone.
        let action_hash = shake256_256(&canonical);

        // Chain hash for the JSON entry: SHAKE-256(prev_hash || canonical_bytes).
        let mut chain_input = Vec::with_capacity(self.prev_hash.len() + canonical.len());
        chain_input.extend_from_slice(&self.prev_hash);
        chain_input.extend_from_slice(&canonical);
        let hash = shake256_256(&chain_input);

        // Build the rvf-crypto WitnessEntry (73-byte entry for .rvf binary).
        // The counter was already advanced, so the pseudo-timestamp is the
        // record's sequence number plus one.
        self.rvf_entries.push(rvf_crypto::WitnessEntry {
            prev_hash: [0u8; 32], // overwritten by create_witness_chain
            action_hash,
            timestamp_ns: self.seq as u64, // deterministic pseudo-timestamp
            witness_type: 0x02,            // COMPUTATION witness type
        });

        self.prev_hash = hash;
        self.entries.push(ChainedWitness {
            record,
            chain_hash: hex_encode(&hash),
        });
    }

    /// Finalize and return the chain + root hash.
    ///
    /// The root hash is the hex encoding of the last chain hash, or of the
    /// all-zero genesis hash when nothing was appended.
    pub fn finalize(self) -> (Vec<ChainedWitness>, String) {
        let root = hex_encode(&self.prev_hash);
        (self.entries, root)
    }

    /// Get the rvf-crypto WitnessEntry list for .rvf binary export.
    pub fn rvf_entries(&self) -> &[rvf_crypto::WitnessEntry] {
        &self.rvf_entries
    }
}

impl Default for WitnessChainBuilder {
    /// Equivalent to [`WitnessChainBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Encode `bytes` as a lower-case hexadecimal string, two digits per byte.
///
/// Writes into a single preallocated buffer instead of formatting (and
/// allocating) a temporary `String` for every input byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        out.push(DIGITS[usize::from(byte >> 4)] as char);
        out.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    out
}
// ═══════════════════════════════════════════════════════════════════════════
// Chain verification
// ═══════════════════════════════════════════════════════════════════════════
/// Check that every link of a witness chain hashes correctly.
///
/// Starting from the all-zero genesis hash, each entry's hash is recomputed
/// as SHAKE-256(previous hash || canonical bytes of the record) and compared,
/// as hex, with the stored `chain_hash`.
///
/// Returns `Ok(root_hash)` (hex of the last recomputed hash, or of the genesis
/// hash for an empty chain) when every link matches, and `Err(index)` with the
/// position of the first link that does not.
pub fn verify_chain(chain: &[ChainedWitness]) -> Result<String, usize> {
    let root = chain
        .iter()
        .enumerate()
        .try_fold([0u8; 32], |previous, (index, link)| {
            let payload = link.record.canonical_bytes();
            let preimage = [&previous[..], &payload[..]].concat();
            let digest = shake256_256(&preimage);
            if hex_encode(&digest) == link.chain_hash {
                Ok(digest)
            } else {
                Err(index)
            }
        })?;
    Ok(hex_encode(&root))
}
// ═══════════════════════════════════════════════════════════════════════════
// Generate the publishable manifest
// ═══════════════════════════════════════════════════════════════════════════
/// Run all three ablation modes and produce the publishable RVF manifest.
///
/// This is the entry point. Same config → same manifest → same chain_root_hash.
/// No `.rvf` binary is written by this function; call
/// `generate_manifest_with_rvf` with an output path to also export one.
///
/// # Errors
///
/// Returns whatever error `generate_manifest_with_rvf` reports.
pub fn generate_manifest(config: &HoldoutConfig) -> anyhow::Result<RvfManifest> {
generate_manifest_with_rvf(config, None)
}
/// Like `generate_manifest`, but also produces a `.rvf` binary file.
pub fn generate_manifest_with_rvf(
config: &HoldoutConfig,
rvf_output_path: Option<&str>,
) -> anyhow::Result<RvfManifest> {
let mut chain_builder = WitnessChainBuilder::new();
// Run all three modes
let mode_a = run_acceptance_test_mode(config, &AblationMode::Baseline)?;
collect_witnesses(&mut chain_builder, "A", &mode_a, config);
let mode_b = run_acceptance_test_mode(config, &AblationMode::CompilerOnly)?;
collect_witnesses(&mut chain_builder, "B", &mode_b, config);
let mode_c = run_acceptance_test_mode(config, &AblationMode::Full)?;
collect_witnesses(&mut chain_builder, "C", &mode_c, config);
// Build scorecards
let scorecards = vec![
build_scorecard("A (fixed policy)", &mode_a),
build_scorecard("B (compiled policy)", &mode_b),
build_scorecard("C (learned policy)", &mode_c),
];
// Compute ablation assertions
let assertions = compute_assertions(&mode_a, &mode_b, &mode_c);
let all_passed = assertions.b_beats_a_cost.passed
&& assertions.c_beats_b_robustness.passed
&& assertions.compiler_safe.passed
&& assertions.a_skip_nonzero.passed
&& assertions.c_multi_mode.passed
&& assertions.c_penalty_better_than_b.passed
&& mode_a.result.passed
&& mode_b.result.passed
&& mode_c.result.passed;
// Export .rvf binary before finalizing (consumes chain_builder entries)
if let Some(rvf_path) = rvf_output_path {
// Build the manifest struct first for the meta segment
let preview_manifest = RvfManifest {
version: 2,
description: String::new(),
config: ManifestConfig::from(config),
scorecards: scorecards.clone(),
assertions: assertions.clone(),
all_passed,
witness_chain: vec![],
chain_root_hash: String::new(),
chain_length: chain_builder.rvf_entries().len(),
};
export_rvf_binary(&preview_manifest, &chain_builder, rvf_path)?;
}
// Finalize witness chain
let (witness_chain, chain_root_hash) = chain_builder.finalize();
let chain_length = witness_chain.len();
Ok(RvfManifest {
version: 2,
description: "RuVector temporal reasoning ablation study — \
deterministic acceptance test with SHAKE-256 witness chain \
(rvf-crypto native)"
.to_string(),
config: ManifestConfig::from(config),
scorecards,
assertions,
all_passed,
witness_chain,
chain_root_hash,
chain_length,
})
}
/// Verify a manifest by re-running with the same config and comparing hashes.
///
/// Verification happens in two phases:
///
/// 1. The stored chain is replayed with `verify_chain`. If a link is broken,
///    or the replayed root differs from `manifest.chain_root_hash`, the
///    function returns early with every flag set to `false` and no re-run.
/// 2. Otherwise the acceptance test is regenerated from `manifest.config` and
///    the fresh chain is compared entry by entry with the stored one. At most
///    ten mismatching indices are reported, and only the overlapping prefix of
///    the two chains is compared.
///
/// # Errors
///
/// Fails only if regenerating the manifest fails; a verification mismatch is
/// reported through the returned `VerifyResult`, not as an error.
pub fn verify_manifest(manifest: &RvfManifest) -> anyhow::Result<VerifyResult> {
    let expected_root = manifest.chain_root_hash.clone();

    // Phase 1: do the stored links hash correctly and end at the stored root?
    let replayed = verify_chain(&manifest.witness_chain);
    let links_intact = replayed.as_ref().ok() == Some(&manifest.chain_root_hash);
    if !links_intact {
        return Ok(VerifyResult {
            chain_integrity: false,
            outcomes_match: false,
            root_hash_match: false,
            recomputed_root: replayed.unwrap_or_default(),
            expected_root,
            mismatched_records: Vec::new(),
        });
    }

    // Phase 2: regenerate from the frozen config and compare.
    let fresh = generate_manifest(&holdout_config_from_manifest(&manifest.config))?;

    let mismatched_records: Vec<usize> = manifest
        .witness_chain
        .iter()
        .zip(&fresh.witness_chain)
        .enumerate()
        .filter(|(_, (stored, rerun))| stored.chain_hash != rerun.chain_hash)
        .map(|(index, _)| index)
        .take(10) // cap output
        .collect();

    let outcomes_match =
        mismatched_records.is_empty() && manifest.chain_length == fresh.chain_length;
    let root_hash_match = fresh.chain_root_hash == manifest.chain_root_hash;

    Ok(VerifyResult {
        chain_integrity: true,
        outcomes_match,
        root_hash_match,
        recomputed_root: fresh.chain_root_hash,
        expected_root,
        mismatched_records,
    })
}
/// Outcome of `verify_manifest`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VerifyResult {
/// The stored chain's links hash correctly and end at the stored root hash
pub chain_integrity: bool,
/// The re-run produced no mismatching entries and the same chain length
pub outcomes_match: bool,
/// The re-run's root hash equals the stored root hash
pub root_hash_match: bool,
/// Root hash obtained during verification. When the integrity check fails
/// this is the replayed root of the stored chain, or empty if a link was
/// broken; otherwise it is the root hash of the re-run.
pub recomputed_root: String,
/// Root hash recorded in the manifest being verified
pub expected_root: String,
/// Indices (at most ten) of entries whose chain hash differs from the re-run
pub mismatched_records: Vec<usize>,
}
impl VerifyResult {
    /// Print a human-readable verification report to stdout.
    ///
    /// Both root hashes are abbreviated to their first 16 bytes. A root that
    /// is shorter than that (for example an empty or malformed value read from
    /// a manifest file) is printed in full instead of causing a panic.
    pub fn print(&self) {
        fn verdict(ok: bool) -> &'static str {
            if ok {
                "PASS"
            } else {
                "FAIL"
            }
        }

        // `get` returns `None` when the string is shorter than 16 bytes or
        // when byte 16 is not a character boundary; print it whole then.
        fn abbreviate(root: &str) -> &str {
            root.get(..16).unwrap_or(root)
        }

        println!();
        println!(" Witness Chain Verification:");
        println!(" Chain integrity: {}", verdict(self.chain_integrity));
        println!(" Outcomes match: {}", verdict(self.outcomes_match));
        println!(" Root hash match: {}", verdict(self.root_hash_match));
        println!(" Expected root: {}", abbreviate(&self.expected_root));
        println!(" Recomputed root: {}", abbreviate(&self.recomputed_root));
        if !self.mismatched_records.is_empty() {
            println!(" Mismatched at: {:?}", self.mismatched_records);
        }
        println!();
    }

    /// `true` only when chain integrity, outcomes and root hash all check out.
    pub fn passed(&self) -> bool {
        self.chain_integrity && self.outcomes_match && self.root_hash_match
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// Native .rvf binary export
// ═══════════════════════════════════════════════════════════════════════════
/// Export the manifest as a native `.rvf` binary file.
///
/// The file is the concatenation of two sealed segments, in this order:
/// - **WITNESS_SEG** (0x0A), segment id 1: the output of
///   `rvf_crypto::create_witness_chain()` over the builder's entries.
/// - **META_SEG** (0x07), segment id 2: a JSON object with the manifest's
///   version, root hash, chain length, pass flag, config, scorecards and
///   assertions.
///
/// The witness chain itself is taken from `chain_builder`, not from
/// `manifest.witness_chain`.
///
/// # Errors
///
/// Fails if the metadata cannot be serialized or the file cannot be written.
pub fn export_rvf_binary(
    manifest: &RvfManifest,
    chain_builder: &WitnessChainBuilder,
    path: &str,
) -> anyhow::Result<()> {
    use rvf_crypto::create_witness_chain;
    use rvf_types::{SegmentFlags, SegmentType};
    use rvf_wire::write_segment;

    // Segment 1: WITNESS_SEG (0x0A), sealed.
    let witness_payload = create_witness_chain(chain_builder.rvf_entries());
    let witness_segment = write_segment(
        SegmentType::Witness as u8,
        &witness_payload,
        SegmentFlags::empty().with(SegmentFlags::SEALED),
        1, // segment_id
    );

    // Segment 2: META_SEG (0x07), sealed, carrying the JSON summary.
    let meta_payload = serde_json::to_vec(&serde_json::json!({
        "format": "rvf-acceptance-test",
        "version": manifest.version,
        "chain_root_hash": manifest.chain_root_hash,
        "chain_length": manifest.chain_length,
        "all_passed": manifest.all_passed,
        "config": manifest.config,
        "scorecards": manifest.scorecards,
        "assertions": manifest.assertions,
    }))?;
    let meta_segment = write_segment(
        SegmentType::Meta as u8,
        &meta_payload,
        SegmentFlags::empty().with(SegmentFlags::SEALED),
        2, // segment_id
    );

    // The .rvf file is simply the two segments back to back.
    let file_bytes = [&witness_segment[..], &meta_segment[..]].concat();
    std::fs::write(path, &file_bytes)?;
    Ok(())
}
/// Verify the native `.rvf` binary witness chain.
///
/// Reads the WITNESS_SEG payload and runs `rvf_crypto::verify_witness_chain`.
/// Only the first segment of the file is examined; it must be a WITNESS_SEG.
///
/// Returns the number of witness entries in the verified chain.
///
/// # Errors
///
/// Fails if the file cannot be read, is shorter than a segment header, does
/// not start with a WITNESS_SEG, declares a payload that extends past the end
/// of the file, or if the witness chain itself does not verify.
pub fn verify_rvf_binary(path: &str) -> anyhow::Result<usize> {
    use rvf_crypto::verify_witness_chain;
    use rvf_types::SEGMENT_HEADER_SIZE;

    /// Segment type byte of a WITNESS_SEG.
    const WITNESS_SEG_TYPE: u8 = 0x0A;
    /// Offset of the segment type byte within the header.
    const SEG_TYPE_OFFSET: usize = 5;

    let data = std::fs::read(path)?;
    if data.len() < SEGMENT_HEADER_SIZE {
        anyhow::bail!("File too small for a valid segment");
    }

    // Header fields are fetched with `get` so a header layout that does not
    // fit the bytes read yields an error rather than an out-of-bounds panic.
    let seg_type = *data
        .get(SEG_TYPE_OFFSET)
        .ok_or_else(|| anyhow::anyhow!("Bad header"))?;
    if seg_type != WITNESS_SEG_TYPE {
        anyhow::bail!("First segment is not WITNESS_SEG (got 0x{:02X})", seg_type);
    }

    // The payload length is a little-endian u64 at header bytes 0x10..0x18.
    let len_bytes: [u8; 8] = data
        .get(0x10..0x18)
        .and_then(|bytes| bytes.try_into().ok())
        .ok_or_else(|| anyhow::anyhow!("Bad header"))?;

    // The length comes from the file and is untrusted: convert and add with
    // overflow checks. A length that cannot be represented cannot fit in the
    // file either, so it is reported as a truncated payload.
    let payload_end = usize::try_from(u64::from_le_bytes(len_bytes))
        .ok()
        .and_then(|payload_len| SEGMENT_HEADER_SIZE.checked_add(payload_len))
        .ok_or_else(|| anyhow::anyhow!("Truncated witness payload"))?;

    let witness_data = data
        .get(SEGMENT_HEADER_SIZE..payload_end)
        .ok_or_else(|| anyhow::anyhow!("Truncated witness payload"))?;

    let entries = verify_witness_chain(witness_data)
        .map_err(|e| anyhow::anyhow!("Witness chain verification failed: {:?}", e))?;
    Ok(entries.len())
}
// ═══════════════════════════════════════════════════════════════════════════
// Internal helpers
// ═══════════════════════════════════════════════════════════════════════════
/// Append one mode's witnesses to the chain.
///
/// Records are appended in a fixed order so the resulting chain hash is
/// reproducible:
///
/// 1. one aggregate record per cycle, in cycle order;
/// 2. one record per (context bucket, skip mode) pair of the skip-mode
///    distribution, with buckets and mode names each in sorted order (hash-map
///    iteration order is not stable, so the keys are sorted first);
/// 3. a `compiler_stats` record followed by a `penalty_stats` record.
///
/// `seq` is left at 0 everywhere because the builder assigns it. `_config` is
/// currently unused.
fn collect_witnesses(
    builder: &mut WitnessChainBuilder,
    mode_label: &str,
    result: &crate::acceptance_test::AblationResult,
    _config: &HoldoutConfig,
) {
    // 1. Per-cycle holdout aggregates. A cycle counts as "correct" when its
    //    holdout accuracy is at least 0.5; the cost is truncated to an integer.
    for cycle_metrics in &result.result.cycles {
        let record = WitnessRecord {
            puzzle_id: format!("cycle_{}_holdout", cycle_metrics.cycle),
            mode: mode_label.to_string(),
            cycle: cycle_metrics.cycle,
            skip_mode: "aggregate".to_string(),
            context_bucket: "holdout".to_string(),
            correct: cycle_metrics.holdout_accuracy >= 0.5,
            steps: cycle_metrics.holdout_cost_per_solve as usize,
            seq: 0,
        };
        builder.append(record);
    }

    // 2. Skip-mode distribution. These records are tagged with the cycle
    //    count and carry the usage count in `steps`.
    let cycle_count = result.result.cycles.len();
    let mut by_bucket: Vec<_> = result.skip_mode_distribution.iter().collect();
    by_bucket.sort_unstable_by(|lhs, rhs| lhs.0.cmp(rhs.0));
    for (bucket, distribution) in by_bucket {
        let mut by_mode: Vec<_> = distribution.iter().collect();
        by_mode.sort_unstable_by(|lhs, rhs| lhs.0.cmp(rhs.0));
        for (skip_mode, &count) in by_mode {
            let record = WitnessRecord {
                puzzle_id: format!("dist_{}_{}", bucket, skip_mode),
                mode: mode_label.to_string(),
                cycle: cycle_count,
                skip_mode: skip_mode.clone(),
                context_bucket: bucket.clone(),
                correct: true,
                steps: count,
                seq: 0,
            };
            builder.append(record);
        }
    }

    // 3. Compiler statistics: hit and miss counters travel in the string
    //    fields, the false-hit count in `steps`.
    let compiler_stats = WitnessRecord {
        puzzle_id: "compiler_stats".to_string(),
        mode: mode_label.to_string(),
        cycle: 0,
        skip_mode: format!("hits:{}", result.compiler_hits),
        context_bucket: format!("misses:{}", result.compiler_misses),
        correct: result.compiler_false_hits == 0,
        steps: result.compiler_false_hits,
        seq: 0,
    };
    builder.append(compiler_stats);

    // 4. Early-commit penalty statistics, rounded to four decimals.
    let penalty_stats = WitnessRecord {
        puzzle_id: "penalty_stats".to_string(),
        mode: mode_label.to_string(),
        cycle: 0,
        skip_mode: format!("rate:{:.4}", result.early_commit_rate),
        context_bucket: format!("penalty:{:.4}", result.early_commit_penalties),
        correct: true,
        steps: result.policy_context_buckets,
        seq: 0,
    };
    builder.append(penalty_stats);
}
/// Summarize one mode's run as a `ModeScorecard`.
///
/// Per-cycle metrics come from the last cycle; a run without cycles yields
/// zeros for all of them.
///
/// NOTE(review): `total_puzzles` receives the cycle count and `correct`
/// receives the accuracy as a truncated percentage. Neither is a puzzle count;
/// confirm that this is what consumers of the scorecard expect.
fn build_scorecard(label: &str, result: &crate::acceptance_test::AblationResult) -> ModeScorecard {
    let cycles = &result.result.cycles;
    let (accuracy, cost_per_solve, noise_accuracy, violations) = match cycles.last() {
        Some(final_cycle) => (
            final_cycle.holdout_accuracy,
            final_cycle.holdout_cost_per_solve,
            final_cycle.holdout_noise_accuracy,
            final_cycle.holdout_violations,
        ),
        None => (0.0, 0.0, 0.0, 0),
    };

    ModeScorecard {
        mode: label.to_string(),
        total_puzzles: cycles.len(),
        correct: (accuracy * 100.0) as usize,
        accuracy,
        total_steps: cost_per_solve as usize,
        cost_per_solve,
        noise_accuracy,
        violations,
        early_commit_penalty: result.early_commit_penalties,
        skip_mode_distribution: result.skip_mode_distribution.clone(),
        context_buckets_used: result.policy_context_buckets,
    }
}
/// Evaluate the six ablation assertions from the results of modes A, B and C.
///
/// Cost and noise accuracy are read from each mode's last cycle. A mode that
/// reported no cycles contributes zeros instead of causing a panic, which
/// makes the cost and robustness assertions fail for such a run and matches
/// how `build_scorecard` treats the same situation.
///
/// Thresholds:
/// - B's cost per solve must be at least 15% below A's;
/// - C's noise accuracy must exceed B's by at least 0.10;
/// - B's compiler false-hit rate must be below 5%;
/// - A must have used a skip mode other than "none" at least once;
/// - C must have used at least two distinct skip modes;
/// - C's early-commit penalty must be at most 90% of B's, or exactly zero
///   when B's penalty is not positive.
fn compute_assertions(
    mode_a: &crate::acceptance_test::AblationResult,
    mode_b: &crate::acceptance_test::AblationResult,
    mode_c: &crate::acceptance_test::AblationResult,
) -> AblationAssertions {
    // (cost per solve, noise accuracy) of the last cycle; zeros without cycles.
    let final_metrics = |run: &crate::acceptance_test::AblationResult| {
        run.result
            .cycles
            .last()
            .map(|c| (c.holdout_cost_per_solve, c.holdout_noise_accuracy))
            .unwrap_or((0.0, 0.0))
    };
    let (cost_a, _) = final_metrics(mode_a);
    let (cost_b, noise_b) = final_metrics(mode_b);
    let (_, noise_c) = final_metrics(mode_c);

    // Relative cost reduction of B versus A; guarded against division by zero.
    let cost_decrease = if cost_a > 0.0 {
        1.0 - (cost_b / cost_a)
    } else {
        0.0
    };
    let robustness_gain = noise_c - noise_b;

    let total_compiler = mode_b.compiler_hits + mode_b.compiler_misses;
    let false_hit_rate = if total_compiler > 0 {
        mode_b.compiler_false_hits as f64 / total_compiler as f64
    } else {
        0.0
    };

    // Total usage of every skip mode except "none" in mode A.
    let a_total_skip: usize = mode_a
        .skip_mode_distribution
        .values()
        .flat_map(|modes| modes.iter())
        .filter(|(name, _)| name.as_str() != "none")
        .map(|(_, count)| *count)
        .sum();

    // Distinct skip-mode names seen across all of mode C's context buckets.
    let c_unique_modes: std::collections::HashSet<&str> = mode_c
        .skip_mode_distribution
        .values()
        .flat_map(|modes| modes.keys())
        .map(|name| name.as_str())
        .collect();

    let b_penalty = mode_b.early_commit_penalties;
    let c_penalty = mode_c.early_commit_penalties;
    let penalty_ok = if b_penalty > 0.0 {
        c_penalty <= b_penalty * 0.90
    } else {
        c_penalty == 0.0
    };

    AblationAssertions {
        b_beats_a_cost: AssertionResult {
            name: "B beats A on cost (>=15%)".to_string(),
            passed: cost_decrease >= 0.15,
            measured: format!("{:.1}%", cost_decrease * 100.0),
            threshold: ">=15%".to_string(),
        },
        c_beats_b_robustness: AssertionResult {
            name: "C beats B on robustness (>=10%)".to_string(),
            passed: robustness_gain >= 0.10,
            measured: format!("{:.1}%", robustness_gain * 100.0),
            threshold: ">=10%".to_string(),
        },
        compiler_safe: AssertionResult {
            name: "Compiler false-hit rate <5%".to_string(),
            passed: false_hit_rate < 0.05,
            measured: format!("{:.1}%", false_hit_rate * 100.0),
            threshold: "<5%".to_string(),
        },
        a_skip_nonzero: AssertionResult {
            name: "Mode A skip usage nonzero".to_string(),
            passed: a_total_skip > 0,
            measured: a_total_skip.to_string(),
            threshold: ">0".to_string(),
        },
        c_multi_mode: AssertionResult {
            name: "Mode C uses multiple skip modes".to_string(),
            passed: c_unique_modes.len() >= 2,
            measured: format!("{} modes", c_unique_modes.len()),
            threshold: ">=2".to_string(),
        },
        c_penalty_better_than_b: AssertionResult {
            name: "C penalty < B penalty (distract)".to_string(),
            passed: penalty_ok,
            measured: format!("C={:.2} B={:.2}", c_penalty, b_penalty),
            threshold: "C <= 90% of B".to_string(),
        },
    }
}
/// Rebuild the `HoldoutConfig` that a manifest was generated with.
///
/// Seeds are parsed from their hex form after stripping any leading `0x` /
/// `0X` prefixes. `min_dimensions_improved` and `verbose` are not stored in
/// the manifest and are set to 2 and `false`.
///
/// NOTE(review): a seed that fails to parse silently falls back to
/// `0xDEAD_BEEF` (holdout) or `42` (training), so a malformed manifest is
/// re-run with those seeds rather than rejected — confirm this is intended.
fn holdout_config_from_manifest(mc: &ManifestConfig) -> HoldoutConfig {
    let parse_seed = |text: &str, fallback: u64| -> u64 {
        let digits = text.trim_start_matches("0x").trim_start_matches("0X");
        u64::from_str_radix(digits, 16).unwrap_or(fallback)
    };

    HoldoutConfig {
        holdout_seed: parse_seed(&mc.holdout_seed, 0xDEAD_BEEF),
        training_seed: parse_seed(&mc.training_seed, 42),
        holdout_size: mc.holdout_size,
        training_per_cycle: mc.training_per_cycle,
        cycles: mc.cycles,
        noise_rate: mc.noise_rate,
        step_budget: mc.step_budget,
        min_accuracy: mc.min_accuracy,
        min_dimensions_improved: 2,
        verbose: false,
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// Pretty-print
// ═══════════════════════════════════════════════════════════════════════════
impl RvfManifest {
    /// Print a human-readable summary of the manifest to stdout.
    ///
    /// The report lists the frozen config, one table row per scorecard, the
    /// six ablation assertions with their measured values, the witness chain
    /// length with the first 32 bytes of the root hash, and the overall
    /// verdict.
    pub fn print_summary(&self) {
        let cfg = &self.config;

        println!();
        println!("╔══════════════════════════════════════════════════════════════╗");
        println!("║ PUBLISHABLE RVF ACCEPTANCE TEST ║");
        println!("╚══════════════════════════════════════════════════════════════╝");
        println!();

        println!(" Config:");
        println!(
            " Holdout: {} puzzles (seed {})",
            cfg.holdout_size, cfg.holdout_seed
        );
        println!(
            " Training: {} per cycle x {} cycles",
            cfg.training_per_cycle, cfg.cycles
        );
        println!(
            " Budget: {} steps, noise rate {:.0}%",
            cfg.step_budget,
            cfg.noise_rate * 100.0
        );
        println!();

        // Scorecard table: header, rule, then one row per mode.
        println!(
            " {:<22} {:>8} {:>12} {:>10} {:>6}",
            "Mode", "Acc%", "Cost/Solve", "Noise%", "Viol"
        );
        println!(" {}", "-".repeat(62));
        for card in self.scorecards.iter() {
            println!(
                " {:<22} {:>6.1}% {:>11.2} {:>8.1}% {:>5}",
                card.mode,
                card.accuracy * 100.0,
                card.cost_per_solve,
                card.noise_accuracy * 100.0,
                card.violations
            );
        }
        println!();

        println!(" Ablation Assertions:");
        let checks = [
            &self.assertions.b_beats_a_cost,
            &self.assertions.c_beats_b_robustness,
            &self.assertions.compiler_safe,
            &self.assertions.a_skip_nonzero,
            &self.assertions.c_multi_mode,
            &self.assertions.c_penalty_better_than_b,
        ];
        for check in checks.iter() {
            let status = if check.passed { "PASS" } else { "FAIL" };
            println!(" {:<40} {} ({})", check.name, status, check.measured);
        }
        println!();

        // Only a prefix of the root hash is shown, clamped to its length.
        let shown = 32.min(self.chain_root_hash.len());
        let root_prefix = &self.chain_root_hash[..shown];
        println!(" Witness Chain:");
        println!(" Records: {}", self.chain_length);
        println!(" Root hash: {}", root_prefix);
        println!();

        let verdict = if self.all_passed {
            " RESULT: ALL PASSED — artifact is publishable"
        } else {
            " RESULT: SOME CRITERIA NOT MET"
        };
        println!("{}", verdict);
        println!();
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// Tests
// ═══════════════════════════════════════════════════════════════════════════
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a witness record; `seq` is a placeholder the builder overwrites.
    fn record(
        puzzle_id: &str,
        mode: &str,
        cycle: usize,
        skip_mode: &str,
        context_bucket: &str,
        correct: bool,
        steps: usize,
    ) -> WitnessRecord {
        WitnessRecord {
            puzzle_id: puzzle_id.to_string(),
            mode: mode.to_string(),
            cycle,
            skip_mode: skip_mode.to_string(),
            context_bucket: context_bucket.to_string(),
            correct,
            steps,
            seq: 0,
        }
    }

    /// A deliberately tiny configuration so manifest generation stays fast.
    fn small_config() -> HoldoutConfig {
        HoldoutConfig {
            holdout_size: 10,
            training_per_cycle: 10,
            cycles: 2,
            step_budget: 200,
            min_accuracy: 0.30,
            min_dimensions_improved: 0,
            verbose: false,
            ..Default::default()
        }
    }

    /// A freshly built chain verifies and yields the builder's root hash.
    #[test]
    fn witness_chain_integrity() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..5 {
            builder.append(record(
                &format!("puzzle_{}", i),
                "A",
                0,
                "none",
                "small:clean:clean",
                true,
                10 + i,
            ));
        }
        let (chain, root) = builder.finalize();
        assert_eq!(chain.len(), 5);
        assert!(!root.is_empty());

        let verified_root = verify_chain(&chain).unwrap();
        assert_eq!(verified_root, root);
    }

    /// Flipping a field of a record after the fact breaks verification.
    #[test]
    fn tampered_chain_detected() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..3 {
            builder.append(record(
                &format!("puzzle_{}", i),
                "B",
                0,
                "weekday",
                "large:heavy:noisy",
                i != 1,
                20,
            ));
        }
        let (mut chain, _) = builder.finalize();

        // Entry 1 was recorded as incorrect; claim it was correct instead.
        chain[1].record.correct = true;
        assert!(verify_chain(&chain).is_err());
    }

    /// Same inputs → same root hash.
    #[test]
    fn deterministic_chain() {
        fn root_of_single_record_chain() -> String {
            let mut builder = WitnessChainBuilder::new();
            builder.append(record("p1", "C", 1, "hybrid", "medium:some:clean", true, 42));
            let (_, root) = builder.finalize();
            root
        }
        assert_eq!(root_of_single_record_chain(), root_of_single_record_chain());
    }

    /// A generated manifest is well-formed and its chain verifies.
    #[test]
    fn manifest_generation_small() {
        let manifest = generate_manifest(&small_config()).unwrap();
        assert_eq!(manifest.version, 2);
        assert_eq!(manifest.scorecards.len(), 3);
        assert!(!manifest.chain_root_hash.is_empty());
        assert!(manifest.chain_length > 0);

        let root = verify_chain(&manifest.witness_chain).unwrap();
        assert_eq!(root, manifest.chain_root_hash);
    }

    /// Generating twice from the same config reproduces the same chain.
    #[test]
    fn manifest_deterministic_replay() {
        let config = small_config();
        let first = generate_manifest(&config).unwrap();
        let second = generate_manifest(&config).unwrap();
        assert_eq!(first.chain_root_hash, second.chain_root_hash);
        assert_eq!(first.chain_length, second.chain_length);
    }
}