wifi-densepose/vendor/ruvector/examples/benchmarks/src/publishable_rvf.rs

//! Publishable RVF Acceptance Test
//!
//! Produces a self-contained artifact that an external developer can run
//! offline and reproduce identical graded outcomes, plus verify the witness
//! chain cryptographically.
//!
//! ## Architecture
//!
//! 1. **Deterministic execution**: Frozen seeds → identical puzzles → identical
//!    solve paths → identical outcomes. No network, no randomness, no clock.
//!
//! 2. **Witness chain**: Every puzzle decision (skip_mode chosen, context bucket,
//!    steps taken, correct/wrong) is hashed into a SHAKE-256 chain using the
//!    native `rvf-crypto` witness infrastructure. Changing any single bit in
//!    any record invalidates the entire chain from that point.
//!
//! 3. **Graded scorecard**: Per-mode (A/B/C) aggregate metrics plus ablation
//!    assertions, all serialized to JSON.
//!
//! 4. **Binary .rvf output**: The witness chain is also written as a native
//!    WITNESS_SEG (0x0A) + META_SEG (0x07) in the RVF wire format, producing
//!    a `.rvf` file verifiable by any RVF-compatible tool or WASM runtime.
//!
//! 5. **Verification**: Re-run with same config → re-generate chain → compare
//!    chain root hash. If it matches, outcomes are identical.
//!
//! ## Usage
//!
//! ```bash
//! # Generate the manifest (JSON + optional .rvf binary)
//! cargo run --bin acceptance-rvf -- generate --output manifest.json
//!
//! # Verify a previously generated manifest
//! cargo run --bin acceptance-rvf -- verify --input manifest.json
//! ```

use crate::acceptance_test::{run_acceptance_test_mode, AblationMode, HoldoutConfig};
use crate::temporal::PolicyKernel;
use rvf_crypto::shake256_256;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

// ═══════════════════════════════════════════════════════════════════════════
// Witness record: one per puzzle per mode
// ═══════════════════════════════════════════════════════════════════════════

/// A single witnessed puzzle outcome.
///
/// Captures the decision (skip_mode, context_bucket) and result (correct,
/// steps) for one puzzle in one ablation mode. These records form the
/// leaves of the witness chain.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct WitnessRecord {
    /// Puzzle identifier (deterministic from seed)
    pub puzzle_id: String,
    /// Ablation mode ("A", "B", or "C")
    pub mode: String,
    /// Cycle number (0-indexed)
    pub cycle: usize,
    /// Skip mode chosen by the policy ("none", "weekday", "hybrid")
    pub skip_mode: String,
    /// Context bucket key (e.g., "large:heavy:noisy")
    pub context_bucket: String,
    /// Whether the solver got the correct answer
    pub correct: bool,
    /// Steps taken to solve
    pub steps: usize,
    /// Sequential record index within the chain
    pub seq: usize,
}

impl WitnessRecord {
    /// Canonical bytes for hashing. Deterministic regardless of serde.
    fn canonical_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(256);
        buf.extend_from_slice(self.puzzle_id.as_bytes());
        buf.push(b'|');
        buf.extend_from_slice(self.mode.as_bytes());
        buf.push(b'|');
        buf.extend_from_slice(&self.cycle.to_le_bytes());
        buf.push(b'|');
        buf.extend_from_slice(self.skip_mode.as_bytes());
        buf.push(b'|');
        buf.extend_from_slice(self.context_bucket.as_bytes());
        buf.push(b'|');
        buf.push(if self.correct { 1 } else { 0 });
        buf.push(b'|');
        buf.extend_from_slice(&self.steps.to_le_bytes());
        buf.push(b'|');
        buf.extend_from_slice(&self.seq.to_le_bytes());
        buf
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Chained witness: record + hash link
// ═══════════════════════════════════════════════════════════════════════════

/// A witness record with its chain hash.
///
/// `chain_hash` = SHAKE-256(prev_chain_hash || canonical_bytes(record))
/// First record: prev_chain_hash = [0; 32] (genesis)
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChainedWitness {
    pub record: WitnessRecord,
    /// Hex-encoded SHAKE-256 chain hash for this entry
    pub chain_hash: String,
}

// ═══════════════════════════════════════════════════════════════════════════
// Mode scorecard
// ═══════════════════════════════════════════════════════════════════════════

/// Aggregate metrics for one ablation mode.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ModeScorecard {
    pub mode: String,
    pub total_puzzles: usize,
    pub correct: usize,
    pub accuracy: f64,
    pub total_steps: usize,
    pub cost_per_solve: f64,
    pub noise_accuracy: f64,
    pub violations: usize,
    pub early_commit_penalty: f64,
    pub skip_mode_distribution: HashMap<String, HashMap<String, usize>>,
    /// Number of context buckets with data
    pub context_buckets_used: usize,
}

// ═══════════════════════════════════════════════════════════════════════════
// Ablation assertions
// ═══════════════════════════════════════════════════════════════════════════

/// All six ablation assertions, each with pass/fail and measured value.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AblationAssertions {
    pub b_beats_a_cost: AssertionResult,
    pub c_beats_b_robustness: AssertionResult,
    pub compiler_safe: AssertionResult,
    pub a_skip_nonzero: AssertionResult,
    pub c_multi_mode: AssertionResult,
    pub c_penalty_better_than_b: AssertionResult,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AssertionResult {
    pub name: String,
    pub passed: bool,
    pub measured: String,
    pub threshold: String,
}

// ═══════════════════════════════════════════════════════════════════════════
// RVF Manifest: the publishable artifact
// ═══════════════════════════════════════════════════════════════════════════

/// The complete publishable artifact.
///
/// Contains everything needed to verify reproducibility:
/// - Frozen config (seeds, budget, cycles)
/// - Per-mode scorecards
/// - Ablation assertions
/// - Full witness chain with hash links
/// - Chain root hash (final hash of the last entry)
///
/// An external developer can:
/// 1. Run `acceptance-rvf generate` with the same config
/// 2. Compare their `chain_root_hash` to this one
/// 3. If hashes match, outcomes are bit-for-bit identical
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RvfManifest {
    /// Format version for forward compatibility
    pub version: u32,
    /// Human-readable description
    pub description: String,
    /// Frozen configuration
    pub config: ManifestConfig,
    /// Per-mode scorecards
    pub scorecards: Vec<ModeScorecard>,
    /// Ablation assertions
    pub assertions: AblationAssertions,
    /// Whether all assertions passed
    pub all_passed: bool,
    /// Witness chain (every puzzle decision, hash-linked)
    pub witness_chain: Vec<ChainedWitness>,
    /// SHAKE-256 of the final chain entry (hex). This is THE reproducibility proof.
    pub chain_root_hash: String,
    /// Total witness records in the chain
    pub chain_length: usize,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ManifestConfig {
    pub holdout_size: usize,
    pub training_per_cycle: usize,
    pub cycles: usize,
    pub holdout_seed: String,
    pub training_seed: String,
    pub noise_rate: f64,
    pub step_budget: usize,
    pub min_accuracy: f64,
}

impl From<&HoldoutConfig> for ManifestConfig {
    fn from(c: &HoldoutConfig) -> Self {
        Self {
            holdout_size: c.holdout_size,
            training_per_cycle: c.training_per_cycle,
            cycles: c.cycles,
            holdout_seed: format!("0x{:016X}", c.holdout_seed),
            training_seed: format!("0x{:016X}", c.training_seed),
            noise_rate: c.noise_rate,
            step_budget: c.step_budget,
            min_accuracy: c.min_accuracy,
        }
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Witness chain builder
// ═══════════════════════════════════════════════════════════════════════════

/// Builds a SHAKE-256-linked witness chain incrementally.
///
/// Uses `rvf_crypto::shake256_256` for hashing, compatible with the
/// native RVF WITNESS_SEG format.
pub struct WitnessChainBuilder {
    entries: Vec<ChainedWitness>,
    /// Parallel rvf-crypto WitnessEntry list for .rvf binary export
    rvf_entries: Vec<rvf_crypto::WitnessEntry>,
    prev_hash: [u8; 32],
    seq: usize,
}

impl WitnessChainBuilder {
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
            rvf_entries: Vec::new(),
            prev_hash: [0u8; 32],
            seq: 0,
        }
    }

    /// Append a witness record to the chain.
    ///
    /// The chain hash is: SHAKE-256(prev_hash || canonical_bytes(record))
    /// Also builds the parallel rvf-crypto WitnessEntry for .rvf export.
    pub fn append(&mut self, mut record: WitnessRecord) {
        record.seq = self.seq;
        self.seq += 1;

        let canonical = record.canonical_bytes();
        // Compute the action_hash from canonical bytes using SHAKE-256
        let action_hash = shake256_256(&canonical);

        // Chain: SHAKE-256(prev_hash || canonical_bytes)
        let mut chain_input = Vec::with_capacity(32 + canonical.len());
        chain_input.extend_from_slice(&self.prev_hash);
        chain_input.extend_from_slice(&canonical);
        let hash = shake256_256(&chain_input);

        // Build the rvf-crypto WitnessEntry (73-byte entry for .rvf binary)
        self.rvf_entries.push(rvf_crypto::WitnessEntry {
            prev_hash: [0u8; 32], // overwritten by create_witness_chain
            action_hash,
            timestamp_ns: self.seq as u64, // deterministic pseudo-timestamp
            witness_type: 0x02,            // COMPUTATION witness type
        });

        self.prev_hash = hash;
        self.entries.push(ChainedWitness {
            record,
            chain_hash: hex_encode(&hash),
        });
    }

    /// Finalize and return the chain + root hash.
    pub fn finalize(self) -> (Vec<ChainedWitness>, String) {
        let root = hex_encode(&self.prev_hash);
        (self.entries, root)
    }

    /// Get the rvf-crypto WitnessEntry list for .rvf binary export.
    pub fn rvf_entries(&self) -> &[rvf_crypto::WitnessEntry] {
        &self.rvf_entries
    }
}

fn hex_encode(bytes: &[u8]) -> String {
    bytes.iter().map(|b| format!("{:02x}", b)).collect()
}

// ═══════════════════════════════════════════════════════════════════════════
// Chain verification
// ═══════════════════════════════════════════════════════════════════════════

/// Verify the integrity of a witness chain.
///
/// Recomputes every chain_hash from the records and checks they match.
/// Returns Ok(root_hash) if the chain is valid, Err(index) if tampered.
pub fn verify_chain(chain: &[ChainedWitness]) -> Result<String, usize> {
    let mut prev_hash = [0u8; 32];

    for (i, entry) in chain.iter().enumerate() {
        let canonical = entry.record.canonical_bytes();
        let mut chain_input = Vec::with_capacity(32 + canonical.len());
        chain_input.extend_from_slice(&prev_hash);
        chain_input.extend_from_slice(&canonical);
        let computed = shake256_256(&chain_input);
        let computed_hex = hex_encode(&computed);

        if computed_hex != entry.chain_hash {
            return Err(i);
        }
        prev_hash = computed;
    }

    Ok(hex_encode(&prev_hash))
}

// ═══════════════════════════════════════════════════════════════════════════
// Generate the publishable manifest
// ═══════════════════════════════════════════════════════════════════════════

/// Run all three ablation modes and produce the publishable RVF manifest.
///
/// This is the entry point. Same config → same manifest → same chain_root_hash.
/// If `rvf_output_path` is provided, also exports the native `.rvf` binary.
pub fn generate_manifest(config: &HoldoutConfig) -> anyhow::Result<RvfManifest> {
    generate_manifest_with_rvf(config, None)
}

/// Like `generate_manifest`, but also produces a `.rvf` binary file.
pub fn generate_manifest_with_rvf(
    config: &HoldoutConfig,
    rvf_output_path: Option<&str>,
) -> anyhow::Result<RvfManifest> {
    let mut chain_builder = WitnessChainBuilder::new();

    // Run all three modes
    let mode_a = run_acceptance_test_mode(config, &AblationMode::Baseline)?;
    collect_witnesses(&mut chain_builder, "A", &mode_a, config);

    let mode_b = run_acceptance_test_mode(config, &AblationMode::CompilerOnly)?;
    collect_witnesses(&mut chain_builder, "B", &mode_b, config);

    let mode_c = run_acceptance_test_mode(config, &AblationMode::Full)?;
    collect_witnesses(&mut chain_builder, "C", &mode_c, config);

    // Build scorecards
    let scorecards = vec![
        build_scorecard("A (fixed policy)", &mode_a),
        build_scorecard("B (compiled policy)", &mode_b),
        build_scorecard("C (learned policy)", &mode_c),
    ];

    // Compute ablation assertions
    let assertions = compute_assertions(&mode_a, &mode_b, &mode_c);
    let all_passed = assertions.b_beats_a_cost.passed
        && assertions.c_beats_b_robustness.passed
        && assertions.compiler_safe.passed
        && assertions.a_skip_nonzero.passed
        && assertions.c_multi_mode.passed
        && assertions.c_penalty_better_than_b.passed
        && mode_a.result.passed
        && mode_b.result.passed
        && mode_c.result.passed;

    // Export .rvf binary before finalizing (consumes chain_builder entries)
    if let Some(rvf_path) = rvf_output_path {
        // Build the manifest struct first for the meta segment
        let preview_manifest = RvfManifest {
            version: 2,
            description: String::new(),
            config: ManifestConfig::from(config),
            scorecards: scorecards.clone(),
            assertions: assertions.clone(),
            all_passed,
            witness_chain: vec![],
            chain_root_hash: String::new(),
            chain_length: chain_builder.rvf_entries().len(),
        };
        export_rvf_binary(&preview_manifest, &chain_builder, rvf_path)?;
    }

    // Finalize witness chain
    let (witness_chain, chain_root_hash) = chain_builder.finalize();
    let chain_length = witness_chain.len();

    Ok(RvfManifest {
        version: 2,
        description: "RuVector temporal reasoning ablation study — \
            deterministic acceptance test with SHAKE-256 witness chain \
            (rvf-crypto native)"
            .to_string(),
        config: ManifestConfig::from(config),
        scorecards,
        assertions,
        all_passed,
        witness_chain,
        chain_root_hash,
        chain_length,
    })
}

/// Verify a manifest by re-running with the same config and comparing hashes.
pub fn verify_manifest(manifest: &RvfManifest) -> anyhow::Result<VerifyResult> {
    // Step 1: Verify chain integrity (hashes link correctly)
    let chain_result = verify_chain(&manifest.witness_chain);
    let chain_valid = match &chain_result {
        Ok(root) => root == &manifest.chain_root_hash,
        Err(_) => false,
    };

    if !chain_valid {
        return Ok(VerifyResult {
            chain_integrity: false,
            outcomes_match: false,
            root_hash_match: false,
            recomputed_root: chain_result.unwrap_or_default(),
            expected_root: manifest.chain_root_hash.clone(),
            mismatched_records: vec![],
        });
    }

    // Step 2: Re-run with same config
    let config = holdout_config_from_manifest(&manifest.config);
    let fresh = generate_manifest(&config)?;

    // Step 3: Compare root hashes
    let root_match = fresh.chain_root_hash == manifest.chain_root_hash;

    // Step 4: Find any mismatched records
    let mut mismatches = Vec::new();
    let max_len = manifest.witness_chain.len().min(fresh.witness_chain.len());
    for i in 0..max_len {
        let orig = &manifest.witness_chain[i];
        let new = &fresh.witness_chain[i];
        if orig.chain_hash != new.chain_hash {
            mismatches.push(i);
            if mismatches.len() >= 10 {
                break; // cap output
            }
        }
    }

    Ok(VerifyResult {
        chain_integrity: true,
        outcomes_match: mismatches.is_empty() && manifest.chain_length == fresh.chain_length,
        root_hash_match: root_match,
        recomputed_root: fresh.chain_root_hash,
        expected_root: manifest.chain_root_hash.clone(),
        mismatched_records: mismatches,
    })
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VerifyResult {
    pub chain_integrity: bool,
    pub outcomes_match: bool,
    pub root_hash_match: bool,
    pub recomputed_root: String,
    pub expected_root: String,
    pub mismatched_records: Vec<usize>,
}

impl VerifyResult {
    pub fn print(&self) {
        println!();
        println!("  Witness Chain Verification:");
        println!(
            "    Chain integrity:  {}",
            if self.chain_integrity { "PASS" } else { "FAIL" }
        );
        println!(
            "    Outcomes match:   {}",
            if self.outcomes_match { "PASS" } else { "FAIL" }
        );
        println!(
            "    Root hash match:  {}",
            if self.root_hash_match { "PASS" } else { "FAIL" }
        );
        println!("    Expected root:    {}", &self.expected_root[..16]);
        println!(
            "    Recomputed root:  {}",
            &self.recomputed_root[..self.recomputed_root.len().min(16)]
        );
        if !self.mismatched_records.is_empty() {
            println!("    Mismatched at:    {:?}", self.mismatched_records);
        }
        println!();
    }

    pub fn passed(&self) -> bool {
        self.chain_integrity && self.outcomes_match && self.root_hash_match
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Native .rvf binary export
// ═══════════════════════════════════════════════════════════════════════════

/// Export the manifest as a native `.rvf` binary file.
///
/// Produces a file with two segments:
/// - **WITNESS_SEG** (0x0A): The SHAKE-256 witness chain as 73-byte entries
///   created by `rvf_crypto::create_witness_chain()`, verifiable by any
///   RVF-compatible tool including the WASM microkernel.
/// - **META_SEG** (0x07): JSON-encoded scorecard + assertions metadata.
///
/// The resulting file is a valid `.rvf` file that can be inspected with
/// `rvf inspect`, verified with `rvf verify-witness`, or loaded in the
/// browser via the WASM runtime's `rvf_witness_verify` export.
pub fn export_rvf_binary(
    manifest: &RvfManifest,
    chain_builder: &WitnessChainBuilder,
    path: &str,
) -> anyhow::Result<()> {
    use rvf_crypto::create_witness_chain;
    use rvf_types::{SegmentFlags, SegmentType};
    use rvf_wire::write_segment;

    // Build the native SHAKE-256 witness chain from the rvf-crypto entries
    let witness_bytes = create_witness_chain(chain_builder.rvf_entries());

    // Write WITNESS_SEG (0x0A) with SEALED flag
    let witness_seg = write_segment(
        SegmentType::Witness as u8,
        &witness_bytes,
        SegmentFlags::empty().with(SegmentFlags::SEALED),
        1, // segment_id
    );

    // Build metadata JSON payload
    let meta = serde_json::json!({
        "format": "rvf-acceptance-test",
        "version": manifest.version,
        "chain_root_hash": manifest.chain_root_hash,
        "chain_length": manifest.chain_length,
        "all_passed": manifest.all_passed,
        "config": manifest.config,
        "scorecards": manifest.scorecards,
        "assertions": manifest.assertions,
    });
    let meta_bytes = serde_json::to_vec(&meta)?;

    // Write META_SEG (0x07)
    let meta_seg = write_segment(
        SegmentType::Meta as u8,
        &meta_bytes,
        SegmentFlags::empty().with(SegmentFlags::SEALED),
        2, // segment_id
    );

    // Concatenate segments into a single .rvf file
    let mut rvf_file = Vec::with_capacity(witness_seg.len() + meta_seg.len());
    rvf_file.extend_from_slice(&witness_seg);
    rvf_file.extend_from_slice(&meta_seg);

    std::fs::write(path, &rvf_file)?;
    Ok(())
}

/// Verify the native `.rvf` binary witness chain.
///
/// Reads the WITNESS_SEG payload and runs `rvf_crypto::verify_witness_chain`.
pub fn verify_rvf_binary(path: &str) -> anyhow::Result<usize> {
    use rvf_crypto::verify_witness_chain;
    use rvf_types::SEGMENT_HEADER_SIZE;

    let data = std::fs::read(path)?;
    if data.len() < SEGMENT_HEADER_SIZE {
        anyhow::bail!("File too small for a valid segment");
    }

    // Parse the first segment header to get payload length
    let seg_type = data[5];
    if seg_type != 0x0A {
        anyhow::bail!("First segment is not WITNESS_SEG (got 0x{:02X})", seg_type);
    }

    let payload_len = u64::from_le_bytes(
        data[0x10..0x18]
            .try_into()
            .map_err(|_| anyhow::anyhow!("Bad header"))?,
    ) as usize;

    let payload_start = SEGMENT_HEADER_SIZE;
    let payload_end = payload_start + payload_len;
    if data.len() < payload_end {
        anyhow::bail!("Truncated witness payload");
    }

    let witness_data = &data[payload_start..payload_end];
    let entries = verify_witness_chain(witness_data)
        .map_err(|e| anyhow::anyhow!("Witness chain verification failed: {:?}", e))?;

    Ok(entries.len())
}

// ═══════════════════════════════════════════════════════════════════════════
// Internal helpers
// ═══════════════════════════════════════════════════════════════════════════

fn collect_witnesses(
    builder: &mut WitnessChainBuilder,
    mode_label: &str,
    result: &crate::acceptance_test::AblationResult,
    _config: &HoldoutConfig,
) {
    // Witness each cycle's holdout metrics
    for cm in &result.result.cycles {
        builder.append(WitnessRecord {
            puzzle_id: format!("cycle_{}_holdout", cm.cycle),
            mode: mode_label.to_string(),
            cycle: cm.cycle,
            skip_mode: "aggregate".to_string(),
            context_bucket: "holdout".to_string(),
            correct: cm.holdout_accuracy >= 0.5,
            steps: cm.holdout_cost_per_solve as usize,
            seq: 0,
        });
    }

    // Witness skip-mode distribution (each bucket is a witness record)
    // Sort keys for deterministic iteration order
    let mut buckets: Vec<&String> = result.skip_mode_distribution.keys().collect();
    buckets.sort();
    for bucket in buckets {
        let dist = &result.skip_mode_distribution[bucket];
        let mut mode_names: Vec<&String> = dist.keys().collect();
        mode_names.sort();
        for mode_name in mode_names {
            let count = dist[mode_name];
            builder.append(WitnessRecord {
                puzzle_id: format!("dist_{}_{}", bucket, mode_name),
                mode: mode_label.to_string(),
                cycle: result.result.cycles.len(),
                skip_mode: mode_name.clone(),
                context_bucket: bucket.clone(),
                correct: true,
                steps: count,
                seq: 0,
            });
        }
    }

    // Witness compiler and penalty stats
    builder.append(WitnessRecord {
        puzzle_id: "compiler_stats".to_string(),
        mode: mode_label.to_string(),
        cycle: 0,
        skip_mode: format!("hits:{}", result.compiler_hits),
        context_bucket: format!("misses:{}", result.compiler_misses),
        correct: result.compiler_false_hits == 0,
        steps: result.compiler_false_hits,
        seq: 0,
    });

    builder.append(WitnessRecord {
        puzzle_id: "penalty_stats".to_string(),
        mode: mode_label.to_string(),
        cycle: 0,
        skip_mode: format!("rate:{:.4}", result.early_commit_rate),
        context_bucket: format!("penalty:{:.4}", result.early_commit_penalties),
        correct: true,
        steps: result.policy_context_buckets,
        seq: 0,
    });
}

fn build_scorecard(label: &str, result: &crate::acceptance_test::AblationResult) -> ModeScorecard {
    let last = result.result.cycles.last();
    ModeScorecard {
        mode: label.to_string(),
        total_puzzles: result.result.cycles.len(),
        correct: last
            .map(|c| (c.holdout_accuracy * 100.0) as usize)
            .unwrap_or(0),
        accuracy: last.map(|c| c.holdout_accuracy).unwrap_or(0.0),
        total_steps: last.map(|c| c.holdout_cost_per_solve as usize).unwrap_or(0),
        cost_per_solve: last.map(|c| c.holdout_cost_per_solve).unwrap_or(0.0),
        noise_accuracy: last.map(|c| c.holdout_noise_accuracy).unwrap_or(0.0),
        violations: last.map(|c| c.holdout_violations).unwrap_or(0),
        early_commit_penalty: result.early_commit_penalties,
        skip_mode_distribution: result.skip_mode_distribution.clone(),
        context_buckets_used: result.policy_context_buckets,
    }
}

fn compute_assertions(
    mode_a: &crate::acceptance_test::AblationResult,
    mode_b: &crate::acceptance_test::AblationResult,
    mode_c: &crate::acceptance_test::AblationResult,
) -> AblationAssertions {
    let last_a = mode_a.result.cycles.last().unwrap();
    let last_b = mode_b.result.cycles.last().unwrap();
    let last_c = mode_c.result.cycles.last().unwrap();

    let cost_decrease = if last_a.holdout_cost_per_solve > 0.0 {
        1.0 - (last_b.holdout_cost_per_solve / last_a.holdout_cost_per_solve)
    } else {
        0.0
    };

    let robustness_gain = last_c.holdout_noise_accuracy - last_b.holdout_noise_accuracy;

    let total_compiler = mode_b.compiler_hits + mode_b.compiler_misses;
    let false_hit_rate = if total_compiler > 0 {
        mode_b.compiler_false_hits as f64 / total_compiler as f64
    } else {
        0.0
    };

    let a_total_skip: usize = mode_a
        .skip_mode_distribution
        .values()
        .flat_map(|m| m.iter())
        .filter(|(name, _)| *name != "none")
        .map(|(_, c)| *c)
        .sum();

    let c_unique_modes: std::collections::HashSet<&str> = mode_c
        .skip_mode_distribution
        .values()
        .flat_map(|m| m.keys())
        .map(|s| s.as_str())
        .collect();

    let b_penalty = mode_b.early_commit_penalties;
    let c_penalty = mode_c.early_commit_penalties;
    let penalty_ok = if b_penalty > 0.0 {
        c_penalty <= b_penalty * 0.90
    } else {
        c_penalty == 0.0
    };

    AblationAssertions {
        b_beats_a_cost: AssertionResult {
            name: "B beats A on cost (>=15%)".to_string(),
            passed: cost_decrease >= 0.15,
            measured: format!("{:.1}%", cost_decrease * 100.0),
            threshold: ">=15%".to_string(),
        },
        c_beats_b_robustness: AssertionResult {
            name: "C beats B on robustness (>=10%)".to_string(),
            passed: robustness_gain >= 0.10,
            measured: format!("{:.1}%", robustness_gain * 100.0),
            threshold: ">=10%".to_string(),
        },
        compiler_safe: AssertionResult {
            name: "Compiler false-hit rate <5%".to_string(),
            passed: false_hit_rate < 0.05,
            measured: format!("{:.1}%", false_hit_rate * 100.0),
            threshold: "<5%".to_string(),
        },
        a_skip_nonzero: AssertionResult {
            name: "Mode A skip usage nonzero".to_string(),
            passed: a_total_skip > 0,
            measured: format!("{}", a_total_skip),
            threshold: ">0".to_string(),
        },
        c_multi_mode: AssertionResult {
            name: "Mode C uses multiple skip modes".to_string(),
            passed: c_unique_modes.len() >= 2,
            measured: format!("{} modes", c_unique_modes.len()),
            threshold: ">=2".to_string(),
        },
        c_penalty_better_than_b: AssertionResult {
            name: "C penalty < B penalty (distract)".to_string(),
            passed: penalty_ok,
            measured: format!("C={:.2} B={:.2}", c_penalty, b_penalty),
            threshold: "C <= 90% of B".to_string(),
        },
    }
}

fn holdout_config_from_manifest(mc: &ManifestConfig) -> HoldoutConfig {
    let holdout_seed = u64::from_str_radix(
        mc.holdout_seed
            .trim_start_matches("0x")
            .trim_start_matches("0X"),
        16,
    )
    .unwrap_or(0xDEAD_BEEF);
    let training_seed = u64::from_str_radix(
        mc.training_seed
            .trim_start_matches("0x")
            .trim_start_matches("0X"),
        16,
    )
    .unwrap_or(42);

    HoldoutConfig {
        holdout_size: mc.holdout_size,
        training_per_cycle: mc.training_per_cycle,
        cycles: mc.cycles,
        holdout_seed,
        training_seed,
        noise_rate: mc.noise_rate,
        step_budget: mc.step_budget,
        min_accuracy: mc.min_accuracy,
        min_dimensions_improved: 2,
        verbose: false,
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Pretty-print
// ═══════════════════════════════════════════════════════════════════════════

impl RvfManifest {
    pub fn print_summary(&self) {
        println!();
        println!("╔══════════════════════════════════════════════════════════════╗");
        println!("║         PUBLISHABLE RVF ACCEPTANCE TEST                      ║");
        println!("╚══════════════════════════════════════════════════════════════╝");
        println!();
        println!("  Config:");
        println!(
            "    Holdout:  {} puzzles (seed {})",
            self.config.holdout_size, self.config.holdout_seed
        );
        println!(
            "    Training: {} per cycle x {} cycles",
            self.config.training_per_cycle, self.config.cycles
        );
        println!(
            "    Budget:   {} steps, noise rate {:.0}%",
            self.config.step_budget,
            self.config.noise_rate * 100.0
        );
        println!();

        println!(
            "  {:<22} {:>8} {:>12} {:>10} {:>6}",
            "Mode", "Acc%", "Cost/Solve", "Noise%", "Viol"
        );
        println!("  {}", "-".repeat(62));
        for sc in &self.scorecards {
            println!(
                "  {:<22} {:>6.1}% {:>11.2} {:>8.1}% {:>5}",
                sc.mode,
                sc.accuracy * 100.0,
                sc.cost_per_solve,
                sc.noise_accuracy * 100.0,
                sc.violations
            );
        }
        println!();

        println!("  Ablation Assertions:");
        for a in [
            &self.assertions.b_beats_a_cost,
            &self.assertions.c_beats_b_robustness,
            &self.assertions.compiler_safe,
            &self.assertions.a_skip_nonzero,
            &self.assertions.c_multi_mode,
            &self.assertions.c_penalty_better_than_b,
        ] {
            println!(
                "    {:<40} {} ({})",
                a.name,
                if a.passed { "PASS" } else { "FAIL" },
                a.measured
            );
        }
        println!();

        println!("  Witness Chain:");
        println!("    Records:   {}", self.chain_length);
        println!(
            "    Root hash: {}",
            &self.chain_root_hash[..32.min(self.chain_root_hash.len())]
        );
        println!();

        if self.all_passed {
            println!("  RESULT: ALL PASSED — artifact is publishable");
        } else {
            println!("  RESULT: SOME CRITERIA NOT MET");
        }
        println!();
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Tests
// ═══════════════════════════════════════════════════════════════════════════

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn witness_chain_integrity() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..5 {
            builder.append(WitnessRecord {
                puzzle_id: format!("puzzle_{}", i),
                mode: "A".to_string(),
                cycle: 0,
                skip_mode: "none".to_string(),
                context_bucket: "small:clean:clean".to_string(),
                correct: true,
                steps: 10 + i,
                seq: 0,
            });
        }
        let (chain, root) = builder.finalize();
        assert_eq!(chain.len(), 5);
        assert!(!root.is_empty());

        // Verify chain
        let verified_root = verify_chain(&chain).unwrap();
        assert_eq!(verified_root, root);
    }

    #[test]
    fn tampered_chain_detected() {
        let mut builder = WitnessChainBuilder::new();
        for i in 0..3 {
            builder.append(WitnessRecord {
                puzzle_id: format!("puzzle_{}", i),
                mode: "B".to_string(),
                cycle: 0,
                skip_mode: "weekday".to_string(),
                context_bucket: "large:heavy:noisy".to_string(),
                correct: i != 1,
                steps: 20,
                seq: 0,
            });
        }
        let (mut chain, _) = builder.finalize();

        // Tamper: flip the correct field
        chain[1].record.correct = true;
        let result = verify_chain(&chain);
        assert!(result.is_err());
    }

    #[test]
    fn deterministic_chain() {
        // Same inputs → same root hash
        let build = || {
            let mut b = WitnessChainBuilder::new();
            b.append(WitnessRecord {
                puzzle_id: "p1".to_string(),
                mode: "C".to_string(),
                cycle: 1,
                skip_mode: "hybrid".to_string(),
                context_bucket: "medium:some:clean".to_string(),
                correct: true,
                steps: 42,
                seq: 0,
            });
            b.finalize().1
        };
        assert_eq!(build(), build());
    }

    #[test]
    fn manifest_generation_small() {
        let config = HoldoutConfig {
            holdout_size: 10,
            training_per_cycle: 10,
            cycles: 2,
            step_budget: 200,
            min_accuracy: 0.30,
            min_dimensions_improved: 0,
            verbose: false,
            ..Default::default()
        };
        let manifest = generate_manifest(&config).unwrap();
        assert_eq!(manifest.version, 2);
        assert_eq!(manifest.scorecards.len(), 3);
        assert!(!manifest.chain_root_hash.is_empty());
        assert!(manifest.chain_length > 0);

        // Verify chain integrity
        let root = verify_chain(&manifest.witness_chain).unwrap();
        assert_eq!(root, manifest.chain_root_hash);
    }

    #[test]
    fn manifest_deterministic_replay() {
        let config = HoldoutConfig {
            holdout_size: 10,
            training_per_cycle: 10,
            cycles: 2,
            step_budget: 200,
            min_accuracy: 0.30,
            min_dimensions_improved: 0,
            verbose: false,
            ..Default::default()
        };
        let m1 = generate_manifest(&config).unwrap();
        let m2 = generate_manifest(&config).unwrap();
        assert_eq!(m1.chain_root_hash, m2.chain_root_hash);
        assert_eq!(m1.chain_length, m2.chain_length);
    }
}