Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
494
vendor/ruvector/crates/rvf/rvf-runtime/tests/adr033_integration.rs
vendored
Normal file
494
vendor/ruvector/crates/rvf/rvf-runtime/tests/adr033_integration.rs
vendored
Normal file
@@ -0,0 +1,494 @@
|
||||
//! Integration tests for ADR-033: Progressive Indexing Hardening.
|
||||
//!
|
||||
//! Tests cover:
|
||||
//! 1. QualityEnvelope as mandatory outer return type
|
||||
//! 2. Budget cap enforcement under adversarial queries
|
||||
//! 3. Graceful degradation under degenerate conditions
|
||||
//! 4. SecurityPolicy enforcement
|
||||
//! 5. Content-addressed centroid stability (HardeningFields)
|
||||
//! 6. Adversarial distribution detection
|
||||
//! 7. DoS hardening mechanisms
|
||||
|
||||
use rvf_runtime::{
|
||||
adaptive_n_probe, centroid_distance_cv, combined_effective_n_probe,
|
||||
effective_n_probe_with_drift, is_degenerate_distribution, selective_safety_net_scan,
|
||||
should_activate_safety_net, BudgetTokenBucket, NegativeCache, ProofOfWork, QueryOptions,
|
||||
QuerySignature, RvfOptions, RvfStore, DEGENERATE_CV_THRESHOLD,
|
||||
};
|
||||
use rvf_types::quality::*;
|
||||
use rvf_types::security::*;
|
||||
use rvf_types::{ErrorCode, RvfError};
|
||||
use std::time::Duration;
|
||||
|
||||
// ---- Helper: create a test store ----
|
||||
|
||||
fn create_test_store(dim: u16, count: usize) -> (tempfile::TempDir, RvfStore) {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let path = dir.path().join("test.rvf");
|
||||
let mut opts = RvfOptions::default();
|
||||
opts.dimension = dim;
|
||||
opts.security_policy = SecurityPolicy::Permissive; // For test simplicity.
|
||||
let mut store = RvfStore::create(&path, opts).unwrap();
|
||||
|
||||
// Ingest vectors in a single batch.
|
||||
let vectors: Vec<Vec<f32>> = (0..count)
|
||||
.map(|i| {
|
||||
(0..dim as usize)
|
||||
.map(|d| (i * dim as usize + d) as f32 * 0.01)
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
let vec_refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
|
||||
let ids: Vec<u64> = (0..count as u64).collect();
|
||||
store.ingest_batch(&vec_refs, &ids, None).unwrap();
|
||||
|
||||
(dir, store)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// §1 QualityEnvelope Tests
|
||||
// ========================================================================
|
||||
|
||||
// A normal query must return a populated QualityEnvelope: non-empty results,
// a recognized quality tier, evidence for layer A, and a budget report.
#[test]
fn quality_envelope_returned_for_normal_query() {
    let (_dir, store) = create_test_store(4, 100);
    let query = vec![0.5, 0.5, 0.5, 0.5];
    let opts = QueryOptions {
        quality_preference: QualityPreference::AcceptDegraded,
        ..QueryOptions::default()
    };

    let envelope = store.query_with_envelope(&query, 10, &opts).unwrap();

    assert!(!envelope.results.is_empty());
    assert!(matches!(
        envelope.quality,
        ResponseQuality::Verified | ResponseQuality::Usable | ResponseQuality::Degraded
    ));
    // Evidence must be populated.
    assert!(envelope.evidence.layers_used.layer_a);
    // Budget report must have non-zero total_us. Results are already known to
    // be non-empty from the first assertion, so the previous
    // `|| envelope.results.is_empty()` escape hatch was dead code.
    assert!(envelope.budgets.total_us > 0);
}
|
||||
|
||||
// Structural smoke test: every envelope field the ADR mandates must be
// reachable after a normal query. The `let _ =` bindings are deliberate —
// they fail to compile if a field is removed or renamed.
#[test]
fn quality_envelope_contains_all_fields() {
    let (_dir, store) = create_test_store(4, 50);
    let query = vec![0.1, 0.2, 0.3, 0.4];
    let opts = QueryOptions {
        quality_preference: QualityPreference::AcceptDegraded,
        ..QueryOptions::default()
    };

    let envelope = store.query_with_envelope(&query, 5, &opts).unwrap();

    // All required fields must be present (not None where applicable).
    let _ = envelope.quality;
    let _ = envelope.evidence.layers_used;
    let _ = envelope.evidence.n_probe_effective;
    let _ = envelope.evidence.hnsw_candidate_count;
    let _ = envelope.budgets.total_us;
    let _ = envelope.budgets.distance_ops;
    // degradation may be None for non-degraded results.
}
|
||||
|
||||
// Without AcceptDegraded, a query whose quality falls below threshold must
// surface as Err(QualityBelowThreshold) rather than a silently degraded Ok.
// The outcome is environment-dependent, so both legal outcomes are matched.
#[test]
fn quality_envelope_degraded_without_accept_returns_error() {
    let (_dir, store) = create_test_store(4, 2);
    let query = vec![0.5, 0.5, 0.5, 0.5];

    // Request 100 results from a store with only 2 vectors.
    // Safety net should activate, and with tight budget, quality may degrade.
    let opts = QueryOptions {
        quality_preference: QualityPreference::Auto,
        safety_net_budget: SafetyNetBudget {
            max_scan_time_us: 1,
            max_scan_candidates: 1,
            max_distance_ops: 1,
        },
        ..QueryOptions::default()
    };

    let result = store.query_with_envelope(&query, 100, &opts);
    // With such a tiny budget asking for 100 from 2 vectors, we expect
    // either Ok (if 2 results are enough) or Err(QualityBelowThreshold).
    match result {
        Ok(envelope) => {
            // If Ok, quality should not be Degraded/Unreliable since we didn't AcceptDegraded.
            assert!(matches!(
                envelope.quality,
                ResponseQuality::Verified | ResponseQuality::Usable
            ));
        }
        Err(RvfError::QualityBelowThreshold { quality, reason }) => {
            // The error must carry the offending quality tier and a
            // human-readable, non-empty explanation.
            assert!(matches!(
                quality,
                ResponseQuality::Degraded | ResponseQuality::Unreliable
            ));
            assert!(!reason.is_empty());
        }
        Err(other) => panic!("unexpected error: {other}"),
    }
}
|
||||
|
||||
// Counterpart to the previous test: with AcceptDegraded the same starved
// configuration must never error — degraded results come back as Ok.
#[test]
fn quality_envelope_accept_degraded_succeeds() {
    let (_dir, store) = create_test_store(4, 2);
    let query = vec![0.5, 0.5, 0.5, 0.5];

    let opts = QueryOptions {
        quality_preference: QualityPreference::AcceptDegraded,
        // Deliberately starved budget to force degradation.
        safety_net_budget: SafetyNetBudget {
            max_scan_time_us: 1,
            max_scan_candidates: 1,
            max_distance_ops: 1,
        },
        ..QueryOptions::default()
    };

    // With AcceptDegraded, even degraded results should return Ok.
    let result = store.query_with_envelope(&query, 100, &opts);
    assert!(result.is_ok());
}
|
||||
|
||||
// ========================================================================
|
||||
// §2 Budget Cap Enforcement
|
||||
// ========================================================================
|
||||
|
||||
// Adversarial-size input (10k vectors) against a 50-op budget: the scan must
// stop at the cap. The `+ 1` slack tolerates one operation already in flight
// when the cap is detected — presumably the check is post-increment; confirm
// against selective_safety_net_scan's implementation.
#[test]
fn budget_caps_are_hard_limits() {
    let budget = SafetyNetBudget {
        max_scan_time_us: 1_000_000, // 1 second (won't hit in test).
        max_scan_candidates: 50,
        max_distance_ops: 50,
    };

    let query = vec![0.0; 4];
    let vecs: Vec<(u64, Vec<f32>)> = (0..10_000)
        .map(|i| (i as u64, vec![i as f32 * 0.001; 4]))
        .collect();
    let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

    let result = selective_safety_net_scan(&query, 10, &[], &refs, &budget, 10_000);

    // Hard cap: distance_ops must not exceed budget.
    assert!(
        result.budget_report.distance_ops <= budget.max_distance_ops + 1,
        "distance_ops {} exceeded budget {}",
        result.budget_report.distance_ops,
        budget.max_distance_ops,
    );
    assert!(
        result.budget_report.linear_scan_count <= budget.max_scan_candidates + 1,
        "scan_count {} exceeded budget {}",
        result.budget_report.linear_scan_count,
        budget.max_scan_candidates,
    );
}
|
||||
|
||||
// SafetyNetBudget::DISABLED must be a true no-op: no candidates produced,
// no exhaustion flag, zero distance operations recorded.
#[test]
fn disabled_budget_produces_no_scan() {
    let query = vec![0.0; 4];
    let vecs: Vec<(u64, Vec<f32>)> = (0..100).map(|i| (i as u64, vec![i as f32; 4])).collect();
    let refs: Vec<(u64, &[f32])> = vecs.iter().map(|(id, v)| (*id, v.as_slice())).collect();

    let result = selective_safety_net_scan(&query, 10, &[], &refs, &SafetyNetBudget::DISABLED, 100);
    assert!(result.candidates.is_empty());
    assert!(!result.budget_exhausted);
    assert_eq!(result.budget_report.distance_ops, 0);
}
|
||||
|
||||
// extended_4x() must scale every budget dimension by exactly 4.
#[test]
fn prefer_quality_extends_budget_4x() {
    let base = SafetyNetBudget::LAYER_A;
    let SafetyNetBudget {
        max_scan_time_us,
        max_scan_candidates,
        max_distance_ops,
    } = base.extended_4x();
    assert_eq!(max_scan_time_us, base.max_scan_time_us * 4);
    assert_eq!(max_scan_candidates, base.max_scan_candidates * 4);
    assert_eq!(max_distance_ops, base.max_distance_ops * 4);
}
|
||||
|
||||
// ========================================================================
|
||||
// §3 Adversarial Distribution Detection
|
||||
// ========================================================================
|
||||
|
||||
// 100 identical distances have zero spread — must be flagged as degenerate.
#[test]
fn degenerate_detection_uniform() {
    let uniform = vec![1.0_f32; 100];
    assert!(is_degenerate_distribution(&uniform, 10));
}
|
||||
|
||||
// A naturally spread distribution (linearly increasing distances) must NOT
// be classified as degenerate.
#[test]
fn degenerate_detection_natural() {
    let spread: Vec<f32> = (0..100).map(|i| i as f32 * 0.5).collect();
    assert!(!is_degenerate_distribution(&spread, 10));
}
|
||||
|
||||
// On a degenerate (all-equal) distance distribution, adaptive_n_probe must
// widen beyond the base probe count but never past 4x the base.
#[test]
fn adaptive_nprobe_widens_on_degenerate() {
    let distances = vec![1.0; 1000];
    let result = adaptive_n_probe(4, &distances, 1000);
    assert!(result > 4, "should widen from 4, got {result}");
    assert!(result <= 16, "should cap at 4x base");
}
|
||||
|
||||
// A well-spread distribution must leave the probe count untouched.
#[test]
fn adaptive_nprobe_no_change_natural() {
    let spread: Vec<f32> = (0..100).map(|i| i as f32).collect();
    let n_probe = adaptive_n_probe(10, &spread, 100);
    assert_eq!(n_probe, 10);
}
|
||||
|
||||
// Epoch drift widening: n_probe stays at base while drift <= max_drift, and
// doubles once drift exceeds the maximum.
#[test]
fn epoch_drift_widening() {
    // No drift.
    assert_eq!(effective_n_probe_with_drift(10, 0, 64), 10);
    // Half drift.
    assert_eq!(effective_n_probe_with_drift(10, 32, 64), 10);
    // Beyond max drift: double.
    assert_eq!(effective_n_probe_with_drift(10, 100, 64), 20);
}
|
||||
|
||||
// When both adaptive widening and drift widening apply, the combined probe
// count is the max of the two. Here the distribution is natural (no adaptive
// widening) but drift (100 > 64) doubles the base.
#[test]
fn combined_nprobe_takes_max() {
    let distances: Vec<f32> = (0..100).map(|i| i as f32).collect();
    let (result, degenerate) = combined_effective_n_probe(10, &distances, 100, 100, 64);
    assert_eq!(result, 20); // Drift dominates.
    assert!(!degenerate);
}
|
||||
|
||||
// The degenerate coefficient-of-variation threshold must be a proper
// fraction, and a perfectly uniform distance set (CV == 0) must fall below it.
#[test]
fn cv_threshold_consistent() {
    assert!(DEGENERATE_CV_THRESHOLD > 0.0);
    assert!(DEGENERATE_CV_THRESHOLD < 1.0);
    // Uniform distances should be below threshold. A plain array slice
    // replaces the needless `&vec![…]` heap allocation (clippy::useless_vec).
    let cv = centroid_distance_cv(&[1.0; 100], 10);
    assert!(cv < DEGENERATE_CV_THRESHOLD);
}
|
||||
|
||||
// ========================================================================
|
||||
// §4 Security Policy
|
||||
// ========================================================================
|
||||
|
||||
// Secure-by-default: an unconfigured store must get the Strict policy.
#[test]
fn security_policy_default_is_strict() {
    let default_policy = SecurityPolicy::default();
    assert_eq!(default_policy, SecurityPolicy::Strict);
}
|
||||
|
||||
// Policy predicate matrix. The visible ordering of strictness is:
// Permissive < WarnOnly < Strict < Paranoid — only Paranoid verifies level-1
// signatures, and only Permissive skips content-hash verification.
#[test]
fn security_policy_methods() {
    assert!(!SecurityPolicy::Permissive.requires_signature());
    assert!(SecurityPolicy::Strict.requires_signature());
    assert!(SecurityPolicy::Paranoid.requires_signature());

    assert!(!SecurityPolicy::Permissive.verifies_content_hashes());
    assert!(SecurityPolicy::WarnOnly.verifies_content_hashes());
    assert!(SecurityPolicy::Strict.verifies_content_hashes());

    // Level-1 verification is reserved for Paranoid.
    assert!(!SecurityPolicy::Strict.verifies_level1());
    assert!(SecurityPolicy::Paranoid.verifies_level1());
}
|
||||
|
||||
// Display output of SecurityError variants is part of the stable surface:
// downstream log scrapers match on these substrings, so the key identifiers
// (message prefix, offset, pointer name) must survive formatting changes.
#[test]
fn security_error_stable_display() {
    let err = SecurityError::UnsignedManifest {
        manifest_offset: 0x1000,
    };
    let s = format!("{err}");
    assert!(s.contains("unsigned manifest"));
    // 0x1000 rendered in hex — the "1000" digits must appear.
    assert!(s.contains("1000"));

    let err = SecurityError::ContentHashMismatch {
        pointer_name: "centroid",
        expected_hash: [0xAA; 16],
        actual_hash: [0xBB; 16],
        seg_offset: 0x2000,
    };
    let s = format!("{err}");
    assert!(s.contains("centroid"));
}
|
||||
|
||||
// ========================================================================
|
||||
// §5 Content-Addressed Centroid Stability (HardeningFields)
|
||||
// ========================================================================
|
||||
|
||||
// HardeningFields must round-trip losslessly through its byte encoding.
// Distinct fill bytes per field catch any field-order or offset mix-up.
#[test]
fn hardening_fields_round_trip() {
    let fields = HardeningFields {
        entrypoint_content_hash: [0x11; 16],
        toplayer_content_hash: [0x22; 16],
        centroid_content_hash: [0x33; 16],
        quantdict_content_hash: [0x44; 16],
        hot_cache_content_hash: [0x55; 16],
        centroid_epoch: 42,
        max_epoch_drift: 64,
        reserved: [0u8; 8],
    };

    let bytes = fields.to_bytes();
    let decoded = HardeningFields::from_bytes(&bytes);
    assert_eq!(fields, decoded);
}
|
||||
|
||||
// epoch_drift is the distance from the recorded centroid epoch (10) to the
// current epoch; is_epoch_drift_exceeded trips only past max_epoch_drift (64).
#[test]
fn hardening_fields_epoch_drift() {
    let fields = HardeningFields {
        centroid_epoch: 10,
        max_epoch_drift: 64,
        ..HardeningFields::zeroed()
    };
    // 50 - 10 = 40 epochs of drift, within the 64 allowance.
    assert_eq!(fields.epoch_drift(50), 40);
    assert!(!fields.is_epoch_drift_exceeded(50));
    // 100 - 10 = 90 > 64: exceeded.
    assert!(fields.is_epoch_drift_exceeded(100));
}
|
||||
|
||||
// hash_for_pointer must resolve known pointer names to their stored hash and
// return None for names it has no slot for.
#[test]
fn hardening_fields_pointer_lookup() {
    let fields = HardeningFields {
        centroid_content_hash: [0xAB; 16],
        ..HardeningFields::zeroed()
    };
    assert_eq!(fields.hash_for_pointer("centroid"), Some(&[0xAB; 16]));
    assert_eq!(fields.hash_for_pointer("nonexistent"), None);
}
|
||||
|
||||
// Layout guard: the 96 bytes of hardening fields, placed at RESERVED_OFFSET,
// must end at or before byte 252 — presumably the end of the header's
// reserved region; confirm against the header layout definition.
#[test]
fn hardening_fields_fits_in_reserved() {
    assert!(HardeningFields::RESERVED_OFFSET + 96 <= 252);
}
|
||||
|
||||
// ========================================================================
|
||||
// §6 DoS Hardening
|
||||
// ========================================================================
|
||||
|
||||
// Token bucket accounting: try_consume returns the remaining balance on
// success (Ok) and errors once the bucket is empty. Refill interval is long
// enough (60 s) that no refill can occur mid-test.
#[test]
fn budget_token_bucket_basic() {
    let mut bucket = BudgetTokenBucket::new(100, Duration::from_secs(60));
    assert_eq!(bucket.remaining(), 100);
    assert_eq!(bucket.try_consume(30), Ok(70));
    assert_eq!(bucket.try_consume(70), Ok(0));
    // Bucket is now empty: even a single token must be refused.
    assert!(bucket.try_consume(1).is_err());
}
|
||||
|
||||
// The negative cache blacklists a query signature once it has been recorded
// as degenerate `threshold` (3) times; record_degenerate returns true on the
// hit that crosses the threshold.
#[test]
fn negative_cache_blacklists_repeated_degenerate() {
    let mut cache = NegativeCache::new(3, Duration::from_secs(60), 1000);
    let sig = QuerySignature::from_query(&[0.1, 0.2, 0.3]);

    assert!(!cache.record_degenerate(sig));
    assert!(!cache.record_degenerate(sig));
    assert!(cache.record_degenerate(sig)); // 3rd hit = blacklisted.
    assert!(cache.is_blacklisted(&sig));
}
|
||||
|
||||
// DoS hardening: with a max size of 5 entries, recording 20 distinct
// signatures must never grow the cache past its cap.
#[test]
fn negative_cache_max_size_enforced() {
    let mut cache = NegativeCache::new(100, Duration::from_secs(60), 5);
    for sig in (0..20).map(|i| QuerySignature::from_query(&[i as f32])) {
        cache.record_degenerate(sig);
    }
    assert!(cache.len() <= 5);
}
|
||||
|
||||
// Proof-of-work round trip at a deliberately tiny difficulty (4 bits) so
// solve() terminates in microseconds; the found nonce must verify.
#[test]
fn proof_of_work_solve_and_verify() {
    let pow = ProofOfWork {
        challenge: [0x42; 16],
        difficulty: 4,
    };
    let nonce = pow.solve().expect("d=4 should solve quickly");
    assert!(pow.verify(nonce));
}
|
||||
|
||||
// Signing the same query twice must produce identical signatures — the
// negative cache depends on this determinism.
#[test]
fn query_signature_deterministic() {
    let query = [0.1_f32, 0.2, 0.3];
    let first = QuerySignature::from_query(&query);
    let second = QuerySignature::from_query(&query);
    assert_eq!(first, second);
}
|
||||
|
||||
// ========================================================================
|
||||
// §7 Error Code Completeness
|
||||
// ========================================================================
|
||||
|
||||
// ADR-033's new error codes must land in the right category bytes:
// 0x08 = security errors, 0x09 = quality/budget errors.
#[test]
fn new_error_codes_have_correct_categories() {
    assert_eq!(ErrorCode::UnsignedManifest.category(), 0x08);
    assert_eq!(ErrorCode::ContentHashMismatch.category(), 0x08);
    assert_eq!(ErrorCode::UnknownSigner.category(), 0x08);
    assert_eq!(ErrorCode::EpochDriftExceeded.category(), 0x08);
    assert_eq!(ErrorCode::Level1InvalidSignature.category(), 0x08);

    assert_eq!(ErrorCode::QualityBelowThreshold.category(), 0x09);
    assert_eq!(ErrorCode::BudgetTokensExhausted.category(), 0x09);
    assert_eq!(ErrorCode::QueryBlacklisted.category(), 0x09);
}
|
||||
|
||||
// is_security_error must be true exactly for the security-category codes;
// Ok serves as the negative control.
#[test]
fn security_error_codes_are_security() {
    assert!(ErrorCode::UnsignedManifest.is_security_error());
    assert!(ErrorCode::ContentHashMismatch.is_security_error());
    assert!(!ErrorCode::Ok.is_security_error());
}
|
||||
|
||||
// is_quality_error must be true exactly for the quality-category codes;
// Ok serves as the negative control.
#[test]
fn quality_error_codes_are_quality() {
    assert!(ErrorCode::QualityBelowThreshold.is_quality_error());
    assert!(ErrorCode::BudgetTokensExhausted.is_quality_error());
    assert!(!ErrorCode::Ok.is_quality_error());
}
|
||||
|
||||
// ========================================================================
|
||||
// §8 Safety Net Activation Logic
|
||||
// ========================================================================
|
||||
|
||||
// Activation boundary for the safety net given (found_count, k). The values
// below imply it activates while found_count < 2*k (0, 5, 9 activate for
// k = 5; 10 and 100 do not) — confirm the 2x factor against the
// should_activate_safety_net implementation.
#[test]
fn safety_net_activates_correctly() {
    assert!(should_activate_safety_net(0, 5));
    assert!(should_activate_safety_net(5, 5));
    assert!(should_activate_safety_net(9, 5));
    assert!(!should_activate_safety_net(10, 5));
    assert!(!should_activate_safety_net(100, 5));
}
|
||||
|
||||
// ========================================================================
|
||||
// §9 QualityPreference Behavior
|
||||
// ========================================================================
|
||||
|
||||
// PreferLatency opts out of the safety-net scan entirely: even with a tiny
// store (5 vectors, k = 3) the scan time recorded in the budget report must
// be zero.
#[test]
fn prefer_latency_disables_safety_net() {
    let (_dir, store) = create_test_store(4, 5);
    let query = vec![0.1, 0.2, 0.3, 0.4];

    let opts = QueryOptions {
        quality_preference: QualityPreference::PreferLatency,
        ..QueryOptions::default()
    };

    // PreferLatency should not trigger safety net scan.
    let envelope = store.query_with_envelope(&query, 3, &opts).unwrap();
    assert_eq!(envelope.budgets.safety_net_scan_us, 0);
}
|
||||
|
||||
// ========================================================================
|
||||
// §10 Derive Response Quality
|
||||
// ========================================================================
|
||||
|
||||
// Mixed layer qualities degrade to the weakest contributor: one budgeted
// brute-force layer pulls the overall response down to Degraded.
#[test]
fn derive_quality_from_mixed() {
    let q =
        derive_response_quality(&[RetrievalQuality::Full, RetrievalQuality::BruteForceBudgeted]);
    assert_eq!(q, ResponseQuality::Degraded);
}
|
||||
|
||||
// Uniformly Full retrieval across layers maps to a Verified response.
#[test]
fn derive_quality_all_full() {
    let layers = [RetrievalQuality::Full, RetrievalQuality::Full];
    assert_eq!(derive_response_quality(&layers), ResponseQuality::Verified);
}
|
||||
|
||||
// With no layer evidence at all there is nothing to vouch for the response:
// the derived quality must be Unreliable.
#[test]
fn derive_quality_empty_is_unreliable() {
    let no_layers: [RetrievalQuality; 0] = [];
    assert_eq!(derive_response_quality(&no_layers), ResponseQuality::Unreliable);
}
|
||||
422
vendor/ruvector/crates/rvf/rvf-runtime/tests/agi_e2e.rs
vendored
Normal file
422
vendor/ruvector/crates/rvf/rvf-runtime/tests/agi_e2e.rs
vendored
Normal file
@@ -0,0 +1,422 @@
|
||||
//! End-to-end integration tests and benchmarks for the AGI Cognitive Container
|
||||
//! system (ADR-036). Covers the full build -> serialize -> parse -> validate
|
||||
//! cycle, signed container tamper detection, execution mode validation matrix,
|
||||
//! authority levels, resource budgets, coherence thresholds, and perf benchmarks.
|
||||
|
||||
use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest};
|
||||
use rvf_runtime::seed_crypto;
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes";
|
||||
const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#;
|
||||
const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#;
|
||||
const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#;
|
||||
const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#;
|
||||
const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#;
|
||||
|
||||
/// Build a fully-populated container with every optional section.
///
/// Returns the serialized TLV payload plus the parsed header. The section
/// payloads are small placeholder bytes — the tests only check presence and
/// round-tripping, not semantic content.
fn build_full_container() -> (Vec<u8>, AgiContainerHeader) {
    AgiContainerBuilder::new([0x01; 16], [0x02; 16])
        .with_model_id("claude-opus-4-6")
        .with_policy(b"autonomous", [0xAA; 8])
        .with_orchestrator(ORCH_JSON)
        .with_tool_registry(TOOLS_JSON)
        .with_agent_prompts(b"You are a coder agent.")
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_skill_library(b"[]")
        .with_replay_script(b"#!/bin/sh\nrvf replay $1")
        .with_kernel_config(b"console=ttyS0")
        .with_network_config(b"{\"port\":8080}")
        .with_coherence_config(COHER_JSON)
        .with_project_instructions(b"# CLAUDE.md")
        .with_dependency_snapshot(b"sha256:abc123")
        .with_authority_config(b"{\"level\":\"WriteMemory\"}")
        .with_domain_profile(b"coding")
        .offline_capable()
        // Segment counts/sizes describing what the container claims to hold;
        // total_size 0 keeps it well under the size limit.
        .with_segments(ContainerSegments {
            kernel_present: true,
            kernel_size: 5_000_000,
            wasm_count: 2,
            wasm_total_size: 60_000,
            vec_segment_count: 4,
            index_segment_count: 2,
            witness_count: 100,
            crypto_present: false,
            manifest_present: true,
            orchestrator_present: true,
            world_model_present: true,
            domain_expansion_present: false,
            total_size: 0,
        })
        .build()
        .unwrap()
}
|
||||
|
||||
// -- 1. Full Container Lifecycle --
|
||||
|
||||
// Full build -> serialize -> parse -> validate cycle over the maximal
// container: header flags, header byte round-trip, every manifest section,
// and segment-derived flag propagation into the header.
#[test]
fn full_container_lifecycle() {
    let (payload, header) = build_full_container();

    assert!(header.is_valid_magic());
    assert_eq!(header.version, 1);
    assert!(header.has_kernel());
    assert!(header.has_orchestrator());
    assert!(header.has_world_model());
    assert!(header.is_replay_capable());
    assert!(header.is_offline_capable());
    assert!(
        header.created_ns > 0,
        "created_ns should be a real timestamp"
    );

    // Header round-trip.
    let header_rt = AgiContainerHeader::from_bytes(&header.to_bytes()).unwrap();
    assert_eq!(header_rt, header);

    // Parse manifest and verify every section.
    let p = ParsedAgiManifest::parse(&payload).unwrap();
    assert_eq!(p.model_id_str(), Some("claude-opus-4-6"));
    assert_eq!(p.orchestrator_config.unwrap(), ORCH_JSON);
    assert_eq!(p.tool_registry.unwrap(), TOOLS_JSON);
    assert_eq!(p.eval_tasks.unwrap(), TASKS_JSON);
    assert_eq!(p.eval_graders.unwrap(), GRADERS_JSON);
    assert_eq!(p.coherence_config.unwrap(), COHER_JSON);
    assert!(p.policy.is_some());
    assert!(p.agent_prompts.is_some());
    assert!(p.skill_library.is_some());
    assert!(p.replay_script.is_some());
    assert!(p.kernel_config.is_some());
    assert!(p.network_config.is_some());
    assert!(p.project_instructions.is_some());
    assert!(p.dependency_snapshot.is_some());
    assert!(p.authority_config.is_some());
    assert!(p.domain_profile.is_some());
    assert!(p.is_autonomous_capable());

    // Segment-derived flags should all be present in the header.
    // `header.flags & seg_flags == seg_flags` checks seg_flags is a subset
    // without requiring equality (the header carries extra flags too).
    let seg_flags = ContainerSegments {
        kernel_present: true,
        wasm_count: 2,
        witness_count: 100,
        orchestrator_present: true,
        world_model_present: true,
        ..Default::default()
    }
    .to_flags();
    assert_eq!(header.flags & seg_flags, seg_flags);
}
|
||||
|
||||
// -- 2. Signed Container Tamper Detection --
|
||||
|
||||
// A signed container must verify intact and fail verification after a single
// bit flip in either the TLV payload or the header. The signature is the
// final 32 bytes of the built payload.
#[test]
fn signed_container_tamper_detection() {
    let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(ContainerSegments {
            kernel_present: true,
            manifest_present: true,
            world_model_present: true,
            ..Default::default()
        });

    let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
    assert!(header.is_signed());

    // Signature is appended: everything before the last 32 bytes is signed.
    let unsigned_len = payload.len() - 32;
    let sig = &payload[unsigned_len..];
    assert!(seed_crypto::verify_seed(
        SIGNING_KEY,
        &payload[..unsigned_len],
        sig
    ));

    // Tamper with one byte in the TLV payload area.
    let mut tampered = payload.clone();
    tampered[AGI_HEADER_SIZE + 10] ^= 0xFF;
    assert!(
        !seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig),
        "tampered payload must fail verification"
    );

    // Tamper with header byte.
    let mut tampered_hdr = payload.clone();
    tampered_hdr[7] ^= 0x01;
    assert!(
        !seed_crypto::verify_seed(SIGNING_KEY, &tampered_hdr[..unsigned_len], sig),
        "tampered header must fail verification"
    );
}
|
||||
|
||||
// -- 3. Execution Mode Validation Matrix --
|
||||
|
||||
// Validation matrix across execution modes. The `m` closure builds a
// ContainerSegments from the seven knobs the matrix varies:
// (manifest, kernel, wasm_count, world_model, vec_segs, index_segs, witnesses).
#[test]
fn execution_mode_validation_matrix() {
    let m = |mp, kp, wc, wmc, vsc, isc, wnc| ContainerSegments {
        manifest_present: mp,
        kernel_present: kp,
        wasm_count: wc,
        world_model_present: wmc,
        vec_segment_count: vsc,
        index_segment_count: isc,
        witness_count: wnc,
        ..Default::default()
    };

    // Replay + no witness -> fail
    assert!(m(true, false, 0, false, 0, 0, 0)
        .validate(ExecutionMode::Replay)
        .is_err());
    // Replay + witness -> pass
    assert!(m(true, false, 0, false, 0, 0, 10)
        .validate(ExecutionMode::Replay)
        .is_ok());
    // Verify + no runtime -> fail
    assert!(m(true, false, 0, false, 0, 0, 0)
        .validate(ExecutionMode::Verify)
        .is_err());
    // Verify + kernel + world_model -> pass
    assert!(m(true, true, 0, true, 0, 0, 0)
        .validate(ExecutionMode::Verify)
        .is_ok());
    // Verify + wasm + vec -> pass (wasm runtime is an alternative to kernel)
    assert!(m(true, false, 1, false, 2, 0, 0)
        .validate(ExecutionMode::Verify)
        .is_ok());
    // Live + kernel only (no world model) -> fail
    assert!(m(true, true, 0, false, 0, 0, 0)
        .validate(ExecutionMode::Live)
        .is_err());
    // Live + kernel + world model -> pass
    assert!(m(true, true, 0, true, 0, 0, 0)
        .validate(ExecutionMode::Live)
        .is_ok());
}
|
||||
|
||||
// -- 4. Authority Level Tests --
|
||||
|
||||
// Each execution mode has a default authority ceiling: Replay is read-only,
// Verify may execute tools, Live may additionally write memory.
#[test]
fn authority_level_defaults_per_mode() {
    assert_eq!(
        AuthorityLevel::default_for_mode(ExecutionMode::Replay),
        AuthorityLevel::ReadOnly
    );
    assert_eq!(
        AuthorityLevel::default_for_mode(ExecutionMode::Verify),
        AuthorityLevel::ExecuteTools
    );
    assert_eq!(
        AuthorityLevel::default_for_mode(ExecutionMode::Live),
        AuthorityLevel::WriteMemory
    );
}
|
||||
|
||||
// Authority levels form a total order (per these assertions):
// ReadOnly < WriteMemory < ExecuteTools < WriteExternal.
// permits(x) is true iff x is at or below the receiver's level.
#[test]
fn authority_level_hierarchy() {
    // WriteExternal permits all.
    assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ReadOnly));
    assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteMemory));
    assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ExecuteTools));
    assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteExternal));
    // ExecuteTools permits itself and below.
    assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ReadOnly));
    assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteMemory));
    assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ExecuteTools));
    assert!(!AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteExternal));
    // ReadOnly permits nothing above itself.
    assert!(AuthorityLevel::ReadOnly.permits(AuthorityLevel::ReadOnly));
    assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteMemory));
    assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::ExecuteTools));
    assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteExternal));
}
|
||||
|
||||
// -- 5. Resource Budget Tests --
|
||||
|
||||
// Every over-limit budget field must be clamped down to its documented cap:
// 1 h wall clock, 1M tokens, $10 in microdollars, 500 tool calls,
// 50 external writes.
#[test]
fn resource_budget_clamping() {
    let clamped = ResourceBudget {
        max_time_secs: 99999,
        max_tokens: 99999999,
        max_cost_microdollars: 99999999,
        max_tool_calls: 65535,
        max_external_writes: 65535,
    }
    .clamped();
    assert_eq!(clamped.max_time_secs, 3600);
    assert_eq!(clamped.max_tokens, 1_000_000);
    assert_eq!(clamped.max_cost_microdollars, 10_000_000);
    assert_eq!(clamped.max_tool_calls, 500);
    assert_eq!(clamped.max_external_writes, 50);
}
|
||||
|
||||
// Clamping a budget already at or below every cap must be a no-op.
#[test]
fn resource_budget_within_max_unchanged() {
    let within_limits = ResourceBudget::DEFAULT;
    assert_eq!(within_limits.clamped(), ResourceBudget::DEFAULT);
}
|
||||
|
||||
// -- 6. Coherence Threshold Validation --
|
||||
|
||||
// CoherenceThresholds::validate must accept both presets, reject each field
// when it leaves [0, 1] (score > 1, negative rate, ratio > 1), and accept
// the all-zero edge case.
#[test]
fn coherence_threshold_validation() {
    assert!(CoherenceThresholds::DEFAULT.validate().is_ok());
    assert!(CoherenceThresholds::STRICT.validate().is_ok());

    // Invalid: score > 1.0
    let bad = CoherenceThresholds {
        min_coherence_score: 1.5,
        ..CoherenceThresholds::DEFAULT
    };
    assert!(bad.validate().is_err());
    // Invalid: negative rate
    let bad2 = CoherenceThresholds {
        max_contradiction_rate: -1.0,
        ..CoherenceThresholds::DEFAULT
    };
    assert!(bad2.validate().is_err());
    // Invalid: rollback ratio > 1.0
    let bad3 = CoherenceThresholds {
        max_rollback_ratio: 2.0,
        ..CoherenceThresholds::DEFAULT
    };
    assert!(bad3.validate().is_err());
    // Edge: zero values are valid
    let edge = CoherenceThresholds {
        min_coherence_score: 0.0,
        max_contradiction_rate: 0.0,
        max_rollback_ratio: 0.0,
    };
    assert!(edge.validate().is_ok());
}
|
||||
|
||||
// -- 7. Container Size Limit --
|
||||
|
||||
// A container exceeding AGI_MAX_CONTAINER_SIZE by even one byte must fail
// validation with the precise TooLarge error (carrying the offending size).
#[test]
fn container_size_limit_enforced() {
    let oversized = ContainerSegments {
        manifest_present: true,
        total_size: AGI_MAX_CONTAINER_SIZE + 1,
        ..Default::default()
    };
    assert_eq!(
        oversized.validate(ExecutionMode::Replay),
        Err(ContainerError::TooLarge {
            size: AGI_MAX_CONTAINER_SIZE + 1
        })
    );
}
|
||||
|
||||
// -- 8. Performance Benchmarks (using std::time) --
|
||||
|
||||
// Micro-benchmark (std::time based, asserted as a perf regression guard):
// header serialize and deserialize must each stay under 1 µs/op averaged
// over 100k iterations. black_box prevents the optimizer from eliding work.
// NOTE(review): a wall-clock assertion can flake on loaded CI hosts.
#[test]
fn bench_header_serialize_deserialize() {
    use std::time::Instant;
    let header = AgiContainerHeader {
        magic: AGI_MAGIC,
        version: 1,
        flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED,
        container_id: [0x42; 16],
        build_id: [0x43; 16],
        created_ns: 1_700_000_000_000_000_000,
        model_id_hash: [0xAA; 8],
        policy_hash: [0xBB; 8],
    };
    let n: u128 = 100_000;

    let start = Instant::now();
    for _ in 0..n {
        let _ = std::hint::black_box(header.to_bytes());
    }
    let ser = start.elapsed();

    let bytes = header.to_bytes();
    let start = Instant::now();
    for _ in 0..n {
        let _ = std::hint::black_box(AgiContainerHeader::from_bytes(&bytes).unwrap());
    }
    let deser = start.elapsed();

    let ser_ns = ser.as_nanos() / n;
    let deser_ns = deser.as_nanos() / n;
    eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})");
    eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})");
    assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op");
    assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op");
}
|
||||
|
||||
// Micro-benchmark: full builder pipeline must average under 10 µs/op and
// manifest parsing under 5 µs/op over 10k iterations. The builder chain is
// reconstructed inside the loop so builder setup cost is included.
// NOTE(review): wall-clock assertions can flake on loaded CI hosts.
#[test]
fn bench_container_build_parse() {
    use std::time::Instant;
    let n: u128 = 10_000;
    // Fresh segments per iteration (ContainerSegments is consumed by the builder).
    let segs = || ContainerSegments {
        kernel_present: true,
        manifest_present: true,
        world_model_present: true,
        ..Default::default()
    };

    let start = Instant::now();
    for _ in 0..n {
        let b = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
            .with_model_id("claude-opus-4-6")
            .with_policy(b"autonomous", [0xAA; 8])
            .with_orchestrator(ORCH_JSON)
            .with_eval_tasks(TASKS_JSON)
            .with_eval_graders(GRADERS_JSON)
            .with_segments(segs());
        let _ = std::hint::black_box(b.build().unwrap());
    }
    let build_elapsed = start.elapsed();

    // One representative payload for the parse benchmark.
    let (payload, _) = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
        .with_model_id("claude-opus-4-6")
        .with_orchestrator(ORCH_JSON)
        .with_eval_tasks(TASKS_JSON)
        .with_eval_graders(GRADERS_JSON)
        .with_segments(segs())
        .build()
        .unwrap();

    let start = Instant::now();
    for _ in 0..n {
        let _ = std::hint::black_box(ParsedAgiManifest::parse(&payload).unwrap());
    }
    let parse_elapsed = start.elapsed();

    let build_ns = build_elapsed.as_nanos() / n;
    let parse_ns = parse_elapsed.as_nanos() / n;
    eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})");
    eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})");
    assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op");
    assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op");
}
|
||||
|
||||
/// Micro-benchmark: deriving the flag word from segment presence must
/// be effectively free (under 100 ns per call).
#[test]
fn bench_flags_computation() {
    use std::time::Instant;

    let n: u128 = 1_000_000;
    let segs = ContainerSegments {
        kernel_present: true,
        wasm_count: 2,
        witness_count: 100,
        crypto_present: true,
        orchestrator_present: true,
        world_model_present: true,
        vec_segment_count: 4,
        index_segment_count: 2,
        ..Default::default()
    };

    let start = Instant::now();
    (0..n).for_each(|_| {
        let _ = std::hint::black_box(segs.to_flags());
    });
    let elapsed = start.elapsed();

    let ns = elapsed.as_nanos() / n;
    eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})");
    assert!(ns < 100, "flags computation too slow: {ns} ns/op");
}
|
||||
320
vendor/ruvector/crates/rvf/rvf-runtime/tests/qr_seed_e2e.rs
vendored
Normal file
320
vendor/ruvector/crates/rvf/rvf-runtime/tests/qr_seed_e2e.rs
vendored
Normal file
@@ -0,0 +1,320 @@
|
||||
//! End-to-end integration tests for the QR Cognitive Seed pipeline.
|
||||
//!
|
||||
//! Tests the full zero-dependency chain:
|
||||
//! Build → Compress → Hash → Sign → Serialize → Parse → Verify → Decompress
|
||||
|
||||
use rvf_runtime::compress;
|
||||
use rvf_runtime::qr_seed::*;
|
||||
use rvf_runtime::seed_crypto;
|
||||
use rvf_types::qr_seed::*;
|
||||
|
||||
const SIGNING_KEY: &[u8] = b"test-secret-key-for-hmac-sha256!";
|
||||
|
||||
/// Build a realistic fake WASM module: the standard 8-byte preamble
/// (magic + version) followed by a repeating — and therefore highly
/// compressible — section pattern, truncated to exactly `size` bytes.
fn fake_wasm(size: usize) -> Vec<u8> {
    const PREAMBLE: [u8; 8] = [0x00, 0x61, 0x73, 0x6D, 0x01, 0x00, 0x00, 0x00];
    const PATTERN: [u8; 8] = [0x01, 0x06, 0x01, 0x60, 0x01, 0x7F, 0x01, 0x7F];

    let mut module = Vec::with_capacity(size);
    module.extend_from_slice(&PREAMBLE);
    while module.len() < size {
        module.extend_from_slice(&PATTERN);
    }
    module.truncate(size);
    module
}
|
||||
|
||||
fn default_layers_with_real_hashes() -> Vec<(LayerEntry, Vec<u8>)> {
|
||||
let layer_data_0 = vec![0x42u8; 4096];
|
||||
let layer_data_1 = vec![0x43u8; 51200];
|
||||
let layer_data_2 = vec![0x44u8; 204800];
|
||||
|
||||
vec![
|
||||
(
|
||||
LayerEntry {
|
||||
offset: 0,
|
||||
size: 4096,
|
||||
content_hash: seed_crypto::layer_content_hash(&layer_data_0),
|
||||
layer_id: layer_id::LEVEL0,
|
||||
priority: 0,
|
||||
required: 1,
|
||||
_pad: 0,
|
||||
},
|
||||
layer_data_0,
|
||||
),
|
||||
(
|
||||
LayerEntry {
|
||||
offset: 4096,
|
||||
size: 51200,
|
||||
content_hash: seed_crypto::layer_content_hash(&layer_data_1),
|
||||
layer_id: layer_id::HOT_CACHE,
|
||||
priority: 1,
|
||||
required: 1,
|
||||
_pad: 0,
|
||||
},
|
||||
layer_data_1,
|
||||
),
|
||||
(
|
||||
LayerEntry {
|
||||
offset: 55296,
|
||||
size: 204800,
|
||||
content_hash: seed_crypto::layer_content_hash(&layer_data_2),
|
||||
layer_id: layer_id::HNSW_LAYER_A,
|
||||
priority: 2,
|
||||
required: 0,
|
||||
_pad: 0,
|
||||
},
|
||||
layer_data_2,
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
/// End-to-end happy path: compress a fake microkernel, build and sign a
/// seed, confirm it fits QR capacity, then parse it back and verify the
/// signature, content hash, microkernel bytes, and per-layer hashes —
/// plus the corresponding negative checks (wrong key, tampered layer).
#[test]
fn full_round_trip_with_real_crypto() {
    // 1. Create and compress a fake WASM microkernel.
    let wasm = fake_wasm(5500);
    let compressed = compress::compress(&wasm);
    assert!(
        compressed.len() < wasm.len(),
        "compression failed: {} >= {}",
        compressed.len(),
        wasm.len()
    );

    // 2. Build seed with real signing.
    let host = make_host_entry("https://cdn.example.com/brain.rvf", 0, 1, [0xAA; 16]).unwrap();
    let layers = default_layers_with_real_hashes();

    let mut builder = SeedBuilder::new([0x01; 8], 384, 100_000)
        .with_microkernel(compressed.clone())
        .add_host(host);

    // Optional fields are set directly on the builder (pub fields).
    builder.content_hash_full = Some(seed_crypto::full_content_hash(b"full rvf data"));
    builder.total_file_size = Some(260_096);

    for (layer, _data) in &layers {
        builder = builder.add_layer(*layer);
    }

    let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();

    // 3. Verify QR capacity.
    assert!(header.fits_in_qr());
    assert!(payload.len() <= QR_MAX_BYTES);

    // 4. Parse it back.
    let parsed = ParsedSeed::parse(&payload).unwrap();
    assert_eq!(parsed.header.seed_magic, SEED_MAGIC);
    assert!(parsed.header.is_signed());
    assert_eq!(parsed.header.sig_algo, seed_crypto::SIG_ALGO_HMAC_SHA256);

    // 5. Full verification (magic + content hash + signature).
    parsed.verify_all(SIGNING_KEY, &payload).unwrap();

    // 6. Individual verification steps.
    assert!(parsed.verify_content_hash());
    parsed.verify_signature(SIGNING_KEY, &payload).unwrap();

    // 7. Wrong key must fail. (Key literal is 32 bytes, same length as
    // the real key, so failure is due to the key value, not its size.)
    assert!(parsed
        .verify_signature(b"wrong-key-must-fail-immediately!", &payload)
        .is_err());

    // 8. Decompress microkernel — must round-trip to the exact input.
    let decompressed = parsed.decompress_microkernel().unwrap();
    assert_eq!(decompressed, wasm);

    // 9. Parse manifest and verify layer hashes.
    let manifest = parsed.parse_manifest().unwrap();
    assert_eq!(manifest.hosts.len(), 1);
    assert_eq!(manifest.layers.len(), 3);

    for (layer, data) in &layers {
        assert!(
            seed_crypto::verify_layer(&layer.content_hash, data),
            "layer {} hash mismatch",
            layer.layer_id
        );
    }

    // 10. Tampered layer data must fail.
    let tampered = vec![0xFF; 4096];
    assert!(!seed_crypto::verify_layer(
        &layers[0].0.content_hash,
        &tampered
    ));
}
|
||||
|
||||
/// `compress_microkernel` must set the microkernel flag, keep the seed
/// within QR capacity, and round-trip back to the original bytes.
#[test]
fn compress_microkernel_method() {
    let wasm = fake_wasm(5500);

    let (payload, header) = SeedBuilder::new([0x02; 8], 128, 1000)
        .compress_microkernel(&wasm)
        .build_and_sign(SIGNING_KEY)
        .unwrap();
    assert!(header.has_microkernel());
    assert!(header.fits_in_qr());

    // The parsed seed decompresses to exactly the input module.
    let parsed = ParsedSeed::parse(&payload).unwrap();
    assert_eq!(parsed.decompress_microkernel().unwrap(), wasm);
}
|
||||
|
||||
/// Backward compatibility: the unsigned `build()` path still works —
/// no signed flag, content hash preserved, and no signature on parse.
#[test]
fn unsigned_build_still_works() {
    let (payload, header) = SeedBuilder::new([0x03; 8], 128, 1000)
        .with_content_hash([0xAA; 8])
        .build()
        .unwrap();

    assert!(!header.is_signed());
    assert_eq!(header.content_hash, [0xAA; 8]);
    assert!(ParsedSeed::parse(&payload).unwrap().signature.is_none());
}
|
||||
|
||||
/// One flipped byte in the microkernel area must break the signature.
#[test]
fn tampered_payload_fails_signature() {
    let (mut payload, _) = SeedBuilder::new([0x04; 8], 128, 1000)
        .compress_microkernel(&fake_wasm(2000))
        .build_and_sign(SIGNING_KEY)
        .unwrap();

    // Corrupt a byte just past the header.
    payload[SEED_HEADER_SIZE + 10] ^= 0xFF;

    let parsed = ParsedSeed::parse(&payload).unwrap();
    assert!(parsed.verify_signature(SIGNING_KEY, &payload).is_err());
}
|
||||
|
||||
/// One flipped byte in the microkernel area must break the content hash.
#[test]
fn tampered_payload_fails_content_hash() {
    let (mut payload, _) = SeedBuilder::new([0x05; 8], 128, 1000)
        .compress_microkernel(&fake_wasm(2000))
        .build_and_sign(SIGNING_KEY)
        .unwrap();

    // Corrupt a byte just past the header.
    payload[SEED_HEADER_SIZE + 10] ^= 0xFF;

    let parsed = ParsedSeed::parse(&payload).unwrap();
    assert!(!parsed.verify_content_hash());
}
|
||||
|
||||
/// `verify_all` must reject an intact payload checked with the wrong key.
#[test]
fn verify_all_catches_bad_signature() {
    let (payload, _) = SeedBuilder::new([0x06; 8], 128, 1000)
        .compress_microkernel(&fake_wasm(2000))
        .build_and_sign(SIGNING_KEY)
        .unwrap();

    let parsed = ParsedSeed::parse(&payload).unwrap();
    assert!(parsed
        .verify_all(b"wrong-key-should-definitely-fail", &payload)
        .is_err());
}
|
||||
|
||||
/// Bootstrap progress across progressive layer downloads: Level0 alone
/// is not query-ready; adding the hot cache reaches phase 1 at ~50%
/// estimated recall; the last layer lifts recall to ~70% at phase 2.
#[test]
fn bootstrap_progress_with_real_layers() {
    let layers = default_layers_with_real_hashes();
    let manifest = DownloadManifest {
        hosts: vec![],
        content_hash: None,
        total_file_size: Some(260_096),
        layers: layers.iter().map(|(l, _)| *l).collect(),
        session_token: None,
        token_ttl: None,
        cert_pin: None,
    };

    let mut progress = BootstrapProgress::new(&manifest);
    assert!(!progress.query_ready);
    assert_eq!(progress.phase, 0);

    // Level0 alone does not make the index queryable.
    progress.record_layer(&layers[0].0);
    assert!(!progress.query_ready);

    // Hot cache downloaded: queryable, phase 1, ~50% recall.
    progress.record_layer(&layers[1].0);
    assert!(progress.query_ready);
    assert_eq!(progress.phase, 1);
    assert!((progress.estimated_recall - 0.50).abs() < f32::EPSILON);

    // Final layer downloaded: ~70% recall, phase 2.
    progress.record_layer(&layers[2].0);
    assert!((progress.estimated_recall - 0.70).abs() < f32::EPSILON);
    assert_eq!(progress.phase, 2);
}
|
||||
|
||||
/// The repetitive fake WASM module must compress by at least 1.2x.
#[test]
fn compression_ratio_for_wasm() {
    let original = fake_wasm(5500);
    let packed = compress::compress(&original);
    let ratio = original.len() as f64 / packed.len() as f64;
    assert!(
        ratio > 1.2,
        "expected compression ratio > 1.2x, got {:.2}x",
        ratio
    );
}
|
||||
|
||||
/// Verify the built-in SHA-256 against the full NIST FIPS 180-4 test
/// vector for "abc".
///
/// The previous version checked only the first four digest bytes, which
/// would miss corruption anywhere in the remaining 28; comparing the
/// whole 32-byte digest makes the check complete.
#[test]
fn sha256_produces_correct_hash() {
    let hash = rvf_types::sha256::sha256(b"abc");
    // SHA-256("abc") =
    //   ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
    const EXPECTED: [u8; 32] = [
        0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
        0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
        0xf2, 0x00, 0x15, 0xad,
    ];
    assert_eq!(hash[..], EXPECTED[..]);
}
|
||||
|
||||
/// Verify the built-in HMAC-SHA256 against the full RFC 4231 Test
/// Case 2 vector (key "Jefe").
///
/// The previous version checked only the first four MAC bytes; comparing
/// all 32 bytes makes the conformance check complete.
#[test]
fn hmac_sha256_produces_correct_mac() {
    // RFC 4231 Test Case 2.
    let key = b"Jefe";
    let data = b"what do ya want for nothing?";
    let mac = rvf_types::sha256::hmac_sha256(key, data);
    // Expected MAC =
    //   5bdcc146 bf60754e 6a042426 089575c7 5a003f08 9d273983 9dec58b9 64ec3843
    const EXPECTED: [u8; 32] = [
        0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e, 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95,
        0x75, 0xc7, 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83, 0x9d, 0xec, 0x58, 0xb9,
        0x64, 0xec, 0x38, 0x43,
    ];
    assert_eq!(mac[..], EXPECTED[..]);
}
|
||||
|
||||
/// Builds the largest seed the suite exercises — compressed microkernel,
/// two hosts, three layers, and every optional field populated — and
/// checks it still fits QR capacity, then verifies the full signed
/// round trip including the optional manifest fields.
#[test]
fn maximal_seed_size() {
    // Build the largest possible seed that still fits in QR.
    let wasm = fake_wasm(5500);
    let compressed = compress::compress(&wasm);

    let host1 = make_host_entry("https://cdn.example.com/brain.rvf", 0, 1, [0xAA; 16]).unwrap();
    let host2 = make_host_entry("https://mirror.example.com/brain.rvf", 1, 2, [0xBB; 16]).unwrap();

    let layers = default_layers_with_real_hashes();
    let mut builder = SeedBuilder::new([0x07; 8], 384, 100_000)
        .with_microkernel(compressed)
        .add_host(host1)
        .add_host(host2);

    // Populate every optional field to maximize the payload.
    builder.content_hash_full = Some([0xDD; 32]);
    builder.total_file_size = Some(10_000_000);
    builder.session_token = Some([0xEE; 16]);
    builder.token_ttl = Some(3600);
    builder.cert_pin = Some([0xFF; 32]);
    builder.stream_upgrade = true;

    for (layer, _) in &layers {
        builder = builder.add_layer(*layer);
    }

    let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
    assert!(
        header.fits_in_qr(),
        "seed size {} exceeds QR max {}",
        header.total_seed_size,
        QR_MAX_BYTES
    );
    assert!(payload.len() <= QR_MAX_BYTES);

    // Full round-trip verification.
    let parsed = ParsedSeed::parse(&payload).unwrap();
    parsed.verify_all(SIGNING_KEY, &payload).unwrap();

    // Optional fields must survive the manifest round trip unchanged.
    let manifest = parsed.parse_manifest().unwrap();
    assert_eq!(manifest.hosts.len(), 2);
    assert_eq!(manifest.layers.len(), 3);
    assert_eq!(manifest.session_token, Some([0xEE; 16]));
    assert_eq!(manifest.token_ttl, Some(3600));
    assert_eq!(manifest.cert_pin, Some([0xFF; 32]));
}
|
||||
288
vendor/ruvector/crates/rvf/rvf-runtime/tests/witness_e2e.rs
vendored
Normal file
288
vendor/ruvector/crates/rvf/rvf-runtime/tests/witness_e2e.rs
vendored
Normal file
@@ -0,0 +1,288 @@
|
||||
//! End-to-end integration tests for ADR-035 capability reports.
|
||||
//!
|
||||
//! Tests the full witness → scorecard → governance pipeline with real
|
||||
//! HMAC-SHA256 signatures, policy enforcement, and deterministic replay.
|
||||
|
||||
use rvf_runtime::seed_crypto;
|
||||
use rvf_runtime::witness::{
|
||||
GovernancePolicy, ParsedWitness, ScorecardBuilder, WitnessBuilder, WitnessError,
|
||||
};
|
||||
use rvf_types::witness::*;
|
||||
|
||||
const KEY: &[u8] = b"e2e-test-key-for-witness-bundle!";
|
||||
|
||||
fn make_entry(tool: &str, latency: u32, cost: u32, tokens: u32) -> ToolCallEntry {
|
||||
ToolCallEntry {
|
||||
action: tool.as_bytes().to_vec(),
|
||||
args_hash: seed_crypto::seed_content_hash(tool.as_bytes()),
|
||||
result_hash: [0x00; 8],
|
||||
latency_ms: latency,
|
||||
cost_microdollars: cost,
|
||||
tokens,
|
||||
policy_check: PolicyCheck::Allowed,
|
||||
}
|
||||
}
|
||||
|
||||
/// Full witness → scorecard pipeline over five simulated tasks with
/// mixed outcomes and evidence levels. Each witness is signed, verified,
/// and folded into a scorecard whose aggregate metrics (solve rate,
/// evidence coverage, cost per solve) are then checked.
#[test]
fn full_capability_report_pipeline() {
    let mut scorecard = ScorecardBuilder::new();
    let policy = GovernancePolicy::autonomous();

    // Simulate 5 tasks: (task id, outcome, has full evidence?).
    let tasks: Vec<([u8; 16], TaskOutcome, bool)> = vec![
        ([0x01; 16], TaskOutcome::Solved, true),
        ([0x02; 16], TaskOutcome::Solved, true),
        ([0x03; 16], TaskOutcome::Solved, false), // solved but no full evidence
        ([0x04; 16], TaskOutcome::Failed, true),
        ([0x05; 16], TaskOutcome::Errored, false),
    ];

    for (task_id, outcome, full_evidence) in &tasks {
        let mut builder = WitnessBuilder::new(*task_id, policy.clone())
            .with_spec(b"fix issue #123")
            .with_outcome(*outcome);

        // "Full evidence" means plan + diff + test log are all attached.
        if *full_evidence {
            builder = builder
                .with_plan(b"1. read\n2. fix\n3. test")
                .with_diff(b"--- a/file.rs\n+++ b/file.rs")
                .with_test_log(b"test ... ok");
        }

        // Three tool calls per task; costs 100 + 200 + 0 = 300 each.
        builder.record_tool_call(make_entry("Read", 50, 100, 500));
        builder.record_tool_call(make_entry("Edit", 100, 200, 1000));
        builder.record_tool_call(make_entry("Bash", 3000, 0, 0));

        let (payload, header) = builder.build_and_sign(KEY).unwrap();

        // Verify signature.
        let parsed = ParsedWitness::parse(&payload).unwrap();
        parsed.verify_all(KEY, &payload).unwrap();

        assert_eq!(header.tool_call_count, 3);
        assert_eq!(header.total_cost_microdollars, 300);

        scorecard.add_witness(&parsed, 0, 0);
    }

    let card = scorecard.finish();
    assert_eq!(card.total_tasks, 5);
    assert_eq!(card.solved, 3);
    assert_eq!(card.failed, 1);
    assert_eq!(card.errors, 1);
    assert!((card.solve_rate - 0.6).abs() < 0.01);
    // 2 out of 3 solved have full evidence.
    assert!((card.evidence_coverage - 0.6667).abs() < 0.01);
    // Total cost = 5 tasks * 300 each = 1500. Solved = 3. 1500/3 = 500.
    assert_eq!(card.cost_per_solve_microdollars, 500);
}
|
||||
|
||||
/// Restricted governance: reads are allowed, every mutating tool is
/// denied, each denial is counted as a violation, and the verdicts
/// survive the sign/parse round trip in the trace.
#[test]
fn governance_restricted_mode_blocks_writes() {
    let mut builder = WitnessBuilder::new([0x10; 16], GovernancePolicy::restricted())
        .with_spec(b"audit code")
        .with_outcome(TaskOutcome::Solved);

    // Read-only access passes; Write/Edit/Bash are all rejected.
    assert_eq!(
        builder.record_tool_call(make_entry("Read", 50, 100, 500)),
        PolicyCheck::Allowed
    );
    assert_eq!(
        builder.record_tool_call(make_entry("Write", 100, 200, 1000)),
        PolicyCheck::Denied
    );
    assert_eq!(
        builder.record_tool_call(make_entry("Edit", 100, 200, 1000)),
        PolicyCheck::Denied
    );
    assert_eq!(
        builder.record_tool_call(make_entry("Bash", 100, 0, 0)),
        PolicyCheck::Denied
    );
    assert_eq!(builder.policy_violations.len(), 3);

    // The recorded verdicts persist through signing and parsing.
    let (payload, _) = builder.build_and_sign(KEY).unwrap();
    let entries = ParsedWitness::parse(&payload).unwrap().parse_trace();
    assert_eq!(entries.len(), 4);
    let expected = [
        PolicyCheck::Allowed,
        PolicyCheck::Denied,
        PolicyCheck::Denied,
        PolicyCheck::Denied,
    ];
    for (entry, want) in entries.iter().zip(expected) {
        assert_eq!(entry.policy_check, want);
    }
}
|
||||
|
||||
/// Approved governance: every tool call — even a read — comes back as
/// `Confirmed`, and confirmations do not count as violations.
#[test]
fn governance_approved_mode_gates_all() {
    let mut builder = WitnessBuilder::new([0x20; 16], GovernancePolicy::approved())
        .with_outcome(TaskOutcome::Solved);

    for (tool, latency, cost, tokens) in [("Read", 50, 100, 500), ("Bash", 100, 0, 0)] {
        let check = builder.record_tool_call(make_entry(tool, latency, cost, tokens));
        assert_eq!(check, PolicyCheck::Confirmed);
    }

    // Confirmed is a gate, not a violation.
    assert!(builder.policy_violations.is_empty());
}
|
||||
|
||||
/// Autonomous mode with a 500-microdollar cost cap: the first call fits
/// the budget; the second one crosses it and is flagged exactly once.
#[test]
fn governance_autonomous_with_cost_cap() {
    let mut policy = GovernancePolicy::autonomous();
    policy.max_cost_microdollars = 500;

    let mut builder = WitnessBuilder::new([0x30; 16], policy).with_outcome(TaskOutcome::Solved);

    // 400 of 500 spent — still inside the budget.
    builder.record_tool_call(make_entry("Read", 50, 400, 500));
    assert!(builder.policy_violations.is_empty());

    // Cumulative 600 of 500 — one violation naming the cost budget.
    builder.record_tool_call(make_entry("Edit", 50, 200, 500));
    assert_eq!(builder.policy_violations.len(), 1);
    assert!(builder.policy_violations[0].contains("cost budget"));
}
|
||||
|
||||
/// Deterministic replay: every section written into a witness bundle
/// (spec, plan, diff, test log, tool trace) must come back byte-for-byte
/// identical after signing and parsing, and the bundle must qualify as
/// complete evidence.
#[test]
fn deterministic_replay_same_bytes() {
    let policy = GovernancePolicy::autonomous();
    let mut builder = WitnessBuilder::new([0x40; 16], policy)
        .with_spec(b"fix bug #42")
        .with_plan(b"1. read auth.rs\n2. fix validation")
        .with_diff(b"@@ -10,3 +10,5 @@\n+ validate(input);")
        .with_test_log(b"test auth::validate ... ok\n3 passed")
        .with_outcome(TaskOutcome::Solved);

    builder.record_tool_call(make_entry("Read", 50, 100, 500));
    builder.record_tool_call(make_entry("Edit", 100, 200, 1000));
    builder.record_tool_call(make_entry("Bash", 2000, 0, 0));

    let (payload, _) = builder.build_and_sign(KEY).unwrap();

    // Parse and extract all sections.
    let parsed = ParsedWitness::parse(&payload).unwrap();
    parsed.verify_all(KEY, &payload).unwrap();

    // Each section must match the exact bytes written above.
    assert_eq!(parsed.spec.unwrap(), b"fix bug #42");
    assert_eq!(parsed.plan.unwrap(), b"1. read auth.rs\n2. fix validation");
    assert_eq!(
        parsed.diff.unwrap(),
        b"@@ -10,3 +10,5 @@\n+ validate(input);"
    );
    assert_eq!(
        parsed.test_log.unwrap(),
        b"test auth::validate ... ok\n3 passed"
    );

    // The tool trace preserves order and per-call metrics.
    let entries = parsed.parse_trace();
    assert_eq!(entries.len(), 3);
    assert_eq!(entries[0].action, b"Read");
    assert_eq!(entries[1].action, b"Edit");
    assert_eq!(entries[2].action, b"Bash");
    assert_eq!(entries[0].latency_ms, 50);
    assert_eq!(entries[1].cost_microdollars, 200);
    assert_eq!(entries[2].tokens, 0);

    // The bundle is self-contained evidence.
    assert!(parsed.evidence_complete());
}
|
||||
|
||||
/// Flipping one byte inside the bundle body must invalidate the signature.
#[test]
fn tampered_bundle_detected() {
    let mut builder = WitnessBuilder::new([0x50; 16], GovernancePolicy::autonomous())
        .with_spec(b"original spec")
        .with_outcome(TaskOutcome::Solved);
    builder.record_tool_call(make_entry("Bash", 100, 0, 0));
    let (mut payload, _) = builder.build_and_sign(KEY).unwrap();

    // Corrupt one byte just past the header.
    payload[WITNESS_HEADER_SIZE + 10] ^= 0xFF;

    let parsed = ParsedWitness::parse(&payload).unwrap();
    assert!(parsed.verify_signature(KEY, &payload).is_err());
}
|
||||
|
||||
/// A failed task can carry a postmortem, which survives signing,
/// parsing, and full verification intact.
#[test]
fn postmortem_on_failure() {
    let (payload, header) = WitnessBuilder::new([0x60; 16], GovernancePolicy::autonomous())
        .with_spec(b"implement feature X")
        .with_diff(b"partial diff")
        .with_test_log(b"test feature_x ... FAILED\n0 passed, 1 failed")
        .with_postmortem(b"Root cause: missing null check in parser")
        .with_outcome(TaskOutcome::Failed)
        .build_and_sign(KEY)
        .unwrap();
    assert_eq!(header.outcome, TaskOutcome::Failed as u8);

    let parsed = ParsedWitness::parse(&payload).unwrap();
    parsed.verify_all(KEY, &payload).unwrap();
    assert_eq!(
        parsed.postmortem.unwrap(),
        b"Root cause: missing null check in parser"
    );
}
|
||||
|
||||
/// Latency percentiles over 20 solved tasks with latencies stepping
/// 100..=2000 ms: the median lands near 1000 ms, p95 near the top.
#[test]
fn scorecard_percentiles() {
    let policy = GovernancePolicy::autonomous();
    let mut sc = ScorecardBuilder::new();

    for i in 0..20u8 {
        let mut builder = WitnessBuilder::new([i; 16], policy.clone())
            .with_spec(b"task")
            .with_diff(b"diff")
            .with_test_log(b"ok")
            .with_outcome(TaskOutcome::Solved);
        // One Bash call per task at 100, 200, ..., 2000 ms.
        let latency = (u32::from(i) + 1) * 100;
        builder.record_tool_call(make_entry("Bash", latency, 100, 100));
        let (payload, _) = builder.build().unwrap();
        sc.add_witness(&ParsedWitness::parse(&payload).unwrap(), 0, 0);
    }

    let card = sc.finish();
    assert_eq!(card.total_tasks, 20);
    assert_eq!(card.solved, 20);
    assert!((card.solve_rate - 1.0).abs() < 0.01);
    assert!((card.evidence_coverage - 1.0).abs() < 0.01);
    // Median of the 100..=2000 ladder sits around 1000 ms.
    assert!(card.median_latency_ms >= 900 && card.median_latency_ms <= 1100);
    // p95 sits near the 1900-2000 end of the ladder.
    assert!(card.p95_latency_ms >= 1800);
}
|
||||
|
||||
/// Rollbacks recorded on the builder propagate into the scorecard.
#[test]
fn rollback_tracking() {
    let mut builder = WitnessBuilder::new([0x70; 16], GovernancePolicy::autonomous())
        .with_outcome(TaskOutcome::Solved);

    for _ in 0..2 {
        builder.record_rollback();
    }
    assert_eq!(builder.rollback_count, 2);

    let (payload, _) = builder.build().unwrap();
    let parsed = ParsedWitness::parse(&payload).unwrap();

    let mut sc = ScorecardBuilder::new();
    sc.add_witness(&parsed, 0, 2);
    assert_eq!(sc.finish().rollback_count, 2);
}
|
||||
|
||||
/// Autonomous mode should never flag Read/Edit/Bash: across 100
/// simulated runs the total violation count stays at zero.
#[test]
fn zero_policy_violations_in_autonomous() {
    let policy = GovernancePolicy::autonomous();

    let total_violations: u32 = (0..100u8)
        .map(|i| {
            let mut builder =
                WitnessBuilder::new([i; 16], policy.clone()).with_outcome(TaskOutcome::Solved);
            for tool in ["Read", "Edit", "Bash"] {
                builder.record_tool_call(make_entry(tool, 10, 10, 10));
            }
            builder.policy_violations.len() as u32
        })
        .sum();

    assert_eq!(total_violations, 0, "zero policy violations in 100 runs");
}
|
||||
Reference in New Issue
Block a user