//! Correctness and determinism benchmarks use criterion::{black_box, criterion_group, criterion_main, Criterion}; use std::sync::Arc; use ruvector_fpga_transformer::{ artifact::{Manifest, ModelArtifact}, backend::native_sim::NativeSimBackend, backend::TransformerBackend, gating::DefaultCoherenceGate, types::{FixedShape, GateHint, InferenceRequest, QuantSpec}, }; fn create_test_artifact() -> ModelArtifact { let shape = FixedShape::micro(); let manifest = Manifest { name: "determinism_test".into(), model_hash: String::new(), shape, quant: QuantSpec::int8(), io: Default::default(), backend: Default::default(), tests: Default::default(), }; let embedding_size = shape.vocab as usize * shape.d_model as usize; let weights: Vec = (0..embedding_size).map(|i| (i % 256) as u8).collect(); ModelArtifact::new(manifest, weights, None, None, vec![]) } fn bench_determinism(c: &mut Criterion) { let gate = Arc::new(DefaultCoherenceGate::new()); let backend = NativeSimBackend::new(gate); let artifact = create_test_artifact(); let model_id = backend.load(&artifact).unwrap(); let shape = FixedShape::micro(); let tokens: Vec = (0..shape.seq_len) .map(|i| (i * 7) % shape.vocab as u16) .collect(); let mask = vec![1u8; shape.seq_len as usize]; c.bench_function("determinism_check_1000", |b| { b.iter(|| { let mut first_hash: Option = None; for _ in 0..1000 { let req = InferenceRequest::new( model_id, shape, black_box(&tokens), &mask, GateHint::allow_all(), ); let result = backend.infer(req).unwrap(); // Hash the logits let hash = result .logits_q .iter() .fold(0u64, |acc, &v| acc.wrapping_mul(31).wrapping_add(v as u64)); match first_hash { None => first_hash = Some(hash), Some(expected) => assert_eq!(hash, expected, "Non-deterministic output"), } } }) }); } fn bench_golden_vectors(c: &mut Criterion) { let gate = Arc::new(DefaultCoherenceGate::new()); let backend = NativeSimBackend::new(gate); let artifact = create_test_artifact(); let model_id = backend.load(&artifact).unwrap(); let shape = FixedShape::micro(); // Create golden vectors let test_inputs: Vec> = (0..128) .map(|seed| { (0..shape.seq_len) .map(|i| ((i as usize * seed + 1) % shape.vocab as usize) as u16) .collect() }) .collect(); let mask = vec![1u8; shape.seq_len as usize]; // Compute expected outputs let expected: Vec> = test_inputs .iter() .map(|tokens| { let req = InferenceRequest::new(model_id, shape, tokens, &mask, GateHint::allow_all()); backend.infer(req).unwrap().logits_q }) .collect(); c.bench_function("golden_vector_validation", |b| { b.iter(|| { for (tokens, exp) in test_inputs.iter().zip(&expected) { let req = InferenceRequest::new( model_id, shape, black_box(tokens), &mask, GateHint::allow_all(), ); let result = backend.infer(req).unwrap(); // Compute max abs error let max_err: i32 = result .logits_q .iter() .zip(exp) .map(|(&a, &b)| (a as i32 - b as i32).abs()) .max() .unwrap_or(0); assert_eq!(max_err, 0, "Golden vector mismatch"); } }) }); } fn bench_quantization_accuracy(c: &mut Criterion) { use ruvector_fpga_transformer::quant::qformat::{quantize_symmetric_i8, QuantizedMatrix}; c.bench_function("quantize_matrix_256x256", |b| { let data: Vec = (0..256 * 256).map(|i| (i as f32 * 0.001).sin()).collect(); b.iter(|| { let matrix = QuantizedMatrix::from_f32(black_box(&data), 256, 256); let dequant = matrix.to_f32(); // Check reconstruction error let max_err: f32 = data .iter() .zip(&dequant) .map(|(a, b)| (a - b).abs()) .fold(0.0f32, f32::max); assert!(max_err < 0.1, "Quantization error too high: {}", max_err); }) }); } criterion_group!( benches, bench_determinism, bench_golden_vectors, bench_quantization_accuracy ); criterion_main!(benches);