Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
157
vendor/ruvector/crates/ruvector-fpga-transformer/benches/correctness.rs
vendored
Normal file
157
vendor/ruvector/crates/ruvector-fpga-transformer/benches/correctness.rs
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
//! Correctness and determinism benchmarks
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruvector_fpga_transformer::{
|
||||
artifact::{Manifest, ModelArtifact},
|
||||
backend::native_sim::NativeSimBackend,
|
||||
backend::TransformerBackend,
|
||||
gating::DefaultCoherenceGate,
|
||||
types::{FixedShape, GateHint, InferenceRequest, QuantSpec},
|
||||
};
|
||||
|
||||
fn create_test_artifact() -> ModelArtifact {
|
||||
let shape = FixedShape::micro();
|
||||
let manifest = Manifest {
|
||||
name: "determinism_test".into(),
|
||||
model_hash: String::new(),
|
||||
shape,
|
||||
quant: QuantSpec::int8(),
|
||||
io: Default::default(),
|
||||
backend: Default::default(),
|
||||
tests: Default::default(),
|
||||
};
|
||||
|
||||
let embedding_size = shape.vocab as usize * shape.d_model as usize;
|
||||
let weights: Vec<u8> = (0..embedding_size).map(|i| (i % 256) as u8).collect();
|
||||
|
||||
ModelArtifact::new(manifest, weights, None, None, vec![])
|
||||
}
|
||||
|
||||
fn bench_determinism(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
let backend = NativeSimBackend::new(gate);
|
||||
|
||||
let artifact = create_test_artifact();
|
||||
let model_id = backend.load(&artifact).unwrap();
|
||||
let shape = FixedShape::micro();
|
||||
|
||||
let tokens: Vec<u16> = (0..shape.seq_len)
|
||||
.map(|i| (i * 7) % shape.vocab as u16)
|
||||
.collect();
|
||||
let mask = vec![1u8; shape.seq_len as usize];
|
||||
|
||||
c.bench_function("determinism_check_1000", |b| {
|
||||
b.iter(|| {
|
||||
let mut first_hash: Option<u64> = None;
|
||||
|
||||
for _ in 0..1000 {
|
||||
let req = InferenceRequest::new(
|
||||
model_id,
|
||||
shape,
|
||||
black_box(&tokens),
|
||||
&mask,
|
||||
GateHint::allow_all(),
|
||||
);
|
||||
let result = backend.infer(req).unwrap();
|
||||
|
||||
// Hash the logits
|
||||
let hash = result
|
||||
.logits_q
|
||||
.iter()
|
||||
.fold(0u64, |acc, &v| acc.wrapping_mul(31).wrapping_add(v as u64));
|
||||
|
||||
match first_hash {
|
||||
None => first_hash = Some(hash),
|
||||
Some(expected) => assert_eq!(hash, expected, "Non-deterministic output"),
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_golden_vectors(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
let backend = NativeSimBackend::new(gate);
|
||||
|
||||
let artifact = create_test_artifact();
|
||||
let model_id = backend.load(&artifact).unwrap();
|
||||
let shape = FixedShape::micro();
|
||||
|
||||
// Create golden vectors
|
||||
let test_inputs: Vec<Vec<u16>> = (0..128)
|
||||
.map(|seed| {
|
||||
(0..shape.seq_len)
|
||||
.map(|i| ((i as usize * seed + 1) % shape.vocab as usize) as u16)
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mask = vec![1u8; shape.seq_len as usize];
|
||||
|
||||
// Compute expected outputs
|
||||
let expected: Vec<Vec<i16>> = test_inputs
|
||||
.iter()
|
||||
.map(|tokens| {
|
||||
let req = InferenceRequest::new(model_id, shape, tokens, &mask, GateHint::allow_all());
|
||||
backend.infer(req).unwrap().logits_q
|
||||
})
|
||||
.collect();
|
||||
|
||||
c.bench_function("golden_vector_validation", |b| {
|
||||
b.iter(|| {
|
||||
for (tokens, exp) in test_inputs.iter().zip(&expected) {
|
||||
let req = InferenceRequest::new(
|
||||
model_id,
|
||||
shape,
|
||||
black_box(tokens),
|
||||
&mask,
|
||||
GateHint::allow_all(),
|
||||
);
|
||||
let result = backend.infer(req).unwrap();
|
||||
|
||||
// Compute max abs error
|
||||
let max_err: i32 = result
|
||||
.logits_q
|
||||
.iter()
|
||||
.zip(exp)
|
||||
.map(|(&a, &b)| (a as i32 - b as i32).abs())
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
|
||||
assert_eq!(max_err, 0, "Golden vector mismatch");
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_quantization_accuracy(c: &mut Criterion) {
|
||||
use ruvector_fpga_transformer::quant::qformat::{quantize_symmetric_i8, QuantizedMatrix};
|
||||
|
||||
c.bench_function("quantize_matrix_256x256", |b| {
|
||||
let data: Vec<f32> = (0..256 * 256).map(|i| (i as f32 * 0.001).sin()).collect();
|
||||
|
||||
b.iter(|| {
|
||||
let matrix = QuantizedMatrix::from_f32(black_box(&data), 256, 256);
|
||||
let dequant = matrix.to_f32();
|
||||
|
||||
// Check reconstruction error
|
||||
let max_err: f32 = data
|
||||
.iter()
|
||||
.zip(&dequant)
|
||||
.map(|(a, b)| (a - b).abs())
|
||||
.fold(0.0f32, f32::max);
|
||||
|
||||
assert!(max_err < 0.1, "Quantization error too high: {}", max_err);
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
// Criterion wiring for this file: `criterion_group!` defines the group `benches`
// from the three benchmark functions listed below, and `criterion_main!`
// generates the `main` entry point that runs that group.
criterion_group!(
|
||||
benches,
|
||||
bench_determinism,
|
||||
bench_golden_vectors,
|
||||
bench_quantization_accuracy
|
||||
);
|
||||
criterion_main!(benches);
|
||||
154
vendor/ruvector/crates/ruvector-fpga-transformer/benches/gating.rs
vendored
Normal file
154
vendor/ruvector/crates/ruvector-fpga-transformer/benches/gating.rs
vendored
Normal file
@@ -0,0 +1,154 @@
|
||||
//! Gating subsystem benchmarks
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruvector_fpga_transformer::{
|
||||
artifact::{Manifest, ModelArtifact},
|
||||
backend::native_sim::NativeSimBackend,
|
||||
backend::TransformerBackend,
|
||||
gating::{CoherenceConfig, CoherenceGate, DefaultCoherenceGate},
|
||||
types::{ComputeClass, FixedShape, GateDecision, GateHint, InferenceRequest, QuantSpec},
|
||||
};
|
||||
|
||||
fn bench_skip_rate_distribution(c: &mut Criterion) {
|
||||
let gate = DefaultCoherenceGate::new();
|
||||
|
||||
// Generate synthetic coherence distribution
|
||||
let coherence_values: Vec<i16> = (-500..500).collect();
|
||||
|
||||
c.bench_function("skip_rate_uniform_distribution", |b| {
|
||||
b.iter(|| {
|
||||
let mut skipped = 0u32;
|
||||
let mut ran = 0u32;
|
||||
|
||||
for &coherence in &coherence_values {
|
||||
let hint = GateHint::new(coherence, false, ComputeClass::Deliberative);
|
||||
match gate.preflight(black_box(&hint)) {
|
||||
GateDecision::Skipped { .. } => skipped += 1,
|
||||
_ => ran += 1,
|
||||
}
|
||||
}
|
||||
|
||||
(skipped, ran)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_early_exit_histogram(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
let backend = NativeSimBackend::new(gate);
|
||||
|
||||
let shape = FixedShape::micro();
|
||||
let manifest = Manifest {
|
||||
name: "early_exit_test".into(),
|
||||
model_hash: String::new(),
|
||||
shape,
|
||||
quant: QuantSpec::int8(),
|
||||
io: Default::default(),
|
||||
backend: Default::default(),
|
||||
tests: Default::default(),
|
||||
};
|
||||
|
||||
let embedding_size = shape.vocab as usize * shape.d_model as usize;
|
||||
let artifact = ModelArtifact::new(manifest, vec![0u8; embedding_size], None, None, vec![]);
|
||||
let model_id = backend.load(&artifact).unwrap();
|
||||
|
||||
let tokens: Vec<u16> = (0..shape.seq_len).collect();
|
||||
let mask = vec![1u8; shape.seq_len as usize];
|
||||
|
||||
// Test with varying coherence levels
|
||||
let coherence_levels: Vec<i16> = vec![-500, -200, 0, 200, 500, 1000, 2000];
|
||||
|
||||
let mut group = c.benchmark_group("early_exit_by_coherence");
|
||||
|
||||
for coherence in coherence_levels {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("coherence", coherence),
|
||||
&coherence,
|
||||
|b, &coherence| {
|
||||
let hint = GateHint::new(coherence, false, ComputeClass::Deliberative);
|
||||
|
||||
b.iter(|| {
|
||||
let req =
|
||||
InferenceRequest::new(model_id, shape, black_box(&tokens), &mask, hint);
|
||||
let result = backend.infer(req).unwrap();
|
||||
result.witness.gate_decision
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_checkpoint_overhead(c: &mut Criterion) {
|
||||
let configs = [
|
||||
("default", CoherenceConfig::default()),
|
||||
("strict", CoherenceConfig::strict()),
|
||||
("permissive", CoherenceConfig::permissive()),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("checkpoint_overhead");
|
||||
|
||||
for (name, config) in configs {
|
||||
let gate = DefaultCoherenceGate::with_config(config);
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("config", name), &gate, |b, gate| {
|
||||
b.iter(|| {
|
||||
let mut decision = None;
|
||||
for layer in 0u8..8 {
|
||||
let signal = (layer as i16) * 150;
|
||||
if let Some(d) = gate.checkpoint(black_box(layer), black_box(signal)) {
|
||||
decision = Some(d);
|
||||
break;
|
||||
}
|
||||
}
|
||||
decision
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_mincut_gating(c: &mut Criterion) {
|
||||
use ruvector_fpga_transformer::gating::coherence_gate::MincutCoherenceGate;
|
||||
|
||||
let config = CoherenceConfig::default();
|
||||
let gate = MincutCoherenceGate::new(config, 50, 200);
|
||||
|
||||
let hints = [
|
||||
(
|
||||
"high_lambda",
|
||||
GateHint::new(500, false, ComputeClass::Deliberative),
|
||||
),
|
||||
(
|
||||
"low_lambda",
|
||||
GateHint::new(100, false, ComputeClass::Deliberative),
|
||||
),
|
||||
(
|
||||
"boundary_crossed",
|
||||
GateHint::new(300, true, ComputeClass::Deliberative),
|
||||
),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("mincut_gating");
|
||||
|
||||
for (name, hint) in hints {
|
||||
group.bench_with_input(BenchmarkId::new("preflight", name), &hint, |b, hint| {
|
||||
b.iter(|| gate.preflight(black_box(hint)))
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Criterion wiring for this file: `criterion_group!` defines the group `benches`
// from the four gating benchmark functions listed below, and `criterion_main!`
// generates the `main` entry point that runs that group.
criterion_group!(
|
||||
benches,
|
||||
bench_skip_rate_distribution,
|
||||
bench_early_exit_histogram,
|
||||
bench_checkpoint_overhead,
|
||||
bench_mincut_gating
|
||||
);
|
||||
criterion_main!(benches);
|
||||
143
vendor/ruvector/crates/ruvector-fpga-transformer/benches/latency.rs
vendored
Normal file
143
vendor/ruvector/crates/ruvector-fpga-transformer/benches/latency.rs
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
//! Latency benchmarks for FPGA Transformer
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruvector_fpga_transformer::{
|
||||
artifact::{Manifest, ModelArtifact},
|
||||
backend::native_sim::NativeSimBackend,
|
||||
backend::TransformerBackend,
|
||||
gating::DefaultCoherenceGate,
|
||||
types::{FixedShape, GateHint, InferenceRequest, ModelId, QuantSpec},
|
||||
};
|
||||
|
||||
fn create_test_artifact(shape: FixedShape) -> ModelArtifact {
|
||||
let manifest = Manifest {
|
||||
name: "bench_model".into(),
|
||||
model_hash: String::new(),
|
||||
shape,
|
||||
quant: QuantSpec::int8(),
|
||||
io: Default::default(),
|
||||
backend: Default::default(),
|
||||
tests: Default::default(),
|
||||
};
|
||||
|
||||
// Create minimal weights
|
||||
let embedding_size = shape.vocab as usize * shape.d_model as usize;
|
||||
let weights = vec![0u8; embedding_size];
|
||||
|
||||
ModelArtifact::new(manifest, weights, None, None, vec![])
|
||||
}
|
||||
|
||||
fn bench_inference(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
let backend = NativeSimBackend::new(gate);
|
||||
|
||||
let shape = FixedShape::micro();
|
||||
let artifact = create_test_artifact(shape);
|
||||
let model_id = backend.load(&artifact).unwrap();
|
||||
|
||||
let tokens: Vec<u16> = (0..shape.seq_len).collect();
|
||||
let mask = vec![1u8; shape.seq_len as usize];
|
||||
|
||||
c.bench_function("native_sim_micro_inference", |b| {
|
||||
b.iter(|| {
|
||||
let req = InferenceRequest::new(
|
||||
model_id,
|
||||
shape,
|
||||
black_box(&tokens),
|
||||
&mask,
|
||||
GateHint::allow_all(),
|
||||
);
|
||||
backend.infer(req).unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_inference_shapes(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
|
||||
let shapes = [
|
||||
("micro", FixedShape::micro()),
|
||||
("small", FixedShape::small()),
|
||||
("baseline", FixedShape::baseline()),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("inference_by_shape");
|
||||
|
||||
for (name, shape) in shapes {
|
||||
let backend = NativeSimBackend::new(gate.clone());
|
||||
let artifact = create_test_artifact(shape);
|
||||
let model_id = backend.load(&artifact).unwrap();
|
||||
|
||||
let tokens: Vec<u16> = (0..shape.seq_len).collect();
|
||||
let mask = vec![1u8; shape.seq_len as usize];
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("native_sim", name), &shape, |b, &shape| {
|
||||
b.iter(|| {
|
||||
let req = InferenceRequest::new(
|
||||
model_id,
|
||||
shape,
|
||||
black_box(&tokens),
|
||||
&mask,
|
||||
GateHint::allow_all(),
|
||||
);
|
||||
backend.infer(req).unwrap()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_load_unload(c: &mut Criterion) {
|
||||
let gate = Arc::new(DefaultCoherenceGate::new());
|
||||
let backend = NativeSimBackend::new(gate);
|
||||
|
||||
let artifact = create_test_artifact(FixedShape::micro());
|
||||
|
||||
c.bench_function("model_load", |b| {
|
||||
b.iter(|| {
|
||||
let id = backend.load(black_box(&artifact)).unwrap();
|
||||
backend.unload(id).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_gating(c: &mut Criterion) {
|
||||
use ruvector_fpga_transformer::gating::{CoherenceConfig, CoherenceGate};
|
||||
|
||||
let gate = DefaultCoherenceGate::with_config(CoherenceConfig::default());
|
||||
|
||||
let hints = [
|
||||
("allow_all", GateHint::allow_all()),
|
||||
("reflex_only", GateHint::reflex_only()),
|
||||
(
|
||||
"low_coherence",
|
||||
GateHint::new(
|
||||
-500,
|
||||
true,
|
||||
ruvector_fpga_transformer::types::ComputeClass::Deliberative,
|
||||
),
|
||||
),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("gating_preflight");
|
||||
|
||||
for (name, hint) in hints {
|
||||
group.bench_with_input(BenchmarkId::new("preflight", name), &hint, |b, hint| {
|
||||
b.iter(|| gate.preflight(black_box(hint)))
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Criterion wiring for this file: `criterion_group!` defines the group `benches`
// from the four latency benchmark functions listed below, and `criterion_main!`
// generates the `main` entry point that runs that group.
criterion_group!(
|
||||
benches,
|
||||
bench_inference,
|
||||
bench_inference_shapes,
|
||||
bench_load_unload,
|
||||
bench_gating
|
||||
);
|
||||
criterion_main!(benches);
|
||||
Reference in New Issue
Block a user