Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

2026-02-28 14:39:40 -05:00
parent 7885bf6278 d803bfe2b1
commit cd5943df23
7854 changed files with 3522914 additions and 0 deletions
--- a/vendor/ruvector/crates/ruvector-fpga-transformer/benches/correctness.rs
+++ b/vendor/ruvector/crates/ruvector-fpga-transformer/benches/correctness.rs
@@ -0,0 +1,157 @@
+//! Correctness and determinism benchmarks
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use std::sync::Arc;
+
+use ruvector_fpga_transformer::{
+    artifact::{Manifest, ModelArtifact},
+    backend::native_sim::NativeSimBackend,
+    backend::TransformerBackend,
+    gating::DefaultCoherenceGate,
+    types::{FixedShape, GateHint, InferenceRequest, QuantSpec},
+};
+
+/// Builds the in-memory model artifact shared by the correctness benchmarks.
+///
+/// The artifact uses `FixedShape::micro()`, `QuantSpec::int8()` and a weight buffer
+/// of `vocab * d_model` bytes filled with the repeating pattern `0, 1, ..., 255`.
+fn create_test_artifact() -> ModelArtifact {
+    let shape = FixedShape::micro();
+
+    // One byte per (vocab entry, model dimension) pair, cycling through 0..=255.
+    let weight_count = shape.vocab as usize * shape.d_model as usize;
+    let mut weights: Vec<u8> = Vec::with_capacity(weight_count);
+    weights.extend((0..weight_count).map(|idx| (idx % 256) as u8));
+
+    ModelArtifact::new(
+        Manifest {
+            name: "determinism_test".into(),
+            model_hash: String::new(),
+            shape,
+            quant: QuantSpec::int8(),
+            io: Default::default(),
+            backend: Default::default(),
+            tests: Default::default(),
+        },
+        weights,
+        None,
+        None,
+        vec![],
+    )
+}
+
+/// Benchmarks 1000 back-to-back inferences on one fixed input and asserts that
+/// every run yields the same logits hash.
+///
+/// # Panics
+///
+/// Panics if the model cannot be loaded, if an inference call fails, or if any
+/// run's logits hash differs from the first run's hash within the same iteration.
+fn bench_determinism(c: &mut Criterion) {
+    let gate = Arc::new(DefaultCoherenceGate::new());
+    let backend = NativeSimBackend::new(gate);
+
+    let artifact = create_test_artifact();
+    let model_id = backend.load(&artifact).unwrap();
+    let shape = FixedShape::micro();
+
+    // Reduce modulo the vocab in `usize`, matching `bench_golden_vectors`. Casting the
+    // vocab down to `u16` first truncates it if the field is wider than `u16` (a vocab
+    // of 65536 would become 0 and `% 0` panics), and `i * 7` computed in the narrow
+    // token type can overflow for long sequences.
+    let vocab = shape.vocab as usize;
+    let tokens: Vec<u16> = (0..shape.seq_len)
+        .map(|i| ((i as usize * 7) % vocab) as u16)
+        .collect();
+    let mask = vec![1u8; shape.seq_len as usize];
+
+    c.bench_function("determinism_check_1000", |b| {
+        b.iter(|| {
+            // Hash of the first run; every later run in this iteration must match it.
+            let mut first_hash: Option<u64> = None;
+
+            for _ in 0..1000 {
+                let req = InferenceRequest::new(
+                    model_id,
+                    shape,
+                    black_box(&tokens),
+                    &mask,
+                    GateHint::allow_all(),
+                );
+                let result = backend.infer(req).unwrap();
+
+                // Order-sensitive polynomial hash (multiplier 31) over the logits.
+                let hash = result
+                    .logits_q
+                    .iter()
+                    .fold(0u64, |acc, &v| acc.wrapping_mul(31).wrapping_add(v as u64));
+
+                match first_hash {
+                    None => first_hash = Some(hash),
+                    Some(expected) => assert_eq!(hash, expected, "Non-deterministic output"),
+                }
+            }
+        })
+    });
+}
+
+/// Benchmarks re-validation of 128 generated inputs against reference logits.
+///
+/// The reference logits are produced once, before measurement, by running each
+/// input through the same backend. Every benchmark iteration then re-runs all
+/// inputs and asserts that the largest absolute per-element difference from the
+/// corresponding reference is zero.
+fn bench_golden_vectors(c: &mut Criterion) {
+    let gate = Arc::new(DefaultCoherenceGate::new());
+    let backend = NativeSimBackend::new(gate);
+
+    let artifact = create_test_artifact();
+    let model_id = backend.load(&artifact).unwrap();
+    let shape = FixedShape::micro();
+
+    let mask = vec![1u8; shape.seq_len as usize];
+    let vocab = shape.vocab as usize;
+
+    // One token sequence per seed in 0..128: token(pos) = (pos * seed + 1) mod vocab.
+    let mut test_inputs: Vec<Vec<u16>> = Vec::with_capacity(128);
+    for seed in 0..128usize {
+        let tokens: Vec<u16> = (0..shape.seq_len)
+            .map(|pos| ((pos as usize * seed + 1) % vocab) as u16)
+            .collect();
+        test_inputs.push(tokens);
+    }
+
+    // Reference outputs, computed up front so they are not part of the measurement.
+    let mut expected: Vec<Vec<i16>> = Vec::with_capacity(test_inputs.len());
+    for tokens in &test_inputs {
+        let req = InferenceRequest::new(model_id, shape, tokens, &mask, GateHint::allow_all());
+        expected.push(backend.infer(req).unwrap().logits_q);
+    }
+
+    c.bench_function("golden_vector_validation", |b| {
+        b.iter(|| {
+            for (tokens, reference) in test_inputs.iter().zip(expected.iter()) {
+                let req = InferenceRequest::new(
+                    model_id,
+                    shape,
+                    black_box(tokens),
+                    &mask,
+                    GateHint::allow_all(),
+                );
+                let result = backend.infer(req).unwrap();
+
+                // Running maximum of |got - want|; stays 0 when there are no logits.
+                let mut max_err: i32 = 0;
+                for (&got, &want) in result.logits_q.iter().zip(reference.iter()) {
+                    max_err = max_err.max((got as i32 - want as i32).abs());
+                }
+
+                assert_eq!(max_err, 0, "Golden vector mismatch");
+            }
+        })
+    });
+}
+
+/// Benchmarks an f32 -> `QuantizedMatrix` -> f32 round trip of a 256x256 matrix.
+///
+/// # Panics
+///
+/// Panics if the largest absolute reconstruction error is not below `0.1`.
+fn bench_quantization_accuracy(c: &mut Criterion) {
+    // Only `QuantizedMatrix` is needed here; importing anything else from `qformat`
+    // would just raise an `unused_imports` warning.
+    use ruvector_fpga_transformer::quant::qformat::QuantizedMatrix;
+
+    c.bench_function("quantize_matrix_256x256", |b| {
+        // Input is built once per benchmark, outside the timed closure.
+        let data: Vec<f32> = (0..256 * 256).map(|i| (i as f32 * 0.001).sin()).collect();
+
+        b.iter(|| {
+            let matrix = QuantizedMatrix::from_f32(black_box(&data), 256, 256);
+            let dequant = matrix.to_f32();
+
+            // Check reconstruction error
+            let max_err: f32 = data
+                .iter()
+                .zip(&dequant)
+                .map(|(a, b)| (a - b).abs())
+                .fold(0.0f32, f32::max);
+
+            assert!(max_err < 0.1, "Quantization error too high: {}", max_err);
+        })
+    });
+}
+
+// Collect the correctness benchmarks into the `benches` group and generate the
+// benchmark binary's entry point from it.
+criterion_group!(
+    benches,
+    bench_determinism,
+    bench_golden_vectors,
+    bench_quantization_accuracy
+);
+criterion_main!(benches);
--- a/vendor/ruvector/crates/ruvector-fpga-transformer/benches/gating.rs
+++ b/vendor/ruvector/crates/ruvector-fpga-transformer/benches/gating.rs
@@ -0,0 +1,154 @@
+//! Gating subsystem benchmarks
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use std::sync::Arc;
+
+use ruvector_fpga_transformer::{
+    artifact::{Manifest, ModelArtifact},
+    backend::native_sim::NativeSimBackend,
+    backend::TransformerBackend,
+    gating::{CoherenceConfig, CoherenceGate, DefaultCoherenceGate},
+    types::{ComputeClass, FixedShape, GateDecision, GateHint, InferenceRequest, QuantSpec},
+};
+
+/// Benchmarks `preflight` over every integer coherence value in `-500..500`,
+/// tallying how many hints are skipped versus not skipped.
+///
+/// Each iteration returns the `(skipped, ran)` counts.
+fn bench_skip_rate_distribution(c: &mut Criterion) {
+    let gate = DefaultCoherenceGate::new();
+
+    // Synthetic coherence distribution: one sample per integer in [-500, 500).
+    let coherence_values: Vec<i16> = (-500..500).collect();
+
+    c.bench_function("skip_rate_uniform_distribution", |b| {
+        b.iter(|| {
+            coherence_values
+                .iter()
+                .fold((0u32, 0u32), |(skipped, ran), &coherence| {
+                    let hint = GateHint::new(coherence, false, ComputeClass::Deliberative);
+                    let decision = gate.preflight(black_box(&hint));
+
+                    if matches!(decision, GateDecision::Skipped { .. }) {
+                        (skipped + 1, ran)
+                    } else {
+                        (skipped, ran + 1)
+                    }
+                })
+        })
+    });
+}
+
+/// Benchmarks full inference on a zero-weight micro model for a range of
+/// coherence values passed through the gate hint.
+///
+/// Each iteration returns the gate decision recorded in the inference witness.
+fn bench_early_exit_histogram(c: &mut Criterion) {
+    let gate = Arc::new(DefaultCoherenceGate::new());
+    let backend = NativeSimBackend::new(gate);
+
+    let shape = FixedShape::micro();
+
+    // All-zero weight buffer of `vocab * d_model` bytes.
+    let weight_bytes = shape.vocab as usize * shape.d_model as usize;
+    let artifact = ModelArtifact::new(
+        Manifest {
+            name: "early_exit_test".into(),
+            model_hash: String::new(),
+            shape,
+            quant: QuantSpec::int8(),
+            io: Default::default(),
+            backend: Default::default(),
+            tests: Default::default(),
+        },
+        vec![0u8; weight_bytes],
+        None,
+        None,
+        vec![],
+    );
+    let model_id = backend.load(&artifact).unwrap();
+
+    let tokens: Vec<u16> = (0..shape.seq_len).collect();
+    let mask = vec![1u8; shape.seq_len as usize];
+
+    let mut group = c.benchmark_group("early_exit_by_coherence");
+
+    // One benchmark per coherence level.
+    for level in [-500i16, -200, 0, 200, 500, 1000, 2000] {
+        let id = BenchmarkId::new("coherence", level);
+
+        group.bench_with_input(id, &level, |b, &level| {
+            // The hint is built once per benchmark and reused for every request.
+            let hint = GateHint::new(level, false, ComputeClass::Deliberative);
+
+            b.iter(|| {
+                let req = InferenceRequest::new(model_id, shape, black_box(&tokens), &mask, hint);
+                backend.infer(req).unwrap().witness.gate_decision
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks `checkpoint` for the default, strict and permissive gate configurations.
+///
+/// Each iteration walks layers `0..8` with a signal of `layer * 150` and stops at
+/// the first layer for which `checkpoint` returns `Some`, yielding that value
+/// (or `None` if no layer produced one).
+fn bench_checkpoint_overhead(c: &mut Criterion) {
+    let configs = [
+        ("default", CoherenceConfig::default()),
+        ("strict", CoherenceConfig::strict()),
+        ("permissive", CoherenceConfig::permissive()),
+    ];
+
+    let mut group = c.benchmark_group("checkpoint_overhead");
+
+    for (label, config) in configs {
+        let gate = DefaultCoherenceGate::with_config(config);
+        let id = BenchmarkId::new("config", label);
+
+        group.bench_with_input(id, &gate, |b, gate| {
+            b.iter(|| {
+                // First `Some` returned by `checkpoint` wins; later layers are not visited.
+                (0u8..8).find_map(|layer| {
+                    let signal = i16::from(layer) * 150;
+                    gate.checkpoint(black_box(layer), black_box(signal))
+                })
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks `MincutCoherenceGate::preflight` for three labelled gate hints.
+fn bench_mincut_gating(c: &mut Criterion) {
+    use ruvector_fpga_transformer::gating::coherence_gate::MincutCoherenceGate;
+
+    let gate = MincutCoherenceGate::new(CoherenceConfig::default(), 50, 200);
+
+    // (benchmark label, first `GateHint::new` argument, second `GateHint::new` argument)
+    let cases = [
+        ("high_lambda", 500i16, false),
+        ("low_lambda", 100i16, false),
+        ("boundary_crossed", 300i16, true),
+    ];
+
+    let mut group = c.benchmark_group("mincut_gating");
+
+    for (label, coherence, flag) in cases {
+        let hint = GateHint::new(coherence, flag, ComputeClass::Deliberative);
+        let id = BenchmarkId::new("preflight", label);
+
+        group.bench_with_input(id, &hint, |b, hint| {
+            b.iter(|| gate.preflight(black_box(hint)))
+        });
+    }
+
+    group.finish();
+}
+
+// Collect the gating benchmarks into the `benches` group and generate the
+// benchmark binary's entry point from it.
+criterion_group!(
+    benches,
+    bench_skip_rate_distribution,
+    bench_early_exit_histogram,
+    bench_checkpoint_overhead,
+    bench_mincut_gating
+);
+criterion_main!(benches);
--- a/vendor/ruvector/crates/ruvector-fpga-transformer/benches/latency.rs
+++ b/vendor/ruvector/crates/ruvector-fpga-transformer/benches/latency.rs
@@ -0,0 +1,143 @@
+//! Latency benchmarks for FPGA Transformer
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use std::sync::Arc;
+
+use ruvector_fpga_transformer::{
+    artifact::{Manifest, ModelArtifact},
+    backend::native_sim::NativeSimBackend,
+    backend::TransformerBackend,
+    gating::DefaultCoherenceGate,
+    types::{FixedShape, GateHint, InferenceRequest, ModelId, QuantSpec},
+};
+
+/// Builds an int8 benchmark artifact for `shape` with an all-zero weight buffer
+/// of `vocab * d_model` bytes.
+fn create_test_artifact(shape: FixedShape) -> ModelArtifact {
+    // Minimal weights: zero-filled, one byte per (vocab entry, model dimension) pair.
+    let weight_bytes = shape.vocab as usize * shape.d_model as usize;
+
+    ModelArtifact::new(
+        Manifest {
+            name: "bench_model".into(),
+            model_hash: String::new(),
+            shape,
+            quant: QuantSpec::int8(),
+            io: Default::default(),
+            backend: Default::default(),
+            tests: Default::default(),
+        },
+        vec![0u8; weight_bytes],
+        None,
+        None,
+        vec![],
+    )
+}
+
+/// Benchmarks a single inference on the micro shape with the native simulator backend.
+///
+/// The model is loaded once up front; only request construction and `infer` are
+/// inside the measured closure.
+fn bench_inference(c: &mut Criterion) {
+    let shape = FixedShape::micro();
+
+    let gate = Arc::new(DefaultCoherenceGate::new());
+    let backend = NativeSimBackend::new(gate);
+    let artifact = create_test_artifact(shape);
+    let model_id = backend.load(&artifact).unwrap();
+
+    // Tokens are simply 0, 1, ..., seq_len - 1 with every mask entry set to 1.
+    let mask = vec![1u8; shape.seq_len as usize];
+    let tokens: Vec<u16> = (0..shape.seq_len).collect();
+
+    c.bench_function("native_sim_micro_inference", |bencher| {
+        bencher.iter(|| {
+            let input = black_box(&tokens);
+            let req = InferenceRequest::new(model_id, shape, input, &mask, GateHint::allow_all());
+            backend.infer(req).unwrap()
+        })
+    });
+}
+
+/// Benchmarks a single inference for each of the micro, small and baseline shapes.
+///
+/// Every shape gets its own backend instance and freshly loaded model; all
+/// backends share one gate.
+fn bench_inference_shapes(c: &mut Criterion) {
+    let gate = Arc::new(DefaultCoherenceGate::new());
+
+    let shapes = [
+        ("micro", FixedShape::micro()),
+        ("small", FixedShape::small()),
+        ("baseline", FixedShape::baseline()),
+    ];
+
+    let mut group = c.benchmark_group("inference_by_shape");
+
+    for (label, shape) in shapes {
+        // Per-shape setup, kept outside the measured closure.
+        let backend = NativeSimBackend::new(gate.clone());
+        let artifact = create_test_artifact(shape);
+        let model_id = backend.load(&artifact).unwrap();
+
+        let mask = vec![1u8; shape.seq_len as usize];
+        let tokens: Vec<u16> = (0..shape.seq_len).collect();
+
+        let id = BenchmarkId::new("native_sim", label);
+        group.bench_with_input(id, &shape, |bencher, &shape| {
+            bencher.iter(|| {
+                let input = black_box(&tokens);
+                let req =
+                    InferenceRequest::new(model_id, shape, input, &mask, GateHint::allow_all());
+                backend.infer(req).unwrap()
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks one load/unload cycle of the micro test artifact.
+///
+/// Note that the benchmark id is `model_load`, but each measured iteration
+/// covers both the `load` call and the matching `unload` call.
+fn bench_load_unload(c: &mut Criterion) {
+    let backend = {
+        let gate = Arc::new(DefaultCoherenceGate::new());
+        NativeSimBackend::new(gate)
+    };
+    let artifact = create_test_artifact(FixedShape::micro());
+
+    c.bench_function("model_load", |bencher| {
+        bencher.iter(|| {
+            // Unload right away so every iteration starts from the same backend state.
+            let loaded = backend.load(black_box(&artifact)).unwrap();
+            backend.unload(loaded).unwrap();
+        })
+    });
+}
+
+/// Benchmarks `DefaultCoherenceGate::preflight` (default configuration) for three
+/// labelled gate hints: `allow_all`, `reflex_only` and a hand-built `low_coherence` hint.
+fn bench_gating(c: &mut Criterion) {
+    use ruvector_fpga_transformer::gating::{CoherenceConfig, CoherenceGate};
+    use ruvector_fpga_transformer::types::ComputeClass;
+
+    let gate = DefaultCoherenceGate::with_config(CoherenceConfig::default());
+
+    let allow_all = GateHint::allow_all();
+    let reflex_only = GateHint::reflex_only();
+    let low_coherence = GateHint::new(-500, true, ComputeClass::Deliberative);
+
+    let cases = [
+        ("allow_all", allow_all),
+        ("reflex_only", reflex_only),
+        ("low_coherence", low_coherence),
+    ];
+
+    let mut group = c.benchmark_group("gating_preflight");
+
+    for (label, hint) in cases {
+        let id = BenchmarkId::new("preflight", label);
+        group.bench_with_input(id, &hint, |bencher, hint| {
+            bencher.iter(|| gate.preflight(black_box(hint)))
+        });
+    }
+
+    group.finish();
+}
+
+// Collect the latency benchmarks into the `benches` group and generate the
+// benchmark binary's entry point from it.
+criterion_group!(
+    benches,
+    bench_inference,
+    bench_inference_shapes,
+    bench_load_unload,
+    bench_gating
+);
+criterion_main!(benches);