Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,123 @@
//! Basic inference example
//!
//! Demonstrates loading a model and running inference with the native simulator.
use std::sync::Arc;
use ruvector_fpga_transformer::{
artifact::{Manifest, ModelArtifact},
backend::native_sim::NativeSimBackend,
gating::DefaultCoherenceGate,
types::{FixedShape, GateHint, InferenceRequest, QuantSpec},
Engine,
};
fn main() -> anyhow::Result<()> {
println!("FPGA Transformer - Basic Inference Example");
println!("==========================================\n");
// Create a micro-sized model for demonstration
let shape = FixedShape::micro();
println!("Model shape: {:?}", shape);
// Create manifest
let manifest = Manifest {
name: "demo_reflex_transformer".into(),
model_hash: String::new(),
shape,
quant: QuantSpec::int8(),
io: Default::default(),
backend: Default::default(),
tests: Default::default(),
};
// Create minimal weights (random for demo)
let embedding_size = shape.vocab as usize * shape.d_model as usize;
let weights: Vec<u8> = (0..embedding_size)
.map(|i| ((i * 7 + 13) % 256) as u8)
.collect();
println!("Weight size: {} bytes", weights.len());
// Create artifact
let artifact = ModelArtifact::new(manifest, weights, None, None, vec![]);
println!("Artifact created, model ID: {}", artifact.model_id());
// Create backend and engine
let gate = Arc::new(DefaultCoherenceGate::new());
let backend = Box::new(NativeSimBackend::new(gate.clone()));
let mut engine = Engine::new(backend, gate);
// Load model
let model_id = engine.load(&artifact)?;
println!("Model loaded successfully\n");
// Prepare input
let tokens: Vec<u16> = (0..shape.seq_len).collect();
let mask = vec![1u8; shape.seq_len as usize];
println!("Running inference...");
println!(" Input tokens: {:?}...", &tokens[..4.min(tokens.len())]);
// Run inference with different coherence levels
let coherence_levels = [
(
"High coherence",
GateHint::new(
500,
false,
ruvector_fpga_transformer::ComputeClass::Deliberative,
),
),
(
"Medium coherence",
GateHint::new(
100,
false,
ruvector_fpga_transformer::ComputeClass::Associative,
),
),
(
"Low coherence",
GateHint::new(-100, true, ruvector_fpga_transformer::ComputeClass::Reflex),
),
];
for (name, hint) in coherence_levels {
let req = InferenceRequest::new(model_id, shape, &tokens, &mask, hint);
match engine.infer(req) {
Ok(result) => {
println!("\n{}", name);
println!(" Gate decision: {:?}", result.witness.gate_decision);
println!(
" Latency: {:.2}ms",
result.witness.latency_ns as f64 / 1_000_000.0
);
if let Some(topk) = &result.topk {
println!(" Top-3 predictions:");
for (i, (token, score)) in topk.iter().take(3).enumerate() {
println!(" {}. Token {} (score: {})", i + 1, token, score);
}
}
}
Err(e) => {
println!("\n{}: Skipped - {:?}", name, e);
}
}
}
// Print statistics
println!("\n==========================================");
println!("Engine Statistics:");
let stats = engine.stats();
println!(" Total inferences: {}", stats.total_inferences);
println!(" Successful: {}", stats.successful);
println!(" Skipped: {}", stats.skipped);
println!(" Early exits: {}", stats.early_exits);
println!(" Success rate: {:.1}%", stats.success_rate() * 100.0);
println!(" Avg latency: {:.2}ms", stats.avg_latency_ms());
Ok(())
}

View File

@@ -0,0 +1,114 @@
//! FPGA Daemon client example
//!
//! Demonstrates connecting to an FPGA daemon and running inference.
//! This example requires the `daemon` feature and a running FPGA daemon.
use std::sync::Arc;
use ruvector_fpga_transformer::{
artifact::{Manifest, ModelArtifact},
backend::fpga_daemon::{DaemonConfig, DaemonConnection, FpgaDaemonBackend},
gating::DefaultCoherenceGate,
types::{FixedShape, GateHint, InferenceRequest, QuantSpec},
Engine,
};
fn main() -> anyhow::Result<()> {
    println!("FPGA Transformer - Daemon Client Example");
    println!("=========================================\n");

    // The daemon socket path can be overridden via the environment.
    let socket_path = std::env::var("RUVECTOR_FPGA_SOCKET")
        .unwrap_or_else(|_| "/var/run/ruvector_fpga.sock".into());
    println!("Connecting to daemon at: {}", socket_path);

    // Transport plus retry/timeout policy for talking to the daemon.
    let daemon_cfg = DaemonConfig {
        connect_timeout_ms: 5000,
        read_timeout_ms: 10000,
        write_timeout_ms: 5000,
        retries: 3,
        backoff_multiplier: 2.0,
        topk_only: true,
        topk: 16,
    };
    let backend =
        FpgaDaemonBackend::with_connection(DaemonConnection::unix(&socket_path), daemon_cfg);

    let gate = Arc::new(DefaultCoherenceGate::new());
    let mut engine = Engine::new(Box::new(backend), gate);

    // Assemble a micro-sized INT4/INT8 test artifact with synthetic weights.
    let shape = FixedShape::micro();
    let manifest = Manifest {
        name: "fpga_test_model".into(),
        model_hash: String::new(),
        shape,
        quant: QuantSpec::int4_int8(),
        io: Default::default(),
        backend: Default::default(),
        tests: Default::default(),
    };
    // Two INT4 values pack into each byte, hence the halved buffer length.
    let packed_len = shape.vocab as usize * shape.d_model as usize / 2;
    let weights: Vec<u8> = (0..packed_len).map(|i| ((i * 11 + 7) % 256) as u8).collect();
    let artifact = ModelArtifact::new(manifest, weights, None, None, vec![]);

    // Loading fails if no daemon is listening; print a hint and bail early.
    println!("Loading model...");
    let model_id = match engine.load(&artifact) {
        Ok(id) => id,
        Err(err) => {
            eprintln!("Failed to load model: {}", err);
            eprintln!("\nMake sure the FPGA daemon is running:");
            eprintln!(" ruvector-fpga-daemon --socket {}", socket_path);
            return Err(err.into());
        }
    };
    println!("Model loaded: {}", model_id);

    // Input: even token IDs with a fully-enabled attention mask.
    let tokens: Vec<u16> = (0..shape.seq_len).map(|i| i * 2).collect();
    let mask = vec![1u8; shape.seq_len as usize];

    println!("\nRunning FPGA inference...");
    let request = InferenceRequest::new(model_id, shape, &tokens, &mask, GateHint::allow_all());
    match engine.infer(request) {
        Ok(res) => {
            println!("Inference successful!");
            println!(" Backend: {:?}", res.witness.backend);
            println!(" Cycles: {}", res.witness.cycles);
            println!(
                " Latency: {}ns ({:.3}ms)",
                res.witness.latency_ns,
                res.witness.latency_ns as f64 / 1_000_000.0
            );
            println!(" Gate decision: {:?}", res.witness.gate_decision);
            if let Some(preds) = &res.topk {
                println!("\n Top-5 predictions:");
                for (rank, (token, score)) in preds.iter().take(5).enumerate() {
                    println!(" {}. Token {} (score: {})", rank + 1, token, score);
                }
            }
        }
        Err(err) => {
            eprintln!("Inference failed: {}", err);
        }
    }

    // Release the model before exiting.
    engine.unload(model_id)?;
    println!("\nModel unloaded");
    Ok(())
}