Files
wifi-densepose/crates/ruvector-fpga-transformer/examples/daemon_client.rs
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

115 lines
3.9 KiB
Rust

//! FPGA Daemon client example
//!
//! Demonstrates connecting to an FPGA daemon and running inference.
//! This example requires the `daemon` feature and a running FPGA daemon.
use std::sync::Arc;
use ruvector_fpga_transformer::{
artifact::{Manifest, ModelArtifact},
backend::fpga_daemon::{DaemonConfig, DaemonConnection, FpgaDaemonBackend},
gating::DefaultCoherenceGate,
types::{FixedShape, GateHint, InferenceRequest, QuantSpec},
Engine,
};
/// Entry point: connects to an FPGA daemon over a Unix socket, loads a
/// synthetic test model, runs a single inference, prints the resulting
/// witness data, and unloads the model again.
fn main() -> anyhow::Result<()> {
    println!("FPGA Transformer - Daemon Client Example");
    println!("=========================================\n");

    // Socket path is overridable via the environment for non-default setups.
    let socket_path = std::env::var("RUVECTOR_FPGA_SOCKET")
        .unwrap_or_else(|_| "/var/run/ruvector_fpga.sock".into());
    println!("Connecting to daemon at: {}", socket_path);

    // Daemon transport settings: generous timeouts plus retries with
    // exponential backoff, and top-k-only result transfer.
    let conn = DaemonConnection::unix(&socket_path);
    let daemon_cfg = DaemonConfig {
        connect_timeout_ms: 5000,
        read_timeout_ms: 10000,
        write_timeout_ms: 5000,
        retries: 3,
        backoff_multiplier: 2.0,
        topk_only: true,
        topk: 16,
    };

    // Wire the daemon backend into an engine guarded by the default gate.
    let backend = Box::new(FpgaDaemonBackend::with_connection(conn, daemon_cfg));
    let gate = Arc::new(DefaultCoherenceGate::new());
    let mut engine = Engine::new(backend, gate);

    // Build a throwaway model artifact: micro shape, INT4/INT8 quantization,
    // and deterministic pseudo-random weights (INT4 packed: two per byte).
    let shape = FixedShape::micro();
    let manifest = Manifest {
        name: "fpga_test_model".into(),
        model_hash: String::new(),
        shape,
        quant: QuantSpec::int4_int8(),
        io: Default::default(),
        backend: Default::default(),
        tests: Default::default(),
    };
    let embedding_size = shape.vocab as usize * shape.d_model as usize / 2; // INT4 packed
    let weights: Vec<u8> = (0..embedding_size)
        .map(|i| ((i * 11 + 7) % 256) as u8)
        .collect();
    let artifact = ModelArtifact::new(manifest, weights, None, None, vec![]);

    // Load the model; bail out early with a hint if the daemon is unreachable.
    println!("Loading model...");
    let model_id = match engine.load(&artifact) {
        Ok(id) => id,
        Err(e) => {
            eprintln!("Failed to load model: {}", e);
            eprintln!("\nMake sure the FPGA daemon is running:");
            eprintln!(" ruvector-fpga-daemon --socket {}", socket_path);
            return Err(e.into());
        }
    };
    println!("Model loaded: {}", model_id);

    // Deterministic token stream with an all-ones attention mask.
    let tokens: Vec<u16> = (0..shape.seq_len).map(|i| i * 2).collect();
    let mask = vec![1u8; shape.seq_len as usize];

    println!("\nRunning FPGA inference...");
    let req = InferenceRequest::new(model_id, shape, &tokens, &mask, GateHint::allow_all());
    match engine.infer(req) {
        Ok(result) => {
            // Report the execution witness returned alongside the result.
            println!("Inference successful!");
            println!(" Backend: {:?}", result.witness.backend);
            println!(" Cycles: {}", result.witness.cycles);
            println!(
                " Latency: {}ns ({:.3}ms)",
                result.witness.latency_ns,
                result.witness.latency_ns as f64 / 1_000_000.0
            );
            println!(" Gate decision: {:?}", result.witness.gate_decision);
            // Show the first five of the (up to 16) top-k predictions.
            if let Some(topk) = &result.topk {
                println!("\n Top-5 predictions:");
                for (rank, (token, score)) in topk.iter().take(5).enumerate() {
                    println!(" {}. Token {} (score: {})", rank + 1, token, score);
                }
            }
        }
        Err(e) => eprintln!("Inference failed: {}", e),
    }

    // Release the model before exiting, even if inference itself failed.
    engine.unload(model_id)?;
    println!("\nModel unloaded");
    Ok(())
}