feat: Complete Rust port of WiFi-DensePose with modular crates
Major changes:
- Organized Python v1 implementation into v1/ subdirectory
- Created Rust workspace with 9 modular crates:
  - wifi-densepose-core: Core types, traits, errors
  - wifi-densepose-signal: CSI processing, phase sanitization, FFT
  - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch)
  - wifi-densepose-api: Axum-based REST/WebSocket API
  - wifi-densepose-db: SQLx database layer
  - wifi-densepose-config: Configuration management
  - wifi-densepose-hardware: Hardware abstraction
  - wifi-densepose-wasm: WebAssembly bindings
  - wifi-densepose-cli: Command-line interface

Documentation:
- ADR-001: Workspace structure
- ADR-002: Signal processing library selection
- ADR-003: Neural network inference strategy
- DDD domain model with bounded contexts

Testing:
- 69 tests passing across all crates
- Signal processing: 45 tests
- Neural networks: 21 tests
- Core: 3 doc tests

Performance targets:
- 10x faster CSI processing (~0.5ms vs ~5ms)
- 5x lower memory usage (~100MB vs ~500MB)
- WASM support for browser deployment
@@ -0,0 +1,569 @@
//! Inference engine abstraction for neural network backends.
//!
//! This module provides a unified interface for running inference across
//! different backends (ONNX Runtime, tch-rs, Candle).
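//!
//! # Example
//!
//! A minimal usage sketch built on the `MockBackend` defined in this module.
//! It is marked `ignore` because the exact import path and the ambient Tokio
//! runtime (which `InferenceEngine::infer` needs for its statistics task) are
//! assumptions, not guarantees.
//!
//! ```ignore
//! // Build an engine backed by the in-crate mock backend (CPU, batch size 1).
//! let engine = EngineBuilder::new()
//!     .cpu()
//!     .batch_size(1)
//!     .build_mock();
//!
//! // The mock backend returns a zero tensor matching its declared output shape.
//! let input = Tensor::zeros_4d([1, 256, 64, 64]);
//! let output = engine.infer(&input).unwrap();
//! assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
//! ```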

use crate::densepose::{DensePoseConfig, DensePoseOutput};
use crate::error::{NnError, NnResult};
use crate::tensor::{Tensor, TensorShape};
use crate::translator::TranslatorConfig;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, instrument};

/// Options for inference execution
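///
/// Every field carries a `serde` default, so a partial configuration
/// deserializes cleanly. A sketch is below (marked `ignore`; it assumes
/// `serde_json` is available, which is not declared in this file):
///
/// ```ignore
/// // Only `use_gpu` is given; the remaining fields fall back to their defaults.
/// let opts: InferenceOptions = serde_json::from_str(r#"{ "use_gpu": true }"#).unwrap();
/// assert!(opts.use_gpu);
/// assert_eq!(opts.batch_size, 1);   // default_batch_size()
/// assert_eq!(opts.num_threads, 4);  // default_num_threads()
/// assert!(opts.optimize);           // default_optimize()
/// ```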
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceOptions {
    /// Batch size for inference
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
    /// Whether to use GPU acceleration
    #[serde(default)]
    pub use_gpu: bool,
    /// GPU device ID (if using GPU)
    #[serde(default)]
    pub gpu_device_id: usize,
    /// Number of CPU threads for inference
    #[serde(default = "default_num_threads")]
    pub num_threads: usize,
    /// Enable model optimization/fusion
    #[serde(default = "default_optimize")]
    pub optimize: bool,
    /// Memory limit in bytes (0 = unlimited)
    #[serde(default)]
    pub memory_limit: usize,
    /// Enable profiling
    #[serde(default)]
    pub profiling: bool,
}

fn default_batch_size() -> usize {
    1
}

fn default_num_threads() -> usize {
    4
}

fn default_optimize() -> bool {
    true
}

impl Default for InferenceOptions {
    fn default() -> Self {
        Self {
            batch_size: default_batch_size(),
            use_gpu: false,
            gpu_device_id: 0,
            num_threads: default_num_threads(),
            optimize: default_optimize(),
            memory_limit: 0,
            profiling: false,
        }
    }
}

impl InferenceOptions {
    /// Create options for CPU inference
    pub fn cpu() -> Self {
        Self::default()
    }

    /// Create options for GPU inference
    pub fn gpu(device_id: usize) -> Self {
        Self {
            use_gpu: true,
            gpu_device_id: device_id,
            ..Default::default()
        }
    }

    /// Set batch size
    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
        self.batch_size = batch_size;
        self
    }

    /// Set number of threads
    pub fn with_threads(mut self, num_threads: usize) -> Self {
        self.num_threads = num_threads;
        self
    }
}

/// Backend trait for different inference engines
pub trait Backend: Send + Sync {
    /// Get the backend name
    fn name(&self) -> &str;

    /// Check if the backend is available
    fn is_available(&self) -> bool;

    /// Get input names
    fn input_names(&self) -> Vec<String>;

    /// Get output names
    fn output_names(&self) -> Vec<String>;

    /// Get input shape for a given input name
    fn input_shape(&self, name: &str) -> Option<TensorShape>;

    /// Get output shape for a given output name
    fn output_shape(&self, name: &str) -> Option<TensorShape>;

    /// Run inference
    fn run(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>>;

    /// Run inference on a single input
    fn run_single(&self, input: &Tensor) -> NnResult<Tensor> {
        let input_names = self.input_names();
        let output_names = self.output_names();

        if input_names.is_empty() {
            return Err(NnError::inference("No input names defined"));
        }
        if output_names.is_empty() {
            return Err(NnError::inference("No output names defined"));
        }

        let mut inputs = HashMap::new();
        inputs.insert(input_names[0].clone(), input.clone());

        let outputs = self.run(inputs)?;
        outputs
            .into_iter()
            .next()
            .map(|(_, v)| v)
            .ok_or_else(|| NnError::inference("No outputs returned"))
    }

    /// Warm up the model (optional pre-run for optimization)
    fn warmup(&self) -> NnResult<()> {
        Ok(())
    }

    /// Get memory usage in bytes
    fn memory_usage(&self) -> usize {
        0
    }
}

/// Mock backend for testing
#[derive(Debug)]
pub struct MockBackend {
    name: String,
    input_shapes: HashMap<String, TensorShape>,
    output_shapes: HashMap<String, TensorShape>,
}

impl MockBackend {
    /// Create a new mock backend
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            input_shapes: HashMap::new(),
            output_shapes: HashMap::new(),
        }
    }

    /// Add an input definition
    pub fn with_input(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
        self.input_shapes.insert(name.into(), shape);
        self
    }

    /// Add an output definition
    pub fn with_output(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
        self.output_shapes.insert(name.into(), shape);
        self
    }
}

impl Backend for MockBackend {
    fn name(&self) -> &str {
        &self.name
    }

    fn is_available(&self) -> bool {
        true
    }

    fn input_names(&self) -> Vec<String> {
        self.input_shapes.keys().cloned().collect()
    }

    fn output_names(&self) -> Vec<String> {
        self.output_shapes.keys().cloned().collect()
    }

    fn input_shape(&self, name: &str) -> Option<TensorShape> {
        self.input_shapes.get(name).cloned()
    }

    fn output_shape(&self, name: &str) -> Option<TensorShape> {
        self.output_shapes.get(name).cloned()
    }

    fn run(&self, _inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
        let mut outputs = HashMap::new();

        for (name, shape) in &self.output_shapes {
            let dims: Vec<usize> = shape.dims().to_vec();
            if dims.len() == 4 {
                outputs.insert(
                    name.clone(),
                    Tensor::zeros_4d([dims[0], dims[1], dims[2], dims[3]]),
                );
            }
        }

        Ok(outputs)
    }
}

/// Unified inference engine that supports multiple backends
pub struct InferenceEngine<B: Backend> {
    backend: B,
    options: InferenceOptions,
    /// Inference statistics
    stats: Arc<RwLock<InferenceStats>>,
}

/// Statistics for inference performance
#[derive(Debug, Default, Clone)]
pub struct InferenceStats {
    /// Total number of inferences
    pub total_inferences: u64,
    /// Total inference time in milliseconds
    pub total_time_ms: f64,
    /// Average inference time in milliseconds
    pub avg_time_ms: f64,
    /// Minimum inference time in milliseconds
    pub min_time_ms: f64,
    /// Maximum inference time in milliseconds
    pub max_time_ms: f64,
    /// Most recent inference time in milliseconds
    pub last_time_ms: f64,
}

impl InferenceStats {
    /// Record a new inference timing
    pub fn record(&mut self, time_ms: f64) {
        self.total_inferences += 1;
        self.total_time_ms += time_ms;
        self.last_time_ms = time_ms;
        self.avg_time_ms = self.total_time_ms / self.total_inferences as f64;

        if self.total_inferences == 1 {
            self.min_time_ms = time_ms;
            self.max_time_ms = time_ms;
        } else {
            self.min_time_ms = self.min_time_ms.min(time_ms);
            self.max_time_ms = self.max_time_ms.max(time_ms);
        }
    }
}

impl<B: Backend> InferenceEngine<B> {
    /// Create a new inference engine with a backend
    pub fn new(backend: B, options: InferenceOptions) -> Self {
        Self {
            backend,
            options,
            stats: Arc::new(RwLock::new(InferenceStats::default())),
        }
    }

    /// Get the backend
    pub fn backend(&self) -> &B {
        &self.backend
    }

    /// Get the options
    pub fn options(&self) -> &InferenceOptions {
        &self.options
    }

    /// Check if GPU is being used
    pub fn uses_gpu(&self) -> bool {
        self.options.use_gpu && self.backend.is_available()
    }

    /// Warm up the engine
    pub fn warmup(&self) -> NnResult<()> {
        info!("Warming up inference engine: {}", self.backend.name());
        self.backend.warmup()
    }

    /// Run inference on a single input
    #[instrument(skip(self, input))]
    pub fn infer(&self, input: &Tensor) -> NnResult<Tensor> {
        let start = std::time::Instant::now();

        let result = self.backend.run_single(input)?;

        let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
        debug!(elapsed_ms = %elapsed_ms, "Inference completed");

        // Update stats asynchronously (best effort). Note that `tokio::spawn`
        // requires an active Tokio runtime, so `infer` must be called from
        // within one.
        let stats = self.stats.clone();
        tokio::spawn(async move {
            let mut stats = stats.write().await;
            stats.record(elapsed_ms);
        });

        Ok(result)
    }

    /// Run inference with named inputs
    #[instrument(skip(self, inputs))]
    pub fn infer_named(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
        let start = std::time::Instant::now();

        let result = self.backend.run(inputs)?;

        let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
        debug!(elapsed_ms = %elapsed_ms, "Named inference completed");

        Ok(result)
    }

    /// Run batched inference
    pub fn infer_batch(&self, inputs: &[Tensor]) -> NnResult<Vec<Tensor>> {
        inputs.iter().map(|input| self.infer(input)).collect()
    }

    /// Get inference statistics
    pub async fn stats(&self) -> InferenceStats {
        self.stats.read().await.clone()
    }

    /// Reset statistics
    pub async fn reset_stats(&self) {
        let mut stats = self.stats.write().await;
        *stats = InferenceStats::default();
    }

    /// Get memory usage
    pub fn memory_usage(&self) -> usize {
        self.backend.memory_usage()
    }
}

/// Combined pipeline for WiFi-DensePose inference
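///
/// The pipeline chains two backends: the translator maps CSI tensors to
/// visual features, and the DensePose head maps those features to body-part
/// segmentation and UV coordinates. A wiring sketch follows (marked `ignore`;
/// the tensor shapes and the `Default` impls on both configs are assumptions
/// for illustration only):
///
/// ```ignore
/// let translator = MockBackend::new("translator")
///     .with_input("csi", TensorShape::new(vec![1, 3, 64, 64]))
///     .with_output("features", TensorShape::new(vec![1, 256, 64, 64]));
///
/// let densepose = MockBackend::new("densepose")
///     .with_input("features", TensorShape::new(vec![1, 256, 64, 64]))
///     .with_output("segmentation", TensorShape::new(vec![1, 15, 112, 112]))
///     .with_output("uv_coordinates", TensorShape::new(vec![1, 48, 112, 112]));
///
/// let pipeline = WiFiDensePosePipeline::new(
///     translator,
///     densepose,
///     TranslatorConfig::default(),
///     DensePoseConfig::default(),
///     InferenceOptions::cpu(),
/// );
///
/// let output = pipeline.run(&Tensor::zeros_4d([1, 3, 64, 64]))?;
/// ```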
pub struct WiFiDensePosePipeline<B: Backend> {
    /// Modality translator backend
    translator_backend: B,
    /// DensePose backend
    densepose_backend: B,
    /// Translator configuration
    translator_config: TranslatorConfig,
    /// DensePose configuration
    densepose_config: DensePoseConfig,
    /// Inference options
    options: InferenceOptions,
}

impl<B: Backend> WiFiDensePosePipeline<B> {
    /// Create a new pipeline
    pub fn new(
        translator_backend: B,
        densepose_backend: B,
        translator_config: TranslatorConfig,
        densepose_config: DensePoseConfig,
        options: InferenceOptions,
    ) -> Self {
        Self {
            translator_backend,
            densepose_backend,
            translator_config,
            densepose_config,
            options,
        }
    }

    /// Run the full pipeline: CSI -> Visual Features -> DensePose
    #[instrument(skip(self, csi_input))]
    pub fn run(&self, csi_input: &Tensor) -> NnResult<DensePoseOutput> {
        // Step 1: Translate CSI to visual features
        let visual_features = self.translator_backend.run_single(csi_input)?;

        // Step 2: Run DensePose on visual features
        let mut inputs = HashMap::new();
        inputs.insert("features".to_string(), visual_features);

        let outputs = self.densepose_backend.run(inputs)?;

        // Extract outputs
        let segmentation = outputs
            .get("segmentation")
            .cloned()
            .ok_or_else(|| NnError::inference("Missing segmentation output"))?;

        let uv_coordinates = outputs
            .get("uv_coordinates")
            .cloned()
            .ok_or_else(|| NnError::inference("Missing uv_coordinates output"))?;

        Ok(DensePoseOutput {
            segmentation,
            uv_coordinates,
            confidence: None,
        })
    }

    /// Get translator config
    pub fn translator_config(&self) -> &TranslatorConfig {
        &self.translator_config
    }

    /// Get DensePose config
    pub fn densepose_config(&self) -> &DensePoseConfig {
        &self.densepose_config
    }
}

/// Builder for creating inference engines
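///
/// Besides the mock backend used in tests, the builder can produce an
/// ONNX-backed engine when the `onnx` feature is enabled. A sketch
/// (marked `ignore`; the model path is a placeholder):
///
/// ```ignore
/// let engine = EngineBuilder::new()
///     .model_path("path/to/model.onnx")
///     .gpu(0)
///     .batch_size(1)
///     .build_onnx()?;
///
/// engine.warmup()?;
/// ```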
pub struct EngineBuilder {
    options: InferenceOptions,
    model_path: Option<String>,
}

impl EngineBuilder {
    /// Create a new builder
    pub fn new() -> Self {
        Self {
            options: InferenceOptions::default(),
            model_path: None,
        }
    }

    /// Set inference options
    pub fn options(mut self, options: InferenceOptions) -> Self {
        self.options = options;
        self
    }

    /// Set model path
    pub fn model_path(mut self, path: impl Into<String>) -> Self {
        self.model_path = Some(path.into());
        self
    }

    /// Use GPU
    pub fn gpu(mut self, device_id: usize) -> Self {
        self.options.use_gpu = true;
        self.options.gpu_device_id = device_id;
        self
    }

    /// Use CPU
    pub fn cpu(mut self) -> Self {
        self.options.use_gpu = false;
        self
    }

    /// Set batch size
    pub fn batch_size(mut self, size: usize) -> Self {
        self.options.batch_size = size;
        self
    }

    /// Set number of threads
    pub fn threads(mut self, n: usize) -> Self {
        self.options.num_threads = n;
        self
    }

    /// Build with a mock backend (for testing)
    pub fn build_mock(self) -> InferenceEngine<MockBackend> {
        let backend = MockBackend::new("mock")
            .with_input("input", TensorShape::new(vec![1, 256, 64, 64]))
            .with_output("output", TensorShape::new(vec![1, 256, 64, 64]));

        InferenceEngine::new(backend, self.options)
    }

    /// Build with ONNX backend
    #[cfg(feature = "onnx")]
    pub fn build_onnx(self) -> NnResult<InferenceEngine<crate::onnx::OnnxBackend>> {
        let model_path = self
            .model_path
            .ok_or_else(|| NnError::config("Model path required for ONNX backend"))?;

        let backend = crate::onnx::OnnxBackend::from_file(&model_path)?;
        Ok(InferenceEngine::new(backend, self.options))
    }
}

impl Default for EngineBuilder {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_inference_options() {
        let opts = InferenceOptions::cpu().with_batch_size(4).with_threads(8);
        assert_eq!(opts.batch_size, 4);
        assert_eq!(opts.num_threads, 8);
        assert!(!opts.use_gpu);

        let gpu_opts = InferenceOptions::gpu(0);
        assert!(gpu_opts.use_gpu);
        assert_eq!(gpu_opts.gpu_device_id, 0);
    }

    #[test]
    fn test_mock_backend() {
        let backend = MockBackend::new("test")
            .with_input("input", TensorShape::new(vec![1, 3, 224, 224]))
            .with_output("output", TensorShape::new(vec![1, 1000]));

        assert_eq!(backend.name(), "test");
        assert!(backend.is_available());
        assert_eq!(backend.input_names(), vec!["input".to_string()]);
        assert_eq!(backend.output_names(), vec!["output".to_string()]);
    }

    #[test]
    fn test_engine_builder() {
        let engine = EngineBuilder::new()
            .cpu()
            .batch_size(2)
            .threads(4)
            .build_mock();

        assert_eq!(engine.options().batch_size, 2);
        assert_eq!(engine.options().num_threads, 4);
    }

    #[test]
    fn test_inference_stats() {
        let mut stats = InferenceStats::default();
        stats.record(10.0);
        stats.record(20.0);
        stats.record(15.0);

        assert_eq!(stats.total_inferences, 3);
        assert_eq!(stats.min_time_ms, 10.0);
        assert_eq!(stats.max_time_ms, 20.0);
        assert_eq!(stats.avg_time_ms, 15.0);
    }

    #[tokio::test]
    async fn test_inference_engine() {
        let engine = EngineBuilder::new().build_mock();

        let input = Tensor::zeros_4d([1, 256, 64, 64]);
        let output = engine.infer(&input).unwrap();

        assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
    }
}