feat: Complete Rust port of WiFi-DensePose with modular crates
Major changes:
- Organized Python v1 implementation into v1/ subdirectory
- Created Rust workspace with 9 modular crates:
  - wifi-densepose-core: Core types, traits, errors
  - wifi-densepose-signal: CSI processing, phase sanitization, FFT
  - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch)
  - wifi-densepose-api: Axum-based REST/WebSocket API
  - wifi-densepose-db: SQLx database layer
  - wifi-densepose-config: Configuration management
  - wifi-densepose-hardware: Hardware abstraction
  - wifi-densepose-wasm: WebAssembly bindings
  - wifi-densepose-cli: Command-line interface

Documentation:
- ADR-001: Workspace structure
- ADR-002: Signal processing library selection
- ADR-003: Neural network inference strategy
- DDD domain model with bounded contexts

Testing:
- 69 tests passing across all crates
- Signal processing: 45 tests
- Neural networks: 21 tests
- Core: 3 doc tests

Performance targets:
- 10x faster CSI processing (~0.5ms vs ~5ms)
- 5x lower memory usage (~100MB vs ~500MB)
- WASM support for browser deployment
@@ -0,0 +1,569 @@
//! Inference engine abstraction for neural network backends.
//!
//! This module provides a unified interface for running inference across
//! different backends (ONNX Runtime, tch-rs, Candle).
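//!
//! # Example
//!
//! A minimal usage sketch built on the `MockBackend` defined in this module.
//! It is marked `ignore` because the exact import path and the ambient Tokio
//! runtime (which `InferenceEngine::infer` needs for its statistics task) are
//! assumptions, not guarantees.
//!
//! ```ignore
//! // Build an engine backed by the in-crate mock backend (CPU, batch size 1).
//! let engine = EngineBuilder::new()
//!     .cpu()
//!     .batch_size(1)
//!     .build_mock();
//!
//! // The mock backend returns a zero tensor matching its declared output shape.
//! let input = Tensor::zeros_4d([1, 256, 64, 64]);
//! let output = engine.infer(&input).unwrap();
//! assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
//! ```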

use crate::densepose::{DensePoseConfig, DensePoseOutput};
use crate::error::{NnError, NnResult};
use crate::tensor::{Tensor, TensorShape};
use crate::translator::TranslatorConfig;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, instrument};

/// Options for inference execution
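///
/// Every field carries a `serde` default, so a partial configuration
/// deserializes cleanly. A sketch is below (marked `ignore`; it assumes
/// `serde_json` is available, which is not declared in this file):
///
/// ```ignore
/// // Only `use_gpu` is given; the remaining fields fall back to their defaults.
/// let opts: InferenceOptions = serde_json::from_str(r#"{ "use_gpu": true }"#).unwrap();
/// assert!(opts.use_gpu);
/// assert_eq!(opts.batch_size, 1);   // default_batch_size()
/// assert_eq!(opts.num_threads, 4);  // default_num_threads()
/// assert!(opts.optimize);           // default_optimize()
/// ```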
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceOptions {
    /// Batch size for inference
    #[serde(default = "default_batch_size")]
    pub batch_size: usize,
    /// Whether to use GPU acceleration
    #[serde(default)]
    pub use_gpu: bool,
    /// GPU device ID (if using GPU)
    #[serde(default)]
    pub gpu_device_id: usize,
    /// Number of CPU threads for inference
    #[serde(default = "default_num_threads")]
    pub num_threads: usize,
    /// Enable model optimization/fusion
    #[serde(default = "default_optimize")]
    pub optimize: bool,
    /// Memory limit in bytes (0 = unlimited)
    #[serde(default)]
    pub memory_limit: usize,
    /// Enable profiling
    #[serde(default)]
    pub profiling: bool,
}

fn default_batch_size() -> usize {
    1
}

fn default_num_threads() -> usize {
    4
}

fn default_optimize() -> bool {
    true
}

impl Default for InferenceOptions {
    fn default() -> Self {
        Self {
            batch_size: default_batch_size(),
            use_gpu: false,
            gpu_device_id: 0,
            num_threads: default_num_threads(),
            optimize: default_optimize(),
            memory_limit: 0,
            profiling: false,
        }
    }
}

impl InferenceOptions {
    /// Create options for CPU inference
    pub fn cpu() -> Self {
        Self::default()
    }

    /// Create options for GPU inference
    pub fn gpu(device_id: usize) -> Self {
        Self {
            use_gpu: true,
            gpu_device_id: device_id,
            ..Default::default()
        }
    }

    /// Set batch size
    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
        self.batch_size = batch_size;
        self
    }

    /// Set number of threads
    pub fn with_threads(mut self, num_threads: usize) -> Self {
        self.num_threads = num_threads;
        self
    }
}

/// Backend trait for different inference engines
pub trait Backend: Send + Sync {
    /// Get the backend name
    fn name(&self) -> &str;

    /// Check if the backend is available
    fn is_available(&self) -> bool;

    /// Get input names
    fn input_names(&self) -> Vec<String>;

    /// Get output names
    fn output_names(&self) -> Vec<String>;

    /// Get input shape for a given input name
    fn input_shape(&self, name: &str) -> Option<TensorShape>;

    /// Get output shape for a given output name
    fn output_shape(&self, name: &str) -> Option<TensorShape>;

    /// Run inference
    fn run(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>>;

    /// Run inference on a single input
    fn run_single(&self, input: &Tensor) -> NnResult<Tensor> {
        let input_names = self.input_names();
        let output_names = self.output_names();

        if input_names.is_empty() {
            return Err(NnError::inference("No input names defined"));
        }
        if output_names.is_empty() {
            return Err(NnError::inference("No output names defined"));
        }

        let mut inputs = HashMap::new();
        inputs.insert(input_names[0].clone(), input.clone());

        let outputs = self.run(inputs)?;
        outputs
            .into_iter()
            .next()
            .map(|(_, v)| v)
            .ok_or_else(|| NnError::inference("No outputs returned"))
    }

    /// Warm up the model (optional pre-run for optimization)
    fn warmup(&self) -> NnResult<()> {
        Ok(())
    }

    /// Get memory usage in bytes
    fn memory_usage(&self) -> usize {
        0
    }
}

/// Mock backend for testing
#[derive(Debug)]
pub struct MockBackend {
    name: String,
    input_shapes: HashMap<String, TensorShape>,
    output_shapes: HashMap<String, TensorShape>,
}

impl MockBackend {
    /// Create a new mock backend
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            input_shapes: HashMap::new(),
            output_shapes: HashMap::new(),
        }
    }

    /// Add an input definition
    pub fn with_input(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
        self.input_shapes.insert(name.into(), shape);
        self
    }

    /// Add an output definition
    pub fn with_output(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
        self.output_shapes.insert(name.into(), shape);
        self
    }
}

impl Backend for MockBackend {
    fn name(&self) -> &str {
        &self.name
    }

    fn is_available(&self) -> bool {
        true
    }

    fn input_names(&self) -> Vec<String> {
        self.input_shapes.keys().cloned().collect()
    }

    fn output_names(&self) -> Vec<String> {
        self.output_shapes.keys().cloned().collect()
    }

    fn input_shape(&self, name: &str) -> Option<TensorShape> {
        self.input_shapes.get(name).cloned()
    }

    fn output_shape(&self, name: &str) -> Option<TensorShape> {
        self.output_shapes.get(name).cloned()
    }

    fn run(&self, _inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
        let mut outputs = HashMap::new();

        for (name, shape) in &self.output_shapes {
            let dims: Vec<usize> = shape.dims().to_vec();
            if dims.len() == 4 {
                outputs.insert(
                    name.clone(),
                    Tensor::zeros_4d([dims[0], dims[1], dims[2], dims[3]]),
                );
            }
        }

        Ok(outputs)
    }
}

/// Unified inference engine that supports multiple backends
pub struct InferenceEngine<B: Backend> {
    backend: B,
    options: InferenceOptions,
    /// Inference statistics
    stats: Arc<RwLock<InferenceStats>>,
}

/// Statistics for inference performance
#[derive(Debug, Default, Clone)]
pub struct InferenceStats {
    /// Total number of inferences
    pub total_inferences: u64,
    /// Total inference time in milliseconds
    pub total_time_ms: f64,
    /// Average inference time in milliseconds
    pub avg_time_ms: f64,
    /// Minimum inference time in milliseconds
    pub min_time_ms: f64,
    /// Maximum inference time in milliseconds
    pub max_time_ms: f64,
    /// Most recent inference time in milliseconds
    pub last_time_ms: f64,
}

impl InferenceStats {
    /// Record a new inference timing
    pub fn record(&mut self, time_ms: f64) {
        self.total_inferences += 1;
        self.total_time_ms += time_ms;
        self.last_time_ms = time_ms;
        self.avg_time_ms = self.total_time_ms / self.total_inferences as f64;

        if self.total_inferences == 1 {
            self.min_time_ms = time_ms;
            self.max_time_ms = time_ms;
        } else {
            self.min_time_ms = self.min_time_ms.min(time_ms);
            self.max_time_ms = self.max_time_ms.max(time_ms);
        }
    }
}

impl<B: Backend> InferenceEngine<B> {
    /// Create a new inference engine with a backend
    pub fn new(backend: B, options: InferenceOptions) -> Self {
        Self {
            backend,
            options,
            stats: Arc::new(RwLock::new(InferenceStats::default())),
        }
    }

    /// Get the backend
    pub fn backend(&self) -> &B {
        &self.backend
    }

    /// Get the options
    pub fn options(&self) -> &InferenceOptions {
        &self.options
    }

    /// Check if GPU is being used
    pub fn uses_gpu(&self) -> bool {
        self.options.use_gpu && self.backend.is_available()
    }

    /// Warm up the engine
    pub fn warmup(&self) -> NnResult<()> {
        info!("Warming up inference engine: {}", self.backend.name());
        self.backend.warmup()
    }

    /// Run inference on a single input
    #[instrument(skip(self, input))]
    pub fn infer(&self, input: &Tensor) -> NnResult<Tensor> {
        let start = std::time::Instant::now();

        let result = self.backend.run_single(input)?;

        let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
        debug!(elapsed_ms = %elapsed_ms, "Inference completed");

        // Update stats asynchronously (best effort). Note that `tokio::spawn`
        // requires an active Tokio runtime, so `infer` must be called from
        // within one.
        let stats = self.stats.clone();
        tokio::spawn(async move {
            let mut stats = stats.write().await;
            stats.record(elapsed_ms);
        });

        Ok(result)
    }

    /// Run inference with named inputs
    #[instrument(skip(self, inputs))]
    pub fn infer_named(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
        let start = std::time::Instant::now();

        let result = self.backend.run(inputs)?;

        let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
        debug!(elapsed_ms = %elapsed_ms, "Named inference completed");

        Ok(result)
    }

    /// Run batched inference
    pub fn infer_batch(&self, inputs: &[Tensor]) -> NnResult<Vec<Tensor>> {
        inputs.iter().map(|input| self.infer(input)).collect()
    }

    /// Get inference statistics
    pub async fn stats(&self) -> InferenceStats {
        self.stats.read().await.clone()
    }

    /// Reset statistics
    pub async fn reset_stats(&self) {
        let mut stats = self.stats.write().await;
        *stats = InferenceStats::default();
    }

    /// Get memory usage
    pub fn memory_usage(&self) -> usize {
        self.backend.memory_usage()
    }
}

/// Combined pipeline for WiFi-DensePose inference
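///
/// The pipeline chains two backends: the translator maps CSI tensors to
/// visual features, and the DensePose head maps those features to body-part
/// segmentation and UV coordinates. A wiring sketch follows (marked `ignore`;
/// the tensor shapes and the `Default` impls on both configs are assumptions
/// for illustration only):
///
/// ```ignore
/// let translator = MockBackend::new("translator")
///     .with_input("csi", TensorShape::new(vec![1, 3, 64, 64]))
///     .with_output("features", TensorShape::new(vec![1, 256, 64, 64]));
///
/// let densepose = MockBackend::new("densepose")
///     .with_input("features", TensorShape::new(vec![1, 256, 64, 64]))
///     .with_output("segmentation", TensorShape::new(vec![1, 15, 112, 112]))
///     .with_output("uv_coordinates", TensorShape::new(vec![1, 48, 112, 112]));
///
/// let pipeline = WiFiDensePosePipeline::new(
///     translator,
///     densepose,
///     TranslatorConfig::default(),
///     DensePoseConfig::default(),
///     InferenceOptions::cpu(),
/// );
///
/// let output = pipeline.run(&Tensor::zeros_4d([1, 3, 64, 64]))?;
/// ```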
pub struct WiFiDensePosePipeline<B: Backend> {
    /// Modality translator backend
    translator_backend: B,
    /// DensePose backend
    densepose_backend: B,
    /// Translator configuration
    translator_config: TranslatorConfig,
    /// DensePose configuration
    densepose_config: DensePoseConfig,
    /// Inference options
    options: InferenceOptions,
}

impl<B: Backend> WiFiDensePosePipeline<B> {
    /// Create a new pipeline
    pub fn new(
        translator_backend: B,
        densepose_backend: B,
        translator_config: TranslatorConfig,
        densepose_config: DensePoseConfig,
        options: InferenceOptions,
    ) -> Self {
        Self {
            translator_backend,
            densepose_backend,
            translator_config,
            densepose_config,
            options,
        }
    }

    /// Run the full pipeline: CSI -> Visual Features -> DensePose
    #[instrument(skip(self, csi_input))]
    pub fn run(&self, csi_input: &Tensor) -> NnResult<DensePoseOutput> {
        // Step 1: Translate CSI to visual features
        let visual_features = self.translator_backend.run_single(csi_input)?;

        // Step 2: Run DensePose on visual features
        let mut inputs = HashMap::new();
        inputs.insert("features".to_string(), visual_features);

        let outputs = self.densepose_backend.run(inputs)?;

        // Extract outputs
        let segmentation = outputs
            .get("segmentation")
            .cloned()
            .ok_or_else(|| NnError::inference("Missing segmentation output"))?;

        let uv_coordinates = outputs
            .get("uv_coordinates")
            .cloned()
            .ok_or_else(|| NnError::inference("Missing uv_coordinates output"))?;

        Ok(DensePoseOutput {
            segmentation,
            uv_coordinates,
            confidence: None,
        })
    }

    /// Get translator config
    pub fn translator_config(&self) -> &TranslatorConfig {
        &self.translator_config
    }

    /// Get DensePose config
    pub fn densepose_config(&self) -> &DensePoseConfig {
        &self.densepose_config
    }
}

/// Builder for creating inference engines
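///
/// Besides the mock backend used in tests, the builder can produce an
/// ONNX-backed engine when the `onnx` feature is enabled. A sketch
/// (marked `ignore`; the model path is a placeholder):
///
/// ```ignore
/// let engine = EngineBuilder::new()
///     .model_path("path/to/model.onnx")
///     .gpu(0)
///     .batch_size(1)
///     .build_onnx()?;
///
/// engine.warmup()?;
/// ```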
pub struct EngineBuilder {
    options: InferenceOptions,
    model_path: Option<String>,
}

impl EngineBuilder {
    /// Create a new builder
    pub fn new() -> Self {
        Self {
            options: InferenceOptions::default(),
            model_path: None,
        }
    }

    /// Set inference options
    pub fn options(mut self, options: InferenceOptions) -> Self {
        self.options = options;
        self
    }

    /// Set model path
    pub fn model_path(mut self, path: impl Into<String>) -> Self {
        self.model_path = Some(path.into());
        self
    }

    /// Use GPU
    pub fn gpu(mut self, device_id: usize) -> Self {
        self.options.use_gpu = true;
        self.options.gpu_device_id = device_id;
        self
    }

    /// Use CPU
    pub fn cpu(mut self) -> Self {
        self.options.use_gpu = false;
        self
    }

    /// Set batch size
    pub fn batch_size(mut self, size: usize) -> Self {
        self.options.batch_size = size;
        self
    }

    /// Set number of threads
    pub fn threads(mut self, n: usize) -> Self {
        self.options.num_threads = n;
        self
    }

    /// Build with a mock backend (for testing)
    pub fn build_mock(self) -> InferenceEngine<MockBackend> {
        let backend = MockBackend::new("mock")
            .with_input("input", TensorShape::new(vec![1, 256, 64, 64]))
            .with_output("output", TensorShape::new(vec![1, 256, 64, 64]));

        InferenceEngine::new(backend, self.options)
    }

    /// Build with ONNX backend
    #[cfg(feature = "onnx")]
    pub fn build_onnx(self) -> NnResult<InferenceEngine<crate::onnx::OnnxBackend>> {
        let model_path = self
            .model_path
            .ok_or_else(|| NnError::config("Model path required for ONNX backend"))?;

        let backend = crate::onnx::OnnxBackend::from_file(&model_path)?;
        Ok(InferenceEngine::new(backend, self.options))
    }
}

impl Default for EngineBuilder {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_inference_options() {
        let opts = InferenceOptions::cpu().with_batch_size(4).with_threads(8);
        assert_eq!(opts.batch_size, 4);
        assert_eq!(opts.num_threads, 8);
        assert!(!opts.use_gpu);

        let gpu_opts = InferenceOptions::gpu(0);
        assert!(gpu_opts.use_gpu);
        assert_eq!(gpu_opts.gpu_device_id, 0);
    }

    #[test]
    fn test_mock_backend() {
        let backend = MockBackend::new("test")
            .with_input("input", TensorShape::new(vec![1, 3, 224, 224]))
            .with_output("output", TensorShape::new(vec![1, 1000]));

        assert_eq!(backend.name(), "test");
        assert!(backend.is_available());
        assert_eq!(backend.input_names(), vec!["input".to_string()]);
        assert_eq!(backend.output_names(), vec!["output".to_string()]);
    }

    #[test]
    fn test_engine_builder() {
        let engine = EngineBuilder::new()
            .cpu()
            .batch_size(2)
            .threads(4)
            .build_mock();

        assert_eq!(engine.options().batch_size, 2);
        assert_eq!(engine.options().num_threads, 4);
    }

    #[test]
    fn test_inference_stats() {
        let mut stats = InferenceStats::default();
        stats.record(10.0);
        stats.record(20.0);
        stats.record(15.0);

        assert_eq!(stats.total_inferences, 3);
        assert_eq!(stats.min_time_ms, 10.0);
        assert_eq!(stats.max_time_ms, 20.0);
        assert_eq!(stats.avg_time_ms, 15.0);
    }

    #[tokio::test]
    async fn test_inference_engine() {
        let engine = EngineBuilder::new().build_mock();

        let input = Tensor::zeros_4d([1, 256, 64, 64]);
        let output = engine.infer(&input).unwrap();

        assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
    }
}