wifi-densepose/rust-port/wifi-densepose-rs/crates/wifi-densepose-nn/src/inference.rs
Claude 7c00482314 fix: Clean up Rust warnings and add Python vital signs detection
Rust changes:
- Fix unused variable warnings in wifi-densepose-nn (densepose.rs, inference.rs, tensor.rs, translator.rs)
- Remove unused imports in wifi-densepose-mat (breathing.rs, pipeline.rs, csi_receiver.rs, debris_model.rs, vital_signs_classifier.rs)
- All tests continue to pass

Python changes:
- Add vital_signs.py module with BreathingDetector and HeartbeatDetector classes
- Mirror Rust wifi-densepose-mat detection functionality
- Update v1 package version to 1.2.0
- Export new vital signs classes from core module
2026-01-14 17:42:37 +00:00

//! Inference engine abstraction for neural network backends.
//!
//! This module provides a unified interface for running inference across
//! different backends (ONNX Runtime, tch-rs, Candle).
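//!
//! # Example
//!
//! A minimal sketch using the in-crate mock backend. The import paths assume
//! this module is exported as `wifi_densepose_nn::inference`; real backends
//! (e.g. ONNX) need the corresponding feature flag and a model file.
//!
//! ```ignore
//! use wifi_densepose_nn::inference::EngineBuilder;
//! use wifi_densepose_nn::tensor::Tensor;
//!
//! // Build a CPU engine backed by the mock backend.
//! let engine = EngineBuilder::new().cpu().batch_size(1).build_mock();
//!
//! // Run a single inference on a zero tensor matching the mock input shape.
//! let input = Tensor::zeros_4d([1, 256, 64, 64]);
//! let output = engine.infer(&input).unwrap();
//! assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
//! ```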
use crate::densepose::{DensePoseConfig, DensePoseOutput};
use crate::error::{NnError, NnResult};
use crate::tensor::{Tensor, TensorShape};
use crate::translator::TranslatorConfig;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, instrument};
/// Options for inference execution
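///
/// # Example
///
/// Construction patterns mirroring the unit tests in this module:
///
/// ```ignore
/// // CPU inference with a larger batch and more threads.
/// let opts = InferenceOptions::cpu().with_batch_size(4).with_threads(8);
/// assert!(!opts.use_gpu);
///
/// // GPU inference on device 0.
/// let gpu_opts = InferenceOptions::gpu(0);
/// assert!(gpu_opts.use_gpu);
/// ```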
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceOptions {
/// Batch size for inference
#[serde(default = "default_batch_size")]
pub batch_size: usize,
/// Whether to use GPU acceleration
#[serde(default)]
pub use_gpu: bool,
/// GPU device ID (if using GPU)
#[serde(default)]
pub gpu_device_id: usize,
/// Number of CPU threads for inference
#[serde(default = "default_num_threads")]
pub num_threads: usize,
/// Enable model optimization/fusion
#[serde(default = "default_optimize")]
pub optimize: bool,
/// Memory limit in bytes (0 = unlimited)
#[serde(default)]
pub memory_limit: usize,
/// Enable profiling
#[serde(default)]
pub profiling: bool,
}
fn default_batch_size() -> usize {
1
}
fn default_num_threads() -> usize {
4
}
fn default_optimize() -> bool {
true
}
impl Default for InferenceOptions {
fn default() -> Self {
Self {
batch_size: default_batch_size(),
use_gpu: false,
gpu_device_id: 0,
num_threads: default_num_threads(),
optimize: default_optimize(),
memory_limit: 0,
profiling: false,
}
}
}
impl InferenceOptions {
/// Create options for CPU inference
pub fn cpu() -> Self {
Self::default()
}
/// Create options for GPU inference
pub fn gpu(device_id: usize) -> Self {
Self {
use_gpu: true,
gpu_device_id: device_id,
..Default::default()
}
}
/// Set batch size
pub fn with_batch_size(mut self, batch_size: usize) -> Self {
self.batch_size = batch_size;
self
}
/// Set number of threads
pub fn with_threads(mut self, num_threads: usize) -> Self {
self.num_threads = num_threads;
self
}
}
/// Backend trait for different inference engines
pub trait Backend: Send + Sync {
/// Get the backend name
fn name(&self) -> &str;
/// Check if the backend is available
fn is_available(&self) -> bool;
/// Get input names
fn input_names(&self) -> Vec<String>;
/// Get output names
fn output_names(&self) -> Vec<String>;
/// Get input shape for a given input name
fn input_shape(&self, name: &str) -> Option<TensorShape>;
/// Get output shape for a given output name
fn output_shape(&self, name: &str) -> Option<TensorShape>;
/// Run inference
fn run(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>>;
/// Run inference on a single input
fn run_single(&self, input: &Tensor) -> NnResult<Tensor> {
let input_names = self.input_names();
let output_names = self.output_names();
if input_names.is_empty() {
return Err(NnError::inference("No input names defined"));
}
if output_names.is_empty() {
return Err(NnError::inference("No output names defined"));
}
let mut inputs = HashMap::new();
inputs.insert(input_names[0].clone(), input.clone());
let outputs = self.run(inputs)?;
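// Take the first output; with multiple outputs the choice is unspecified
// because `HashMap` iteration order is arbitrary.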
outputs
.into_iter()
.next()
.map(|(_, v)| v)
.ok_or_else(|| NnError::inference("No outputs returned"))
}
/// Warm up the model (optional pre-run for optimization)
fn warmup(&self) -> NnResult<()> {
Ok(())
}
/// Get memory usage in bytes
fn memory_usage(&self) -> usize {
0
}
}
/// Mock backend for testing
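///
/// # Example
///
/// Mirrors `test_mock_backend` below:
///
/// ```ignore
/// let backend = MockBackend::new("test")
///     .with_input("input", TensorShape::new(vec![1, 3, 224, 224]))
///     .with_output("output", TensorShape::new(vec![1, 1000]));
/// assert_eq!(backend.name(), "test");
/// assert!(backend.is_available());
/// ```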
#[derive(Debug)]
pub struct MockBackend {
name: String,
input_shapes: HashMap<String, TensorShape>,
output_shapes: HashMap<String, TensorShape>,
}
impl MockBackend {
/// Create a new mock backend
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
input_shapes: HashMap::new(),
output_shapes: HashMap::new(),
}
}
/// Add an input definition
pub fn with_input(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
self.input_shapes.insert(name.into(), shape);
self
}
/// Add an output definition
pub fn with_output(mut self, name: impl Into<String>, shape: TensorShape) -> Self {
self.output_shapes.insert(name.into(), shape);
self
}
}
impl Backend for MockBackend {
fn name(&self) -> &str {
&self.name
}
fn is_available(&self) -> bool {
true
}
fn input_names(&self) -> Vec<String> {
self.input_shapes.keys().cloned().collect()
}
fn output_names(&self) -> Vec<String> {
self.output_shapes.keys().cloned().collect()
}
fn input_shape(&self, name: &str) -> Option<TensorShape> {
self.input_shapes.get(name).cloned()
}
fn output_shape(&self, name: &str) -> Option<TensorShape> {
self.output_shapes.get(name).cloned()
}
fn run(&self, _inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
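// Produce a zero-filled tensor for every declared 4-D output shape;
// outputs with any other rank are skipped by this mock.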
let mut outputs = HashMap::new();
for (name, shape) in &self.output_shapes {
let dims: Vec<usize> = shape.dims().to_vec();
if dims.len() == 4 {
outputs.insert(
name.clone(),
Tensor::zeros_4d([dims[0], dims[1], dims[2], dims[3]]),
);
}
}
Ok(outputs)
}
}
/// Unified inference engine that supports multiple backends
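///
/// # Example
///
/// A sketch wiring the engine to the mock backend defined above:
///
/// ```ignore
/// let backend = MockBackend::new("mock")
///     .with_input("input", TensorShape::new(vec![1, 256, 64, 64]))
///     .with_output("output", TensorShape::new(vec![1, 256, 64, 64]));
/// let engine = InferenceEngine::new(backend, InferenceOptions::default());
/// assert_eq!(engine.backend().name(), "mock");
/// assert!(!engine.uses_gpu());
/// ```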
pub struct InferenceEngine<B: Backend> {
backend: B,
options: InferenceOptions,
/// Inference statistics
stats: Arc<RwLock<InferenceStats>>,
}
/// Statistics for inference performance
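///
/// # Example
///
/// ```ignore
/// let mut stats = InferenceStats::default();
/// stats.record(10.0);
/// stats.record(20.0);
/// assert_eq!(stats.total_inferences, 2);
/// assert_eq!(stats.avg_time_ms, 15.0);
/// assert_eq!(stats.min_time_ms, 10.0);
/// assert_eq!(stats.max_time_ms, 20.0);
/// ```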
#[derive(Debug, Default, Clone)]
pub struct InferenceStats {
/// Total number of inferences
pub total_inferences: u64,
/// Total inference time in milliseconds
pub total_time_ms: f64,
/// Average inference time in milliseconds
pub avg_time_ms: f64,
/// Minimum inference time in milliseconds
pub min_time_ms: f64,
/// Maximum inference time in milliseconds
pub max_time_ms: f64,
/// Most recent inference time in milliseconds
pub last_time_ms: f64,
}
impl InferenceStats {
/// Record a new inference timing
pub fn record(&mut self, time_ms: f64) {
self.total_inferences += 1;
self.total_time_ms += time_ms;
self.last_time_ms = time_ms;
self.avg_time_ms = self.total_time_ms / self.total_inferences as f64;
if self.total_inferences == 1 {
self.min_time_ms = time_ms;
self.max_time_ms = time_ms;
} else {
self.min_time_ms = self.min_time_ms.min(time_ms);
self.max_time_ms = self.max_time_ms.max(time_ms);
}
}
}
impl<B: Backend> InferenceEngine<B> {
/// Create a new inference engine with a backend
pub fn new(backend: B, options: InferenceOptions) -> Self {
Self {
backend,
options,
stats: Arc::new(RwLock::new(InferenceStats::default())),
}
}
/// Get the backend
pub fn backend(&self) -> &B {
&self.backend
}
/// Get the options
pub fn options(&self) -> &InferenceOptions {
&self.options
}
/// Check whether GPU acceleration is requested and the backend is available
pub fn uses_gpu(&self) -> bool {
self.options.use_gpu && self.backend.is_available()
}
/// Warm up the engine
pub fn warmup(&self) -> NnResult<()> {
info!("Warming up inference engine: {}", self.backend.name());
self.backend.warmup()
}
/// Run inference on a single input
#[instrument(skip(self, input))]
pub fn infer(&self, input: &Tensor) -> NnResult<Tensor> {
let start = std::time::Instant::now();
let result = self.backend.run_single(input)?;
let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
debug!(elapsed_ms = %elapsed_ms, "Inference completed");
// Update stats asynchronously (best effort); skipped when no Tokio
// runtime is active so this synchronous method never panics.
if let Ok(handle) = tokio::runtime::Handle::try_current() {
let stats = self.stats.clone();
handle.spawn(async move {
let mut stats = stats.write().await;
stats.record(elapsed_ms);
});
}
Ok(result)
}
/// Run inference with named inputs
#[instrument(skip(self, inputs))]
pub fn infer_named(&self, inputs: HashMap<String, Tensor>) -> NnResult<HashMap<String, Tensor>> {
let start = std::time::Instant::now();
let result = self.backend.run(inputs)?;
let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
debug!(elapsed_ms = %elapsed_ms, "Named inference completed");
Ok(result)
}
/// Run inference on a batch of inputs (each input is processed sequentially)
pub fn infer_batch(&self, inputs: &[Tensor]) -> NnResult<Vec<Tensor>> {
inputs.iter().map(|input| self.infer(input)).collect()
}
/// Get inference statistics
pub async fn stats(&self) -> InferenceStats {
self.stats.read().await.clone()
}
/// Reset statistics
pub async fn reset_stats(&self) {
let mut stats = self.stats.write().await;
*stats = InferenceStats::default();
}
/// Get memory usage
pub fn memory_usage(&self) -> usize {
self.backend.memory_usage()
}
}
/// Combined pipeline for WiFi-DensePose inference
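///
/// # Example
///
/// A sketch wired with mock backends, assuming `TranslatorConfig` and
/// `DensePoseConfig` provide `Default` implementations (not shown in this
/// module); the mock output shapes are illustrative only:
///
/// ```ignore
/// let translator = MockBackend::new("translator")
///     .with_input("csi", TensorShape::new(vec![1, 3, 32, 32]))
///     .with_output("features", TensorShape::new(vec![1, 256, 64, 64]));
/// let densepose = MockBackend::new("densepose")
///     .with_input("features", TensorShape::new(vec![1, 256, 64, 64]))
///     .with_output("segmentation", TensorShape::new(vec![1, 25, 112, 112]))
///     .with_output("uv_coordinates", TensorShape::new(vec![1, 48, 112, 112]));
/// let pipeline = WiFiDensePosePipeline::new(
///     translator,
///     densepose,
///     TranslatorConfig::default(),
///     DensePoseConfig::default(),
///     InferenceOptions::cpu(),
/// );
/// let output = pipeline.run(&Tensor::zeros_4d([1, 3, 32, 32])).unwrap();
/// ```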
pub struct WiFiDensePosePipeline<B: Backend> {
/// Modality translator backend
translator_backend: B,
/// DensePose backend
densepose_backend: B,
/// Translator configuration
translator_config: TranslatorConfig,
/// DensePose configuration
densepose_config: DensePoseConfig,
/// Inference options
options: InferenceOptions,
}
impl<B: Backend> WiFiDensePosePipeline<B> {
/// Create a new pipeline
pub fn new(
translator_backend: B,
densepose_backend: B,
translator_config: TranslatorConfig,
densepose_config: DensePoseConfig,
options: InferenceOptions,
) -> Self {
Self {
translator_backend,
densepose_backend,
translator_config,
densepose_config,
options,
}
}
/// Run the full pipeline: CSI -> Visual Features -> DensePose
#[instrument(skip(self, csi_input))]
pub fn run(&self, csi_input: &Tensor) -> NnResult<DensePoseOutput> {
// Step 1: Translate CSI to visual features
let visual_features = self.translator_backend.run_single(csi_input)?;
// Step 2: Run DensePose on visual features
let mut inputs = HashMap::new();
inputs.insert("features".to_string(), visual_features);
let outputs = self.densepose_backend.run(inputs)?;
// Extract outputs
let segmentation = outputs
.get("segmentation")
.cloned()
.ok_or_else(|| NnError::inference("Missing segmentation output"))?;
let uv_coordinates = outputs
.get("uv_coordinates")
.cloned()
.ok_or_else(|| NnError::inference("Missing uv_coordinates output"))?;
Ok(DensePoseOutput {
segmentation,
uv_coordinates,
confidence: None,
})
}
/// Get translator config
pub fn translator_config(&self) -> &TranslatorConfig {
&self.translator_config
}
/// Get DensePose config
pub fn densepose_config(&self) -> &DensePoseConfig {
&self.densepose_config
}
}
/// Builder for creating inference engines
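///
/// # Example
///
/// Mirrors `test_engine_builder` below:
///
/// ```ignore
/// let engine = EngineBuilder::new()
///     .cpu()
///     .batch_size(2)
///     .threads(4)
///     .build_mock();
/// assert_eq!(engine.options().batch_size, 2);
/// assert_eq!(engine.options().num_threads, 4);
/// ```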
pub struct EngineBuilder {
options: InferenceOptions,
model_path: Option<String>,
}
impl EngineBuilder {
/// Create a new builder
pub fn new() -> Self {
Self {
options: InferenceOptions::default(),
model_path: None,
}
}
/// Set inference options
pub fn options(mut self, options: InferenceOptions) -> Self {
self.options = options;
self
}
/// Set model path
pub fn model_path(mut self, path: impl Into<String>) -> Self {
self.model_path = Some(path.into());
self
}
/// Use GPU
pub fn gpu(mut self, device_id: usize) -> Self {
self.options.use_gpu = true;
self.options.gpu_device_id = device_id;
self
}
/// Use CPU
pub fn cpu(mut self) -> Self {
self.options.use_gpu = false;
self
}
/// Set batch size
pub fn batch_size(mut self, size: usize) -> Self {
self.options.batch_size = size;
self
}
/// Set number of threads
pub fn threads(mut self, n: usize) -> Self {
self.options.num_threads = n;
self
}
/// Build with a mock backend (for testing)
pub fn build_mock(self) -> InferenceEngine<MockBackend> {
let backend = MockBackend::new("mock")
.with_input("input", TensorShape::new(vec![1, 256, 64, 64]))
.with_output("output", TensorShape::new(vec![1, 256, 64, 64]));
InferenceEngine::new(backend, self.options)
}
/// Build with ONNX backend
#[cfg(feature = "onnx")]
pub fn build_onnx(self) -> NnResult<InferenceEngine<crate::onnx::OnnxBackend>> {
let model_path = self
.model_path
.ok_or_else(|| NnError::config("Model path required for ONNX backend"))?;
let backend = crate::onnx::OnnxBackend::from_file(&model_path)?;
Ok(InferenceEngine::new(backend, self.options))
}
}
impl Default for EngineBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_inference_options() {
let opts = InferenceOptions::cpu().with_batch_size(4).with_threads(8);
assert_eq!(opts.batch_size, 4);
assert_eq!(opts.num_threads, 8);
assert!(!opts.use_gpu);
let gpu_opts = InferenceOptions::gpu(0);
assert!(gpu_opts.use_gpu);
assert_eq!(gpu_opts.gpu_device_id, 0);
}
#[test]
fn test_mock_backend() {
let backend = MockBackend::new("test")
.with_input("input", TensorShape::new(vec![1, 3, 224, 224]))
.with_output("output", TensorShape::new(vec![1, 1000]));
assert_eq!(backend.name(), "test");
assert!(backend.is_available());
assert_eq!(backend.input_names(), vec!["input".to_string()]);
assert_eq!(backend.output_names(), vec!["output".to_string()]);
}
#[test]
fn test_engine_builder() {
let engine = EngineBuilder::new()
.cpu()
.batch_size(2)
.threads(4)
.build_mock();
assert_eq!(engine.options().batch_size, 2);
assert_eq!(engine.options().num_threads, 4);
}
#[test]
fn test_inference_stats() {
let mut stats = InferenceStats::default();
stats.record(10.0);
stats.record(20.0);
stats.record(15.0);
assert_eq!(stats.total_inferences, 3);
assert_eq!(stats.min_time_ms, 10.0);
assert_eq!(stats.max_time_ms, 20.0);
assert_eq!(stats.avg_time_ms, 15.0);
}
#[tokio::test]
async fn test_inference_engine() {
let engine = EngineBuilder::new().build_mock();
let input = Tensor::zeros_4d([1, 256, 64, 64]);
let output = engine.infer(&input).unwrap();
assert_eq!(output.shape().dims(), &[1, 256, 64, 64]);
}
}