Major changes: - Organized Python v1 implementation into v1/ subdirectory - Created Rust workspace with 9 modular crates: - wifi-densepose-core: Core types, traits, errors - wifi-densepose-signal: CSI processing, phase sanitization, FFT - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch) - wifi-densepose-api: Axum-based REST/WebSocket API - wifi-densepose-db: SQLx database layer - wifi-densepose-config: Configuration management - wifi-densepose-hardware: Hardware abstraction - wifi-densepose-wasm: WebAssembly bindings - wifi-densepose-cli: Command-line interface Documentation: - ADR-001: Workspace structure - ADR-002: Signal processing library selection - ADR-003: Neural network inference strategy - DDD domain model with bounded contexts Testing: - 69 tests passing across all crates - Signal processing: 45 tests - Neural networks: 21 tests - Core: 3 doc tests Performance targets: - 10x faster CSI processing (~0.5ms vs ~5ms) - 5x lower memory usage (~100MB vs ~500MB) - WASM support for browser deployment
507 lines
19 KiB
Python
507 lines
19 KiB
Python
"""
|
|
Performance tests for ML model inference speed.
|
|
|
|
Tests pose estimation model performance, throughput, and optimization.
|
|
"""
|
|
|
|
import pytest
|
|
import asyncio
|
|
import numpy as np
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Any, List, Optional
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
import psutil
|
|
import os
|
|
|
|
|
|
class MockPoseModel:
|
|
"""Mock pose estimation model for performance testing."""
|
|
|
|
def __init__(self, model_complexity: str = "standard"):
|
|
self.model_complexity = model_complexity
|
|
self.is_loaded = False
|
|
self.inference_count = 0
|
|
self.total_inference_time = 0.0
|
|
self.batch_size = 1
|
|
|
|
# Model complexity affects inference time
|
|
self.base_inference_time = {
|
|
"lightweight": 0.02, # 20ms
|
|
"standard": 0.05, # 50ms
|
|
"high_accuracy": 0.15 # 150ms
|
|
}.get(model_complexity, 0.05)
|
|
|
|
async def load_model(self):
|
|
"""Load the model."""
|
|
# Simulate model loading time
|
|
load_time = {
|
|
"lightweight": 0.5,
|
|
"standard": 2.0,
|
|
"high_accuracy": 5.0
|
|
}.get(self.model_complexity, 2.0)
|
|
|
|
await asyncio.sleep(load_time)
|
|
self.is_loaded = True
|
|
|
|
async def predict(self, features: np.ndarray) -> Dict[str, Any]:
|
|
"""Run inference on features."""
|
|
if not self.is_loaded:
|
|
raise RuntimeError("Model not loaded")
|
|
|
|
start_time = time.time()
|
|
|
|
# Simulate inference computation
|
|
batch_size = features.shape[0] if len(features.shape) > 2 else 1
|
|
inference_time = self.base_inference_time * batch_size
|
|
|
|
# Add some variance
|
|
inference_time *= np.random.uniform(0.8, 1.2)
|
|
|
|
await asyncio.sleep(inference_time)
|
|
|
|
end_time = time.time()
|
|
actual_inference_time = end_time - start_time
|
|
|
|
self.inference_count += batch_size
|
|
self.total_inference_time += actual_inference_time
|
|
|
|
# Generate mock predictions
|
|
predictions = []
|
|
for i in range(batch_size):
|
|
predictions.append({
|
|
"person_id": f"person_{i}",
|
|
"confidence": np.random.uniform(0.5, 0.95),
|
|
"keypoints": np.random.rand(17, 3).tolist(), # 17 keypoints with x,y,confidence
|
|
"bounding_box": {
|
|
"x": np.random.uniform(0, 640),
|
|
"y": np.random.uniform(0, 480),
|
|
"width": np.random.uniform(50, 200),
|
|
"height": np.random.uniform(100, 300)
|
|
}
|
|
})
|
|
|
|
return {
|
|
"predictions": predictions,
|
|
"inference_time_ms": actual_inference_time * 1000,
|
|
"model_complexity": self.model_complexity,
|
|
"batch_size": batch_size
|
|
}
|
|
|
|
def get_performance_stats(self) -> Dict[str, Any]:
|
|
"""Get performance statistics."""
|
|
avg_inference_time = (
|
|
self.total_inference_time / self.inference_count
|
|
if self.inference_count > 0 else 0
|
|
)
|
|
|
|
return {
|
|
"total_inferences": self.inference_count,
|
|
"total_time_seconds": self.total_inference_time,
|
|
"average_inference_time_ms": avg_inference_time * 1000,
|
|
"throughput_fps": 1.0 / avg_inference_time if avg_inference_time > 0 else 0,
|
|
"model_complexity": self.model_complexity
|
|
}
|
|
|
|
|
|
class TestInferenceSpeed:
|
|
"""Test inference speed for different model configurations."""
|
|
|
|
@pytest.fixture
|
|
def lightweight_model(self):
|
|
"""Create lightweight model."""
|
|
return MockPoseModel("lightweight")
|
|
|
|
@pytest.fixture
|
|
def standard_model(self):
|
|
"""Create standard model."""
|
|
return MockPoseModel("standard")
|
|
|
|
@pytest.fixture
|
|
def high_accuracy_model(self):
|
|
"""Create high accuracy model."""
|
|
return MockPoseModel("high_accuracy")
|
|
|
|
@pytest.fixture
|
|
def sample_features(self):
|
|
"""Create sample feature data."""
|
|
return np.random.rand(64, 32) # 64x32 feature matrix
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_single_inference_speed_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test single inference speed - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
start_time = time.time()
|
|
result = await standard_model.predict(sample_features)
|
|
end_time = time.time()
|
|
|
|
inference_time = (end_time - start_time) * 1000 # Convert to ms
|
|
|
|
# This will fail initially
|
|
assert inference_time < 100 # Should be less than 100ms
|
|
assert result["inference_time_ms"] > 0
|
|
assert len(result["predictions"]) > 0
|
|
assert result["model_complexity"] == "standard"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_model_complexity_comparison_should_fail_initially(self, sample_features):
|
|
"""Test model complexity comparison - should fail initially."""
|
|
models = {
|
|
"lightweight": MockPoseModel("lightweight"),
|
|
"standard": MockPoseModel("standard"),
|
|
"high_accuracy": MockPoseModel("high_accuracy")
|
|
}
|
|
|
|
# Load all models
|
|
for model in models.values():
|
|
await model.load_model()
|
|
|
|
# Run inference on each model
|
|
results = {}
|
|
for name, model in models.items():
|
|
start_time = time.time()
|
|
result = await model.predict(sample_features)
|
|
end_time = time.time()
|
|
|
|
results[name] = {
|
|
"inference_time_ms": (end_time - start_time) * 1000,
|
|
"result": result
|
|
}
|
|
|
|
# This will fail initially
|
|
# Lightweight should be fastest
|
|
assert results["lightweight"]["inference_time_ms"] < results["standard"]["inference_time_ms"]
|
|
assert results["standard"]["inference_time_ms"] < results["high_accuracy"]["inference_time_ms"]
|
|
|
|
# All should complete within reasonable time
|
|
for name, result in results.items():
|
|
assert result["inference_time_ms"] < 500 # Less than 500ms
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_batch_inference_performance_should_fail_initially(self, standard_model):
|
|
"""Test batch inference performance - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Test different batch sizes
|
|
batch_sizes = [1, 4, 8, 16]
|
|
results = {}
|
|
|
|
for batch_size in batch_sizes:
|
|
# Create batch of features
|
|
batch_features = np.random.rand(batch_size, 64, 32)
|
|
|
|
start_time = time.time()
|
|
result = await standard_model.predict(batch_features)
|
|
end_time = time.time()
|
|
|
|
total_time = (end_time - start_time) * 1000
|
|
per_sample_time = total_time / batch_size
|
|
|
|
results[batch_size] = {
|
|
"total_time_ms": total_time,
|
|
"per_sample_time_ms": per_sample_time,
|
|
"throughput_fps": 1000 / per_sample_time,
|
|
"predictions": len(result["predictions"])
|
|
}
|
|
|
|
# This will fail initially
|
|
# Batch processing should be more efficient per sample
|
|
assert results[1]["per_sample_time_ms"] > results[4]["per_sample_time_ms"]
|
|
assert results[4]["per_sample_time_ms"] > results[8]["per_sample_time_ms"]
|
|
|
|
# Verify correct number of predictions
|
|
for batch_size, result in results.items():
|
|
assert result["predictions"] == batch_size
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_sustained_inference_performance_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test sustained inference performance - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Run many inferences to test sustained performance
|
|
num_inferences = 50
|
|
inference_times = []
|
|
|
|
for i in range(num_inferences):
|
|
start_time = time.time()
|
|
await standard_model.predict(sample_features)
|
|
end_time = time.time()
|
|
|
|
inference_times.append((end_time - start_time) * 1000)
|
|
|
|
# This will fail initially
|
|
# Calculate performance metrics
|
|
avg_time = np.mean(inference_times)
|
|
std_time = np.std(inference_times)
|
|
min_time = np.min(inference_times)
|
|
max_time = np.max(inference_times)
|
|
|
|
assert avg_time < 100 # Average should be less than 100ms
|
|
assert std_time < 20 # Standard deviation should be low (consistent performance)
|
|
assert max_time < avg_time * 2 # No inference should take more than 2x average
|
|
|
|
# Check model statistics
|
|
stats = standard_model.get_performance_stats()
|
|
assert stats["total_inferences"] == num_inferences
|
|
assert stats["throughput_fps"] > 10 # Should achieve at least 10 FPS
|
|
|
|
|
|
class TestInferenceOptimization:
|
|
"""Test inference optimization techniques."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_model_warmup_effect_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test model warmup effect - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# First inference (cold start)
|
|
start_time = time.time()
|
|
await standard_model.predict(sample_features)
|
|
cold_start_time = (time.time() - start_time) * 1000
|
|
|
|
# Subsequent inferences (warmed up)
|
|
warm_times = []
|
|
for _ in range(5):
|
|
start_time = time.time()
|
|
await standard_model.predict(sample_features)
|
|
warm_times.append((time.time() - start_time) * 1000)
|
|
|
|
avg_warm_time = np.mean(warm_times)
|
|
|
|
# This will fail initially
|
|
# Warm inferences should be faster than cold start
|
|
assert avg_warm_time <= cold_start_time
|
|
assert cold_start_time > 0
|
|
assert avg_warm_time > 0
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_concurrent_inference_performance_should_fail_initially(self, sample_features):
|
|
"""Test concurrent inference performance - should fail initially."""
|
|
# Create multiple model instances
|
|
models = [MockPoseModel("standard") for _ in range(3)]
|
|
|
|
# Load all models
|
|
for model in models:
|
|
await model.load_model()
|
|
|
|
async def run_inference(model, features):
|
|
start_time = time.time()
|
|
result = await model.predict(features)
|
|
end_time = time.time()
|
|
return (end_time - start_time) * 1000
|
|
|
|
# Run concurrent inferences
|
|
tasks = [run_inference(model, sample_features) for model in models]
|
|
inference_times = await asyncio.gather(*tasks)
|
|
|
|
# This will fail initially
|
|
# All inferences should complete
|
|
assert len(inference_times) == 3
|
|
assert all(time > 0 for time in inference_times)
|
|
|
|
# Concurrent execution shouldn't be much slower than sequential
|
|
avg_concurrent_time = np.mean(inference_times)
|
|
assert avg_concurrent_time < 200 # Should complete within 200ms each
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_memory_usage_during_inference_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test memory usage during inference - should fail initially."""
|
|
process = psutil.Process(os.getpid())
|
|
|
|
await standard_model.load_model()
|
|
initial_memory = process.memory_info().rss
|
|
|
|
# Run multiple inferences
|
|
for i in range(20):
|
|
await standard_model.predict(sample_features)
|
|
|
|
# Check memory every 5 inferences
|
|
if i % 5 == 0:
|
|
current_memory = process.memory_info().rss
|
|
memory_increase = current_memory - initial_memory
|
|
|
|
# This will fail initially
|
|
# Memory increase should be reasonable (less than 50MB)
|
|
assert memory_increase < 50 * 1024 * 1024
|
|
|
|
final_memory = process.memory_info().rss
|
|
total_increase = final_memory - initial_memory
|
|
|
|
# Total memory increase should be reasonable
|
|
assert total_increase < 100 * 1024 * 1024 # Less than 100MB
|
|
|
|
|
|
class TestInferenceAccuracy:
|
|
"""Test inference accuracy and quality metrics."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_prediction_consistency_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test prediction consistency - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Run same inference multiple times
|
|
results = []
|
|
for _ in range(5):
|
|
result = await standard_model.predict(sample_features)
|
|
results.append(result)
|
|
|
|
# This will fail initially
|
|
# All results should have similar structure
|
|
for result in results:
|
|
assert "predictions" in result
|
|
assert "inference_time_ms" in result
|
|
assert len(result["predictions"]) > 0
|
|
|
|
# Inference times should be consistent
|
|
inference_times = [r["inference_time_ms"] for r in results]
|
|
avg_time = np.mean(inference_times)
|
|
std_time = np.std(inference_times)
|
|
|
|
assert std_time < avg_time * 0.5 # Standard deviation should be less than 50% of mean
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_confidence_score_distribution_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test confidence score distribution - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Collect confidence scores from multiple inferences
|
|
all_confidences = []
|
|
|
|
for _ in range(20):
|
|
result = await standard_model.predict(sample_features)
|
|
for prediction in result["predictions"]:
|
|
all_confidences.append(prediction["confidence"])
|
|
|
|
# This will fail initially
|
|
if all_confidences: # Only test if we have predictions
|
|
# Confidence scores should be in valid range
|
|
assert all(0.0 <= conf <= 1.0 for conf in all_confidences)
|
|
|
|
# Should have reasonable distribution
|
|
avg_confidence = np.mean(all_confidences)
|
|
assert 0.3 <= avg_confidence <= 0.95 # Reasonable average confidence
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_keypoint_detection_quality_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test keypoint detection quality - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
result = await standard_model.predict(sample_features)
|
|
|
|
# This will fail initially
|
|
for prediction in result["predictions"]:
|
|
keypoints = prediction["keypoints"]
|
|
|
|
# Should have correct number of keypoints
|
|
assert len(keypoints) == 17 # Standard pose has 17 keypoints
|
|
|
|
# Each keypoint should have x, y, confidence
|
|
for keypoint in keypoints:
|
|
assert len(keypoint) == 3
|
|
x, y, conf = keypoint
|
|
assert isinstance(x, (int, float))
|
|
assert isinstance(y, (int, float))
|
|
assert 0.0 <= conf <= 1.0
|
|
|
|
|
|
class TestInferenceScaling:
|
|
"""Test inference scaling characteristics."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_input_size_scaling_should_fail_initially(self, standard_model):
|
|
"""Test inference scaling with input size - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Test different input sizes
|
|
input_sizes = [(32, 16), (64, 32), (128, 64), (256, 128)]
|
|
results = {}
|
|
|
|
for height, width in input_sizes:
|
|
features = np.random.rand(height, width)
|
|
|
|
start_time = time.time()
|
|
result = await standard_model.predict(features)
|
|
end_time = time.time()
|
|
|
|
inference_time = (end_time - start_time) * 1000
|
|
input_size = height * width
|
|
|
|
results[input_size] = {
|
|
"inference_time_ms": inference_time,
|
|
"dimensions": (height, width),
|
|
"predictions": len(result["predictions"])
|
|
}
|
|
|
|
# This will fail initially
|
|
# Larger inputs should generally take longer
|
|
sizes = sorted(results.keys())
|
|
for i in range(len(sizes) - 1):
|
|
current_size = sizes[i]
|
|
next_size = sizes[i + 1]
|
|
|
|
# Allow some variance, but larger inputs should generally be slower
|
|
time_ratio = results[next_size]["inference_time_ms"] / results[current_size]["inference_time_ms"]
|
|
assert time_ratio >= 0.8 # Next size shouldn't be much faster
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_throughput_under_load_should_fail_initially(self, standard_model, sample_features):
|
|
"""Test throughput under sustained load - should fail initially."""
|
|
await standard_model.load_model()
|
|
|
|
# Simulate sustained load
|
|
duration_seconds = 5
|
|
start_time = time.time()
|
|
inference_count = 0
|
|
|
|
while time.time() - start_time < duration_seconds:
|
|
await standard_model.predict(sample_features)
|
|
inference_count += 1
|
|
|
|
actual_duration = time.time() - start_time
|
|
throughput = inference_count / actual_duration
|
|
|
|
# This will fail initially
|
|
# Should maintain reasonable throughput under load
|
|
assert throughput > 5 # At least 5 FPS
|
|
assert inference_count > 20 # Should complete at least 20 inferences in 5 seconds
|
|
|
|
# Check model statistics
|
|
stats = standard_model.get_performance_stats()
|
|
assert stats["total_inferences"] >= inference_count
|
|
assert stats["throughput_fps"] > 0
|
|
|
|
|
|
@pytest.mark.benchmark
|
|
class TestInferenceBenchmarks:
|
|
"""Benchmark tests for inference performance."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_benchmark_lightweight_model_should_fail_initially(self, benchmark):
|
|
"""Benchmark lightweight model performance - should fail initially."""
|
|
model = MockPoseModel("lightweight")
|
|
await model.load_model()
|
|
features = np.random.rand(64, 32)
|
|
|
|
async def run_inference():
|
|
return await model.predict(features)
|
|
|
|
# This will fail initially
|
|
# Benchmark the inference
|
|
result = await run_inference()
|
|
assert result["inference_time_ms"] < 50 # Should be less than 50ms
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_benchmark_batch_processing_should_fail_initially(self, benchmark):
|
|
"""Benchmark batch processing performance - should fail initially."""
|
|
model = MockPoseModel("standard")
|
|
await model.load_model()
|
|
batch_features = np.random.rand(8, 64, 32) # Batch of 8
|
|
|
|
async def run_batch_inference():
|
|
return await model.predict(batch_features)
|
|
|
|
# This will fail initially
|
|
result = await run_batch_inference()
|
|
assert len(result["predictions"]) == 8
|
|
assert result["inference_time_ms"] < 200 # Batch should be efficient |