feat: Complete Rust port of WiFi-DensePose with modular crates

Major changes:
- Organized Python v1 implementation into v1/ subdirectory
- Created Rust workspace with 9 modular crates:
  - wifi-densepose-core: Core types, traits, errors
  - wifi-densepose-signal: CSI processing, phase sanitization, FFT
  - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch)
  - wifi-densepose-api: Axum-based REST/WebSocket API
  - wifi-densepose-db: SQLx database layer
  - wifi-densepose-config: Configuration management
  - wifi-densepose-hardware: Hardware abstraction
  - wifi-densepose-wasm: WebAssembly bindings
  - wifi-densepose-cli: Command-line interface

Documentation:
- ADR-001: Workspace structure
- ADR-002: Signal processing library selection
- ADR-003: Neural network inference strategy
- DDD domain model with bounded contexts

Testing:
- 69 tests passing across all crates
- Signal processing: 45 tests
- Neural networks: 21 tests
- Core: 3 doc tests

Performance targets:
- 10x faster CSI processing (~0.5ms vs ~5ms)
- 5x lower memory usage (~100MB vs ~500MB)
- WASM support for browser deployment
This commit is contained in:
Claude
2026-01-13 03:11:16 +00:00
parent 5101504b72
commit 6ed69a3d48
427 changed files with 90993 additions and 0 deletions

View File

@@ -0,0 +1,419 @@
"""
Health check API endpoints
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, Optional

import psutil
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel, Field

from src.api.dependencies import get_current_user
from src.config.settings import get_settings
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router that collects the endpoints registered below with @router.get(...).
router = APIRouter()
# Response models
class ComponentHealth(BaseModel):
    """Health status for a system component."""

    # Human-readable component label (e.g. "Hardware Service").
    name: str = Field(
        ...,
        description="Component name",
    )
    # Free-form status string. The /health endpoint in this module also emits
    # "unavailable" here when a service is missing from app state.
    status: str = Field(
        ...,
        description="Health status (healthy, degraded, unhealthy)",
    )
    # Optional explanatory text accompanying the status.
    message: Optional[str] = Field(
        None,
        description="Status message",
    )
    # When this component was last probed.
    last_check: datetime = Field(
        ...,
        description="Last health check timestamp",
    )
    # Uptime as reported by the component itself, if it reports one.
    uptime_seconds: Optional[float] = Field(
        None,
        description="Component uptime",
    )
    # Arbitrary component-specific metrics, passed through untouched.
    metrics: Optional[Dict[str, Any]] = Field(
        None,
        description="Component metrics",
    )
class SystemHealth(BaseModel):
    """Overall system health status."""

    # Aggregated status across all probed components.
    status: str = Field(
        ...,
        description="Overall system status",
    )
    # When the aggregate check was performed.
    timestamp: datetime = Field(
        ...,
        description="Health check timestamp",
    )
    # Required; every field on this model must be supplied by the caller.
    uptime_seconds: float = Field(
        ...,
        description="System uptime",
    )
    # Per-component results keyed by a short component identifier.
    components: Dict[str, ComponentHealth] = Field(
        ...,
        description="Component health status",
    )
    # Host-level metrics dictionary (CPU, memory, disk, network, ...).
    system_metrics: Dict[str, Any] = Field(
        ...,
        description="System-level metrics",
    )
class ReadinessCheck(BaseModel):
    """System readiness check result."""

    # Final verdict returned to the caller.
    ready: bool = Field(
        ...,
        description="Whether system is ready to serve requests",
    )
    # When the readiness evaluation ran.
    timestamp: datetime = Field(
        ...,
        description="Readiness check timestamp",
    )
    # Outcome of each individual probe, keyed by probe name.
    checks: Dict[str, bool] = Field(
        ...,
        description="Individual readiness checks",
    )
    # Human-readable summary of the verdict.
    message: str = Field(
        ...,
        description="Readiness status message",
    )
# Health check endpoints
async def _check_service_health(service, key, timestamp, overall_status):
    """Probe one service and fold its result into the running overall status.

    Args:
        service: Object exposing an awaitable ``health_check()`` that returns
            a mapping with at least a ``"status"`` key, or a falsy value when
            the service was never attached to the application state.
        key: Short lowercase identifier (``"hardware"``, ``"pose"``,
            ``"stream"``); used to build the display name and log message.
        timestamp: Timestamp recorded as ``last_check`` on the component.
        overall_status: Aggregate status accumulated so far.

    Returns:
        A ``(ComponentHealth, overall_status)`` pair, where the second item is
        the (possibly escalated, never improved) aggregate status.
    """
    title = key.capitalize()
    display_name = f"{title} Service"

    if not service:
        component = ComponentHealth(
            name=display_name,
            status="unavailable",
            message="Service not initialized",
            last_check=timestamp
        )
        # A missing service degrades a healthy system, but it must not mask a
        # worse state ("unhealthy") already recorded for an earlier component.
        if overall_status == "healthy":
            overall_status = "degraded"
        return component, overall_status

    try:
        report = await service.health_check()
        component = ComponentHealth(
            name=display_name,
            status=report["status"],
            message=report.get("message"),
            last_check=timestamp,
            uptime_seconds=report.get("uptime_seconds"),
            metrics=report.get("metrics")
        )
        if report["status"] != "healthy":
            # First non-healthy component degrades the system; any further
            # one escalates it to unhealthy.
            overall_status = "degraded" if overall_status == "healthy" else "unhealthy"
    except Exception as e:
        # A probe that raises (or returns a malformed report) counts as an
        # unhealthy component rather than failing the whole endpoint.
        logger.error("%s service health check failed: %s", title, e)
        component = ComponentHealth(
            name=display_name,
            status="unhealthy",
            message=f"Health check failed: {str(e)}",
            last_check=timestamp
        )
        overall_status = "unhealthy"

    return component, overall_status


@router.get("/health", response_model=SystemHealth)
async def health_check(request: Request):
    """Comprehensive system health check."""
    # NOTE: the docstring above is kept short on purpose; FastAPI publishes
    # endpoint docstrings as the OpenAPI operation description.
    #
    # Probes the hardware, pose and stream services found on
    # ``request.app.state`` (in that order), aggregates their statuses and
    # attaches host metrics. Any unexpected failure becomes an HTTP 500.
    try:
        timestamp = datetime.utcnow()
        components = {}
        overall_status = "healthy"

        for key in ("hardware", "pose", "stream"):
            service = getattr(request.app.state, f"{key}_service", None)
            components[key], overall_status = await _check_service_health(
                service, key, timestamp, overall_status
            )

        # get_system_metrics() samples CPU usage with a blocking one-second
        # interval, so run it in a worker thread to keep the event loop free.
        system_metrics = await asyncio.get_running_loop().run_in_executor(
            None, get_system_metrics
        )

        # Calculate system uptime (placeholder - would need actual startup time)
        uptime_seconds = 0.0  # TODO: Implement actual uptime tracking

        return SystemHealth(
            status=overall_status,
            timestamp=timestamp,
            uptime_seconds=uptime_seconds,
            components=components,
            system_metrics=system_metrics
        )
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Health check failed: {str(e)}"
        ) from e
@router.get("/ready", response_model=ReadinessCheck)
async def readiness_check(request: Request):
    """Check if system is ready to serve requests."""
    # Runs each readiness probe and reports the individual outcomes. The
    # overall verdict is currently a fixed placeholder (always ready); the
    # probe results are informational only.
    try:
        now = datetime.utcnow()
        results = {}
        app_state = request.app.state

        # Services that expose an awaitable is_ready(); absent or falsy
        # services count as not ready.
        probed_services = (
            ("Pose", "pose_service", "pose_ready"),
            ("Stream", "stream_service", "stream_ready"),
        )
        for label, attr_name, check_name in probed_services:
            service = getattr(app_state, attr_name, None)
            if not service:
                results[check_name] = False
                continue
            try:
                results[check_name] = await service.is_ready()
            except Exception as exc:
                logger.warning(f"{label} service readiness check failed: {exc}")
                results[check_name] = False

        # Hardware service check (basic availability): the API is responding.
        results["hardware_ready"] = True

        # Host resource probes.
        results["memory_available"] = check_memory_availability()
        results["disk_space_available"] = check_disk_space()

        # For now the application is considered ready whenever the API is
        # responding, so the failure branch below is not reachable yet.
        ready = True
        if ready:
            message = "System is ready"
        else:
            failing = [name for name, passed in results.items() if not passed]
            message = "System is not ready" + f". Failed checks: {', '.join(failing)}"

        return ReadinessCheck(
            ready=ready,
            timestamp=now,
            checks=results,
            message=message
        )
    except Exception as exc:
        logger.error(f"Readiness check failed: {exc}")
        return ReadinessCheck(
            ready=False,
            timestamp=datetime.utcnow(),
            checks={},
            message=f"Readiness check failed: {str(exc)}"
        )
@router.get("/live")
async def liveness_check():
    """Simple liveness check for load balancers."""
    # No dependencies are consulted: answering at all is the liveness signal.
    checked_at = datetime.utcnow().isoformat()
    payload = {"status": "alive"}
    payload["timestamp"] = checked_at
    return payload
@router.get("/metrics")
async def get_health_metrics(
    request: Request,
    current_user: Optional[Dict] = Depends(get_current_user)
):
    """Get detailed system metrics."""
    # Returns the basic host metrics for every caller and merges in the
    # detailed (process / load / temperature) metrics when the dependency
    # resolved a truthy current_user. Failures become an HTTP 500.
    try:
        # get_system_metrics() samples CPU usage with a blocking one-second
        # interval; run it in a worker thread so the event loop stays free.
        loop = asyncio.get_running_loop()
        metrics = await loop.run_in_executor(None, get_system_metrics)

        # Add additional metrics if authenticated
        if current_user:
            metrics.update(get_detailed_metrics())

        return {
            "timestamp": datetime.utcnow().isoformat(),
            "metrics": metrics
        }
    except Exception as e:
        logger.error(f"Error getting system metrics: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get system metrics: {str(e)}"
        ) from e
@router.get("/version")
async def get_version_info():
    """Get application version information."""
    # Reads identity fields from the application settings object and stamps
    # the response with the current UTC time.
    cfg = get_settings()
    info = {
        "name": cfg.app_name,
        "version": cfg.version,
        "environment": cfg.environment,
        "debug": cfg.debug,
    }
    info["timestamp"] = datetime.utcnow().isoformat()
    return info
def get_system_metrics() -> Dict[str, Any]:
    """Get basic system metrics.

    Collects CPU, memory, root-filesystem and network counters via psutil.

    Note:
        CPU usage is sampled with ``interval=1``, so this call blocks for
        about one second. From async code, run it in a worker thread.

    Returns:
        A dict with ``"cpu"``, ``"memory"``, ``"disk"`` and ``"network"``
        sections, or an empty dict if collection fails unexpectedly.
    """
    bytes_per_gb = 1024 ** 3

    def to_gb(num_bytes) -> float:
        return round(num_bytes / bytes_per_gb, 2)

    try:
        # CPU metrics
        cpu_percent = psutil.cpu_percent(interval=1)
        cpu_count = psutil.cpu_count()

        # Memory metrics
        memory = psutil.virtual_memory()
        memory_metrics = {
            "total_gb": to_gb(memory.total),
            "available_gb": to_gb(memory.available),
            "used_gb": to_gb(memory.used),
            "percent": memory.percent
        }

        # Disk metrics
        disk = psutil.disk_usage('/')
        # Guard against a zero-size filesystem so one odd mount cannot raise
        # ZeroDivisionError and wipe out every other metric.
        disk_percent = round((disk.used / disk.total) * 100, 2) if disk.total else 0.0
        disk_metrics = {
            "total_gb": to_gb(disk.total),
            "free_gb": to_gb(disk.free),
            "used_gb": to_gb(disk.used),
            "percent": disk_percent
        }

        # Network metrics (basic). psutil returns None when the host has no
        # network interfaces; report zeroed counters in that case instead of
        # failing the whole collection.
        network = psutil.net_io_counters()
        if network is None:
            network_metrics = {
                "bytes_sent": 0,
                "bytes_recv": 0,
                "packets_sent": 0,
                "packets_recv": 0
            }
        else:
            network_metrics = {
                "bytes_sent": network.bytes_sent,
                "bytes_recv": network.bytes_recv,
                "packets_sent": network.packets_sent,
                "packets_recv": network.packets_recv
            }

        return {
            "cpu": {
                "percent": cpu_percent,
                "count": cpu_count
            },
            "memory": memory_metrics,
            "disk": disk_metrics,
            "network": network_metrics
        }
    except Exception as e:
        # Best-effort: callers treat an empty dict as "no metrics available".
        logger.error(f"Error getting system metrics: {e}")
        return {}
# Cached handle for the current process. psutil's Process.cpu_percent()
# measures usage since the previous call on the same object, so the handle
# must outlive a single request for the value to be meaningful.
_current_process = None


def _get_current_process():
    """Return a psutil.Process for this process, created once and reused."""
    global _current_process
    if _current_process is None:
        _current_process = psutil.Process()
    return _current_process


def get_detailed_metrics() -> Dict[str, Any]:
    """Get detailed system metrics (requires authentication).

    Returns:
        A dict with a ``"process"`` section and, when the platform provides
        them, ``"load_average"`` and ``"temperatures"`` sections. An empty
        dict is returned if collection fails unexpectedly.
    """
    try:
        # Process metrics. Reusing the cached handle lets cpu_percent() report
        # usage since the previous call; only the very first call yields the
        # documented placeholder 0.0.
        process = _get_current_process()
        process_metrics = {
            "pid": process.pid,
            "cpu_percent": process.cpu_percent(),
            "memory_mb": round(process.memory_info().rss / (1024**2), 2),
            "num_threads": process.num_threads(),
            # Naive UTC, matching the utcnow() timestamps used by the
            # endpoints in this module.
            "create_time": datetime.utcfromtimestamp(process.create_time()).isoformat()
        }

        # Load average (Unix-like systems)
        load_avg = None
        try:
            load_avg = psutil.getloadavg()
        except AttributeError:
            # Not provided by psutil on this platform.
            pass

        # Temperature sensors (if available)
        temperatures = {}
        try:
            temps = psutil.sensors_temperatures()
            for name, entries in temps.items():
                temperatures[name] = [
                    {"label": entry.label, "current": entry.current}
                    for entry in entries
                ]
        except AttributeError:
            # Not available on all systems
            pass

        detailed = {
            "process": process_metrics
        }
        if load_avg:
            detailed["load_average"] = {
                "1min": load_avg[0],
                "5min": load_avg[1],
                "15min": load_avg[2]
            }
        if temperatures:
            detailed["temperatures"] = temperatures
        return detailed
    except Exception as e:
        # Best-effort: callers merge the result, so an empty dict is harmless.
        logger.error(f"Error getting detailed metrics: {e}")
        return {}
def check_memory_availability(max_used_percent: float = 90.0) -> bool:
    """Check if sufficient memory is available.

    Args:
        max_used_percent: Memory usage (in percent) at or above which the
            check fails. Defaults to 90.0.

    Returns:
        True when current memory usage is strictly below the threshold;
        False otherwise, or when usage could not be determined.
    """
    try:
        memory = psutil.virtual_memory()
        return memory.percent < max_used_percent
    except Exception as e:
        # Fail closed, but leave a trace so a persistently failing probe can
        # be diagnosed instead of silently reporting "not available".
        logger.warning("Memory availability check failed: %s", e)
        return False
def check_disk_space(path: str = '/', min_free_gb: float = 1.0) -> bool:
    """Check if sufficient disk space is available.

    Args:
        path: Filesystem path whose volume is inspected. Defaults to '/'.
        min_free_gb: Free space (in GiB) that must be strictly exceeded for
            the check to pass. Defaults to 1.0.

    Returns:
        True when free space on the volume exceeds the threshold; False
        otherwise, or when disk usage could not be determined.
    """
    try:
        disk = psutil.disk_usage(path)
        free_gb = disk.free / (1024**3)
        return free_gb > min_free_gb
    except Exception as e:
        # Fail closed, but leave a trace so a persistently failing probe can
        # be diagnosed instead of silently reporting "no space".
        logger.warning("Disk space check failed for %s: %s", path, e)
        return False