feat: Complete Rust port of WiFi-DensePose with modular crates
Major changes: - Organized Python v1 implementation into v1/ subdirectory - Created Rust workspace with 9 modular crates: - wifi-densepose-core: Core types, traits, errors - wifi-densepose-signal: CSI processing, phase sanitization, FFT - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch) - wifi-densepose-api: Axum-based REST/WebSocket API - wifi-densepose-db: SQLx database layer - wifi-densepose-config: Configuration management - wifi-densepose-hardware: Hardware abstraction - wifi-densepose-wasm: WebAssembly bindings - wifi-densepose-cli: Command-line interface Documentation: - ADR-001: Workspace structure - ADR-002: Signal processing library selection - ADR-003: Neural network inference strategy - DDD domain model with bounded contexts Testing: - 69 tests passing across all crates - Signal processing: 45 tests - Neural networks: 21 tests - Core: 3 doc tests Performance targets: - 10x faster CSI processing (~0.5ms vs ~5ms) - 5x lower memory usage (~100MB vs ~500MB) - WASM support for browser deployment
This commit is contained in:
359
v1/src/commands/start.py
Normal file
359
v1/src/commands/start.py
Normal file
@@ -0,0 +1,359 @@
|
||||
"""
|
||||
Start command implementation for WiFi-DensePose API
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import uvicorn
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from src.config.settings import Settings
|
||||
from src.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def start_command(
    settings: Settings,
    host: str = "0.0.0.0",
    port: int = 8000,
    workers: int = 1,
    reload: bool = False,
    daemon: bool = False
) -> None:
    """Start the WiFi-DensePose API server.

    Validates startup requirements, installs signal handlers, starts the
    background tasks and then runs uvicorn either in the foreground or as
    a daemon (with a PID file).

    Args:
        settings: Application settings.
        host: Interface to bind the HTTP server to.
        port: TCP port to listen on.
        workers: Number of uvicorn workers (forced to 1 when reload is on).
        reload: Enable auto-reload for development.
        daemon: Detach and run in the background.

    Raises:
        Exception: Re-raised after logging if startup fails.
    """

    # Plain string: there is nothing to interpolate here (was an f-string).
    logger.info("Starting WiFi-DensePose API server...")
    logger.info(f"Environment: {settings.environment}")
    logger.info(f"Debug mode: {settings.debug}")
    logger.info(f"Host: {host}")
    logger.info(f"Port: {port}")
    logger.info(f"Workers: {workers}")

    # Validate settings
    await _validate_startup_requirements(settings)

    # Setup signal handlers
    _setup_signal_handlers()

    # Create PID file if running as daemon
    pid_file = None
    if daemon:
        pid_file = _create_pid_file(settings)

    # Initialized up front so the finally block never has to probe locals().
    background_tasks = None

    try:
        # Initialize database
        await _initialize_database(settings)

        # Start background tasks
        background_tasks = await _start_background_tasks(settings)

        # Configure uvicorn
        uvicorn_config = {
            "app": "src.app:app",
            "host": host,
            "port": port,
            "reload": reload,
            "workers": workers if not reload else 1,  # Reload doesn't work with multiple workers
            "log_level": "debug" if settings.debug else "info",
            "access_log": True,
            "use_colors": not daemon,
        }

        if daemon:
            # Run as daemon
            await _run_as_daemon(uvicorn_config, pid_file)
        else:
            # Run in foreground
            await _run_server(uvicorn_config)

    except KeyboardInterrupt:
        logger.info("Received interrupt signal, shutting down...")
    except Exception as e:
        logger.error(f"Server startup failed: {e}")
        raise
    finally:
        # Cleanup
        if pid_file and pid_file.exists():
            pid_file.unlink()

        # Stop background tasks (only if they were actually started).
        if background_tasks is not None:
            await _stop_background_tasks(background_tasks)
|
||||
|
||||
|
||||
async def _validate_startup_requirements(settings: Settings) -> None:
    """Verify database, optional Redis, and required directories before start."""

    logger.info("Validating startup requirements...")

    # Database connectivity is mandatory: fail fast if it cannot be reached.
    try:
        from src.database.connection import get_database_manager

        db_manager = get_database_manager(settings)
        await db_manager.test_connection()
        logger.info("✓ Database connection validated")

    except Exception as e:
        logger.error(f"✗ Database connection failed: {e}")
        raise

    # Redis is optional: log a warning and keep going if it is unavailable.
    if settings.redis_enabled:
        try:
            redis_stats = await db_manager.get_connection_stats()
            redis_ok = "redis" in redis_stats and not redis_stats["redis"].get("error")
            if redis_ok:
                logger.info("✓ Redis connection validated")
            else:
                logger.warning("⚠ Redis connection failed, continuing without Redis")

        except Exception as e:
            logger.warning(f"⚠ Redis connection failed: {e}, continuing without Redis")

    # Ensure the directories the server writes to exist (created if missing).
    for name, directory in (
        ("Log directory", settings.log_directory),
        ("Backup directory", settings.backup_directory),
    ):
        Path(directory).mkdir(parents=True, exist_ok=True)
        logger.info(f"✓ {name} ready: {directory}")

    logger.info("All startup requirements validated")
|
||||
|
||||
|
||||
async def _initialize_database(settings: Settings) -> None:
    """Initialize database connection and run migrations if needed."""

    logger.info("Initializing database...")

    try:
        from src.database.connection import get_database_manager

        await get_database_manager(settings).initialize()
    except Exception as e:
        logger.error(f"Database initialization failed: {e}")
        raise
    else:
        logger.info("Database initialized successfully")
|
||||
|
||||
|
||||
async def _start_background_tasks(settings: Settings) -> dict:
    """Start the enabled periodic tasks; return them keyed by task name."""

    logger.info("Starting background tasks...")

    tasks = {}

    try:
        # Each task is opt-in via its interval setting (> 0 means enabled).
        if settings.cleanup_interval_seconds > 0:
            from src.tasks.cleanup import run_periodic_cleanup

            tasks['cleanup'] = asyncio.create_task(run_periodic_cleanup(settings))
            logger.info("✓ Cleanup task started")

        if settings.monitoring_interval_seconds > 0:
            from src.tasks.monitoring import run_periodic_monitoring

            tasks['monitoring'] = asyncio.create_task(run_periodic_monitoring(settings))
            logger.info("✓ Monitoring task started")

        if settings.backup_interval_seconds > 0:
            from src.tasks.backup import run_periodic_backup

            tasks['backup'] = asyncio.create_task(run_periodic_backup(settings))
            logger.info("✓ Backup task started")

        logger.info(f"Started {len(tasks)} background tasks")
        return tasks

    except Exception as e:
        logger.error(f"Failed to start background tasks: {e}")
        # Roll back: cancel whatever was already scheduled before re-raising.
        for task in tasks.values():
            task.cancel()
        raise
|
||||
|
||||
|
||||
async def _stop_background_tasks(tasks: dict) -> None:
    """Cancel the given background tasks and wait for them to settle."""

    logger.info("Stopping background tasks...")

    # Request cancellation of everything still in flight.
    for name, task in tasks.items():
        if task.done():
            continue
        logger.info(f"Stopping {name} task...")
        task.cancel()

    # Gather so cancellations (and any errors) fully settle before returning;
    # return_exceptions keeps one failure from masking the rest.
    if tasks:
        await asyncio.gather(*tasks.values(), return_exceptions=True)

    logger.info("Background tasks stopped")
|
||||
|
||||
|
||||
def _setup_signal_handlers() -> None:
    """Install handlers that turn termination signals into a clean exit."""

    def _handle(signum, frame):
        logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        # The actual shutdown will be handled by the main loop
        sys.exit(0)

    to_handle = [signal.SIGINT, signal.SIGTERM]
    # SIGHUP does not exist on all platforms (e.g. Windows).
    if hasattr(signal, 'SIGHUP'):
        to_handle.append(signal.SIGHUP)

    for sig in to_handle:
        signal.signal(sig, _handle)
|
||||
|
||||
|
||||
def _create_pid_file(settings: Settings) -> Path:
    """Write this process's PID file, refusing to start over a live server."""

    pid_file = Path(settings.log_directory) / "wifi-densepose-api.pid"

    # An existing PID file either belongs to a live server (abort) or is
    # stale/corrupt (remove and continue).
    if pid_file.exists():
        try:
            old_pid = int(pid_file.read_text().strip())

            # os.kill with signal 0 only probes whether the process exists.
            try:
                os.kill(old_pid, 0)
            except OSError:
                # The recorded process is gone; drop the stale file.
                pid_file.unlink()
                logger.info("Removed stale PID file")
            else:
                logger.error(f"Server already running with PID {old_pid}")
                sys.exit(1)

        except (ValueError, IOError):
            # Unparseable or unreadable contents; drop the broken file.
            pid_file.unlink()
            logger.info("Removed invalid PID file")

    # Record our own PID.
    pid_file.write_text(str(os.getpid()))

    logger.info(f"Created PID file: {pid_file}")
    return pid_file
|
||||
|
||||
|
||||
async def _run_server(config: dict) -> None:
    """Serve uvicorn in the foreground until it exits."""

    logger.info("Starting server in foreground mode...")

    # Build the server from the prepared configuration and block on it.
    uvicorn_server = uvicorn.Server(uvicorn.Config(**config))
    await uvicorn_server.serve()
|
||||
|
||||
|
||||
async def _run_as_daemon(config: dict, pid_file: Path) -> None:
    """Run the server as a daemon.

    Uses the classic Unix double-fork sequence: the first fork lets the
    original parent exit, ``setsid`` detaches from the controlling terminal,
    and the second fork prevents the daemon from ever reacquiring one.
    The daemon's final PID is written to ``pid_file`` and all standard
    streams are redirected to /dev/null before uvicorn is started.

    NOTE(review): ``os.fork`` is POSIX-only; this path cannot work on
    Windows. The statement order below is load-bearing — do not reorder.
    """

    logger.info("Starting server in daemon mode...")

    # Fork process
    try:
        pid = os.fork()
        if pid > 0:
            # Parent process: report the child's PID and exit immediately.
            logger.info(f"Server started as daemon with PID {pid}")
            sys.exit(0)
    except OSError as e:
        logger.error(f"Fork failed: {e}")
        sys.exit(1)

    # Child process continues

    # Decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # Second fork: the session leader exits so the daemon can never
    # reacquire a controlling terminal.
    try:
        pid = os.fork()
        if pid > 0:
            # Exit second parent
            sys.exit(0)
    except OSError as e:
        logger.error(f"Second fork failed: {e}")
        sys.exit(1)

    # Update PID file with daemon PID (the PID recorded by _create_pid_file
    # belonged to the pre-fork process and is now wrong).
    with open(pid_file, 'w') as f:
        f.write(str(os.getpid()))

    # Redirect standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()

    # Redirect stdin, stdout, stderr to /dev/null
    with open('/dev/null', 'r') as f:
        os.dup2(f.fileno(), sys.stdin.fileno())

    with open('/dev/null', 'w') as f:
        os.dup2(f.fileno(), sys.stdout.fileno())
        os.dup2(f.fileno(), sys.stderr.fileno())

    # Create uvicorn server
    server = uvicorn.Server(uvicorn.Config(**config))

    # Run server
    await server.serve()
|
||||
|
||||
|
||||
def get_server_status(settings: Settings) -> dict:
    """Report whether the server is running, based on its PID file.

    Returns a dict with ``running``, ``pid``, ``pid_file`` and
    ``pid_file_exists`` keys; ``pid`` stays None when the file is absent
    or unparseable.
    """

    pid_file = Path(settings.log_directory) / "wifi-densepose-api.pid"

    status = {
        "running": False,
        "pid": None,
        "pid_file": str(pid_file),
        "pid_file_exists": pid_file.exists(),
    }

    if not pid_file.exists():
        return status

    try:
        pid = int(pid_file.read_text().strip())
    except (ValueError, IOError):
        # Invalid PID file: leave pid as None, server considered down.
        status["running"] = False
        return status

    status["pid"] = pid

    # Probe the process with signal 0: it exists iff the call succeeds.
    try:
        os.kill(pid, 0)
    except OSError:
        status["running"] = False
    else:
        status["running"] = True

    return status
|
||||
501
v1/src/commands/status.py
Normal file
501
v1/src/commands/status.py
Normal file
@@ -0,0 +1,501 @@
|
||||
"""
|
||||
Status command implementation for WiFi-DensePose API
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import psutil
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from src.config.settings import Settings
|
||||
from src.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def status_command(
    settings: Settings,
    output_format: str = "text",
    detailed: bool = False
) -> None:
    """Show the status of the WiFi-DensePose API server."""

    logger.debug("Gathering server status information...")

    try:
        # Gather everything first, then render in the requested format.
        status_data = await _collect_status_data(settings, detailed)

        if output_format == "json":
            # default=str keeps datetimes and other objects serializable.
            print(json.dumps(status_data, indent=2, default=str))
        else:
            _print_text_status(status_data, detailed)

    except Exception as e:
        logger.error(f"Failed to get status: {e}")
        raise
|
||||
|
||||
|
||||
async def _collect_status_data(settings: Settings, detailed: bool) -> Dict[str, Any]:
    """Assemble the status report; `detailed` adds DB/task/resource/health data."""

    status_data: Dict[str, Any] = {
        "timestamp": datetime.utcnow().isoformat(),
        "server": await _get_server_status(settings),
        "system": _get_system_status(),
        "configuration": _get_configuration_status(settings),
    }

    if not detailed:
        return status_data

    # The detailed sections are comparatively expensive, so they are
    # collected only on request.
    status_data["database"] = await _get_database_status(settings)
    status_data["background_tasks"] = await _get_background_tasks_status(settings)
    status_data["resources"] = _get_resource_usage()
    status_data["health"] = await _get_health_status(settings)

    return status_data
|
||||
|
||||
|
||||
async def _get_server_status(settings: Settings) -> Dict[str, Any]:
    """Get server process status, enriched with psutil metrics when running."""

    from src.commands.stop import get_server_status

    base = get_server_status(settings)

    server_info = {
        "running": base["running"],
        "pid": base["pid"],
        "pid_file": base["pid_file"],
        "pid_file_exists": base["pid_file_exists"],
    }

    # Without a live PID there is nothing more to report.
    if not (base["running"] and base["pid"]):
        return server_info

    try:
        process = psutil.Process(base["pid"])

        server_info.update({
            "start_time": datetime.fromtimestamp(process.create_time()).isoformat(),
            "uptime_seconds": time.time() - process.create_time(),
            "memory_usage_mb": process.memory_info().rss / (1024 * 1024),
            "cpu_percent": process.cpu_percent(),
            "status": process.status(),
            "num_threads": process.num_threads(),
            "connections": len(process.connections()) if hasattr(process, 'connections') else None,
        })

    except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
        # The process vanished or we lack permission; report it rather than fail.
        server_info["error"] = f"Cannot access process info: {e}"

    return server_info
|
||||
|
||||
|
||||
def _get_system_status() -> Dict[str, Any]:
    """Get system status information (host, platform, Python version, uptime).

    NOTE: relies on ``os.uname``, which is POSIX-only (same constraint as
    the original code, which reached it via ``psutil.os``).
    """

    # Use the stdlib directly instead of reaching through psutil's
    # incidental module attributes (psutil.os / psutil.sys are just the
    # modules psutil itself imported, not part of its API).
    import os
    import sys

    uname_info = os.uname()
    # boot_time() is queried once and reused for both derived fields.
    boot_time = psutil.boot_time()

    return {
        "hostname": uname_info.nodename,
        "platform": uname_info.sysname,
        "architecture": uname_info.machine,
        "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        "boot_time": datetime.fromtimestamp(boot_time).isoformat(),
        "uptime_seconds": time.time() - boot_time,
    }
|
||||
|
||||
|
||||
def _get_configuration_status(settings: Settings) -> Dict[str, Any]:
|
||||
"""Get configuration status."""
|
||||
|
||||
return {
|
||||
"environment": settings.environment,
|
||||
"debug": settings.debug,
|
||||
"version": settings.version,
|
||||
"host": settings.host,
|
||||
"port": settings.port,
|
||||
"database_configured": bool(settings.database_url or (settings.db_host and settings.db_name)),
|
||||
"redis_enabled": settings.redis_enabled,
|
||||
"monitoring_enabled": settings.monitoring_interval_seconds > 0,
|
||||
"cleanup_enabled": settings.cleanup_interval_seconds > 0,
|
||||
"backup_enabled": settings.backup_interval_seconds > 0,
|
||||
}
|
||||
|
||||
|
||||
async def _get_database_status(settings: Settings) -> Dict[str, Any]:
    """Get database status.

    Returns a dict with:
        connected: whether test_connection() succeeded.
        connection_pool: raw stats from the database manager.
        tables: per-table {"count": n} or {"error": msg}.
        error: top-level failure message, or None.

    Any top-level failure is captured in ``error`` rather than raised, so
    this is safe to call from the status report even when the DB is down.
    """

    db_status = {
        "connected": False,
        "connection_pool": None,
        "tables": {},
        "error": None,
    }

    try:
        from src.database.connection import get_database_manager

        db_manager = get_database_manager(settings)

        # Test connection
        await db_manager.test_connection()
        db_status["connected"] = True

        # Get connection stats
        connection_stats = await db_manager.get_connection_stats()
        db_status["connection_pool"] = connection_stats

        # Get table counts
        async with db_manager.get_async_session() as session:
            from sqlalchemy import text, func
            from src.database.models import Device, Session, CSIData, PoseDetection, SystemMetric, AuditLog

            # NOTE(review): the table-name strings are hardcoded and assumed
            # to match the models' __tablename__ values — verify if the
            # schema changes. The model classes themselves are currently
            # unused by the raw-SQL count below.
            tables = {
                "devices": Device,
                "sessions": Session,
                "csi_data": CSIData,
                "pose_detections": PoseDetection,
                "system_metrics": SystemMetric,
                "audit_logs": AuditLog,
            }

            for table_name, model in tables.items():
                try:
                    # Raw COUNT(*) per table; names come from the trusted
                    # dict above, never from user input.
                    result = await session.execute(
                        text(f"SELECT COUNT(*) FROM {table_name}")
                    )
                    count = result.scalar()
                    db_status["tables"][table_name] = {"count": count}
                except Exception as e:
                    # A single missing table should not hide the others.
                    db_status["tables"][table_name] = {"error": str(e)}

    except Exception as e:
        db_status["error"] = str(e)

    return db_status
|
||||
|
||||
|
||||
async def _get_background_tasks_status(settings: Settings) -> Dict[str, Any]:
    """Collect stats from each background-task manager.

    Each manager is queried independently so one failure does not hide
    the others; failures are reported as {"error": msg} entries.
    """

    tasks_status = {}

    try:
        from src.tasks.cleanup import get_cleanup_manager

        tasks_status["cleanup"] = get_cleanup_manager(settings).get_stats()
    except Exception as e:
        tasks_status["cleanup"] = {"error": str(e)}

    try:
        from src.tasks.monitoring import get_monitoring_manager

        tasks_status["monitoring"] = get_monitoring_manager(settings).get_stats()
    except Exception as e:
        tasks_status["monitoring"] = {"error": str(e)}

    try:
        from src.tasks.backup import get_backup_manager

        tasks_status["backup"] = get_backup_manager(settings).get_stats()
    except Exception as e:
        tasks_status["backup"] = {"error": str(e)}

    return tasks_status
|
||||
|
||||
|
||||
def _get_resource_usage() -> Dict[str, Any]:
    """Snapshot CPU, memory, swap, disk and network usage via psutil."""

    MB = 1024 * 1024
    GB = 1024 * 1024 * 1024

    # NOTE: interval=1 blocks for one second to sample CPU usage.
    cpu_percent = psutil.cpu_percent(interval=1)
    cpu_count = psutil.cpu_count()
    memory = psutil.virtual_memory()
    swap = psutil.swap_memory()
    disk = psutil.disk_usage('/')
    network = psutil.net_io_counters()

    network_stats = None
    if network:
        network_stats = {
            "bytes_sent": network.bytes_sent,
            "bytes_recv": network.bytes_recv,
            "packets_sent": network.packets_sent,
            "packets_recv": network.packets_recv,
        }

    return {
        "cpu": {
            "usage_percent": cpu_percent,
            "count": cpu_count,
        },
        "memory": {
            "total_mb": memory.total / MB,
            "used_mb": memory.used / MB,
            "available_mb": memory.available / MB,
            "usage_percent": memory.percent,
        },
        "swap": {
            "total_mb": swap.total / MB,
            "used_mb": swap.used / MB,
            "usage_percent": swap.percent,
        },
        "disk": {
            "total_gb": disk.total / GB,
            "used_gb": disk.used / GB,
            "free_gb": disk.free / GB,
            "usage_percent": (disk.used / disk.total) * 100,
        },
        "network": network_stats,
    }
|
||||
|
||||
|
||||
async def _get_health_status(settings: Settings) -> Dict[str, Any]:
    """Run health checks and aggregate them into an overall status.

    Escalation rules (preserved from the original implementation):
    "critical" always overrides the current status; "warning" only
    upgrades from "healthy"; directory failures force "unhealthy".
    """

    health: Dict[str, Any] = {
        "status": "healthy",
        "checks": {},
        "issues": [],
    }

    # Database connectivity.
    try:
        from src.database.connection import get_database_manager

        await get_database_manager(settings).test_connection()
        health["checks"]["database"] = "healthy"
    except Exception as e:
        health["checks"]["database"] = "unhealthy"
        health["issues"].append(f"Database connection failed: {e}")
        health["status"] = "unhealthy"

    # Disk space thresholds: >90% critical, >80% warning.
    disk = psutil.disk_usage('/')
    disk_usage_percent = (disk.used / disk.total) * 100

    if disk_usage_percent > 90:
        health["checks"]["disk_space"] = "critical"
        health["issues"].append(f"Disk usage critical: {disk_usage_percent:.1f}%")
        health["status"] = "critical"
    elif disk_usage_percent > 80:
        health["checks"]["disk_space"] = "warning"
        health["issues"].append(f"Disk usage high: {disk_usage_percent:.1f}%")
        if health["status"] == "healthy":
            health["status"] = "warning"
    else:
        health["checks"]["disk_space"] = "healthy"

    # Memory thresholds mirror the disk thresholds.
    memory = psutil.virtual_memory()

    if memory.percent > 90:
        health["checks"]["memory"] = "critical"
        health["issues"].append(f"Memory usage critical: {memory.percent:.1f}%")
        health["status"] = "critical"
    elif memory.percent > 80:
        health["checks"]["memory"] = "warning"
        health["issues"].append(f"Memory usage high: {memory.percent:.1f}%")
        if health["status"] == "healthy":
            health["status"] = "warning"
    else:
        health["checks"]["memory"] = "healthy"

    # Required directories must exist and actually be directories.
    log_dir = Path(settings.log_directory)
    if log_dir.exists() and log_dir.is_dir():
        health["checks"]["log_directory"] = "healthy"
    else:
        health["checks"]["log_directory"] = "unhealthy"
        health["issues"].append(f"Log directory not accessible: {log_dir}")
        health["status"] = "unhealthy"

    backup_dir = Path(settings.backup_directory)
    if backup_dir.exists() and backup_dir.is_dir():
        health["checks"]["backup_directory"] = "healthy"
    else:
        health["checks"]["backup_directory"] = "unhealthy"
        health["issues"].append(f"Backup directory not accessible: {backup_dir}")
        health["status"] = "unhealthy"

    return health
|
||||
|
||||
|
||||
def _print_text_status(status_data: Dict[str, Any], detailed: bool) -> None:
    """Print status in human-readable text format.

    Renders the dict produced by ``_collect_status_data`` to stdout.
    The base sections (server, system, configuration) are always shown;
    the database/task/resource/health sections only appear when
    ``detailed`` is True AND the corresponding key is present.
    """

    # Report header.
    print("=" * 60)
    print("WiFi-DensePose API Server Status")
    print("=" * 60)
    print(f"Timestamp: {status_data['timestamp']}")
    print()

    # Server status
    server = status_data["server"]
    print("🖥️ Server Status:")
    if server["running"]:
        print(f" ✅ Running (PID: {server['pid']})")
        # Process metrics are only present when psutil could inspect the PID.
        if "start_time" in server:
            uptime = timedelta(seconds=int(server["uptime_seconds"]))
            print(f" ⏱️ Uptime: {uptime}")
            print(f" 💾 Memory: {server['memory_usage_mb']:.1f} MB")
            print(f" 🔧 CPU: {server['cpu_percent']:.1f}%")
            print(f" 🧵 Threads: {server['num_threads']}")
    else:
        print(" ❌ Not running")
        # A PID file without a live process indicates an unclean shutdown.
        if server["pid_file_exists"]:
            print(" ⚠️ Stale PID file exists")
    print()

    # System status
    system = status_data["system"]
    print("🖥️ System:")
    print(f" Hostname: {system['hostname']}")
    print(f" Platform: {system['platform']} ({system['architecture']})")
    print(f" Python: {system['python_version']}")
    uptime = timedelta(seconds=int(system["uptime_seconds"]))
    print(f" Uptime: {uptime}")
    print()

    # Configuration
    config = status_data["configuration"]
    print("⚙️ Configuration:")
    print(f" Environment: {config['environment']}")
    print(f" Debug: {config['debug']}")
    print(f" API Version: {config['version']}")
    print(f" Listen: {config['host']}:{config['port']}")
    print(f" Database: {'✅' if config['database_configured'] else '❌'}")
    print(f" Redis: {'✅' if config['redis_enabled'] else '❌'}")
    print(f" Monitoring: {'✅' if config['monitoring_enabled'] else '❌'}")
    print(f" Cleanup: {'✅' if config['cleanup_enabled'] else '❌'}")
    print(f" Backup: {'✅' if config['backup_enabled'] else '❌'}")
    print()

    if detailed:
        # Database status
        if "database" in status_data:
            db = status_data["database"]
            print("🗄️ Database:")
            if db["connected"]:
                print(" ✅ Connected")
                if "tables" in db:
                    print(" 📊 Table counts:")
                    # Each entry is either {"count": n} or {"error": msg}.
                    for table, info in db["tables"].items():
                        if "count" in info:
                            print(f" {table}: {info['count']:,}")
                        else:
                            print(f" {table}: Error - {info.get('error', 'Unknown')}")
            else:
                print(f" ❌ Not connected: {db.get('error', 'Unknown error')}")
            print()

        # Background tasks
        if "background_tasks" in status_data:
            tasks = status_data["background_tasks"]
            print("🔄 Background Tasks:")
            for task_name, task_info in tasks.items():
                if "error" in task_info:
                    print(f" ❌ {task_name}: {task_info['error']}")
                else:
                    # NOTE(review): assumes each manager's get_stats() nests
                    # its fields under a "manager" key — verify against the
                    # task manager implementations.
                    manager_info = task_info.get("manager", {})
                    print(f" 📋 {task_name}:")
                    print(f" Running: {manager_info.get('running', 'Unknown')}")
                    print(f" Last run: {manager_info.get('last_run', 'Never')}")
                    print(f" Run count: {manager_info.get('run_count', 0)}")
            print()

        # Resource usage
        if "resources" in status_data:
            resources = status_data["resources"]
            print("📊 Resource Usage:")

            cpu = resources["cpu"]
            print(f" 🔧 CPU: {cpu['usage_percent']:.1f}% ({cpu['count']} cores)")

            memory = resources["memory"]
            print(f" 💾 Memory: {memory['usage_percent']:.1f}% "
                  f"({memory['used_mb']:.0f}/{memory['total_mb']:.0f} MB)")

            disk = resources["disk"]
            print(f" 💿 Disk: {disk['usage_percent']:.1f}% "
                  f"({disk['used_gb']:.1f}/{disk['total_gb']:.1f} GB)")
            print()

        # Health status
        if "health" in status_data:
            health = status_data["health"]
            print("🏥 Health Status:")

            # Maps a health-status string to its display symbol.
            status_emoji = {
                "healthy": "✅",
                "warning": "⚠️",
                "critical": "❌",
                "unhealthy": "❌"
            }

            print(f" Overall: {status_emoji.get(health['status'], '❓')} {health['status'].upper()}")

            if health["issues"]:
                print(" Issues:")
                for issue in health["issues"]:
                    print(f" • {issue}")

            print(" Checks:")
            for check, status in health["checks"].items():
                emoji = status_emoji.get(status, "❓")
                print(f" {emoji} {check}: {status}")
            print()

    print("=" * 60)
|
||||
|
||||
def get_quick_status(settings: Settings) -> str:
    """Get a quick one-line status."""

    from src.commands.stop import get_server_status

    status = get_server_status(settings)

    # Three mutually exclusive states: up, down-with-leftover-file, down.
    if status["running"]:
        return f"✅ Running (PID: {status['pid']})"
    if status["pid_file_exists"]:
        return "⚠️ Not running (stale PID file)"
    return "❌ Not running"
|
||||
|
||||
|
||||
async def check_health(settings: Settings) -> bool:
    """Quick health check - returns True if healthy."""

    try:
        status_data = await _collect_status_data(settings, detailed=True)

        # The server process must be up at all.
        if not status_data["server"]["running"]:
            return False

        # "warning" still counts as operational; "critical"/"unhealthy" do not.
        if "health" in status_data:
            return status_data["health"]["status"] in ["healthy", "warning"]

        return True

    except Exception:
        # Any failure while gathering status is treated as unhealthy.
        return False
|
||||
294
v1/src/commands/stop.py
Normal file
294
v1/src/commands/stop.py
Normal file
@@ -0,0 +1,294 @@
|
||||
"""
|
||||
Stop command implementation for WiFi-DensePose API
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from src.config.settings import Settings
|
||||
from src.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def stop_command(
    settings: Settings,
    force: bool = False,
    timeout: int = 30
) -> None:
    """Stop the WiFi-DensePose API server."""

    logger.info("Stopping WiFi-DensePose API server...")

    status = get_server_status(settings)

    # Nothing to do when the server is down; just tidy up any stale PID file.
    if not status["running"]:
        if status["pid_file_exists"]:
            logger.info("Server is not running, but PID file exists. Cleaning up...")
            _cleanup_pid_file(settings)
        else:
            logger.info("Server is not running")
        return

    pid = status["pid"]
    logger.info(f"Found running server with PID {pid}")

    try:
        if force:
            await _force_stop_server(pid, settings)
        else:
            await _graceful_stop_server(pid, timeout, settings)
    except Exception as e:
        logger.error(f"Failed to stop server: {e}")
        raise
|
||||
|
||||
|
||||
async def _graceful_stop_server(pid: int, timeout: int, settings: Settings) -> None:
    """Stop server gracefully with timeout.

    Sends SIGTERM and polls once per second for up to ``timeout`` seconds;
    if the process is still alive after that, falls back to a forced stop.

    Args:
        pid: Process ID of the running server.
        timeout: Seconds to wait for a graceful exit before forcing.
        settings: Application settings (used to locate the PID file).

    Raises:
        OSError: If signalling the process fails for a reason other than
            the process already being gone.
    """

    logger.info(f"Attempting graceful shutdown (timeout: {timeout}s)...")

    try:
        # Send SIGTERM for graceful shutdown
        os.kill(pid, signal.SIGTERM)
        logger.info("Sent SIGTERM signal")

        # Poll until the process disappears or the timeout elapses.
        start_time = time.time()
        while time.time() - start_time < timeout:
            try:
                # Signal 0 only checks whether the process still exists.
                os.kill(pid, 0)
                await asyncio.sleep(1)
            except OSError:
                # Process has terminated
                logger.info("Server stopped gracefully")
                _cleanup_pid_file(settings)
                return

        # Timeout reached, force kill
        logger.warning(f"Graceful shutdown timeout ({timeout}s) reached, forcing stop...")
        await _force_stop_server(pid, settings)

    except ProcessLookupError:
        # The PID no longer exists (ESRCH). Catching the dedicated OSError
        # subclass replaces the non-portable hard-coded `e.errno == 3` check.
        logger.info("Process already terminated")
        _cleanup_pid_file(settings)
    except OSError as e:
        logger.error(f"Failed to send signal to process {pid}: {e}")
        raise
|
||||
|
||||
|
||||
async def _force_stop_server(pid: int, settings: Settings) -> None:
    """Force stop the server immediately via SIGKILL.

    The PID file is cleaned up regardless of outcome.

    Args:
        pid: Process ID of the running server.
        settings: Application settings (used to locate the PID file).

    Raises:
        OSError: If the kill signal cannot be delivered for a reason other
            than the process already being gone.
    """
    logger.info("Force stopping server...")

    try:
        # SIGKILL cannot be caught or ignored by the target process.
        os.kill(pid, signal.SIGKILL)
        logger.info("Sent SIGKILL signal")

        # Give the kernel a moment to reap the process.
        await asyncio.sleep(2)

        # Verify the process is actually gone.  Only ProcessLookupError
        # means "gone"; PermissionError (EPERM) would mean it still exists.
        try:
            os.kill(pid, 0)  # signal 0: existence check only
            logger.error(f"Process {pid} still running after SIGKILL")
        except ProcessLookupError:
            logger.info("Server force stopped")

    except ProcessLookupError:
        # Process exited before we could signal it.
        # (ProcessLookupError replaces the old `e.errno == 3` magic number.)
        logger.info("Process already terminated")
    except OSError as e:
        logger.error(f"Failed to force kill process {pid}: {e}")
        raise
    finally:
        _cleanup_pid_file(settings)
|
||||
|
||||
|
||||
def _cleanup_pid_file(settings: Settings) -> None:
    """Remove the server PID file if present.

    Uses EAFP (unlink and catch FileNotFoundError) instead of the original
    exists()/unlink() pair, which had a check-then-delete race.
    """
    pid_file = Path(settings.log_directory) / "wifi-densepose-api.pid"

    try:
        pid_file.unlink()
        logger.info("Cleaned up PID file")
    except FileNotFoundError:
        # Nothing to clean up.
        pass
    except Exception as e:
        # Best-effort: a stale PID file is logged, not fatal.
        logger.warning(f"Failed to remove PID file: {e}")
|
||||
|
||||
|
||||
def get_server_status(settings: Settings) -> dict:
|
||||
"""Get current server status."""
|
||||
|
||||
pid_file = Path(settings.log_directory) / "wifi-densepose-api.pid"
|
||||
|
||||
status = {
|
||||
"running": False,
|
||||
"pid": None,
|
||||
"pid_file": str(pid_file),
|
||||
"pid_file_exists": pid_file.exists(),
|
||||
}
|
||||
|
||||
if pid_file.exists():
|
||||
try:
|
||||
with open(pid_file, 'r') as f:
|
||||
pid = int(f.read().strip())
|
||||
|
||||
status["pid"] = pid
|
||||
|
||||
# Check if process is running
|
||||
try:
|
||||
os.kill(pid, 0) # Signal 0 just checks if process exists
|
||||
status["running"] = True
|
||||
except OSError:
|
||||
# Process doesn't exist
|
||||
status["running"] = False
|
||||
|
||||
except (ValueError, IOError):
|
||||
# Invalid PID file
|
||||
status["running"] = False
|
||||
|
||||
return status
|
||||
|
||||
|
||||
async def stop_all_background_tasks(settings: Settings) -> None:
    """Signal any running background tasks to stop.

    NOTE(review): currently a placeholder -- a real implementation would
    talk to a task queue or signal worker processes.  Failures are logged,
    never raised.
    """
    logger.info("Stopping background tasks...")

    try:
        # Placeholder: no task-queue integration yet, so we only record
        # that the stop signal was requested.
        logger.info("Background tasks stop signal sent")
    except Exception as e:
        logger.error(f"Failed to stop background tasks: {e}")
|
||||
|
||||
|
||||
async def cleanup_resources(settings: Settings) -> None:
    """Clean up system resources on shutdown.

    Closes database connection pools and removes ``temp`` subdirectories
    under the log and backup directories.  Each step is best-effort:
    failures are logged as warnings and this function never raises.

    Args:
        settings: Application settings providing ``log_directory`` and
            ``backup_directory``.
    """
    logger.info("Cleaning up resources...")

    try:
        # Close database connections.
        from src.database.connection import get_database_manager

        db_manager = get_database_manager(settings)
        await db_manager.close_all_connections()
        logger.info("Database connections closed")

    except Exception as e:
        logger.warning(f"Failed to close database connections: {e}")

    try:
        # Hoisted out of the loop: import once, not per directory.
        import shutil

        # Clean up temporary files.
        temp_files = [
            Path(settings.log_directory) / "temp",
            Path(settings.backup_directory) / "temp",
        ]

        for temp_path in temp_files:
            if temp_path.exists() and temp_path.is_dir():
                shutil.rmtree(temp_path)
                logger.info(f"Cleaned up temporary directory: {temp_path}")

    except Exception as e:
        logger.warning(f"Failed to clean up temporary files: {e}")

    logger.info("Resource cleanup completed")
|
||||
|
||||
|
||||
def is_server_running(settings: Settings) -> bool:
    """Return True when the process recorded in the PID file is alive."""
    return get_server_status(settings)["running"]
|
||||
|
||||
|
||||
def get_server_pid(settings: Settings) -> Optional[int]:
    """Return the running server's PID, or None when it is not running."""
    current = get_server_status(settings)
    if current["running"]:
        return current["pid"]
    return None
|
||||
|
||||
|
||||
async def wait_for_server_stop(settings: Settings, timeout: int = 30) -> bool:
    """Poll once per second until the server stops or *timeout* elapses.

    Returns:
        True if the server stopped within the timeout, False otherwise.
    """
    deadline = time.time() + timeout

    while time.time() < deadline:
        if not is_server_running(settings):
            return True
        await asyncio.sleep(1)

    return False
|
||||
|
||||
|
||||
def send_reload_signal(settings: Settings) -> bool:
    """Send SIGHUP to the running server to request a reload.

    Returns:
        True when the signal was delivered; False when the server is not
        running or the signal could not be sent.
    """
    current = get_server_status(settings)

    if not current["running"]:
        logger.error("Server is not running")
        return False

    try:
        # SIGHUP is the conventional "reload configuration" signal.
        os.kill(current["pid"], signal.SIGHUP)
    except OSError as e:
        logger.error(f"Failed to send reload signal: {e}")
        return False

    logger.info("Sent reload signal to server")
    return True
|
||||
|
||||
|
||||
async def restart_server(settings: Settings, timeout: int = 30) -> None:
    """Restart the server: stop the running instance, then start a new one.

    Args:
        settings: Application settings.
        timeout: Seconds to allow for a graceful stop before forcing one.
    """
    logger.info("Restarting server...")

    # Phase 1: stop the current instance if one is running, escalating to
    # a forced stop when it does not exit within the timeout.
    if is_server_running(settings):
        await stop_command(settings, timeout=timeout)

        if not await wait_for_server_stop(settings, timeout):
            logger.error("Server did not stop within timeout, forcing restart")
            await stop_command(settings, force=True)

    # Phase 2: start a fresh instance.  Imported here to avoid a circular
    # import between the start and stop command modules.
    from src.commands.start import start_command
    await start_command(settings)
|
||||
|
||||
|
||||
def get_stop_status_summary(settings: Settings) -> dict:
    """Summarize what a stop operation would need to do right now.

    Returns:
        dict with ``server_running``, ``pid``, ``pid_file_exists``,
        ``can_stop`` (stop is possible only when running) and
        ``cleanup_needed`` (a stale PID file exists without a process).
    """
    current = get_server_status(settings)
    running = current["running"]
    has_pid_file = current["pid_file_exists"]

    return {
        "server_running": running,
        "pid": current["pid"],
        "pid_file_exists": has_pid_file,
        "can_stop": running,
        "cleanup_needed": has_pid_file and not running,
    }
|
||||