Files
wifi-densepose/v1/src/tasks/cleanup.py
Claude 6ed69a3d48 feat: Complete Rust port of WiFi-DensePose with modular crates
Major changes:
- Organized Python v1 implementation into v1/ subdirectory
- Created Rust workspace with 9 modular crates:
  - wifi-densepose-core: Core types, traits, errors
  - wifi-densepose-signal: CSI processing, phase sanitization, FFT
  - wifi-densepose-nn: Neural network inference (ONNX/Candle/tch)
  - wifi-densepose-api: Axum-based REST/WebSocket API
  - wifi-densepose-db: SQLx database layer
  - wifi-densepose-config: Configuration management
  - wifi-densepose-hardware: Hardware abstraction
  - wifi-densepose-wasm: WebAssembly bindings
  - wifi-densepose-cli: Command-line interface

Documentation:
- ADR-001: Workspace structure
- ADR-002: Signal processing library selection
- ADR-003: Neural network inference strategy
- DDD domain model with bounded contexts

Testing:
- 69 tests passing across all crates
- Signal processing: 45 tests
- Neural networks: 21 tests
- Core: 3 doc tests

Performance targets:
- 10x faster CSI processing (~0.5ms vs ~5ms)
- 5x lower memory usage (~100MB vs ~500MB)
- WASM support for browser deployment
2026-01-13 03:11:16 +00:00

598 lines
21 KiB
Python

"""
Periodic cleanup tasks for WiFi-DensePose API
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from contextlib import asynccontextmanager
from sqlalchemy import delete, select, func, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
from src.config.settings import Settings
from src.database.connection import get_database_manager
from src.database.models import (
CSIData, PoseDetection, SystemMetric, AuditLog, Session, Device
)
from src.logger import get_logger
logger = get_logger(__name__)
class CleanupTask:
    """Base class for periodic cleanup tasks.

    Subclasses override :meth:`execute`; :meth:`run` wraps it with
    timing, logging, and bookkeeping of run/error/cleaned counters.
    """

    def __init__(self, name: str, settings: Settings):
        self.name = name
        self.settings = settings
        self.enabled = True       # tasks start enabled; toggled via the manager
        self.last_run = None      # datetime of the most recent successful run
        self.run_count = 0
        self.error_count = 0
        self.total_cleaned = 0    # cumulative items removed across all runs

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Perform the actual cleanup work; subclasses must implement."""
        raise NotImplementedError

    async def run(self, session: AsyncSession) -> Dict[str, Any]:
        """Run :meth:`execute` with error handling and stat tracking.

        Returns a result dict with task name, status, start time and
        duration; on failure the error is captured instead of raised.
        """
        started = datetime.utcnow()
        try:
            logger.info(f"Starting cleanup task: {self.name}")
            result = await self.execute(session)
            self.last_run = started
            self.run_count += 1
            if result.get("cleaned_count", 0) > 0:
                self.total_cleaned += result["cleaned_count"]
                logger.info(
                    f"Cleanup task {self.name} completed: "
                    f"cleaned {result['cleaned_count']} items"
                )
            else:
                logger.debug(f"Cleanup task {self.name} completed: no items to clean")
            return {
                "task": self.name,
                "status": "success",
                "start_time": started.isoformat(),
                "duration_ms": (datetime.utcnow() - started).total_seconds() * 1000,
                **result,
            }
        except Exception as e:
            # Failures are reported in the result dict rather than raised,
            # so one broken task cannot abort a whole cleanup sweep.
            self.error_count += 1
            logger.error(f"Cleanup task {self.name} failed: {e}", exc_info=True)
            return {
                "task": self.name,
                "status": "error",
                "start_time": started.isoformat(),
                "duration_ms": (datetime.utcnow() - started).total_seconds() * 1000,
                "error": str(e),
                "cleaned_count": 0,
            }

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of this task's statistics."""
        return {
            "name": self.name,
            "enabled": self.enabled,
            "last_run": self.last_run.isoformat() if self.last_run else None,
            "run_count": self.run_count,
            "error_count": self.error_count,
            "total_cleaned": self.total_cleaned,
        }
class OldCSIDataCleanup(CleanupTask):
    """Delete CSI data records older than the configured retention window."""

    def __init__(self, settings: Settings):
        super().__init__("old_csi_data_cleanup", settings)
        # A non-positive retention disables this task's work entirely.
        self.retention_days = settings.csi_data_retention_days
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Remove expired CSI rows in batches; returns a cleanup summary."""
        if self.retention_days <= 0:
            return {"cleaned_count": 0, "message": "CSI data retention disabled"}

        cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days)
        expired = CSIData.created_at < cutoff_date

        # How many rows fall outside the retention window?
        total_count = await session.scalar(
            select(func.count(CSIData.id)).where(expired)
        )
        if total_count == 0:
            return {"cleaned_count": 0, "message": "No old CSI data to clean"}

        cleaned_count = 0
        # Delete in bounded batches, committing each one, so a large
        # backlog never holds long-lived locks or a huge transaction.
        while cleaned_count < total_count:
            rows = await session.execute(
                select(CSIData.id).where(expired).limit(self.batch_size)
            )
            ids_to_delete = [row[0] for row in rows.fetchall()]
            if not ids_to_delete:
                break
            await session.execute(
                delete(CSIData).where(CSIData.id.in_(ids_to_delete))
            )
            await session.commit()
            batch_size = len(ids_to_delete)
            cleaned_count += batch_size
            logger.debug(f"Deleted {batch_size} CSI data records (total: {cleaned_count})")
            # Brief pause between batches to keep database load low.
            await asyncio.sleep(0.1)

        return {
            "cleaned_count": cleaned_count,
            "retention_days": self.retention_days,
            "cutoff_date": cutoff_date.isoformat(),
        }
class OldPoseDetectionCleanup(CleanupTask):
    """Delete pose detection records older than the retention window."""

    def __init__(self, settings: Settings):
        super().__init__("old_pose_detection_cleanup", settings)
        # A non-positive retention disables this task's work entirely.
        self.retention_days = settings.pose_detection_retention_days
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Remove expired pose detections in batches; returns a summary."""
        if self.retention_days <= 0:
            return {"cleaned_count": 0, "message": "Pose detection retention disabled"}

        cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days)
        expired = PoseDetection.created_at < cutoff_date

        # How many rows fall outside the retention window?
        total_count = await session.scalar(
            select(func.count(PoseDetection.id)).where(expired)
        )
        if total_count == 0:
            return {"cleaned_count": 0, "message": "No old pose detections to clean"}

        cleaned_count = 0
        # Delete in bounded batches, committing each one, so a large
        # backlog never holds long-lived locks or a huge transaction.
        while cleaned_count < total_count:
            rows = await session.execute(
                select(PoseDetection.id).where(expired).limit(self.batch_size)
            )
            ids_to_delete = [row[0] for row in rows.fetchall()]
            if not ids_to_delete:
                break
            await session.execute(
                delete(PoseDetection).where(PoseDetection.id.in_(ids_to_delete))
            )
            await session.commit()
            batch_size = len(ids_to_delete)
            cleaned_count += batch_size
            logger.debug(f"Deleted {batch_size} pose detection records (total: {cleaned_count})")
            # Brief pause between batches to keep database load low.
            await asyncio.sleep(0.1)

        return {
            "cleaned_count": cleaned_count,
            "retention_days": self.retention_days,
            "cutoff_date": cutoff_date.isoformat(),
        }
class OldMetricsCleanup(CleanupTask):
    """Delete system metric records older than the retention window."""

    def __init__(self, settings: Settings):
        super().__init__("old_metrics_cleanup", settings)
        # A non-positive retention disables this task's work entirely.
        self.retention_days = settings.metrics_retention_days
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Remove expired metric rows in batches; returns a summary."""
        if self.retention_days <= 0:
            return {"cleaned_count": 0, "message": "Metrics retention disabled"}

        cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days)
        expired = SystemMetric.created_at < cutoff_date

        # How many rows fall outside the retention window?
        total_count = await session.scalar(
            select(func.count(SystemMetric.id)).where(expired)
        )
        if total_count == 0:
            return {"cleaned_count": 0, "message": "No old metrics to clean"}

        cleaned_count = 0
        # Delete in bounded batches, committing each one, so a large
        # backlog never holds long-lived locks or a huge transaction.
        while cleaned_count < total_count:
            rows = await session.execute(
                select(SystemMetric.id).where(expired).limit(self.batch_size)
            )
            ids_to_delete = [row[0] for row in rows.fetchall()]
            if not ids_to_delete:
                break
            await session.execute(
                delete(SystemMetric).where(SystemMetric.id.in_(ids_to_delete))
            )
            await session.commit()
            batch_size = len(ids_to_delete)
            cleaned_count += batch_size
            logger.debug(f"Deleted {batch_size} metric records (total: {cleaned_count})")
            # Brief pause between batches to keep database load low.
            await asyncio.sleep(0.1)

        return {
            "cleaned_count": cleaned_count,
            "retention_days": self.retention_days,
            "cutoff_date": cutoff_date.isoformat(),
        }
class OldAuditLogCleanup(CleanupTask):
    """Delete audit log records older than the retention window."""

    def __init__(self, settings: Settings):
        super().__init__("old_audit_log_cleanup", settings)
        # A non-positive retention disables this task's work entirely.
        self.retention_days = settings.audit_log_retention_days
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Remove expired audit log rows in batches; returns a summary."""
        if self.retention_days <= 0:
            return {"cleaned_count": 0, "message": "Audit log retention disabled"}

        cutoff_date = datetime.utcnow() - timedelta(days=self.retention_days)
        expired = AuditLog.created_at < cutoff_date

        # How many rows fall outside the retention window?
        total_count = await session.scalar(
            select(func.count(AuditLog.id)).where(expired)
        )
        if total_count == 0:
            return {"cleaned_count": 0, "message": "No old audit logs to clean"}

        cleaned_count = 0
        # Delete in bounded batches, committing each one, so a large
        # backlog never holds long-lived locks or a huge transaction.
        while cleaned_count < total_count:
            rows = await session.execute(
                select(AuditLog.id).where(expired).limit(self.batch_size)
            )
            ids_to_delete = [row[0] for row in rows.fetchall()]
            if not ids_to_delete:
                break
            await session.execute(
                delete(AuditLog).where(AuditLog.id.in_(ids_to_delete))
            )
            await session.commit()
            batch_size = len(ids_to_delete)
            cleaned_count += batch_size
            logger.debug(f"Deleted {batch_size} audit log records (total: {cleaned_count})")
            # Brief pause between batches to keep database load low.
            await asyncio.sleep(0.1)

        return {
            "cleaned_count": cleaned_count,
            "retention_days": self.retention_days,
            "cutoff_date": cutoff_date.isoformat(),
        }
class OrphanedSessionCleanup(CleanupTask):
    """Cleanup orphaned sessions (sessions without associated data)."""

    def __init__(self, settings: Settings):
        super().__init__("orphaned_session_cleanup", settings)
        # A non-positive threshold disables the task.
        self.orphan_threshold_days = settings.orphaned_session_threshold_days
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Delete old, finished sessions that have no CSI or pose data.

        Returns a summary dict with the number of sessions removed.
        """
        if self.orphan_threshold_days <= 0:
            return {"cleaned_count": 0, "message": "Orphaned session cleanup disabled"}
        cutoff_date = datetime.utcnow() - timedelta(days=self.orphan_threshold_days)
        # Old, terminal-state sessions referenced by neither CSI data nor
        # pose detections. Both subqueries must exclude NULL session_ids:
        # a single NULL inside a NOT IN subquery makes the predicate
        # unknown for every row, which would silently match nothing.
        # (The original code only filtered NULLs in the CSIData subquery.)
        orphaned_sessions_query = select(Session.id).where(
            and_(
                Session.created_at < cutoff_date,
                Session.status.in_(["completed", "failed", "cancelled"]),
                ~Session.id.in_(
                    select(CSIData.session_id).where(CSIData.session_id.isnot(None))
                ),
                ~Session.id.in_(
                    select(PoseDetection.session_id).where(
                        PoseDetection.session_id.isnot(None)
                    )
                ),
            )
        )
        result = await session.execute(orphaned_sessions_query)
        orphaned_ids = [row[0] for row in result.fetchall()]
        if not orphaned_ids:
            return {"cleaned_count": 0, "message": "No orphaned sessions to clean"}
        # Delete in one statement; orphan volume is expected to be small,
        # so no batching is done here (unlike the retention tasks).
        delete_query = delete(Session).where(Session.id.in_(orphaned_ids))
        await session.execute(delete_query)
        await session.commit()
        cleaned_count = len(orphaned_ids)
        return {
            "cleaned_count": cleaned_count,
            "orphan_threshold_days": self.orphan_threshold_days,
            "cutoff_date": cutoff_date.isoformat(),
        }
class InvalidDataCleanup(CleanupTask):
    """Cleanup invalid or corrupted data records."""

    def __init__(self, settings: Settings):
        super().__init__("invalid_data_cleanup", settings)
        # NOTE(review): batch_size is stored but deletes below are
        # currently unbatched, unlike the retention tasks.
        self.batch_size = settings.cleanup_batch_size

    async def execute(self, session: AsyncSession) -> Dict[str, Any]:
        """Delete CSI rows and pose detections that fail validity checks.

        Returns a summary with the total and per-table removal counts.
        """
        total_cleaned = 0
        # CSI rows are invalid when flagged so, missing amplitude/phase,
        # or carrying non-physical radio parameters. Use SQLAlchemy's
        # .is_() identity comparison instead of == None / == False
        # (recommended idiom; avoids E711/E712 while producing the same
        # NULL-safe SQL).
        invalid_csi_query = select(CSIData.id).where(
            or_(
                CSIData.is_valid.is_(False),
                CSIData.amplitude.is_(None),
                CSIData.phase.is_(None),
                CSIData.frequency <= 0,
                CSIData.bandwidth <= 0,
                CSIData.num_subcarriers <= 0,
            )
        )
        result = await session.execute(invalid_csi_query)
        invalid_csi_ids = [row[0] for row in result.fetchall()]
        if invalid_csi_ids:
            delete_query = delete(CSIData).where(CSIData.id.in_(invalid_csi_ids))
            await session.execute(delete_query)
            total_cleaned += len(invalid_csi_ids)
            logger.debug(f"Deleted {len(invalid_csi_ids)} invalid CSI data records")
        # Pose detections are invalid when flagged so, reporting a
        # negative person count, or a confidence outside [0, 1]
        # (a NULL confidence is explicitly allowed).
        invalid_pose_query = select(PoseDetection.id).where(
            or_(
                PoseDetection.is_valid.is_(False),
                PoseDetection.person_count < 0,
                and_(
                    PoseDetection.detection_confidence.isnot(None),
                    or_(
                        PoseDetection.detection_confidence < 0,
                        PoseDetection.detection_confidence > 1,
                    ),
                ),
            )
        )
        result = await session.execute(invalid_pose_query)
        invalid_pose_ids = [row[0] for row in result.fetchall()]
        if invalid_pose_ids:
            delete_query = delete(PoseDetection).where(
                PoseDetection.id.in_(invalid_pose_ids)
            )
            await session.execute(delete_query)
            total_cleaned += len(invalid_pose_ids)
            logger.debug(f"Deleted {len(invalid_pose_ids)} invalid pose detection records")
        # A single commit covers both delete statements.
        await session.commit()
        return {
            "cleaned_count": total_cleaned,
            "invalid_csi_count": len(invalid_csi_ids) if invalid_csi_ids else 0,
            "invalid_pose_count": len(invalid_pose_ids) if invalid_pose_ids else 0,
        }
class CleanupManager:
    """Coordinates all registered cleanup tasks over one database manager."""

    def __init__(self, settings: Settings):
        self.settings = settings
        self.db_manager = get_database_manager(settings)
        self.tasks = self._initialize_tasks()
        self.running = False      # guards against overlapping sweeps
        self.last_run = None      # start time of the most recent sweep
        self.run_count = 0
        self.total_cleaned = 0    # cumulative items cleaned across sweeps

    def _initialize_tasks(self) -> List[CleanupTask]:
        """Build the task list, keeping only tasks that start enabled."""
        candidates = [
            OldCSIDataCleanup(self.settings),
            OldPoseDetectionCleanup(self.settings),
            OldMetricsCleanup(self.settings),
            OldAuditLogCleanup(self.settings),
            OrphanedSessionCleanup(self.settings),
            InvalidDataCleanup(self.settings),
        ]
        enabled_tasks = [task for task in candidates if task.enabled]
        logger.info(f"Initialized {len(enabled_tasks)} cleanup tasks")
        return enabled_tasks

    def _find_task(self, task_name: str) -> Optional[CleanupTask]:
        """Look up a registered task by name; None when unknown."""
        for task in self.tasks:
            if task.name == task_name:
                return task
        return None

    async def run_all_tasks(self) -> Dict[str, Any]:
        """Run every enabled task once within a single DB session.

        Returns a summary dict; reports 'already_running' when a sweep
        is in progress instead of starting a second one.
        """
        if self.running:
            return {"status": "already_running", "message": "Cleanup already in progress"}
        self.running = True
        start_time = datetime.utcnow()
        try:
            logger.info("Starting cleanup tasks")
            results: List[Dict[str, Any]] = []
            total_cleaned = 0
            async with self.db_manager.get_async_session() as session:
                for task in self.tasks:
                    if not task.enabled:
                        continue
                    outcome = await task.run(session)
                    results.append(outcome)
                    total_cleaned += outcome.get("cleaned_count", 0)
            self.last_run = start_time
            self.run_count += 1
            self.total_cleaned += total_cleaned
            duration = (datetime.utcnow() - start_time).total_seconds()
            logger.info(
                f"Cleanup tasks completed: cleaned {total_cleaned} items "
                f"in {duration:.2f} seconds"
            )
            return {
                "status": "completed",
                "start_time": start_time.isoformat(),
                "duration_seconds": duration,
                "total_cleaned": total_cleaned,
                "task_results": results,
            }
        except Exception as e:
            logger.error(f"Cleanup tasks failed: {e}", exc_info=True)
            return {
                "status": "error",
                "start_time": start_time.isoformat(),
                "duration_seconds": (datetime.utcnow() - start_time).total_seconds(),
                "error": str(e),
                "total_cleaned": 0,
            }
        finally:
            # Always release the guard, even on failure.
            self.running = False

    async def run_task(self, task_name: str) -> Dict[str, Any]:
        """Run one named task; error dict when unknown or disabled."""
        task = self._find_task(task_name)
        if task is None:
            return {
                "status": "error",
                "error": f"Task '{task_name}' not found",
                "available_tasks": [t.name for t in self.tasks],
            }
        if not task.enabled:
            return {
                "status": "error",
                "error": f"Task '{task_name}' is disabled",
            }
        async with self.db_manager.get_async_session() as session:
            return await task.run(session)

    def get_stats(self) -> Dict[str, Any]:
        """Snapshot of manager-level and per-task statistics."""
        return {
            "manager": {
                "running": self.running,
                "last_run": self.last_run.isoformat() if self.last_run else None,
                "run_count": self.run_count,
                "total_cleaned": self.total_cleaned,
            },
            "tasks": [task.get_stats() for task in self.tasks],
        }

    def enable_task(self, task_name: str) -> bool:
        """Enable the named task; True when it exists."""
        task = self._find_task(task_name)
        if task is None:
            return False
        task.enabled = True
        return True

    def disable_task(self, task_name: str) -> bool:
        """Disable the named task; True when it exists."""
        task = self._find_task(task_name)
        if task is None:
            return False
        task.enabled = False
        return True
# Global cleanup manager instance (lazily created process-wide singleton)
_cleanup_manager: Optional[CleanupManager] = None


def get_cleanup_manager(settings: Settings) -> CleanupManager:
    """Return the process-wide CleanupManager, creating it on first call.

    NOTE(review): `settings` is only consulted on the first call; later
    calls return the existing instance regardless of the settings passed.
    """
    global _cleanup_manager
    if _cleanup_manager is None:
        _cleanup_manager = CleanupManager(settings)
    return _cleanup_manager
async def run_periodic_cleanup(settings: Settings):
    """Loop forever, running all cleanup tasks each configured interval.

    Exits cleanly on task cancellation; any other error is logged and
    the loop retries after a fixed back-off.
    """
    cleanup_manager = get_cleanup_manager(settings)
    while True:
        try:
            await cleanup_manager.run_all_tasks()
            # Sleep until the next scheduled cleanup pass.
            await asyncio.sleep(settings.cleanup_interval_seconds)
        except asyncio.CancelledError:
            logger.info("Periodic cleanup cancelled")
            break
        except Exception as e:
            logger.error(f"Periodic cleanup error: {e}", exc_info=True)
            # Back off before retrying after an unexpected failure.
            await asyncio.sleep(60)