git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
1935 lines
52 KiB
Markdown
1935 lines
52 KiB
Markdown
# Security Architecture - RuVector Scipix OCR
|
|
|
|
## Executive Summary
|
|
|
|
This document outlines the comprehensive security architecture for the ruvector-scipix OCR system, designed with defense-in-depth principles, zero-trust assumptions, and Rust's memory-safety guarantees at its core.
|
|
|
|
**Security Posture**: Multi-layered protection spanning authentication, authorization, data privacy, input validation, secure processing, transport security, and supply chain integrity.
|
|
|
|
**Target Threat Model**: Protection against unauthorized access, data exfiltration, denial-of-service attacks, code injection, malicious file uploads, and supply chain attacks.
|
|
|
|
---
|
|
|
|
## 1. Authentication System
|
|
|
|
### 1.1 API Key Management
|
|
|
|
#### Key Generation Strategy
|
|
```rust
|
|
use argon2::{Argon2, PasswordHasher};
|
|
use rand::Rng;
|
|
use base64::{Engine as _, engine::general_purpose};
|
|
|
|
pub struct ApiKeyManager {
|
|
pepper: [u8; 32],
|
|
}
|
|
|
|
impl ApiKeyManager {
|
|
/// Generate cryptographically secure API key
|
|
pub fn generate_api_key(&self) -> Result<ApiKey, SecurityError> {
|
|
let mut rng = rand::thread_rng();
|
|
let mut key_bytes = [0u8; 32];
|
|
rng.fill(&mut key_bytes);
|
|
|
|
// Format: rvx_live_<base64url>_<checksum>
|
|
let key_data = general_purpose::URL_SAFE_NO_PAD.encode(&key_bytes);
|
|
let checksum = self.compute_checksum(&key_bytes)?;
|
|
|
|
let key_string = format!("rvx_live_{}_{}", key_data, checksum);
|
|
|
|
Ok(ApiKey {
|
|
key: key_string,
|
|
hash: self.hash_key(&key_bytes)?,
|
|
created_at: chrono::Utc::now(),
|
|
expires_at: None,
|
|
})
|
|
}
|
|
|
|
/// Hash API key for secure storage
|
|
fn hash_key(&self, key_bytes: &[u8]) -> Result<String, SecurityError> {
|
|
let mut combined = Vec::new();
|
|
combined.extend_from_slice(key_bytes);
|
|
combined.extend_from_slice(&self.pepper);
|
|
|
|
let salt = argon2::password_hash::SaltString::generate(&mut rand::thread_rng());
|
|
let argon2 = Argon2::default();
|
|
|
|
let hash = argon2.hash_password(&combined, &salt)
|
|
.map_err(|e| SecurityError::HashingFailed(e.to_string()))?;
|
|
|
|
Ok(hash.to_string())
|
|
}
|
|
|
|
/// Verify API key without timing attacks
|
|
pub fn verify_key(&self, provided_key: &str, stored_hash: &str) -> Result<bool, SecurityError> {
|
|
use argon2::PasswordVerifier;
|
|
use subtle::ConstantTimeEq;
|
|
|
|
// Parse and extract key bytes
|
|
let key_bytes = self.parse_key(provided_key)?;
|
|
|
|
let mut combined = Vec::new();
|
|
combined.extend_from_slice(&key_bytes);
|
|
combined.extend_from_slice(&self.pepper);
|
|
|
|
let parsed_hash = argon2::PasswordHash::new(stored_hash)
|
|
.map_err(|e| SecurityError::InvalidHash(e.to_string()))?;
|
|
|
|
// Constant-time verification
|
|
match Argon2::default().verify_password(&combined, &parsed_hash) {
|
|
Ok(_) => Ok(true),
|
|
Err(_) => Ok(false),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct ApiKey {
|
|
pub key: String, // Never logged or displayed
|
|
pub hash: String, // Stored in database
|
|
pub created_at: chrono::DateTime<chrono::Utc>,
|
|
pub expires_at: Option<chrono::DateTime<chrono::Utc>>,
|
|
}
|
|
```
|
|
|
|
#### Key Rotation Policy
|
|
- **Automatic rotation**: Every 90 days for high-privilege keys
|
|
- **Manual rotation**: Available at any time via API
|
|
- **Grace period**: 7 days overlap for seamless transition
|
|
- **Revocation**: Immediate invalidation with audit trail
|
|
|
|
### 1.2 Token Generation and Expiry
|
|
|
|
#### JWT Implementation
|
|
```rust
|
|
use jsonwebtoken::{encode, decode, Header, Algorithm, Validation, EncodingKey, DecodingKey};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct Claims {
|
|
sub: String, // Subject (API key ID)
|
|
exp: usize, // Expiry timestamp
|
|
iat: usize, // Issued at
|
|
nbf: usize, // Not before
|
|
jti: String, // JWT ID (for revocation)
|
|
scopes: Vec<String>, // Permission scopes
|
|
|
|
// Custom claims
|
|
rate_limit_tier: String,
|
|
max_image_size: usize,
|
|
}
|
|
|
|
pub struct TokenManager {
|
|
encoding_key: EncodingKey,
|
|
decoding_key: DecodingKey,
|
|
revoked_tokens: Arc<DashSet<String>>, // Distributed revocation list
|
|
}
|
|
|
|
impl TokenManager {
|
|
/// Generate short-lived access token
|
|
pub fn generate_access_token(&self, api_key_id: &str, scopes: Vec<String>) -> Result<String, SecurityError> {
|
|
let now = chrono::Utc::now();
|
|
let expiry = now + chrono::Duration::minutes(15); // 15-minute expiry
|
|
|
|
let claims = Claims {
|
|
sub: api_key_id.to_string(),
|
|
exp: expiry.timestamp() as usize,
|
|
iat: now.timestamp() as usize,
|
|
nbf: now.timestamp() as usize,
|
|
jti: uuid::Uuid::new_v4().to_string(),
|
|
scopes,
|
|
rate_limit_tier: "standard".to_string(),
|
|
max_image_size: 10 * 1024 * 1024, // 10MB
|
|
};
|
|
|
|
encode(&Header::new(Algorithm::EdDSA), &claims, &self.encoding_key)
|
|
.map_err(|e| SecurityError::TokenGenerationFailed(e.to_string()))
|
|
}
|
|
|
|
/// Validate and decode token
|
|
pub fn validate_token(&self, token: &str) -> Result<Claims, SecurityError> {
|
|
// Check revocation list first (fast path)
|
|
if self.is_revoked(token) {
|
|
return Err(SecurityError::TokenRevoked);
|
|
}
|
|
|
|
let mut validation = Validation::new(Algorithm::EdDSA);
|
|
validation.set_required_spec_claims(&["exp", "sub", "iat"]);
|
|
|
|
let token_data = decode::<Claims>(token, &self.decoding_key, &validation)
|
|
.map_err(|e| SecurityError::InvalidToken(e.to_string()))?;
|
|
|
|
// Double-check JTI in revocation list (defense in depth)
|
|
if self.revoked_tokens.contains(&token_data.claims.jti) {
|
|
return Err(SecurityError::TokenRevoked);
|
|
}
|
|
|
|
Ok(token_data.claims)
|
|
}
|
|
|
|
/// Revoke token immediately
|
|
pub fn revoke_token(&self, jti: &str) {
|
|
self.revoked_tokens.insert(jti.to_string());
|
|
// Also propagate to distributed cache (Redis/etc)
|
|
}
|
|
}
|
|
```
|
|
|
|
### 1.3 Client-Side vs Server-Side Keys
|
|
|
|
#### Key Classification
|
|
```rust
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub enum KeyType {
|
|
/// Server-side keys: Full API access, never exposed to browsers
|
|
ServerSide,
|
|
|
|
/// Client-side keys: Limited scope, rate-limited, domain-restricted
|
|
ClientSide,
|
|
|
|
/// Service keys: M2M communication, specific service scopes
|
|
Service,
|
|
}
|
|
|
|
pub struct KeyConfig {
|
|
key_type: KeyType,
|
|
allowed_domains: Vec<String>, // CORS whitelist
|
|
allowed_ips: Vec<IpNet>, // IP whitelist
|
|
max_requests_per_minute: u32,
|
|
allowed_scopes: Vec<Scope>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum Scope {
|
|
// Read-only scopes (safe for client-side)
|
|
ReadEquations,
|
|
ReadImages,
|
|
|
|
// Write scopes (server-side only)
|
|
WriteEquations,
|
|
ProcessBatch,
|
|
AccessAnalytics,
|
|
|
|
// Admin scopes (highly restricted)
|
|
ManageKeys,
|
|
AccessAuditLogs,
|
|
}
|
|
|
|
impl KeyConfig {
|
|
/// Validate scope is allowed for key type
|
|
pub fn validate_scope(&self, scope: &Scope) -> bool {
|
|
match self.key_type {
|
|
KeyType::ClientSide => {
|
|
// Client-side keys restricted to read-only operations
|
|
matches!(scope, Scope::ReadEquations | Scope::ReadImages)
|
|
},
|
|
KeyType::ServerSide => {
|
|
// Server-side keys can access all non-admin scopes
|
|
!matches!(scope, Scope::ManageKeys | Scope::AccessAuditLogs)
|
|
},
|
|
KeyType::Service => {
|
|
// Service keys have explicit scope list
|
|
self.allowed_scopes.contains(scope)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 2. Authorization System
|
|
|
|
### 2.1 Permission Levels
|
|
|
|
#### Role-Based Access Control (RBAC)
|
|
```rust
|
|
use std::collections::HashSet;
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
|
pub enum Permission {
|
|
// Basic permissions
|
|
ProcessImage,
|
|
ProcessEquation,
|
|
|
|
// Batch operations
|
|
ProcessBatch,
|
|
|
|
// Analytics
|
|
ViewUsageStats,
|
|
ViewDetailedAnalytics,
|
|
|
|
// Administration
|
|
ManageApiKeys,
|
|
ViewAuditLogs,
|
|
ConfigureRateLimits,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum Role {
|
|
Free,
|
|
Standard,
|
|
Premium,
|
|
Enterprise,
|
|
Admin,
|
|
}
|
|
|
|
impl Role {
|
|
pub fn permissions(&self) -> HashSet<Permission> {
|
|
use Permission::*;
|
|
|
|
match self {
|
|
Role::Free => {
|
|
vec![ProcessImage, ProcessEquation].into_iter().collect()
|
|
},
|
|
Role::Standard => {
|
|
vec![
|
|
ProcessImage,
|
|
ProcessEquation,
|
|
ProcessBatch,
|
|
ViewUsageStats,
|
|
].into_iter().collect()
|
|
},
|
|
Role::Premium => {
|
|
vec![
|
|
ProcessImage,
|
|
ProcessEquation,
|
|
ProcessBatch,
|
|
ViewUsageStats,
|
|
ViewDetailedAnalytics,
|
|
].into_iter().collect()
|
|
},
|
|
Role::Enterprise => {
|
|
vec![
|
|
ProcessImage,
|
|
ProcessEquation,
|
|
ProcessBatch,
|
|
ViewUsageStats,
|
|
ViewDetailedAnalytics,
|
|
ManageApiKeys,
|
|
].into_iter().collect()
|
|
},
|
|
Role::Admin => {
|
|
vec![
|
|
ProcessImage,
|
|
ProcessEquation,
|
|
ProcessBatch,
|
|
ViewUsageStats,
|
|
ViewDetailedAnalytics,
|
|
ManageApiKeys,
|
|
ViewAuditLogs,
|
|
ConfigureRateLimits,
|
|
].into_iter().collect()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct AuthorizationService {
|
|
role_cache: Arc<DashMap<String, Role>>,
|
|
}
|
|
|
|
impl AuthorizationService {
|
|
pub fn check_permission(&self, user_id: &str, permission: Permission) -> Result<(), AuthError> {
|
|
let role = self.role_cache
|
|
.get(user_id)
|
|
.ok_or(AuthError::UserNotFound)?;
|
|
|
|
if role.permissions().contains(&permission) {
|
|
Ok(())
|
|
} else {
|
|
Err(AuthError::InsufficientPermissions {
|
|
required: permission,
|
|
user_role: role.clone(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### 2.2 Rate Limiting Per Key
|
|
|
|
#### Token Bucket Algorithm with Distributed State
|
|
```rust
|
|
use std::time::{Duration, Instant};
|
|
use dashmap::DashMap;
|
|
|
|
pub struct RateLimiter {
|
|
buckets: Arc<DashMap<String, TokenBucket>>,
|
|
tiers: Arc<DashMap<String, RateLimitTier>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct RateLimitTier {
|
|
requests_per_second: u32,
|
|
burst_size: u32,
|
|
daily_quota: Option<u64>,
|
|
}
|
|
|
|
struct TokenBucket {
|
|
tokens: f64,
|
|
capacity: f64,
|
|
refill_rate: f64,
|
|
last_refill: Instant,
|
|
daily_count: u64,
|
|
daily_reset: Instant,
|
|
}
|
|
|
|
impl RateLimiter {
|
|
pub fn new() -> Self {
|
|
let mut tiers = DashMap::new();
|
|
|
|
tiers.insert("free".to_string(), RateLimitTier {
|
|
requests_per_second: 1,
|
|
burst_size: 5,
|
|
daily_quota: Some(100),
|
|
});
|
|
|
|
tiers.insert("standard".to_string(), RateLimitTier {
|
|
requests_per_second: 10,
|
|
burst_size: 50,
|
|
daily_quota: Some(10_000),
|
|
});
|
|
|
|
tiers.insert("premium".to_string(), RateLimitTier {
|
|
requests_per_second: 100,
|
|
burst_size: 500,
|
|
daily_quota: Some(1_000_000),
|
|
});
|
|
|
|
tiers.insert("enterprise".to_string(), RateLimitTier {
|
|
requests_per_second: 1000,
|
|
burst_size: 5000,
|
|
daily_quota: None, // Unlimited
|
|
});
|
|
|
|
Self {
|
|
buckets: Arc::new(DashMap::new()),
|
|
tiers: Arc::new(tiers),
|
|
}
|
|
}
|
|
|
|
/// Check and consume tokens (returns remaining tokens or error)
|
|
pub async fn check_limit(&self, key_id: &str, tier: &str, cost: u32) -> Result<u64, RateLimitError> {
|
|
let tier_config = self.tiers
|
|
.get(tier)
|
|
.ok_or(RateLimitError::UnknownTier)?;
|
|
|
|
// Initialize or get bucket
|
|
let mut bucket_ref = self.buckets.entry(key_id.to_string()).or_insert_with(|| {
|
|
TokenBucket {
|
|
tokens: tier_config.burst_size as f64,
|
|
capacity: tier_config.burst_size as f64,
|
|
refill_rate: tier_config.requests_per_second as f64,
|
|
last_refill: Instant::now(),
|
|
daily_count: 0,
|
|
daily_reset: Instant::now() + Duration::from_secs(86400),
|
|
}
|
|
});
|
|
|
|
let bucket = bucket_ref.value_mut();
|
|
|
|
// Refill tokens based on elapsed time
|
|
let now = Instant::now();
|
|
let elapsed = now.duration_since(bucket.last_refill).as_secs_f64();
|
|
bucket.tokens = (bucket.tokens + elapsed * bucket.refill_rate).min(bucket.capacity);
|
|
bucket.last_refill = now;
|
|
|
|
// Reset daily counter if needed
|
|
if now >= bucket.daily_reset {
|
|
bucket.daily_count = 0;
|
|
bucket.daily_reset = now + Duration::from_secs(86400);
|
|
}
|
|
|
|
// Check daily quota
|
|
if let Some(quota) = tier_config.daily_quota {
|
|
if bucket.daily_count >= quota {
|
|
return Err(RateLimitError::DailyQuotaExceeded {
|
|
quota,
|
|
reset_at: bucket.daily_reset,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Check if enough tokens available
|
|
if bucket.tokens >= cost as f64 {
|
|
bucket.tokens -= cost as f64;
|
|
bucket.daily_count += cost as u64;
|
|
Ok(bucket.tokens as u64)
|
|
} else {
|
|
Err(RateLimitError::RateLimitExceeded {
|
|
retry_after: Duration::from_secs_f64((cost as f64 - bucket.tokens) / bucket.refill_rate),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum RateLimitError {
|
|
RateLimitExceeded { retry_after: Duration },
|
|
DailyQuotaExceeded { quota: u64, reset_at: Instant },
|
|
UnknownTier,
|
|
}
|
|
```
|
|
|
|
### 2.3 Feature Access Control
|
|
|
|
#### Feature Flags with Permission Gating
|
|
```rust
|
|
pub struct FeatureGate {
|
|
features: DashMap<String, FeatureConfig>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct FeatureConfig {
|
|
name: String,
|
|
enabled: bool,
|
|
required_role: Role,
|
|
required_permissions: Vec<Permission>,
|
|
beta_access: bool,
|
|
}
|
|
|
|
impl FeatureGate {
|
|
pub fn can_access_feature(&self, user_id: &str, feature: &str) -> Result<(), FeatureError> {
|
|
let config = self.features
|
|
.get(feature)
|
|
.ok_or(FeatureError::UnknownFeature)?;
|
|
|
|
if !config.enabled {
|
|
return Err(FeatureError::FeatureDisabled);
|
|
}
|
|
|
|
// Check role requirement
|
|
let user_role = self.get_user_role(user_id)?;
|
|
if !self.role_satisfies(&user_role, &config.required_role) {
|
|
return Err(FeatureError::InsufficientRole);
|
|
}
|
|
|
|
// Check beta access
|
|
if config.beta_access && !self.has_beta_access(user_id)? {
|
|
return Err(FeatureError::BetaAccessRequired);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 3. Data Privacy
|
|
|
|
### 3.1 Image Data Handling
|
|
|
|
#### Zero-Persistence Default Policy
|
|
```rust
|
|
pub struct ImageProcessor {
|
|
temp_storage: TempStorage,
|
|
max_retention: Duration,
|
|
}
|
|
|
|
impl ImageProcessor {
|
|
/// Process image with automatic cleanup
|
|
pub async fn process_image(&self, image_data: Vec<u8>, request_id: &str) -> Result<OcrResult, ProcessingError> {
|
|
// Create temporary storage with auto-cleanup
|
|
let temp_file = self.temp_storage.create_temp_file(request_id, image_data)?;
|
|
|
|
// Ensure cleanup on drop
|
|
let _cleanup_guard = CleanupGuard::new(temp_file.path());
|
|
|
|
// Process image
|
|
let result = self.run_ocr(&temp_file).await?;
|
|
|
|
// Explicit cleanup (guard ensures it happens even on panic)
|
|
drop(_cleanup_guard);
|
|
|
|
Ok(result)
|
|
}
|
|
}
|
|
|
|
/// RAII guard for automatic cleanup
|
|
struct CleanupGuard {
|
|
path: PathBuf,
|
|
}
|
|
|
|
impl CleanupGuard {
|
|
fn new(path: PathBuf) -> Self {
|
|
Self { path }
|
|
}
|
|
}
|
|
|
|
impl Drop for CleanupGuard {
|
|
fn drop(&mut self) {
|
|
// Secure deletion: overwrite before removal
|
|
if let Ok(mut file) = std::fs::OpenOptions::new()
|
|
.write(true)
|
|
.open(&self.path)
|
|
{
|
|
let metadata = file.metadata().ok();
|
|
if let Some(meta) = metadata {
|
|
let size = meta.len();
|
|
let zeros = vec![0u8; size as usize];
|
|
let _ = file.write_all(&zeros);
|
|
let _ = file.sync_all();
|
|
}
|
|
}
|
|
|
|
// Remove file
|
|
let _ = std::fs::remove_file(&self.path);
|
|
}
|
|
}
|
|
|
|
/// Optional persistent storage (opt-in only)
|
|
pub struct OptInStorage {
|
|
enabled: bool,
|
|
encryption_key: [u8; 32],
|
|
retention_policy: RetentionPolicy,
|
|
}
|
|
|
|
impl OptInStorage {
|
|
pub async fn store_if_enabled(&self, user_consent: bool, data: &[u8]) -> Result<(), StorageError> {
|
|
if !self.enabled || !user_consent {
|
|
return Ok(()); // Skip storage
|
|
}
|
|
|
|
// Encrypt before storage
|
|
let encrypted = self.encrypt_data(data)?;
|
|
|
|
// Store with retention metadata
|
|
self.persist_encrypted(encrypted, self.retention_policy.duration).await?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
### 3.2 GDPR Compliance
|
|
|
|
#### Data Subject Rights Implementation
|
|
```rust
|
|
pub struct GdprCompliance {
|
|
data_registry: Arc<DashMap<String, UserDataRecord>>,
|
|
deletion_queue: Arc<Mutex<VecDeque<DeletionRequest>>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct UserDataRecord {
|
|
user_id: String,
|
|
data_locations: Vec<DataLocation>,
|
|
processing_purposes: Vec<ProcessingPurpose>,
|
|
consent_given: bool,
|
|
consent_timestamp: chrono::DateTime<chrono::Utc>,
|
|
}
|
|
|
|
impl GdprCompliance {
|
|
/// Right to Access (Article 15)
|
|
pub async fn export_user_data(&self, user_id: &str) -> Result<UserDataExport, GdprError> {
|
|
let record = self.data_registry
|
|
.get(user_id)
|
|
.ok_or(GdprError::UserNotFound)?;
|
|
|
|
let mut export = UserDataExport::new(user_id);
|
|
|
|
for location in &record.data_locations {
|
|
let data = self.retrieve_data(location).await?;
|
|
export.add_data(location.category.clone(), data);
|
|
}
|
|
|
|
Ok(export)
|
|
}
|
|
|
|
/// Right to Erasure (Article 17)
|
|
pub async fn delete_user_data(&self, user_id: &str, reason: DeletionReason) -> Result<(), GdprError> {
|
|
let record = self.data_registry
|
|
.remove(user_id)
|
|
.ok_or(GdprError::UserNotFound)?;
|
|
|
|
// Queue deletion across all storage locations
|
|
for location in record.1.data_locations {
|
|
self.deletion_queue.lock().await.push_back(DeletionRequest {
|
|
user_id: user_id.to_string(),
|
|
location,
|
|
reason: reason.clone(),
|
|
requested_at: chrono::Utc::now(),
|
|
});
|
|
}
|
|
|
|
// Process deletions
|
|
self.process_deletion_queue().await?;
|
|
|
|
// Audit log
|
|
self.log_deletion(user_id, reason).await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Right to Rectification (Article 16)
|
|
pub async fn update_user_data(&self, user_id: &str, updates: DataUpdates) -> Result<(), GdprError> {
|
|
// Implementation for data correction
|
|
todo!()
|
|
}
|
|
|
|
/// Right to Data Portability (Article 20)
|
|
pub async fn export_portable_format(&self, user_id: &str) -> Result<PortableData, GdprError> {
|
|
let export = self.export_user_data(user_id).await?;
|
|
|
|
// Convert to machine-readable format (JSON)
|
|
Ok(PortableData {
|
|
format: "application/json".to_string(),
|
|
data: serde_json::to_vec(&export)?,
|
|
})
|
|
}
|
|
}
|
|
```
|
|
|
|
### 3.3 Data Retention Policies
|
|
|
|
#### Automated Retention Management
|
|
```rust
|
|
pub struct RetentionPolicy {
|
|
default_retention: Duration,
|
|
category_policies: HashMap<DataCategory, Duration>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
|
pub enum DataCategory {
|
|
ProcessedImages, // 0 seconds (immediate deletion)
|
|
ApiLogs, // 90 days
|
|
AuditLogs, // 7 years
|
|
UserProfiles, // Until account deletion
|
|
BillingRecords, // 7 years (legal requirement)
|
|
}
|
|
|
|
impl RetentionPolicy {
|
|
pub fn retention_period(&self, category: DataCategory) -> Duration {
|
|
self.category_policies
|
|
.get(&category)
|
|
.copied()
|
|
.unwrap_or(self.default_retention)
|
|
}
|
|
|
|
pub async fn enforce_retention(&self, storage: &dyn Storage) -> Result<(), RetentionError> {
|
|
for (category, period) in &self.category_policies {
|
|
let cutoff = chrono::Utc::now() - chrono::Duration::from_std(*period)?;
|
|
|
|
// Delete data older than retention period
|
|
storage.delete_older_than(category, cutoff).await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
### 3.4 Audit Logging
|
|
|
|
#### Tamper-Proof Audit Trail
|
|
```rust
|
|
use sha2::{Sha256, Digest};
|
|
|
|
pub struct AuditLogger {
|
|
log_chain: Arc<Mutex<Vec<AuditEntry>>>,
|
|
storage: Arc<dyn AuditStorage>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct AuditEntry {
|
|
timestamp: chrono::DateTime<chrono::Utc>,
|
|
event_type: AuditEventType,
|
|
user_id: String,
|
|
ip_address: IpAddr,
|
|
user_agent: String,
|
|
action: String,
|
|
resource: String,
|
|
result: ActionResult,
|
|
previous_hash: String,
|
|
current_hash: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub enum AuditEventType {
|
|
Authentication,
|
|
Authorization,
|
|
DataAccess,
|
|
DataModification,
|
|
DataDeletion,
|
|
ConfigChange,
|
|
SecurityEvent,
|
|
}
|
|
|
|
impl AuditLogger {
|
|
/// Log event with chain verification
|
|
pub async fn log(&self, event: AuditEvent) -> Result<(), AuditError> {
|
|
let mut chain = self.log_chain.lock().await;
|
|
|
|
let previous_hash = chain.last()
|
|
.map(|e| e.current_hash.clone())
|
|
.unwrap_or_else(|| "genesis".to_string());
|
|
|
|
let entry = AuditEntry {
|
|
timestamp: chrono::Utc::now(),
|
|
event_type: event.event_type,
|
|
user_id: event.user_id,
|
|
ip_address: event.ip_address,
|
|
user_agent: event.user_agent,
|
|
action: event.action,
|
|
resource: event.resource,
|
|
result: event.result,
|
|
previous_hash: previous_hash.clone(),
|
|
current_hash: String::new(), // Computed below
|
|
};
|
|
|
|
// Compute hash of current entry
|
|
let current_hash = self.compute_hash(&entry)?;
|
|
let mut entry = entry;
|
|
entry.current_hash = current_hash;
|
|
|
|
// Append to chain
|
|
chain.push(entry.clone());
|
|
|
|
// Persist to storage
|
|
self.storage.append(entry).await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn compute_hash(&self, entry: &AuditEntry) -> Result<String, AuditError> {
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(entry.timestamp.to_rfc3339().as_bytes());
|
|
hasher.update(entry.user_id.as_bytes());
|
|
hasher.update(entry.action.as_bytes());
|
|
hasher.update(entry.previous_hash.as_bytes());
|
|
|
|
Ok(format!("{:x}", hasher.finalize()))
|
|
}
|
|
|
|
/// Verify audit log integrity
|
|
pub async fn verify_integrity(&self) -> Result<bool, AuditError> {
|
|
let chain = self.log_chain.lock().await;
|
|
|
|
for i in 1..chain.len() {
|
|
if chain[i].previous_hash != chain[i-1].current_hash {
|
|
return Ok(false); // Chain broken
|
|
}
|
|
|
|
// Verify current hash
|
|
let computed = self.compute_hash(&chain[i])?;
|
|
if computed != chain[i].current_hash {
|
|
return Ok(false); // Hash mismatch
|
|
}
|
|
}
|
|
|
|
Ok(true)
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 4. Input Validation
|
|
|
|
### 4.1 Image Size Limits
|
|
|
|
```rust
|
|
pub struct ImageValidator {
|
|
max_file_size: usize,
|
|
max_dimensions: (u32, u32),
|
|
max_pixel_count: u64,
|
|
}
|
|
|
|
impl ImageValidator {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
max_file_size: 10 * 1024 * 1024, // 10 MB
|
|
max_dimensions: (8192, 8192), // 8K resolution
|
|
max_pixel_count: 50_000_000, // 50 megapixels
|
|
}
|
|
}
|
|
|
|
pub fn validate_size(&self, data: &[u8]) -> Result<(), ValidationError> {
|
|
if data.len() > self.max_file_size {
|
|
return Err(ValidationError::FileTooLarge {
|
|
size: data.len(),
|
|
max_size: self.max_file_size,
|
|
});
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
pub fn validate_dimensions(&self, image: &DynamicImage) -> Result<(), ValidationError> {
|
|
let (width, height) = (image.width(), image.height());
|
|
|
|
if width > self.max_dimensions.0 || height > self.max_dimensions.1 {
|
|
return Err(ValidationError::DimensionsTooLarge {
|
|
dimensions: (width, height),
|
|
max_dimensions: self.max_dimensions,
|
|
});
|
|
}
|
|
|
|
let pixel_count = width as u64 * height as u64;
|
|
if pixel_count > self.max_pixel_count {
|
|
return Err(ValidationError::TooManyPixels {
|
|
count: pixel_count,
|
|
max_count: self.max_pixel_count,
|
|
});
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
### 4.2 File Type Validation
|
|
|
|
```rust
|
|
pub struct FileTypeValidator {
|
|
allowed_types: HashSet<ImageFormat>,
|
|
}
|
|
|
|
impl FileTypeValidator {
|
|
pub fn validate(&self, data: &[u8]) -> Result<ImageFormat, ValidationError> {
|
|
// Check magic bytes
|
|
let format = self.detect_format(data)?;
|
|
|
|
if !self.allowed_types.contains(&format) {
|
|
return Err(ValidationError::UnsupportedFormat {
|
|
detected: format,
|
|
allowed: self.allowed_types.clone(),
|
|
});
|
|
}
|
|
|
|
// Additional format-specific validation
|
|
match format {
|
|
ImageFormat::Png => self.validate_png(data)?,
|
|
ImageFormat::Jpeg => self.validate_jpeg(data)?,
|
|
ImageFormat::WebP => self.validate_webp(data)?,
|
|
_ => {}
|
|
}
|
|
|
|
Ok(format)
|
|
}
|
|
|
|
fn detect_format(&self, data: &[u8]) -> Result<ImageFormat, ValidationError> {
|
|
if data.len() < 12 {
|
|
return Err(ValidationError::FileTooSmall);
|
|
}
|
|
|
|
// Check magic bytes
|
|
match &data[0..4] {
|
|
[0x89, b'P', b'N', b'G'] => Ok(ImageFormat::Png),
|
|
[0xFF, 0xD8, 0xFF, _] => Ok(ImageFormat::Jpeg),
|
|
[b'R', b'I', b'F', b'F'] if &data[8..12] == b"WEBP" => Ok(ImageFormat::WebP),
|
|
_ => Err(ValidationError::UnknownFormat),
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### 4.3 Malicious File Detection
|
|
|
|
```rust
|
|
pub struct MalwareScanner {
|
|
yara_rules: yara::Rules,
|
|
suspicious_patterns: Vec<Pattern>,
|
|
}
|
|
|
|
impl MalwareScanner {
|
|
pub async fn scan(&self, data: &[u8]) -> Result<ScanResult, ScanError> {
|
|
let mut threats = Vec::new();
|
|
|
|
// YARA scanning
|
|
let matches = self.yara_rules.scan_mem(data, 30)?;
|
|
for m in matches {
|
|
threats.push(Threat {
|
|
severity: Severity::High,
|
|
description: format!("YARA rule matched: {}", m.identifier),
|
|
});
|
|
}
|
|
|
|
// Check for polyglot files
|
|
if self.is_polyglot(data) {
|
|
threats.push(Threat {
|
|
severity: Severity::Medium,
|
|
description: "Polyglot file detected (valid as multiple formats)".to_string(),
|
|
});
|
|
}
|
|
|
|
// Check for embedded executables
|
|
if self.contains_executable(data) {
|
|
threats.push(Threat {
|
|
severity: Severity::Critical,
|
|
description: "Embedded executable code detected".to_string(),
|
|
});
|
|
}
|
|
|
|
// Check for steganography indicators
|
|
if self.has_steganography_markers(data) {
|
|
threats.push(Threat {
|
|
severity: Severity::Low,
|
|
description: "Possible steganographic content".to_string(),
|
|
});
|
|
}
|
|
|
|
Ok(ScanResult { threats })
|
|
}
|
|
|
|
fn is_polyglot(&self, data: &[u8]) -> bool {
|
|
// Check if file is valid as multiple formats
|
|
let mut valid_formats = 0;
|
|
|
|
if self.is_valid_png(data) { valid_formats += 1; }
|
|
if self.is_valid_jpeg(data) { valid_formats += 1; }
|
|
if self.is_valid_gif(data) { valid_formats += 1; }
|
|
|
|
valid_formats > 1
|
|
}
|
|
|
|
fn contains_executable(&self, data: &[u8]) -> bool {
|
|
// Check for PE header
|
|
if data.windows(2).any(|w| w == b"MZ") {
|
|
return true;
|
|
}
|
|
|
|
// Check for ELF header
|
|
if data.starts_with(b"\x7fELF") {
|
|
return true;
|
|
}
|
|
|
|
// Check for Mach-O header
|
|
if data.starts_with(&[0xFE, 0xED, 0xFA, 0xCE]) ||
|
|
data.starts_with(&[0xCE, 0xFA, 0xED, 0xFE]) {
|
|
return true;
|
|
}
|
|
|
|
false
|
|
}
|
|
}
|
|
```
|
|
|
|
### 4.4 Path Traversal Prevention
|
|
|
|
```rust
|
|
pub struct PathValidator {
|
|
allowed_base_dir: PathBuf,
|
|
}
|
|
|
|
impl PathValidator {
|
|
pub fn validate_path(&self, user_path: &Path) -> Result<PathBuf, ValidationError> {
|
|
// Canonicalize to resolve '..' and symlinks
|
|
let canonical = user_path.canonicalize()
|
|
.map_err(|_| ValidationError::InvalidPath)?;
|
|
|
|
// Ensure path is within allowed directory
|
|
if !canonical.starts_with(&self.allowed_base_dir) {
|
|
return Err(ValidationError::PathTraversal {
|
|
attempted: canonical,
|
|
allowed_base: self.allowed_base_dir.clone(),
|
|
});
|
|
}
|
|
|
|
// Check for suspicious components
|
|
for component in canonical.components() {
|
|
match component {
|
|
std::path::Component::ParentDir => {
|
|
return Err(ValidationError::SuspiciousPath("Contains '..'".to_string()));
|
|
}
|
|
std::path::Component::Normal(s) => {
|
|
if s.to_string_lossy().contains('\0') {
|
|
return Err(ValidationError::SuspiciousPath("Contains null byte".to_string()));
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
Ok(canonical)
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 5. Secure Processing
|
|
|
|
### 5.1 Sandboxed Inference
|
|
|
|
```rust
|
|
use nix::unistd::{fork, ForkResult};
|
|
use nix::sys::wait::waitpid;
|
|
|
|
pub struct SandboxedInference {
|
|
resource_limits: ResourceLimits,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ResourceLimits {
|
|
max_memory: usize,
|
|
max_cpu_time: Duration,
|
|
max_file_descriptors: u64,
|
|
}
|
|
|
|
impl SandboxedInference {
|
|
pub async fn run_in_sandbox<F, T>(&self, f: F) -> Result<T, SandboxError>
|
|
where
|
|
F: FnOnce() -> T + Send,
|
|
T: Send,
|
|
{
|
|
match unsafe { fork() } {
|
|
Ok(ForkResult::Parent { child }) => {
|
|
// Parent process: wait for child with timeout
|
|
let timeout = self.resource_limits.max_cpu_time;
|
|
|
|
match tokio::time::timeout(timeout, async {
|
|
waitpid(child, None)
|
|
}).await {
|
|
Ok(Ok(status)) => {
|
|
if status.success() {
|
|
// Read result from shared memory or pipe
|
|
Ok(todo!()) // Retrieve result
|
|
} else {
|
|
Err(SandboxError::ProcessFailed)
|
|
}
|
|
}
|
|
Ok(Err(e)) => Err(SandboxError::WaitFailed(e)),
|
|
Err(_) => {
|
|
// Timeout: kill child
|
|
let _ = nix::sys::signal::kill(child, nix::sys::signal::SIGKILL);
|
|
Err(SandboxError::Timeout)
|
|
}
|
|
}
|
|
}
|
|
Ok(ForkResult::Child) => {
|
|
// Child process: set resource limits and execute
|
|
self.apply_resource_limits()?;
|
|
|
|
let result = f();
|
|
|
|
// Write result to shared memory or pipe
|
|
std::process::exit(0);
|
|
}
|
|
Err(e) => Err(SandboxError::ForkFailed(e)),
|
|
}
|
|
}
|
|
|
|
fn apply_resource_limits(&self) -> Result<(), SandboxError> {
|
|
use nix::sys::resource::{setrlimit, Resource};
|
|
|
|
// Memory limit
|
|
setrlimit(
|
|
Resource::RLIMIT_AS,
|
|
self.resource_limits.max_memory as u64,
|
|
self.resource_limits.max_memory as u64,
|
|
)?;
|
|
|
|
// CPU time limit
|
|
let cpu_secs = self.resource_limits.max_cpu_time.as_secs();
|
|
setrlimit(
|
|
Resource::RLIMIT_CPU,
|
|
cpu_secs,
|
|
cpu_secs,
|
|
)?;
|
|
|
|
// File descriptor limit
|
|
setrlimit(
|
|
Resource::RLIMIT_NOFILE,
|
|
self.resource_limits.max_file_descriptors,
|
|
self.resource_limits.max_file_descriptors,
|
|
)?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
### 5.2 Memory Isolation
|
|
|
|
```rust
|
|
pub struct IsolatedMemoryPool {
|
|
pool: Arc<Mutex<Vec<Vec<u8>>>>,
|
|
max_pool_size: usize,
|
|
}
|
|
|
|
impl IsolatedMemoryPool {
|
|
/// Allocate isolated memory region
|
|
pub fn allocate(&self, size: usize) -> Result<IsolatedBuffer, MemoryError> {
|
|
if size > self.max_pool_size {
|
|
return Err(MemoryError::AllocationTooLarge);
|
|
}
|
|
|
|
// Allocate page-aligned memory
|
|
let layout = std::alloc::Layout::from_size_align(size, 4096)
|
|
.map_err(|_| MemoryError::InvalidAlignment)?;
|
|
|
|
let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
|
|
|
|
if ptr.is_null() {
|
|
return Err(MemoryError::AllocationFailed);
|
|
}
|
|
|
|
// Lock pages to prevent swapping (sensitive data)
|
|
#[cfg(unix)]
|
|
unsafe {
|
|
libc::mlock(ptr as *const libc::c_void, size);
|
|
}
|
|
|
|
Ok(IsolatedBuffer {
|
|
ptr,
|
|
size,
|
|
layout,
|
|
})
|
|
}
|
|
}
|
|
|
|
pub struct IsolatedBuffer {
|
|
ptr: *mut u8,
|
|
size: usize,
|
|
layout: std::alloc::Layout,
|
|
}
|
|
|
|
impl Drop for IsolatedBuffer {
|
|
fn drop(&mut self) {
|
|
unsafe {
|
|
// Zero memory before deallocation
|
|
std::ptr::write_bytes(self.ptr, 0, self.size);
|
|
|
|
// Unlock pages
|
|
#[cfg(unix)]
|
|
libc::munlock(self.ptr as *const libc::c_void, self.size);
|
|
|
|
// Deallocate
|
|
std::alloc::dealloc(self.ptr, self.layout);
|
|
}
|
|
}
|
|
}
|
|
|
|
unsafe impl Send for IsolatedBuffer {}
|
|
unsafe impl Sync for IsolatedBuffer {}
|
|
```
|
|
|
|
### 5.3 Resource Limits
|
|
|
|
```rust
|
|
pub struct ResourceGovernor {
|
|
cpu_limit: CpuLimit,
|
|
memory_limit: MemoryLimit,
|
|
time_limit: TimeLimit,
|
|
}
|
|
|
|
pub struct CpuLimit {
|
|
max_threads: usize,
|
|
max_cpu_percent: f32,
|
|
}
|
|
|
|
impl CpuLimit {
|
|
pub fn enforce(&self) -> Result<(), ResourceError> {
|
|
// Set CPU affinity
|
|
#[cfg(target_os = "linux")]
|
|
{
|
|
let max_cores = (num_cpus::get() as f32 * self.max_cpu_percent / 100.0).ceil() as usize;
|
|
let mut cpu_set = nix::sched::CpuSet::new();
|
|
|
|
for i in 0..max_cores {
|
|
cpu_set.set(i)?;
|
|
}
|
|
|
|
nix::sched::sched_setaffinity(nix::unistd::Pid::from_raw(0), &cpu_set)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub struct MemoryLimit {
|
|
max_heap: usize,
|
|
max_stack: usize,
|
|
}
|
|
|
|
impl MemoryLimit {
|
|
pub fn enforce(&self) -> Result<(), ResourceError> {
|
|
use nix::sys::resource::{setrlimit, Resource};
|
|
|
|
// Heap limit
|
|
setrlimit(Resource::RLIMIT_DATA, self.max_heap as u64, self.max_heap as u64)?;
|
|
|
|
// Stack limit
|
|
setrlimit(Resource::RLIMIT_STACK, self.max_stack as u64, self.max_stack as u64)?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub struct TimeLimit {
|
|
max_duration: Duration,
|
|
}
|
|
|
|
impl TimeLimit {
|
|
pub async fn enforce<F, T>(&self, future: F) -> Result<T, ResourceError>
|
|
where
|
|
F: Future<Output = T>,
|
|
{
|
|
tokio::time::timeout(self.max_duration, future)
|
|
.await
|
|
.map_err(|_| ResourceError::TimeoutExceeded)
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 6. Transport Security
|
|
|
|
### 6.1 TLS 1.3 Enforcement
|
|
|
|
```rust
|
|
use rustls::{ServerConfig, ClientConfig};
|
|
use rustls::version::TLS13;
|
|
|
|
pub struct TlsConfigBuilder {
|
|
cert_resolver: Arc<dyn ResolvesServerCert>,
|
|
}
|
|
|
|
impl TlsConfigBuilder {
|
|
pub fn build_server_config(&self) -> Result<ServerConfig, TlsError> {
|
|
let mut config = ServerConfig::builder()
|
|
.with_safe_default_cipher_suites()
|
|
.with_safe_default_kx_groups()
|
|
.with_protocol_versions(&[&TLS13])? // TLS 1.3 only
|
|
.with_no_client_auth()
|
|
.with_cert_resolver(self.cert_resolver.clone());
|
|
|
|
// Disable session resumption (enforce fresh handshakes)
|
|
config.session_storage = Arc::new(rustls::server::NoServerSessionStorage {});
|
|
|
|
// Enable ALPN for HTTP/2
|
|
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
|
|
|
Ok(config)
|
|
}
|
|
|
|
pub fn build_client_config(&self) -> Result<ClientConfig, TlsError> {
|
|
let mut config = ClientConfig::builder()
|
|
.with_safe_default_cipher_suites()
|
|
.with_safe_default_kx_groups()
|
|
.with_protocol_versions(&[&TLS13])? // TLS 1.3 only
|
|
.with_root_certificates(self.load_root_certs()?)
|
|
.with_no_client_auth();
|
|
|
|
// Enable certificate transparency verification
|
|
config.enable_sct = true;
|
|
|
|
Ok(config)
|
|
}
|
|
}
|
|
```
|
|
|
|
### 6.2 Certificate Management
|
|
|
|
```rust
|
|
use x509_parser::prelude::*;
|
|
|
|
pub struct CertificateManager {
|
|
cert_store: Arc<DashMap<String, Certificate>>,
|
|
renewal_threshold: Duration,
|
|
}
|
|
|
|
impl CertificateManager {
|
|
/// Check certificate expiry and auto-renew
|
|
pub async fn check_and_renew(&self, domain: &str) -> Result<(), CertError> {
|
|
let cert = self.cert_store.get(domain)
|
|
.ok_or(CertError::CertificateNotFound)?;
|
|
|
|
let expires_at = cert.validity_period.not_after;
|
|
let now = chrono::Utc::now();
|
|
|
|
let time_until_expiry = expires_at.signed_duration_since(now);
|
|
|
|
if time_until_expiry < self.renewal_threshold {
|
|
// Renew certificate via ACME
|
|
let new_cert = self.renew_via_acme(domain).await?;
|
|
self.cert_store.insert(domain.to_string(), new_cert);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn renew_via_acme(&self, domain: &str) -> Result<Certificate, CertError> {
|
|
// ACME protocol implementation
|
|
todo!()
|
|
}
|
|
|
|
/// Validate certificate chain
|
|
pub fn validate_chain(&self, cert_chain: &[Certificate]) -> Result<(), CertError> {
|
|
for i in 0..cert_chain.len() - 1 {
|
|
let cert = &cert_chain[i];
|
|
let issuer = &cert_chain[i + 1];
|
|
|
|
// Verify signature
|
|
if !self.verify_signature(cert, issuer)? {
|
|
return Err(CertError::InvalidSignature);
|
|
}
|
|
|
|
// Check validity period
|
|
if !cert.is_valid_at(chrono::Utc::now()) {
|
|
return Err(CertError::CertificateExpired);
|
|
}
|
|
|
|
// Check revocation status (OCSP)
|
|
if self.is_revoked(cert).await? {
|
|
return Err(CertError::CertificateRevoked);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
```
|
|
|
|
### 6.3 CORS Policies
|
|
|
|
```rust
|
|
use axum::http::Method;
|
|
use tower_http::cors::{CorsLayer, AllowOrigin};
|
|
|
|
pub struct CorsConfigBuilder {
|
|
allowed_origins: Vec<String>,
|
|
}
|
|
|
|
impl CorsConfigBuilder {
|
|
pub fn build(&self) -> CorsLayer {
|
|
CorsLayer::new()
|
|
// Specific origins (no wildcard for credentials)
|
|
.allow_origin(AllowOrigin::list(
|
|
self.allowed_origins.iter()
|
|
.map(|o| o.parse().unwrap())
|
|
))
|
|
// Allowed methods
|
|
.allow_methods([Method::GET, Method::POST, Method::OPTIONS])
|
|
// Allowed headers
|
|
.allow_headers([
|
|
"Authorization",
|
|
"Content-Type",
|
|
"X-Request-ID",
|
|
])
|
|
// Expose headers
|
|
.expose_headers([
|
|
"X-RateLimit-Remaining",
|
|
"X-RateLimit-Reset",
|
|
])
|
|
// Max age for preflight cache (1 hour)
|
|
.max_age(Duration::from_secs(3600))
|
|
// Allow credentials
|
|
.allow_credentials(true)
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 7. Dependency Security
|
|
|
|
### 7.1 Cargo Audit Integration
|
|
|
|
```toml
|
|
# .github/workflows/security-audit.yml
|
|
name: Security Audit
|
|
on:
|
|
schedule:
|
|
- cron: '0 0 * * *' # Daily
|
|
pull_request:
|
|
push:
|
|
branches: [main]
|
|
|
|
jobs:
|
|
audit:
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- uses: actions/checkout@v3
|
|
|
|
- name: Install cargo-audit
|
|
run: cargo install cargo-audit
|
|
|
|
- name: Run audit
|
|
run: cargo audit --deny warnings
|
|
|
|
- name: Check for yanked crates
|
|
run: cargo audit --deny yanked
|
|
|
|
- name: Generate SBOM
|
|
run: cargo install cargo-sbom && cargo sbom > sbom.json
|
|
|
|
- name: Upload SBOM
|
|
uses: actions/upload-artifact@v3
|
|
with:
|
|
name: sbom
|
|
path: sbom.json
|
|
```
|
|
|
|
### 7.2 Supply Chain Security
|
|
|
|
```rust
|
|
// build.rs - Verify dependency integrity at build time
|
|
use std::process::Command;
|
|
|
|
fn main() {
|
|
// Verify cargo.lock exists and is committed
|
|
if !std::path::Path::new("Cargo.lock").exists() {
|
|
panic!("Cargo.lock must exist and be committed");
|
|
}
|
|
|
|
// Run cargo-deny checks
|
|
let output = Command::new("cargo")
|
|
.args(&["deny", "check"])
|
|
.output()
|
|
.expect("Failed to run cargo-deny");
|
|
|
|
if !output.status.success() {
|
|
panic!("cargo-deny checks failed");
|
|
}
|
|
|
|
// Verify no git dependencies (security risk)
|
|
let cargo_toml = std::fs::read_to_string("Cargo.toml")
|
|
.expect("Failed to read Cargo.toml");
|
|
|
|
if cargo_toml.contains("git =") {
|
|
panic!("Git dependencies are not allowed in production");
|
|
}
|
|
}
|
|
```
|
|
|
|
```toml
|
|
# deny.toml - cargo-deny configuration
|
|
[advisories]
|
|
vulnerability = "deny"
|
|
unmaintained = "warn"
|
|
yanked = "deny"
|
|
notice = "warn"
|
|
|
|
[licenses]
|
|
unlicensed = "deny"
|
|
allow = [
|
|
"MIT",
|
|
"Apache-2.0",
|
|
"BSD-3-Clause",
|
|
]
|
|
deny = [
|
|
"GPL-3.0",
|
|
"AGPL-3.0",
|
|
]
|
|
|
|
[bans]
|
|
multiple-versions = "warn"
|
|
wildcards = "deny"
|
|
|
|
[sources]
|
|
unknown-registry = "deny"
|
|
unknown-git = "deny"
|
|
allow-git = []
|
|
```
|
|
|
|
### 7.3 Minimal Dependencies
|
|
|
|
```toml
|
|
# Cargo.toml - Minimal dependency strategy
|
|
[dependencies]
|
|
# Use feature flags to minimize attack surface
|
|
tokio = { version = "1", default-features = false, features = ["rt", "net"] }
|
|
serde = { version = "1", default-features = false, features = ["derive"] }
|
|
|
|
# Prefer well-audited crates
|
|
rustls = "0.21" # Instead of openssl
|
|
ring = "0.17" # Cryptography
|
|
|
|
# Avoid unnecessary dependencies
|
|
# ❌ regex = "1" # Heavy dependency
|
|
# ✅ Use stdlib when possible
|
|
|
|
[dev-dependencies]
|
|
# Development dependencies don't affect production binary
|
|
criterion = "0.5"
|
|
```
|
|
|
|
---
|
|
|
|
## 8. Error Handling
|
|
|
|
### 8.1 Safe Error Messages
|
|
|
|
```rust
|
|
use thiserror::Error;
|
|
|
|
#[derive(Error, Debug)]
|
|
pub enum ApiError {
|
|
#[error("Authentication failed")]
|
|
AuthenticationFailed, // Don't leak why
|
|
|
|
#[error("Resource not found")]
|
|
NotFound, // Don't leak what
|
|
|
|
#[error("Invalid input")]
|
|
InvalidInput, // Don't leak specifics
|
|
|
|
#[error("Internal error occurred")]
|
|
InternalError, // Don't leak implementation details
|
|
|
|
#[error("Rate limit exceeded")]
|
|
RateLimitExceeded { retry_after: Duration }, // Safe to expose
|
|
}
|
|
|
|
impl ApiError {
|
|
/// Convert to user-facing error response
|
|
pub fn to_response(&self) -> ErrorResponse {
|
|
match self {
|
|
ApiError::AuthenticationFailed => ErrorResponse {
|
|
code: "AUTH_FAILED",
|
|
message: "Authentication failed".to_string(),
|
|
details: None, // No details
|
|
},
|
|
ApiError::InternalError => ErrorResponse {
|
|
code: "INTERNAL_ERROR",
|
|
message: "An internal error occurred".to_string(),
|
|
details: None, // Never expose internals
|
|
},
|
|
ApiError::RateLimitExceeded { retry_after } => ErrorResponse {
|
|
code: "RATE_LIMIT",
|
|
message: "Rate limit exceeded".to_string(),
|
|
details: Some(json!({
|
|
"retry_after_seconds": retry_after.as_secs()
|
|
})),
|
|
},
|
|
_ => ErrorResponse {
|
|
code: "ERROR",
|
|
message: "An error occurred".to_string(),
|
|
details: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Internal error with full details (for logging only)
|
|
pub fn internal_details(&self) -> String {
|
|
// Full details only in logs, never in responses
|
|
format!("{:?}", self)
|
|
}
|
|
}
|
|
```
|
|
|
|
### 8.2 Logging Without PII
|
|
|
|
```rust
|
|
use tracing::{info, warn, error};
|
|
|
|
pub struct SafeLogger;
|
|
|
|
impl SafeLogger {
|
|
/// Log request without PII
|
|
pub fn log_request(&self, request: &Request) {
|
|
info!(
|
|
request_id = %request.id,
|
|
method = %request.method,
|
|
path = %self.sanitize_path(&request.path),
|
|
ip = %self.anonymize_ip(&request.ip),
|
|
user_agent_hash = %self.hash_user_agent(&request.user_agent),
|
|
"Request received"
|
|
);
|
|
}
|
|
|
|
fn sanitize_path(&self, path: &str) -> String {
|
|
// Remove potential PII from path parameters
|
|
path.split('/')
|
|
.map(|segment| {
|
|
if self.looks_like_pii(segment) {
|
|
"[REDACTED]"
|
|
} else {
|
|
segment
|
|
}
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join("/")
|
|
}
|
|
|
|
fn anonymize_ip(&self, ip: &IpAddr) -> String {
|
|
match ip {
|
|
IpAddr::V4(ipv4) => {
|
|
let octets = ipv4.octets();
|
|
format!("{}.{}.0.0", octets[0], octets[1])
|
|
}
|
|
IpAddr::V6(ipv6) => {
|
|
let segments = ipv6.segments();
|
|
format!("{:x}:{:x}::", segments[0], segments[1])
|
|
}
|
|
}
|
|
}
|
|
|
|
fn hash_user_agent(&self, ua: &str) -> String {
|
|
use sha2::{Sha256, Digest};
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(ua.as_bytes());
|
|
format!("{:x}", hasher.finalize())[..8].to_string()
|
|
}
|
|
|
|
fn looks_like_pii(&self, segment: &str) -> bool {
|
|
// Email pattern
|
|
if segment.contains('@') {
|
|
return true;
|
|
}
|
|
|
|
// UUID pattern
|
|
if uuid::Uuid::parse_str(segment).is_ok() {
|
|
return true;
|
|
}
|
|
|
|
// Long numeric strings (potential IDs)
|
|
if segment.len() > 10 && segment.chars().all(|c| c.is_numeric()) {
|
|
return true;
|
|
}
|
|
|
|
false
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## 9. Open Source Considerations
|
|
|
|
### 9.1 License Compliance
|
|
|
|
```toml
|
|
# Cargo.toml
|
|
[package]
|
|
name = "ruvector-scipix"
|
|
license = "Apache-2.0"
|
|
license-file = "LICENSE"
|
|
|
|
[dependencies]
|
|
# All dependencies must have compatible licenses
|
|
# Verified via cargo-deny
|
|
```
|
|
|
|
```rust
|
|
// src/lib.rs
|
|
//! # License
|
|
//!
|
|
//! Copyright 2024 RuVector Contributors
|
|
//!
|
|
//! Licensed under the Apache License, Version 2.0 (the "License");
|
|
//! you may not use this file except in compliance with the License.
|
|
//! You may obtain a copy of the License at
|
|
//!
|
|
//! http://www.apache.org/licenses/LICENSE-2.0
|
|
//!
|
|
//! Unless required by applicable law or agreed to in writing, software
|
|
//! distributed under the License is distributed on an "AS IS" BASIS,
|
|
//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
//! See the License for the specific language governing permissions and
|
|
//! limitations under the License.
|
|
```
|
|
|
|
### 9.2 Security Disclosure Process
|
|
|
|
```markdown
|
|
# SECURITY.md
|
|
|
|
## Reporting Security Vulnerabilities
|
|
|
|
**Please do not report security vulnerabilities through public GitHub issues.**
|
|
|
|
Instead, please report them via email to security@ruvector.io
|
|
|
|
Include:
|
|
- Description of the vulnerability
|
|
- Steps to reproduce
|
|
- Potential impact
|
|
- Suggested fix (if any)
|
|
|
|
## Response Timeline
|
|
|
|
- **24 hours**: Initial response acknowledging receipt
|
|
- **7 days**: Assessment and severity classification
|
|
- **30 days**: Fix development and testing
|
|
- **90 days**: Public disclosure (coordinated)
|
|
|
|
## Security Updates
|
|
|
|
Security updates are released as patch versions and announced via:
|
|
- GitHub Security Advisories
|
|
- Release notes
|
|
- Security mailing list
|
|
|
|
## Supported Versions
|
|
|
|
| Version | Supported |
|
|
| ------- | ------------------ |
|
|
| 0.x.x | :white_check_mark: |
|
|
|
|
## Security Best Practices
|
|
|
|
### For Users
|
|
- Always use the latest version
|
|
- Enable automatic updates
|
|
- Use API keys, not hardcoded credentials
|
|
- Rotate keys regularly
|
|
- Monitor audit logs
|
|
|
|
### For Contributors
|
|
- Run `cargo audit` before submitting PRs
|
|
- Never commit secrets or credentials
|
|
- Follow secure coding guidelines
|
|
- Add security tests for new features
|
|
```
|
|
|
|
### 9.3 Responsible Defaults
|
|
|
|
```rust
|
|
pub struct SecurityDefaults;
|
|
|
|
impl SecurityDefaults {
|
|
/// Secure-by-default configuration
|
|
pub fn production_config() -> Config {
|
|
Config {
|
|
// TLS required
|
|
tls_enabled: true,
|
|
tls_min_version: TlsVersion::Tls13,
|
|
|
|
// Strong authentication
|
|
require_api_key: true,
|
|
allow_anonymous: false,
|
|
|
|
// Data protection
|
|
auto_delete_temp_files: true,
|
|
max_file_retention: Duration::from_secs(0), // Immediate deletion
|
|
encrypt_at_rest: true,
|
|
|
|
// Rate limiting
|
|
rate_limit_enabled: true,
|
|
default_rate_limit: RateLimitTier::Free,
|
|
|
|
// Audit logging
|
|
audit_enabled: true,
|
|
log_level: LogLevel::Info,
|
|
|
|
// Resource limits
|
|
max_request_size: 10 * 1024 * 1024, // 10MB
|
|
max_processing_time: Duration::from_secs(30),
|
|
|
|
// Security headers
|
|
cors_enabled: true,
|
|
cors_allow_credentials: false, // Safer default
|
|
hsts_enabled: true,
|
|
csp_enabled: true,
|
|
}
|
|
}
|
|
|
|
/// Development configuration (less restrictive)
|
|
pub fn development_config() -> Config {
|
|
let mut config = Self::production_config();
|
|
|
|
// Relax some constraints for development
|
|
config.tls_enabled = false; // Allow HTTP for localhost
|
|
config.rate_limit_enabled = false; // Easier testing
|
|
|
|
config
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## Security Testing
|
|
|
|
### Automated Security Tests
|
|
|
|
```rust
|
|
#[cfg(test)]
|
|
mod security_tests {
|
|
use super::*;
|
|
|
|
#[tokio::test]
|
|
async fn test_sql_injection_prevention() {
|
|
let malicious_input = "'; DROP TABLE users; --";
|
|
let result = process_user_input(malicious_input).await;
|
|
assert!(result.is_err());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_path_traversal_prevention() {
|
|
let malicious_path = "../../etc/passwd";
|
|
let result = validate_file_path(malicious_path);
|
|
assert!(matches!(result, Err(ValidationError::PathTraversal { .. })));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_rate_limiting() {
|
|
let limiter = RateLimiter::new();
|
|
|
|
// Exhaust rate limit
|
|
for _ in 0..100 {
|
|
let _ = limiter.check_limit("user123", "free", 1).await;
|
|
}
|
|
|
|
// Next request should be blocked
|
|
let result = limiter.check_limit("user123", "free", 1).await;
|
|
assert!(matches!(result, Err(RateLimitError::RateLimitExceeded { .. })));
|
|
}
|
|
|
|
#[test]
|
|
fn test_constant_time_comparison() {
|
|
use subtle::ConstantTimeEq;
|
|
|
|
let secret1 = b"correct_password";
|
|
let secret2 = b"correct_password";
|
|
let wrong = b"wrong_password!!";
|
|
|
|
// Correct comparison
|
|
assert_eq!(secret1.ct_eq(secret2).unwrap_u8(), 1);
|
|
|
|
// Wrong comparison
|
|
assert_eq!(secret1.ct_eq(wrong).unwrap_u8(), 0);
|
|
}
|
|
}
|
|
```
|
|
|
|
---
|
|
|
|
## Security Checklist
|
|
|
|
### Pre-Release Security Audit
|
|
|
|
- [ ] All dependencies audited (`cargo audit`)
|
|
- [ ] No hardcoded secrets or credentials
|
|
- [ ] TLS 1.3 enforced
|
|
- [ ] Rate limiting tested
|
|
- [ ] Input validation comprehensive
|
|
- [ ] Error messages don't leak information
|
|
- [ ] Audit logging enabled
|
|
- [ ] GDPR compliance verified
|
|
- [ ] Security tests passing
|
|
- [ ] Penetration testing completed
|
|
- [ ] Security documentation updated
|
|
- [ ] Incident response plan in place
|
|
|
|
### Deployment Security
|
|
|
|
- [ ] Secrets managed via environment variables or vault
|
|
- [ ] Firewall rules configured
|
|
- [ ] Monitoring and alerting enabled
|
|
- [ ] Backup and recovery tested
|
|
- [ ] Access controls reviewed
|
|
- [ ] Security headers configured
|
|
- [ ] HTTPS enforced
|
|
- [ ] Regular security updates scheduled
|
|
|
|
---
|
|
|
|
## Conclusion
|
|
|
|
This security architecture provides defense-in-depth protection for the ruvector-scipix OCR system through:
|
|
|
|
1. **Strong Authentication**: API keys with Argon2 hashing and JWT tokens
|
|
2. **Granular Authorization**: RBAC with feature gating and rate limiting
|
|
3. **Privacy by Design**: GDPR compliance and minimal data retention
|
|
4. **Secure Processing**: Sandboxing, resource limits, and memory isolation
|
|
5. **Transport Security**: TLS 1.3 with certificate management
|
|
6. **Supply Chain Security**: Dependency auditing and minimal dependencies
|
|
7. **Responsible Defaults**: Secure-by-default configuration
|
|
|
|
**Security is not a feature—it's a foundational requirement.** This architecture must be maintained and updated as new threats emerge and best practices evolve.
|