# API Server Design - Scipix API v3 Compatibility

## Overview

This document describes the REST API server implementation for ruvector-scipix, providing full compatibility with Scipix API v3 endpoints while leveraging Rust's performance and safety.

**Stack:**

- **Web Framework:** Axum (high-performance, ergonomic)
- **Serialization:** Serde (JSON/multipart)
- **Async Runtime:** Tokio
- **Middleware:** Tower
- **Auth:** Custom middleware
- **Rate Limiting:** tower-governor
- **Database:** PostgreSQL (job storage) + Redis (queue/cache)

---

## 1. API Design

### 1.1 Core Request/Response Structures

```rust
// src/api/models.rs
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Authentication credentials supplied as an `app_id` / `app_key` pair.
#[derive(Debug, Clone, Deserialize)]
pub struct AuthCredentials {
    pub app_id: String,
    pub app_key: String,
}

/// Authentication credentials supplied as a bearer token.
#[derive(Debug, Clone, Deserialize)]
pub struct BearerAuth {
    pub app_token: String,
}

/// Common request options shared by the OCR endpoints.
///
/// Every field is `#[serde(default)]`, so a client may omit any of them.
/// `Default` is derived because the multipart handler starts from
/// `OcrOptions::default()`, and `Serialize` because the PDF handler stores a
/// job's options with `serde_json::to_value`.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct OcrOptions {
    /// Include detected alphabets in response
    #[serde(default)]
    pub include_detected_alphabets: bool,

    /// Include confidence scores
    #[serde(default)]
    pub include_confidence: bool,

    /// Include word/line bounding boxes
    #[serde(default)]
    pub include_geometry: bool,

    /// Include LaTeX output
    #[serde(default)]
    pub include_latex: bool,

    /// Include MathML output
    #[serde(default)]
    pub include_mathml: bool,

    /// Include table structure
    #[serde(default)]
    pub include_table_data: bool,

    /// Skip text detection
    #[serde(default)]
    pub skip_text_detection: bool,

    /// Alphabets to detect (e.g., ["en", "es", "de"])
    #[serde(default)]
    pub alphabets: Vec<String>,

    /// Output formats (json, latex, html, etc.)
    #[serde(default)]
    pub formats: Vec<String>,
}

/// POST /v3/text request
#[derive(Debug, Deserialize)]
pub struct TextRequest {
    /// Base64-encoded image or URL
    pub src: String,

    /// Optional processing options
    #[serde(flatten)]
    pub options: OcrOptions,

    /// Callback URL for async processing
    pub callback_url: Option<String>,

    /// Metadata for tracking
    // NOTE(review): value type assumed to be arbitrary JSON — confirm.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}

/// Text detection result
#[derive(Debug, Serialize)]
pub struct TextResponse {
    /// Request ID for tracking
    pub request_id: String,

    /// Detected text
    pub text: String,

    /// LaTeX representation (if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex: Option<String>,

    /// MathML representation (if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mathml: Option<String>,

    /// Confidence score (0.0-1.0)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,

    /// Word/line geometry
    #[serde(skip_serializing_if = "Option::is_none")]
    pub geometry: Option<Vec<BoundingBox>>,

    /// Detected alphabets
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detected_alphabets: Option<Vec<String>>,

    /// Processing time (ms)
    pub processing_time_ms: u64,
}

/// Axis-aligned box around one recognised word or line.
#[derive(Debug, Serialize)]
pub struct BoundingBox {
    pub x: f32,
    pub y: f32,
    pub width: f32,
    pub height: f32,
    pub text: String,
    pub confidence: f32,
}

/// POST /v3/strokes request (digital ink)
#[derive(Debug, Deserialize)]
pub struct StrokesRequest {
    /// Array of stroke data
    pub strokes: Vec<Stroke>,

    #[serde(flatten)]
    pub options: OcrOptions,
}

/// One pen stroke as parallel coordinate arrays.
// NOTE(review): element types (f32 coordinates, u64 timestamps) are assumed — confirm.
#[derive(Debug, Deserialize)]
pub struct Stroke {
    /// X coordinates
    pub x: Vec<f32>,
    /// Y coordinates
    pub y: Vec<f32>,
    /// Timestamps (optional)
    pub t: Option<Vec<u64>>,
}

/// POST /v3/pdf request (async)
#[derive(Debug, Deserialize)]
pub struct PdfRequest {
    /// PDF source (URL or base64)
    pub src: String,

    /// Conversion format (mmd, docx, html, etc.)
    pub conversion_format: String,

    /// Math formatting options
    pub math_inline_delimiters: Option<Vec<String>>,
    pub math_display_delimiters: Option<Vec<String>>,

    /// Enable table detection
    #[serde(default)]
    pub enable_tables_fallback: bool,

    /// Callback URL
    pub callback_url: Option<String>,

    #[serde(flatten)]
    pub options: OcrOptions,
}

/// PDF job response
#[derive(Debug, Serialize)]
pub struct PdfJobResponse {
    pub pdf_id: String,
    pub status: JobStatus,
    pub created_at: String,

    /// Estimated completion time (seconds)
    pub estimated_completion_time: Option<u64>,
}

/// Lifecycle state of an asynchronous job; serialised in lowercase
/// (`"queued"`, `"processing"`, ...).
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum JobStatus {
    Queued,
    Processing,
    Completed,
    Failed,
    Cancelled,
}

/// GET /v3/pdf/{id} response
#[derive(Debug, Serialize)]
pub struct PdfStatusResponse {
    pub pdf_id: String,
    pub status: JobStatus,
    pub progress: f32, // 0.0-1.0

    /// Result URL (when completed)
    pub result_url: Option<String>,

    /// Error message (if failed)
    pub error: Option<String>,

    pub created_at: String,
    pub updated_at: String,
    pub completed_at: Option<String>,
}

/// POST /v3/converter request
#[derive(Debug, Deserialize)]
pub struct ConverterRequest {
    /// MMD content
    pub src: String,

    /// Target format (html, pdf, docx)
    pub format: String,

    /// Conversion options
    // NOTE(review): value type assumed to be arbitrary JSON — confirm.
    pub options: Option<HashMap<String, serde_json::Value>>,
}

/// GET /v3/ocr-results query parameters
#[derive(Debug, Deserialize)]
pub struct OcrResultsQuery {
    // u32 keeps the later `as i64` conversion for LIMIT/OFFSET lossless.
    pub limit: Option<u32>,
    pub offset: Option<u32>,
    pub start_date: Option<chrono::DateTime<chrono::Utc>>,
    pub end_date: Option<chrono::DateTime<chrono::Utc>>,
    pub status: Option<JobStatus>,
}

/// GET /v3/ocr-usage response
#[derive(Debug, Serialize)]
pub struct UsageStats {
    pub period: String,
    pub total_requests: u64,
    pub successful_requests: u64,
    pub failed_requests: u64,
    pub total_processing_time_ms: u64,
    pub average_processing_time_ms: f64,
    /// Request count keyed by endpoint name.
    pub requests_by_endpoint: HashMap<String, u64>,
}

/// Standard error response
#[derive(Debug, Serialize)]
pub struct ApiError {
    pub error: String,
    pub error_code: String,
    pub message: String,
    pub request_id: Option<String>,
}
```

### 1.2 Error Codes

```rust
// src/api/errors.rs
use axum::{
http::StatusCode, response::{IntoResponse, Response}, Json, }; #[derive(Debug)] pub enum ApiErrorCode { // Authentication errors (401) InvalidCredentials, ExpiredToken, MissingAuth, // Authorization errors (403) InsufficientQuota, RateLimitExceeded, // Request errors (400) InvalidRequest, InvalidImageFormat, ImageTooLarge, InvalidPdfFormat, // Processing errors (422) ProcessingFailed, ModelLoadFailed, // Server errors (500) InternalError, ServiceUnavailable, // Resource errors (404) JobNotFound, ResultNotFound, } impl ApiErrorCode { pub fn code(&self) -> &'static str { match self { Self::InvalidCredentials => "invalid_credentials", Self::ExpiredToken => "expired_token", Self::MissingAuth => "missing_auth", Self::InsufficientQuota => "insufficient_quota", Self::RateLimitExceeded => "rate_limit_exceeded", Self::InvalidRequest => "invalid_request", Self::InvalidImageFormat => "invalid_image_format", Self::ImageTooLarge => "image_too_large", Self::InvalidPdfFormat => "invalid_pdf_format", Self::ProcessingFailed => "processing_failed", Self::ModelLoadFailed => "model_load_failed", Self::InternalError => "internal_error", Self::ServiceUnavailable => "service_unavailable", Self::JobNotFound => "job_not_found", Self::ResultNotFound => "result_not_found", } } pub fn status_code(&self) -> StatusCode { match self { Self::InvalidCredentials | Self::ExpiredToken | Self::MissingAuth => StatusCode::UNAUTHORIZED, Self::InsufficientQuota | Self::RateLimitExceeded => StatusCode::FORBIDDEN, Self::InvalidRequest | Self::InvalidImageFormat | Self::ImageTooLarge | Self::InvalidPdfFormat => StatusCode::BAD_REQUEST, Self::ProcessingFailed | Self::ModelLoadFailed => StatusCode::UNPROCESSABLE_ENTITY, Self::JobNotFound | Self::ResultNotFound => StatusCode::NOT_FOUND, Self::InternalError | Self::ServiceUnavailable => StatusCode::INTERNAL_SERVER_ERROR, } } pub fn message(&self) -> &'static str { match self { Self::InvalidCredentials => "Invalid app_id or app_key", Self::ExpiredToken => 
"Authentication token has expired", Self::MissingAuth => "Missing authentication credentials", Self::InsufficientQuota => "Insufficient API quota", Self::RateLimitExceeded => "Rate limit exceeded. Please retry later.", Self::InvalidRequest => "Invalid request parameters", Self::InvalidImageFormat => "Unsupported image format", Self::ImageTooLarge => "Image exceeds maximum size limit", Self::InvalidPdfFormat => "Invalid or corrupted PDF file", Self::ProcessingFailed => "Failed to process input", Self::ModelLoadFailed => "Failed to load processing model", Self::InternalError => "Internal server error", Self::ServiceUnavailable => "Service temporarily unavailable", Self::JobNotFound => "Job not found", Self::ResultNotFound => "Result not found or expired", } } } pub struct AppError { pub code: ApiErrorCode, pub context: Option, pub request_id: Option, } impl IntoResponse for AppError { fn into_response(self) -> Response { let error_response = super::models::ApiError { error: self.code.code().to_string(), error_code: self.code.code().to_string(), message: self.context.unwrap_or_else(|| self.code.message().to_string()), request_id: self.request_id, }; (self.code.status_code(), Json(error_response)).into_response() } } ``` --- ## 2. 
Axum Server Implementation

### 2.1 Server Setup

```rust
// src/api/server.rs
use axum::{
    Router,
    routing::{get, post, delete},
    middleware,
    Extension,
};
use std::sync::Arc;
use tower::ServiceBuilder;
use tower_http::{
    cors::{CorsLayer, Any},
    trace::TraceLayer,
    compression::CompressionLayer,
};

/// Owns the server configuration and the shared application state.
pub struct ApiServer {
    config: Arc<ServerConfig>,
    state: Arc<AppState>,
}

/// State shared by every handler and middleware.
#[derive(Clone)]
pub struct AppState {
    /// Server configuration; handlers read limits such as `max_upload_size` from it.
    pub config: Arc<ServerConfig>,
    pub db_pool: sqlx::PgPool,
    pub redis_client: redis::aio::ConnectionManager,
    pub job_queue: Arc<JobQueue>,
    pub model_manager: Arc<ModelManager>,
    pub auth_service: Arc<AuthService>,
}

/// Static server settings.
#[derive(Debug, Clone)]
pub struct ServerConfig {
    pub host: String,
    pub port: u16,
    pub max_upload_size: usize, // bytes
    pub request_timeout: u64,   // seconds
    pub enable_tls: bool,
    pub tls_cert_path: Option<String>,
    pub tls_key_path: Option<String>,
    pub model_path: String,
    pub storage_path: String,
    pub redis_url: String,
    pub database_url: String,
}

impl ApiServer {
    /// Connects to PostgreSQL and Redis, loads the models and builds the shared state.
    ///
    /// # Errors
    /// Returns the first connection or model-loading error encountered.
    pub async fn new(config: ServerConfig) -> Result<Self, Box<dyn std::error::Error>> {
        let config = Arc::new(config);

        // Initialize database pool
        let db_pool = sqlx::postgres::PgPoolOptions::new()
            .max_connections(20)
            .connect(&config.database_url)
            .await?;

        // Initialize Redis client
        let redis_client = redis::Client::open(config.redis_url.as_str())?;
        let redis_conn = redis_client.get_connection_manager().await?;

        // Initialize job queue
        let job_queue = Arc::new(JobQueue::new(redis_conn.clone()));

        // Initialize model manager
        let model_manager = Arc::new(ModelManager::new(&config.model_path).await?);

        // Initialize auth service
        let auth_service = Arc::new(AuthService::new(db_pool.clone()));

        let state = Arc::new(AppState {
            config: Arc::clone(&config),
            db_pool,
            redis_client: redis_conn,
            job_queue,
            model_manager,
            auth_service,
        });

        Ok(Self { config, state })
    }

    /// Builds the router: authenticated `/v3` routes plus unauthenticated health checks.
    pub fn router(&self) -> Router {
        // API v3 routes
        let v3_routes = Router::new()
            // OCR endpoints
            .route("/text", post(handlers::process_text))
            .route("/strokes", post(handlers::process_strokes))
            .route("/latex", post(handlers::process_latex))
            // PDF processing
            .route("/pdf", post(handlers::submit_pdf))
            .route("/pdf/:id", get(handlers::get_pdf_status))
            .route("/pdf/:id", delete(handlers::delete_pdf_job))
            // Converter
            .route("/converter", post(handlers::convert_document))
            // Query endpoints
            .route("/ocr-results", get(handlers::query_results))
            .route("/ocr-usage", get(handlers::get_usage_stats))
            // `Router::layer` wraps everything added before it, so the layer
            // added LAST runs FIRST. Rate limiting reads the `AuthUser` that
            // authentication inserts, so it must be the inner layer and
            // authentication the outer one.
            .layer(middleware::from_fn_with_state(
                self.state.clone(),
                rate_limit_middleware,
            ))
            .layer(middleware::from_fn_with_state(
                self.state.clone(),
                auth_middleware,
            ));

        // Health check (no auth)
        let health_routes = Router::new()
            .route("/health", get(handlers::health_check))
            .route("/ready", get(handlers::readiness_check));

        Router::new()
            .nest("/v3", v3_routes)
            .merge(health_routes)
            .layer(
                ServiceBuilder::new()
                    // Logging
                    .layer(TraceLayer::new_for_http())
                    // CORS
                    // NOTE(review): fully permissive CORS — confirm this is intended outside development.
                    .layer(
                        CorsLayer::new()
                            .allow_origin(Any)
                            .allow_methods(Any)
                            .allow_headers(Any),
                    )
                    // Compression
                    .layer(CompressionLayer::new())
                    // Request ID
                    .layer(middleware::from_fn(request_id_middleware)),
            )
            .layer(Extension(self.state.clone()))
            .layer(Extension(self.config.clone()))
            // Handlers extract `State<Arc<AppState>>`, which requires the
            // state to be attached to the router.
            .with_state(self.state.clone())
    }

    /// Binds the configured address and serves requests until the server stops.
    ///
    /// # Errors
    /// Returns an error if the address cannot be bound, the TLS material
    /// cannot be loaded, or the server terminates abnormally.
    pub async fn serve(self) -> Result<(), Box<dyn std::error::Error>> {
        let addr = format!("{}:{}", self.config.host, self.config.port);
        let listener = tokio::net::TcpListener::bind(&addr).await?;

        tracing::info!("API server listening on {}", addr);

        if self.config.enable_tls {
            // TLS configuration
            let tls_config = self.load_tls_config().await?;

            axum_server::from_tcp_rustls(listener.into_std()?, tls_config)
                .serve(self.router().into_make_service())
                .await?;
        } else {
            axum::serve(listener, self.router()).await?;
        }

        Ok(())
    }

    /// Loads the PEM certificate and key named in the configuration.
    ///
    /// `RustlsConfig::from_pem_file` is asynchronous and fallible, so it is
    /// awaited and its error propagated.
    async fn load_tls_config(
        &self,
    ) -> Result<axum_server::tls_rustls::RustlsConfig, Box<dyn std::error::Error>> {
        let cert_path = self
            .config
            .tls_cert_path
            .as_ref()
            .ok_or("TLS cert path not configured")?;
        let key_path = self
            .config
            .tls_key_path
            .as_ref()
            .ok_or("TLS key path not configured")?;

        let tls_config =
            axum_server::tls_rustls::RustlsConfig::from_pem_file(cert_path, key_path).await?;
        Ok(tls_config)
    }
}
```

### 2.2 Middleware Stack

```rust
// src/api/middleware/auth.rs
use axum::{
    extract::{Request, State},
    middleware::Next,
    response::Response,
    http::header,
};

/// Builds an authentication failure without context or request id.
fn auth_error(code: ApiErrorCode) -> AppError {
    AppError {
        code,
        context: None,
        request_id: None,
    }
}

/// Authenticates a request and stores the resulting `AuthUser` in its extensions.
///
/// A `Bearer` token in `Authorization` is checked first; otherwise the
/// `app_id` / `app_key` header pair is used.
///
/// # Errors
/// `InvalidCredentials` when validation fails, `MissingAuth` when neither
/// form of credentials is present.
pub async fn auth_middleware(
    State(state): State<Arc<AppState>>,
    mut request: Request,
    next: Next,
) -> Result<Response, AppError> {
    // Check for Bearer token
    let bearer_token = request
        .headers()
        .get(header::AUTHORIZATION)
        .and_then(|value| value.to_str().ok())
        .and_then(|value| value.strip_prefix("Bearer "))
        .map(str::to_owned);

    if let Some(token) = bearer_token {
        let user = state
            .auth_service
            .validate_token(&token)
            .await
            .map_err(|_| auth_error(ApiErrorCode::InvalidCredentials))?;

        request.extensions_mut().insert(user);
        return Ok(next.run(request).await);
    }

    // Check for app_id and app_key headers
    // NOTE(review): some reverse proxies drop header names containing
    // underscores unless configured otherwise — confirm the deployment.
    let header_value = |name: &str| {
        request
            .headers()
            .get(name)
            .and_then(|value| value.to_str().ok())
            .map(str::to_owned)
    };
    let credentials = header_value("app_id").zip(header_value("app_key"));

    if let Some((app_id, app_key)) = credentials {
        let user = state
            .auth_service
            .validate_credentials(&app_id, &app_key)
            .await
            .map_err(|_| auth_error(ApiErrorCode::InvalidCredentials))?;

        request.extensions_mut().insert(user);
        return Ok(next.run(request).await);
    }

    Err(auth_error(ApiErrorCode::MissingAuth))
}

// src/api/middleware/rate_limit.rs
use redis::AsyncCommands;
use tower_governor::{
    governor::GovernorConfigBuilder,
    key_extractor::SmartIpKeyExtractor,
    GovernorLayer,
};

/// Enforces the per-user request limit using a Redis counter with a
/// one-minute expiry.
///
/// Must run after `auth_middleware`, which provides the `AuthUser`.
/// Redis failures are logged and the request is counted as the first in its
/// window, so an outage of the counter does not take the API down.
///
/// # Errors
/// `MissingAuth` when no `AuthUser` is attached, `RateLimitExceeded` when the
/// counter exceeds the user's limit.
pub async fn rate_limit_middleware(
    State(state): State<Arc<AppState>>,
    request: Request,
    next: Next,
) -> Result<Response, AppError> {
    // Extract user from request; copy the two numbers so no borrow of the
    // request is kept alive across the awaits below.
    let (user_id, rate_limit) = request
        .extensions()
        .get::<AuthUser>()
        .map(|user| (user.id, user.rate_limit))
        .ok_or(AppError {
            code: ApiErrorCode::MissingAuth,
            context: None,
            request_id: None,
        })?;

    // Check rate limit
    let limit_key = format!("rate_limit:{}", user_id);
    let mut redis = state.redis_client.clone();

    let incremented: redis::RedisResult<u64> = redis.incr(&limit_key, 1).await;
    let current_count = match incremented {
        Ok(count) => count,
        Err(e) => {
            tracing::warn!("Rate limit counter unavailable: {}", e);
            1
        }
    };

    if current_count == 1 {
        // Set expiry (1 minute window)
        let expiry: redis::RedisResult<()> = redis.expire(&limit_key, 60).await;
        if let Err(e) = expiry {
            // Without an expiry the counter would never reset; make it visible.
            tracing::warn!("Failed to set rate limit expiry for {}: {}", limit_key, e);
        }
    }

    // Check against user's rate limit
    if current_count > rate_limit {
        return Err(AppError {
            code: ApiErrorCode::RateLimitExceeded,
            context: Some(format!("Rate limit: {} requests per minute", rate_limit)),
            request_id: None,
        });
    }

    Ok(next.run(request).await)
}

// src/api/middleware/request_id.rs
use axum::http::HeaderValue;
use uuid::Uuid;

/// Generates a UUID for the request, exposes it to handlers as a
/// `RequestId` extension and returns it in the `X-Request-ID` header.
pub async fn request_id_middleware(mut request: Request, next: Next) -> Response {
    let request_id = Uuid::new_v4().to_string();
    request
        .extensions_mut()
        .insert(RequestId(request_id.clone()));

    let mut response = next.run(request).await;

    // A UUID string is always a valid header value; skip the header rather
    // than panic if that ever stops being true.
    if let Ok(value) = HeaderValue::from_str(&request_id) {
        response.headers_mut().insert("X-Request-ID", value);
    }

    response
}

/// Per-request identifier stored in the request extensions.
#[derive(Debug, Clone)]
pub struct RequestId(pub String);
```

---

## 3.
Request Handlers

### 3.1 Image Processing Endpoint

```rust
// src/api/handlers/text.rs
use axum::{
    extract::{State, Multipart},
    Extension,
    Json,
};

/// POST /v3/text — runs OCR on an image given as URL, data URL or base64.
///
/// # Errors
/// `InvalidImageFormat` if the source cannot be loaded, `ImageTooLarge` if it
/// exceeds `max_upload_size`, `ProcessingFailed` if the model fails.
pub async fn process_text(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    Extension(request_id): Extension<RequestId>,
    Json(payload): Json<TextRequest>,
) -> Result<Json<TextResponse>, AppError> {
    let start = std::time::Instant::now();

    // Parse image source
    let image_data = parse_image_source(&payload.src)
        .await
        .map_err(|e| AppError {
            code: ApiErrorCode::InvalidImageFormat,
            context: Some(e.to_string()),
            request_id: Some(request_id.0.clone()),
        })?;

    // Validate image size
    if image_data.len() > state.config.max_upload_size {
        return Err(AppError {
            code: ApiErrorCode::ImageTooLarge,
            context: Some(format!(
                "Max size: {} bytes",
                state.config.max_upload_size
            )),
            request_id: Some(request_id.0.clone()),
        });
    }

    // Process image
    let result = state
        .model_manager
        .process_image(&image_data, &payload.options)
        .await
        .map_err(|e| AppError {
            code: ApiErrorCode::ProcessingFailed,
            context: Some(e.to_string()),
            request_id: Some(request_id.0.clone()),
        })?;

    // Record usage
    record_usage(&state.db_pool, &user, "text", start.elapsed()).await?;

    // Send callback if requested
    // NOTE(review): the callback URL is client-supplied; the server will make
    // a request to it — confirm internal addresses are blocked.
    if let Some(callback_url) = &payload.callback_url {
        tokio::spawn(send_callback(
            callback_url.clone(),
            request_id.0.clone(),
            result.clone(),
        ));
    }

    let options = &payload.options;
    Ok(Json(TextResponse {
        request_id: request_id.0,
        text: result.text,
        latex: options.include_latex.then_some(result.latex),
        mathml: options.include_mathml.then_some(result.mathml),
        confidence: options.include_confidence.then_some(result.confidence),
        geometry: options.include_geometry.then_some(result.geometry),
        detected_alphabets: options
            .include_detected_alphabets
            .then_some(result.detected_alphabets),
        processing_time_ms: start.elapsed().as_millis() as u64,
    }))
}

/// Loads image bytes from an `http(s)` URL, a `data:image/...` URL, or a bare
/// base64 string.
///
/// The error type is `Send + Sync` so the future stays usable from handlers.
// NOTE(review): the URL branch fetches a client-supplied address and reads
// the whole body before any size check — confirm SSRF and size protections.
async fn parse_image_source(
    src: &str,
) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
    if src.starts_with("http://") || src.starts_with("https://") {
        // Download from URL
        let response = reqwest::get(src).await?;
        Ok(response.bytes().await?.to_vec())
    } else if src.starts_with("data:image/") {
        // Parse data URL: everything after the first comma is the payload
        let (_, base64_data) = src.split_once(',').ok_or("Invalid data URL")?;
        Ok(base64::decode(base64_data)?)
    } else {
        // Assume base64
        Ok(base64::decode(src)?)
    }
}

/// Multipart upload handler: field `file` carries the image, optional field
/// `options` carries `OcrOptions` as JSON.
///
/// # Errors
/// `InvalidRequest` for a malformed multipart body, invalid `options` JSON or
/// a missing `file` field; `ImageTooLarge` and `ProcessingFailed` as in
/// `process_text`.
pub async fn process_text_multipart(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    Extension(request_id): Extension<RequestId>,
    mut multipart: Multipart,
) -> Result<Json<TextResponse>, AppError> {
    let start = std::time::Instant::now();

    let bad_request = |message: String| AppError {
        code: ApiErrorCode::InvalidRequest,
        context: Some(message),
        request_id: Some(request_id.0.clone()),
    };

    let mut image_data = None;
    let mut options = OcrOptions::default();

    // The body is client-controlled: every read can fail and must become a
    // 400 response instead of a panic.
    while let Some(field) = multipart
        .next_field()
        .await
        .map_err(|e| bad_request(e.to_string()))?
    {
        let name = field.name().unwrap_or("").to_string();

        match name.as_str() {
            "file" => {
                let bytes = field
                    .bytes()
                    .await
                    .map_err(|e| bad_request(e.to_string()))?;
                image_data = Some(bytes.to_vec());
            }
            "options" => {
                let json_str = field
                    .text()
                    .await
                    .map_err(|e| bad_request(e.to_string()))?;
                // Reject invalid options instead of silently using defaults.
                options = serde_json::from_str(&json_str)
                    .map_err(|e| bad_request(format!("Invalid options: {}", e)))?;
            }
            _ => {}
        }
    }

    let image_data =
        image_data.ok_or_else(|| bad_request("Missing image file".to_string()))?;

    // Same size limit as the JSON endpoint
    if image_data.len() > state.config.max_upload_size {
        return Err(AppError {
            code: ApiErrorCode::ImageTooLarge,
            context: Some(format!(
                "Max size: {} bytes",
                state.config.max_upload_size
            )),
            request_id: Some(request_id.0.clone()),
        });
    }

    // Process image
    let result = state
        .model_manager
        .process_image(&image_data, &options)
        .await
        .map_err(|e| AppError {
            code: ApiErrorCode::ProcessingFailed,
            context: Some(e.to_string()),
            request_id: Some(request_id.0.clone()),
        })?;

    // Record usage, as the JSON endpoint does
    record_usage(&state.db_pool, &user, "text", start.elapsed()).await?;

    Ok(Json(TextResponse {
        request_id: request_id.0,
        text: result.text,
        latex: options.include_latex.then_some(result.latex),
        mathml: options.include_mathml.then_some(result.mathml),
        confidence: options.include_confidence.then_some(result.confidence),
        geometry: options.include_geometry.then_some(result.geometry),
        detected_alphabets: options
            .include_detected_alphabets
            .then_some(result.detected_alphabets),
        processing_time_ms: start.elapsed().as_millis() as u64,
    }))
}
```

### 3.2 PDF Processing (Async)

```rust
// src/api/handlers/pdf.rs

/// POST /v3/pdf — stores a PDF conversion job and queues it for a worker.
///
/// # Errors
/// `InvalidPdfFormat` if the source cannot be loaded, `InternalError` if the
/// job cannot be stored or queued.
pub async fn submit_pdf(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    Extension(request_id): Extension<RequestId>,
    Json(payload): Json<PdfRequest>,
) -> Result<Json<PdfJobResponse>, AppError> {
    let internal = |message: String| AppError {
        code: ApiErrorCode::InternalError,
        context: Some(message),
        request_id: Some(request_id.0.clone()),
    };

    // Parse PDF source
    let pdf_data = parse_pdf_source(&payload.src)
        .await
        .map_err(|e| AppError {
            code: ApiErrorCode::InvalidPdfFormat,
            context: Some(e.to_string()),
            request_id: Some(request_id.0.clone()),
        })?;

    // Create job; one timestamp is shared by the job, the database row and
    // the response so they cannot disagree.
    let pdf_id = Uuid::new_v4().to_string();
    let created_at = chrono::Utc::now();
    let job = PdfJob {
        id: pdf_id.clone(),
        user_id: user.id,
        status: JobStatus::Queued,
        pdf_data,
        conversion_format: payload.conversion_format,
        options: payload.options,
        callback_url: payload.callback_url,
        created_at,
        updated_at: created_at,
        completed_at: None,
        result_url: None,
        error: None,
    };

    let status_json = serde_json::to_value(&job.status).map_err(|e| internal(e.to_string()))?;
    let options_json = serde_json::to_value(&job.options).map_err(|e| internal(e.to_string()))?;

    // Store job in database
    sqlx::query!(
        r#"
        INSERT INTO pdf_jobs (id, user_id, status, conversion_format, options, callback_url, created_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        "#,
        job.id,
        job.user_id,
        status_json,
        job.conversion_format,
        options_json,
        job.callback_url,
        job.created_at,
    )
    .execute(&state.db_pool)
    .await
    .map_err(|e| internal(e.to_string()))?;

    // Queue job
    state
        .job_queue
        .enqueue(job)
        .await
        .map_err(|e| internal(e.to_string()))?;

    Ok(Json(PdfJobResponse {
        pdf_id,
        status: JobStatus::Queued,
        created_at: created_at.to_rfc3339(),
        estimated_completion_time: Some(300), // 5 minutes
    }))
}

/// GET /v3/pdf/{id} — returns the status of one of the caller's jobs.
///
/// # Errors
/// `JobNotFound` if the job does not exist or belongs to another user,
/// `InternalError` on database failure or an unreadable stored status.
pub async fn get_pdf_status(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    Extension(request_id): Extension<RequestId>,
    axum::extract::Path(pdf_id): axum::extract::Path<String>,
) -> Result<Json<PdfStatusResponse>, AppError> {
    let internal = |message: String| AppError {
        code: ApiErrorCode::InternalError,
        context: Some(message),
        request_id: Some(request_id.0.clone()),
    };

    // Query job status
    let job = sqlx::query_as!(
        PdfJobRecord,
        r#"
        SELECT * FROM pdf_jobs
        WHERE id = $1 AND user_id = $2
        "#,
        pdf_id,
        user.id,
    )
    .fetch_optional(&state.db_pool)
    .await
    .map_err(|e| internal(e.to_string()))?
    .ok_or_else(|| AppError {
        code: ApiErrorCode::JobNotFound,
        context: None,
        request_id: Some(request_id.0.clone()),
    })?;

    // An unexpected value in the status column is a server-side problem,
    // not a reason to panic the request task.
    let status: JobStatus =
        serde_json::from_value(job.status).map_err(|e| internal(e.to_string()))?;

    Ok(Json(PdfStatusResponse {
        pdf_id: job.id,
        status,
        progress: job.progress.unwrap_or(0.0),
        result_url: job.result_url,
        error: job.error,
        created_at: job.created_at.to_rfc3339(),
        updated_at: job.updated_at.to_rfc3339(),
        completed_at: job.completed_at.map(|dt| dt.to_rfc3339()),
    }))
}

/// DELETE /v3/pdf/{id} — marks one of the caller's unfinished jobs as cancelled.
///
/// # Errors
/// `JobNotFound` if no row was updated (unknown job, foreign job, or already
/// completed), `InternalError` on database failure.
pub async fn delete_pdf_job(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    Extension(request_id): Extension<RequestId>,
    axum::extract::Path(pdf_id): axum::extract::Path<String>,
) -> Result<StatusCode, AppError> {
    let internal = |message: String| AppError {
        code: ApiErrorCode::InternalError,
        context: Some(message),
        request_id: Some(request_id.0.clone()),
    };

    let cancelled =
        serde_json::to_value(&JobStatus::Cancelled).map_err(|e| internal(e.to_string()))?;
    let completed =
        serde_json::to_value(&JobStatus::Completed).map_err(|e| internal(e.to_string()))?;

    // Update job status to cancelled.
    // The status column is written as a JSON value everywhere else, so the
    // comparison binds a JSON value too instead of the bare literal
    // 'completed', which is not valid JSON.
    // NOTE(review): assumes the column is JSONB (supports `!=`) — confirm.
    let result = sqlx::query!(
        r#"
        UPDATE pdf_jobs
        SET status = $1, updated_at = $2
        WHERE id = $3 AND user_id = $4 AND status != $5
        "#,
        cancelled,
        chrono::Utc::now(),
        pdf_id,
        user.id,
        completed,
    )
    .execute(&state.db_pool)
    .await
    .map_err(|e| internal(e.to_string()))?;

    if result.rows_affected() == 0 {
        return Err(AppError {
            code: ApiErrorCode::JobNotFound,
            context: Some("Job not found or already completed".to_string()),
            request_id: Some(request_id.0.clone()),
        });
    }

    Ok(StatusCode::NO_CONTENT)
}
```

### 3.3 Query Endpoints

```rust
// src/api/handlers/query.rs
pub async fn query_results( State(state): State>, Extension(user): Extension, axum::extract::Query(params): axum::extract::Query, ) -> Result>, AppError> { let limit = params.limit.unwrap_or(50).min(100); let offset = params.offset.unwrap_or(0); let mut query_builder = sqlx::QueryBuilder::new( "SELECT * FROM ocr_results WHERE user_id = " ); query_builder.push_bind(user.id); if let Some(start_date) = params.start_date { query_builder.push(" AND created_at >= "); query_builder.push_bind(start_date); } if let Some(end_date) = params.end_date { query_builder.push(" AND created_at <= "); query_builder.push_bind(end_date); } if let Some(status) =
params.status { query_builder.push(" AND status = "); query_builder.push_bind(serde_json::to_value(&status).unwrap()); } query_builder.push(" ORDER BY created_at DESC LIMIT "); query_builder.push_bind(limit as i64); query_builder.push(" OFFSET "); query_builder.push_bind(offset as i64); let results = query_builder .build_query_as::() .fetch_all(&state.db_pool) .await .map_err(|e| AppError { code: ApiErrorCode::InternalError, context: Some(e.to_string()), request_id: None, })?; Ok(Json(results)) }

/// GET /v3/ocr-usage — aggregates the caller's usage over a period.
///
/// The `period` query parameter accepts `day`, `week` or `month`; anything
/// else (or nothing) is treated as `month`, and the response reports the
/// period that was actually used.
///
/// # Errors
/// `InternalError` on database failure.
pub async fn get_usage_stats(
    State(state): State<Arc<AppState>>,
    Extension(user): Extension<AuthUser>,
    axum::extract::Query(params): axum::extract::Query<HashMap<String, String>>,
) -> Result<Json<UsageStats>, AppError> {
    let internal = |message: String| AppError {
        code: ApiErrorCode::InternalError,
        context: Some(message),
        request_id: None,
    };

    let (period, window) = match params.get("period").map(String::as_str) {
        Some("day") => ("day", chrono::Duration::days(1)),
        Some("week") => ("week", chrono::Duration::weeks(1)),
        _ => ("month", chrono::Duration::days(30)),
    };
    let start_date = chrono::Utc::now() - window;

    // `query_results` filters this table's status column with a JSON value,
    // so the same representation is bound here.
    let completed =
        serde_json::to_value(&JobStatus::Completed).map_err(|e| internal(e.to_string()))?;
    let failed = serde_json::to_value(&JobStatus::Failed).map_err(|e| internal(e.to_string()))?;

    // The casts pin the aggregate result types: SUM/AVG may otherwise come
    // back as NUMERIC depending on the column type.
    // NOTE(review): column types are not visible in this document — confirm.
    let stats = sqlx::query!(
        r#"
        SELECT
            COUNT(*) as total_requests,
            COUNT(*) FILTER (WHERE status = $3) as successful_requests,
            COUNT(*) FILTER (WHERE status = $4) as failed_requests,
            SUM(processing_time_ms)::BIGINT as total_processing_time_ms,
            AVG(processing_time_ms)::FLOAT8 as average_processing_time_ms
        FROM ocr_results
        WHERE user_id = $1 AND created_at >= $2
        "#,
        user.id,
        start_date,
        completed,
        failed,
    )
    .fetch_one(&state.db_pool)
    .await
    .map_err(|e| internal(e.to_string()))?;

    // Get requests by endpoint
    let endpoint_stats = sqlx::query!(
        r#"
        SELECT endpoint, COUNT(*) as count
        FROM ocr_results
        WHERE user_id = $1 AND created_at >= $2
        GROUP BY endpoint
        "#,
        user.id,
        start_date,
    )
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| internal(e.to_string()))?;

    // Aggregates arrive as nullable signed integers; NULL or a negative
    // value becomes 0 instead of wrapping.
    let to_u64 = |value: Option<i64>| value.and_then(|n| u64::try_from(n).ok()).unwrap_or(0);

    let requests_by_endpoint: HashMap<_, _> = endpoint_stats
        .into_iter()
        .map(|stat| (stat.endpoint, to_u64(stat.count)))
        .collect();

    Ok(Json(UsageStats {
        period: period.to_string(),
        total_requests: to_u64(stats.total_requests),
        successful_requests: to_u64(stats.successful_requests),
        failed_requests: to_u64(stats.failed_requests),
        total_processing_time_ms: to_u64(stats.total_processing_time_ms),
        average_processing_time_ms: stats.average_processing_time_ms.unwrap_or(0.0),
        requests_by_endpoint,
    }))
}
```

---

## 4. Job Queue & Background Processing

### 4.1 Redis-based Job Queue

```rust
// src/api/queue.rs
use redis::AsyncCommands;
use std::sync::Arc;

/// FIFO queue of PDF jobs stored as JSON strings in a Redis list.
pub struct JobQueue {
    redis: redis::aio::ConnectionManager,
    queue_key: String,
}

impl JobQueue {
    /// Creates a queue backed by the list `pdf_jobs:queue`.
    pub fn new(redis: redis::aio::ConnectionManager) -> Self {
        Self {
            redis,
            queue_key: "pdf_jobs:queue".to_string(),
        }
    }

    /// Appends a job to the tail of the queue.
    ///
    /// # Errors
    /// Returns a `RedisError` if the job cannot be serialised or pushed.
    pub async fn enqueue(&self, job: PdfJob) -> Result<(), redis::RedisError> {
        let job_json = serde_json::to_string(&job).map_err(|e| {
            redis::RedisError::from((
                redis::ErrorKind::TypeError,
                "failed to serialize PDF job",
                e.to_string(),
            ))
        })?;
        let mut conn = self.redis.clone();
        conn.rpush::<_, _, ()>(&self.queue_key, job_json).await?;
        Ok(())
    }

    /// Pops the next job from the head of the queue.
    ///
    /// A payload that cannot be parsed is logged and skipped, so one bad
    /// entry neither disappears silently nor makes the queue look empty
    /// while valid jobs are still waiting behind it.
    pub async fn dequeue(&self) -> Result<Option<PdfJob>, redis::RedisError> {
        let mut conn = self.redis.clone();
        loop {
            let popped: Option<String> = conn.lpop(&self.queue_key, None).await?;
            let job_json = match popped {
                Some(json) => json,
                None => return Ok(None),
            };

            match serde_json::from_str(&job_json) {
                Ok(job) => return Ok(Some(job)),
                Err(e) => tracing::error!("Discarding malformed PDF job payload: {}", e),
            }
        }
    }

    /// Returns the number of jobs currently waiting.
    pub async fn queue_length(&self) -> Result<usize, redis::RedisError> {
        let mut conn = self.redis.clone();
        conn.llen(&self.queue_key).await
    }
}

/// Background worker that converts queued PDFs.
pub struct PdfWorker {
    queue: Arc<JobQueue>,
    db_pool: sqlx::PgPool,
    model_manager: Arc<ModelManager>,
    storage_path: String,
}

impl PdfWorker {
    /// Processes jobs forever, sleeping 5 s when the queue is empty and 1 s
    /// after an error.
    pub async fn run(&self) {
        loop {
            match self.process_next_job().await {
                Ok(true) => {
                    tracing::info!("Job processed successfully");
                }
                Ok(false) => {
                    // No jobs in queue, sleep
                    tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
                }
                Err(e) => {
                    tracing::error!("Job processing error: {}", e);
                    tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
                }
            }
        }
    }

    /// Takes one job from the queue and processes it.
    ///
    /// Returns `Ok(true)` when a job completed, `Ok(false)` when the queue was
    /// empty, and `Err` when processing or a status update failed. The error
    /// type is `Send + Sync` because it is held across an `.await`.
    // NOTE(review): a job cancelled while still queued is not checked for
    // here; its status is overwritten with `processing` — confirm intent.
    async fn process_next_job(&self) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
        let job = match self.queue.dequeue().await? {
            Some(job) => job,
            None => return Ok(false),
        };

        tracing::info!("Processing PDF job: {}", job.id);

        // Update status to processing
        self.update_job_status(&job.id, JobStatus::Processing, 0.0)
            .await?;

        // Process PDF
        match self.process_pdf(&job).await {
            Ok(result_url) => {
                // Update status to completed
                let finished_at = chrono::Utc::now();
                sqlx::query!(
                    r#"
                    UPDATE pdf_jobs
                    SET status = $1, result_url = $2, completed_at = $3, updated_at = $4, progress = 1.0
                    WHERE id = $5
                    "#,
                    serde_json::to_value(&JobStatus::Completed)?,
                    result_url,
                    finished_at,
                    finished_at,
                    job.id,
                )
                .execute(&self.db_pool)
                .await?;

                // Send callback. The job itself has succeeded and is recorded
                // as completed, so a failing client endpoint is only logged.
                if let Some(callback_url) = &job.callback_url {
                    if let Err(e) = self
                        .send_completion_callback(callback_url, &job.id, &result_url)
                        .await
                    {
                        tracing::warn!("Completion callback for job {} failed: {}", job.id, e);
                    }
                }

                Ok(true)
            }
            Err(e) => {
                // Update status to failed
                sqlx::query!(
                    r#"
                    UPDATE pdf_jobs
                    SET status = $1, error = $2, updated_at = $3
                    WHERE id = $4
                    "#,
                    serde_json::to_value(&JobStatus::Failed)?,
                    e.to_string(),
                    chrono::Utc::now(),
                    job.id,
                )
                .execute(&self.db_pool)
                .await?;

                Err(e)
            }
        }
    }

    /// Converts the job's PDF, writes the result under `storage_path` and
    /// returns its public URL (`/results/<id>.<format>`).
    ///
    /// # Errors
    /// Fails if the conversion format is unsafe as a file extension, if the
    /// model fails, or if the result cannot be written.
    async fn process_pdf(
        &self,
        job: &PdfJob,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        // `conversion_format` comes from the client and becomes part of a
        // file name; refuse anything that could leave the storage directory.
        let format = job.conversion_format.as_str();
        let is_safe_extension = !format.is_empty()
            && !format.starts_with('.')
            && !format.contains("..")
            && format
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '.');
        if !is_safe_extension {
            return Err(format!("Unsupported conversion format: {}", format).into());
        }

        // Process PDF with model manager
        let result = self
            .model_manager
            .process_pdf(&job.pdf_data, &job.conversion_format, &job.options)
            .await?;

        // Save result to storage
        let result_filename = format!("{}.{}", job.id, format);
        let result_path = std::path::Path::new(&self.storage_path).join(&result_filename);

        tokio::fs::write(&result_path, result).await?;

        // Return public URL
        Ok(format!("/results/{}", result_filename))
    }

    /// Writes a new status and progress value for a job.
    async fn update_job_status(
        &self,
        job_id: &str,
        status: JobStatus,
        progress: f32,
    ) -> Result<(), sqlx::Error> {
        sqlx::query!(
            r#"
            UPDATE pdf_jobs
            SET status = $1, progress = $2, updated_at = $3
            WHERE id = $4
            "#,
            serde_json::to_value(&status).expect("JobStatus serializes to a JSON string"),
            progress,
            chrono::Utc::now(),
            job_id,
        )
        .execute(&self.db_pool)
        .await?;

        Ok(())
    }

    async fn send_completion_callback( &self, callback_url: &str, job_id: &str, result_url: &str, ) -> Result<(), Box> { let client = reqwest::Client::new(); client
.post(callback_url) .json(&serde_json::json!({ "pdf_id": job_id, "status": "completed", "result_url": result_url, })) .send() .await?; Ok(()) } }
```

---

## 5. Authentication Service

```rust
// src/api/auth.rs
use serde::{Deserialize, Serialize};
use sha2::{Sha256, Digest};

/// Authenticated caller, attached to each request by the auth middleware.
// NOTE(review): `rate_limit` is `u64` but is loaded through `query_as!` from
// PostgreSQL, which has no unsigned integer types — confirm the mapping.
#[derive(Debug, Clone)]
pub struct AuthUser {
    pub id: i64,
    pub app_id: String,
    pub email: String,
    pub rate_limit: u64,
    pub quota_remaining: i64,
}

/// Validates API credentials and tokens against the `users` table.
pub struct AuthService {
    db_pool: sqlx::PgPool,
}

impl AuthService {
    /// Creates a service that queries the given pool.
    pub fn new(db_pool: sqlx::PgPool) -> Self {
        Self { db_pool }
    }

    /// Looks up the active user whose `app_id` and SHA-256 hex digest of
    /// `app_key` match.
    ///
    /// # Errors
    /// Fails with "Invalid credentials" when no such user exists, or with the
    /// underlying database error.
    pub async fn validate_credentials(
        &self,
        app_id: &str,
        app_key: &str,
    ) -> Result<AuthUser, Box<dyn std::error::Error>> {
        // Hash the app_key
        let key_hash = format!("{:x}", Sha256::digest(app_key.as_bytes()));

        // Query database
        let user = sqlx::query_as!(
            AuthUser,
            r#"
            SELECT id, app_id, email, rate_limit, quota_remaining
            FROM users
            WHERE app_id = $1 AND app_key_hash = $2 AND active = true
            "#,
            app_id,
            key_hash,
        )
        .fetch_optional(&self.db_pool)
        .await?
        .ok_or("Invalid credentials")?;

        Ok(user)
    }

    /// Decodes a JWT and loads the active user it names.
    ///
    /// # Errors
    /// Fails when the token is invalid or expired, when the user is missing
    /// or inactive ("Invalid token"), or on database error.
    pub async fn validate_token(
        &self,
        token: &str,
    ) -> Result<AuthUser, Box<dyn std::error::Error>> {
        // Decode JWT token
        let claims = decode_jwt(token)?;

        // Query user
        let user = sqlx::query_as!(
            AuthUser,
            r#"
            SELECT id, app_id, email, rate_limit, quota_remaining
            FROM users
            WHERE id = $1 AND active = true
            "#,
            claims.user_id,
        )
        .fetch_optional(&self.db_pool)
        .await?
        .ok_or("Invalid token")?;

        Ok(user)
    }

    /// Issues a JWT for `user_id` that expires in 30 days.
    pub async fn generate_token(
        &self,
        user_id: i64,
    ) -> Result<String, Box<dyn std::error::Error>> {
        // Generate JWT token
        let expires_at = chrono::Utc::now() + chrono::Duration::days(30);
        let claims = JwtClaims {
            user_id,
            // Checked conversion instead of a silently truncating `as` cast.
            exp: usize::try_from(expires_at.timestamp())?,
        };

        encode_jwt(&claims)
    }
}

/// Claims carried by issued tokens.
#[derive(Debug, Serialize, Deserialize)]
struct JwtClaims {
    user_id: i64,
    /// Expiry as seconds since the Unix epoch.
    exp: usize,
}

/// Reads the signing secret from `JWT_SECRET`, rejecting an empty value so
/// tokens are never signed or accepted with an empty key.
fn jwt_secret() -> Result<String, Box<dyn std::error::Error>> {
    let secret = std::env::var("JWT_SECRET")?;
    if secret.is_empty() {
        return Err("JWT_SECRET must not be empty".into());
    }
    Ok(secret)
}

/// Signs `claims` with the default header and the `JWT_SECRET` key.
fn encode_jwt(claims: &JwtClaims) -> Result<String, Box<dyn std::error::Error>> {
    use jsonwebtoken::{encode, Header, EncodingKey};

    let secret = jwt_secret()?;
    let token = encode(
        &Header::default(),
        claims,
        &EncodingKey::from_secret(secret.as_bytes()),
    )?;

    Ok(token)
}

/// Verifies `token` with the default validation rules and returns its claims.
fn decode_jwt(token: &str) -> Result<JwtClaims, Box<dyn std::error::Error>> {
    use jsonwebtoken::{decode, Validation, DecodingKey};

    let secret = jwt_secret()?;
    let token_data = decode::<JwtClaims>(
        token,
        &DecodingKey::from_secret(secret.as_bytes()),
        &Validation::default(),
    )?;

    Ok(token_data.claims)
}
```

---

## 6. Configuration

### 6.1 Server Configuration

```toml
# config/server.toml

[server]
host = "0.0.0.0"
port = 8080
max_upload_size = 10485760  # 10MB
request_timeout = 300       # 5 minutes
enable_tls = false
# tls_cert_path = "/path/to/cert.pem"
# tls_key_path = "/path/to/key.pem"

[storage]
model_path = "./models"
storage_path = "./storage/results"

[database]
url = "postgres://user:pass@localhost/ruvector"
max_connections = 20

[redis]
url = "redis://localhost:6379"

[rate_limiting]
default_rate_limit = 100  # requests per minute
default_quota = 10000     # requests per month

[workers]
pdf_workers = 4
cleanup_interval = 3600   # 1 hour

[features]
enable_webhooks = true
enable_streaming = true
enable_pdf_processing = true
```

### 6.2 Loading Configuration

```rust
// src/config.rs
use serde::Deserialize;

/// Root of `config/server.toml`; one field per TOML table.
// NOTE(review): `ServerConfig` as declared in section 2.1 does not derive
// `Deserialize` and also carries `model_path`, `storage_path`, `redis_url`
// and `database_url`, which this file keeps in other tables — confirm how
// the two are reconciled.
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
    pub server: ServerConfig,
    pub storage: StorageConfig,
    pub database: DatabaseConfig,
    pub redis: RedisConfig,
    pub rate_limiting: RateLimitConfig,
    pub workers: WorkerConfig,
    pub features: FeatureConfig,
}

#[derive(Debug, Deserialize, Clone)]
pub struct StorageConfig {
pub model_path: String, pub storage_path: String, }

#[derive(Debug, Deserialize, Clone)]
pub struct DatabaseConfig {
    pub url: String,
    pub max_connections: u32,
}

#[derive(Debug, Deserialize, Clone)]
pub struct RedisConfig {
    pub url: String,
}

#[derive(Debug, Deserialize, Clone)]
pub struct RateLimitConfig {
    pub default_rate_limit: u64,
    pub default_quota: i64,
}

#[derive(Debug, Deserialize, Clone)]
pub struct WorkerConfig {
    pub pdf_workers: usize,
    pub cleanup_interval: u64,
}

#[derive(Debug, Deserialize, Clone)]
pub struct FeatureConfig {
    pub enable_webhooks: bool,
    pub enable_streaming: bool,
    pub enable_pdf_processing: bool,
}

impl Config {
    pub fn from_file(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
        let contents = std::fs::read_to_string(path)?;
        let config: Config = toml::from_str(&contents)?;
        Ok(config)
    }
}
```

---

## 7. OpenAPI Specification

### 7.1 OpenAPI Schema

```yaml
# openapi.yaml
openapi: 3.0.3
info:
  title: RuVector Scipix API
  description: OCR and document processing API compatible with Scipix v3
  version: 1.0.0
  contact:
    name: API Support
    email: support@ruvector.io

servers:
  - url: https://api.ruvector.io/v3
    description: Production server
  - url: http://localhost:8080/v3
    description: Development server

security:
  - BearerAuth: []
  - ApiKeyAuth: []

components:
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT
    ApiKeyAuth:
      type: apiKey
      in: header
      name: app_id
      description: Requires both app_id and app_key headers

  schemas:
    TextRequest:
      type: object
      required:
        - src
      properties:
        src:
          type: string
          description: Image source (base64, data URL, or HTTP URL)
        include_latex:
          type: boolean
          default: false
        include_mathml:
          type: boolean
          default: false
        include_confidence:
          type: boolean
          default: false
        include_geometry:
          type: boolean
          default: false
        alphabets:
          type: array
          items:
            type: string
          example: ["en", "es"]
        callback_url:
          type: string
          format: uri

    TextResponse:
      type: object
      properties:
        request_id:
          type: string
          format: uuid
        text:
          type: string
        latex:
          type: string
        mathml:
          type: string
        confidence:
          type: number
          format: float
        geometry:
          type: array
          items:
            $ref: '#/components/schemas/BoundingBox'
        processing_time_ms:
          type: integer

    BoundingBox:
      type: object
      properties:
        x:
          type: number
        y:
          type: number
        width:
          type: number
        height:
          type: number
        text:
          type: string
        confidence:
          type: number

    PdfRequest:
      type: object
      required:
        - src
        - conversion_format
      properties:
        src:
          type: string
        conversion_format:
          type: string
          enum: [mmd, docx, html, latex]
        enable_tables_fallback:
          type: boolean
        callback_url:
          type: string

    PdfJobResponse:
      type: object
      properties:
        pdf_id:
          type: string
          format: uuid
        status:
          type: string
          enum: [queued, processing, completed, failed, cancelled]
        created_at:
          type: string
          format: date-time
        estimated_completion_time:
          type: integer

    Error:
      type: object
      properties:
        error:
          type: string
        error_code:
          type: string
        message:
          type: string
        request_id:
          type: string

paths:
  /text:
    post:
      summary: Process image OCR
      tags:
        - OCR
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TextRequest'
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                options:
                  type: string
                  description: JSON-encoded options
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TextResponse'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
        '429':
          description: Rate limit exceeded

  /pdf:
    post:
      summary: Submit PDF for processing
      tags:
        - PDF
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PdfRequest'
      responses:
        '202':
          description: Job accepted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PdfJobResponse'

  /pdf/{id}:
    get:
      summary: Get PDF job status
      tags:
        - PDF
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Job status
    delete:
      summary: Cancel PDF job
      tags:
        - PDF
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: string
      responses:
        '204':
          description: Job cancelled

  /ocr-results:
    get:
      summary: Query OCR results
      tags:
        - Query
      parameters:
        - name: limit
          in: query
          schema:
            type: integer
            default: 50
        - name: offset
          in: query
          schema:
            type: integer
            default: 0
      responses:
        '200':
          description: Results list

  /ocr-usage:
    get:
      summary: Get usage statistics
      tags:
        - Query
      parameters:
        - name: period
          in: query
          schema:
            type: string
            enum: [day, week, month]
      responses:
        '200':
          description: Usage stats
```

---

## 8. Database Schema

```sql
-- migrations/001_initial.sql

-- Users table
CREATE TABLE users (
    id BIGSERIAL PRIMARY KEY,
    app_id VARCHAR(64) UNIQUE NOT NULL,
    app_key_hash VARCHAR(64) NOT NULL,
    email VARCHAR(255) UNIQUE NOT NULL,
    active BOOLEAN DEFAULT true,
    rate_limit BIGINT DEFAULT 100,
    quota_remaining BIGINT DEFAULT 10000,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_users_app_id ON users(app_id);
CREATE INDEX idx_users_email ON users(email);

-- PDF jobs table
CREATE TABLE pdf_jobs (
    id VARCHAR(64) PRIMARY KEY,
    user_id BIGINT REFERENCES users(id),
    status JSONB NOT NULL,
    conversion_format VARCHAR(32) NOT NULL,
    options JSONB,
    callback_url TEXT,
    result_url TEXT,
    error TEXT,
    progress FLOAT DEFAULT 0.0,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),
    completed_at TIMESTAMPTZ
);

CREATE INDEX idx_pdf_jobs_user_id ON pdf_jobs(user_id);
CREATE INDEX idx_pdf_jobs_status ON pdf_jobs((status->>'status'));
CREATE INDEX idx_pdf_jobs_created_at ON pdf_jobs(created_at);

-- OCR results table
CREATE TABLE ocr_results (
    id BIGSERIAL PRIMARY KEY,
    user_id BIGINT REFERENCES users(id),
    request_id VARCHAR(64) UNIQUE NOT NULL,
    endpoint VARCHAR(64) NOT NULL,
    status VARCHAR(32) NOT NULL,
    processing_time_ms BIGINT,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_ocr_results_user_id ON ocr_results(user_id);
CREATE INDEX idx_ocr_results_created_at ON ocr_results(created_at);
CREATE INDEX
idx_ocr_results_endpoint ON ocr_results(endpoint);
```

---

## 9. Main Application Entry

```rust
// src/main.rs

use clap::Parser;

#[derive(Parser)]
#[command(name = "ruvector-api")]
#[command(about = "RuVector Scipix API Server")]
struct Cli {
    #[arg(short, long, default_value = "config/server.toml")]
    config: String,

    #[arg(long)]
    workers: Option<usize>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize tracing
    tracing_subscriber::fmt()
        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
        .init();

    let cli = Cli::parse();

    // Load configuration
    let config = Config::from_file(&cli.config)?;

    // Start PDF workers
    let worker_count = cli.workers.unwrap_or(config.workers.pdf_workers);
    for i in 0..worker_count {
        let config = config.clone();
        tokio::spawn(async move {
            tracing::info!("Starting PDF worker {}", i);
            let worker = PdfWorker::new(config).await.unwrap();
            worker.run().await;
        });
    }

    // Start API server
    let server = ApiServer::new(config.server).await?;
    server.serve().await?;

    Ok(())
}
```

---

## 10. Cargo Dependencies

```toml
# Cargo.toml additions for API server

[dependencies]
# Web framework
axum = "0.7"
axum-server = { version = "0.6", features = ["tls-rustls"] }
tower = "0.4"
tower-http = { version = "0.5", features = ["cors", "trace", "compression", "fs"] }
tower-governor = "0.3"

# Async runtime
tokio = { version = "1", features = ["full"] }

# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
toml = "0.8"

# Database
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid"] }
redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }

# Auth
jsonwebtoken = "9"
sha2 = "0.10"
bcrypt = "0.15"

# HTTP client
reqwest = { version = "0.11", features = ["json", "multipart"] }

# Utilities
uuid = { version = "1", features = ["v4", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
base64 = "0.21"
bytes = "1"

# Logging
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# CLI
clap = { version = "4", features = ["derive"] }
```

---

## Summary

This API server design provides:

1. **Full Scipix v3 compatibility** - All major endpoints implemented
2. **Production-ready architecture** - Async processing, rate limiting, auth
3. **Scalable design** - Worker pool, Redis queue, PostgreSQL storage
4. **Type safety** - Leveraging Rust's type system with Serde
5. **Performance** - Axum + Tokio for high-throughput async I/O
6. **Observability** - Structured logging, metrics, request tracing
7. **Security** - JWT/API key auth, input validation, rate limiting
8. **Developer experience** - OpenAPI spec, clear error codes

The server can be extended with:

- WebSocket support for real-time updates
- GraphQL endpoint for flexible queries
- Prometheus metrics export
- Distributed tracing (OpenTelemetry)
- Multi-region deployment support