Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,189 @@
//! JavaScript API for Scipix OCR
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use wasm_bindgen::prelude::*;
use web_sys::{HtmlCanvasElement, ImageData};
use crate::wasm::canvas::CanvasProcessor;
use crate::wasm::memory::WasmBuffer;
use crate::wasm::types::{OcrResult, RecognitionFormat};
/// Lazily-initialized `CanvasProcessor` shared by every `ScipixWasm` instance.
/// It is created on the first call to `ScipixWasm::new` and handed out as
/// `Arc` clones afterwards.
static PROCESSOR: OnceCell<Arc<CanvasProcessor>> = OnceCell::new();
/// Main WASM API for Scipix OCR
///
/// Holds the per-instance recognition settings plus a handle to the shared
/// image processor.
#[wasm_bindgen]
pub struct ScipixWasm {
// Handle to the processor stored in the `PROCESSOR` static.
processor: Arc<CanvasProcessor>,
// Output format passed to the processor on every recognition call.
format: RecognitionFormat,
// Results whose confidence is below this value have their text/LaTeX blanked.
confidence_threshold: f32,
}
#[wasm_bindgen]
impl ScipixWasm {
/// Build a `ScipixWasm` with default settings.
///
/// The instance shares the global `PROCESSOR` (initialized here on first use),
/// outputs both text and LaTeX, and uses a confidence threshold of 0.5.
#[wasm_bindgen(constructor)]
pub async fn new() -> Result<ScipixWasm, JsValue> {
    let shared = PROCESSOR.get_or_init(|| Arc::new(CanvasProcessor::new()));
    let instance = ScipixWasm {
        processor: Arc::clone(shared),
        format: RecognitionFormat::Both,
        confidence_threshold: 0.5,
    };
    Ok(instance)
}
/// Run recognition on an encoded image held in a byte slice.
///
/// The bytes are copied into a `WasmBuffer`, decoded and processed with the
/// instance's current format, filtered by the confidence threshold, and
/// serialized to a JS value.
///
/// # Errors
/// Returns a string `JsValue` prefixed with `Recognition failed:` or
/// `Serialization failed:` depending on the failing step.
#[wasm_bindgen]
pub async fn recognize(&self, image_data: &[u8]) -> Result<JsValue, JsValue> {
    let owned = WasmBuffer::from_slice(image_data);
    let outcome = self
        .processor
        .process_image_bytes(owned.as_slice(), self.format)
        .await;
    let recognized = match outcome {
        Ok(value) => value,
        Err(err) => return Err(JsValue::from_str(&format!("Recognition failed: {}", err))),
    };
    // Blank out low-confidence output before handing it to JS.
    let kept = self.filter_by_confidence(recognized);
    match serde_wasm_bindgen::to_value(&kept) {
        Ok(js) => Ok(js),
        Err(err) => Err(JsValue::from_str(&format!("Serialization failed: {}", err))),
    }
}
/// Run recognition on the current contents of an HTML canvas.
///
/// Pixels are read from the canvas' 2d context, processed with the instance's
/// format, filtered by the confidence threshold and serialized.
///
/// # Errors
/// Returns a string `JsValue` prefixed with `Canvas extraction failed:`,
/// `Recognition failed:` or `Serialization failed:`.
#[wasm_bindgen(js_name = recognizeFromCanvas)]
pub async fn recognize_from_canvas(
    &self,
    canvas: &HtmlCanvasElement,
) -> Result<JsValue, JsValue> {
    let pixels = match self.processor.extract_canvas_image(canvas) {
        Ok(data) => data,
        Err(err) => {
            return Err(JsValue::from_str(&format!(
                "Canvas extraction failed: {}",
                err
            )))
        }
    };
    let recognized = match self.processor.process_image_data(&pixels, self.format).await {
        Ok(value) => value,
        Err(err) => return Err(JsValue::from_str(&format!("Recognition failed: {}", err))),
    };
    let kept = self.filter_by_confidence(recognized);
    serde_wasm_bindgen::to_value(&kept)
        .map_err(|err| JsValue::from_str(&format!("Serialization failed: {}", err)))
}
/// Run recognition on a base64-encoded image.
///
/// If the input contains a comma (as in a `data:` URL), only the segment
/// between the first and second comma is decoded; otherwise the whole string
/// is decoded. The decoded bytes are forwarded to `recognize`.
///
/// # Errors
/// Returns `Base64 decode failed: …` when decoding fails, or whatever
/// `recognize` returns.
#[wasm_bindgen(js_name = recognizeBase64)]
pub async fn recognize_base64(&self, base64: &str) -> Result<JsValue, JsValue> {
    // `nth(1)` is `None` exactly when there is no comma, so the fallback
    // covers the "no data-URL prefix" case.
    let payload = base64.split(',').nth(1).unwrap_or(base64);
    let image_bytes = match base64::decode(payload) {
        Ok(bytes) => bytes,
        Err(err) => return Err(JsValue::from_str(&format!("Base64 decode failed: {}", err))),
    };
    self.recognize(&image_bytes).await
}
/// Run recognition on a browser `ImageData` object.
///
/// # Errors
/// Returns a string `JsValue` prefixed with `Recognition failed:` or
/// `Serialization failed:`.
#[wasm_bindgen(js_name = recognizeImageData)]
pub async fn recognize_image_data(&self, image_data: &ImageData) -> Result<JsValue, JsValue> {
    let outcome = self
        .processor
        .process_image_data(image_data, self.format)
        .await;
    let recognized = match outcome {
        Ok(value) => value,
        Err(err) => return Err(JsValue::from_str(&format!("Recognition failed: {}", err))),
    };
    let kept = self.filter_by_confidence(recognized);
    match serde_wasm_bindgen::to_value(&kept) {
        Ok(js) => Ok(js),
        Err(err) => Err(JsValue::from_str(&format!("Serialization failed: {}", err))),
    }
}
/// Select the output format by name (case-insensitive).
///
/// `"text"` and `"latex"` select the matching format; `"both"` and any
/// unrecognized name select `RecognitionFormat::Both`.
#[wasm_bindgen(js_name = setFormat)]
pub fn set_format(&mut self, format: &str) {
    let requested = format.to_lowercase();
    self.format = match requested.as_str() {
        "text" => RecognitionFormat::Text,
        "latex" => RecognitionFormat::Latex,
        // "both" and every unknown name fall back to both outputs.
        _ => RecognitionFormat::Both,
    };
}
/// Set the confidence threshold (0.0 - 1.0)
///
/// Values outside the range are clamped. A NaN argument is ignored and the
/// previous threshold is kept: `f32::clamp` would pass NaN through, and every
/// `confidence < NaN` comparison is false, which would silently disable
/// filtering.
#[wasm_bindgen(js_name = setConfidenceThreshold)]
pub fn set_confidence_threshold(&mut self, threshold: f32) {
    if threshold.is_nan() {
        return;
    }
    self.confidence_threshold = threshold.clamp(0.0, 1.0);
}
/// Get the current confidence threshold
///
/// Returns the value stored by `set_confidence_threshold`, or the initial
/// 0.5 set in `new` if it was never changed.
#[wasm_bindgen(js_name = getConfidenceThreshold)]
pub fn get_confidence_threshold(&self) -> f32 {
self.confidence_threshold
}
/// Return the crate version captured from `CARGO_PKG_VERSION` at compile time.
#[wasm_bindgen(js_name = getVersion)]
pub fn get_version(&self) -> String {
    String::from(env!("CARGO_PKG_VERSION"))
}
/// List the format names accepted by `set_format`, as JS strings in the
/// order `text`, `latex`, `both`.
#[wasm_bindgen(js_name = getSupportedFormats)]
pub fn get_supported_formats(&self) -> Vec<JsValue> {
    ["text", "latex", "both"]
        .iter()
        .copied()
        .map(JsValue::from_str)
        .collect()
}
/// Batch process multiple images
#[wasm_bindgen(js_name = recognizeBatch)]
pub async fn recognize_batch(&self, images: Vec<JsValue>) -> Result<JsValue, JsValue> {
let mut results = Vec::new();
for img in images {
// Try to process as Uint8Array
if let Ok(bytes) = js_sys::Uint8Array::new(&img).to_vec() {
match self.recognize(&bytes).await {
Ok(result) => results.push(result),
Err(e) => {
web_sys::console::warn_1(&JsValue::from_str(&format!(
"Failed to process image: {:?}",
e
)));
results.push(JsValue::NULL);
}
}
}
}
Ok(js_sys::Array::from_iter(results).into())
}
// Private helper methods
/// Blank the text and LaTeX of a result whose confidence is below the
/// instance threshold; confidence and metadata are left untouched.
fn filter_by_confidence(&self, mut result: OcrResult) -> OcrResult {
    let below_threshold = result.confidence < self.confidence_threshold;
    if below_threshold {
        result.text.clear();
        result.latex.take();
    }
    result
}
}
/// Create a new ScipixWasm instance (factory function)
///
/// Exposed to JavaScript as `createScipix`; it simply awaits
/// `ScipixWasm::new` and returns its result.
#[wasm_bindgen(js_name = createScipix)]
pub async fn create_scipix() -> Result<ScipixWasm, JsValue> {
ScipixWasm::new().await
}

View File

@@ -0,0 +1,217 @@
//! Canvas and ImageData handling for WASM
use anyhow::{anyhow, Result};
use image::{DynamicImage, ImageBuffer, Rgba};
use wasm_bindgen::prelude::*;
use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement, ImageData};
use crate::wasm::types::{OcrResult, RecognitionFormat};
/// Processor for canvas and image data
///
/// Currently a stateless, field-less type; all behaviour lives in its methods.
pub struct CanvasProcessor {
// Could add model loading here in the future
}
impl CanvasProcessor {
/// Create a new canvas processor
pub fn new() -> Self {
Self {}
}
/// Read the full pixel rectangle of `canvas` through its 2d context.
///
/// # Errors
/// Fails when the 2d context cannot be obtained, is null, is not a
/// `CanvasRenderingContext2d`, or when `getImageData` itself fails.
pub fn extract_canvas_image(&self, canvas: &HtmlCanvasElement) -> Result<ImageData> {
    let lookup = canvas
        .get_context("2d")
        .map_err(|_| anyhow!("Failed to get 2d context"))?;
    let context_object = lookup.ok_or_else(|| anyhow!("Context is null"))?;
    let context = context_object
        .dyn_into::<CanvasRenderingContext2d>()
        .map_err(|_| anyhow!("Failed to cast to 2d context"))?;

    // Cover the whole canvas, starting at its origin.
    let full_width = f64::from(canvas.width());
    let full_height = f64::from(canvas.height());
    match context.get_image_data(0.0, 0.0, full_width, full_height) {
        Ok(data) => Ok(data),
        Err(_) => Err(anyhow!("Failed to get image data")),
    }
}
/// Convert ImageData to DynamicImage
///
/// The pixel bytes are interpreted as RGBA8 with the `ImageData`'s own width
/// and height.
///
/// # Errors
/// Fails when the byte buffer is too small for `width * height` RGBA pixels
/// (`ImageBuffer::from_raw` returns `None`).
pub fn image_data_to_dynamic(&self, image_data: &ImageData) -> Result<DynamicImage> {
    let width = image_data.width();
    let height = image_data.height();
    // `data()` already hands back an owned `Clamped<Vec<u8>>`; unwrap the inner
    // Vec instead of copying the whole image a second time with `to_vec()`.
    let pixels = image_data.data().0;
    let img_buffer = ImageBuffer::<Rgba<u8>, Vec<u8>>::from_raw(width, height, pixels)
        .ok_or_else(|| anyhow!("Failed to create image buffer"))?;
    Ok(DynamicImage::ImageRgba8(img_buffer))
}
/// Decode an encoded image from memory and run it through the OCR pipeline.
///
/// # Errors
/// Returns `Failed to decode image: …` when the bytes cannot be decoded, or
/// any error from the processing step.
pub async fn process_image_bytes(
    &self,
    image_bytes: &[u8],
    format: RecognitionFormat,
) -> Result<OcrResult> {
    let decoded = match image::load_from_memory(image_bytes) {
        Ok(img) => img,
        Err(err) => return Err(anyhow!("Failed to decode image: {}", err)),
    };
    self.process_dynamic_image(&decoded, format).await
}
/// Convert a browser `ImageData` to a `DynamicImage` and run it through the
/// OCR pipeline.
///
/// # Errors
/// Propagates conversion and processing errors unchanged.
pub async fn process_image_data(
    &self,
    image_data: &ImageData,
    format: RecognitionFormat,
) -> Result<OcrResult> {
    let converted = self.image_data_to_dynamic(image_data)?;
    let outcome = self.process_dynamic_image(&converted, format).await;
    outcome
}
/// Shared pipeline: grayscale → threshold → text (and optionally LaTeX)
/// extraction → confidence → `OcrResult`.
///
/// The extraction steps are mock implementations. LaTeX is produced only for
/// the `Latex` and `Both` formats. The result metadata records the source
/// image's width and height and the requested format name.
async fn process_dynamic_image(
    &self,
    img: &DynamicImage,
    format: RecognitionFormat,
) -> Result<OcrResult> {
    // Grayscale conversion followed by binarization.
    let binarized = self.preprocess_image(&img.to_luma8());

    let text = self.extract_text(&binarized)?;
    let latex = match format {
        RecognitionFormat::Latex | RecognitionFormat::Both => {
            Some(self.extract_latex(&binarized)?)
        }
        _ => None,
    };

    let confidence = self.calculate_confidence(&text, &latex);
    let metadata = serde_json::json!({
        "width": img.width(),
        "height": img.height(),
        "format": format.to_string(),
    });

    Ok(OcrResult {
        text,
        latex,
        confidence,
        metadata: Some(metadata),
    })
}
/// Binarize a grayscale image: pixels brighter than 128 become 255, all
/// others become 0. The input is left untouched; a new image is returned.
fn preprocess_image(&self, img: &image::GrayImage) -> image::GrayImage {
    let mut binary = img.clone();
    binary.pixels_mut().for_each(|px| {
        px.0[0] = if px.0[0] > 128 { 255 } else { 0 };
    });
    binary
}
/// Extract plain text (mock implementation)
///
/// Always returns the same placeholder string; the image is not inspected
/// yet, hence the underscore-prefixed parameter (avoids an unused-variable
/// warning until a real OCR model is wired in).
fn extract_text(&self, _img: &image::GrayImage) -> Result<String> {
    // This would normally run an OCR model
    Ok("Recognized text placeholder".to_string())
}
/// Extract LaTeX (mock implementation)
///
/// Always returns the same placeholder formula; the image is not inspected
/// yet, hence the underscore-prefixed parameter (avoids an unused-variable
/// warning until a real math OCR model is wired in).
fn extract_latex(&self, _img: &image::GrayImage) -> Result<String> {
    // This would normally run a math OCR model
    Ok(r"\sum_{i=1}^{n} x_i".to_string())
}
/// Calculate confidence score
///
/// Simple length heuristic: the text score saturates at 100 bytes and the
/// LaTeX score at 50 bytes, each capped at 1.0. When LaTeX is present the two
/// scores are averaged; when it is absent (text-only format) the text score
/// is returned as-is. Averaging against a missing LaTeX score of 0 would cap
/// text-only confidence at 0.5.
fn calculate_confidence(&self, text: &str, latex: &Option<String>) -> f32 {
    let text_score = (text.len() as f32 / 100.0).min(1.0);
    match latex {
        Some(formula) => {
            let latex_score = (formula.len() as f32 / 50.0).min(1.0);
            (text_score + latex_score) / 2.0
        }
        None => text_score,
    }
}
}
impl Default for CanvasProcessor {
fn default() -> Self {
Self::new()
}
}
/// Convert blob URL to image data
#[wasm_bindgen]
pub async fn blob_url_to_image_data(blob_url: &str) -> Result<ImageData, JsValue> {
use web_sys::{window, HtmlImageElement};
let window = window().ok_or_else(|| JsValue::from_str("No window"))?;
let document = window
.document()
.ok_or_else(|| JsValue::from_str("No document"))?;
// Create image element
let img =
HtmlImageElement::new().map_err(|_| JsValue::from_str("Failed to create image element"))?;
img.set_src(blob_url);
// Wait for image to load
let promise = js_sys::Promise::new(&mut |resolve, reject| {
let img_clone = img.clone();
let onload = Closure::wrap(Box::new(move || {
resolve.call1(&JsValue::NULL, &img_clone).unwrap();
}) as Box<dyn FnMut()>);
img.set_onload(Some(onload.as_ref().unchecked_ref()));
onload.forget();
let onerror = Closure::wrap(Box::new(move || {
reject
.call1(&JsValue::NULL, &JsValue::from_str("Image load failed"))
.unwrap();
}) as Box<dyn FnMut()>);
img.set_onerror(Some(onerror.as_ref().unchecked_ref()));
onerror.forget();
});
wasm_bindgen_futures::JsFuture::from(promise).await?;
// Create canvas and draw image
let canvas = document
.create_element("canvas")
.map_err(|_| JsValue::from_str("Failed to create canvas"))?
.dyn_into::<HtmlCanvasElement>()
.map_err(|_| JsValue::from_str("Failed to cast to canvas"))?;
canvas.set_width(img.natural_width());
canvas.set_height(img.natural_height());
let context = canvas
.get_context("2d")
.map_err(|_| JsValue::from_str("Failed to get 2d context"))?
.ok_or_else(|| JsValue::from_str("Context is null"))?
.dyn_into::<CanvasRenderingContext2d>()
.map_err(|_| JsValue::from_str("Failed to cast to 2d context"))?;
context
.draw_image_with_html_image_element(&img, 0.0, 0.0)
.map_err(|_| JsValue::from_str("Failed to draw image"))?;
context
.get_image_data(0.0, 0.0, canvas.width() as f64, canvas.height() as f64)
.map_err(|_| JsValue::from_str("Failed to get image data"))
}

View File

@@ -0,0 +1,218 @@
//! Memory management for WASM
use std::ops::Deref;
use wasm_bindgen::prelude::*;
/// Owned byte buffer used by the WASM bindings.
///
/// A thin wrapper around `Vec<u8>` that dereferences to `[u8]` and releases
/// its allocation explicitly when dropped.
pub struct WasmBuffer {
    data: Vec<u8>,
}

impl WasmBuffer {
    /// Create an empty buffer with room for `capacity` bytes.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    /// Create buffer from slice (copies data)
    pub fn from_slice(slice: &[u8]) -> Self {
        Self {
            data: slice.to_vec(),
        }
    }

    /// Create buffer from Vec (takes ownership)
    pub fn from_vec(data: Vec<u8>) -> Self {
        Self { data }
    }

    /// Get the underlying slice
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Get mutable slice
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.data
    }

    /// Number of bytes currently stored.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when the buffer holds no bytes.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Clear the buffer (keeps capacity)
    pub fn clear(&mut self) {
        self.data.clear();
    }

    /// Release unused capacity.
    pub fn shrink_to_fit(&mut self) {
        self.data.shrink_to_fit();
    }

    /// Consume the buffer and return its bytes.
    ///
    /// The Vec is swapped out with `mem::take` because fields cannot be moved
    /// out of a type that implements `Drop` (E0509); the `Drop` impl then runs
    /// on the empty Vec left behind.
    pub fn into_vec(mut self) -> Vec<u8> {
        std::mem::take(&mut self.data)
    }
}

impl Deref for WasmBuffer {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        &self.data
    }
}

impl Drop for WasmBuffer {
    fn drop(&mut self) {
        // Explicitly release the allocation. The Vec would also free itself
        // when dropped, so this only makes the release explicit.
        self.data.clear();
        self.data.shrink_to_fit();
    }
}
/// Image buffer exposed to JavaScript, storing pixel bytes together with the
/// image dimensions.
///
/// NOTE(review): despite the name, nothing in this file uses
/// `SharedArrayBuffer`; the bytes are held in a `WasmBuffer` and `getBuffer`
/// builds a new `Uint8Array` from them.
#[wasm_bindgen]
pub struct SharedImageBuffer {
// Pixel bytes; the constructor sizes its capacity for RGBA (4 bytes per pixel).
buffer: WasmBuffer,
// Image width in pixels, as passed to the constructor.
width: u32,
// Image height in pixels, as passed to the constructor.
height: u32,
}
#[wasm_bindgen]
impl SharedImageBuffer {
    /// Create a new buffer for a `width` × `height` RGBA image.
    ///
    /// Only capacity is reserved; the buffer starts empty (`bufferSize` is 0)
    /// until `setBuffer` is called.
    #[wasm_bindgen(constructor)]
    pub fn new(width: u32, height: u32) -> Self {
        // RGBA: four bytes per pixel. The product is computed with checked
        // `usize` arithmetic because `width * height * 4` in `u32` overflows
        // for large images (panic in debug builds, silent wrap in release).
        // If the size is not representable, nothing is reserved up front.
        let size = (width as usize)
            .checked_mul(height as usize)
            .and_then(|pixels| pixels.checked_mul(4))
            .unwrap_or(0);
        Self {
            buffer: WasmBuffer::with_capacity(size),
            width,
            height,
        }
    }

    /// Get width
    #[wasm_bindgen(getter)]
    pub fn width(&self) -> u32 {
        self.width
    }

    /// Get height
    #[wasm_bindgen(getter)]
    pub fn height(&self) -> u32 {
        self.height
    }

    /// Number of bytes currently stored (not the reserved capacity).
    #[wasm_bindgen(js_name = bufferSize)]
    pub fn buffer_size(&self) -> usize {
        self.buffer.len()
    }

    /// Get the stored bytes as a newly created `Uint8Array`.
    #[wasm_bindgen(js_name = getBuffer)]
    pub fn get_buffer(&self) -> js_sys::Uint8Array {
        js_sys::Uint8Array::from(self.buffer.as_slice())
    }

    /// Replace the stored bytes with a copy of `data`.
    ///
    /// The length of `data` is not checked against `width` and `height`.
    #[wasm_bindgen(js_name = setBuffer)]
    pub fn set_buffer(&mut self, data: &js_sys::Uint8Array) {
        self.buffer = WasmBuffer::from_vec(data.to_vec());
    }

    /// Clear the buffer
    pub fn clear(&mut self) {
        self.buffer.clear();
    }
}
/// Memory pool for reusing buffers
///
/// Released buffers are kept (cleared) up to `max_size` entries and handed
/// back out by `acquire`.
pub struct MemoryPool {
// Idle buffers available for reuse; popped from the end by `acquire`.
buffers: Vec<WasmBuffer>,
// Upper bound on how many idle buffers `release` will retain.
max_size: usize,
}
impl MemoryPool {
    /// Create a pool that retains at most `max_size` idle buffers.
    /// Storage for that many entries is reserved immediately.
    pub fn new(max_size: usize) -> Self {
        let buffers = Vec::with_capacity(max_size);
        Self { buffers, max_size }
    }

    /// Take the most recently released buffer, cleared, or allocate a new one.
    ///
    /// `size` is only used as the capacity of a newly allocated buffer; a
    /// reused buffer keeps whatever capacity it already had.
    pub fn acquire(&mut self, size: usize) -> WasmBuffer {
        match self.buffers.pop() {
            Some(mut reused) => {
                reused.clear();
                reused
            }
            None => WasmBuffer::with_capacity(size),
        }
    }

    /// Hand a buffer back. It is cleared and retained if the pool has room;
    /// otherwise it is dropped.
    pub fn release(&mut self, mut buffer: WasmBuffer) {
        if self.buffers.len() >= self.max_size {
            // Pool is full: let `buffer` drop at the end of this scope.
            return;
        }
        buffer.clear();
        self.buffers.push(buffer);
    }

    /// Drop every idle buffer held by the pool.
    pub fn clear(&mut self) {
        self.buffers.clear();
    }
}
impl Default for MemoryPool {
fn default() -> Self {
Self::new(10)
}
}
/// Return a small status value for JavaScript callers.
///
/// On `wasm32`, when `window.performance` is available, the value is built
/// from `{ "available": true, "timestamp": performance.now() }`; no actual
/// memory figures are reported. In every other case (no window, no
/// performance object, serialization failure, non-wasm target) the result is
/// `null`.
#[wasm_bindgen(js_name = getMemoryStats)]
pub fn get_memory_stats() -> JsValue {
    #[cfg(target_arch = "wasm32")]
    {
        use wasm_bindgen::JsValue;
        match web_sys::window().and_then(|w| w.performance()) {
            Some(perf) => {
                let stats = serde_json::json!({
                    "available": true,
                    "timestamp": perf.now(),
                });
                serde_wasm_bindgen::to_value(&stats).unwrap_or(JsValue::NULL)
            }
            None => JsValue::NULL,
        }
    }
    #[cfg(not(target_arch = "wasm32"))]
    JsValue::NULL
}
/// Force garbage collection (hint to runtime)
///
/// Currently a no-op: the body contains no statements, so calling this from
/// JavaScript has no effect.
#[wasm_bindgen(js_name = forceGC)]
pub fn force_gc() {
// This is just a hint; actual GC is controlled by the JS runtime
// In wasm-bindgen, we can't directly trigger GC
// But we can help by ensuring our memory is freed
}

View File

@@ -0,0 +1,49 @@
//! WebAssembly bindings for Scipix OCR
//!
//! This module provides WASM bindings with wasm-bindgen for browser-based OCR.
#![cfg(target_arch = "wasm32")]
pub mod api;
pub mod canvas;
pub mod memory;
pub mod types;
pub mod worker;
pub use api::ScipixWasm;
pub use types::*;
use wasm_bindgen::prelude::*;
// Use wee_alloc for smaller binary size (only with the `wee_alloc` feature).
#[cfg(feature = "wee_alloc")]
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;

/// Module start hook, run automatically when the WASM module is instantiated.
///
/// Installs the console panic hook when the `console_error_panic_hook`
/// feature is enabled, then registers `tracing_wasm` as the global tracing
/// subscriber.
#[wasm_bindgen(start)]
pub fn init() {
    // Better panic messages in the browser console.
    #[cfg(feature = "console_error_panic_hook")]
    console_error_panic_hook::set_once();

    // Route `tracing` output to the browser.
    tracing_wasm::set_as_global_default();
}
/// Return the crate version captured from `CARGO_PKG_VERSION` at compile time.
#[wasm_bindgen]
pub fn version() -> String {
    String::from(env!("CARGO_PKG_VERSION"))
}
/// Check if the WASM module is ready
///
/// Always returns `true`; being able to call this function at all means the
/// module has been loaded.
#[wasm_bindgen]
pub fn is_ready() -> bool {
true
}
// Bring tracing-wasm into scope for logging (a private `use`, not a re-export)
use tracing_wasm;

View File

@@ -0,0 +1,179 @@
//! Type definitions for WASM API
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::*;
/// OCR result returned to JavaScript
///
/// Every field is marked `#[wasm_bindgen(skip)]`: wasm-bindgen can only
/// auto-generate accessors for `Copy` fields whose type can cross the JS
/// boundary, which rules out `String`, `Option<String>` and
/// `serde_json::Value`. JavaScript reads `text`, `latex` and `confidence`
/// through the explicit getters on the impl block instead. The fields stay
/// `pub` for Rust callers and for serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[wasm_bindgen]
pub struct OcrResult {
    /// Recognized plain text
    #[wasm_bindgen(skip)]
    pub text: String,
    /// LaTeX representation (if applicable)
    #[wasm_bindgen(skip)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latex: Option<String>,
    /// Confidence score (0.0 - 1.0)
    // Skipped as well so the explicit `confidence` getter is the only JS accessor.
    #[wasm_bindgen(skip)]
    pub confidence: f32,
    /// Additional metadata
    #[wasm_bindgen(skip)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
}
#[wasm_bindgen]
impl OcrResult {
/// Create a new OCR result
#[wasm_bindgen(constructor)]
pub fn new(text: String, confidence: f32) -> Self {
Self {
text,
latex: None,
confidence,
metadata: None,
}
}
/// Get the text
#[wasm_bindgen(getter)]
pub fn text(&self) -> String {
self.text.clone()
}
/// Get the LaTeX (if available)
#[wasm_bindgen(getter)]
pub fn latex(&self) -> Option<String> {
self.latex.clone()
}
/// Get the confidence score
#[wasm_bindgen(getter)]
pub fn confidence(&self) -> f32 {
self.confidence
}
/// Check if result has LaTeX
#[wasm_bindgen(js_name = hasLatex)]
pub fn has_latex(&self) -> bool {
self.latex.is_some()
}
/// Convert to JSON
#[wasm_bindgen(js_name = toJSON)]
pub fn to_json(&self) -> Result<JsValue, JsValue> {
serde_wasm_bindgen::to_value(self)
.map_err(|e| JsValue::from_str(&format!("Serialization failed: {}", e)))
}
}
/// Recognition output format
///
/// Selects which outputs the processor produces: LaTeX extraction runs only
/// for `Latex` and `Both`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecognitionFormat {
/// Plain text only
Text,
/// LaTeX only
Latex,
/// Both text and LaTeX (the `Default`)
Both,
}
impl RecognitionFormat {
pub fn to_string(&self) -> String {
match self {
Self::Text => "text".to_string(),
Self::Latex => "latex".to_string(),
Self::Both => "both".to_string(),
}
}
}
impl Default for RecognitionFormat {
fn default() -> Self {
Self::Both
}
}
/// Processing options
///
/// A plain options bag exposed to JavaScript; the public fields get
/// auto-generated JS accessors.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[wasm_bindgen]
pub struct ProcessingOptions {
    /// Output format
    // `String` is not `Copy`, so the generated getter must clone the value.
    #[wasm_bindgen(getter_with_clone)]
    pub format: String,
    /// Confidence threshold
    pub confidence_threshold: f32,
    /// Enable preprocessing
    pub preprocess: bool,
    /// Enable postprocessing
    pub postprocess: bool,
}
#[wasm_bindgen]
impl ProcessingOptions {
    /// Create default options
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        Self::default()
    }

    /// Set format
    ///
    /// The string is stored as given; it is not validated here.
    #[wasm_bindgen(js_name = setFormat)]
    pub fn set_format(&mut self, format: String) {
        self.format = format;
    }

    /// Set confidence threshold
    ///
    /// The value is clamped to 0.0–1.0, matching
    /// `ScipixWasm::set_confidence_threshold`. A NaN argument is ignored so
    /// the stored threshold always stays a comparable number.
    #[wasm_bindgen(js_name = setConfidenceThreshold)]
    pub fn set_confidence_threshold(&mut self, threshold: f32) {
        if threshold.is_nan() {
            return;
        }
        self.confidence_threshold = threshold.clamp(0.0, 1.0);
    }
}
impl Default for ProcessingOptions {
fn default() -> Self {
Self {
format: "both".to_string(),
confidence_threshold: 0.5,
preprocess: true,
postprocess: true,
}
}
}
/// Error types for WASM API
///
/// Converts to a string `JsValue` through its `Display` text (see the
/// `From<WasmError> for JsValue` impl below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WasmError {
/// Image decoding error; displayed as `Image decode error: <msg>`
ImageDecode(String),
/// Processing error; displayed as `Processing error: <msg>`
Processing(String),
/// Invalid input; displayed as `Invalid input: <msg>`
InvalidInput(String),
/// Not initialized; displayed as `WASM module not initialized`
NotInitialized,
}
/// Human-readable message: a fixed label followed by the variant's detail
/// string, or a fixed sentence for `NotInitialized`.
impl std::fmt::Display for WasmError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, detail) = match self {
            Self::NotInitialized => return f.write_str("WASM module not initialized"),
            Self::ImageDecode(msg) => ("Image decode error", msg),
            Self::Processing(msg) => ("Processing error", msg),
            Self::InvalidInput(msg) => ("Invalid input", msg),
        };
        write!(f, "{}: {}", label, detail)
    }
}
// Marker impl: `WasmError` relies on the default `Error` methods (no source).
impl std::error::Error for WasmError {}
/// Converts the error into a JS string carrying its `Display` message.
impl From<WasmError> for JsValue {
    fn from(error: WasmError) -> Self {
        let message = error.to_string();
        JsValue::from_str(&message)
    }
}

View File

@@ -0,0 +1,243 @@
//! Web Worker support for off-main-thread OCR processing
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use wasm_bindgen::prelude::*;
use web_sys::{DedicatedWorkerGlobalScope, MessageEvent};
use crate::wasm::api::ScipixWasm;
use crate::wasm::types::RecognitionFormat;
/// Set once by `init_worker`; `process_image` checks it to verify that the
/// worker has been initialized.
static WORKER_INSTANCE: OnceCell<Arc<ScipixWasm>> = OnceCell::new();
/// Messages sent from main thread to worker
///
/// Serialized with an internal `type` tag (`#[serde(tag = "type")]`), so each
/// message carries a `type` field naming the variant next to the variant's
/// own fields.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum WorkerRequest {
/// Initialize the worker
Init,
/// Process an image
Process {
// Caller-chosen request id, echoed back in every response for this request.
id: String,
// Encoded image bytes.
image_data: Vec<u8>,
// Format name, passed to `ScipixWasm::set_format`.
format: String,
},
/// Process base64 image
ProcessBase64 {
// Caller-chosen request id, echoed back in every response for this request.
id: String,
// Base64 text, passed to `ScipixWasm::recognize_base64`.
base64: String,
// Format name, passed to `ScipixWasm::set_format`.
format: String,
},
/// Batch process images
BatchProcess {
// Caller-chosen request id, echoed back in every response for this request.
id: String,
// One encoded image per entry, processed in order.
images: Vec<Vec<u8>>,
// Format name, passed to `ScipixWasm::set_format`.
format: String,
},
/// Terminate worker
Terminate,
}
/// Messages sent from worker to main thread
///
/// Serialized with an internal `type` tag, like `WorkerRequest`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum WorkerResponse {
/// Worker is ready
Ready,
/// Processing started
Started { id: String },
/// Processing progress
Progress {
// Id of the batch request being reported on.
id: String,
// Number of images handled before the one about to be processed.
processed: usize,
// Total number of images in the batch.
total: usize,
},
/// Processing completed successfully
Success {
// Id of the request this result belongs to.
id: String,
// Recognition output; for batches, an object with a `results` array.
result: serde_json::Value,
},
/// Processing failed
Error { id: String, error: String },
/// Worker terminated
Terminated,
}
/// Create the worker's `ScipixWasm`, store it in `WORKER_INSTANCE`, and
/// announce readiness with a `Ready` message.
///
/// # Errors
/// Returns `Worker already initialized` on a second call, or any error from
/// instance creation or message posting.
#[wasm_bindgen(js_name = initWorker)]
pub async fn init_worker() -> Result<(), JsValue> {
    let shared = Arc::new(ScipixWasm::new().await?);
    if WORKER_INSTANCE.set(shared).is_err() {
        return Err(JsValue::from_str("Worker already initialized"));
    }
    post_response(WorkerResponse::Ready)
}
/// Decode a `MessageEvent` payload into a `WorkerRequest` and dispatch it to
/// the matching handler.
///
/// `Terminate` only posts a `Terminated` response; nothing else is done here.
///
/// # Errors
/// Returns `Invalid message: …` when the payload cannot be decoded, or
/// whatever the selected handler returns.
#[wasm_bindgen(js_name = handleWorkerMessage)]
pub async fn handle_worker_message(event: MessageEvent) -> Result<(), JsValue> {
    let request = match serde_wasm_bindgen::from_value::<WorkerRequest>(event.data()) {
        Ok(decoded) => decoded,
        Err(err) => return Err(JsValue::from_str(&format!("Invalid message: {}", err))),
    };

    match request {
        WorkerRequest::Init => init_worker().await,
        WorkerRequest::Process {
            id,
            image_data,
            format,
        } => process_image(id, image_data, format).await,
        WorkerRequest::ProcessBase64 { id, base64, format } => {
            process_base64(id, base64, format).await
        }
        WorkerRequest::BatchProcess { id, images, format } => {
            process_batch(id, images, format).await
        }
        WorkerRequest::Terminate => post_response(WorkerResponse::Terminated),
    }
}
async fn process_image(id: String, image_data: Vec<u8>, format: String) -> Result<(), JsValue> {
post_response(WorkerResponse::Started { id: id.clone() })?;
let instance = WORKER_INSTANCE
.get()
.ok_or_else(|| JsValue::from_str("Worker not initialized"))?;
let mut worker_instance = ScipixWasm::new().await?;
worker_instance.set_format(&format);
match worker_instance.recognize(&image_data).await {
Ok(result) => {
let json_result: serde_json::Value = serde_wasm_bindgen::from_value(result)?;
post_response(WorkerResponse::Success {
id,
result: json_result,
})?;
}
Err(e) => {
post_response(WorkerResponse::Error {
id,
error: format!("{:?}", e),
})?;
}
}
Ok(())
}
async fn process_base64(id: String, base64: String, format: String) -> Result<(), JsValue> {
post_response(WorkerResponse::Started { id: id.clone() })?;
let mut worker_instance = ScipixWasm::new().await?;
worker_instance.set_format(&format);
match worker_instance.recognize_base64(&base64).await {
Ok(result) => {
let json_result: serde_json::Value = serde_wasm_bindgen::from_value(result)?;
post_response(WorkerResponse::Success {
id,
result: json_result,
})?;
}
Err(e) => {
post_response(WorkerResponse::Error {
id,
error: format!("{:?}", e),
})?;
}
}
Ok(())
}
/// Handle a `BatchProcess` request.
///
/// Posts `Started`, then for each image a `Progress` message (sent before the
/// image is processed, so `processed` counts the images already handled), and
/// finally a single `Success` whose result is `{ "results": [...] }`. A failed
/// image is logged as a console warning and contributes `null` to the results.
///
/// # Errors
/// Returns an error when posting a message, creating the `ScipixWasm`
/// instance, or converting a result to JSON fails.
async fn process_batch(id: String, images: Vec<Vec<u8>>, format: String) -> Result<(), JsValue> {
    post_response(WorkerResponse::Started { id: id.clone() })?;

    let total = images.len();
    let mut results = Vec::with_capacity(total);
    let mut recognizer = ScipixWasm::new().await?;
    recognizer.set_format(&format);

    for (idx, image_data) in images.into_iter().enumerate() {
        post_response(WorkerResponse::Progress {
            id: id.clone(),
            processed: idx,
            total,
        })?;

        let entry: serde_json::Value = match recognizer.recognize(&image_data).await {
            Ok(result) => serde_wasm_bindgen::from_value(result)?,
            Err(e) => {
                web_sys::console::warn_1(&JsValue::from_str(&format!(
                    "Failed to process image {}: {:?}",
                    idx, e
                )));
                serde_json::Value::Null
            }
        };
        results.push(entry);
    }

    post_response(WorkerResponse::Success {
        id,
        result: serde_json::json!({ "results": results }),
    })
}
/// Serialize `response` and post it from the dedicated worker's global scope.
///
/// # Errors
/// Fails when the global object is not a `DedicatedWorkerGlobalScope`, when
/// serialization fails, or when `postMessage` fails.
fn post_response(response: WorkerResponse) -> Result<(), JsValue> {
    let scope: DedicatedWorkerGlobalScope = js_sys::global().dyn_into()?;
    let payload = serde_wasm_bindgen::to_value(&response)?;
    scope.post_message(&payload)
}
/// Install the worker's `onmessage` handler.
///
/// Each incoming event is handled on a spawned local task by
/// `handle_worker_message`; a failure is logged with `console.error`. The
/// closure is intentionally leaked with `forget()` so it stays valid after
/// this function returns.
///
/// # Errors
/// Fails when the global object is not a `DedicatedWorkerGlobalScope`.
#[wasm_bindgen(js_name = setupWorker)]
pub fn setup_worker() -> Result<(), JsValue> {
    let scope = js_sys::global().dyn_into::<DedicatedWorkerGlobalScope>()?;

    let handler = Closure::wrap(Box::new(move |event: MessageEvent| {
        let task = async move {
            if let Err(err) = handle_worker_message(event).await {
                web_sys::console::error_1(&err);
            }
        };
        wasm_bindgen_futures::spawn_local(task);
    }) as Box<dyn FnMut(MessageEvent)>);

    scope.set_onmessage(Some(handler.as_ref().unchecked_ref()));
    handler.forget(); // Keep closure alive
    Ok(())
}