Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
189
vendor/ruvector/examples/scipix/src/wasm/api.rs
vendored
Normal file
189
vendor/ruvector/examples/scipix/src/wasm/api.rs
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
//! JavaScript API for Scipix OCR
|
||||
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use wasm_bindgen::prelude::*;
|
||||
use web_sys::{HtmlCanvasElement, ImageData};
|
||||
|
||||
use crate::wasm::canvas::CanvasProcessor;
|
||||
use crate::wasm::memory::WasmBuffer;
|
||||
use crate::wasm::types::{OcrResult, RecognitionFormat};
|
||||
|
||||
/// Module-level cell holding the processor shared by every `ScipixWasm`;
/// it is filled lazily by `ScipixWasm::new` via `get_or_init`.
static PROCESSOR: OnceCell<Arc<CanvasProcessor>> = OnceCell::new();
|
||||
|
||||
/// Main WASM API for Scipix OCR
|
||||
#[wasm_bindgen]
|
||||
pub struct ScipixWasm {
|
||||
processor: Arc<CanvasProcessor>,
|
||||
format: RecognitionFormat,
|
||||
confidence_threshold: f32,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl ScipixWasm {
|
||||
/// Create a new ScipixWasm instance
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub async fn new() -> Result<ScipixWasm, JsValue> {
|
||||
let processor = PROCESSOR
|
||||
.get_or_init(|| Arc::new(CanvasProcessor::new()))
|
||||
.clone();
|
||||
|
||||
Ok(ScipixWasm {
|
||||
processor,
|
||||
format: RecognitionFormat::Both,
|
||||
confidence_threshold: 0.5,
|
||||
})
|
||||
}
|
||||
|
||||
/// Recognize text from raw image data
|
||||
#[wasm_bindgen]
|
||||
pub async fn recognize(&self, image_data: &[u8]) -> Result<JsValue, JsValue> {
|
||||
let buffer = WasmBuffer::from_slice(image_data);
|
||||
|
||||
let result = self
|
||||
.processor
|
||||
.process_image_bytes(buffer.as_slice(), self.format)
|
||||
.await
|
||||
.map_err(|e| JsValue::from_str(&format!("Recognition failed: {}", e)))?;
|
||||
|
||||
// Filter by confidence threshold
|
||||
let filtered = self.filter_by_confidence(result);
|
||||
|
||||
serde_wasm_bindgen::to_value(&filtered)
|
||||
.map_err(|e| JsValue::from_str(&format!("Serialization failed: {}", e)))
|
||||
}
|
||||
|
||||
/// Recognize text from HTML Canvas element
|
||||
#[wasm_bindgen(js_name = recognizeFromCanvas)]
|
||||
pub async fn recognize_from_canvas(
|
||||
&self,
|
||||
canvas: &HtmlCanvasElement,
|
||||
) -> Result<JsValue, JsValue> {
|
||||
let image_data = self
|
||||
.processor
|
||||
.extract_canvas_image(canvas)
|
||||
.map_err(|e| JsValue::from_str(&format!("Canvas extraction failed: {}", e)))?;
|
||||
|
||||
let result = self
|
||||
.processor
|
||||
.process_image_data(&image_data, self.format)
|
||||
.await
|
||||
.map_err(|e| JsValue::from_str(&format!("Recognition failed: {}", e)))?;
|
||||
|
||||
let filtered = self.filter_by_confidence(result);
|
||||
|
||||
serde_wasm_bindgen::to_value(&filtered)
|
||||
.map_err(|e| JsValue::from_str(&format!("Serialization failed: {}", e)))
|
||||
}
|
||||
|
||||
/// Recognize text from base64-encoded image
|
||||
#[wasm_bindgen(js_name = recognizeBase64)]
|
||||
pub async fn recognize_base64(&self, base64: &str) -> Result<JsValue, JsValue> {
|
||||
// Remove data URL prefix if present
|
||||
let base64_data = if base64.contains(',') {
|
||||
base64.split(',').nth(1).unwrap_or(base64)
|
||||
} else {
|
||||
base64
|
||||
};
|
||||
|
||||
let image_bytes = base64::decode(base64_data)
|
||||
.map_err(|e| JsValue::from_str(&format!("Base64 decode failed: {}", e)))?;
|
||||
|
||||
self.recognize(&image_bytes).await
|
||||
}
|
||||
|
||||
/// Recognize text from ImageData object
|
||||
#[wasm_bindgen(js_name = recognizeImageData)]
|
||||
pub async fn recognize_image_data(&self, image_data: &ImageData) -> Result<JsValue, JsValue> {
|
||||
let result = self
|
||||
.processor
|
||||
.process_image_data(image_data, self.format)
|
||||
.await
|
||||
.map_err(|e| JsValue::from_str(&format!("Recognition failed: {}", e)))?;
|
||||
|
||||
let filtered = self.filter_by_confidence(result);
|
||||
|
||||
serde_wasm_bindgen::to_value(&filtered)
|
||||
.map_err(|e| JsValue::from_str(&format!("Serialization failed: {}", e)))
|
||||
}
|
||||
|
||||
/// Set the output format (text, latex, or both)
|
||||
#[wasm_bindgen(js_name = setFormat)]
|
||||
pub fn set_format(&mut self, format: &str) {
|
||||
self.format = match format.to_lowercase().as_str() {
|
||||
"text" => RecognitionFormat::Text,
|
||||
"latex" => RecognitionFormat::Latex,
|
||||
"both" => RecognitionFormat::Both,
|
||||
_ => RecognitionFormat::Both,
|
||||
};
|
||||
}
|
||||
|
||||
/// Set the confidence threshold (0.0 - 1.0)
|
||||
#[wasm_bindgen(js_name = setConfidenceThreshold)]
|
||||
pub fn set_confidence_threshold(&mut self, threshold: f32) {
|
||||
self.confidence_threshold = threshold.clamp(0.0, 1.0);
|
||||
}
|
||||
|
||||
/// Get the current confidence threshold
|
||||
#[wasm_bindgen(js_name = getConfidenceThreshold)]
|
||||
pub fn get_confidence_threshold(&self) -> f32 {
|
||||
self.confidence_threshold
|
||||
}
|
||||
|
||||
/// Get the version of the library
|
||||
#[wasm_bindgen(js_name = getVersion)]
|
||||
pub fn get_version(&self) -> String {
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
}
|
||||
|
||||
/// Get supported output formats
|
||||
#[wasm_bindgen(js_name = getSupportedFormats)]
|
||||
pub fn get_supported_formats(&self) -> Vec<JsValue> {
|
||||
vec![
|
||||
JsValue::from_str("text"),
|
||||
JsValue::from_str("latex"),
|
||||
JsValue::from_str("both"),
|
||||
]
|
||||
}
|
||||
|
||||
/// Batch process multiple images
|
||||
#[wasm_bindgen(js_name = recognizeBatch)]
|
||||
pub async fn recognize_batch(&self, images: Vec<JsValue>) -> Result<JsValue, JsValue> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for img in images {
|
||||
// Try to process as Uint8Array
|
||||
if let Ok(bytes) = js_sys::Uint8Array::new(&img).to_vec() {
|
||||
match self.recognize(&bytes).await {
|
||||
Ok(result) => results.push(result),
|
||||
Err(e) => {
|
||||
web_sys::console::warn_1(&JsValue::from_str(&format!(
|
||||
"Failed to process image: {:?}",
|
||||
e
|
||||
)));
|
||||
results.push(JsValue::NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(js_sys::Array::from_iter(results).into())
|
||||
}
|
||||
|
||||
// Private helper methods
|
||||
|
||||
fn filter_by_confidence(&self, mut result: OcrResult) -> OcrResult {
|
||||
if result.confidence < self.confidence_threshold {
|
||||
result.text = String::new();
|
||||
result.latex = None;
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new ScipixWasm instance (factory function)
|
||||
#[wasm_bindgen(js_name = createScipix)]
|
||||
pub async fn create_scipix() -> Result<ScipixWasm, JsValue> {
|
||||
ScipixWasm::new().await
|
||||
}
|
||||
217
vendor/ruvector/examples/scipix/src/wasm/canvas.rs
vendored
Normal file
217
vendor/ruvector/examples/scipix/src/wasm/canvas.rs
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
//! Canvas and ImageData handling for WASM
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use image::{DynamicImage, ImageBuffer, Rgba};
|
||||
use wasm_bindgen::prelude::*;
|
||||
use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement, ImageData};
|
||||
|
||||
use crate::wasm::types::{OcrResult, RecognitionFormat};
|
||||
|
||||
/// Stateless processor that turns canvas contents, `ImageData` or encoded
/// image bytes into OCR results.
pub struct CanvasProcessor {
    // No fields yet; model state could be added here in the future.
}
|
||||
|
||||
impl CanvasProcessor {
|
||||
/// Create a new canvas processor
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
/// Extract image data from HTML canvas element
|
||||
pub fn extract_canvas_image(&self, canvas: &HtmlCanvasElement) -> Result<ImageData> {
|
||||
let context = canvas
|
||||
.get_context("2d")
|
||||
.map_err(|_| anyhow!("Failed to get 2d context"))?
|
||||
.ok_or_else(|| anyhow!("Context is null"))?
|
||||
.dyn_into::<CanvasRenderingContext2d>()
|
||||
.map_err(|_| anyhow!("Failed to cast to 2d context"))?;
|
||||
|
||||
let width = canvas.width();
|
||||
let height = canvas.height();
|
||||
|
||||
context
|
||||
.get_image_data(0.0, 0.0, width as f64, height as f64)
|
||||
.map_err(|_| anyhow!("Failed to get image data"))
|
||||
}
|
||||
|
||||
/// Convert ImageData to DynamicImage
|
||||
pub fn image_data_to_dynamic(&self, image_data: &ImageData) -> Result<DynamicImage> {
|
||||
let width = image_data.width();
|
||||
let height = image_data.height();
|
||||
let data = image_data.data();
|
||||
|
||||
let img_buffer = ImageBuffer::<Rgba<u8>, Vec<u8>>::from_raw(width, height, data.to_vec())
|
||||
.ok_or_else(|| anyhow!("Failed to create image buffer"))?;
|
||||
|
||||
Ok(DynamicImage::ImageRgba8(img_buffer))
|
||||
}
|
||||
|
||||
/// Process raw image bytes
|
||||
pub async fn process_image_bytes(
|
||||
&self,
|
||||
image_bytes: &[u8],
|
||||
format: RecognitionFormat,
|
||||
) -> Result<OcrResult> {
|
||||
// Decode image
|
||||
let img = image::load_from_memory(image_bytes)
|
||||
.map_err(|e| anyhow!("Failed to decode image: {}", e))?;
|
||||
|
||||
self.process_dynamic_image(&img, format).await
|
||||
}
|
||||
|
||||
/// Process ImageData from canvas
|
||||
pub async fn process_image_data(
|
||||
&self,
|
||||
image_data: &ImageData,
|
||||
format: RecognitionFormat,
|
||||
) -> Result<OcrResult> {
|
||||
let img = self.image_data_to_dynamic(image_data)?;
|
||||
self.process_dynamic_image(&img, format).await
|
||||
}
|
||||
|
||||
/// Process a DynamicImage
|
||||
async fn process_dynamic_image(
|
||||
&self,
|
||||
img: &DynamicImage,
|
||||
format: RecognitionFormat,
|
||||
) -> Result<OcrResult> {
|
||||
// Convert to grayscale for processing
|
||||
let gray = img.to_luma8();
|
||||
|
||||
// Apply preprocessing
|
||||
let preprocessed = self.preprocess_image(&gray);
|
||||
|
||||
// Perform OCR (mock implementation for now)
|
||||
// In a real implementation, this would run a model
|
||||
let text = self.extract_text(&preprocessed)?;
|
||||
let latex = if matches!(format, RecognitionFormat::Latex | RecognitionFormat::Both) {
|
||||
Some(self.extract_latex(&preprocessed)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Calculate confidence (simplified)
|
||||
let confidence = self.calculate_confidence(&text, &latex);
|
||||
|
||||
Ok(OcrResult {
|
||||
text,
|
||||
latex,
|
||||
confidence,
|
||||
metadata: Some(serde_json::json!({
|
||||
"width": img.width(),
|
||||
"height": img.height(),
|
||||
"format": format.to_string(),
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
/// Preprocess image for OCR
|
||||
fn preprocess_image(&self, img: &image::GrayImage) -> image::GrayImage {
|
||||
// Apply simple thresholding
|
||||
let mut output = img.clone();
|
||||
|
||||
for pixel in output.pixels_mut() {
|
||||
let value = pixel.0[0];
|
||||
pixel.0[0] = if value > 128 { 255 } else { 0 };
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
/// Extract plain text (mock implementation)
|
||||
fn extract_text(&self, img: &image::GrayImage) -> Result<String> {
|
||||
// This would normally run an OCR model
|
||||
// For now, return a placeholder
|
||||
Ok("Recognized text placeholder".to_string())
|
||||
}
|
||||
|
||||
/// Extract LaTeX (mock implementation)
|
||||
fn extract_latex(&self, img: &image::GrayImage) -> Result<String> {
|
||||
// This would normally run a math OCR model
|
||||
// For now, return a placeholder
|
||||
Ok(r"\sum_{i=1}^{n} x_i".to_string())
|
||||
}
|
||||
|
||||
/// Calculate confidence score
|
||||
fn calculate_confidence(&self, text: &str, latex: &Option<String>) -> f32 {
|
||||
// Simple heuristic: longer text = higher confidence
|
||||
let text_score = (text.len() as f32 / 100.0).min(1.0);
|
||||
let latex_score = latex
|
||||
.as_ref()
|
||||
.map(|l| (l.len() as f32 / 50.0).min(1.0))
|
||||
.unwrap_or(0.0);
|
||||
|
||||
(text_score + latex_score) / 2.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CanvasProcessor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert blob URL to image data
|
||||
#[wasm_bindgen]
|
||||
pub async fn blob_url_to_image_data(blob_url: &str) -> Result<ImageData, JsValue> {
|
||||
use web_sys::{window, HtmlImageElement};
|
||||
|
||||
let window = window().ok_or_else(|| JsValue::from_str("No window"))?;
|
||||
let document = window
|
||||
.document()
|
||||
.ok_or_else(|| JsValue::from_str("No document"))?;
|
||||
|
||||
// Create image element
|
||||
let img =
|
||||
HtmlImageElement::new().map_err(|_| JsValue::from_str("Failed to create image element"))?;
|
||||
|
||||
img.set_src(blob_url);
|
||||
|
||||
// Wait for image to load
|
||||
let promise = js_sys::Promise::new(&mut |resolve, reject| {
|
||||
let img_clone = img.clone();
|
||||
let onload = Closure::wrap(Box::new(move || {
|
||||
resolve.call1(&JsValue::NULL, &img_clone).unwrap();
|
||||
}) as Box<dyn FnMut()>);
|
||||
|
||||
img.set_onload(Some(onload.as_ref().unchecked_ref()));
|
||||
onload.forget();
|
||||
|
||||
let onerror = Closure::wrap(Box::new(move || {
|
||||
reject
|
||||
.call1(&JsValue::NULL, &JsValue::from_str("Image load failed"))
|
||||
.unwrap();
|
||||
}) as Box<dyn FnMut()>);
|
||||
|
||||
img.set_onerror(Some(onerror.as_ref().unchecked_ref()));
|
||||
onerror.forget();
|
||||
});
|
||||
|
||||
wasm_bindgen_futures::JsFuture::from(promise).await?;
|
||||
|
||||
// Create canvas and draw image
|
||||
let canvas = document
|
||||
.create_element("canvas")
|
||||
.map_err(|_| JsValue::from_str("Failed to create canvas"))?
|
||||
.dyn_into::<HtmlCanvasElement>()
|
||||
.map_err(|_| JsValue::from_str("Failed to cast to canvas"))?;
|
||||
|
||||
canvas.set_width(img.natural_width());
|
||||
canvas.set_height(img.natural_height());
|
||||
|
||||
let context = canvas
|
||||
.get_context("2d")
|
||||
.map_err(|_| JsValue::from_str("Failed to get 2d context"))?
|
||||
.ok_or_else(|| JsValue::from_str("Context is null"))?
|
||||
.dyn_into::<CanvasRenderingContext2d>()
|
||||
.map_err(|_| JsValue::from_str("Failed to cast to 2d context"))?;
|
||||
|
||||
context
|
||||
.draw_image_with_html_image_element(&img, 0.0, 0.0)
|
||||
.map_err(|_| JsValue::from_str("Failed to draw image"))?;
|
||||
|
||||
context
|
||||
.get_image_data(0.0, 0.0, canvas.width() as f64, canvas.height() as f64)
|
||||
.map_err(|_| JsValue::from_str("Failed to get image data"))
|
||||
}
|
||||
218
vendor/ruvector/examples/scipix/src/wasm/memory.rs
vendored
Normal file
218
vendor/ruvector/examples/scipix/src/wasm/memory.rs
vendored
Normal file
@@ -0,0 +1,218 @@
|
||||
//! Memory management for WASM
|
||||
|
||||
use std::ops::Deref;
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// Owned byte buffer used by the WASM bindings; a thin wrapper around `Vec<u8>`.
pub struct WasmBuffer {
    data: Vec<u8>,
}

impl WasmBuffer {
    /// Create an empty buffer with room for `capacity` bytes.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    /// Create a buffer holding a copy of `slice`.
    pub fn from_slice(slice: &[u8]) -> Self {
        Self {
            data: slice.to_vec(),
        }
    }

    /// Create a buffer that takes ownership of `data` without copying.
    pub fn from_vec(data: Vec<u8>) -> Self {
        Self { data }
    }

    /// Borrow the contents.
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Borrow the contents mutably.
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.data
    }

    /// Number of bytes currently stored.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when no bytes are stored.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Remove all bytes while keeping the allocated capacity.
    pub fn clear(&mut self) {
        self.data.clear();
    }

    /// Release unused capacity.
    pub fn shrink_to_fit(&mut self) {
        self.data.shrink_to_fit();
    }

    /// Consume the buffer and return its bytes.
    pub fn into_vec(mut self) -> Vec<u8> {
        // `WasmBuffer` implements `Drop`, so a field cannot be moved out of
        // `self` directly (error E0509). Swap an empty Vec in and hand the
        // original out; the `Drop` impl then runs on the empty Vec.
        std::mem::take(&mut self.data)
    }
}

/// Lets a `WasmBuffer` be used wherever a `&[u8]` is expected.
impl Deref for WasmBuffer {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        &self.data
    }
}

impl Drop for WasmBuffer {
    fn drop(&mut self) {
        // Explicitly clear to help WASM memory management.
        // NOTE(review): `Vec` releases its allocation when dropped anyway, so
        // this is presumably redundant — confirm before relying on it.
        self.data.clear();
        self.data.shrink_to_fit();
    }
}
|
||||
|
||||
/// Shared memory for large images (uses SharedArrayBuffer when available)
|
||||
#[wasm_bindgen]
|
||||
pub struct SharedImageBuffer {
|
||||
buffer: WasmBuffer,
|
||||
width: u32,
|
||||
height: u32,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl SharedImageBuffer {
|
||||
/// Create a new shared buffer
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new(width: u32, height: u32) -> Self {
|
||||
let size = (width * height * 4) as usize; // RGBA
|
||||
Self {
|
||||
buffer: WasmBuffer::with_capacity(size),
|
||||
width,
|
||||
height,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get width
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn width(&self) -> u32 {
|
||||
self.width
|
||||
}
|
||||
|
||||
/// Get height
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn height(&self) -> u32 {
|
||||
self.height
|
||||
}
|
||||
|
||||
/// Get buffer size
|
||||
#[wasm_bindgen(js_name = bufferSize)]
|
||||
pub fn buffer_size(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
/// Get buffer as Uint8Array
|
||||
#[wasm_bindgen(js_name = getBuffer)]
|
||||
pub fn get_buffer(&self) -> js_sys::Uint8Array {
|
||||
js_sys::Uint8Array::from(self.buffer.as_slice())
|
||||
}
|
||||
|
||||
/// Set buffer from Uint8Array
|
||||
#[wasm_bindgen(js_name = setBuffer)]
|
||||
pub fn set_buffer(&mut self, data: &js_sys::Uint8Array) {
|
||||
self.buffer = WasmBuffer::from_vec(data.to_vec());
|
||||
}
|
||||
|
||||
/// Clear the buffer
|
||||
pub fn clear(&mut self) {
|
||||
self.buffer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory pool for reusing buffers
|
||||
pub struct MemoryPool {
|
||||
buffers: Vec<WasmBuffer>,
|
||||
max_size: usize,
|
||||
}
|
||||
|
||||
impl MemoryPool {
|
||||
/// Create a new memory pool
|
||||
pub fn new(max_size: usize) -> Self {
|
||||
Self {
|
||||
buffers: Vec::with_capacity(max_size),
|
||||
max_size,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a buffer from the pool or create a new one
|
||||
pub fn acquire(&mut self, size: usize) -> WasmBuffer {
|
||||
self.buffers
|
||||
.pop()
|
||||
.map(|mut buf| {
|
||||
buf.clear();
|
||||
buf
|
||||
})
|
||||
.unwrap_or_else(|| WasmBuffer::with_capacity(size))
|
||||
}
|
||||
|
||||
/// Return a buffer to the pool
|
||||
pub fn release(&mut self, mut buffer: WasmBuffer) {
|
||||
if self.buffers.len() < self.max_size {
|
||||
buffer.clear();
|
||||
self.buffers.push(buffer);
|
||||
}
|
||||
// Otherwise drop the buffer
|
||||
}
|
||||
|
||||
/// Clear all buffers from the pool
|
||||
pub fn clear(&mut self) {
|
||||
self.buffers.clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MemoryPool {
|
||||
fn default() -> Self {
|
||||
Self::new(10)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get memory usage statistics
|
||||
#[wasm_bindgen(js_name = getMemoryStats)]
|
||||
pub fn get_memory_stats() -> JsValue {
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
{
|
||||
use wasm_bindgen::JsValue;
|
||||
|
||||
// Try to get memory info from performance.memory (non-standard)
|
||||
let performance = web_sys::window().and_then(|w| w.performance());
|
||||
|
||||
if let Some(perf) = performance {
|
||||
serde_wasm_bindgen::to_value(&serde_json::json!({
|
||||
"available": true,
|
||||
"timestamp": perf.now(),
|
||||
}))
|
||||
.unwrap_or(JsValue::NULL)
|
||||
} else {
|
||||
JsValue::NULL
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
JsValue::NULL
|
||||
}
|
||||
|
||||
/// Force garbage collection (hint to runtime)
|
||||
#[wasm_bindgen(js_name = forceGC)]
|
||||
pub fn force_gc() {
|
||||
// This is just a hint; actual GC is controlled by the JS runtime
|
||||
// In wasm-bindgen, we can't directly trigger GC
|
||||
// But we can help by ensuring our memory is freed
|
||||
}
|
||||
49
vendor/ruvector/examples/scipix/src/wasm/mod.rs
vendored
Normal file
49
vendor/ruvector/examples/scipix/src/wasm/mod.rs
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
//! WebAssembly bindings for Scipix OCR
|
||||
//!
|
||||
//! This module provides WASM bindings with wasm-bindgen for browser-based OCR.
|
||||
|
||||
#![cfg(target_arch = "wasm32")]
|
||||
|
||||
pub mod api;
|
||||
pub mod canvas;
|
||||
pub mod memory;
|
||||
pub mod types;
|
||||
pub mod worker;
|
||||
|
||||
pub use api::ScipixWasm;
|
||||
pub use types::*;
|
||||
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// Initialize the WASM module with panic hooks and allocator
|
||||
#[wasm_bindgen(start)]
|
||||
pub fn init() {
|
||||
// Set panic hook for better error messages
|
||||
#[cfg(feature = "console_error_panic_hook")]
|
||||
console_error_panic_hook::set_once();
|
||||
|
||||
// Use wee_alloc for smaller binary size
|
||||
#[cfg(feature = "wee_alloc")]
|
||||
{
|
||||
#[global_allocator]
|
||||
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
|
||||
}
|
||||
|
||||
// Initialize logging
|
||||
tracing_wasm::set_as_global_default();
|
||||
}
|
||||
|
||||
/// Get the version of the WASM module
|
||||
#[wasm_bindgen]
|
||||
pub fn version() -> String {
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
}
|
||||
|
||||
/// Check if the WASM module is ready
|
||||
#[wasm_bindgen]
|
||||
pub fn is_ready() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
// Bring tracing-wasm into scope for logging (a private `use`, so nothing is re-exported)
|
||||
use tracing_wasm;
|
||||
179
vendor/ruvector/examples/scipix/src/wasm/types.rs
vendored
Normal file
179
vendor/ruvector/examples/scipix/src/wasm/types.rs
vendored
Normal file
@@ -0,0 +1,179 @@
|
||||
//! Type definitions for WASM API
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// OCR result returned to JavaScript
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[wasm_bindgen]
|
||||
pub struct OcrResult {
|
||||
/// Recognized plain text
|
||||
pub text: String,
|
||||
|
||||
/// LaTeX representation (if applicable)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub latex: Option<String>,
|
||||
|
||||
/// Confidence score (0.0 - 1.0)
|
||||
pub confidence: f32,
|
||||
|
||||
/// Additional metadata
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl OcrResult {
|
||||
/// Create a new OCR result
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new(text: String, confidence: f32) -> Self {
|
||||
Self {
|
||||
text,
|
||||
latex: None,
|
||||
confidence,
|
||||
metadata: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the text
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn text(&self) -> String {
|
||||
self.text.clone()
|
||||
}
|
||||
|
||||
/// Get the LaTeX (if available)
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn latex(&self) -> Option<String> {
|
||||
self.latex.clone()
|
||||
}
|
||||
|
||||
/// Get the confidence score
|
||||
#[wasm_bindgen(getter)]
|
||||
pub fn confidence(&self) -> f32 {
|
||||
self.confidence
|
||||
}
|
||||
|
||||
/// Check if result has LaTeX
|
||||
#[wasm_bindgen(js_name = hasLatex)]
|
||||
pub fn has_latex(&self) -> bool {
|
||||
self.latex.is_some()
|
||||
}
|
||||
|
||||
/// Convert to JSON
|
||||
#[wasm_bindgen(js_name = toJSON)]
|
||||
pub fn to_json(&self) -> Result<JsValue, JsValue> {
|
||||
serde_wasm_bindgen::to_value(self)
|
||||
.map_err(|e| JsValue::from_str(&format!("Serialization failed: {}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Recognition output format
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum RecognitionFormat {
|
||||
/// Plain text only
|
||||
Text,
|
||||
/// LaTeX only
|
||||
Latex,
|
||||
/// Both text and LaTeX
|
||||
Both,
|
||||
}
|
||||
|
||||
impl RecognitionFormat {
|
||||
pub fn to_string(&self) -> String {
|
||||
match self {
|
||||
Self::Text => "text".to_string(),
|
||||
Self::Latex => "latex".to_string(),
|
||||
Self::Both => "both".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RecognitionFormat {
|
||||
fn default() -> Self {
|
||||
Self::Both
|
||||
}
|
||||
}
|
||||
|
||||
/// Processing options
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[wasm_bindgen]
|
||||
pub struct ProcessingOptions {
|
||||
/// Output format
|
||||
pub format: String,
|
||||
|
||||
/// Confidence threshold
|
||||
pub confidence_threshold: f32,
|
||||
|
||||
/// Enable preprocessing
|
||||
pub preprocess: bool,
|
||||
|
||||
/// Enable postprocessing
|
||||
pub postprocess: bool,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl ProcessingOptions {
|
||||
/// Create default options
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set format
|
||||
#[wasm_bindgen(js_name = setFormat)]
|
||||
pub fn set_format(&mut self, format: String) {
|
||||
self.format = format;
|
||||
}
|
||||
|
||||
/// Set confidence threshold
|
||||
#[wasm_bindgen(js_name = setConfidenceThreshold)]
|
||||
pub fn set_confidence_threshold(&mut self, threshold: f32) {
|
||||
self.confidence_threshold = threshold;
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ProcessingOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
format: "both".to_string(),
|
||||
confidence_threshold: 0.5,
|
||||
preprocess: true,
|
||||
postprocess: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error types for WASM API
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum WasmError {
|
||||
/// Image decoding error
|
||||
ImageDecode(String),
|
||||
|
||||
/// Processing error
|
||||
Processing(String),
|
||||
|
||||
/// Invalid input
|
||||
InvalidInput(String),
|
||||
|
||||
/// Not initialized
|
||||
NotInitialized,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for WasmError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::ImageDecode(msg) => write!(f, "Image decode error: {}", msg),
|
||||
Self::Processing(msg) => write!(f, "Processing error: {}", msg),
|
||||
Self::InvalidInput(msg) => write!(f, "Invalid input: {}", msg),
|
||||
Self::NotInitialized => write!(f, "WASM module not initialized"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: `WasmError` uses the default `std::error::Error` methods.
impl std::error::Error for WasmError {}
|
||||
|
||||
impl From<WasmError> for JsValue {
|
||||
fn from(error: WasmError) -> Self {
|
||||
JsValue::from_str(&error.to_string())
|
||||
}
|
||||
}
|
||||
243
vendor/ruvector/examples/scipix/src/wasm/worker.rs
vendored
Normal file
243
vendor/ruvector/examples/scipix/src/wasm/worker.rs
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
//! Web Worker support for off-main-thread OCR processing
|
||||
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use wasm_bindgen::prelude::*;
|
||||
use web_sys::{DedicatedWorkerGlobalScope, MessageEvent};
|
||||
|
||||
use crate::wasm::api::ScipixWasm;
|
||||
use crate::wasm::types::RecognitionFormat;
|
||||
|
||||
/// Set once by `init_worker`; `process_image` checks it to verify the worker
/// has been initialized.
static WORKER_INSTANCE: OnceCell<Arc<ScipixWasm>> = OnceCell::new();
|
||||
|
||||
/// Messages sent from main thread to worker
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum WorkerRequest {
|
||||
/// Initialize the worker
|
||||
Init,
|
||||
|
||||
/// Process an image
|
||||
Process {
|
||||
id: String,
|
||||
image_data: Vec<u8>,
|
||||
format: String,
|
||||
},
|
||||
|
||||
/// Process base64 image
|
||||
ProcessBase64 {
|
||||
id: String,
|
||||
base64: String,
|
||||
format: String,
|
||||
},
|
||||
|
||||
/// Batch process images
|
||||
BatchProcess {
|
||||
id: String,
|
||||
images: Vec<Vec<u8>>,
|
||||
format: String,
|
||||
},
|
||||
|
||||
/// Terminate worker
|
||||
Terminate,
|
||||
}
|
||||
|
||||
/// Messages sent from worker to main thread
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum WorkerResponse {
|
||||
/// Worker is ready
|
||||
Ready,
|
||||
|
||||
/// Processing started
|
||||
Started { id: String },
|
||||
|
||||
/// Processing progress
|
||||
Progress {
|
||||
id: String,
|
||||
processed: usize,
|
||||
total: usize,
|
||||
},
|
||||
|
||||
/// Processing completed successfully
|
||||
Success {
|
||||
id: String,
|
||||
result: serde_json::Value,
|
||||
},
|
||||
|
||||
/// Processing failed
|
||||
Error { id: String, error: String },
|
||||
|
||||
/// Worker terminated
|
||||
Terminated,
|
||||
}
|
||||
|
||||
/// Initialize the worker
|
||||
#[wasm_bindgen(js_name = initWorker)]
|
||||
pub async fn init_worker() -> Result<(), JsValue> {
|
||||
let instance = ScipixWasm::new().await?;
|
||||
WORKER_INSTANCE
|
||||
.set(Arc::new(instance))
|
||||
.map_err(|_| JsValue::from_str("Worker already initialized"))?;
|
||||
|
||||
post_response(WorkerResponse::Ready)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle messages from the main thread
|
||||
#[wasm_bindgen(js_name = handleWorkerMessage)]
|
||||
pub async fn handle_worker_message(event: MessageEvent) -> Result<(), JsValue> {
|
||||
let data = event.data();
|
||||
|
||||
let request: WorkerRequest = serde_wasm_bindgen::from_value(data)
|
||||
.map_err(|e| JsValue::from_str(&format!("Invalid message: {}", e)))?;
|
||||
|
||||
match request {
|
||||
WorkerRequest::Init => {
|
||||
init_worker().await?;
|
||||
}
|
||||
|
||||
WorkerRequest::Process {
|
||||
id,
|
||||
image_data,
|
||||
format,
|
||||
} => {
|
||||
process_image(id, image_data, format).await?;
|
||||
}
|
||||
|
||||
WorkerRequest::ProcessBase64 { id, base64, format } => {
|
||||
process_base64(id, base64, format).await?;
|
||||
}
|
||||
|
||||
WorkerRequest::BatchProcess { id, images, format } => {
|
||||
process_batch(id, images, format).await?;
|
||||
}
|
||||
|
||||
WorkerRequest::Terminate => {
|
||||
post_response(WorkerResponse::Terminated)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_image(id: String, image_data: Vec<u8>, format: String) -> Result<(), JsValue> {
|
||||
post_response(WorkerResponse::Started { id: id.clone() })?;
|
||||
|
||||
let instance = WORKER_INSTANCE
|
||||
.get()
|
||||
.ok_or_else(|| JsValue::from_str("Worker not initialized"))?;
|
||||
|
||||
let mut worker_instance = ScipixWasm::new().await?;
|
||||
worker_instance.set_format(&format);
|
||||
|
||||
match worker_instance.recognize(&image_data).await {
|
||||
Ok(result) => {
|
||||
let json_result: serde_json::Value = serde_wasm_bindgen::from_value(result)?;
|
||||
post_response(WorkerResponse::Success {
|
||||
id,
|
||||
result: json_result,
|
||||
})?;
|
||||
}
|
||||
Err(e) => {
|
||||
post_response(WorkerResponse::Error {
|
||||
id,
|
||||
error: format!("{:?}", e),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_base64(id: String, base64: String, format: String) -> Result<(), JsValue> {
|
||||
post_response(WorkerResponse::Started { id: id.clone() })?;
|
||||
|
||||
let mut worker_instance = ScipixWasm::new().await?;
|
||||
worker_instance.set_format(&format);
|
||||
|
||||
match worker_instance.recognize_base64(&base64).await {
|
||||
Ok(result) => {
|
||||
let json_result: serde_json::Value = serde_wasm_bindgen::from_value(result)?;
|
||||
post_response(WorkerResponse::Success {
|
||||
id,
|
||||
result: json_result,
|
||||
})?;
|
||||
}
|
||||
Err(e) => {
|
||||
post_response(WorkerResponse::Error {
|
||||
id,
|
||||
error: format!("{:?}", e),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_batch(id: String, images: Vec<Vec<u8>>, format: String) -> Result<(), JsValue> {
|
||||
post_response(WorkerResponse::Started { id: id.clone() })?;
|
||||
|
||||
let total = images.len();
|
||||
let mut results = Vec::new();
|
||||
|
||||
let mut worker_instance = ScipixWasm::new().await?;
|
||||
worker_instance.set_format(&format);
|
||||
|
||||
for (idx, image_data) in images.into_iter().enumerate() {
|
||||
// Report progress
|
||||
post_response(WorkerResponse::Progress {
|
||||
id: id.clone(),
|
||||
processed: idx,
|
||||
total,
|
||||
})?;
|
||||
|
||||
match worker_instance.recognize(&image_data).await {
|
||||
Ok(result) => {
|
||||
let json_result: serde_json::Value = serde_wasm_bindgen::from_value(result)?;
|
||||
results.push(json_result);
|
||||
}
|
||||
Err(e) => {
|
||||
web_sys::console::warn_1(&JsValue::from_str(&format!(
|
||||
"Failed to process image {}: {:?}",
|
||||
idx, e
|
||||
)));
|
||||
results.push(serde_json::Value::Null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
post_response(WorkerResponse::Success {
|
||||
id,
|
||||
result: serde_json::json!({ "results": results }),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn post_response(response: WorkerResponse) -> Result<(), JsValue> {
|
||||
let global = js_sys::global().dyn_into::<DedicatedWorkerGlobalScope>()?;
|
||||
let message = serde_wasm_bindgen::to_value(&response)?;
|
||||
global.post_message(&message)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Setup worker message listener
|
||||
#[wasm_bindgen(js_name = setupWorker)]
|
||||
pub fn setup_worker() -> Result<(), JsValue> {
|
||||
let global = js_sys::global().dyn_into::<DedicatedWorkerGlobalScope>()?;
|
||||
|
||||
let closure = Closure::wrap(Box::new(move |event: MessageEvent| {
|
||||
wasm_bindgen_futures::spawn_local(async move {
|
||||
if let Err(e) = handle_worker_message(event).await {
|
||||
web_sys::console::error_1(&e);
|
||||
}
|
||||
});
|
||||
}) as Box<dyn FnMut(MessageEvent)>);
|
||||
|
||||
global.set_onmessage(Some(closure.as_ref().unchecked_ref()));
|
||||
closure.forget(); // Keep closure alive
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user