Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,298 @@
//! DOCX (Microsoft Word) formatter with Office Math ML support
//!
//! This is a stub implementation. Full DOCX generation requires:
//! - ZIP file creation for .docx format
//! - XML generation for document.xml, styles.xml, etc.
//! - Office Math ML for equations
//! - Image embedding support
//!
//! Consider using libraries like `docx-rs` for production implementation.
use super::{LineData, OcrResult};
use std::io::Write;
/// DOCX formatter (stub implementation)
///
/// Produces WordprocessingML XML fragments (`document.xml`, `styles.xml`) as
/// strings. Packaging them into a real `.docx` archive is not implemented.
// The fields below are written by the constructor and the `with_*` builders but
// are not read by any method visible in this file, which is presumably why
// `dead_code` is allowed here.
#[allow(dead_code)]
pub struct DocxFormatter {
/// Set to `true` by `new()`.
include_styles: bool,
/// Page dimensions; `new()` uses `PageSize::letter()`.
page_size: PageSize,
/// Page margins; `new()` uses `Margins::normal()`.
margins: Margins,
}
/// Page dimensions expressed in twips (1/1440 inch).
#[derive(Debug, Clone, Copy)]
pub struct PageSize {
    /// Page width in twips.
    pub width: u32,
    /// Page height in twips.
    pub height: u32,
}

impl PageSize {
    /// US Letter preset: 8.5 x 11 inches.
    pub fn letter() -> Self {
        let (width, height) = (12_240, 15_840);
        Self { width, height }
    }

    /// ISO A4 preset: 210 x 297 mm.
    pub fn a4() -> Self {
        let (width, height) = (11_906, 16_838);
        Self { width, height }
    }
}
/// Page margins, one value per edge.
#[derive(Debug, Clone, Copy)]
pub struct Margins {
    pub top: u32,
    pub right: u32,
    pub bottom: u32,
    pub left: u32,
}

impl Margins {
    /// Preset with the same margin of 1440 (one inch) on every edge.
    pub fn normal() -> Self {
        let one_inch = 1440;
        Self {
            top: one_inch,
            right: one_inch,
            bottom: one_inch,
            left: one_inch,
        }
    }
}
impl DocxFormatter {
/// Create a formatter with styles enabled, a US Letter page and normal margins.
pub fn new() -> Self {
    let page_size = PageSize::letter();
    let margins = Margins::normal();
    Self {
        include_styles: true,
        page_size,
        margins,
    }
}
pub fn with_page_size(mut self, page_size: PageSize) -> Self {
self.page_size = page_size;
self
}
pub fn with_margins(mut self, margins: Margins) -> Self {
self.margins = margins;
self
}
/// Wrap a LaTeX string in a minimal Office Math ML paragraph.
///
/// Placeholder only: the LaTeX source is not parsed. It is XML-escaped and
/// emitted verbatim as the text of a single math run.
pub fn latex_to_mathml(&self, latex: &str) -> String {
    let escaped = self.escape_xml(latex);
    format!(
        "<m:oMathPara>\n<m:oMath>\n<m:r>\n<m:t>{}</m:t>\n</m:r>\n</m:oMath>\n</m:oMathPara>",
        escaped
    )
}
/// Build the `document.xml` content for the given lines.
///
/// Every line is rendered through `format_line` and the fragments are placed,
/// in order, inside a `<w:body>` element.
pub fn generate_document_xml(&self, lines: &[LineData]) -> String {
    const PROLOGUE: &str = concat!(
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n",
        "<w:document xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"\n",
        "xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\">\n",
        "<w:body>\n",
    );
    const EPILOGUE: &str = " </w:body>\n</w:document>";

    let body: String = lines.iter().map(|line| self.format_line(line)).collect();

    let mut xml = String::with_capacity(PROLOGUE.len() + body.len() + EPILOGUE.len());
    xml.push_str(PROLOGUE);
    xml.push_str(&body);
    xml.push_str(EPILOGUE);
    xml
}
/// Render one line according to its `line_type`.
///
/// `"heading"` becomes a level-1 heading, `"math"` / `"equation"` become a math
/// paragraph (using `latex` when present, otherwise `text`), and every other
/// type, including `"text"`, becomes a plain paragraph.
fn format_line(&self, line: &LineData) -> String {
    let kind = line.line_type.as_str();
    if kind == "heading" {
        return self.format_heading(&line.text, 1);
    }
    if matches!(kind, "math" | "equation") {
        let source = line.latex.as_ref().unwrap_or(&line.text);
        return self.format_math(source);
    }
    self.format_paragraph(&line.text)
}
/// Render `text` as a paragraph with a single run; the text is XML-escaped.
fn format_paragraph(&self, text: &str) -> String {
    let escaped = self.escape_xml(text);
    format!(" <w:p>\n<w:r>\n<w:t>{}</w:t>\n</w:r>\n</w:p>\n", escaped)
}
/// Render `text` as a paragraph styled `Heading<level>`; the text is XML-escaped.
fn format_heading(&self, text: &str, level: u32) -> String {
    let escaped = self.escape_xml(text);
    format!(
        " <w:p>\n<w:pPr>\n<w:pStyle w:val=\"Heading{}\"/>\n</w:pPr>\n<w:r>\n<w:t>{}</w:t>\n</w:r>\n</w:p>\n",
        level, escaped
    )
}
/// Render a LaTeX expression as a paragraph containing an Office Math block.
///
/// The `<m:oMathPara>` element returned by `latex_to_mathml` is emitted as a
/// direct child of `<w:p>`. It is deliberately NOT wrapped in a `<w:r>` run:
/// a WordprocessingML run may not contain a math paragraph, so the extra
/// wrapper made the generated XML schema-invalid.
fn format_math(&self, latex: &str) -> String {
    let mathml = self.latex_to_mathml(latex);
    format!(" <w:p>\n{}\n</w:p>\n", mathml)
}
/// Replace the five XML special characters (`& < > " '`) with their entities.
fn escape_xml(&self, text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Save DOCX to a writer (stub - needs ZIP implementation).
///
/// # Errors
///
/// Always returns an error: binary DOCX generation is not implemented, and
/// neither the writer nor the result is used.
pub fn save_to_file<W: Write>(
    &self,
    _writer: &mut W,
    _result: &OcrResult,
) -> Result<(), String> {
    let reason = "DOCX binary format generation not implemented. Use docx-rs library for full implementation.";
    Err(String::from(reason))
}
/// Return the fixed `styles.xml` content, defining the `Normal` and `Heading1`
/// paragraph styles.
pub fn generate_styles_xml(&self) -> String {
    const STYLES_XML: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:style w:type="paragraph" w:styleId="Normal">
<w:name w:val="Normal"/>
<w:qFormat/>
</w:style>
<w:style w:type="paragraph" w:styleId="Heading1">
<w:name w:val="Heading 1"/>
<w:basedOn w:val="Normal"/>
<w:qFormat/>
<w:pPr>
<w:keepNext/>
<w:keepLines/>
</w:pPr>
<w:rPr>
<w:b/>
<w:sz w:val="32"/>
</w:rPr>
</w:style>
</w:styles>"#;
    String::from(STYLES_XML)
}
}
impl Default for DocxFormatter {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the DOCX stub formatter: page presets, XML escaping and the
// XML fragment builders.
#[cfg(test)]
mod tests {
use super::*;
use crate::output::BoundingBox;
// The Letter preset has the expected width and A4 is narrower than Letter.
#[test]
fn test_page_sizes() {
let letter = PageSize::letter();
assert_eq!(letter.width, 12240);
let a4 = PageSize::a4();
assert!(a4.width < letter.width);
}
// `<`, `>`, `&` and `"` are all replaced by entities.
#[test]
fn test_escape_xml() {
let formatter = DocxFormatter::new();
let result = formatter.escape_xml("Test <tag> & \"quote\"");
assert!(result.contains("&lt;"));
assert!(result.contains("&gt;"));
assert!(result.contains("&amp;"));
assert!(result.contains("&quot;"));
}
// Plain text ends up inside `<w:t>` within a `<w:p>`.
#[test]
fn test_format_paragraph() {
let formatter = DocxFormatter::new();
let result = formatter.format_paragraph("Hello World");
assert!(result.contains("<w:p>"));
assert!(result.contains("<w:t>Hello World</w:t>"));
}
// A level-1 heading references the `Heading1` style and keeps its text.
#[test]
fn test_format_heading() {
let formatter = DocxFormatter::new();
let result = formatter.format_heading("Chapter 1", 1);
assert!(result.contains("Heading1"));
assert!(result.contains("Chapter 1"));
}
// The LaTeX source is carried through verbatim inside an `<m:oMath>` element.
#[test]
fn test_latex_to_mathml() {
let formatter = DocxFormatter::new();
let result = formatter.latex_to_mathml("E = mc^2");
assert!(result.contains("<m:oMath>"));
assert!(result.contains("mc^2"));
}
// A single text line produces a document with the XML prolog, root element and text.
#[test]
fn test_generate_document_xml() {
let formatter = DocxFormatter::new();
let lines = vec![LineData {
line_type: "text".to_string(),
text: "Hello".to_string(),
latex: None,
bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
confidence: 0.95,
words: None,
}];
let xml = formatter.generate_document_xml(&lines);
assert!(xml.contains("<?xml"));
assert!(xml.contains("<w:document"));
assert!(xml.contains("Hello"));
}
// The styles part declares both the `Normal` and `Heading 1` styles.
#[test]
fn test_generate_styles_xml() {
let formatter = DocxFormatter::new();
let xml = formatter.generate_styles_xml();
assert!(xml.contains("<w:styles"));
assert!(xml.contains("Normal"));
assert!(xml.contains("Heading 1"));
}
}

View File

@@ -0,0 +1,412 @@
//! Multi-format output formatter with batch processing and streaming support
use super::*;
use crate::output::{html, latex, mmd, smiles};
use std::io::Write;
/// Configuration for output formatting
///
/// Held by `OutputFormatter`; `FormatterBuilder` provides a fluent way to fill it in.
#[derive(Debug, Clone)]
pub struct FormatterConfig {
/// Target output formats.
/// `OutputFormatter::format_result` renders one output per entry, in order.
pub formats: Vec<OutputFormat>,
/// Enable pretty printing (where applicable).
/// In this file it is only forwarded to `HtmlFormatter::with_styling`.
pub pretty: bool,
/// Include confidence scores in output.
/// NOTE(review): not read by any formatting code visible in this file.
pub include_confidence: bool,
/// Include bounding box data.
/// NOTE(review): not read by any formatting code visible in this file.
pub include_bbox: bool,
/// Math delimiter style for LaTeX/MMD.
/// Passed to `MmdFormatter::with_delimiters` when MMD is generated from line data.
pub math_delimiters: MathDelimiters,
/// HTML rendering engine, passed to `HtmlFormatter::with_engine`.
pub html_engine: HtmlEngine,
/// Enable streaming for large documents.
/// NOTE(review): not read by any formatting code visible in this file.
pub streaming: bool,
}
impl Default for FormatterConfig {
    /// Text-only output with pretty printing on, confidence / bounding boxes /
    /// streaming off, default math delimiters and the MathJax HTML engine.
    fn default() -> Self {
        let formats = vec![OutputFormat::Text];
        let math_delimiters = MathDelimiters::default();
        Self {
            formats,
            pretty: true,
            include_confidence: false,
            include_bbox: false,
            math_delimiters,
            html_engine: HtmlEngine::MathJax,
            streaming: false,
        }
    }
}
/// Opening and closing delimiters for inline and display math.
#[derive(Debug, Clone)]
pub struct MathDelimiters {
    pub inline_start: String,
    pub inline_end: String,
    pub display_start: String,
    pub display_end: String,
}

impl Default for MathDelimiters {
    /// TeX-style defaults: `$ ... $` for inline math and `$$ ... $$` for display math.
    fn default() -> Self {
        let inline = String::from("$");
        let display = String::from("$$");
        Self {
            inline_start: inline.clone(),
            inline_end: inline,
            display_start: display.clone(),
            display_end: display,
        }
    }
}
/// HTML rendering engine options
///
/// Selects which math-rendering scripts the HTML formatter places in the page head.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlEngine {
/// Load MathJax 3 from a CDN.
MathJax,
/// Load the KaTeX stylesheet, library and auto-render extension from a CDN.
KaTeX,
/// Emit no math-rendering scripts.
Raw,
}
/// Main output formatter
///
/// Renders an `OcrResult` into each format listed in its configuration,
/// preferring content already present on the result over regenerating it.
pub struct OutputFormatter {
/// Formatting options; set once at construction.
config: FormatterConfig,
}
impl OutputFormatter {
/// Create a formatter that uses `FormatterConfig::default()`.
pub fn new() -> Self {
    let config = FormatterConfig::default();
    Self { config }
}
/// Create a formatter with custom configuration
pub fn with_config(config: FormatterConfig) -> Self {
Self { config }
}
/// Render one OCR result into every configured format.
///
/// # Errors
///
/// Stops at, and returns, the first error reported for any configured format.
pub fn format_result(&self, result: &OcrResult) -> Result<FormatsData, String> {
    self.config
        .formats
        .iter()
        .try_fold(FormatsData::default(), |mut collected, &format| {
            let rendered = self.format_single(result, format)?;
            self.set_format_output(&mut collected, format, rendered);
            Ok(collected)
        })
}
/// Render several OCR results, one `FormatsData` per input and in input order.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by `format_result`.
pub fn format_batch(&self, results: &[OcrResult]) -> Result<Vec<FormatsData>, String> {
    let mut formatted = Vec::with_capacity(results.len());
    for result in results {
        formatted.push(self.format_result(result)?);
    }
    Ok(formatted)
}
/// Render each result in `format` and write it to `writer`, separating
/// consecutive results with a `---` line surrounded by blank lines.
///
/// # Errors
///
/// Returns the formatting error of the first result that cannot be rendered,
/// or `"Write error: ..."` when the writer fails. Output written before the
/// failure stays written.
pub fn format_stream<W: Write>(
    &self,
    results: &[OcrResult],
    writer: &mut W,
    format: OutputFormat,
) -> Result<(), String> {
    const SEPARATOR: &[u8] = b"\n\n---\n\n";
    let write_error = |e: std::io::Error| format!("Write error: {}", e);

    for (index, result) in results.iter().enumerate() {
        // The separator goes between results, never after the last one.
        if index > 0 {
            writer.write_all(SEPARATOR).map_err(write_error)?;
        }
        let rendered = self.format_single(result, format)?;
        writer.write_all(rendered.as_bytes()).map_err(write_error)?;
    }
    Ok(())
}
/// Dispatch to the renderer that matches `format`.
fn format_single(&self, result: &OcrResult, format: OutputFormat) -> Result<String, String> {
    match format {
        OutputFormat::LaTeX | OutputFormat::LaTeXStyled => {
            let styled = matches!(format, OutputFormat::LaTeXStyled);
            self.format_latex(result, styled)
        }
        OutputFormat::Text => self.format_text(result),
        OutputFormat::Html => self.format_html(result),
        OutputFormat::Mmd => self.format_mmd(result),
        OutputFormat::MathML => self.format_mathml(result),
        OutputFormat::AsciiMath => self.format_asciimath(result),
        OutputFormat::Smiles => self.format_smiles(result),
        OutputFormat::Docx => self.format_docx(result),
    }
}
/// Return the plain-text output.
///
/// Uses the text already stored on the result when present; otherwise joins
/// the text of every line in `line_data` with newlines.
///
/// # Errors
///
/// Fails when the result has neither stored text nor line data.
fn format_text(&self, result: &OcrResult) -> Result<String, String> {
    match (&result.formats.text, &result.line_data) {
        (Some(text), _) => Ok(text.clone()),
        (None, Some(lines)) => {
            let pieces: Vec<&str> = lines.iter().map(|line| line.text.as_str()).collect();
            Ok(pieces.join("\n"))
        }
        (None, None) => Err("No text content available".to_string()),
    }
}
/// Return the LaTeX output.
///
/// With `styled == false` the stored `latex_normal` is returned as-is. With
/// `styled == true` the stored `latex_styled` (falling back to `latex_normal`)
/// is wrapped in a document that loads `amsmath`, `amssymb` and `graphicx`.
///
/// # Errors
///
/// Fails when the result carries no suitable LaTeX content.
fn format_latex(&self, result: &OcrResult, styled: bool) -> Result<String, String> {
    let stored = &result.formats;
    let source = if styled {
        stored.latex_styled.as_ref().or(stored.latex_normal.as_ref())
    } else {
        stored.latex_normal.as_ref()
    };
    let latex = source.ok_or_else(|| "No LaTeX content available".to_string())?;

    if !styled {
        return Ok(latex.clone());
    }

    let packages = vec![
        "amsmath".to_string(),
        "amssymb".to_string(),
        "graphicx".to_string(),
    ];
    let document = latex::LaTeXFormatter::new()
        .with_packages(packages)
        .format_document(latex);
    Ok(document)
}
/// Return the MMD output.
///
/// Uses the MMD stored on the result when present; otherwise generates it from
/// `line_data` with the configured math delimiters.
///
/// # Errors
///
/// Fails when the result has neither stored MMD nor line data.
fn format_mmd(&self, result: &OcrResult) -> Result<String, String> {
    match (&result.formats.mmd, &result.line_data) {
        (Some(stored), _) => Ok(stored.clone()),
        (None, Some(lines)) => {
            let delimiters = self.config.math_delimiters.clone();
            let generator = mmd::MmdFormatter::with_delimiters(delimiters);
            Ok(generator.format(lines))
        }
        (None, None) => Err("No MMD content available".to_string()),
    }
}
/// Return the HTML output.
///
/// Uses the HTML stored on the result when present; otherwise renders the text
/// output (and line data, if any) with the configured engine and styling.
///
/// # Errors
///
/// Propagates the `format_text` error when HTML has to be generated but no
/// text content is available.
fn format_html(&self, result: &OcrResult) -> Result<String, String> {
    match &result.formats.html {
        Some(stored) => Ok(stored.clone()),
        None => {
            let content = self.format_text(result)?;
            let lines = result.line_data.as_deref();
            let rendered = html::HtmlFormatter::new()
                .with_engine(self.config.html_engine)
                .with_styling(self.config.pretty)
                .format(&content, lines);
            Ok(rendered)
        }
    }
}
/// Return the SMILES output: the stored string when present, otherwise
/// whatever `SmilesGenerator::generate_from_result` yields for this result.
fn format_smiles(&self, result: &OcrResult) -> Result<String, String> {
    match &result.formats.smiles {
        Some(stored) => Ok(stored.clone()),
        None => smiles::SmilesGenerator::new().generate_from_result(result),
    }
}
/// DOCX cannot be produced as a string.
///
/// # Errors
///
/// Always fails, because DOCX is a binary format.
fn format_docx(&self, _result: &OcrResult) -> Result<String, String> {
    let reason = "DOCX format requires binary output - use save_docx() instead";
    Err(reason.to_owned())
}
/// Return the MathML stored on the result.
///
/// # Errors
///
/// Fails when no MathML is stored; generating it is not implemented.
fn format_mathml(&self, result: &OcrResult) -> Result<String, String> {
    result
        .formats
        .mathml
        .clone()
        .ok_or_else(|| "MathML generation not yet implemented".to_string())
}
/// Return the AsciiMath stored on the result.
///
/// # Errors
///
/// Fails when no AsciiMath is stored; conversion is not implemented.
fn format_asciimath(&self, result: &OcrResult) -> Result<String, String> {
    result
        .formats
        .asciimath
        .clone()
        .ok_or_else(|| "AsciiMath conversion not yet implemented".to_string())
}
/// Store `output` in the `FormatsData` field that corresponds to `format`.
///
/// DOCX is binary and has no string slot, so its output is discarded.
fn set_format_output(&self, formats: &mut FormatsData, format: OutputFormat, output: String) {
    let slot = match format {
        OutputFormat::Docx => return, // Binary format, handled separately
        OutputFormat::Text => &mut formats.text,
        OutputFormat::LaTeX => &mut formats.latex_normal,
        OutputFormat::LaTeXStyled => &mut formats.latex_styled,
        OutputFormat::Mmd => &mut formats.mmd,
        OutputFormat::Html => &mut formats.html,
        OutputFormat::Smiles => &mut formats.smiles,
        OutputFormat::MathML => &mut formats.mathml,
        OutputFormat::AsciiMath => &mut formats.asciimath,
    };
    *slot = Some(output);
}
}
impl Default for OutputFormatter {
fn default() -> Self {
Self::new()
}
}
/// Builder for OutputFormatter configuration
///
/// Starts from `FormatterConfig::default()`, whose format list already contains
/// `OutputFormat::Text`. Each setter consumes and returns the builder so calls
/// can be chained; `build` produces the configured `OutputFormatter`.
pub struct FormatterBuilder {
    config: FormatterConfig,
}

impl FormatterBuilder {
    /// Create a builder holding the default configuration.
    pub fn new() -> Self {
        Self {
            config: FormatterConfig::default(),
        }
    }

    /// Replace the whole list of target formats.
    pub fn formats(mut self, formats: Vec<OutputFormat>) -> Self {
        self.config.formats = formats;
        self
    }

    /// Add `format` to the target formats unless it is already requested.
    ///
    /// The default list already contains `Text`, so appending blindly would
    /// produce duplicates (for example `[Text, Text, LaTeX]`) and make the
    /// formatter render the same output twice. Variants are compared by
    /// discriminant so no `PartialEq` implementation is required.
    pub fn add_format(mut self, format: OutputFormat) -> Self {
        let wanted = std::mem::discriminant(&format);
        let already_requested = self
            .config
            .formats
            .iter()
            .any(|existing| std::mem::discriminant(existing) == wanted);
        if !already_requested {
            self.config.formats.push(format);
        }
        self
    }

    /// Enable or disable pretty printing.
    pub fn pretty(mut self, pretty: bool) -> Self {
        self.config.pretty = pretty;
        self
    }

    /// Set the `include_confidence` option.
    pub fn include_confidence(mut self, include: bool) -> Self {
        self.config.include_confidence = include;
        self
    }

    /// Set the `include_bbox` option.
    pub fn include_bbox(mut self, include: bool) -> Self {
        self.config.include_bbox = include;
        self
    }

    /// Set the math delimiters.
    pub fn math_delimiters(mut self, delimiters: MathDelimiters) -> Self {
        self.config.math_delimiters = delimiters;
        self
    }

    /// Select the HTML math-rendering engine.
    pub fn html_engine(mut self, engine: HtmlEngine) -> Self {
        self.config.html_engine = engine;
        self
    }

    /// Set the `streaming` option.
    pub fn streaming(mut self, streaming: bool) -> Self {
        self.config.streaming = streaming;
        self
    }

    /// Finish building and return the configured formatter.
    pub fn build(self) -> OutputFormatter {
        OutputFormatter::with_config(self.config)
    }
}
impl Default for FormatterBuilder {
fn default() -> Self {
Self::new()
}
}
// Unit tests for `OutputFormatter` and `FormatterBuilder`.
#[cfg(test)]
mod tests {
use super::*;
// Minimal successful OCR result that carries stored text and LaTeX but no line data.
fn create_test_result() -> OcrResult {
OcrResult {
request_id: "test_123".to_string(),
version: "3.0".to_string(),
image_width: 800,
image_height: 600,
is_printed: true,
is_handwritten: false,
auto_rotate_confidence: 0.95,
auto_rotate_degrees: 0,
confidence: 0.98,
confidence_rate: 0.97,
formats: FormatsData {
text: Some("E = mc^2".to_string()),
latex_normal: Some(r"E = mc^2".to_string()),
..Default::default()
},
line_data: None,
error: None,
metadata: HashMap::new(),
}
}
// Stored text is returned unchanged.
#[test]
fn test_format_text() {
let formatter = OutputFormatter::new();
let result = create_test_result();
let output = formatter
.format_single(&result, OutputFormat::Text)
.unwrap();
assert_eq!(output, "E = mc^2");
}
// Unstyled LaTeX output contains the stored LaTeX.
#[test]
fn test_format_latex() {
let formatter = OutputFormatter::new();
let result = create_test_result();
let output = formatter
.format_single(&result, OutputFormat::LaTeX)
.unwrap();
assert!(output.contains("mc^2"));
}
// Builder options reach the built formatter's configuration.
// NOTE(review): the builder starts from the default config, whose format list
// already contains `Text`; the length assertion below therefore only holds if
// `add_format` does not add a format that is already present.
#[test]
fn test_builder() {
let formatter = FormatterBuilder::new()
.add_format(OutputFormat::Text)
.add_format(OutputFormat::LaTeX)
.pretty(true)
.include_confidence(true)
.build();
assert_eq!(formatter.config.formats.len(), 2);
assert!(formatter.config.pretty);
assert!(formatter.config.include_confidence);
}
// Batch formatting yields one output per input result.
#[test]
fn test_batch_format() {
let formatter = OutputFormatter::new();
let results = vec![create_test_result(), create_test_result()];
let outputs = formatter.format_batch(&results).unwrap();
assert_eq!(outputs.len(), 2);
}
}

View File

@@ -0,0 +1,396 @@
//! HTML output formatter with math rendering support
use super::{HtmlEngine, LineData};
/// HTML formatter with math rendering
///
/// Produces a complete HTML document (head, optional inline CSS, body) from
/// plain text or from structured line data.
pub struct HtmlFormatter {
/// Which math-rendering scripts are added to the page head.
engine: HtmlEngine,
/// When true, inline CSS is emitted and `<body>` gets a `theme-*` class.
css_styling: bool,
/// When true, equations get a screen-reader-only label and the `.sr-only` CSS rule is emitted.
accessibility: bool,
/// When true, a viewport meta tag is added to the head.
responsive: bool,
/// Colour theme used for the generated CSS and the body class.
theme: HtmlTheme,
}
/// Colour theme for the generated HTML page.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlTheme {
/// White background with dark text (`body.theme-light`).
Light,
/// Dark background with light text (`body.theme-dark`).
Dark,
/// Dark colours applied through a `prefers-color-scheme: dark` media query.
Auto,
}
impl HtmlFormatter {
/// Create a formatter using MathJax and the light theme, with CSS styling,
/// accessibility markup and the responsive viewport tag all enabled.
pub fn new() -> Self {
    let enabled = true;
    Self {
        engine: HtmlEngine::MathJax,
        css_styling: enabled,
        accessibility: enabled,
        responsive: enabled,
        theme: HtmlTheme::Light,
    }
}
pub fn with_engine(mut self, engine: HtmlEngine) -> Self {
self.engine = engine;
self
}
pub fn with_styling(mut self, styling: bool) -> Self {
self.css_styling = styling;
self
}
pub fn accessibility(mut self, enabled: bool) -> Self {
self.accessibility = enabled;
self
}
pub fn responsive(mut self, enabled: bool) -> Self {
self.responsive = enabled;
self
}
pub fn theme(mut self, theme: HtmlTheme) -> Self {
self.theme = theme;
self
}
/// Render a complete HTML document.
///
/// When `lines` is provided the body is built from the structured line data
/// and `content` is ignored; otherwise `content` is rendered as plain text.
/// With CSS styling enabled, `<body>` carries a lower-cased `theme-*` class.
pub fn format(&self, content: &str, lines: Option<&[LineData]>) -> String {
    let body_class = if self.css_styling {
        format!(r#" class="theme-{:?}""#, self.theme).to_lowercase()
    } else {
        String::new()
    };
    let inner = match lines {
        Some(line_data) => self.format_lines(line_data),
        None => self.format_text(content),
    };

    let mut page = self.html_header();
    page.push_str("<body");
    page.push_str(&body_class);
    page.push_str(">\n");
    page.push_str("<div class=\"content\">\n");
    page.push_str(&inner);
    page.push_str("</div>\n</body>\n</html>");
    page
}
/// Generate the document prologue and `<head>` section.
///
/// The head holds the charset, an optional viewport tag, the title, the
/// scripts for the selected math engine and, when styling is enabled, the
/// inline CSS.
///
/// Two deliberate choices for MathJax:
/// * The `MathJax = {...}` configuration is emitted BEFORE the loader script.
///   MathJax reads the global when it starts, and assigning it afterwards
///   would overwrite the loaded library object.
/// * No script is loaded from `polyfill.io`. That domain was compromised in a
///   supply-chain attack and MathJax no longer recommends the polyfill.
fn html_header(&self) -> String {
    let mut header = String::from("<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n");
    header.push_str(r#" <meta charset="UTF-8">"#);
    header.push_str("\n");
    if self.responsive {
        header.push_str(
            r#" <meta name="viewport" content="width=device-width, initial-scale=1.0">"#,
        );
        header.push_str("\n");
    }
    header.push_str(" <title>Mathematical Content</title>\n");

    // Math rendering scripts
    match self.engine {
        HtmlEngine::MathJax => {
            // Configuration first, so it is in place when the loader runs.
            header.push_str(" <script>\n");
            header.push_str(" MathJax = {\n");
            header.push_str(" tex: {\n");
            header.push_str(r#" inlineMath: [['$', '$'], ['\\(', '\\)']],"#);
            header.push_str("\n");
            header.push_str(r#" displayMath: [['$$', '$$'], ['\\[', '\\]']]"#);
            header.push_str("\n");
            header.push_str(" }\n");
            header.push_str(" };\n");
            header.push_str(" </script>\n");
            header.push_str(r#" <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>"#);
            header.push_str("\n");
        }
        HtmlEngine::KaTeX => {
            // NOTE(review): KaTeX auto-render is called with its default
            // delimiters, which presumably do not include single `$`; inline
            // `$...$` math may stay unrendered with this engine - confirm.
            header.push_str(r#" <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">"#);
            header.push_str("\n");
            header.push_str(r#" <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>"#);
            header.push_str("\n");
            header.push_str(r#" <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js" onload="renderMathInElement(document.body);"></script>"#);
            header.push_str("\n");
        }
        HtmlEngine::Raw => {
            // No math rendering
        }
    }

    // CSS styling
    if self.css_styling {
        header.push_str(" <style>\n");
        header.push_str(&self.generate_css());
        header.push_str(" </style>\n");
    }
    header.push_str("</head>\n");
    header
}
/// Build the inline stylesheet: base body rules, the rules for the selected
/// theme, shared content/table rules and, when accessibility is enabled, the
/// `.sr-only` helper class.
fn generate_css(&self) -> String {
    const BASE: &str = concat!(
        " body {\n",
        " font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;\n",
        " line-height: 1.6;\n",
        " max-width: 800px;\n",
        " margin: 0 auto;\n",
        " padding: 20px;\n",
        " }\n",
    );
    const SHARED: &str = concat!(
        " .content { padding: 20px; }\n",
        " .math-display { text-align: center; margin: 20px 0; }\n",
        " .math-inline { display: inline; }\n",
        " .equation-block { margin: 15px 0; padding: 10px; background: #f5f5f5; border-radius: 4px; }\n",
        " table { border-collapse: collapse; width: 100%; margin: 20px 0; }\n",
        " th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }\n",
        " th { background-color: #f2f2f2; }\n",
    );
    const SR_ONLY: &str = " .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0,0,0,0); border: 0; }\n";

    // Theme colors
    let theme_rules = match self.theme {
        HtmlTheme::Light => concat!(
            " body.theme-light {\n",
            " background-color: #ffffff;\n",
            " color: #333333;\n",
            " }\n",
        ),
        HtmlTheme::Dark => concat!(
            " body.theme-dark {\n",
            " background-color: #1e1e1e;\n",
            " color: #d4d4d4;\n",
            " }\n",
        ),
        HtmlTheme::Auto => concat!(
            " @media (prefers-color-scheme: dark) {\n",
            " body { background-color: #1e1e1e; color: #d4d4d4; }\n",
            " }\n",
        ),
    };

    let mut css = String::from(BASE);
    css.push_str(theme_rules);
    css.push_str(SHARED);
    if self.accessibility {
        css.push_str(SR_ONLY);
    }
    css
}
/// Format plain text to HTML.
///
/// The text is HTML-escaped and wrapped in `<p>...</p>`. Every balanced
/// `$$ ... $$` pair is wrapped in `<div class="math-display">`, delimiters
/// included, so the math engine can still find it. Replacing each `$$`
/// independently would wrap the delimiters themselves rather than the math
/// between them, so the delimiters are paired up instead. An unmatched
/// trailing `$$` is left as literal text. Inline `$...$` math is not touched.
fn format_text(&self, text: &str) -> String {
    let escaped = self.escape_html(text);

    // Splitting on the delimiter gives alternating outside/inside segments:
    // even indices are ordinary text, odd indices follow an opening `$$`.
    let segments: Vec<&str> = escaped.split("$$").collect();
    // `split` always yields at least one segment, so this cannot underflow.
    let last = segments.len() - 1;

    let mut html = String::with_capacity(escaped.len() + 16);
    html.push_str("<p>");
    for (index, segment) in segments.iter().enumerate() {
        if index % 2 == 0 {
            html.push_str(segment);
        } else if index < last {
            // A later segment exists, so this math block has its closing `$$`.
            html.push_str("<div class=\"math-display\">$$");
            html.push_str(segment);
            html.push_str("$$</div>");
        } else {
            // Opening delimiter without a partner: keep it verbatim.
            html.push_str("$$");
            html.push_str(segment);
        }
    }
    html.push_str("</p>");
    html
}
/// Format line data to HTML.
///
/// Each line is rendered according to its `line_type`:
/// * `"math"` / `"equation"`: a `math-display` div holding `$$...$$`, preceded
///   by a screen-reader-only label when accessibility is enabled;
/// * `"inline_math"`: a `math-inline` span holding `$...$`;
/// * `"heading"`: an `<h2>`;
/// * `"table"`: a table built by `format_table`;
/// * `"image"`: an `<img>` whose `src` is the line text;
/// * anything else, including `"text"`: a paragraph.
///
/// All inserted content is HTML-escaped, including the LaTeX source. LaTeX
/// routinely contains `<`, `>` and `&`, which would otherwise corrupt the
/// markup or let recognised content inject tags. The browser decodes the
/// entities again before the math engine reads the text.
fn format_lines(&self, lines: &[LineData]) -> String {
    let mut html = String::new();
    for line in lines {
        match line.line_type.as_str() {
            "math" | "equation" => {
                let latex = self.escape_html(line.latex.as_ref().unwrap_or(&line.text));
                html.push_str(r#"<div class="math-display">"#);
                if self.accessibility {
                    html.push_str(&format!(
                        r#"<span class="sr-only">Equation: {}</span>"#,
                        self.escape_html(&line.text)
                    ));
                }
                html.push_str(&format!("$${}$$", latex));
                html.push_str("</div>\n");
            }
            "inline_math" => {
                let latex = self.escape_html(line.latex.as_ref().unwrap_or(&line.text));
                html.push_str(&format!(r#"<span class="math-inline">${}$</span>"#, latex));
            }
            "heading" => {
                html.push_str(&format!("<h2>{}</h2>\n", self.escape_html(&line.text)));
            }
            "table" => {
                html.push_str(&self.format_table(&line.text));
            }
            "image" => {
                html.push_str(&format!(
                    r#"<img src="{}" alt="Image" loading="lazy">"#,
                    self.escape_html(&line.text)
                ));
                html.push_str("\n");
            }
            // "text" and any unrecognised type are rendered as a paragraph.
            _ => {
                html.push_str("<p>");
                html.push_str(&self.escape_html(&line.text));
                html.push_str("</p>\n");
            }
        }
    }
    html
}
/// Format a pipe-delimited table to HTML.
///
/// Each input line becomes one `<tr>`; the first line uses `<th>` cells and
/// the rest use `<td>`. One optional leading and one optional trailing `|`
/// are treated as the row border and removed; the remainder is split on `|`
/// and each cell is trimmed and HTML-escaped.
///
/// Empty cells inside a row are kept. Dropping them would shift every
/// following cell one column to the left. A row with no content at all
/// produces an empty `<tr>`.
fn format_table(&self, table: &str) -> String {
    let mut html = String::from("<table>\n");
    for (row_index, row) in table.lines().enumerate() {
        html.push_str(" <tr>\n");
        let tag = if row_index == 0 { "th" } else { "td" };

        // Strip the outer border pipes so they do not create phantom cells.
        let trimmed = row.trim();
        let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
        let inner = inner.strip_suffix('|').unwrap_or(inner);

        if !inner.trim().is_empty() {
            for cell in inner.split('|') {
                html.push_str(&format!(
                    " <{}>{}</{}>\n",
                    tag,
                    self.escape_html(cell.trim()),
                    tag
                ));
            }
        }
        html.push_str(" </tr>\n");
    }
    html.push_str("</table>\n");
    html
}
/// Replace `& < > " '` with their HTML entities (`'` becomes `&#39;`).
fn escape_html(&self, text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
}
impl Default for HtmlFormatter {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the HTML formatter: head generation, escaping, line rendering,
// theming and accessibility markup.
#[cfg(test)]
mod tests {
use super::*;
use crate::output::BoundingBox;
// The MathJax head contains the doctype and a MathJax reference.
#[test]
fn test_html_header() {
let formatter = HtmlFormatter::new().with_engine(HtmlEngine::MathJax);
let header = formatter.html_header();
assert!(header.contains("<!DOCTYPE html>"));
assert!(header.contains("MathJax"));
}
// The KaTeX head references the KaTeX assets.
#[test]
fn test_katex_header() {
let formatter = HtmlFormatter::new().with_engine(HtmlEngine::KaTeX);
let header = formatter.html_header();
assert!(header.contains("katex"));
}
// Angle brackets are escaped, so no literal `<script>` tag survives.
#[test]
fn test_escape_html() {
let formatter = HtmlFormatter::new();
let result = formatter.escape_html("<script>alert('test')</script>");
assert!(result.contains("&lt;"));
assert!(result.contains("&gt;"));
assert!(!result.contains("<script>"));
}
// A text line becomes a paragraph and an equation line becomes a display-math block.
#[test]
fn test_format_lines() {
let formatter = HtmlFormatter::new();
let lines = vec![
LineData {
line_type: "text".to_string(),
text: "Introduction".to_string(),
latex: None,
bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
confidence: 0.95,
words: None,
},
LineData {
line_type: "equation".to_string(),
text: "E = mc^2".to_string(),
latex: Some(r"E = mc^2".to_string()),
bbox: BoundingBox::new(0.0, 25.0, 100.0, 30.0),
confidence: 0.98,
words: None,
},
];
let result = formatter.format_lines(&lines);
assert!(result.contains("<p>Introduction</p>"));
assert!(result.contains("math-display"));
assert!(result.contains("$$"));
}
// The dark theme emits its body class selector and background colour.
#[test]
fn test_dark_theme() {
let formatter = HtmlFormatter::new().theme(HtmlTheme::Dark);
let css = formatter.generate_css();
assert!(css.contains("theme-dark"));
assert!(css.contains("#1e1e1e"));
}
// With accessibility enabled, equations carry a screen-reader-only label.
#[test]
fn test_accessibility() {
let formatter = HtmlFormatter::new().accessibility(true);
let lines = vec![LineData {
line_type: "equation".to_string(),
text: "x squared".to_string(),
latex: Some("x^2".to_string()),
bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
confidence: 0.98,
words: None,
}];
let result = formatter.format_lines(&lines);
assert!(result.contains("sr-only"));
assert!(result.contains("Equation:"));
}
}

View File

@@ -0,0 +1,354 @@
//! JSON API response formatter matching Scipix API specification
use super::{FormatsData, LineData, OcrResult};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
/// Complete API response matching Scipix format
///
/// Optional fields are left out of the serialized JSON when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiResponse {
/// Request identifier
pub request_id: String,
/// API version
pub version: String,
/// Image information
pub image_width: u32,
pub image_height: u32,
/// Detection metadata
pub is_printed: bool,
pub is_handwritten: bool,
// Auto-rotation values; `None` in responses built by `ApiResponse::error`.
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_rotate_confidence: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub auto_rotate_degrees: Option<i32>,
/// Confidence metrics
pub confidence: f32,
pub confidence_rate: f32,
/// Available output formats
// Flattened: the fields of `FormatsData` appear directly at the top level of the JSON object.
#[serde(flatten)]
pub formats: FormatsData,
/// Detailed line data
#[serde(skip_serializing_if = "Option::is_none")]
pub line_data: Option<Vec<LineData>>,
/// Error information
// `error` is the plain message; `error_info` is the structured form with a code.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_info: Option<ErrorInfo>,
/// Processing metadata
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<HashMap<String, Value>>,
}
/// Error information structure
///
/// Structured description of a failure, embedded in `ApiResponse` and `BatchError`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorInfo {
/// Short error code (for example `"invalid_image"` or `"timeout"`).
pub code: String,
/// Human-readable description of the error.
pub message: String,
/// Optional free-form JSON details; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<Value>,
}
impl ApiResponse {
/// Create response from OCR result
pub fn from_ocr_result(result: OcrResult) -> Self {
Self {
request_id: result.request_id,
version: result.version,
image_width: result.image_width,
image_height: result.image_height,
is_printed: result.is_printed,
is_handwritten: result.is_handwritten,
auto_rotate_confidence: Some(result.auto_rotate_confidence),
auto_rotate_degrees: Some(result.auto_rotate_degrees),
confidence: result.confidence,
confidence_rate: result.confidence_rate,
formats: result.formats,
line_data: result.line_data,
error: result.error,
error_info: None,
metadata: if result.metadata.is_empty() {
None
} else {
Some(result.metadata)
},
}
}
/// Create error response
pub fn error(request_id: String, code: &str, message: &str) -> Self {
Self {
request_id,
version: "3.0".to_string(),
image_width: 0,
image_height: 0,
is_printed: false,
is_handwritten: false,
auto_rotate_confidence: None,
auto_rotate_degrees: None,
confidence: 0.0,
confidence_rate: 0.0,
formats: FormatsData::default(),
line_data: None,
error: Some(message.to_string()),
error_info: Some(ErrorInfo {
code: code.to_string(),
message: message.to_string(),
details: None,
}),
metadata: None,
}
}
/// Convert to JSON string
pub fn to_json(&self) -> Result<String, String> {
serde_json::to_string(self).map_err(|e| format!("JSON serialization error: {}", e))
}
/// Convert to pretty JSON string
pub fn to_json_pretty(&self) -> Result<String, String> {
serde_json::to_string_pretty(self).map_err(|e| format!("JSON serialization error: {}", e))
}
/// Parse from JSON string
pub fn from_json(json: &str) -> Result<Self, String> {
serde_json::from_str(json).map_err(|e| format!("JSON parsing error: {}", e))
}
}
/// Batch API response
///
/// Bundles the individual responses of a batch with summary counters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchApiResponse {
/// Identifier of the batch.
pub batch_id: String,
/// Number of responses in `results`.
pub total: usize,
/// Number of responses counted as successful by `BatchApiResponse::new`.
pub completed: usize,
/// Individual responses, in the order they were supplied.
pub results: Vec<ApiResponse>,
/// Per-item errors; omitted from the JSON when there are none.
#[serde(skip_serializing_if = "Option::is_none")]
pub errors: Option<Vec<BatchError>>,
}
/// Error attached to one item of a batch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchError {
/// Zero-based position of the failed item within `BatchApiResponse::results`.
pub index: usize,
/// Structured error for that item.
pub error: ErrorInfo,
}
impl BatchApiResponse {
pub fn new(batch_id: String, results: Vec<ApiResponse>) -> Self {
let total = results.len();
let completed = results.iter().filter(|r| r.error.is_none()).count();
let errors: Vec<BatchError> = results
.iter()
.enumerate()
.filter_map(|(i, r)| {
r.error_info.as_ref().map(|e| BatchError {
index: i,
error: e.clone(),
})
})
.collect();
Self {
batch_id,
total,
completed,
results,
errors: if errors.is_empty() {
None
} else {
Some(errors)
},
}
}
pub fn to_json(&self) -> Result<String, String> {
serde_json::to_string(self).map_err(|e| format!("JSON serialization error: {}", e))
}
pub fn to_json_pretty(&self) -> Result<String, String> {
serde_json::to_string_pretty(self).map_err(|e| format!("JSON serialization error: {}", e))
}
}
/// API request format
///
/// Optional fields are omitted from the serialized JSON when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiRequest {
/// Image source (URL or base64)
pub src: String,
/// Requested output formats
#[serde(skip_serializing_if = "Option::is_none")]
pub formats: Option<Vec<String>>,
/// OCR options
#[serde(skip_serializing_if = "Option::is_none")]
pub ocr: Option<OcrOptions>,
/// Additional metadata
// Flattened: entries are read from and written to the top level of the request
// object next to the named fields.
#[serde(flatten)]
pub metadata: HashMap<String, Value>,
}
/// OCR options carried by an `ApiRequest`; every option is optional and is
/// omitted from the serialized JSON when `None`.
// NOTE(review): the meaning of each option is not established by code in this
// file; the field names presumably mirror the Scipix API - confirm against its spec.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub math_inline_delimiters: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub math_display_delimiters: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rm_spaces: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rm_fonts: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub numbers_default_to_math: Option<bool>,
}
// Unit tests for the JSON API types: construction, (de)serialization and batch summaries.
#[cfg(test)]
mod tests {
use super::*;
// Minimal successful OCR result with stored text and LaTeX, no line data and no metadata.
fn create_test_result() -> OcrResult {
OcrResult {
request_id: "test_123".to_string(),
version: "3.0".to_string(),
image_width: 800,
image_height: 600,
is_printed: true,
is_handwritten: false,
auto_rotate_confidence: 0.95,
auto_rotate_degrees: 0,
confidence: 0.98,
confidence_rate: 0.97,
formats: FormatsData {
text: Some("E = mc^2".to_string()),
latex_normal: Some(r"E = mc^2".to_string()),
..Default::default()
},
line_data: None,
error: None,
metadata: HashMap::new(),
}
}
// Fields are carried over from the OCR result.
#[test]
fn test_api_response_from_result() {
let result = create_test_result();
let response = ApiResponse::from_ocr_result(result);
assert_eq!(response.request_id, "test_123");
assert_eq!(response.version, "3.0");
assert_eq!(response.confidence, 0.98);
assert!(response.formats.text.is_some());
}
// Serialized JSON contains the expected keys and values.
#[test]
fn test_api_response_to_json() {
let result = create_test_result();
let response = ApiResponse::from_ocr_result(result);
let json = response.to_json().unwrap();
assert!(json.contains("request_id"));
assert!(json.contains("test_123"));
assert!(json.contains("confidence"));
}
// Serializing and parsing back preserves the request id and confidence.
#[test]
fn test_api_response_round_trip() {
let result = create_test_result();
let response = ApiResponse::from_ocr_result(result);
let json = response.to_json().unwrap();
let parsed = ApiResponse::from_json(&json).unwrap();
assert_eq!(response.request_id, parsed.request_id);
assert_eq!(response.confidence, parsed.confidence);
}
// An error response carries both the message and the structured error with its code.
#[test]
fn test_error_response() {
let response = ApiResponse::error(
"test_456".to_string(),
"invalid_image",
"Image format not supported",
);
assert_eq!(response.request_id, "test_456");
assert!(response.error.is_some());
assert!(response.error_info.is_some());
let error_info = response.error_info.unwrap();
assert_eq!(error_info.code, "invalid_image");
}
// A batch made only of successes has no error list and counts every item as completed.
#[test]
fn test_batch_response() {
let result1 = create_test_result();
let result2 = create_test_result();
let responses = vec![
ApiResponse::from_ocr_result(result1),
ApiResponse::from_ocr_result(result2),
];
let batch = BatchApiResponse::new("batch_789".to_string(), responses);
assert_eq!(batch.batch_id, "batch_789");
assert_eq!(batch.total, 2);
assert_eq!(batch.completed, 2);
assert!(batch.errors.is_none());
}
// One success plus one error response: one completed item and one listed error.
#[test]
fn test_batch_with_errors() {
let success = create_test_result();
let error_response =
ApiResponse::error("fail_1".to_string(), "timeout", "Processing timeout");
let responses = vec![ApiResponse::from_ocr_result(success), error_response];
let batch = BatchApiResponse::new("batch_error".to_string(), responses);
assert_eq!(batch.total, 2);
assert_eq!(batch.completed, 1);
assert!(batch.errors.is_some());
assert_eq!(batch.errors.unwrap().len(), 1);
}
// A fully populated request serializes with its `src` and `formats` keys.
#[test]
fn test_api_request() {
let request = ApiRequest {
src: "https://example.com/image.png".to_string(),
formats: Some(vec!["text".to_string(), "latex_styled".to_string()]),
ocr: Some(OcrOptions {
math_inline_delimiters: Some(vec!["$".to_string(), "$".to_string()]),
math_display_delimiters: Some(vec!["$$".to_string(), "$$".to_string()]),
rm_spaces: Some(true),
rm_fonts: None,
numbers_default_to_math: Some(false),
}),
metadata: HashMap::new(),
};
let json = serde_json::to_string(&request).unwrap();
assert!(json.contains("src"));
assert!(json.contains("formats"));
}
}

View File

@@ -0,0 +1,430 @@
//! LaTeX output formatter with styling and package management
use super::LineData;
/// LaTeX document formatter.
///
/// Configured through consuming builder methods (`with_packages`,
/// `document_class`, `numbered_equations`, ...) and then used to turn OCR
/// output into LaTeX fragments or a complete document.
#[derive(Clone)]
pub struct LaTeXFormatter {
    /// Packages emitted as `\usepackage{...}` lines, in this order.
    packages: Vec<String>,
    /// Argument of `\documentclass{...}`.
    document_class: String,
    /// Raw text inserted between the package list and `\begin{document}`.
    preamble: String,
    /// When `true`, display math uses the numbered `equation` environment
    /// instead of `\[ ... \]`.
    numbered_equations: bool,
    /// Optional `(start, end)` strings wrapped around the output of `format`.
    custom_delimiters: Option<(String, String)>,
}

impl LaTeXFormatter {
    /// Create a formatter for the `article` class with `amsmath` and
    /// `amssymb` loaded, unnumbered equations and no custom delimiters.
    pub fn new() -> Self {
        Self {
            packages: vec!["amsmath".to_string(), "amssymb".to_string()],
            document_class: "article".to_string(),
            preamble: String::new(),
            numbered_equations: false,
            custom_delimiters: None,
        }
    }

    /// Replace the whole package list.
    pub fn with_packages(mut self, packages: Vec<String>) -> Self {
        self.packages = packages;
        self
    }

    /// Append `package` unless it is already in the list.
    pub fn add_package(mut self, package: String) -> Self {
        if !self.packages.contains(&package) {
            self.packages.push(package);
        }
        self
    }

    /// Set the document class used by `format_document`.
    pub fn document_class(mut self, class: String) -> Self {
        self.document_class = class;
        self
    }

    /// Set the raw preamble inserted before `\begin{document}`.
    pub fn preamble(mut self, preamble: String) -> Self {
        self.preamble = preamble;
        self
    }

    /// Choose between numbered (`equation`) and unnumbered (`\[ \]`) display math.
    pub fn numbered_equations(mut self, numbered: bool) -> Self {
        self.numbered_equations = numbered;
        self
    }

    /// Wrap the output of `format` in the given start/end strings.
    pub fn custom_delimiters(mut self, start: String, end: String) -> Self {
        self.custom_delimiters = Some((start, end));
        self
    }

    /// Format plain LaTeX content.
    ///
    /// The input is normalized by `clean_latex` and then wrapped in the
    /// custom delimiters when they are configured.
    pub fn format(&self, latex: &str) -> String {
        let cleaned = self.clean_latex(latex);
        match &self.custom_delimiters {
            Some((start, end)) => format!("{}{}{}", start, cleaned, end),
            None => cleaned,
        }
    }

    /// Format line data to LaTeX.
    ///
    /// Dispatches on `line_type`:
    /// - `text`: escaped and followed by a blank line;
    /// - `math` / `equation`: display math (numbered or not);
    /// - `inline_math`: `$...$` with no trailing separator;
    /// - `align`: consecutive lines are gathered into one `align*` block;
    /// - `table`: rendered as a `tabular` environment;
    /// - anything else: emitted verbatim followed by a newline.
    ///
    /// For math-like lines the `latex` field is used when present, otherwise
    /// `text`. The result is trimmed.
    pub fn format_lines(&self, lines: &[LineData]) -> String {
        let mut output = String::new();
        let mut in_align = false;

        for line in lines {
            let kind = line.line_type.as_str();

            // An open align* block has to be closed before ANY line that is
            // not another align row; otherwise the following content would
            // end up nested inside the environment.
            if in_align && kind != "align" {
                output.push_str("\\end{align*}\n\n");
                in_align = false;
            }

            match kind {
                "text" => {
                    output.push_str(&self.escape_text(&line.text));
                    output.push_str("\n\n");
                }
                "math" | "equation" => {
                    let latex = line.latex.as_deref().unwrap_or(&line.text);
                    output.push_str(&self.inline_to_display(latex));
                    output.push_str("\n\n");
                }
                "inline_math" => {
                    let latex = line.latex.as_deref().unwrap_or(&line.text);
                    output.push('$');
                    output.push_str(latex.trim());
                    output.push('$');
                }
                "align" => {
                    if !in_align {
                        output.push_str("\\begin{align*}\n");
                        in_align = true;
                    }
                    let latex = line.latex.as_deref().unwrap_or(&line.text);
                    output.push_str(latex.trim());
                    output.push_str(" \\\\\n");
                }
                "table" => {
                    output.push_str(&self.format_table(&line.text));
                    output.push_str("\n\n");
                }
                _ => {
                    output.push_str(&line.text);
                    output.push('\n');
                }
            }
        }

        if in_align {
            output.push_str("\\end{align*}\n");
        }
        output.trim().to_string()
    }

    /// Format a complete LaTeX document around `content`.
    ///
    /// Layout: `\documentclass`, one `\usepackage` per package, the optional
    /// preamble, then `content` between `\begin{document}` and
    /// `\end{document}`. `content` is inserted verbatim.
    pub fn format_document(&self, content: &str) -> String {
        let mut doc = String::new();

        doc.push_str(&format!("\\documentclass{{{}}}\n\n", self.document_class));

        for package in &self.packages {
            doc.push_str(&format!("\\usepackage{{{}}}\n", package));
        }
        doc.push('\n');

        if !self.preamble.is_empty() {
            doc.push_str(&self.preamble);
            doc.push_str("\n\n");
        }

        doc.push_str("\\begin{document}\n\n");
        doc.push_str(content);
        doc.push_str("\n\n");
        doc.push_str("\\end{document}\n");
        doc
    }

    /// Clean and normalize LaTeX.
    ///
    /// Collapses runs of spaces, converts CRLF to LF, surrounds `=`, `+`,
    /// `-`, `\times` and `\div` with single spaces, and trims the result.
    /// The operator spacing is a plain textual replacement, so it also
    /// applies inside arguments (e.g. `x^{-1}` becomes `x^{ - 1}`).
    fn clean_latex(&self, latex: &str) -> String {
        let mut cleaned = Self::collapse_spaces(latex).replace("\r\n", "\n");

        for op in &["=", "+", "-", r"\times", r"\div"] {
            let spaced = format!(" {} ", op);
            cleaned = cleaned.replace(op, &spaced);
        }

        // The spacing step can create new double spaces; collapse once more.
        Self::collapse_spaces(&cleaned).trim().to_string()
    }

    /// Reduce every run of consecutive spaces to a single space (one pass).
    fn collapse_spaces(text: &str) -> String {
        let mut out = String::with_capacity(text.len());
        let mut previous_was_space = false;
        for ch in text.chars() {
            if ch == ' ' {
                if !previous_was_space {
                    out.push(' ');
                }
                previous_was_space = true;
            } else {
                out.push(ch);
                previous_was_space = false;
            }
        }
        out
    }

    /// Escape special LaTeX characters in text.
    ///
    /// Works in a single pass so that the braces of an inserted command are
    /// never escaped again. `\`, `^` and `~` are written with their
    /// `\text...{}` commands: `\\` would be a line break and `\^` / `\~`
    /// are accent commands, not literal characters.
    fn escape_text(&self, text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '\\' => escaped.push_str(r"\textbackslash{}"),
                '^' => escaped.push_str(r"\textasciicircum{}"),
                '~' => escaped.push_str(r"\textasciitilde{}"),
                '{' | '}' | '$' | '%' | '_' | '&' | '#' => {
                    escaped.push('\\');
                    escaped.push(ch);
                }
                _ => escaped.push(ch),
            }
        }
        escaped
    }

    /// Split one pipe-delimited table row into trimmed cells.
    ///
    /// One optional leading and one optional trailing `|` are treated as
    /// borders; interior empty cells are kept so columns stay aligned.
    /// A blank row yields no cells.
    fn split_table_row(row: &str) -> Vec<&str> {
        let trimmed = row.trim();
        if trimmed.is_empty() {
            return Vec::new();
        }
        let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
        let inner = inner.strip_suffix('|').unwrap_or(inner);
        inner.split('|').map(str::trim).collect()
    }

    /// Format a pipe-delimited table as a LaTeX `tabular` environment.
    ///
    /// The column count comes from the first non-blank row; every column is
    /// centered. A rule is drawn above the table, below the first row and
    /// below the last row. Cell contents are inserted verbatim. Returns an
    /// empty string when the table has no non-blank rows.
    fn format_table(&self, table: &str) -> String {
        let rows: Vec<Vec<&str>> = table
            .lines()
            .map(|row| Self::split_table_row(row))
            .filter(|cells| !cells.is_empty())
            .collect();

        let num_cols = match rows.first() {
            Some(first) => first.len(),
            None => return String::new(),
        };

        let mut output = format!("\\begin{{tabular}}{{{}}}\n", "c".repeat(num_cols));
        output.push_str("\\hline\n");

        for (i, cells) in rows.iter().enumerate() {
            output.push_str(&cells.join(" & "));
            output.push_str(" \\\\\n");
            if i == 0 {
                // Separate the first (header) row from the body.
                output.push_str("\\hline\n");
            }
        }

        output.push_str("\\hline\n");
        output.push_str("\\end{tabular}");
        output
    }

    /// Convert inline LaTeX to display math (`equation` when numbered
    /// equations are enabled, `\[ ... \]` otherwise).
    pub fn inline_to_display(&self, latex: &str) -> String {
        if self.numbered_equations {
            format!("\\begin{{equation}}\n{}\n\\end{{equation}}", latex.trim())
        } else {
            format!("\\[\n{}\n\\]", latex.trim())
        }
    }

    /// Append a `\label{...}` on a new line after the trimmed `latex`.
    pub fn add_label(&self, latex: &str, label: &str) -> String {
        format!("{}\n\\label{{{}}}", latex.trim(), label)
    }
}

impl Default for LaTeXFormatter {
    fn default() -> Self {
        Self::new()
    }
}
/// Styled LaTeX formatter with predefined templates.
///
/// Wraps a [`LaTeXFormatter`] that is pre-configured with the document class
/// and package set of the chosen [`LaTeXStyle`].
// `style` is recorded at construction but not read by any method in this
// block, hence the `dead_code` allowance.
#[allow(dead_code)]
pub struct StyledLaTeXFormatter {
    base: LaTeXFormatter,
    style: LaTeXStyle,
}

/// Document templates understood by [`StyledLaTeXFormatter`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LaTeXStyle {
    /// `article` class with math, graphics and hyperlink packages.
    Article,
    /// `report` class; the article packages plus `geometry`.
    Report,
    /// `book` class; the report packages plus `fancyhdr`.
    Book,
    /// `beamer` class with math and graphics packages.
    Beamer,
    /// `article` class with only `amsmath`.
    Minimal,
}

impl StyledLaTeXFormatter {
    /// Build a formatter whose base is configured for `style`.
    pub fn new(style: LaTeXStyle) -> Self {
        let class = match style {
            LaTeXStyle::Article | LaTeXStyle::Minimal => "article",
            LaTeXStyle::Report => "report",
            LaTeXStyle::Book => "book",
            LaTeXStyle::Beamer => "beamer",
        };
        let packages: &[&str] = match style {
            LaTeXStyle::Article => &["amsmath", "amssymb", "graphicx", "hyperref"],
            LaTeXStyle::Report => &["amsmath", "amssymb", "graphicx", "hyperref", "geometry"],
            LaTeXStyle::Book => &[
                "amsmath", "amssymb", "graphicx", "hyperref", "geometry", "fancyhdr",
            ],
            LaTeXStyle::Beamer => &["amsmath", "amssymb", "graphicx"],
            LaTeXStyle::Minimal => &["amsmath"],
        };

        let base = LaTeXFormatter::new()
            .document_class(class.to_string())
            .with_packages(packages.iter().map(|p| p.to_string()).collect());

        Self { base, style }
    }

    /// Format a complete document, optionally with a title block.
    ///
    /// When `title` or `author` is given, the corresponding `\title` /
    /// `\author` commands plus `\date{\today}` go into the preamble and a
    /// single `\maketitle` is placed right after the document's opening
    /// `\begin{document}`. `title` and `author` are inserted verbatim.
    pub fn format_document(
        &self,
        content: &str,
        title: Option<&str>,
        author: Option<&str>,
    ) -> String {
        let has_title_block = title.is_some() || author.is_some();

        let mut preamble = String::new();
        if let Some(t) = title {
            preamble.push_str(&format!("\\title{{{}}}\n", t));
        }
        if let Some(a) = author {
            preamble.push_str(&format!("\\author{{{}}}\n", a));
        }
        if has_title_block {
            preamble.push_str("\\date{\\today}\n");
        }

        let doc = self.base.clone().preamble(preamble).format_document(content);
        if !has_title_block {
            return doc;
        }

        // Only the first occurrence is the real document start; the same
        // text may legitimately appear again inside `content`.
        doc.replacen(
            "\\begin{document}\n\n",
            "\\begin{document}\n\n\\maketitle\n\n",
            1,
        )
    }
}
// Unit tests for the LaTeX formatters in this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::output::BoundingBox;
// `format` must keep the mathematical content of a simple expression.
#[test]
fn test_format_simple() {
let formatter = LaTeXFormatter::new();
let result = formatter.format("E = mc^2");
assert!(result.contains("mc^2"));
}
// A default document must contain the class, the default package, the
// document environment and the content.
#[test]
fn test_format_document() {
let formatter = LaTeXFormatter::new();
let doc = formatter.format_document("E = mc^2");
assert!(doc.contains(r"\documentclass{article}"));
assert!(doc.contains(r"\usepackage{amsmath}"));
assert!(doc.contains(r"\begin{document}"));
assert!(doc.contains("mc^2"));
assert!(doc.contains(r"\end{document}"));
}
// `$`, `&` and `%` in plain text must come out backslash-escaped.
#[test]
fn test_escape_text() {
let formatter = LaTeXFormatter::new();
let result = formatter.escape_text("Price: $100 & 50%");
assert!(result.contains(r"\$100"));
assert!(result.contains(r"\&"));
assert!(result.contains(r"\%"));
}
// With default settings (unnumbered equations) display math uses `\[ \]`.
#[test]
fn test_inline_to_display() {
let formatter = LaTeXFormatter::new();
let result = formatter.inline_to_display("x^2 + y^2 = r^2");
assert!(result.contains(r"\["));
assert!(result.contains(r"\]"));
}
// Supplying a title and an author must produce the title commands and
// a `\maketitle`.
#[test]
fn test_styled_formatter() {
let formatter = StyledLaTeXFormatter::new(LaTeXStyle::Article);
let doc = formatter.format_document("Content", Some("My Title"), Some("Author Name"));
assert!(doc.contains(r"\title{My Title}"));
assert!(doc.contains(r"\author{Author Name}"));
assert!(doc.contains(r"\maketitle"));
}
// A text line followed by an equation line must yield the text and a
// display-math block containing the equation.
#[test]
fn test_format_lines() {
let formatter = LaTeXFormatter::new();
let lines = vec![
LineData {
line_type: "text".to_string(),
text: "Introduction".to_string(),
latex: None,
bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
confidence: 0.95,
words: None,
},
LineData {
line_type: "equation".to_string(),
text: "E = mc^2".to_string(),
latex: Some(r"E = mc^2".to_string()),
bbox: BoundingBox::new(0.0, 25.0, 100.0, 30.0),
confidence: 0.98,
words: None,
},
];
let result = formatter.format_lines(&lines);
assert!(result.contains("Introduction"));
assert!(result.contains(r"\[") || result.contains(r"\begin{equation}"));
assert!(result.contains("mc^2"));
}
}

View File

@@ -0,0 +1,379 @@
//! Scipix Markdown (MMD) formatter
//!
//! MMD is an enhanced markdown format that supports:
//! - Inline and display math with LaTeX
//! - Tables with alignment
//! - Chemistry notation (SMILES)
//! - Image embedding
//! - Structured documents
use super::{LineData, MathDelimiters};
/// Scipix Markdown formatter.
///
/// Renders OCR line data and mixed text as MMD, using configurable math
/// delimiters.
pub struct MmdFormatter {
    /// Delimiters written around inline and display math.
    delimiters: MathDelimiters,
    /// Set through `include_metadata`; not consulted by any method in this block.
    include_metadata: bool,
    /// Set through `preserve_structure`; not consulted by any method in this block.
    preserve_structure: bool,
}

impl MmdFormatter {
    /// Create a formatter with the default math delimiters.
    pub fn new() -> Self {
        Self::with_delimiters(MathDelimiters::default())
    }

    /// Create a formatter that writes math with the given delimiters.
    pub fn with_delimiters(delimiters: MathDelimiters) -> Self {
        Self {
            delimiters,
            include_metadata: false,
            preserve_structure: true,
        }
    }

    /// Builder: set the `include_metadata` flag.
    pub fn include_metadata(mut self, include: bool) -> Self {
        self.include_metadata = include;
        self
    }

    /// Builder: set the `preserve_structure` flag.
    pub fn preserve_structure(mut self, preserve: bool) -> Self {
        self.preserve_structure = preserve;
        self
    }

    /// Format line data to MMD.
    ///
    /// Dispatches on `line_type` (`text`, `math`/`equation`, `inline_math`,
    /// `table_row`, `list_item`, `heading`, `image`, `chemistry`); unknown
    /// types are written as plain text. A blank line is inserted after a
    /// run of table rows, whatever follows it, so the next block is not
    /// absorbed into the table. The result is trimmed.
    pub fn format(&self, lines: &[LineData]) -> String {
        let mut output = String::new();
        let mut in_table = false;
        let mut in_list = false;

        for line in lines {
            let kind = line.line_type.as_str();

            // Terminate the table as soon as a non-row line shows up.
            if in_table && kind != "table_row" {
                output.push('\n');
                in_table = false;
            }

            match kind {
                "text" => {
                    // A text line that does not look like a list item ends
                    // the current list.
                    if in_list && !Self::starts_like_list_item(&line.text) {
                        output.push('\n');
                        in_list = false;
                    }
                    output.push_str(&line.text);
                    output.push('\n');
                }
                "math" | "equation" => {
                    let latex = line.latex.as_deref().unwrap_or(&line.text);
                    output.push_str(&self.format_math(latex, true));
                    output.push_str("\n\n");
                }
                "inline_math" => {
                    let latex = line.latex.as_deref().unwrap_or(&line.text);
                    output.push_str(&self.format_math(latex, false));
                }
                "table_row" => {
                    in_table = true;
                    output.push_str(&self.format_table_row(&line.text));
                    output.push('\n');
                }
                "list_item" => {
                    in_list = true;
                    output.push_str(&line.text);
                    output.push('\n');
                }
                "heading" => {
                    output.push_str(&format!("# {}\n\n", line.text));
                }
                "image" => {
                    output.push_str(&self.format_image(&line.text));
                    output.push_str("\n\n");
                }
                "chemistry" => {
                    let smiles = line.text.trim();
                    output.push_str(&format!("```smiles\n{}\n```\n\n", smiles));
                }
                _ => {
                    // Unknown type, output as text.
                    output.push_str(&line.text);
                    output.push('\n');
                }
            }
        }

        output.trim().to_string()
    }

    /// Whether `text` begins (after leading whitespace) with a bullet marker
    /// (`-`, `*`) or a digit, i.e. looks like a bulleted or numbered item.
    fn starts_like_list_item(text: &str) -> bool {
        match text.trim_start().chars().next() {
            Some(c) => c == '-' || c == '*' || c.is_ascii_digit(),
            None => false,
        }
    }

    /// Format a LaTeX math expression.
    ///
    /// Display math puts the trimmed expression on its own line between the
    /// display delimiters; inline math wraps it directly in the inline
    /// delimiters.
    pub fn format_math(&self, latex: &str, display: bool) -> String {
        if display {
            format!(
                "{}\n{}\n{}",
                self.delimiters.display_start,
                latex.trim(),
                self.delimiters.display_end
            )
        } else {
            format!(
                "{}{}{}",
                self.delimiters.inline_start,
                latex.trim(),
                self.delimiters.inline_end
            )
        }
    }

    /// Format a table row as `| cell | cell |`.
    ///
    /// One optional leading and one optional trailing `|` of the input are
    /// treated as borders, so a row that is already delimited does not gain
    /// empty edge cells. Interior empty cells are preserved.
    fn format_table_row(&self, row: &str) -> String {
        let trimmed = row.trim();
        let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
        let inner = inner.strip_suffix('|').unwrap_or(inner);
        let cells: Vec<&str> = inner.split('|').map(str::trim).collect();
        format!("| {} |", cells.join(" | "))
    }

    /// Format an image reference.
    ///
    /// Input that contains both `[` and `]` is assumed to already be a
    /// markdown image/link and is returned unchanged; anything else is
    /// treated as a path and wrapped as `![Image](path)`.
    fn format_image(&self, path: &str) -> String {
        if path.contains('[') && path.contains(']') {
            path.to_string()
        } else {
            format!("![Image]({})", path)
        }
    }

    /// Convert plain text with embedded `$...$` / `$$...$$` math to MMD.
    ///
    /// Math segments are re-emitted through `format_math` (and therefore
    /// with this formatter's delimiters); everything else is copied.
    /// - A backslash keeps the following character literal, so `\$` is never
    ///   taken as a delimiter.
    /// - Inside inline math a `$` always closes the segment, so adjacent
    ///   segments such as `$a$$b$` are handled as two inline expressions.
    /// - A math segment that is never closed is copied back verbatim,
    ///   including its opening delimiter.
    pub fn from_mixed_text(&self, text: &str) -> String {
        let mut output = String::new();
        let mut current = String::new();
        // None = plain text, Some(false) = inline math, Some(true) = display math.
        let mut math: Option<bool> = None;
        let mut chars = text.chars().peekable();

        while let Some(c) = chars.next() {
            match c {
                '\\' => {
                    // Copy the escape pair untouched.
                    current.push(c);
                    if let Some(escaped) = chars.next() {
                        current.push(escaped);
                    }
                }
                '$' => {
                    let doubled = chars.peek() == Some(&'$');
                    match (math, doubled) {
                        (None, _) => {
                            // Flush the plain text and open a math segment.
                            output.push_str(&current);
                            current.clear();
                            if doubled {
                                chars.next();
                            }
                            math = Some(doubled);
                        }
                        (Some(true), true) => {
                            chars.next();
                            output.push_str(&self.format_math(&current, true));
                            current.clear();
                            math = None;
                        }
                        (Some(true), false) => {
                            // A lone `$` inside display math is content.
                            current.push('$');
                        }
                        (Some(false), _) => {
                            output.push_str(&self.format_math(&current, false));
                            current.clear();
                            math = None;
                        }
                    }
                }
                _ => current.push(c),
            }
        }

        // Unterminated math: restore the delimiter that opened it.
        match math {
            Some(true) => output.push_str("$$"),
            Some(false) => output.push('$'),
            None => {}
        }
        output.push_str(&current);
        output
    }

    /// Format a complete document with optional frontmatter.
    ///
    /// When `metadata` is given it is written verbatim between `---` lines,
    /// followed by a level-1 heading with `title` and then `content`.
    pub fn format_document(&self, title: &str, content: &str, metadata: Option<&str>) -> String {
        let mut doc = String::new();

        if let Some(meta) = metadata {
            doc.push_str("---\n");
            doc.push_str(meta);
            doc.push_str("\n---\n\n");
        }

        doc.push_str(&format!("# {}\n\n", title));
        doc.push_str(content);
        doc
    }
}

impl Default for MmdFormatter {
    fn default() -> Self {
        Self::new()
    }
}
/// Parse MMD back to structured data.
#[derive(Debug, Clone, Copy)]
pub struct MmdParser;

impl MmdParser {
    /// Create a parser. The parser holds no state.
    pub fn new() -> Self {
        Self
    }

    /// Parse MMD content and extract its LaTeX expressions.
    ///
    /// Returns the trimmed body of every `$...$` and `$$...$$` segment in
    /// document order, paired with `true` for display math and `false` for
    /// inline math.
    ///
    /// - A backslash keeps the next character literal, so `\$` neither opens
    ///   nor closes a segment (and is kept as-is inside one).
    /// - Inside inline math a `$` always closes the segment, so `$a$$b$`
    ///   yields two inline expressions.
    /// - A lone `$` inside display math is part of the expression.
    /// - A segment that is never closed is dropped.
    pub fn extract_latex(&self, content: &str) -> Vec<(String, bool)> {
        let mut expressions = Vec::new();
        let mut current = String::new();
        // None = outside math, Some(false) = inline, Some(true) = display.
        let mut math: Option<bool> = None;
        let mut chars = content.chars().peekable();

        while let Some(c) = chars.next() {
            match c {
                '\\' => {
                    // Consume the escaped character together with the
                    // backslash; keep the pair only when inside math.
                    let escaped = chars.next();
                    if math.is_some() {
                        current.push(c);
                        if let Some(e) = escaped {
                            current.push(e);
                        }
                    }
                }
                '$' => {
                    let doubled = chars.peek() == Some(&'$');
                    match (math, doubled) {
                        (None, _) => {
                            if doubled {
                                chars.next();
                            }
                            math = Some(doubled);
                        }
                        (Some(true), true) => {
                            chars.next();
                            expressions.push((current.trim().to_string(), true));
                            current.clear();
                            math = None;
                        }
                        (Some(true), false) => current.push('$'),
                        (Some(false), _) => {
                            expressions.push((current.trim().to_string(), false));
                            current.clear();
                            math = None;
                        }
                    }
                }
                _ => {
                    if math.is_some() {
                        current.push(c);
                    }
                }
            }
        }

        expressions
    }
}

impl Default for MmdParser {
    fn default() -> Self {
        Self::new()
    }
}
// Unit tests for the MMD formatter and parser in this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::output::BoundingBox;
// Inline math with the default delimiters is wrapped in single dollars.
#[test]
fn test_format_inline_math() {
let formatter = MmdFormatter::new();
let result = formatter.format_math("E = mc^2", false);
assert_eq!(result, "$E = mc^2$");
}
// Display math with the default delimiters uses `$$` and keeps the body.
#[test]
fn test_format_display_math() {
let formatter = MmdFormatter::new();
let result = formatter.format_math(r"\int_0^1 x^2 dx", true);
assert!(result.contains("$$"));
assert!(result.contains(r"\int_0^1 x^2 dx"));
}
// A text line followed by a math line yields the text plus a display block.
#[test]
fn test_format_lines() {
let formatter = MmdFormatter::new();
let lines = vec![
LineData {
line_type: "text".to_string(),
text: "The equation".to_string(),
latex: None,
bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
confidence: 0.95,
words: None,
},
LineData {
line_type: "math".to_string(),
text: "E = mc^2".to_string(),
latex: Some(r"E = mc^2".to_string()),
bbox: BoundingBox::new(0.0, 25.0, 100.0, 30.0),
confidence: 0.98,
words: None,
},
];
let result = formatter.format(&lines);
assert!(result.contains("The equation"));
assert!(result.contains("$$"));
assert!(result.contains("mc^2"));
}
// Inline math embedded in prose survives the conversion, as does the prose.
#[test]
fn test_from_mixed_text() {
let formatter = MmdFormatter::new();
let text = "The formula $E = mc^2$ is famous.";
let result = formatter.from_mixed_text(text);
assert!(result.contains("$E = mc^2$"));
assert!(result.contains("famous"));
}
// The parser returns expressions in order, flagged inline (false) or
// display (true).
#[test]
fn test_extract_latex() {
let parser = MmdParser::new();
let content = "Text with $inline$ and $$display$$ math.";
let expressions = parser.extract_latex(content);
assert_eq!(expressions.len(), 2);
assert_eq!(expressions[0].0, "inline");
assert!(!expressions[0].1); // inline
assert_eq!(expressions[1].0, "display");
assert!(expressions[1].1); // display
}
// A document with metadata contains the frontmatter fence, the metadata,
// the title heading and the content.
#[test]
fn test_format_document() {
let formatter = MmdFormatter::new();
let doc = formatter.format_document(
"My Document",
"Content here",
Some("author: Test\ndate: 2025-01-01"),
);
assert!(doc.contains("---"));
assert!(doc.contains("author: Test"));
assert!(doc.contains("# My Document"));
assert!(doc.contains("Content here"));
}
}

View File

@@ -0,0 +1,359 @@
//! Output formatting module for Scipix OCR results
//!
//! Supports multiple output formats:
//! - Text: Plain text extraction
//! - LaTeX: Mathematical notation
//! - Scipix Markdown (mmd): Enhanced markdown with math
//! - MathML: XML-based mathematical markup
//! - HTML: Web-ready output with math rendering
//! - SMILES: Chemical structure notation
//! - DOCX: Microsoft Word format (Office Math ML)
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
pub mod docx;
pub mod formatter;
pub mod html;
pub mod json;
pub mod latex;
pub mod mmd;
pub mod smiles;
pub use formatter::{HtmlEngine, MathDelimiters, OutputFormatter};
pub use json::ApiResponse;
/// Output format types supported by Scipix OCR
///
/// Serialized names come from the serde attributes: variants without an
/// explicit `rename` use the snake_case form of their identifier (`text`,
/// `html`, `docx`); the others use exactly the string given in `rename`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OutputFormat {
/// Plain text output
Text,
/// LaTeX mathematical notation (serialized as `latex_normal`)
#[serde(rename = "latex_normal")]
LaTeX,
/// Styled LaTeX with custom packages (serialized as `latex_styled`)
#[serde(rename = "latex_styled")]
LaTeXStyled,
/// Mathematical Markup Language (serialized as `mathml`)
#[serde(rename = "mathml")]
MathML,
/// Scipix Markdown (enhanced markdown; serialized as `mmd`)
#[serde(rename = "mmd")]
Mmd,
/// ASCII Math notation (serialized as `asciimath`)
#[serde(rename = "asciimath")]
AsciiMath,
/// HTML with embedded math
Html,
/// Chemical structure notation (serialized as `smiles`)
#[serde(rename = "smiles")]
Smiles,
/// Microsoft Word format
Docx,
}
impl OutputFormat {
/// Get the file extension for this format
pub fn extension(&self) -> &'static str {
match self {
OutputFormat::Text => "txt",
OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "tex",
OutputFormat::MathML => "xml",
OutputFormat::Mmd => "mmd",
OutputFormat::AsciiMath => "txt",
OutputFormat::Html => "html",
OutputFormat::Smiles => "smi",
OutputFormat::Docx => "docx",
}
}
/// Get the MIME type for this format
pub fn mime_type(&self) -> &'static str {
match self {
OutputFormat::Text | OutputFormat::AsciiMath => "text/plain",
OutputFormat::LaTeX | OutputFormat::LaTeXStyled => "application/x-latex",
OutputFormat::MathML => "application/mathml+xml",
OutputFormat::Mmd => "text/markdown",
OutputFormat::Html => "text/html",
OutputFormat::Smiles => "chemical/x-daylight-smiles",
OutputFormat::Docx => {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
}
}
}
}
/// Complete OCR result with all possible output formats
///
/// `line_data` and `error` are left out of the serialized output when they
/// are `None`; `metadata` is flattened into the top level of the object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrResult {
/// Request identifier
pub request_id: String,
/// Version of the OCR engine
pub version: String,
/// Image dimensions
// NOTE(review): unit is presumably pixels — confirm with the producer.
pub image_width: u32,
/// Image height (second half of the image dimensions).
pub image_height: u32,
/// Processing status
// NOTE(review): presumably "the content was detected as printed" — confirm.
pub is_printed: bool,
/// NOTE(review): presumably "the content was detected as handwritten" — confirm.
pub is_handwritten: bool,
/// NOTE(review): presumably the confidence of the auto-rotation estimate — confirm range.
pub auto_rotate_confidence: f32,
/// NOTE(review): presumably the rotation that was applied or suggested, in degrees — confirm.
pub auto_rotate_degrees: i32,
/// Confidence scores
pub confidence: f32,
/// NOTE(review): relation to `confidence` is not visible in this file — confirm.
pub confidence_rate: f32,
/// Available output formats
pub formats: FormatsData,
/// Detailed line and word data
#[serde(skip_serializing_if = "Option::is_none")]
pub line_data: Option<Vec<LineData>>,
/// Error information if processing failed
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing metadata
// Flattened: the entries are serialized as sibling keys of the fields
// above rather than under a `metadata` key.
#[serde(flatten)]
pub metadata: HashMap<String, serde_json::Value>,
}
/// All available output format data
///
/// One optional string per output format. A format that was not produced is
/// `None` and is left out of the serialized output. `Default` yields a value
/// with every format absent.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FormatsData {
/// Plain text rendering.
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
/// LaTeX rendering (the `latex_normal` format).
#[serde(skip_serializing_if = "Option::is_none")]
pub latex_normal: Option<String>,
/// Styled LaTeX rendering (the `latex_styled` format).
#[serde(skip_serializing_if = "Option::is_none")]
pub latex_styled: Option<String>,
/// NOTE(review): no producer is visible in this file; the name suggests a
/// simplified LaTeX rendering — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub latex_simplified: Option<String>,
/// MathML rendering.
#[serde(skip_serializing_if = "Option::is_none")]
pub mathml: Option<String>,
/// ASCII Math rendering.
#[serde(skip_serializing_if = "Option::is_none")]
pub asciimath: Option<String>,
/// Scipix Markdown rendering.
#[serde(skip_serializing_if = "Option::is_none")]
pub mmd: Option<String>,
/// HTML rendering.
#[serde(skip_serializing_if = "Option::is_none")]
pub html: Option<String>,
/// SMILES chemical notation.
#[serde(skip_serializing_if = "Option::is_none")]
pub smiles: Option<String>,
}
/// Line-level OCR data with positioning
///
/// `latex` and `words` are left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineData {
/// Line type: text, math, table, image, etc.
// Serialized under the key `type` (a reserved word in Rust, hence the
// different field name).
#[serde(rename = "type")]
pub line_type: String,
/// Content in various formats
pub text: String,
/// LaTeX form of the line, when one is available.
#[serde(skip_serializing_if = "Option::is_none")]
pub latex: Option<String>,
/// Bounding box coordinates
pub bbox: BoundingBox,
/// Confidence score
pub confidence: f32,
/// Word-level data
#[serde(skip_serializing_if = "Option::is_none")]
pub words: Option<Vec<WordData>>,
}
/// Word-level OCR data
///
/// `latex` is left out of the serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WordData {
/// Recognized text of the word.
pub text: String,
/// Bounding box of the word.
pub bbox: BoundingBox,
/// Confidence score for the word.
pub confidence: f32,
/// LaTeX form of the word, when one is available.
#[serde(skip_serializing_if = "Option::is_none")]
pub latex: Option<String>,
}
/// Bounding box coordinates (x, y, width, height)
///
/// `center()` is computed as `(x + width / 2, y + height / 2)`, i.e. `x`/`y`
/// denote the corner from which `width`/`height` extend.
// NOTE(review): unit and origin are presumably image pixels from the top-left
// corner — confirm with the producer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct BoundingBox {
/// Horizontal coordinate of the box origin.
pub x: f32,
/// Vertical coordinate of the box origin.
pub y: f32,
/// Horizontal extent of the box.
pub width: f32,
/// Vertical extent of the box.
pub height: f32,
}
impl BoundingBox {
pub fn new(x: f32, y: f32, width: f32, height: f32) -> Self {
Self {
x,
y,
width,
height,
}
}
pub fn area(&self) -> f32 {
self.width * self.height
}
pub fn center(&self) -> (f32, f32) {
(self.x + self.width / 2.0, self.y + self.height / 2.0)
}
}
/// Convert between output formats.
///
/// Supported conversions:
/// - any format to itself: the content is returned unchanged;
/// - LaTeX to Text: LaTeX markup is stripped;
/// - Mmd to LaTeX: the math segments are extracted;
/// - LaTeX to Html: a standalone page that renders the expression with
///   MathJax 3 loaded from the jsDelivr CDN.
///
/// # Errors
///
/// Returns `Err` with a descriptive message for every other pair.
pub fn convert_format(
    content: &str,
    from: OutputFormat,
    to: OutputFormat,
) -> Result<String, String> {
    // Simple pass-through for same format.
    if from == to {
        return Ok(content.to_string());
    }

    match (from, to) {
        (OutputFormat::LaTeX, OutputFormat::Text) => Ok(strip_latex(content)),
        (OutputFormat::Mmd, OutputFormat::LaTeX) => Ok(extract_latex_from_mmd(content)),
        (OutputFormat::LaTeX, OutputFormat::Html) => {
            // The expression is placed in an HTML text node, so `&`, `<` and
            // `>` must be escaped (`&` first). Otherwise a formula such as
            // `a < b` breaks the markup and untrusted input could inject
            // elements. The browser decodes the entities before MathJax
            // reads the text.
            let escaped = content
                .replace('&', "&amp;")
                .replace('<', "&lt;")
                .replace('>', "&gt;");

            // No polyfill.io script: MathJax 3 does not need it in current
            // browsers and that third-party domain is not trustworthy.
            Ok(format!(
                r#"<!DOCTYPE html>
<html>
<head>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</head>
<body>
<p>\({}\)</p>
</body>
</html>"#,
                escaped
            ))
        }
        _ => Err(format!(
            "Conversion from {:?} to {:?} not supported",
            from, to
        )),
    }
}
/// Reduce LaTeX to rough plain text.
///
/// Purely textual: math delimiters and a few text/font wrappers are removed,
/// every closing brace is dropped, spacing commands and `\\` become a single
/// space, and the result is trimmed. Other commands are left in place.
fn strip_latex(content: &str) -> String {
    const DELIMITERS: [&str; 5] = ["\\(", "\\)", "\\[", "\\]", "$$"];
    const WRAPPER_OPENERS: [&str; 4] = ["\\text{", "\\mathrm{", "\\mathbf{", "\\mathit{"];
    const SPACERS: [&str; 7] = ["\\\\", "\\,", "\\;", "\\:", "\\!", "\\quad", "\\qquad"];

    // Math delimiters vanish entirely.
    let mut text = content.to_string();
    for delimiter in DELIMITERS.iter() {
        text = text.replace(delimiter, "");
    }

    // Drop the opening part of each wrapper; the matching closing brace is
    // removed by the blanket brace removal right after.
    for opener in WRAPPER_OPENERS.iter() {
        text = text.replace(opener, "");
    }
    text = text.replace('}', "");

    // Spacing commands and line breaks turn into a plain space.
    for spacer in SPACERS.iter() {
        text = text.replace(spacer, " ");
    }

    text.trim().to_string()
}
/// Extract the LaTeX of every math segment in MMD content.
///
/// Both inline (`$...$`) and display (`$$...$$`) segments are collected in
/// document order. Each expression is trimmed, empty ones are skipped, and
/// the results are joined with a blank line.
///
/// - A backslash keeps the next character literal, so `\$` is never a
///   delimiter.
/// - Inside inline math a `$` always closes the segment.
/// - A lone `$` inside display math is part of the expression.
/// - A segment that is never closed is dropped.
fn extract_latex_from_mmd(content: &str) -> String {
    let mut latex_parts: Vec<String> = Vec::new();
    let mut current = String::new();
    // None = outside math, Some(false) = inline, Some(true) = display.
    let mut math: Option<bool> = None;
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        // `true` when the current character ends a math segment.
        let mut closed = false;

        match c {
            '\\' => {
                // Consume the escaped character with its backslash; keep the
                // pair only when it is part of a math segment.
                let escaped = chars.next();
                if math.is_some() {
                    current.push(c);
                    if let Some(e) = escaped {
                        current.push(e);
                    }
                }
            }
            '$' => {
                let doubled = chars.peek() == Some(&'$');
                match (math, doubled) {
                    (None, _) => {
                        if doubled {
                            chars.next();
                        }
                        math = Some(doubled);
                    }
                    (Some(true), true) => {
                        chars.next();
                        closed = true;
                    }
                    (Some(true), false) => current.push('$'),
                    (Some(false), _) => closed = true,
                }
            }
            _ => {
                if math.is_some() {
                    current.push(c);
                }
            }
        }

        if closed {
            let expression = current.trim();
            if !expression.is_empty() {
                latex_parts.push(expression.to_string());
            }
            current.clear();
            math = None;
        }
    }

    latex_parts.join("\n\n")
}
// Unit tests for the shared output types and conversion helpers in this file.
#[cfg(test)]
mod tests {
use super::*;
// Spot-check the extension mapping for the most common formats.
#[test]
fn test_output_format_extension() {
assert_eq!(OutputFormat::Text.extension(), "txt");
assert_eq!(OutputFormat::LaTeX.extension(), "tex");
assert_eq!(OutputFormat::Html.extension(), "html");
assert_eq!(OutputFormat::Mmd.extension(), "mmd");
}
// Spot-check the MIME type mapping.
#[test]
fn test_output_format_mime_type() {
assert_eq!(OutputFormat::Text.mime_type(), "text/plain");
assert_eq!(OutputFormat::LaTeX.mime_type(), "application/x-latex");
assert_eq!(OutputFormat::Html.mime_type(), "text/html");
}
// Area is width * height; the center is the origin plus half the extent.
#[test]
fn test_bounding_box() {
let bbox = BoundingBox::new(10.0, 20.0, 100.0, 50.0);
assert_eq!(bbox.area(), 5000.0);
assert_eq!(bbox.center(), (60.0, 45.0));
}
// Text/font wrappers are removed while their content is kept.
#[test]
fn test_strip_latex() {
let input = r"\text{Hello } \mathbf{World}";
let output = strip_latex(input);
assert!(output.contains("Hello"));
assert!(output.contains("World"));
}
// Converting a format to itself returns the content unchanged.
#[test]
fn test_convert_same_format() {
let content = "test content";
let result = convert_format(content, OutputFormat::Text, OutputFormat::Text).unwrap();
assert_eq!(result, content);
}
}

View File

@@ -0,0 +1,347 @@
//! SMILES (Simplified Molecular Input Line Entry System) generator
//!
//! Converts chemical structure representations to SMILES notation.
//! This is a simplified implementation - full chemistry support requires
//! dedicated chemistry libraries like RDKit or OpenBabel.
use super::OcrResult;
/// SMILES notation generator for chemical structures
pub struct SmilesGenerator {
canonical: bool,
include_stereochemistry: bool,
}
impl SmilesGenerator {
pub fn new() -> Self {
Self {
canonical: true,
include_stereochemistry: true,
}
}
pub fn canonical(mut self, canonical: bool) -> Self {
self.canonical = canonical;
self
}
pub fn stereochemistry(mut self, include: bool) -> Self {
self.include_stereochemistry = include;
self
}
/// Generate SMILES from OCR result
pub fn generate_from_result(&self, result: &OcrResult) -> Result<String, String> {
// Check if SMILES already available
if let Some(smiles) = &result.formats.smiles {
return Ok(smiles.clone());
}
// Check for chemistry-related content in line data
if let Some(line_data) = &result.line_data {
for line in line_data {
if line.line_type == "chemistry" || line.line_type == "molecule" {
return self.parse_chemical_notation(&line.text);
}
}
}
Err("No chemical structure data found".to_string())
}
/// Parse chemical notation to SMILES
/// This is a placeholder - real implementation needs chemistry parsing
fn parse_chemical_notation(&self, notation: &str) -> Result<String, String> {
// Check if already SMILES format
if self.is_smiles(notation) {
return Ok(notation.to_string());
}
// Try to parse common chemical formulas
if let Some(smiles) = self.simple_formula_to_smiles(notation) {
return Ok(smiles);
}
Err(format!("Cannot convert '{}' to SMILES", notation))
}
/// Check if string is already SMILES notation
fn is_smiles(&self, s: &str) -> bool {
// Basic SMILES characters
let smiles_chars = "CNOPSFClBrI[]()=#@+-0123456789cnops";
s.chars().all(|c| smiles_chars.contains(c))
}
/// Convert simple chemical formulas to SMILES
fn simple_formula_to_smiles(&self, formula: &str) -> Option<String> {
// Common chemical formulas
match formula.trim() {
"H2O" | "water" => Some("O".to_string()),
"CO2" | "carbon dioxide" => Some("O=C=O".to_string()),
"CH4" | "methane" => Some("C".to_string()),
"C2H6" | "ethane" => Some("CC".to_string()),
"C2H5OH" | "ethanol" => Some("CCO".to_string()),
"CH3COOH" | "acetic acid" => Some("CC(=O)O".to_string()),
"C6H6" | "benzene" => Some("c1ccccc1".to_string()),
"C6H12O6" | "glucose" => Some("OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O".to_string()),
"NH3" | "ammonia" => Some("N".to_string()),
"H2SO4" | "sulfuric acid" => Some("OS(=O)(=O)O".to_string()),
"NaCl" | "sodium chloride" => Some("[Na+].[Cl-]".to_string()),
_ => None,
}
}
/// Validate SMILES notation
pub fn validate(&self, smiles: &str) -> Result<(), String> {
// Basic validation checks
// Check parentheses balance
let mut depth = 0;
for c in smiles.chars() {
match c {
'(' => depth += 1,
')' => {
depth -= 1;
if depth < 0 {
return Err("Unbalanced parentheses".to_string());
}
}
_ => {}
}
}
if depth != 0 {
return Err("Unbalanced parentheses".to_string());
}
// Check brackets balance
let mut depth = 0;
for c in smiles.chars() {
match c {
'[' => depth += 1,
']' => {
depth -= 1;
if depth < 0 {
return Err("Unbalanced brackets".to_string());
}
}
_ => {}
}
}
if depth != 0 {
return Err("Unbalanced brackets".to_string());
}
Ok(())
}
/// Convert SMILES to molecular formula
///
/// The result lists each element symbol followed by its count (the count is
/// omitted when it is 1). `C`, `H`, `N`, `O`, `S`, `P` and `F` come first, in
/// that order, followed by any other elements in alphabetical order.
///
/// Atoms are recognised as follows:
/// - two-letter symbols such as `Cl`, `Br` or `[Na+]` are one atom of that
///   element, not an atom of their first letter;
/// - aromatic lowercase atoms (`c`, `n`, `o`, ...) count as their element;
/// - an explicit hydrogen count inside brackets (`[NH4+]`) is honoured.
///
/// Implicit hydrogens are NOT added, so `"CCO"` yields `"C2O"` rather than
/// `"C2H6O"`.
///
/// # Errors
///
/// Propagates the error from `validate`, and returns
/// `"Could not determine molecular formula"` when no atom is found.
pub fn to_molecular_formula(&self, smiles: &str) -> Result<String, String> {
    self.validate(smiles)?;

    let mut tally: std::collections::BTreeMap<String, usize> =
        std::collections::BTreeMap::new();
    self.for_each_atom(smiles, |symbol, count| {
        *tally.entry(symbol.to_string()).or_insert(0) += count;
    });

    // These seven keep the order this method has always emitted them in, so
    // existing output is unchanged; everything else follows alphabetically
    // (the BTreeMap iteration order).
    const LEADING_ELEMENTS: [&str; 7] = ["C", "H", "N", "O", "S", "P", "F"];
    let mut ordered: Vec<(String, usize)> = Vec::new();
    for symbol in LEADING_ELEMENTS.iter() {
        if let Some(count) = tally.remove(*symbol) {
            ordered.push((symbol.to_string(), count));
        }
    }
    ordered.extend(tally);

    let mut formula = String::new();
    for (symbol, count) in &ordered {
        formula.push_str(symbol);
        if *count > 1 {
            formula.push_str(&count.to_string());
        }
    }

    if formula.is_empty() {
        Err("Could not determine molecular formula".to_string())
    } else {
        Ok(formula)
    }
}

/// Calculate molecular weight (approximate)
///
/// Sums rounded standard atomic weights over the atoms written in `smiles`,
/// using the same atom recognition as `to_molecular_formula`. Implicit
/// hydrogens are not included, so the result is a lower bound for most
/// organic molecules (`"CCO"` gives about 40.02, not 46.07). Elements
/// without an entry in the weight table contribute nothing.
///
/// # Errors
///
/// Propagates the error from `validate`.
pub fn molecular_weight(&self, smiles: &str) -> Result<f32, String> {
    self.validate(smiles)?;

    let mut total_weight = 0.0_f32;
    self.for_each_atom(smiles, |symbol, count| {
        // Simplified atomic weights
        let weight: f32 = match symbol {
            "H" => 1.008,
            "B" => 10.81,
            "C" => 12.01,
            "N" => 14.01,
            "O" => 16.00,
            "F" => 19.00,
            "Na" => 22.99,
            "P" => 30.97,
            "S" => 32.07,
            "Cl" => 35.45,
            "Br" => 79.90,
            "I" => 126.90,
            // Unknown element: best-effort, contributes nothing.
            _ => return,
        };
        total_weight += weight * count as f32;
    });

    Ok(total_weight)
}

/// Calls `visit(symbol, multiplicity)` for every atom written in `smiles`,
/// in order of appearance.
///
/// Recognition rules:
/// - inside `[...]` an uppercase letter followed by any lowercase letter is
///   a two-letter element (`[Na+]`, `[Co]`);
/// - outside brackets the lowercase letter is absorbed only when it is not
///   an aromatic atom symbol, so `Cl`/`Br` are single atoms while `Cc` is
///   two carbons;
/// - lowercase `b c n o p s` (plus `se`/`as` inside brackets) are aromatic
///   atoms of the corresponding element;
/// - `H` followed by digits inside brackets carries that hydrogen count,
///   and a count of zero reports nothing.
///
/// All other characters (bonds, digits, charges, chirality marks) are
/// skipped. Extended chirality tags such as `@TH1` are not understood.
fn for_each_atom(&self, smiles: &str, mut visit: impl FnMut(&str, usize)) {
    let chars: Vec<char> = smiles.chars().collect();
    let mut in_bracket = false;
    let mut i = 0;

    while i < chars.len() {
        let c = chars[i];
        let next = chars.get(i + 1).copied();
        i += 1;

        match c {
            '[' => in_bracket = true,
            ']' => in_bracket = false,
            'A'..='Z' => {
                let mut symbol = String::from(c);
                if let Some(second) = next {
                    let aromatic = matches!(second, 'b' | 'c' | 'n' | 'o' | 'p' | 's');
                    if second.is_ascii_lowercase() && (in_bracket || !aromatic) {
                        symbol.push(second);
                        i += 1;
                    }
                }

                let mut multiplicity: usize = 1;
                if in_bracket && symbol == "H" {
                    let start = i;
                    while i < chars.len() && chars[i].is_ascii_digit() {
                        i += 1;
                    }
                    if i > start {
                        let digits: String = chars[start..i].iter().collect();
                        // An absurdly long digit run cannot be a real count;
                        // fall back to a single hydrogen.
                        multiplicity = digits.parse().unwrap_or(1);
                    }
                }

                if multiplicity > 0 {
                    visit(&symbol, multiplicity);
                }
            }
            'a'..='z' => {
                let symbol = match (c, next) {
                    ('s', Some('e')) if in_bracket => {
                        i += 1;
                        "Se"
                    }
                    ('a', Some('s')) if in_bracket => {
                        i += 1;
                        "As"
                    }
                    ('b', _) => "B",
                    ('c', _) => "C",
                    ('n', _) => "N",
                    ('o', _) => "O",
                    ('p', _) => "P",
                    ('s', _) => "S",
                    // Any other lowercase letter is not an atom.
                    _ => continue,
                };
                visit(symbol, 1);
            }
            _ => {}
        }
    }
}
}
/// `Default` delegates to `SmilesGenerator::new()`, so `default()` and
/// `new()` produce the same value.
impl Default for SmilesGenerator {
fn default() -> Self {
Self::new()
}
}
/// SMILES parser for extracting structure information
///
/// The parser holds no state; every method works purely on the string it is
/// given and performs no validation of its own.
#[derive(Debug, Clone, Copy)]
pub struct SmilesParser;

impl SmilesParser {
    /// Creates a parser.
    pub fn new() -> Self {
        Self
    }

    /// Count atoms in SMILES notation
    ///
    /// Returns a map from element symbol to the number of atoms of that
    /// element written in `smiles`.
    ///
    /// - Inside `[...]` an uppercase letter followed by any lowercase letter
    ///   is a two-letter element (`[Na+]`, `[Co]`).
    /// - Outside brackets the lowercase letter is absorbed only when it is
    ///   not an aromatic atom symbol, so `Cl`/`Br` are single atoms while
    ///   `Cc` is two carbons rather than one bogus `"Cc"` atom.
    /// - Aromatic lowercase atoms `b c n o p s` (plus `se`/`as` inside
    ///   brackets) are counted under their element symbol (`c` -> `"C"`).
    /// - `H` followed by digits inside brackets contributes that many
    ///   hydrogens (`[NH4+]` -> `H: 4`); a count of zero adds no entry.
    ///
    /// Implicit hydrogens are not counted.
    pub fn count_atoms(&self, smiles: &str) -> std::collections::HashMap<String, usize> {
        let chars: Vec<char> = smiles.chars().collect();
        let mut counts = std::collections::HashMap::new();
        let mut in_bracket = false;
        let mut i = 0;

        while i < chars.len() {
            let c = chars[i];
            let next = chars.get(i + 1).copied();
            i += 1;

            match c {
                '[' => in_bracket = true,
                ']' => in_bracket = false,
                'A'..='Z' => {
                    let mut symbol = String::from(c);
                    // Check for two-letter atoms (Cl, Br, etc.)
                    if let Some(second) = next {
                        let aromatic = matches!(second, 'b' | 'c' | 'n' | 'o' | 'p' | 's');
                        if second.is_ascii_lowercase() && (in_bracket || !aromatic) {
                            symbol.push(second);
                            i += 1;
                        }
                    }

                    let mut multiplicity: usize = 1;
                    if in_bracket && symbol == "H" {
                        let start = i;
                        while i < chars.len() && chars[i].is_ascii_digit() {
                            i += 1;
                        }
                        if i > start {
                            let digits: String = chars[start..i].iter().collect();
                            // An absurdly long digit run cannot be a real
                            // count; fall back to a single hydrogen.
                            multiplicity = digits.parse().unwrap_or(1);
                        }
                    }

                    if multiplicity > 0 {
                        *counts.entry(symbol).or_insert(0) += multiplicity;
                    }
                }
                'a'..='z' => {
                    let symbol = match (c, next) {
                        ('s', Some('e')) if in_bracket => {
                            i += 1;
                            "Se"
                        }
                        ('a', Some('s')) if in_bracket => {
                            i += 1;
                            "As"
                        }
                        ('b', _) => "B",
                        ('c', _) => "C",
                        ('n', _) => "N",
                        ('o', _) => "O",
                        ('p', _) => "P",
                        ('s', _) => "S",
                        // Any other lowercase letter is not an atom.
                        _ => continue,
                    };
                    *counts.entry(symbol.to_string()).or_insert(0) += 1;
                }
                _ => {}
            }
        }

        counts
    }

    /// Extract ring information
    ///
    /// Returns the ring-closure labels in the order they appear, one entry
    /// per occurrence (so a single ring labelled `1` yields `[1, 1]`).
    ///
    /// Digits inside `[...]` are isotopes, hydrogen counts, charges or atom
    /// classes and are therefore skipped. A `%` introduces a two-digit label
    /// (`%12` -> `12`); if fewer than two digits follow, whatever digits are
    /// present form the label.
    pub fn find_rings(&self, smiles: &str) -> Vec<usize> {
        let mut rings = Vec::new();
        let mut in_bracket = false;
        let mut chars = smiles.chars().peekable();

        while let Some(c) = chars.next() {
            match c {
                '[' => in_bracket = true,
                ']' => in_bracket = false,
                _ if in_bracket => {}
                '%' => {
                    let mut label: Option<usize> = None;
                    for _ in 0..2 {
                        match chars.peek().and_then(|d| d.to_digit(10)) {
                            Some(d) => {
                                label = Some(label.unwrap_or(0) * 10 + d as usize);
                                chars.next();
                            }
                            None => break,
                        }
                    }
                    if let Some(label) = label {
                        rings.push(label);
                    }
                }
                _ => {
                    if let Some(digit) = c.to_digit(10) {
                        rings.push(digit as usize);
                    }
                }
            }
        }

        rings
    }
}

impl Default for SmilesParser {
    fn default() -> Self {
        Self::new()
    }
}
/// Unit tests for `SmilesGenerator` and `SmilesParser`.
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: locals are named `generator` rather than `gen`, because `gen` is
    // a reserved keyword starting with the Rust 2024 edition.

    #[test]
    fn test_is_smiles() {
        let generator = SmilesGenerator::new();
        assert!(generator.is_smiles("CCO"));
        assert!(generator.is_smiles("c1ccccc1"));
        assert!(generator.is_smiles("CC(=O)O"));
        assert!(!generator.is_smiles("not smiles!"));
    }

    #[test]
    fn test_simple_formula_conversion() {
        let generator = SmilesGenerator::new();
        assert_eq!(
            generator.simple_formula_to_smiles("H2O"),
            Some("O".to_string())
        );
        assert_eq!(
            generator.simple_formula_to_smiles("CO2"),
            Some("O=C=O".to_string())
        );
        assert_eq!(
            generator.simple_formula_to_smiles("CH4"),
            Some("C".to_string())
        );
        assert_eq!(
            generator.simple_formula_to_smiles("benzene"),
            Some("c1ccccc1".to_string())
        );
    }

    #[test]
    fn test_validate_smiles() {
        let generator = SmilesGenerator::new();
        assert!(generator.validate("CCO").is_ok());
        assert!(generator.validate("CC(O)C").is_ok());
        assert!(generator.validate("c1ccccc1").is_ok());
        assert!(generator.validate("[Na+].[Cl-]").is_ok());

        // Unbalanced parentheses: never closed, and closed before opened.
        assert_eq!(
            generator.validate("CC(O"),
            Err("Unbalanced parentheses".to_string())
        );
        assert_eq!(
            generator.validate("CC)O"),
            Err("Unbalanced parentheses".to_string())
        );

        // Unbalanced brackets: never closed, and closed before opened.
        assert_eq!(
            generator.validate("C[Na+"),
            Err("Unbalanced brackets".to_string())
        );
        assert_eq!(
            generator.validate("CNa+]"),
            Err("Unbalanced brackets".to_string())
        );
    }

    #[test]
    fn test_molecular_formula() {
        let generator = SmilesGenerator::new();
        // Implicit hydrogens are not added, so ethanol comes out as C2O.
        assert_eq!(
            generator.to_molecular_formula("CCO"),
            Ok("C2O".to_string())
        );
        // Validation failures are propagated.
        assert!(generator.to_molecular_formula("CC(O").is_err());
    }

    #[test]
    fn test_molecular_weight() {
        let generator = SmilesGenerator::new();

        // Water is written "O"; only the oxygen is counted.
        let weight = generator.molecular_weight("O").unwrap();
        assert!((weight - 16.00).abs() < 0.01);

        // Ethanol "CCO": heavy atoms only, 2 * 12.01 + 16.00 = 40.02
        // (the real molecular weight, with hydrogens, is about 46.07).
        let weight = generator.molecular_weight("CCO").unwrap();
        assert!((weight - 40.02).abs() < 0.01);

        // Validation failures are propagated.
        assert!(generator.molecular_weight("CC(O").is_err());
    }

    #[test]
    fn test_count_atoms() {
        let parser = SmilesParser::new();

        let counts = parser.count_atoms("CCO");
        assert_eq!(counts.get("C"), Some(&2));
        assert_eq!(counts.get("O"), Some(&1));

        let counts = parser.count_atoms("CC(=O)O");
        assert_eq!(counts.get("C"), Some(&2));
        assert_eq!(counts.get("O"), Some(&2));

        // Two-letter elements are single atoms.
        let counts = parser.count_atoms("ClCCBr");
        assert_eq!(counts.get("Cl"), Some(&1));
        assert_eq!(counts.get("Br"), Some(&1));
        assert_eq!(counts.get("C"), Some(&2));
    }

    #[test]
    fn test_find_rings() {
        let parser = SmilesParser::new();

        let rings = parser.find_rings("c1ccccc1");
        assert_eq!(rings, vec![1, 1]);

        let rings = parser.find_rings("C1CC1");
        assert_eq!(rings, vec![1, 1]);

        // Acyclic molecules have no ring-closure labels.
        assert!(parser.find_rings("CCO").is_empty());
    }
}