// LaTeX comparison and manipulation utilities // // Provides functions to normalize, compare, and analyze LaTeX strings use std::collections::HashSet; /// Normalize LaTeX string for comparison pub fn normalize(latex: &str) -> String { latex .chars() .filter(|c| !c.is_whitespace()) .collect::() .to_lowercase() } /// Check if two LaTeX expressions match semantically pub fn expressions_match(a: &str, b: &str) -> bool { let norm_a = normalize(a); let norm_b = normalize(b); // Direct match if norm_a == norm_b { return true; } // Try alternative representations // e.g., \frac{1}{2} vs 0.5, x^{2} vs x^2, etc. // For now, use normalized comparison norm_a == norm_b } /// Calculate similarity between two LaTeX strings (0.0 to 1.0) pub fn calculate_similarity(a: &str, b: &str) -> f64 { let norm_a = normalize(a); let norm_b = normalize(b); // Use Levenshtein distance ratio let distance = levenshtein_distance(&norm_a, &norm_b); let max_len = norm_a.len().max(norm_b.len()) as f64; if max_len == 0.0 { return 1.0; } 1.0 - (distance as f64 / max_len) } /// Calculate Levenshtein distance between two strings fn levenshtein_distance(a: &str, b: &str) -> usize { let a_chars: Vec = a.chars().collect(); let b_chars: Vec = b.chars().collect(); let a_len = a_chars.len(); let b_len = b_chars.len(); if a_len == 0 { return b_len; } if b_len == 0 { return a_len; } let mut matrix = vec![vec![0; b_len + 1]; a_len + 1]; for i in 0..=a_len { matrix[i][0] = i; } for j in 0..=b_len { matrix[0][j] = j; } for i in 1..=a_len { for j in 1..=b_len { let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 }; matrix[i][j] = *[ matrix[i - 1][j] + 1, // deletion matrix[i][j - 1] + 1, // insertion matrix[i - 1][j - 1] + cost, // substitution ] .iter() .min() .unwrap(); } } matrix[a_len][b_len] } /// Extract LaTeX commands from string pub fn extract_commands(latex: &str) -> HashSet { let mut commands = HashSet::new(); let mut chars = latex.chars().peekable(); while let Some(ch) = chars.next() { if ch == '\\' { let mut command = String::from("\\"); while let Some(&next_ch) = chars.peek() { if next_ch.is_alphabetic() { command.push(next_ch); chars.next(); } else { break; } } if command.len() > 1 { commands.insert(command); } } } commands } /// Count LaTeX elements (fractions, superscripts, etc.) pub fn count_elements(latex: &str) -> ElementCounts { let mut counts = ElementCounts::default(); if latex.contains(r"\frac") { counts.fractions = latex.matches(r"\frac").count(); } if latex.contains(r"\int") { counts.integrals = latex.matches(r"\int").count(); } if latex.contains(r"\sum") { counts.sums = latex.matches(r"\sum").count(); } if latex.contains("^") { counts.superscripts = latex.matches("^").count(); } if latex.contains("_") { counts.subscripts = latex.matches("_").count(); } if latex.contains(r"\begin{matrix}") || latex.contains(r"\begin{bmatrix}") { counts.matrices = 1; } counts } #[derive(Debug, Default, Clone, PartialEq)] pub struct ElementCounts { pub fractions: usize, pub integrals: usize, pub sums: usize, pub superscripts: usize, pub subscripts: usize, pub matrices: usize, } /// Validate LaTeX syntax (basic check) pub fn validate_syntax(latex: &str) -> Result<(), String> { let mut brace_count = 0; let mut bracket_count = 0; for ch in latex.chars() { match ch { '{' => brace_count += 1, '}' => { brace_count -= 1; if brace_count < 0 { return Err("Unmatched closing brace".to_string()); } } '[' => bracket_count += 1, ']' => { bracket_count -= 1; if bracket_count < 0 { return Err("Unmatched closing bracket".to_string()); } } _ => {} } } if brace_count != 0 { return Err(format!("Unmatched braces: {} unclosed", brace_count)); } if bracket_count != 0 { return Err(format!("Unmatched brackets: {} unclosed", bracket_count)); } Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_normalize() { assert_eq!(normalize("x + y"), "x+y"); assert_eq!(normalize(" a b "), "ab"); assert_eq!(normalize(r"\frac{1}{2}"), r"\frac{1}{2}"); } #[test] fn test_expressions_match() { assert!(expressions_match("x+y", "x + y")); assert!(expressions_match(r"\frac{1}{2}", r"\frac{1}{2}")); assert!(!expressions_match("x+y", "x-y")); } #[test] fn test_calculate_similarity() { assert!(calculate_similarity("abc", "abc") == 1.0); assert!(calculate_similarity("abc", "abd") > 0.6); assert!(calculate_similarity("abc", "xyz") < 0.5); } #[test] fn test_extract_commands() { let latex = r"\frac{1}{2} + \sqrt{x}"; let commands = extract_commands(latex); assert!(commands.contains(r"\frac")); assert!(commands.contains(r"\sqrt")); } #[test] fn test_validate_syntax() { assert!(validate_syntax(r"\frac{1}{2}").is_ok()); assert!(validate_syntax(r"\frac{1}{2").is_err()); assert!(validate_syntax(r"\frac{1}2}").is_err()); } }