Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
230
vendor/ruvector/examples/scipix/tests/common/latex.rs
vendored
Normal file
230
vendor/ruvector/examples/scipix/tests/common/latex.rs
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
// LaTeX comparison and manipulation utilities
|
||||
//
|
||||
// Provides functions to normalize, compare, and analyze LaTeX strings
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Produce a canonical form of a LaTeX string for comparison purposes.
///
/// All Unicode whitespace is removed and the remaining text is lowercased,
/// so `"X + y"` and `"x+Y"` both become `"x+y"`.
pub fn normalize(latex: &str) -> String {
    // Splitting on whitespace and gluing the pieces back together drops
    // every whitespace character without touching anything else.
    let stripped: String = latex.split_whitespace().collect();
    stripped.to_lowercase()
}
|
||||
|
||||
/// Check if two LaTeX expressions match semantically
|
||||
pub fn expressions_match(a: &str, b: &str) -> bool {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Direct match
|
||||
if norm_a == norm_b {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try alternative representations
|
||||
// e.g., \frac{1}{2} vs 0.5, x^{2} vs x^2, etc.
|
||||
|
||||
// For now, use normalized comparison
|
||||
norm_a == norm_b
|
||||
}
|
||||
|
||||
/// Calculate similarity between two LaTeX strings (0.0 to 1.0)
|
||||
pub fn calculate_similarity(a: &str, b: &str) -> f64 {
|
||||
let norm_a = normalize(a);
|
||||
let norm_b = normalize(b);
|
||||
|
||||
// Use Levenshtein distance ratio
|
||||
let distance = levenshtein_distance(&norm_a, &norm_b);
|
||||
let max_len = norm_a.len().max(norm_b.len()) as f64;
|
||||
|
||||
if max_len == 0.0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
1.0 - (distance as f64 / max_len)
|
||||
}
|
||||
|
||||
/// Compute the Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-`char` insertions,
/// deletions and substitutions needed to turn `a` into `b`.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Turning an empty string into the other one takes one insertion per char.
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // Only the previous and the current row of the edit table are ever read,
    // so two rolling rows are kept instead of the whole table.
    let mut previous: Vec<usize> = (0..=target.len()).collect();
    let mut current: Vec<usize> = vec![0; target.len() + 1];

    for (row, &src_ch) in source.iter().enumerate() {
        // First column: delete every char of the source prefix.
        current[0] = row + 1;

        for (col, &dst_ch) in target.iter().enumerate() {
            let deletion = previous[col + 1] + 1;
            let insertion = current[col] + 1;
            let substitution = previous[col] + usize::from(src_ch != dst_ch);
            current[col + 1] = deletion.min(insertion).min(substitution);
        }

        std::mem::swap(&mut previous, &mut current);
    }

    // After the final swap the last computed row lives in `previous`.
    previous[target.len()]
}
|
||||
|
||||
/// Extract the set of LaTeX command names used in a string.
///
/// A command is a backslash followed by one or more alphabetic characters
/// (e.g. `\frac`, `\sqrt`); each distinct command appears once in the result,
/// including its leading backslash.
///
/// Control symbols, i.e. a backslash followed by a single non-letter such as
/// `\\`, `\{` or `\,`, are not reported. The character after the backslash is
/// consumed together with it, so the second backslash of a line break `\\`
/// is never mistaken for the start of a command (`\\frac` is a line break
/// followed by plain text, not the command `\frac`).
pub fn extract_commands(latex: &str) -> HashSet<String> {
    let mut commands = HashSet::new();
    let mut chars = latex.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            continue;
        }

        // Collect the run of letters that forms the command name.
        let mut command = String::from("\\");
        while let Some(&next_ch) = chars.peek() {
            if !next_ch.is_alphabetic() {
                break;
            }
            command.push(next_ch);
            chars.next();
        }

        if command.len() > 1 {
            commands.insert(command);
        } else {
            // No letters followed: this is a control symbol. Swallow its
            // single character so it cannot start a new token.
            chars.next();
        }
    }

    commands
}
|
||||
|
||||
/// Count occurrences of common LaTeX elements in a string.
///
/// Counting is plain substring matching on the raw input:
/// `\frac`, `\int` and `\sum` for fractions, integrals and sums, `^` and `_`
/// for superscripts and subscripts, and `\begin{matrix}` / `\begin{bmatrix}`
/// for matrices. Every field is a true occurrence count, so an input with
/// several matrix environments reports each of them.
pub fn count_elements(latex: &str) -> ElementCounts {
    // `matches(..).count()` already yields 0 when the needle is absent,
    // so no separate `contains` check is needed.
    let occurrences = |needle: &str| latex.matches(needle).count();

    ElementCounts {
        fractions: occurrences(r"\frac"),
        integrals: occurrences(r"\int"),
        sums: occurrences(r"\sum"),
        superscripts: occurrences("^"),
        subscripts: occurrences("_"),
        matrices: occurrences(r"\begin{matrix}") + occurrences(r"\begin{bmatrix}"),
    }
}

/// Per-element occurrence counts produced by [`count_elements`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ElementCounts {
    /// Number of `\frac` occurrences.
    pub fractions: usize,
    /// Number of `\int` occurrences.
    pub integrals: usize,
    /// Number of `\sum` occurrences.
    pub sums: usize,
    /// Number of `^` characters.
    pub superscripts: usize,
    /// Number of `_` characters.
    pub subscripts: usize,
    /// Number of `\begin{matrix}` and `\begin{bmatrix}` occurrences.
    pub matrices: usize,
}
|
||||
|
||||
/// Validate LaTeX syntax (basic delimiter-balance check).
///
/// Verifies that group braces `{` `}` and square brackets `[` `]` are
/// balanced and never close before they open.
///
/// Escaped braces `\{` and `\}` are literal delimiters rather than group
/// braces, so they are ignored; this keeps valid input such as
/// `\left\{ x \right.` from being rejected. A line break `\\` is consumed as
/// a unit so that its second backslash cannot escape a following brace.
///
/// # Errors
///
/// Returns a human-readable message when a closing brace/bracket has no
/// matching opener, or when braces/brackets remain unclosed at the end.
pub fn validate_syntax(latex: &str) -> Result<(), String> {
    let mut brace_count: usize = 0;
    let mut bracket_count: usize = 0;
    let mut chars = latex.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Skip the escaped character for `\{`, `\}` and `\\` only;
                // anything else after a backslash is examined as usual.
                let escaped = chars.peek().copied();
                if matches!(escaped, Some('{') | Some('}') | Some('\\')) {
                    chars.next();
                }
            }
            '{' => brace_count += 1,
            '}' => {
                brace_count = brace_count
                    .checked_sub(1)
                    .ok_or_else(|| "Unmatched closing brace".to_string())?;
            }
            '[' => bracket_count += 1,
            ']' => {
                bracket_count = bracket_count
                    .checked_sub(1)
                    .ok_or_else(|| "Unmatched closing bracket".to_string())?;
            }
            _ => {}
        }
    }

    if brace_count != 0 {
        return Err(format!("Unmatched braces: {} unclosed", brace_count));
    }
    if bracket_count != 0 {
        return Err(format!("Unmatched brackets: {} unclosed", bracket_count));
    }

    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Whitespace is stripped; input without whitespace comes back unchanged.
    #[test]
    fn test_normalize() {
        let cases = [
            ("x + y", "x+y"),
            (" a b ", "ab"),
            (r"\frac{1}{2}", r"\frac{1}{2}"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(normalize(input), expected);
        }
    }

    /// Expressions differing only in whitespace match; different operators do not.
    #[test]
    fn test_expressions_match() {
        let matching = [("x+y", "x + y"), (r"\frac{1}{2}", r"\frac{1}{2}")];
        for &(lhs, rhs) in matching.iter() {
            assert!(expressions_match(lhs, rhs));
        }

        assert!(!expressions_match("x+y", "x-y"));
    }

    /// Identical strings score 1.0; the score drops as more characters differ.
    #[test]
    fn test_calculate_similarity() {
        let identical = calculate_similarity("abc", "abc");
        let one_off = calculate_similarity("abc", "abd");
        let disjoint = calculate_similarity("abc", "xyz");

        assert!(identical == 1.0);
        assert!(one_off > 0.6);
        assert!(disjoint < 0.5);
    }

    /// Every command used in the input is reported.
    #[test]
    fn test_extract_commands() {
        let commands = extract_commands(r"\frac{1}{2} + \sqrt{x}");

        for expected in [r"\frac", r"\sqrt"].iter() {
            assert!(commands.contains(*expected));
        }
    }

    /// Balanced braces pass; a missing or surplus brace is rejected.
    #[test]
    fn test_validate_syntax() {
        assert!(validate_syntax(r"\frac{1}{2}").is_ok());

        for broken in [r"\frac{1}{2", r"\frac{1}2}"].iter() {
            assert!(validate_syntax(broken).is_err());
        }
    }
}
|
||||
Reference in New Issue
Block a user