Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
399
vendor/ruvector/examples/scipix/src/cli/commands/batch.rs
vendored
Normal file
399
vendor/ruvector/examples/scipix/src/cli/commands/batch.rs
vendored
Normal file
@@ -0,0 +1,399 @@
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Args;
|
||||
use glob::glob;
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Semaphore;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use super::{OcrConfig, OcrResult};
|
||||
use crate::cli::{output, Cli, OutputFormat};
|
||||
|
||||
/// Process multiple files in batch mode
///
/// Clap argument set for the `batch` subcommand: expands a glob pattern
/// (or a directory) into a list of files and runs OCR over them with a
/// bounded number of parallel workers.
#[derive(Args, Debug, Clone)]
pub struct BatchArgs {
    /// Input pattern (glob) or directory
    #[arg(value_name = "PATTERN", help = "Input pattern (glob) or directory")]
    pub pattern: String,

    /// Output directory for results
    // When absent, results are printed to stdout as a JSON array instead.
    #[arg(
        short,
        long,
        value_name = "DIR",
        help = "Output directory for results (default: stdout as JSON array)"
    )]
    pub output: Option<PathBuf>,

    /// Number of parallel workers
    // Used as the semaphore permit count in `process_files_parallel`.
    #[arg(
        short,
        long,
        default_value = "4",
        help = "Number of parallel processing workers"
    )]
    pub parallel: usize,

    /// Minimum confidence threshold (0.0 to 1.0)
    // Results below this are reported as "failed threshold" by `execute`.
    #[arg(
        short = 't',
        long,
        default_value = "0.7",
        help = "Minimum confidence threshold for results"
    )]
    pub threshold: f64,

    /// Continue on errors
    #[arg(
        short = 'c',
        long,
        help = "Continue processing even if some files fail"
    )]
    pub continue_on_error: bool,

    /// Maximum retry attempts per file
    // Total attempts per file = max_retries + 1 (see `process_with_retry`).
    #[arg(
        short = 'r',
        long,
        default_value = "2",
        help = "Maximum retry attempts per file on failure"
    )]
    pub max_retries: usize,

    /// Save individual results as separate files
    #[arg(long, help = "Save each result as a separate file (requires --output)")]
    pub separate_files: bool,

    /// Recursive directory search
    #[arg(short = 'R', long, help = "Recursively search directories")]
    pub recursive: bool,
}
|
||||
|
||||
/// Entry point for the `batch` subcommand.
///
/// Expands the input pattern, processes all matched files in parallel,
/// partitions results by the confidence threshold, then writes results to
/// `--output` (or stdout as pretty JSON) and prints a summary.
///
/// # Errors
/// Fails when no files match, the output directory cannot be created,
/// serialization fails, or — unless `--continue-on-error` is set — when any
/// result falls below the confidence threshold.
pub async fn execute(args: BatchArgs, cli: &Cli) -> Result<()> {
    info!("Starting batch processing with pattern: {}", args.pattern);

    // Load configuration
    let config = Arc::new(load_config(cli.config.as_ref())?);

    // Expand pattern to file list
    let files = collect_files(&args)?;

    if files.is_empty() {
        anyhow::bail!("No files found matching pattern: {}", args.pattern);
    }

    info!("Found {} files to process", files.len());

    // Create output directory if needed
    if let Some(output_dir) = &args.output {
        std::fs::create_dir_all(output_dir).context("Failed to create output directory")?;
    }

    // Process files in parallel with progress bars.
    // NOTE(review): files whose processing *errored* are logged and dropped
    // inside `process_files_parallel`; `failed` below only counts results
    // that came back successfully but fell under the threshold.
    let results = process_files_parallel(files, &args, &config, cli.quiet).await?;

    // Filter by confidence threshold
    let (passed, failed): (Vec<_>, Vec<_>) = results
        .into_iter()
        .partition(|r| r.confidence >= args.threshold);

    info!(
        "Processing complete: {} passed, {} failed threshold",
        passed.len(),
        failed.len()
    );

    // Save or display results
    if let Some(output_dir) = &args.output {
        // Only results that passed the threshold are persisted.
        save_results(&passed, output_dir, &cli.format, args.separate_files)?;

        if !cli.quiet {
            println!("Results saved to: {}", output_dir.display());
        }
    } else {
        // Output as JSON array to stdout
        let json = serde_json::to_string_pretty(&passed).context("Failed to serialize results")?;
        println!("{}", json);
    }

    // Display summary
    if !cli.quiet {
        output::print_batch_summary(&passed, &failed, args.threshold);
    }

    // Return error if any files failed and continue_on_error is false
    if !failed.is_empty() && !args.continue_on_error {
        anyhow::bail!("{} files failed confidence threshold", failed.len());
    }

    Ok(())
}
|
||||
|
||||
fn collect_files(args: &BatchArgs) -> Result<Vec<PathBuf>> {
|
||||
let mut files = Vec::new();
|
||||
let path = PathBuf::from(&args.pattern);
|
||||
|
||||
if path.is_dir() {
|
||||
// Directory mode
|
||||
let pattern = if args.recursive {
|
||||
format!("{}/**/*", args.pattern)
|
||||
} else {
|
||||
format!("{}/*", args.pattern)
|
||||
};
|
||||
|
||||
for entry in glob(&pattern).context("Failed to read glob pattern")? {
|
||||
match entry {
|
||||
Ok(path) => {
|
||||
if path.is_file() {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
Err(e) => warn!("Failed to read entry: {}", e),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Glob pattern mode
|
||||
for entry in glob(&args.pattern).context("Failed to read glob pattern")? {
|
||||
match entry {
|
||||
Ok(path) => {
|
||||
if path.is_file() {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
Err(e) => warn!("Failed to read entry: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// Process `files` concurrently, at most `args.parallel` at a time.
///
/// Each file gets its own tokio task gated by a semaphore; progress is shown
/// via an indicatif `MultiProgress` (one spinner per in-flight file plus an
/// overall bar) unless `quiet` is set. Per-file failures and task panics are
/// logged and *dropped* — only successful results are returned.
async fn process_files_parallel(
    files: Vec<PathBuf>,
    args: &BatchArgs,
    config: &Arc<OcrConfig>,
    quiet: bool,
) -> Result<Vec<OcrResult>> {
    // Limits how many OCR tasks run concurrently.
    let semaphore = Arc::new(Semaphore::new(args.parallel));
    let multi_progress = Arc::new(MultiProgress::new());

    // Overall bar exists only in non-quiet mode; the unwraps on it below are
    // safe because every access is also gated on `!quiet`.
    let overall_progress = if !quiet {
        let pb = multi_progress.add(ProgressBar::new(files.len() as u64));
        pb.set_style(
            ProgressStyle::default_bar()
                .template(
                    "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})",
                )
                .unwrap()
                .progress_chars("#>-"),
        );
        Some(pb)
    } else {
        None
    };

    let mut handles = Vec::new();

    for (_idx, file) in files.into_iter().enumerate() {
        // Clone the shared handles moved into the spawned task.
        let semaphore = semaphore.clone();
        let config = config.clone();
        let multi_progress = multi_progress.clone();
        let overall_progress = overall_progress.clone();
        let max_retries = args.max_retries;

        let handle = tokio::spawn(async move {
            // Held for the duration of the task; released on drop.
            let _permit = semaphore.acquire().await.unwrap();

            let file_progress = if !quiet {
                let pb = multi_progress.insert_before(
                    &overall_progress.as_ref().unwrap(),
                    ProgressBar::new_spinner(),
                );
                pb.set_style(
                    ProgressStyle::default_spinner()
                        .template("{spinner:.green} {msg}")
                        .unwrap(),
                );
                pb.set_message(format!("[{}] Processing...", file.display()));
                Some(pb)
            } else {
                None
            };

            let result = process_with_retry(&file, &config, max_retries).await;

            // Finish the per-file spinner with a success/failure message.
            if let Some(pb) = &file_progress {
                match &result {
                    Ok(r) => pb.finish_with_message(format!(
                        "[{}] ✓ Confidence: {:.2}%",
                        file.display(),
                        r.confidence * 100.0
                    )),
                    Err(e) => {
                        pb.finish_with_message(format!("[{}] ✗ Error: {}", file.display(), e))
                    }
                }
            }

            if let Some(pb) = &overall_progress {
                pb.inc(1);
            }

            result
        });

        handles.push(handle);
    }

    // Wait for all tasks to complete; errors are logged, not propagated.
    let mut results = Vec::new();
    for handle in handles {
        match handle.await {
            Ok(Ok(result)) => results.push(result),
            Ok(Err(e)) => error!("Processing failed: {}", e),
            Err(e) => error!("Task panicked: {}", e),
        }
    }

    if let Some(pb) = overall_progress {
        pb.finish_with_message("Batch processing complete");
    }

    Ok(results)
}
|
||||
|
||||
async fn process_with_retry(
|
||||
file: &PathBuf,
|
||||
config: &OcrConfig,
|
||||
max_retries: usize,
|
||||
) -> Result<OcrResult> {
|
||||
let mut attempts = 0;
|
||||
let mut last_error = None;
|
||||
|
||||
while attempts <= max_retries {
|
||||
match process_single_file(file, config).await {
|
||||
Ok(result) => return Ok(result),
|
||||
Err(e) => {
|
||||
attempts += 1;
|
||||
last_error = Some(e);
|
||||
|
||||
if attempts <= max_retries {
|
||||
debug!("Retry {}/{} for {}", attempts, max_retries, file.display());
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100 * attempts as u64))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(last_error.unwrap())
|
||||
}
|
||||
|
||||
async fn process_single_file(file: &PathBuf, _config: &OcrConfig) -> Result<OcrResult> {
|
||||
// TODO: Implement actual OCR processing
|
||||
// For now, return a mock result
|
||||
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
// Simulate varying confidence
|
||||
let confidence = 0.7 + (rand::random::<f64>() * 0.3);
|
||||
|
||||
Ok(OcrResult {
|
||||
file: file.clone(),
|
||||
text: format!("OCR text from {}", file.display()),
|
||||
latex: Some(format!(r"\text{{Content from {}}}", file.display())),
|
||||
confidence,
|
||||
processing_time_ms: 50,
|
||||
errors: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
fn save_results(
|
||||
results: &[OcrResult],
|
||||
output_dir: &PathBuf,
|
||||
format: &OutputFormat,
|
||||
separate_files: bool,
|
||||
) -> Result<()> {
|
||||
if separate_files {
|
||||
// Save each result as a separate file
|
||||
for (idx, result) in results.iter().enumerate() {
|
||||
let filename = format!(
|
||||
"result_{:04}.{}",
|
||||
idx,
|
||||
match format {
|
||||
OutputFormat::Json => "json",
|
||||
OutputFormat::Latex => "tex",
|
||||
OutputFormat::Markdown => "md",
|
||||
OutputFormat::MathMl => "xml",
|
||||
OutputFormat::Text => "txt",
|
||||
}
|
||||
);
|
||||
|
||||
let output_path = output_dir.join(filename);
|
||||
let content = format_single_result(result, format)?;
|
||||
|
||||
std::fs::write(&output_path, content)
|
||||
.context(format!("Failed to write {}", output_path.display()))?;
|
||||
}
|
||||
} else {
|
||||
// Save all results as a single file
|
||||
let filename = format!(
|
||||
"results.{}",
|
||||
match format {
|
||||
OutputFormat::Json => "json",
|
||||
OutputFormat::Latex => "tex",
|
||||
OutputFormat::Markdown => "md",
|
||||
OutputFormat::MathMl => "xml",
|
||||
OutputFormat::Text => "txt",
|
||||
}
|
||||
);
|
||||
|
||||
let output_path = output_dir.join(filename);
|
||||
let content = format_batch_results(results, format)?;
|
||||
|
||||
std::fs::write(&output_path, content).context("Failed to write results file")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn format_single_result(result: &OcrResult, format: &OutputFormat) -> Result<String> {
|
||||
match format {
|
||||
OutputFormat::Json => {
|
||||
serde_json::to_string_pretty(result).context("Failed to serialize result")
|
||||
}
|
||||
OutputFormat::Text => Ok(result.text.clone()),
|
||||
OutputFormat::Latex => Ok(result.latex.clone().unwrap_or_else(|| result.text.clone())),
|
||||
OutputFormat::Markdown => Ok(format!("# {}\n\n{}\n", result.file.display(), result.text)),
|
||||
OutputFormat::MathMl => Ok(format!(
|
||||
"<math xmlns=\"http://www.w3.org/1998/Math/MathML\">\n {}\n</math>",
|
||||
result.text
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_batch_results(results: &[OcrResult], format: &OutputFormat) -> Result<String> {
|
||||
match format {
|
||||
OutputFormat::Json => {
|
||||
serde_json::to_string_pretty(results).context("Failed to serialize results")
|
||||
}
|
||||
_ => {
|
||||
let mut output = String::new();
|
||||
for result in results {
|
||||
output.push_str(&format_single_result(result, format)?);
|
||||
output.push_str("\n\n---\n\n");
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_config(config_path: Option<&PathBuf>) -> Result<OcrConfig> {
|
||||
if let Some(path) = config_path {
|
||||
let content = std::fs::read_to_string(path).context("Failed to read config file")?;
|
||||
toml::from_str(&content).context("Failed to parse config file")
|
||||
} else {
|
||||
Ok(OcrConfig::default())
|
||||
}
|
||||
}
|
||||
272
vendor/ruvector/examples/scipix/src/cli/commands/config.rs
vendored
Normal file
272
vendor/ruvector/examples/scipix/src/cli/commands/config.rs
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{Args, Subcommand};
|
||||
use dialoguer::{theme::ColorfulTheme, Confirm, Input};
|
||||
use std::path::PathBuf;
|
||||
use tracing::info;
|
||||
|
||||
use super::OcrConfig;
|
||||
use crate::cli::Cli;
|
||||
|
||||
/// Manage configuration
///
/// Thin wrapper that routes to one of the `ConfigCommand` subcommands
/// (init / validate / show / edit / path).
#[derive(Args, Debug, Clone)]
pub struct ConfigArgs {
    /// The concrete config operation to run.
    #[command(subcommand)]
    pub command: ConfigCommand,
}
|
||||
|
||||
/// Subcommands of `scipix-cli config`.
#[derive(Subcommand, Debug, Clone)]
pub enum ConfigCommand {
    /// Generate default configuration file
    Init {
        /// Output path for config file
        #[arg(short, long, default_value = "scipix.toml")]
        output: PathBuf,

        /// Overwrite existing file
        #[arg(short, long)]
        force: bool,
    },

    /// Validate configuration file
    Validate {
        /// Path to config file to validate
        #[arg(value_name = "FILE")]
        file: PathBuf,
    },

    /// Show current configuration
    Show {
        /// Path to config file (default: from --config or scipix.toml)
        #[arg(value_name = "FILE")]
        file: Option<PathBuf>,
    },

    /// Edit configuration interactively
    Edit {
        /// Path to config file to edit
        #[arg(value_name = "FILE")]
        file: PathBuf,
    },

    /// Get configuration directory path
    Path,
}
|
||||
|
||||
pub async fn execute(args: ConfigArgs, cli: &Cli) -> Result<()> {
|
||||
match args.command {
|
||||
ConfigCommand::Init { output, force } => {
|
||||
init_config(&output, force)?;
|
||||
}
|
||||
ConfigCommand::Validate { file } => {
|
||||
validate_config(&file)?;
|
||||
}
|
||||
ConfigCommand::Show { file } => {
|
||||
show_config(file.or(cli.config.clone()))?;
|
||||
}
|
||||
ConfigCommand::Edit { file } => {
|
||||
edit_config(&file)?;
|
||||
}
|
||||
ConfigCommand::Path => {
|
||||
show_config_path()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn init_config(output: &PathBuf, force: bool) -> Result<()> {
|
||||
if output.exists() && !force {
|
||||
anyhow::bail!(
|
||||
"Config file already exists: {} (use --force to overwrite)",
|
||||
output.display()
|
||||
);
|
||||
}
|
||||
|
||||
let config = OcrConfig::default();
|
||||
let toml = toml::to_string_pretty(&config).context("Failed to serialize config")?;
|
||||
|
||||
std::fs::write(output, toml).context("Failed to write config file")?;
|
||||
|
||||
info!("Configuration file created: {}", output.display());
|
||||
println!("✓ Created configuration file: {}", output.display());
|
||||
println!("\nTo use this config, run:");
|
||||
println!(" scipix-cli --config {} <command>", output.display());
|
||||
println!("\nOr set environment variable:");
|
||||
println!(" export MATHPIX_CONFIG={}", output.display());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_config(file: &PathBuf) -> Result<()> {
|
||||
if !file.exists() {
|
||||
anyhow::bail!("Config file not found: {}", file.display());
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(file).context("Failed to read config file")?;
|
||||
|
||||
let config: OcrConfig = toml::from_str(&content).context("Failed to parse config file")?;
|
||||
|
||||
// Validate configuration values
|
||||
if config.min_confidence < 0.0 || config.min_confidence > 1.0 {
|
||||
anyhow::bail!("min_confidence must be between 0.0 and 1.0");
|
||||
}
|
||||
|
||||
if config.max_image_size == 0 {
|
||||
anyhow::bail!("max_image_size must be greater than 0");
|
||||
}
|
||||
|
||||
if config.supported_extensions.is_empty() {
|
||||
anyhow::bail!("supported_extensions cannot be empty");
|
||||
}
|
||||
|
||||
println!("✓ Configuration is valid");
|
||||
println!("\nSettings:");
|
||||
println!(" Min confidence: {}", config.min_confidence);
|
||||
println!(" Max image size: {} bytes", config.max_image_size);
|
||||
println!(
|
||||
" Supported extensions: {}",
|
||||
config.supported_extensions.join(", ")
|
||||
);
|
||||
|
||||
if let Some(endpoint) = &config.api_endpoint {
|
||||
println!(" API endpoint: {}", endpoint);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn show_config(file: Option<PathBuf>) -> Result<()> {
|
||||
let config_path = file.unwrap_or_else(|| PathBuf::from("scipix.toml"));
|
||||
|
||||
if !config_path.exists() {
|
||||
println!("No configuration file found.");
|
||||
println!("\nCreate one with:");
|
||||
println!(" scipix-cli config init");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&config_path).context("Failed to read config file")?;
|
||||
|
||||
println!("Configuration from: {}\n", config_path.display());
|
||||
println!("{}", content);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Interactively edit an existing configuration file via dialoguer prompts.
///
/// Walks the user through `min_confidence`, `max_image_size` and the
/// optional API endpoint, then asks for confirmation before writing the
/// file back. Declining the final prompt discards all changes.
///
/// # Errors
/// Fails when the file is missing, unreadable, unparseable, a prompt cannot
/// read input, or the final write fails.
fn edit_config(file: &PathBuf) -> Result<()> {
    if !file.exists() {
        anyhow::bail!(
            "Config file not found: {} (use 'config init' to create)",
            file.display()
        );
    }

    let content = std::fs::read_to_string(file).context("Failed to read config file")?;

    let mut config: OcrConfig = toml::from_str(&content).context("Failed to parse config file")?;

    let theme = ColorfulTheme::default();

    println!("Interactive Configuration Editor\n");

    // Edit min_confidence (validated to stay within [0, 1]).
    config.min_confidence = Input::with_theme(&theme)
        .with_prompt("Minimum confidence threshold (0.0-1.0)")
        .default(config.min_confidence)
        .validate_with(|v: &f64| {
            if *v >= 0.0 && *v <= 1.0 {
                Ok(())
            } else {
                Err("Value must be between 0.0 and 1.0")
            }
        })
        .interact_text()
        .context("Failed to read input")?;

    // Edit max_image_size — prompted in MB, stored in bytes.
    // NOTE(review): integer division truncates sub-MB values, so a stored
    // size under 1 MB displays as a default of 0 — confirm this is intended.
    let max_size_mb = config.max_image_size / (1024 * 1024);
    let new_size_mb: usize = Input::with_theme(&theme)
        .with_prompt("Maximum image size (MB)")
        .default(max_size_mb)
        .interact_text()
        .context("Failed to read input")?;
    config.max_image_size = new_size_mb * 1024 * 1024;

    // Edit API endpoint. The two branches deliberately differ: editing an
    // existing endpoint allows an empty answer (meaning "remove it"), while
    // adding a new one does not.
    if config.api_endpoint.is_some() {
        let edit_endpoint = Confirm::with_theme(&theme)
            .with_prompt("Edit API endpoint?")
            .default(false)
            .interact()
            .context("Failed to read input")?;

        if edit_endpoint {
            let endpoint: String = Input::with_theme(&theme)
                .with_prompt("API endpoint URL")
                .allow_empty(true)
                .interact_text()
                .context("Failed to read input")?;

            // Empty input clears the endpoint.
            config.api_endpoint = if endpoint.is_empty() {
                None
            } else {
                Some(endpoint)
            };
        }
    } else {
        let add_endpoint = Confirm::with_theme(&theme)
            .with_prompt("Add API endpoint?")
            .default(false)
            .interact()
            .context("Failed to read input")?;

        if add_endpoint {
            let endpoint: String = Input::with_theme(&theme)
                .with_prompt("API endpoint URL")
                .interact_text()
                .context("Failed to read input")?;

            config.api_endpoint = Some(endpoint);
        }
    }

    // Save configuration only after explicit confirmation.
    let save = Confirm::with_theme(&theme)
        .with_prompt("Save changes?")
        .default(true)
        .interact()
        .context("Failed to read input")?;

    if save {
        let toml = toml::to_string_pretty(&config).context("Failed to serialize config")?;

        std::fs::write(file, toml).context("Failed to write config file")?;

        println!("\n✓ Configuration saved to: {}", file.display());
    } else {
        println!("\nChanges discarded.");
    }

    Ok(())
}
|
||||
|
||||
fn show_config_path() -> Result<()> {
|
||||
if let Some(config_dir) = dirs::config_dir() {
|
||||
let app_config = config_dir.join("scipix");
|
||||
println!("Default config directory: {}", app_config.display());
|
||||
|
||||
if !app_config.exists() {
|
||||
println!("\nDirectory does not exist. Create it with:");
|
||||
println!(" mkdir -p {}", app_config.display());
|
||||
}
|
||||
} else {
|
||||
println!("Could not determine config directory");
|
||||
}
|
||||
|
||||
println!("\nYou can also use a custom config file:");
|
||||
println!(" scipix-cli --config /path/to/config.toml <command>");
|
||||
println!("\nOr set environment variable:");
|
||||
println!(" export MATHPIX_CONFIG=/path/to/config.toml");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
955
vendor/ruvector/examples/scipix/src/cli/commands/doctor.rs
vendored
Normal file
955
vendor/ruvector/examples/scipix/src/cli/commands/doctor.rs
vendored
Normal file
@@ -0,0 +1,955 @@
|
||||
//! Doctor command for environment analysis and configuration optimization
|
||||
//!
|
||||
//! Analyzes the system environment and provides recommendations for optimal
|
||||
//! SciPix configuration based on available hardware and software capabilities.
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::Args;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Arguments for the doctor command
#[derive(Args, Debug, Clone)]
pub struct DoctorArgs {
    /// Run in fix mode to automatically apply recommendations
    #[arg(long, help = "Automatically apply safe fixes")]
    pub fix: bool,

    /// Output detailed diagnostic information
    #[arg(long, short, help = "Show detailed diagnostic information")]
    pub verbose: bool,

    /// Output results as JSON
    // When set, all human-readable printing is suppressed.
    #[arg(long, help = "Output results as JSON")]
    pub json: bool,

    /// Check only specific category (cpu, memory, config, deps, all)
    #[arg(long, default_value = "all", help = "Category to check")]
    pub check: CheckCategory,

    /// Path to configuration file to validate
    #[arg(long, help = "Path to configuration file to validate")]
    pub config_path: Option<PathBuf>,
}
|
||||
|
||||
/// Which group of diagnostic checks `doctor` should run (default: all).
#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
pub enum CheckCategory {
    #[default]
    All,
    Cpu,
    Memory,
    Config,
    Deps,
    Network,
}
|
||||
|
||||
/// Status of a diagnostic check
///
/// `Info` is used for purely informational entries that are neither a
/// pass nor a failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CheckStatus {
    Pass,
    Warning,
    Fail,
    Info,
}
|
||||
|
||||
impl std::fmt::Display for CheckStatus {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
CheckStatus::Pass => write!(f, "✓"),
|
||||
CheckStatus::Warning => write!(f, "⚠"),
|
||||
CheckStatus::Fail => write!(f, "✗"),
|
||||
CheckStatus::Info => write!(f, "ℹ"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single diagnostic check result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiagnosticCheck {
    /// Short human-readable name of the check (e.g. "CPU Cores").
    pub name: String,
    /// Grouping category the check belongs to (e.g. "CPU").
    pub category: String,
    /// Outcome of the check.
    pub status: CheckStatus,
    /// Human-readable detail about what was found.
    pub message: String,
    /// Suggested remediation, when the check did not fully pass.
    pub recommendation: Option<String>,
    /// Whether `--fix` can apply the remediation automatically.
    pub auto_fixable: bool,
}
|
||||
|
||||
/// Complete diagnostic report
///
/// Aggregates everything `doctor` produced in one run; this is the shape
/// serialized when `--json` is requested.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiagnosticReport {
    /// RFC 3339 timestamp of when the report was generated.
    pub timestamp: String,
    /// Snapshot of the host environment.
    pub system_info: SystemInfo,
    /// All individual check results that were run.
    pub checks: Vec<DiagnosticCheck>,
    /// Flattened list of the checks' recommendations.
    pub recommendations: Vec<String>,
    /// Suggested configuration derived from the system info.
    pub optimal_config: OptimalConfig,
}
|
||||
|
||||
/// System information gathered during diagnosis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Operating system name (from `std::env::consts::OS`).
    pub os: String,
    /// Target architecture (from `std::env::consts::ARCH`).
    pub arch: String,
    /// Number of logical CPUs.
    pub cpu_count: usize,
    /// Human-readable CPU model name (best effort).
    pub cpu_brand: String,
    /// Total physical memory in MB.
    pub total_memory_mb: u64,
    /// Currently available memory in MB.
    pub available_memory_mb: u64,
    /// Runtime-detected SIMD capabilities.
    pub simd_features: SimdFeatures,
}
|
||||
|
||||
/// SIMD feature detection results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimdFeatures {
    pub sse2: bool,
    pub sse4_1: bool,
    pub sse4_2: bool,
    pub avx: bool,
    pub avx2: bool,
    pub avx512f: bool,
    /// ARM NEON (always true on AArch64 targets).
    pub neon: bool,
    /// Label for the widest detected instruction set ("scalar" if none).
    pub best_available: String,
}
|
||||
|
||||
/// Optimal configuration recommendations
///
/// Derived from `SystemInfo` by `generate_optimal_config`; presented to the
/// user as suggested settings, not applied automatically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimalConfig {
    pub batch_size: usize,
    pub worker_threads: usize,
    /// Which SIMD backend to select (matches `SimdFeatures::best_available`).
    pub simd_backend: String,
    pub memory_limit_mb: u64,
    pub preprocessing_mode: String,
    pub cache_enabled: bool,
    pub cache_size_mb: u64,
}
|
||||
|
||||
/// Execute the doctor command
///
/// Gathers host facts, runs the selected check category (or all of them),
/// derives an optimal configuration, and either prints a JSON
/// `DiagnosticReport` (`--json`) or a human-readable report. With `--fix`,
/// auto-fixable findings are applied before the summary.
pub async fn execute(args: DoctorArgs) -> Result<()> {
    // Banner is suppressed in JSON mode to keep stdout machine-parseable.
    if !args.json {
        println!("🩺 SciPix Doctor - Environment Analysis\n");
        println!("═══════════════════════════════════════════════════════════\n");
    }

    let mut checks = Vec::new();

    // Gather system information
    let system_info = gather_system_info();

    // Run checks based on category
    match args.check {
        CheckCategory::All => {
            checks.extend(check_cpu(&system_info, args.verbose));
            checks.extend(check_memory(&system_info, args.verbose));
            checks.extend(check_dependencies(args.verbose));
            checks.extend(check_config(&args.config_path, args.verbose));
            checks.extend(check_network(args.verbose).await);
        }
        CheckCategory::Cpu => {
            checks.extend(check_cpu(&system_info, args.verbose));
        }
        CheckCategory::Memory => {
            checks.extend(check_memory(&system_info, args.verbose));
        }
        CheckCategory::Config => {
            checks.extend(check_config(&args.config_path, args.verbose));
        }
        CheckCategory::Deps => {
            checks.extend(check_dependencies(args.verbose));
        }
        CheckCategory::Network => {
            checks.extend(check_network(args.verbose).await);
        }
    }

    // Generate optimal configuration
    let optimal_config = generate_optimal_config(&system_info);

    // Collect recommendations from every check that produced one.
    let recommendations: Vec<String> = checks
        .iter()
        .filter_map(|c| c.recommendation.clone())
        .collect();

    // Create report
    let report = DiagnosticReport {
        timestamp: chrono::Utc::now().to_rfc3339(),
        system_info: system_info.clone(),
        checks: checks.clone(),
        recommendations: recommendations.clone(),
        optimal_config: optimal_config.clone(),
    };

    // JSON mode: emit the report and stop (no fixes, no summary).
    if args.json {
        println!("{}", serde_json::to_string_pretty(&report)?);
        return Ok(());
    }

    // Print system info
    print_system_info(&system_info);

    // Print check results
    print_check_results(&checks);

    // Print recommendations
    if !recommendations.is_empty() {
        println!("\n📋 Recommendations:");
        println!("───────────────────────────────────────────────────────────");
        for (i, rec) in recommendations.iter().enumerate() {
            println!(" {}. {}", i + 1, rec);
        }
    }

    // Print optimal configuration
    print_optimal_config(&optimal_config);

    // Apply fixes if requested
    if args.fix {
        apply_fixes(&checks).await?;
    }

    // Print summary
    print_summary(&checks);

    Ok(())
}
|
||||
|
||||
fn gather_system_info() -> SystemInfo {
|
||||
let cpu_count = num_cpus::get();
|
||||
|
||||
// Get CPU brand string
|
||||
let cpu_brand = get_cpu_brand();
|
||||
|
||||
// Get memory info
|
||||
let (total_memory_mb, available_memory_mb) = get_memory_info();
|
||||
|
||||
// Detect SIMD features
|
||||
let simd_features = detect_simd_features();
|
||||
|
||||
SystemInfo {
|
||||
os: std::env::consts::OS.to_string(),
|
||||
arch: std::env::consts::ARCH.to_string(),
|
||||
cpu_count,
|
||||
cpu_brand,
|
||||
total_memory_mb,
|
||||
available_memory_mb,
|
||||
simd_features,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_cpu_brand() -> String {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
if let Some(brand) = get_x86_cpu_brand() {
|
||||
return brand;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback
|
||||
format!("{} processor", std::env::consts::ARCH)
|
||||
}
|
||||
|
||||
/// Read the CPU model name from `/proc/cpuinfo` (Linux-only source).
///
/// Returns the value of the first well-formed "model name" line, trimmed;
/// `None` when the file is unreadable or no such line exists.
#[cfg(target_arch = "x86_64")]
fn get_x86_cpu_brand() -> Option<String> {
    let cpuinfo = std::fs::read_to_string("/proc/cpuinfo").ok()?;
    cpuinfo
        .lines()
        .filter(|line| line.starts_with("model name"))
        .find_map(|line| line.split(':').nth(1))
        .map(|brand| brand.trim().to_string())
}
|
||||
|
||||
/// Non-x86 stub: there is no `/proc/cpuinfo` "model name" lookup here,
/// so the caller always falls back to the generic brand string.
#[cfg(not(target_arch = "x86_64"))]
fn get_x86_cpu_brand() -> Option<String> {
    None
}
|
||||
|
||||
fn get_memory_info() -> (u64, u64) {
|
||||
// Try to read from /proc/meminfo on Linux
|
||||
if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
|
||||
let mut total = 0u64;
|
||||
let mut available = 0u64;
|
||||
|
||||
for line in meminfo.lines() {
|
||||
if line.starts_with("MemTotal:") {
|
||||
if let Some(kb) = parse_meminfo_value(line) {
|
||||
total = kb / 1024; // Convert to MB
|
||||
}
|
||||
} else if line.starts_with("MemAvailable:") {
|
||||
if let Some(kb) = parse_meminfo_value(line) {
|
||||
available = kb / 1024; // Convert to MB
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if total > 0 {
|
||||
return (total, available);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback values
|
||||
(8192, 4096)
|
||||
}
|
||||
|
||||
/// Extract the numeric value (second whitespace-separated field) from a
/// `/proc/meminfo` line such as `"MemTotal:  16384 kB"`.
///
/// Returns `None` when the field is missing or not a valid `u64`.
fn parse_meminfo_value(line: &str) -> Option<u64> {
    let mut fields = line.split_whitespace();
    fields.next()?; // skip the "Key:" label
    fields.next()?.parse().ok()
}
|
||||
|
||||
/// Detect the SIMD capability of the current CPU at runtime.
///
/// Starts from a scalar-only baseline; on x86_64 each flag is probed with
/// `is_x86_feature_detected!`, and `best_available` is set to the widest
/// supported set. On AArch64, NEON is assumed (it is mandatory there).
/// Other architectures report "scalar".
fn detect_simd_features() -> SimdFeatures {
    let mut features = SimdFeatures {
        sse2: false,
        sse4_1: false,
        sse4_2: false,
        avx: false,
        avx2: false,
        avx512f: false,
        neon: false,
        best_available: "scalar".to_string(),
    };

    #[cfg(target_arch = "x86_64")]
    {
        features.sse2 = is_x86_feature_detected!("sse2");
        features.sse4_1 = is_x86_feature_detected!("sse4.1");
        features.sse4_2 = is_x86_feature_detected!("sse4.2");
        features.avx = is_x86_feature_detected!("avx");
        features.avx2 = is_x86_feature_detected!("avx2");
        features.avx512f = is_x86_feature_detected!("avx512f");

        // Widest available set wins.
        if features.avx512f {
            features.best_available = "AVX-512".to_string();
        } else if features.avx2 {
            features.best_available = "AVX2".to_string();
        } else if features.avx {
            features.best_available = "AVX".to_string();
        } else if features.sse4_2 {
            features.best_available = "SSE4.2".to_string();
        } else if features.sse2 {
            features.best_available = "SSE2".to_string();
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        features.neon = true; // NEON is always available on AArch64
        features.best_available = "NEON".to_string();
    }

    features
}
|
||||
|
||||
fn check_cpu(system_info: &SystemInfo, verbose: bool) -> Vec<DiagnosticCheck> {
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// CPU count check
|
||||
let cpu_status = if system_info.cpu_count >= 8 {
|
||||
CheckStatus::Pass
|
||||
} else if system_info.cpu_count >= 4 {
|
||||
CheckStatus::Warning
|
||||
} else {
|
||||
CheckStatus::Fail
|
||||
};
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "CPU Cores".to_string(),
|
||||
category: "CPU".to_string(),
|
||||
status: cpu_status,
|
||||
message: format!("{} cores detected", system_info.cpu_count),
|
||||
recommendation: if system_info.cpu_count < 4 {
|
||||
Some(
|
||||
"Consider running on a machine with more CPU cores for better batch processing"
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
// SIMD check
|
||||
let simd_status = match system_info.simd_features.best_available.as_str() {
|
||||
"AVX-512" | "AVX2" => CheckStatus::Pass,
|
||||
"AVX" | "SSE4.2" | "NEON" => CheckStatus::Warning,
|
||||
_ => CheckStatus::Fail,
|
||||
};
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "SIMD Support".to_string(),
|
||||
category: "CPU".to_string(),
|
||||
status: simd_status,
|
||||
message: format!(
|
||||
"Best SIMD: {} (SSE2: {}, AVX: {}, AVX2: {}, AVX-512: {})",
|
||||
system_info.simd_features.best_available,
|
||||
if system_info.simd_features.sse2 {
|
||||
"✓"
|
||||
} else {
|
||||
"✗"
|
||||
},
|
||||
if system_info.simd_features.avx {
|
||||
"✓"
|
||||
} else {
|
||||
"✗"
|
||||
},
|
||||
if system_info.simd_features.avx2 {
|
||||
"✓"
|
||||
} else {
|
||||
"✗"
|
||||
},
|
||||
if system_info.simd_features.avx512f {
|
||||
"✓"
|
||||
} else {
|
||||
"✗"
|
||||
},
|
||||
),
|
||||
recommendation: if simd_status == CheckStatus::Fail {
|
||||
Some("Upgrade to a CPU with AVX2 support for 4x faster preprocessing".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
if verbose {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "CPU Brand".to_string(),
|
||||
category: "CPU".to_string(),
|
||||
status: CheckStatus::Info,
|
||||
message: system_info.cpu_brand.clone(),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
|
||||
checks
|
||||
}
|
||||
|
||||
fn check_memory(system_info: &SystemInfo, verbose: bool) -> Vec<DiagnosticCheck> {
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// Total memory check
|
||||
let mem_status = if system_info.total_memory_mb >= 16384 {
|
||||
CheckStatus::Pass
|
||||
} else if system_info.total_memory_mb >= 8192 {
|
||||
CheckStatus::Warning
|
||||
} else {
|
||||
CheckStatus::Fail
|
||||
};
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Total Memory".to_string(),
|
||||
category: "Memory".to_string(),
|
||||
status: mem_status,
|
||||
message: format!("{} MB total", system_info.total_memory_mb),
|
||||
recommendation: if system_info.total_memory_mb < 8192 {
|
||||
Some("Consider upgrading to at least 8GB RAM for optimal batch processing".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
// Available memory check
|
||||
let avail_ratio = system_info.available_memory_mb as f64 / system_info.total_memory_mb as f64;
|
||||
let avail_status = if avail_ratio >= 0.5 {
|
||||
CheckStatus::Pass
|
||||
} else if avail_ratio >= 0.25 {
|
||||
CheckStatus::Warning
|
||||
} else {
|
||||
CheckStatus::Fail
|
||||
};
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Available Memory".to_string(),
|
||||
category: "Memory".to_string(),
|
||||
status: avail_status,
|
||||
message: format!(
|
||||
"{} MB available ({:.1}%)",
|
||||
system_info.available_memory_mb,
|
||||
avail_ratio * 100.0
|
||||
),
|
||||
recommendation: if avail_status == CheckStatus::Fail {
|
||||
Some("Close some applications to free up memory before batch processing".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
if verbose {
|
||||
// Memory per core
|
||||
let mem_per_core = system_info.total_memory_mb / system_info.cpu_count as u64;
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Memory per Core".to_string(),
|
||||
category: "Memory".to_string(),
|
||||
status: CheckStatus::Info,
|
||||
message: format!("{} MB/core", mem_per_core),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
|
||||
checks
|
||||
}
|
||||
|
||||
fn check_dependencies(verbose: bool) -> Vec<DiagnosticCheck> {
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// Check for ONNX Runtime
|
||||
let onnx_status = check_onnx_runtime();
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "ONNX Runtime".to_string(),
|
||||
category: "Dependencies".to_string(),
|
||||
status: if onnx_status.0 {
|
||||
CheckStatus::Pass
|
||||
} else {
|
||||
CheckStatus::Warning
|
||||
},
|
||||
message: onnx_status.1.clone(),
|
||||
recommendation: if !onnx_status.0 {
|
||||
Some(
|
||||
"Install ONNX Runtime for neural network acceleration: https://onnxruntime.ai/"
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
// Check for image processing libraries
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Image Processing".to_string(),
|
||||
category: "Dependencies".to_string(),
|
||||
status: CheckStatus::Pass,
|
||||
message: "image crate available (built-in)".to_string(),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
// Check for OpenSSL (for HTTPS)
|
||||
let openssl_available = std::process::Command::new("openssl")
|
||||
.arg("version")
|
||||
.output()
|
||||
.map(|o| o.status.success())
|
||||
.unwrap_or(false);
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "OpenSSL".to_string(),
|
||||
category: "Dependencies".to_string(),
|
||||
status: if openssl_available {
|
||||
CheckStatus::Pass
|
||||
} else {
|
||||
CheckStatus::Warning
|
||||
},
|
||||
message: if openssl_available {
|
||||
"OpenSSL available for HTTPS".to_string()
|
||||
} else {
|
||||
"OpenSSL not found".to_string()
|
||||
},
|
||||
recommendation: if !openssl_available {
|
||||
Some("Install OpenSSL for secure API communication".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
if verbose {
|
||||
// Check Rust version
|
||||
if let Ok(output) = std::process::Command::new("rustc")
|
||||
.arg("--version")
|
||||
.output()
|
||||
{
|
||||
let version = String::from_utf8_lossy(&output.stdout);
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Rust Compiler".to_string(),
|
||||
category: "Dependencies".to_string(),
|
||||
status: CheckStatus::Info,
|
||||
message: version.trim().to_string(),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
checks
|
||||
}
|
||||
|
||||
/// Look for an ONNX Runtime installation.
///
/// Checks a few conventional shared-library locations first, then falls
/// back to the `ORT_DYLIB_PATH` environment variable. Returns
/// `(found, detail)`; the runtime is optional, so "not found" is merely
/// informational.
fn check_onnx_runtime() -> (bool, String) {
    const LIB_PATHS: [&str; 3] = [
        "/usr/lib/libonnxruntime.so",
        "/usr/local/lib/libonnxruntime.so",
        "/opt/onnxruntime/lib/libonnxruntime.so",
    ];

    if let Some(path) = LIB_PATHS.iter().find(|p| std::path::Path::new(p).exists()) {
        return (true, format!("Found at {}", path));
    }

    // No library on disk — honor an explicit override via the env var.
    if std::env::var("ORT_DYLIB_PATH").is_ok() {
        return (true, "Configured via ORT_DYLIB_PATH".to_string());
    }

    (
        false,
        "Not found (optional for ONNX acceleration)".to_string(),
    )
}
|
||||
|
||||
fn check_config(config_path: &Option<PathBuf>, verbose: bool) -> Vec<DiagnosticCheck> {
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// Check for config file
|
||||
let config_locations = [
|
||||
config_path.clone(),
|
||||
Some(PathBuf::from("scipix.toml")),
|
||||
Some(PathBuf::from("config/scipix.toml")),
|
||||
dirs::config_dir().map(|p| p.join("scipix/config.toml")),
|
||||
];
|
||||
|
||||
let mut found_config = false;
|
||||
for loc in config_locations.iter().flatten() {
|
||||
if loc.exists() {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Configuration File".to_string(),
|
||||
category: "Config".to_string(),
|
||||
status: CheckStatus::Pass,
|
||||
message: format!("Found at {}", loc.display()),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
found_config = true;
|
||||
|
||||
// Validate config content
|
||||
if let Ok(content) = std::fs::read_to_string(loc) {
|
||||
if content.contains("[api]") || content.contains("[processing]") {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Config Validity".to_string(),
|
||||
category: "Config".to_string(),
|
||||
status: CheckStatus::Pass,
|
||||
message: "Configuration file is valid".to_string(),
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !found_config {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Configuration File".to_string(),
|
||||
category: "Config".to_string(),
|
||||
status: CheckStatus::Info,
|
||||
message: "No configuration file found (using defaults)".to_string(),
|
||||
recommendation: Some("Create a scipix.toml for custom settings".to_string()),
|
||||
auto_fixable: true,
|
||||
});
|
||||
}
|
||||
|
||||
// Check environment variables
|
||||
let env_vars = [
|
||||
("SCIPIX_API_KEY", "API authentication"),
|
||||
("SCIPIX_MODEL_PATH", "Custom model path"),
|
||||
("SCIPIX_CACHE_DIR", "Cache directory"),
|
||||
];
|
||||
|
||||
for (var, desc) in &env_vars {
|
||||
let status = if std::env::var(var).is_ok() {
|
||||
CheckStatus::Pass
|
||||
} else {
|
||||
CheckStatus::Info
|
||||
};
|
||||
|
||||
if verbose || status == CheckStatus::Pass {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: format!("Env: {}", var),
|
||||
category: "Config".to_string(),
|
||||
status,
|
||||
message: if status == CheckStatus::Pass {
|
||||
format!("{} configured", desc)
|
||||
} else {
|
||||
format!("{} not set (optional)", desc)
|
||||
},
|
||||
recommendation: None,
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
checks
|
||||
}
|
||||
|
||||
async fn check_network(verbose: bool) -> Vec<DiagnosticCheck> {
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// Check localhost binding
|
||||
let localhost_available = tokio::net::TcpListener::bind("127.0.0.1:0").await.is_ok();
|
||||
|
||||
checks.push(DiagnosticCheck {
|
||||
name: "Localhost Binding".to_string(),
|
||||
category: "Network".to_string(),
|
||||
status: if localhost_available {
|
||||
CheckStatus::Pass
|
||||
} else {
|
||||
CheckStatus::Fail
|
||||
},
|
||||
message: if localhost_available {
|
||||
"Can bind to localhost".to_string()
|
||||
} else {
|
||||
"Cannot bind to localhost".to_string()
|
||||
},
|
||||
recommendation: if !localhost_available {
|
||||
Some("Check firewall settings and port availability".to_string())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
|
||||
// Check common ports
|
||||
let ports_to_check = [(8080, "API server"), (3000, "Alternative API")];
|
||||
|
||||
for (port, desc) in &ports_to_check {
|
||||
let available = tokio::net::TcpListener::bind(format!("127.0.0.1:{}", port))
|
||||
.await
|
||||
.is_ok();
|
||||
|
||||
if verbose || !available {
|
||||
checks.push(DiagnosticCheck {
|
||||
name: format!("Port {}", port),
|
||||
category: "Network".to_string(),
|
||||
status: if available {
|
||||
CheckStatus::Pass
|
||||
} else {
|
||||
CheckStatus::Warning
|
||||
},
|
||||
message: if available {
|
||||
format!("Port {} ({}) available", port, desc)
|
||||
} else {
|
||||
format!("Port {} ({}) in use", port, desc)
|
||||
},
|
||||
recommendation: if !available {
|
||||
Some(format!(
|
||||
"Free port {} or use --port to specify alternative",
|
||||
port
|
||||
))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
auto_fixable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
checks
|
||||
}
|
||||
|
||||
fn generate_optimal_config(system_info: &SystemInfo) -> OptimalConfig {
|
||||
// Calculate optimal batch size based on memory
|
||||
let batch_size = if system_info.available_memory_mb >= 8192 {
|
||||
32
|
||||
} else if system_info.available_memory_mb >= 4096 {
|
||||
16
|
||||
} else if system_info.available_memory_mb >= 2048 {
|
||||
8
|
||||
} else {
|
||||
4
|
||||
};
|
||||
|
||||
// Calculate worker threads (leave some headroom)
|
||||
let worker_threads = (system_info.cpu_count as f64 * 0.75).ceil() as usize;
|
||||
let worker_threads = worker_threads.max(2);
|
||||
|
||||
// Determine SIMD backend
|
||||
let simd_backend = system_info.simd_features.best_available.clone();
|
||||
|
||||
// Memory limit (use 60% of available)
|
||||
let memory_limit_mb = (system_info.available_memory_mb as f64 * 0.6) as u64;
|
||||
|
||||
// Preprocessing mode based on SIMD
|
||||
let preprocessing_mode = if system_info.simd_features.avx2 || system_info.simd_features.neon {
|
||||
"simd_optimized".to_string()
|
||||
} else if system_info.simd_features.sse4_2 {
|
||||
"simd_basic".to_string()
|
||||
} else {
|
||||
"scalar".to_string()
|
||||
};
|
||||
|
||||
// Cache settings
|
||||
let cache_enabled = system_info.available_memory_mb >= 2048;
|
||||
let cache_size_mb = if cache_enabled {
|
||||
(system_info.available_memory_mb as f64 * 0.1) as u64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
OptimalConfig {
|
||||
batch_size,
|
||||
worker_threads,
|
||||
simd_backend,
|
||||
memory_limit_mb,
|
||||
preprocessing_mode,
|
||||
cache_enabled,
|
||||
cache_size_mb,
|
||||
}
|
||||
}
|
||||
|
||||
/// Print a human-readable summary of the detected hardware to stdout.
fn print_system_info(info: &SystemInfo) {
    println!("📊 System Information:");
    println!("───────────────────────────────────────────────────────────");
    println!(" OS: {} ({})", info.os, info.arch);
    println!(" CPU: {}", info.cpu_brand);
    println!(" Cores: {}", info.cpu_count);
    println!(
        " Memory: {} MB total, {} MB available",
        info.total_memory_mb, info.available_memory_mb
    );
    println!(" Best SIMD: {}", info.simd_features.best_available);
    // Trailing blank line separates this section from whatever prints next.
    println!();
}
|
||||
|
||||
fn print_check_results(checks: &[DiagnosticCheck]) {
|
||||
println!("🔍 Diagnostic Checks:");
|
||||
println!("───────────────────────────────────────────────────────────");
|
||||
|
||||
let mut current_category = String::new();
|
||||
for check in checks {
|
||||
if check.category != current_category {
|
||||
if !current_category.is_empty() {
|
||||
println!();
|
||||
}
|
||||
println!(" [{}]", check.category);
|
||||
current_category = check.category.clone();
|
||||
}
|
||||
|
||||
let status_color = match check.status {
|
||||
CheckStatus::Pass => "\x1b[32m", // Green
|
||||
CheckStatus::Warning => "\x1b[33m", // Yellow
|
||||
CheckStatus::Fail => "\x1b[31m", // Red
|
||||
CheckStatus::Info => "\x1b[36m", // Cyan
|
||||
};
|
||||
|
||||
println!(
|
||||
" {}{}\x1b[0m {} - {}",
|
||||
status_color, check.status, check.name, check.message
|
||||
);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
/// Print the recommended settings plus a copy-pasteable `scipix.toml`
/// snippet to stdout.
fn print_optimal_config(config: &OptimalConfig) {
    println!("\n⚙️ Optimal Configuration:");
    println!("───────────────────────────────────────────────────────────");
    println!(" batch_size: {}", config.batch_size);
    println!(" worker_threads: {}", config.worker_threads);
    println!(" simd_backend: {}", config.simd_backend);
    println!(" memory_limit: {} MB", config.memory_limit_mb);
    println!(" preprocessing: {}", config.preprocessing_mode);
    println!(" cache_enabled: {}", config.cache_enabled);
    // Cache size is only meaningful when the cache is on.
    if config.cache_enabled {
        println!(" cache_size: {} MB", config.cache_size_mb);
    }

    // Ready-to-use TOML the user can paste into their config file.
    println!("\n 📝 Example configuration (scipix.toml):");
    println!(" ─────────────────────────────────────────");
    println!(" [processing]");
    println!(" batch_size = {}", config.batch_size);
    println!(" worker_threads = {}", config.worker_threads);
    println!(" simd_backend = \"{}\"", config.simd_backend);
    println!(" memory_limit_mb = {}", config.memory_limit_mb);
    println!();
    println!(" [cache]");
    println!(" enabled = {}", config.cache_enabled);
    println!(" size_mb = {}", config.cache_size_mb);
}
|
||||
|
||||
fn print_summary(checks: &[DiagnosticCheck]) {
|
||||
let pass_count = checks
|
||||
.iter()
|
||||
.filter(|c| c.status == CheckStatus::Pass)
|
||||
.count();
|
||||
let warn_count = checks
|
||||
.iter()
|
||||
.filter(|c| c.status == CheckStatus::Warning)
|
||||
.count();
|
||||
let fail_count = checks
|
||||
.iter()
|
||||
.filter(|c| c.status == CheckStatus::Fail)
|
||||
.count();
|
||||
|
||||
println!("\n═══════════════════════════════════════════════════════════");
|
||||
println!(
|
||||
"📋 Summary: {} passed, {} warnings, {} failed",
|
||||
pass_count, warn_count, fail_count
|
||||
);
|
||||
|
||||
if fail_count > 0 {
|
||||
println!("\n⚠️ Some checks failed. Review recommendations above.");
|
||||
} else if warn_count > 0 {
|
||||
println!("\n✓ System is functional with some areas for improvement.");
|
||||
} else {
|
||||
println!("\n✅ System is optimally configured for SciPix!");
|
||||
}
|
||||
}
|
||||
|
||||
async fn apply_fixes(checks: &[DiagnosticCheck]) -> Result<()> {
|
||||
println!("\n🔧 Applying automatic fixes...");
|
||||
println!("───────────────────────────────────────────────────────────");
|
||||
|
||||
let fixable: Vec<_> = checks.iter().filter(|c| c.auto_fixable).collect();
|
||||
|
||||
if fixable.is_empty() {
|
||||
println!(" No automatic fixes available.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for check in fixable {
|
||||
println!(" Fixing: {}", check.name);
|
||||
|
||||
if check.name == "Configuration File" {
|
||||
// Create default config file
|
||||
let config_content = r#"# SciPix Configuration
|
||||
# Generated by scipix doctor --fix
|
||||
|
||||
[processing]
|
||||
batch_size = 16
|
||||
worker_threads = 4
|
||||
simd_backend = "auto"
|
||||
memory_limit_mb = 4096
|
||||
|
||||
[cache]
|
||||
enabled = true
|
||||
size_mb = 256
|
||||
|
||||
[api]
|
||||
host = "127.0.0.1"
|
||||
port = 8080
|
||||
timeout_seconds = 30
|
||||
|
||||
[logging]
|
||||
level = "info"
|
||||
format = "pretty"
|
||||
"#;
|
||||
|
||||
// Create config directory if needed
|
||||
let config_path = PathBuf::from("config");
|
||||
if !config_path.exists() {
|
||||
std::fs::create_dir_all(&config_path)?;
|
||||
}
|
||||
|
||||
let config_file = config_path.join("scipix.toml");
|
||||
std::fs::write(&config_file, config_content)?;
|
||||
println!(" ✓ Created {}", config_file.display());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
806
vendor/ruvector/examples/scipix/src/cli/commands/mcp.rs
vendored
Normal file
806
vendor/ruvector/examples/scipix/src/cli/commands/mcp.rs
vendored
Normal file
@@ -0,0 +1,806 @@
|
||||
//! MCP (Model Context Protocol) Server Implementation for SciPix
|
||||
//!
|
||||
//! Implements the MCP 2025-11 specification for exposing OCR capabilities
|
||||
//! as tools that can be discovered and invoked by AI hosts.
|
||||
//!
|
||||
//! ## Usage
|
||||
//! ```bash
|
||||
//! scipix-cli mcp
|
||||
//! ```
|
||||
//!
|
||||
//! ## Protocol
|
||||
//! Uses JSON-RPC 2.0 over STDIO for communication.
|
||||
|
||||
use clap::Args;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::io::{self, BufRead, Write};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Command-line arguments for the MCP server subcommand.
#[derive(Args, Debug, Clone)]
pub struct McpArgs {
    /// Enable debug logging for MCP messages
    #[arg(long, help = "Enable debug logging")]
    pub debug: bool,

    /// Custom model path for OCR
    #[arg(long, help = "Path to ONNX models directory")]
    pub models_dir: Option<PathBuf>,
}
|
||||
|
||||
/// JSON-RPC 2.0 Request
///
/// `id` is `None` for notifications (requests that expect no response).
#[derive(Debug, Deserialize)]
struct JsonRpcRequest {
    // Protocol marker, always "2.0" per the spec; parsed but never read.
    #[allow(dead_code)]
    jsonrpc: String,
    id: Option<Value>,
    method: String,
    params: Option<Value>,
}
|
||||
|
||||
/// JSON-RPC 2.0 Response
///
/// Exactly one of `result` / `error` is populated by the constructors;
/// the unset side is omitted from the serialized output.
#[derive(Debug, Serialize)]
struct JsonRpcResponse {
    jsonrpc: String, // always "2.0"
    id: Value,       // mirrors the request id (Null when the request had none)
    #[serde(skip_serializing_if = "Option::is_none")]
    result: Option<Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<JsonRpcError>,
}
|
||||
|
||||
/// JSON-RPC 2.0 Error
#[derive(Debug, Serialize)]
struct JsonRpcError {
    code: i32,       // standard JSON-RPC error code (e.g. -32601 "Method not found")
    message: String, // human-readable description
    // Optional structured detail; the error() constructor leaves it None.
    #[serde(skip_serializing_if = "Option::is_none")]
    data: Option<Value>,
}
|
||||
|
||||
/// MCP Server Info — identity block returned in the `initialize` result.
#[derive(Debug, Serialize)]
struct ServerInfo {
    name: String,    // server name ("scipix-mcp")
    version: String, // crate version taken from CARGO_PKG_VERSION
}
|
||||
|
||||
/// MCP Server Capabilities advertised to the client during initialization.
#[derive(Debug, Serialize)]
struct ServerCapabilities {
    tools: ToolsCapability,
    // None: this server exposes no MCP resources, only tools.
    #[serde(skip_serializing_if = "Option::is_none")]
    resources: Option<ResourcesCapability>,
}
|
||||
|
||||
/// Tool-related capability flags.
#[derive(Debug, Serialize)]
struct ToolsCapability {
    // Whether the server emits notifications when its tool list changes.
    #[serde(rename = "listChanged")]
    list_changed: bool,
}
|
||||
|
||||
/// Resource-related capability flags (declared but currently unused —
/// `ServerCapabilities.resources` is always None).
#[derive(Debug, Serialize)]
struct ResourcesCapability {
    subscribe: bool, // whether clients may subscribe to resource updates
    #[serde(rename = "listChanged")]
    list_changed: bool, // whether the resource list emits change notifications
}
|
||||
|
||||
/// MCP Tool Definition, exposed to clients via `tools/list`.
#[derive(Debug, Serialize)]
struct Tool {
    name: String,        // unique tool identifier
    description: String, // usage guidance shown to the AI host
    // JSON Schema describing the tool's accepted arguments.
    #[serde(rename = "inputSchema")]
    input_schema: Value,
}
|
||||
|
||||
/// Tool call result payload for `tools/call` responses.
#[derive(Debug, Serialize)]
#[allow(dead_code)]
struct ToolResult {
    content: Vec<ContentBlock>,
    // Marks the result as an error; omitted from JSON when None.
    #[serde(rename = "isError", skip_serializing_if = "Option::is_none")]
    is_error: Option<bool>,
}
|
||||
|
||||
/// A single block of tool-result content.
#[derive(Debug, Serialize)]
#[allow(dead_code)]
struct ContentBlock {
    // Serialized as "type" — `type` is a reserved word in Rust.
    #[serde(rename = "type")]
    content_type: String,
    text: String,
}
|
||||
|
||||
impl JsonRpcResponse {
|
||||
fn success(id: Value, result: Value) -> Self {
|
||||
Self {
|
||||
jsonrpc: "2.0".to_string(),
|
||||
id,
|
||||
result: Some(result),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn error(id: Value, code: i32, message: &str) -> Self {
|
||||
Self {
|
||||
jsonrpc: "2.0".to_string(),
|
||||
id,
|
||||
result: None,
|
||||
error: Some(JsonRpcError {
|
||||
code,
|
||||
message: message.to_string(),
|
||||
data: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// MCP Server state shared across request handlers.
struct McpServer {
    debug: bool, // echo protocol traffic to stderr when true
    // Optional override for the ONNX model location, taken from the CLI.
    #[allow(dead_code)]
    models_dir: Option<PathBuf>,
}
|
||||
|
||||
impl McpServer {
|
||||
fn new(args: &McpArgs) -> Self {
|
||||
Self {
|
||||
debug: args.debug,
|
||||
models_dir: args.models_dir.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get server info for initialization
|
||||
fn server_info(&self) -> ServerInfo {
|
||||
ServerInfo {
|
||||
name: "scipix-mcp".to_string(),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get server capabilities
|
||||
fn capabilities(&self) -> ServerCapabilities {
|
||||
ServerCapabilities {
|
||||
tools: ToolsCapability {
|
||||
list_changed: false,
|
||||
},
|
||||
resources: None,
|
||||
}
|
||||
}
|
||||
|
||||
    /// Define available tools with examples following Anthropic best practices
    /// See: https://www.anthropic.com/engineering/advanced-tool-use
    ///
    /// Returns the static tool catalog served by `tools/list`. Each entry
    /// pairs a rich natural-language description (when to use, examples,
    /// return shape) with a JSON Schema for its arguments.
    fn get_tools(&self) -> Vec<Tool> {
        vec![
            // OCR on an image file path.
            Tool {
                name: "ocr_image".to_string(),
                description: r#"Process an image file with OCR to extract text and mathematical formulas.

WHEN TO USE: Use this tool when you have an image file path containing text, equations,
or mathematical notation that needs to be converted to a machine-readable format.

EXAMPLES:
- Extract LaTeX from a photo of a math equation: {"image_path": "equation.png", "format": "latex"}
- Get plain text from a document scan: {"image_path": "document.jpg", "format": "text"}
- Convert handwritten math to AsciiMath: {"image_path": "notes.png", "format": "asciimath"}

RETURNS: JSON with the recognized content, confidence score (0-1), and processing metadata."#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "image_path": {
                            "type": "string",
                            "description": "Absolute or relative path to image file (PNG, JPG, JPEG, GIF, BMP, TIFF supported)"
                        },
                        "format": {
                            "type": "string",
                            "enum": ["latex", "text", "mathml", "asciimath"],
                            "default": "latex",
                            "description": "Output format: 'latex' for mathematical notation, 'text' for plain text, 'mathml' for XML, 'asciimath' for simple notation"
                        }
                    },
                    "required": ["image_path"],
                    "examples": [
                        {"image_path": "/path/to/equation.png", "format": "latex"},
                        {"image_path": "document.jpg", "format": "text"}
                    ]
                }),
            },
            // OCR on inline base64 image data.
            Tool {
                name: "ocr_base64".to_string(),
                description: r#"Process a base64-encoded image with OCR. Use when image data is inline rather than a file.

WHEN TO USE: Use this tool when you have image data as a base64 string (e.g., from an API
response, clipboard, or embedded in a document) rather than a file path.

EXAMPLES:
- Process clipboard image: {"image_data": "iVBORw0KGgo...", "format": "latex"}
- Extract text from API response image: {"image_data": "<base64_string>", "format": "text"}

NOTE: The base64 string should not include the data URI prefix (e.g., "data:image/png;base64,")."#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "image_data": {
                            "type": "string",
                            "description": "Base64-encoded image data (without data URI prefix)"
                        },
                        "format": {
                            "type": "string",
                            "enum": ["latex", "text", "mathml", "asciimath"],
                            "default": "latex",
                            "description": "Output format for recognized content"
                        }
                    },
                    "required": ["image_data"]
                }),
            },
            // Bulk OCR over a directory.
            Tool {
                name: "batch_ocr".to_string(),
                description: r#"Process multiple images in a directory with OCR. Efficient for bulk operations.

WHEN TO USE: Use this tool when you need to process 3+ images in the same directory.
For 1-2 images, use ocr_image instead for simpler results.

EXAMPLES:
- Process all PNGs in a folder: {"directory": "./images", "pattern": "*.png"}
- Process specific equation images: {"directory": "/docs/math", "pattern": "eq_*.jpg"}
- Get JSON results for all images: {"directory": ".", "pattern": "*.{png,jpg}", "format": "json"}

RETURNS: Array of results with file paths, recognized content, and confidence scores."#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "directory": {
                            "type": "string",
                            "description": "Directory path containing images to process"
                        },
                        "pattern": {
                            "type": "string",
                            "default": "*.png",
                            "description": "Glob pattern to match files (e.g., '*.png', '*.{jpg,png}', 'equation_*.jpg')"
                        },
                        "format": {
                            "type": "string",
                            "enum": ["latex", "text", "json"],
                            "default": "json",
                            "description": "Output format: 'json' for structured results (recommended), 'latex' or 'text' for concatenated output"
                        }
                    },
                    "required": ["directory"]
                }),
            },
            // Image cleanup prior to OCR.
            Tool {
                name: "preprocess_image".to_string(),
                description: r#"Apply preprocessing operations to optimize an image for OCR.

WHEN TO USE: Use this tool BEFORE ocr_image when dealing with:
- Low contrast images (use threshold)
- Large images that need resizing (use resize)
- Color images (use grayscale for faster processing)
- Noisy or blurry images (use denoise)

EXAMPLES:
- Prepare scan for OCR: {"image_path": "scan.jpg", "output_path": "scan_clean.png", "operations": ["grayscale", "threshold"]}
- Resize large image: {"image_path": "photo.jpg", "output_path": "photo_small.png", "operations": ["resize"], "target_width": 800}

WORKFLOW: preprocess_image -> ocr_image for best results on problematic images."#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "image_path": {
                            "type": "string",
                            "description": "Path to input image file"
                        },
                        "output_path": {
                            "type": "string",
                            "description": "Path for preprocessed output image"
                        },
                        "operations": {
                            "type": "array",
                            "items": {
                                "type": "string",
                                "enum": ["grayscale", "resize", "threshold", "denoise", "deskew"]
                            },
                            "default": ["grayscale", "resize"],
                            "description": "Operations to apply in order: grayscale (convert to B&W), resize (scale to target size), threshold (binarize), denoise (reduce noise), deskew (straighten)"
                        },
                        "target_width": {
                            "type": "integer",
                            "default": 640,
                            "description": "Target width for resize (preserves aspect ratio)"
                        },
                        "target_height": {
                            "type": "integer",
                            "default": 480,
                            "description": "Target height for resize (preserves aspect ratio)"
                        }
                    },
                    "required": ["image_path", "output_path"]
                }),
            },
            // LaTeX -> MathML conversion.
            Tool {
                name: "latex_to_mathml".to_string(),
                description: r#"Convert LaTeX mathematical notation to MathML XML format.

WHEN TO USE: Use this tool when you need MathML output from LaTeX, such as:
- Generating accessible math content for web pages
- Converting equations for screen readers
- Integrating with systems that require MathML

EXAMPLES:
- Convert fraction: {"latex": "\\frac{1}{2}"}
- Convert integral: {"latex": "\\int_0^1 x^2 dx"}
- Convert matrix: {"latex": "\\begin{pmatrix} a & b \\\\ c & d \\end{pmatrix}"}"#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "latex": {
                            "type": "string",
                            "description": "LaTeX expression to convert (with or without $ delimiters)"
                        }
                    },
                    "required": ["latex"],
                    "examples": [
                        {"latex": "\\frac{a}{b}"},
                        {"latex": "E = mc^2"}
                    ]
                }),
            },
            // Pipeline timing diagnostics.
            Tool {
                name: "benchmark_performance".to_string(),
                description: r#"Run performance benchmarks on the OCR pipeline and return timing metrics.

WHEN TO USE: Use this tool to:
- Verify OCR performance on your system
- Compare preprocessing options
- Debug slow processing issues

EXAMPLES:
- Quick performance check: {"iterations": 5}
- Test specific image: {"image_path": "test.png", "iterations": 10}

RETURNS: Average processing times for grayscale, resize operations, and system info."#.to_string(),
                input_schema: json!({
                    "type": "object",
                    "properties": {
                        "iterations": {
                            "type": "integer",
                            "default": 10,
                            "minimum": 1,
                            "maximum": 100,
                            "description": "Number of benchmark iterations (higher = more accurate, slower)"
                        },
                        "image_path": {
                            "type": "string",
                            "description": "Optional: Path to test image (uses generated test image if not provided)"
                        }
                    }
                }),
            },
        ]
    }
|
||||
|
||||
    /// Handle incoming JSON-RPC request
    ///
    /// Dispatches to the handler matching `method`. Requests without an id
    /// (notifications) are answered with a `Null` id. Unknown methods yield
    /// the standard -32601 "Method not found" error.
    async fn handle_request(&self, request: JsonRpcRequest) -> JsonRpcResponse {
        let id = request.id.unwrap_or(Value::Null);

        // Debug traffic goes to stderr so stdout stays a clean JSON-RPC stream.
        if self.debug {
            eprintln!("[MCP DEBUG] Method: {}", request.method);
            if let Some(ref params) = request.params {
                eprintln!(
                    "[MCP DEBUG] Params: {}",
                    serde_json::to_string_pretty(params).unwrap_or_default()
                );
            }
        }

        match request.method.as_str() {
            "initialize" => self.handle_initialize(id, request.params),
            // "initialized" is a client notification; acknowledge with an empty result.
            "initialized" => JsonRpcResponse::success(id, json!({})),
            "tools/list" => self.handle_tools_list(id),
            "tools/call" => self.handle_tools_call(id, request.params).await,
            "ping" => JsonRpcResponse::success(id, json!({})),
            "shutdown" => {
                // NOTE(review): terminates immediately, so no response is ever
                // written for the shutdown request — confirm this is intended.
                std::process::exit(0);
            }
            _ => {
                JsonRpcResponse::error(id, -32601, &format!("Method not found: {}", request.method))
            }
        }
    }
|
||||
|
||||
/// Handle initialize request
|
||||
fn handle_initialize(&self, id: Value, params: Option<Value>) -> JsonRpcResponse {
|
||||
if self.debug {
|
||||
if let Some(p) = ¶ms {
|
||||
eprintln!(
|
||||
"[MCP DEBUG] Client info: {}",
|
||||
serde_json::to_string_pretty(p).unwrap_or_default()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
JsonRpcResponse::success(
|
||||
id,
|
||||
json!({
|
||||
"protocolVersion": "2024-11-05",
|
||||
"serverInfo": self.server_info(),
|
||||
"capabilities": self.capabilities()
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
/// Handle tools/list request
|
||||
fn handle_tools_list(&self, id: Value) -> JsonRpcResponse {
|
||||
JsonRpcResponse::success(
|
||||
id,
|
||||
json!({
|
||||
"tools": self.get_tools()
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
/// Handle tools/call request
|
||||
async fn handle_tools_call(&self, id: Value, params: Option<Value>) -> JsonRpcResponse {
|
||||
let params = match params {
|
||||
Some(p) => p,
|
||||
None => return JsonRpcResponse::error(id, -32602, "Missing params"),
|
||||
};
|
||||
|
||||
let tool_name = params.get("name").and_then(|n| n.as_str()).unwrap_or("");
|
||||
let arguments = params.get("arguments").cloned().unwrap_or(json!({}));
|
||||
|
||||
if self.debug {
|
||||
eprintln!(
|
||||
"[MCP DEBUG] Tool call: {} with args: {}",
|
||||
tool_name, arguments
|
||||
);
|
||||
}
|
||||
|
||||
let result = match tool_name {
|
||||
"ocr_image" => self.call_ocr_image(&arguments).await,
|
||||
"ocr_base64" => self.call_ocr_base64(&arguments).await,
|
||||
"batch_ocr" => self.call_batch_ocr(&arguments).await,
|
||||
"preprocess_image" => self.call_preprocess_image(&arguments).await,
|
||||
"latex_to_mathml" => self.call_latex_to_mathml(&arguments).await,
|
||||
"benchmark_performance" => self.call_benchmark(&arguments).await,
|
||||
_ => Err(format!("Unknown tool: {}", tool_name)),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(content) => JsonRpcResponse::success(
|
||||
id,
|
||||
json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": content
|
||||
}]
|
||||
}),
|
||||
),
|
||||
Err(e) => JsonRpcResponse::success(
|
||||
id,
|
||||
json!({
|
||||
"content": [{
|
||||
"type": "text",
|
||||
"text": e
|
||||
}],
|
||||
"isError": true
|
||||
}),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// OCR image file
|
||||
async fn call_ocr_image(&self, args: &Value) -> Result<String, String> {
|
||||
let image_path = args
|
||||
.get("image_path")
|
||||
.and_then(|p| p.as_str())
|
||||
.ok_or("Missing image_path parameter")?;
|
||||
|
||||
let format = args
|
||||
.get("format")
|
||||
.and_then(|f| f.as_str())
|
||||
.unwrap_or("latex");
|
||||
|
||||
// Check if file exists
|
||||
if !std::path::Path::new(image_path).exists() {
|
||||
return Err(format!("Image file not found: {}", image_path));
|
||||
}
|
||||
|
||||
// Load and process image
|
||||
let img = image::open(image_path).map_err(|e| format!("Failed to load image: {}", e))?;
|
||||
|
||||
// Perform OCR (using mock for now, real inference when models are available)
|
||||
let result = self.perform_ocr(&img, format).await?;
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"file": image_path,
|
||||
"format": format,
|
||||
"result": result,
|
||||
"confidence": 0.95
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// OCR base64 image
|
||||
async fn call_ocr_base64(&self, args: &Value) -> Result<String, String> {
|
||||
let image_data = args
|
||||
.get("image_data")
|
||||
.and_then(|d| d.as_str())
|
||||
.ok_or("Missing image_data parameter")?;
|
||||
|
||||
let format = args
|
||||
.get("format")
|
||||
.and_then(|f| f.as_str())
|
||||
.unwrap_or("latex");
|
||||
|
||||
// Decode base64
|
||||
let decoded =
|
||||
base64::Engine::decode(&base64::engine::general_purpose::STANDARD, image_data)
|
||||
.map_err(|e| format!("Invalid base64 data: {}", e))?;
|
||||
|
||||
// Load image from bytes
|
||||
let img = image::load_from_memory(&decoded)
|
||||
.map_err(|e| format!("Failed to load image from data: {}", e))?;
|
||||
|
||||
// Perform OCR
|
||||
let result = self.perform_ocr(&img, format).await?;
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"format": format,
|
||||
"result": result,
|
||||
"confidence": 0.95
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Batch OCR processing
|
||||
async fn call_batch_ocr(&self, args: &Value) -> Result<String, String> {
|
||||
let directory = args
|
||||
.get("directory")
|
||||
.and_then(|d| d.as_str())
|
||||
.ok_or("Missing directory parameter")?;
|
||||
|
||||
let pattern = args
|
||||
.get("pattern")
|
||||
.and_then(|p| p.as_str())
|
||||
.unwrap_or("*.png");
|
||||
|
||||
let format = args
|
||||
.get("format")
|
||||
.and_then(|f| f.as_str())
|
||||
.unwrap_or("json");
|
||||
|
||||
// Find files matching pattern
|
||||
let glob_pattern = format!("{}/{}", directory, pattern);
|
||||
let paths: Vec<_> = glob::glob(&glob_pattern)
|
||||
.map_err(|e| format!("Invalid glob pattern: {}", e))?
|
||||
.filter_map(|p| p.ok())
|
||||
.collect();
|
||||
|
||||
let mut results = Vec::new();
|
||||
for path in &paths {
|
||||
let img = match image::open(path) {
|
||||
Ok(img) => img,
|
||||
Err(e) => {
|
||||
results.push(json!({
|
||||
"file": path.display().to_string(),
|
||||
"error": e.to_string()
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let ocr_result = self.perform_ocr(&img, format).await.unwrap_or_else(|e| e);
|
||||
results.push(json!({
|
||||
"file": path.display().to_string(),
|
||||
"result": ocr_result,
|
||||
"confidence": 0.95
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"total": paths.len(),
|
||||
"processed": results.len(),
|
||||
"results": results
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Preprocess image
|
||||
async fn call_preprocess_image(&self, args: &Value) -> Result<String, String> {
|
||||
let image_path = args
|
||||
.get("image_path")
|
||||
.and_then(|p| p.as_str())
|
||||
.ok_or("Missing image_path parameter")?;
|
||||
|
||||
let output_path = args
|
||||
.get("output_path")
|
||||
.and_then(|p| p.as_str())
|
||||
.ok_or("Missing output_path parameter")?;
|
||||
|
||||
let operations: Vec<&str> = args
|
||||
.get("operations")
|
||||
.and_then(|o| o.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
|
||||
.unwrap_or_else(|| vec!["grayscale", "resize"]);
|
||||
|
||||
// Load image
|
||||
let mut img =
|
||||
image::open(image_path).map_err(|e| format!("Failed to load image: {}", e))?;
|
||||
|
||||
// Apply operations
|
||||
for op in &operations {
|
||||
match *op {
|
||||
"grayscale" => {
|
||||
img = image::DynamicImage::ImageLuma8(img.to_luma8());
|
||||
}
|
||||
"resize" => {
|
||||
let width = args
|
||||
.get("target_width")
|
||||
.and_then(|w| w.as_u64())
|
||||
.unwrap_or(640) as u32;
|
||||
let height = args
|
||||
.get("target_height")
|
||||
.and_then(|h| h.as_u64())
|
||||
.unwrap_or(480) as u32;
|
||||
img = img.resize(width, height, image::imageops::FilterType::Lanczos3);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Save output
|
||||
img.save(output_path)
|
||||
.map_err(|e| format!("Failed to save image: {}", e))?;
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"input": image_path,
|
||||
"output": output_path,
|
||||
"operations": operations,
|
||||
"dimensions": {
|
||||
"width": img.width(),
|
||||
"height": img.height()
|
||||
}
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Convert LaTeX to MathML
|
||||
async fn call_latex_to_mathml(&self, args: &Value) -> Result<String, String> {
|
||||
let latex = args
|
||||
.get("latex")
|
||||
.and_then(|l| l.as_str())
|
||||
.ok_or("Missing latex parameter")?;
|
||||
|
||||
// Simple LaTeX to MathML conversion (placeholder)
|
||||
let mathml = format!(
|
||||
r#"<math xmlns="http://www.w3.org/1998/Math/MathML"><mrow><mi>{}</mi></mrow></math>"#,
|
||||
latex.replace("\\", "").replace("{", "").replace("}", "")
|
||||
);
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"latex": latex,
|
||||
"mathml": mathml
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Run performance benchmark
|
||||
async fn call_benchmark(&self, args: &Value) -> Result<String, String> {
|
||||
let iterations = args
|
||||
.get("iterations")
|
||||
.and_then(|i| i.as_u64())
|
||||
.unwrap_or(10) as usize;
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
// Generate test image
|
||||
let test_img =
|
||||
image::DynamicImage::ImageRgb8(image::ImageBuffer::from_fn(400, 100, |_, _| {
|
||||
image::Rgb([255u8, 255u8, 255u8])
|
||||
}));
|
||||
|
||||
// Benchmark preprocessing
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _gray = test_img.to_luma8();
|
||||
}
|
||||
let grayscale_time = start.elapsed() / iterations as u32;
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..iterations {
|
||||
let _resized = test_img.resize(640, 480, image::imageops::FilterType::Nearest);
|
||||
}
|
||||
let resize_time = start.elapsed() / iterations as u32;
|
||||
|
||||
Ok(serde_json::to_string_pretty(&json!({
|
||||
"iterations": iterations,
|
||||
"benchmarks": {
|
||||
"grayscale_avg_ms": grayscale_time.as_secs_f64() * 1000.0,
|
||||
"resize_avg_ms": resize_time.as_secs_f64() * 1000.0,
|
||||
},
|
||||
"system": {
|
||||
"cpu_cores": num_cpus::get()
|
||||
}
|
||||
}))
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Perform OCR on image (placeholder implementation)
|
||||
async fn perform_ocr(
|
||||
&self,
|
||||
_img: &image::DynamicImage,
|
||||
format: &str,
|
||||
) -> Result<String, String> {
|
||||
// This is a placeholder - in production, this would call the actual OCR engine
|
||||
let result = match format {
|
||||
"latex" => r"\int_0^1 x^2 \, dx = \frac{1}{3}".to_string(),
|
||||
"text" => "Sample OCR extracted text".to_string(),
|
||||
"mathml" => r#"<math><mrow><mi>x</mi><mo>=</mo><mn>2</mn></mrow></math>"#.to_string(),
|
||||
"asciimath" => "int_0^1 x^2 dx = 1/3".to_string(),
|
||||
_ => "Unknown format".to_string(),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the MCP server
///
/// Reads newline-delimited JSON-RPC requests from stdin and writes one JSON
/// response per line to stdout, flushing after each write so the client sees
/// responses immediately. Malformed lines produce a -32700 parse-error
/// response instead of terminating the loop; the loop ends on stdin EOF.
///
/// NOTE(review): this uses blocking `std::io` stdin reads inside an async fn,
/// which ties up the runtime thread between requests — presumably acceptable
/// for a dedicated stdio server process, but worth confirming if this ever
/// shares a runtime with other tasks.
pub async fn run(args: McpArgs) -> anyhow::Result<()> {
    let server = McpServer::new(&args);

    if args.debug {
        eprintln!("[MCP] SciPix MCP Server starting...");
        eprintln!("[MCP] Version: {}", env!("CARGO_PKG_VERSION"));
    }

    let stdin = io::stdin();
    let mut stdout = io::stdout();

    for line in stdin.lock().lines() {
        let line = match line {
            Ok(l) => l,
            Err(e) => {
                if args.debug {
                    eprintln!("[MCP ERROR] Failed to read stdin: {}", e);
                }
                // Skip unreadable lines rather than shutting down.
                continue;
            }
        };

        // Ignore blank lines between messages.
        if line.trim().is_empty() {
            continue;
        }

        if args.debug {
            eprintln!("[MCP DEBUG] Received: {}", line);
        }

        let request: JsonRpcRequest = match serde_json::from_str(&line) {
            Ok(req) => req,
            Err(e) => {
                // JSON-RPC parse error: the request id is unknowable, so
                // respond with a null id and keep serving.
                let error_response =
                    JsonRpcResponse::error(Value::Null, -32700, &format!("Parse error: {}", e));
                let output = serde_json::to_string(&error_response).unwrap_or_default();
                writeln!(stdout, "{}", output)?;
                stdout.flush()?;
                continue;
            }
        };

        let response = server.handle_request(request).await;
        let output = serde_json::to_string(&response)?;

        if args.debug {
            eprintln!("[MCP DEBUG] Response: {}", output);
        }

        // One response per line; flush so the client is never left waiting.
        writeln!(stdout, "{}", output)?;
        stdout.flush()?;
    }

    Ok(())
}
|
||||
99
vendor/ruvector/examples/scipix/src/cli/commands/mod.rs
vendored
Normal file
99
vendor/ruvector/examples/scipix/src/cli/commands/mod.rs
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
pub mod batch;
|
||||
pub mod config;
|
||||
pub mod doctor;
|
||||
pub mod mcp;
|
||||
pub mod ocr;
|
||||
pub mod serve;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Common result structure for OCR operations
///
/// Serialized directly via serde for JSON output, so the field names here are
/// part of the public output format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrResult {
    /// Source file path
    pub file: PathBuf,

    /// Extracted text content
    pub text: String,

    /// LaTeX representation (if available)
    pub latex: Option<String>,

    /// Confidence score (0.0 to 1.0)
    pub confidence: f64,

    /// Processing time in milliseconds
    pub processing_time_ms: u64,

    /// Any errors or warnings
    pub errors: Vec<String>,
}
|
||||
|
||||
impl OcrResult {
|
||||
/// Create a new OCR result
|
||||
pub fn new(file: PathBuf, text: String, confidence: f64) -> Self {
|
||||
Self {
|
||||
file,
|
||||
text,
|
||||
latex: None,
|
||||
confidence,
|
||||
processing_time_ms: 0,
|
||||
errors: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set LaTeX content
|
||||
pub fn with_latex(mut self, latex: String) -> Self {
|
||||
self.latex = Some(latex);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set processing time
|
||||
pub fn with_processing_time(mut self, time_ms: u64) -> Self {
|
||||
self.processing_time_ms = time_ms;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an error message
|
||||
pub fn add_error(&mut self, error: String) {
|
||||
self.errors.push(error);
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for OCR processing
///
/// Loadable from TOML (see `load_config`) or constructed via `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OcrConfig {
    /// Minimum confidence threshold
    pub min_confidence: f64,

    /// Maximum image size in bytes
    pub max_image_size: usize,

    /// Supported file extensions (compared lowercase, without the dot)
    pub supported_extensions: Vec<String>,

    /// API endpoint (if using remote service)
    pub api_endpoint: Option<String>,

    /// API key (if using remote service)
    pub api_key: Option<String>,
}
|
||||
|
||||
impl Default for OcrConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_confidence: 0.7,
|
||||
max_image_size: 10 * 1024 * 1024, // 10MB
|
||||
supported_extensions: vec![
|
||||
"png".to_string(),
|
||||
"jpg".to_string(),
|
||||
"jpeg".to_string(),
|
||||
"pdf".to_string(),
|
||||
"gif".to_string(),
|
||||
],
|
||||
api_endpoint: None,
|
||||
api_key: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
210
vendor/ruvector/examples/scipix/src/cli/commands/ocr.rs
vendored
Normal file
210
vendor/ruvector/examples/scipix/src/cli/commands/ocr.rs
vendored
Normal file
@@ -0,0 +1,210 @@
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Args;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::{OcrConfig, OcrResult};
|
||||
use crate::cli::{output, Cli, OutputFormat};
|
||||
|
||||
/// Process a single image or file with OCR
// NOTE: the `///` comments and `help = …` attributes below double as clap
// help text; keep them in sync when editing.
#[derive(Args, Debug, Clone)]
pub struct OcrArgs {
    /// Path to the image file to process
    #[arg(value_name = "FILE", help = "Path to the image file")]
    pub file: PathBuf,

    /// Minimum confidence threshold (0.0 to 1.0)
    // NOTE(review): clap does not range-check this value; `execute` is
    // responsible for rejecting out-of-range thresholds.
    #[arg(
        short = 't',
        long,
        default_value = "0.7",
        help = "Minimum confidence threshold for results"
    )]
    pub threshold: f64,

    /// Save output to file instead of stdout
    #[arg(
        short,
        long,
        value_name = "OUTPUT",
        help = "Save output to file instead of stdout"
    )]
    pub output: Option<PathBuf>,

    /// Pretty-print JSON output
    #[arg(
        short,
        long,
        help = "Pretty-print JSON output (only with --format json)"
    )]
    pub pretty: bool,

    /// Include metadata in output
    #[arg(short, long, help = "Include processing metadata in output")]
    pub metadata: bool,

    /// Force processing even if confidence is below threshold
    #[arg(
        short = 'f',
        long,
        help = "Force processing even if confidence is below threshold"
    )]
    pub force: bool,
}
|
||||
|
||||
pub async fn execute(args: OcrArgs, cli: &Cli) -> Result<()> {
|
||||
info!("Processing file: {}", args.file.display());
|
||||
|
||||
// Validate input file
|
||||
if !args.file.exists() {
|
||||
anyhow::bail!("File not found: {}", args.file.display());
|
||||
}
|
||||
|
||||
if !args.file.is_file() {
|
||||
anyhow::bail!("Not a file: {}", args.file.display());
|
||||
}
|
||||
|
||||
// Load configuration
|
||||
let config = load_config(cli.config.as_ref())?;
|
||||
|
||||
// Validate file extension
|
||||
if let Some(ext) = args.file.extension() {
|
||||
let ext_str = ext.to_string_lossy().to_lowercase();
|
||||
if !config.supported_extensions.contains(&ext_str) {
|
||||
anyhow::bail!(
|
||||
"Unsupported file extension: {}. Supported: {}",
|
||||
ext_str,
|
||||
config.supported_extensions.join(", ")
|
||||
);
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("File has no extension");
|
||||
}
|
||||
|
||||
// Check file size
|
||||
let metadata = std::fs::metadata(&args.file).context("Failed to read file metadata")?;
|
||||
|
||||
if metadata.len() as usize > config.max_image_size {
|
||||
anyhow::bail!(
|
||||
"File too large: {} bytes (max: {} bytes)",
|
||||
metadata.len(),
|
||||
config.max_image_size
|
||||
);
|
||||
}
|
||||
|
||||
// Process the file
|
||||
let start = Instant::now();
|
||||
let result = process_file(&args.file, &config).await?;
|
||||
let processing_time = start.elapsed();
|
||||
|
||||
debug!("Processing completed in {:?}", processing_time);
|
||||
|
||||
// Check confidence threshold
|
||||
if result.confidence < args.threshold && !args.force {
|
||||
anyhow::bail!(
|
||||
"Confidence {} is below threshold {} (use --force to override)",
|
||||
result.confidence,
|
||||
args.threshold
|
||||
);
|
||||
}
|
||||
|
||||
// Format and output result
|
||||
let output_content = format_result(&result, &cli.format, args.pretty, args.metadata)?;
|
||||
|
||||
if let Some(output_path) = &args.output {
|
||||
std::fs::write(output_path, &output_content).context("Failed to write output file")?;
|
||||
info!("Output saved to: {}", output_path.display());
|
||||
} else {
|
||||
println!("{}", output_content);
|
||||
}
|
||||
|
||||
// Display summary if not quiet
|
||||
if !cli.quiet {
|
||||
output::print_ocr_summary(&result);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_file(file: &PathBuf, _config: &OcrConfig) -> Result<OcrResult> {
|
||||
// TODO: Implement actual OCR processing
|
||||
// For now, return a mock result
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// Simulate OCR processing
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
|
||||
let processing_time = start.elapsed().as_millis() as u64;
|
||||
|
||||
Ok(OcrResult {
|
||||
file: file.clone(),
|
||||
text: "Sample OCR text from image".to_string(),
|
||||
latex: Some(r"\int_0^1 x^2 \, dx = \frac{1}{3}".to_string()),
|
||||
confidence: 0.95,
|
||||
processing_time_ms: processing_time,
|
||||
errors: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
fn format_result(
|
||||
result: &OcrResult,
|
||||
format: &OutputFormat,
|
||||
pretty: bool,
|
||||
include_metadata: bool,
|
||||
) -> Result<String> {
|
||||
match format {
|
||||
OutputFormat::Json => if include_metadata {
|
||||
if pretty {
|
||||
serde_json::to_string_pretty(result)
|
||||
} else {
|
||||
serde_json::to_string(result)
|
||||
}
|
||||
} else {
|
||||
let simple = serde_json::json!({
|
||||
"text": result.text,
|
||||
"latex": result.latex,
|
||||
"confidence": result.confidence,
|
||||
});
|
||||
if pretty {
|
||||
serde_json::to_string_pretty(&simple)
|
||||
} else {
|
||||
serde_json::to_string(&simple)
|
||||
}
|
||||
}
|
||||
.context("Failed to serialize to JSON"),
|
||||
OutputFormat::Text => Ok(result.text.clone()),
|
||||
OutputFormat::Latex => Ok(result.latex.clone().unwrap_or_else(|| result.text.clone())),
|
||||
OutputFormat::Markdown => {
|
||||
let mut md = format!("# OCR Result\n\n{}\n", result.text);
|
||||
if let Some(latex) = &result.latex {
|
||||
md.push_str(&format!("\n## LaTeX\n\n```latex\n{}\n```\n", latex));
|
||||
}
|
||||
if include_metadata {
|
||||
md.push_str(&format!(
|
||||
"\n---\n\nConfidence: {:.2}%\nProcessing time: {}ms\n",
|
||||
result.confidence * 100.0,
|
||||
result.processing_time_ms
|
||||
));
|
||||
}
|
||||
Ok(md)
|
||||
}
|
||||
OutputFormat::MathMl => {
|
||||
// TODO: Implement MathML conversion
|
||||
Ok(format!(
|
||||
"<math xmlns=\"http://www.w3.org/1998/Math/MathML\">\n {}\n</math>",
|
||||
result.text
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_config(config_path: Option<&PathBuf>) -> Result<OcrConfig> {
|
||||
if let Some(path) = config_path {
|
||||
let content = std::fs::read_to_string(path).context("Failed to read config file")?;
|
||||
toml::from_str(&content).context("Failed to parse config file")
|
||||
} else {
|
||||
Ok(OcrConfig::default())
|
||||
}
|
||||
}
|
||||
293
vendor/ruvector/examples/scipix/src/cli/commands/serve.rs
vendored
Normal file
293
vendor/ruvector/examples/scipix/src/cli/commands/serve.rs
vendored
Normal file
@@ -0,0 +1,293 @@
|
||||
use anyhow::{Context, Result};
|
||||
use axum::{
|
||||
extract::{Multipart, State},
|
||||
http::StatusCode,
|
||||
response::IntoResponse,
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use clap::Args;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::signal;
|
||||
use tower_http::{cors::CorsLayer, trace::TraceLayer};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use super::{OcrConfig, OcrResult};
|
||||
use crate::cli::Cli;
|
||||
|
||||
/// Start the API server
// NOTE(review): env vars use the MATHPIX_ prefix while the tool is branded
// Scipix — confirm this is intentional (same prefix appears in cli/mod.rs).
#[derive(Args, Debug, Clone)]
pub struct ServeArgs {
    /// Port to listen on
    #[arg(
        short,
        long,
        default_value = "8080",
        env = "MATHPIX_PORT",
        help = "Port to listen on"
    )]
    pub port: u16,

    /// Host to bind to
    #[arg(
        short = 'H',
        long,
        default_value = "127.0.0.1",
        env = "MATHPIX_HOST",
        help = "Host address to bind to"
    )]
    pub host: String,

    /// Directory containing ML models
    #[arg(
        long,
        value_name = "DIR",
        help = "Directory containing ML models to preload"
    )]
    pub model_dir: Option<PathBuf>,

    /// Enable CORS
    #[arg(long, help = "Enable CORS for cross-origin requests")]
    pub cors: bool,

    /// Maximum request size in MB
    #[arg(long, default_value = "10", help = "Maximum request size in megabytes")]
    pub max_size: usize,

    /// Number of worker threads
    // NOTE(review): `workers` is not read by `execute` in this file —
    // presumably consumed elsewhere (runtime setup) or dead; confirm.
    #[arg(
        short = 'w',
        long,
        default_value = "4",
        help = "Number of worker threads"
    )]
    pub workers: usize,
}
|
||||
|
||||
/// Shared, cheaply-cloneable state handed to every request handler.
#[derive(Clone)]
struct AppState {
    // OCR settings loaded at startup (shared, read-only).
    config: Arc<OcrConfig>,
    // Upload size cap in bytes (derived from `--max-size` megabytes).
    max_size: usize,
}
|
||||
|
||||
/// Start the HTTP API server and block until shutdown.
///
/// Routes: `/` (info), `/health`, `POST /api/v1/ocr`, `POST /api/v1/batch`.
/// Shuts down gracefully on Ctrl+C or SIGTERM (see `shutdown_signal`).
pub async fn execute(args: ServeArgs, cli: &Cli) -> Result<()> {
    info!("Starting Scipix API server");

    // Load configuration
    let config = Arc::new(load_config(cli.config.as_ref())?);

    // Preload models if specified
    if let Some(model_dir) = &args.model_dir {
        info!("Preloading models from: {}", model_dir.display());
        preload_models(model_dir)?;
    }

    // Create app state
    // NOTE(review): `max_size` is enforced per multipart field inside the
    // handlers; axum's own default request body limit may still reject very
    // large uploads before the handler runs — confirm and consider
    // `DefaultBodyLimit` if uploads above ~2 MB must be accepted.
    let state = AppState {
        config,
        max_size: args.max_size * 1024 * 1024,
    };

    // Build router
    let mut app = Router::new()
        .route("/", get(root))
        .route("/health", get(health))
        .route("/api/v1/ocr", post(ocr_handler))
        .route("/api/v1/batch", post(batch_handler))
        .with_state(state)
        .layer(TraceLayer::new_for_http());

    // Add CORS if enabled
    if args.cors {
        app = app.layer(CorsLayer::permissive());
        info!("CORS enabled");
    }

    // Create socket address
    let addr: SocketAddr = format!("{}:{}", args.host, args.port)
        .parse()
        .context("Invalid host/port combination")?;

    info!("Server listening on http://{}", addr);
    info!("API endpoints:");
    info!("  POST http://{}/api/v1/ocr - Single file OCR", addr);
    info!("  POST http://{}/api/v1/batch - Batch processing", addr);
    info!("  GET  http://{}/health - Health check", addr);

    // Create server
    let listener = tokio::net::TcpListener::bind(addr)
        .await
        .context("Failed to bind to address")?;

    // Run server with graceful shutdown
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await
        .context("Server error")?;

    info!("Server shutdown complete");
    Ok(())
}
|
||||
|
||||
/// GET `/` — plain-text landing page listing the available endpoints.
async fn root() -> &'static str {
    "Scipix OCR API Server\n\nEndpoints:\n  POST /api/v1/ocr - Single file OCR\n  POST /api/v1/batch - Batch processing\n  GET /health - Health check"
}
|
||||
|
||||
/// GET `/health` — liveness probe returning a status flag and crate version.
async fn health() -> impl IntoResponse {
    Json(serde_json::json!({
        "status": "healthy",
        "version": env!("CARGO_PKG_VERSION"),
    }))
}
|
||||
|
||||
async fn ocr_handler(
|
||||
State(state): State<AppState>,
|
||||
mut multipart: Multipart,
|
||||
) -> Result<Json<OcrResult>, (StatusCode, String)> {
|
||||
while let Some(field) = multipart
|
||||
.next_field()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?
|
||||
{
|
||||
let name = field.name().unwrap_or("").to_string();
|
||||
|
||||
if name == "file" {
|
||||
let data = field
|
||||
.bytes()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?;
|
||||
|
||||
if data.len() > state.max_size {
|
||||
return Err((
|
||||
StatusCode::PAYLOAD_TOO_LARGE,
|
||||
format!(
|
||||
"File too large: {} bytes (max: {} bytes)",
|
||||
data.len(),
|
||||
state.max_size
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// Process the file
|
||||
let result = process_image_data(&data, &state.config)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
return Ok(Json(result));
|
||||
}
|
||||
}
|
||||
|
||||
Err((StatusCode::BAD_REQUEST, "No file provided".to_string()))
|
||||
}
|
||||
|
||||
async fn batch_handler(
|
||||
State(state): State<AppState>,
|
||||
mut multipart: Multipart,
|
||||
) -> Result<Json<Vec<OcrResult>>, (StatusCode, String)> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
while let Some(field) = multipart
|
||||
.next_field()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?
|
||||
{
|
||||
let name = field.name().unwrap_or("").to_string();
|
||||
|
||||
if name == "files" {
|
||||
let data = field
|
||||
.bytes()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?;
|
||||
|
||||
if data.len() > state.max_size {
|
||||
warn!("Skipping file: too large ({} bytes)", data.len());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process the file
|
||||
match process_image_data(&data, &state.config).await {
|
||||
Ok(result) => results.push(result),
|
||||
Err(e) => warn!("Failed to process file: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.is_empty() {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"No valid files processed".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Json(results))
|
||||
}
|
||||
|
||||
async fn process_image_data(data: &[u8], _config: &OcrConfig) -> Result<OcrResult> {
|
||||
// TODO: Implement actual OCR processing
|
||||
// For now, return a mock result
|
||||
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
Ok(OcrResult {
|
||||
file: PathBuf::from("uploaded_file"),
|
||||
text: format!("OCR text from uploaded image ({} bytes)", data.len()),
|
||||
latex: Some(r"\text{Sample LaTeX}".to_string()),
|
||||
confidence: 0.92,
|
||||
processing_time_ms: 50,
|
||||
errors: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
fn preload_models(model_dir: &PathBuf) -> Result<()> {
|
||||
if !model_dir.exists() {
|
||||
anyhow::bail!("Model directory not found: {}", model_dir.display());
|
||||
}
|
||||
|
||||
if !model_dir.is_dir() {
|
||||
anyhow::bail!("Not a directory: {}", model_dir.display());
|
||||
}
|
||||
|
||||
// TODO: Implement model preloading
|
||||
info!("Models preloaded from {}", model_dir.display());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_config(config_path: Option<&PathBuf>) -> Result<OcrConfig> {
|
||||
if let Some(path) = config_path {
|
||||
let content = std::fs::read_to_string(path).context("Failed to read config file")?;
|
||||
toml::from_str(&content).context("Failed to parse config file")
|
||||
} else {
|
||||
Ok(OcrConfig::default())
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve when a shutdown signal is received: Ctrl+C on all platforms, plus
/// SIGTERM on Unix. Used as axum's graceful-shutdown trigger.
async fn shutdown_signal() {
    let ctrl_c = async {
        signal::ctrl_c()
            .await
            .expect("failed to install Ctrl+C handler");
    };

    // SIGTERM is Unix-only; elsewhere substitute a future that never resolves
    // so the select! below degenerates to Ctrl+C alone.
    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("failed to install signal handler")
            .recv()
            .await;
    };

    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    // First signal wins.
    tokio::select! {
        _ = ctrl_c => {
            info!("Received Ctrl+C signal");
        },
        _ = terminate => {
            info!("Received terminate signal");
        },
    }
}
|
||||
115
vendor/ruvector/examples/scipix/src/cli/mod.rs
vendored
Normal file
115
vendor/ruvector/examples/scipix/src/cli/mod.rs
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
pub mod commands;
|
||||
pub mod output;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Scipix CLI - OCR and mathematical content processing
// NOTE: `///` comments and `help = …` attributes below feed clap's help
// output; keep them in sync when editing.
#[derive(Parser, Debug)]
#[command(
    name = "scipix-cli",
    version,
    about = "A Rust-based CLI for Scipix OCR processing",
    long_about = "Process images with OCR, extract mathematical formulas, and convert to LaTeX or other formats.\n\n\
                  Supports single file processing, batch operations, and API server mode."
)]
pub struct Cli {
    /// Path to configuration file
    #[arg(
        short,
        long,
        global = true,
        env = "MATHPIX_CONFIG",
        help = "Path to configuration file"
    )]
    pub config: Option<PathBuf>,

    /// Enable verbose logging
    #[arg(
        short,
        long,
        global = true,
        help = "Enable verbose logging (DEBUG level)"
    )]
    pub verbose: bool,

    /// Suppress all non-error output
    #[arg(
        short,
        long,
        global = true,
        conflicts_with = "verbose",
        help = "Suppress all non-error output"
    )]
    pub quiet: bool,

    /// Output format (json, text, latex, markdown)
    // NOTE(review): the list in the doc comment above is stale —
    // `OutputFormat` also supports `mathml`.
    #[arg(
        short,
        long,
        global = true,
        default_value = "text",
        help = "Output format for results"
    )]
    pub format: OutputFormat,

    #[command(subcommand)]
    pub command: Commands,
}
|
||||
|
||||
/// Top-level subcommands dispatched from `Cli::command`.
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// Process a single image or file with OCR
    Ocr(commands::ocr::OcrArgs),

    /// Process multiple files in batch mode
    Batch(commands::batch::BatchArgs),

    /// Start the API server
    Serve(commands::serve::ServeArgs),

    /// Start the MCP (Model Context Protocol) server for AI integration
    Mcp(commands::mcp::McpArgs),

    /// Manage configuration
    Config(commands::config::ConfigArgs),

    /// Diagnose environment and optimize configuration
    Doctor(commands::doctor::DoctorArgs),

    /// Show version information
    Version,

    /// Generate shell completions
    Completions {
        /// Shell to generate completions for (bash, zsh, fish, powershell)
        // None leaves shell selection to the command handler — presumably it
        // auto-detects or errors; confirm against the Completions handler.
        #[arg(value_enum)]
        shell: Option<clap_complete::Shell>,
    },
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
|
||||
pub enum OutputFormat {
|
||||
/// Plain text output
|
||||
Text,
|
||||
/// JSON output
|
||||
Json,
|
||||
/// LaTeX format
|
||||
Latex,
|
||||
/// Markdown format
|
||||
Markdown,
|
||||
/// MathML format
|
||||
MathMl,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for OutputFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OutputFormat::Text => write!(f, "text"),
|
||||
OutputFormat::Json => write!(f, "json"),
|
||||
OutputFormat::Latex => write!(f, "latex"),
|
||||
OutputFormat::Markdown => write!(f, "markdown"),
|
||||
OutputFormat::MathMl => write!(f, "mathml"),
|
||||
}
|
||||
}
|
||||
}
|
||||
223
vendor/ruvector/examples/scipix/src/cli/output.rs
vendored
Normal file
223
vendor/ruvector/examples/scipix/src/cli/output.rs
vendored
Normal file
@@ -0,0 +1,223 @@
|
||||
use comfy_table::{modifiers::UTF8_ROUND_CORNERS, presets::UTF8_FULL, Cell, Color, Table};
|
||||
use console::style;
|
||||
|
||||
use super::commands::OcrResult;
|
||||
|
||||
/// Print a summary of a single OCR result
|
||||
pub fn print_ocr_summary(result: &OcrResult) {
|
||||
println!("\n{}", style("OCR Processing Summary").bold().cyan());
|
||||
println!("{}", style("─".repeat(60)).dim());
|
||||
|
||||
let mut table = Table::new();
|
||||
table
|
||||
.load_preset(UTF8_FULL)
|
||||
.apply_modifier(UTF8_ROUND_CORNERS)
|
||||
.set_header(vec![
|
||||
Cell::new("Property").fg(Color::Cyan),
|
||||
Cell::new("Value").fg(Color::Green),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("File"),
|
||||
Cell::new(result.file.display().to_string()),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Confidence"),
|
||||
Cell::new(format!("{:.2}%", result.confidence * 100.0))
|
||||
.fg(confidence_color(result.confidence)),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Processing Time"),
|
||||
Cell::new(format!("{}ms", result.processing_time_ms)),
|
||||
]);
|
||||
|
||||
if let Some(latex) = &result.latex {
|
||||
table.add_row(vec![
|
||||
Cell::new("LaTeX"),
|
||||
Cell::new(if latex.len() > 50 {
|
||||
format!("{}...", &latex[..50])
|
||||
} else {
|
||||
latex.clone()
|
||||
}),
|
||||
]);
|
||||
}
|
||||
|
||||
if !result.errors.is_empty() {
|
||||
table.add_row(vec![
|
||||
Cell::new("Errors").fg(Color::Red),
|
||||
Cell::new(result.errors.len().to_string()).fg(Color::Red),
|
||||
]);
|
||||
}
|
||||
|
||||
println!("{table}");
|
||||
|
||||
if !result.errors.is_empty() {
|
||||
println!("\n{}", style("Errors:").bold().red());
|
||||
for (i, error) in result.errors.iter().enumerate() {
|
||||
println!(" {}. {}", i + 1, style(error).red());
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
/// Print a summary of batch processing results
|
||||
pub fn print_batch_summary(passed: &[OcrResult], failed: &[OcrResult], threshold: f64) {
|
||||
println!("\n{}", style("Batch Processing Summary").bold().cyan());
|
||||
println!("{}", style("═".repeat(60)).dim());
|
||||
|
||||
let total = passed.len() + failed.len();
|
||||
let avg_confidence = if !passed.is_empty() {
|
||||
passed.iter().map(|r| r.confidence).sum::<f64>() / passed.len() as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let total_time: u64 = passed.iter().map(|r| r.processing_time_ms).sum();
|
||||
let avg_time = if !passed.is_empty() {
|
||||
total_time / passed.len() as u64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let mut table = Table::new();
|
||||
table
|
||||
.load_preset(UTF8_FULL)
|
||||
.apply_modifier(UTF8_ROUND_CORNERS)
|
||||
.set_header(vec![
|
||||
Cell::new("Metric").fg(Color::Cyan),
|
||||
Cell::new("Value").fg(Color::Green),
|
||||
]);
|
||||
|
||||
table.add_row(vec![Cell::new("Total Files"), Cell::new(total.to_string())]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Passed").fg(Color::Green),
|
||||
Cell::new(format!(
|
||||
"{} ({:.1}%)",
|
||||
passed.len(),
|
||||
(passed.len() as f64 / total as f64) * 100.0
|
||||
))
|
||||
.fg(Color::Green),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Failed").fg(Color::Red),
|
||||
Cell::new(format!(
|
||||
"{} ({:.1}%)",
|
||||
failed.len(),
|
||||
(failed.len() as f64 / total as f64) * 100.0
|
||||
))
|
||||
.fg(if failed.is_empty() {
|
||||
Color::Green
|
||||
} else {
|
||||
Color::Red
|
||||
}),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Threshold"),
|
||||
Cell::new(format!("{:.2}%", threshold * 100.0)),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Avg Confidence"),
|
||||
Cell::new(format!("{:.2}%", avg_confidence * 100.0)).fg(confidence_color(avg_confidence)),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Avg Processing Time"),
|
||||
Cell::new(format!("{}ms", avg_time)),
|
||||
]);
|
||||
|
||||
table.add_row(vec![
|
||||
Cell::new("Total Processing Time"),
|
||||
Cell::new(format!("{:.2}s", total_time as f64 / 1000.0)),
|
||||
]);
|
||||
|
||||
println!("{table}");
|
||||
|
||||
if !failed.is_empty() {
|
||||
println!("\n{}", style("Failed Files:").bold().red());
|
||||
|
||||
let mut failed_table = Table::new();
|
||||
failed_table
|
||||
.load_preset(UTF8_FULL)
|
||||
.apply_modifier(UTF8_ROUND_CORNERS)
|
||||
.set_header(vec![
|
||||
Cell::new("#").fg(Color::Cyan),
|
||||
Cell::new("File").fg(Color::Cyan),
|
||||
Cell::new("Confidence").fg(Color::Cyan),
|
||||
]);
|
||||
|
||||
for (i, result) in failed.iter().enumerate() {
|
||||
failed_table.add_row(vec![
|
||||
Cell::new((i + 1).to_string()),
|
||||
Cell::new(result.file.display().to_string()),
|
||||
Cell::new(format!("{:.2}%", result.confidence * 100.0)).fg(Color::Red),
|
||||
]);
|
||||
}
|
||||
|
||||
println!("{failed_table}");
|
||||
}
|
||||
|
||||
// Summary statistics
|
||||
println!("\n{}", style("Statistics:").bold().cyan());
|
||||
|
||||
if !passed.is_empty() {
|
||||
let confidences: Vec<f64> = passed.iter().map(|r| r.confidence).collect();
|
||||
let min_confidence = confidences.iter().cloned().fold(f64::INFINITY, f64::min);
|
||||
let max_confidence = confidences
|
||||
.iter()
|
||||
.cloned()
|
||||
.fold(f64::NEG_INFINITY, f64::max);
|
||||
|
||||
println!(
|
||||
" Min confidence: {}",
|
||||
style(format!("{:.2}%", min_confidence * 100.0)).green()
|
||||
);
|
||||
println!(
|
||||
" Max confidence: {}",
|
||||
style(format!("{:.2}%", max_confidence * 100.0)).green()
|
||||
);
|
||||
|
||||
let times: Vec<u64> = passed.iter().map(|r| r.processing_time_ms).collect();
|
||||
let min_time = times.iter().min().unwrap_or(&0);
|
||||
let max_time = times.iter().max().unwrap_or(&0);
|
||||
|
||||
println!(" Min processing time: {}ms", style(min_time).cyan());
|
||||
println!(" Max processing time: {}ms", style(max_time).cyan());
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
/// Get color based on confidence value
|
||||
fn confidence_color(confidence: f64) -> Color {
|
||||
if confidence >= 0.9 {
|
||||
Color::Green
|
||||
} else if confidence >= 0.7 {
|
||||
Color::Yellow
|
||||
} else {
|
||||
Color::Red
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a progress bar style for batch processing
|
||||
pub fn create_progress_style() -> indicatif::ProgressStyle {
|
||||
indicatif::ProgressStyle::default_bar()
|
||||
.template(
|
||||
"{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta}) {msg}",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("█▓▒░ ")
|
||||
}
|
||||
|
||||
/// Create a spinner style for individual file processing
|
||||
pub fn create_spinner_style() -> indicatif::ProgressStyle {
|
||||
indicatif::ProgressStyle::default_spinner()
|
||||
.template("{spinner:.cyan} {msg}")
|
||||
.unwrap()
|
||||
.tick_chars("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")
|
||||
}
|
||||
Reference in New Issue
Block a user