//! Research frontier detection using coherence signals use std::collections::HashMap; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use crate::{TopicEdge, TopicGraph, TopicNode, Work}; /// An emerging research frontier #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EmergingFrontier { /// Frontier identifier pub id: String, /// Primary topic name pub name: String, /// Related topic names pub related_topics: Vec, /// Growth rate (works per year) pub growth_rate: f64, /// Coherence delta (change in min-cut boundary) pub coherence_delta: f64, /// Citation momentum (trend in citation rates) pub citation_momentum: f64, /// Detected boundary nodes (topics at the frontier edge) pub boundary_topics: Vec, /// First detected pub detected_at: DateTime, /// Confidence score (0-1) pub confidence: f64, /// Evidence supporting this frontier pub evidence: Vec, } /// Evidence for a frontier detection #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FrontierEvidence { /// Evidence type pub evidence_type: String, /// Value pub value: f64, /// Explanation pub explanation: String, } /// A cross-domain bridge connecting two research areas #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CrossDomainBridge { /// Bridge identifier pub id: String, /// Source domain/topic pub source_domain: String, /// Target domain/topic pub target_domain: String, /// Bridge topics (connector nodes) pub bridge_topics: Vec, /// Citation flow (source → target) pub citation_flow: f64, /// Reverse flow (target → source) pub reverse_flow: f64, /// Bridge strength (combined normalized flow) pub strength: f64, /// Is this a new connection? pub is_emerging: bool, /// First observed pub first_observed: DateTime, /// Key papers establishing the bridge pub key_works: Vec, } /// Research frontier radar for detecting emerging fields pub struct FrontierRadar { /// Topic graph snapshots over time snapshots: Vec<(DateTime, TopicGraph)>, /// Minimum growth rate to consider min_growth_rate: f64, /// Minimum coherence shift to detect min_coherence_shift: f64, /// Detected frontiers frontiers: Vec, /// Detected bridges bridges: Vec, } impl FrontierRadar { /// Create a new frontier radar pub fn new(min_growth_rate: f64, min_coherence_shift: f64) -> Self { Self { snapshots: Vec::new(), min_growth_rate, min_coherence_shift, frontiers: Vec::new(), bridges: Vec::new(), } } /// Add a topic graph snapshot pub fn add_snapshot(&mut self, timestamp: DateTime, graph: TopicGraph) { self.snapshots.push((timestamp, graph)); self.snapshots.sort_by_key(|(ts, _)| *ts); } /// Build snapshots from works partitioned by time pub fn build_from_works(&mut self, works: &[Work], window_days: i64) { if works.is_empty() { return; } // Find time range let mut min_date = Utc::now(); let mut max_date = DateTime::::MIN_UTC; for work in works { if let Some(date) = work.publication_date { if date < min_date { min_date = date; } if date > max_date { max_date = date; } } } // Partition works into time windows let window_duration = chrono::Duration::days(window_days); let mut current_start = min_date; while current_start < max_date { let current_end = current_start + window_duration; let window_works: Vec<_> = works .iter() .filter(|w| { w.publication_date .map(|d| d >= current_start && d < current_end) .unwrap_or(false) }) .cloned() .collect(); if !window_works.is_empty() { let graph = TopicGraph::from_works(&window_works); self.add_snapshot(current_start, graph); } current_start = current_end; } } /// Detect emerging frontiers from snapshots pub fn detect_frontiers(&mut self) -> Vec { if self.snapshots.len() < 2 { return vec![]; } let mut frontiers = Vec::new(); let mut frontier_counter = 0; // Compare consecutive snapshots for i in 1..self.snapshots.len() { let (prev_ts, prev_graph) = &self.snapshots[i - 1]; let (curr_ts, curr_graph) = &self.snapshots[i]; // Find topics with significant growth for (topic_id, curr_node) in &curr_graph.topics { let prev_node = prev_graph.topics.get(topic_id); let growth = if let Some(prev) = prev_node { if prev.work_count > 0 { (curr_node.work_count as f64 - prev.work_count as f64) / prev.work_count as f64 } else { f64::INFINITY } } else { // New topic f64::INFINITY }; if growth > self.min_growth_rate { // Calculate coherence shift let coherence_delta = self.compute_topic_coherence_delta( topic_id, prev_graph, curr_graph, ); if coherence_delta.abs() > self.min_coherence_shift { // Calculate citation momentum let citation_momentum = curr_node.avg_citations - prev_node.map(|n| n.avg_citations).unwrap_or(0.0); // Find boundary topics let boundary_topics = self.find_boundary_topics(topic_id, curr_graph); // Build evidence let mut evidence = vec![ FrontierEvidence { evidence_type: "growth_rate".to_string(), value: growth, explanation: format!( "{:.0}% increase in works", growth * 100.0 ), }, FrontierEvidence { evidence_type: "coherence_delta".to_string(), value: coherence_delta, explanation: format!( "Coherence {} by {:.2}", if coherence_delta > 0.0 { "increased" } else { "decreased" }, coherence_delta.abs() ), }, ]; if citation_momentum > 0.0 { evidence.push(FrontierEvidence { evidence_type: "citation_momentum".to_string(), value: citation_momentum, explanation: format!( "+{:.1} avg citations", citation_momentum ), }); } // Calculate confidence based on evidence strength let confidence = self.calculate_confidence(growth, coherence_delta, citation_momentum); if confidence >= 0.3 { frontiers.push(EmergingFrontier { id: format!("frontier_{}", frontier_counter), name: curr_node.name.clone(), related_topics: self.find_related_topics(topic_id, curr_graph), growth_rate: curr_node.growth_rate, coherence_delta, citation_momentum, boundary_topics, detected_at: *curr_ts, confidence, evidence, }); frontier_counter += 1; } } } } } // Sort by confidence frontiers.sort_by(|a, b| { b.confidence .partial_cmp(&a.confidence) .unwrap_or(std::cmp::Ordering::Equal) }); self.frontiers = frontiers.clone(); frontiers } /// Detect cross-domain bridges pub fn detect_bridges(&mut self) -> Vec { if self.snapshots.is_empty() { return vec![]; } let mut bridges = Vec::new(); let mut bridge_counter = 0; let (curr_ts, curr_graph) = self.snapshots.last().unwrap(); // Build domain → topics mapping (simplified: use top-level grouping) let mut domain_topics: HashMap> = HashMap::new(); for (topic_id, node) in &curr_graph.topics { // Use first word as domain (simplified) let domain = node .name .split_whitespace() .next() .unwrap_or("Unknown") .to_string(); domain_topics .entry(domain.clone()) .or_default() .push(topic_id.clone()); } // Find cross-domain edges let mut domain_flows: HashMap<(String, String), Vec<&TopicEdge>> = HashMap::new(); for edge in &curr_graph.edges { let src_domain = self.get_domain(&edge.source, curr_graph); let tgt_domain = self.get_domain(&edge.target, curr_graph); if src_domain != tgt_domain { domain_flows .entry((src_domain.clone(), tgt_domain.clone())) .or_default() .push(edge); } } // Create bridge records for ((src_domain, tgt_domain), edges) in domain_flows { let total_flow: f64 = edges.iter().map(|e| e.weight).sum(); let citation_count: usize = edges.iter().map(|e| e.citation_count).sum(); if citation_count >= 5 { // Minimum threshold let bridge_topics: Vec = edges .iter() .flat_map(|e| vec![e.source.clone(), e.target.clone()]) .collect::>() .into_iter() .collect(); // Check if this is emerging (compare with previous snapshot) let is_emerging = if self.snapshots.len() >= 2 { let (_, prev_graph) = &self.snapshots[self.snapshots.len() - 2]; let prev_flow: f64 = prev_graph .edges .iter() .filter(|e| { self.get_domain(&e.source, prev_graph) == src_domain && self.get_domain(&e.target, prev_graph) == tgt_domain }) .map(|e| e.weight) .sum(); total_flow > prev_flow * 1.5 // 50% growth } else { true }; bridges.push(CrossDomainBridge { id: format!("bridge_{}", bridge_counter), source_domain: src_domain.clone(), target_domain: tgt_domain.clone(), bridge_topics, citation_flow: total_flow, reverse_flow: 0.0, // Would need to compute reverse direction strength: total_flow / citation_count as f64, is_emerging, first_observed: *curr_ts, key_works: vec![], // Would need work-level data }); bridge_counter += 1; } } // Sort by strength bridges.sort_by(|a, b| { b.strength .partial_cmp(&a.strength) .unwrap_or(std::cmp::Ordering::Equal) }); self.bridges = bridges.clone(); bridges } /// Compute coherence delta for a topic between snapshots fn compute_topic_coherence_delta( &self, topic_id: &str, prev_graph: &TopicGraph, curr_graph: &TopicGraph, ) -> f64 { // Compute local coherence as ratio of intra-topic to inter-topic edges let prev_coherence = self.compute_local_coherence(topic_id, prev_graph); let curr_coherence = self.compute_local_coherence(topic_id, curr_graph); curr_coherence - prev_coherence } /// Compute local coherence for a topic fn compute_local_coherence(&self, topic_id: &str, graph: &TopicGraph) -> f64 { // Find edges involving this topic let edges: Vec<_> = graph .edges .iter() .filter(|e| e.source == topic_id || e.target == topic_id) .collect(); if edges.is_empty() { return 0.0; } // Coherence = sum of weights edges.iter().map(|e| e.weight).sum::() / edges.len() as f64 } /// Find topics at the boundary (connected to other clusters) fn find_boundary_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec { // Find topics connected to this topic that have high connectivity elsewhere graph .edges .iter() .filter(|e| e.source == topic_id) .map(|e| e.target.clone()) .take(5) .collect() } /// Find related topics fn find_related_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec { graph .edges .iter() .filter(|e| e.source == topic_id || e.target == topic_id) .flat_map(|e| { if e.source == topic_id { vec![e.target.clone()] } else { vec![e.source.clone()] } }) .take(10) .collect() } /// Get domain for a topic (simplified) fn get_domain(&self, topic_id: &str, graph: &TopicGraph) -> String { graph .topics .get(topic_id) .map(|n| { n.name .split_whitespace() .next() .unwrap_or("Unknown") .to_string() }) .unwrap_or_else(|| "Unknown".to_string()) } /// Calculate confidence score fn calculate_confidence( &self, growth: f64, coherence_delta: f64, citation_momentum: f64, ) -> f64 { let growth_score = (growth.min(5.0) / 5.0).max(0.0); let coherence_score = (coherence_delta.abs().min(1.0)).max(0.0); let citation_score = (citation_momentum / 10.0).min(1.0).max(0.0); (growth_score * 0.4 + coherence_score * 0.4 + citation_score * 0.2).min(1.0) } /// Get detected frontiers pub fn frontiers(&self) -> &[EmergingFrontier] { &self.frontiers } /// Get detected bridges pub fn bridges(&self) -> &[CrossDomainBridge] { &self.bridges } /// Get highest confidence frontiers pub fn top_frontiers(&self, n: usize) -> Vec<&EmergingFrontier> { self.frontiers.iter().take(n).collect() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_frontier_radar_creation() { let radar = FrontierRadar::new(0.1, 0.2); assert!(radar.frontiers().is_empty()); assert!(radar.bridges().is_empty()); } #[test] fn test_confidence_calculation() { let radar = FrontierRadar::new(0.1, 0.2); // High confidence let high = radar.calculate_confidence(2.0, 0.5, 5.0); assert!(high > 0.5); // Low confidence let low = radar.calculate_confidence(0.05, 0.01, 0.1); assert!(low < 0.3); } }