Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
# Manifest for the ruvector-data-openalex crate: OpenAlex research
# intelligence integration for RuVector. Version, edition, license and
# repository metadata are inherited from the workspace root manifest.
[package]
name = "ruvector-data-openalex"
version.workspace = true
edition.workspace = true
description = "OpenAlex research intelligence integration for RuVector"
license.workspace = true
repository.workspace = true
keywords = ["openalex", "research", "citations", "graph", "discovery"]
categories = ["science", "database"]

[dependencies]
# Core framework
ruvector-data-framework = { path = "../framework" }
# Async runtime
tokio.workspace = true
futures.workspace = true
async-trait.workspace = true
# Serialization
serde.workspace = true
serde_json.workspace = true
# HTTP client
reqwest.workspace = true
# Time handling
chrono.workspace = true
# Logging
tracing.workspace = true
thiserror.workspace = true
# Data processing
rayon.workspace = true
# URL encoding
urlencoding = "2.1"
# Compression for bulk downloads
flate2 = "1.0"

[dev-dependencies]
tokio-test = "0.4"
rand = "0.8"

[[example]]
name = "frontier_radar"
path = "examples/frontier_radar.rs"

View File

@@ -0,0 +1,322 @@
//! OpenAlex Research Frontier Discovery
//!
//! This example detects emerging research frontiers using citation graph analysis
//! and RuVector's dynamic coherence detection.
use chrono::{Duration, Utc};
use ruvector_data_openalex::{
OpenAlexClient, OpenAlexConfig, EntityType,
TopicGraph, TopicNode, TopicEdge,
frontier::{FrontierRadar, FrontierConfig},
};
use std::collections::HashMap;
/// Entry point: scans a fixed list of research domains, builds a topic
/// co-occurrence graph per domain from recent OpenAlex works, and prints
/// heuristic frontier signals plus a cross-domain bridge analysis.
///
/// NOTE(review): `FrontierConfig` and the `OpenAlexClient::new(config)` /
/// `search_works(query, date)` signatures used here come from crate code not
/// visible in this file — confirm they match the vendored crate API.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("╔══════════════════════════════════════════════════════════════╗");
    println!("║ OpenAlex Research Frontier Discovery ║");
    println!("║ Detecting Emerging Research via Citation Dynamics ║");
    println!("╚══════════════════════════════════════════════════════════════╝");
    println!();
    // Initialize OpenAlex client
    // (supplying an email opts requests into OpenAlex's faster "polite pool")
    let config = OpenAlexConfig {
        email: Some("ruvector-discovery@example.com".to_string()),
        per_page: 200,
        ..Default::default()
    };
    let client = OpenAlexClient::new(config);
    // Research areas to scan for emerging frontiers: (display name, search query)
    let research_domains = [
        ("Quantum Machine Learning", "quantum computing AND machine learning"),
        ("Foundation Models", "large language model OR foundation model"),
        ("Embodied AI", "embodied AI OR robotics learning"),
        ("Mechanistic Interpretability", "interpretability AND neural network"),
        ("AI Safety", "AI safety OR alignment"),
        ("Synthetic Biology AI", "synthetic biology AND AI"),
        ("Climate AI", "climate AND machine learning"),
        ("Materials Discovery", "materials discovery AND AI"),
    ];
    println!("🔍 Scanning {} research domains for emerging frontiers...\n", research_domains.len());
    // Configure frontier detection
    let frontier_config = FrontierConfig {
        min_growth_rate: 0.15, // 15% citation growth threshold
        coherence_sensitivity: 0.7, // High sensitivity to structure changes
        time_window_months: 6, // Look at last 6 months
        min_boundary_topics: 3, // Minimum topics at frontier
        min_papers: 10, // Minimum papers to consider
    };
    let mut radar = FrontierRadar::new(frontier_config);
    let mut all_discoveries = Vec::new();
    // Only works published in the last ~6 months are considered
    let cutoff_date = Utc::now() - Duration::days(180);
    for (domain_name, query) in &research_domains {
        println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
        println!("📚 Domain: {}", domain_name);
        println!();
        // Fetch recent works in this domain
        match client.search_works(query, Some(cutoff_date)).await {
            Ok(works) => {
                println!(" Found {} recent papers", works.len());
                if works.is_empty() {
                    println!(" ⚠️ No papers found, skipping domain\n");
                    continue;
                }
                // Build topic citation graph for this domain:
                // nodes are concepts, edges are within-paper co-occurrences.
                let mut topic_graph = TopicGraph::new();
                let mut topic_papers: HashMap<String, Vec<String>> = HashMap::new();
                let mut topic_citations: HashMap<String, usize> = HashMap::new();
                for work in &works {
                    if let Some(concepts) = &work.concepts {
                        // Add topics and track papers per topic
                        // (only concepts tagged with score > 0.3 are kept)
                        for concept in concepts.iter().filter(|c| c.score > 0.3) {
                            let topic_id = concept.id.clone();
                            // Add topic node if not exists
                            if !topic_graph.nodes.iter().any(|n| n.id == topic_id) {
                                topic_graph.nodes.push(TopicNode {
                                    id: topic_id.clone(),
                                    name: concept.display_name.clone(),
                                    level: concept.level as usize,
                                    paper_count: 1,
                                    citation_count: work.cited_by_count.unwrap_or(0) as usize,
                                    score: concept.score,
                                });
                            } else {
                                // Update counts
                                if let Some(node) = topic_graph.nodes.iter_mut().find(|n| n.id == topic_id) {
                                    node.paper_count += 1;
                                    node.citation_count += work.cited_by_count.unwrap_or(0) as usize;
                                }
                            }
                            topic_papers.entry(topic_id.clone()).or_default().push(work.id.clone());
                            *topic_citations.entry(topic_id.clone()).or_insert(0) += work.cited_by_count.unwrap_or(0) as usize;
                        }
                        // Build edges between co-occurring topics
                        let topic_ids: Vec<String> = concepts.iter()
                            .filter(|c| c.score > 0.3)
                            .map(|c| c.id.clone())
                            .collect();
                        // All unordered pairs of this paper's topics
                        for i in 0..topic_ids.len() {
                            for j in (i + 1)..topic_ids.len() {
                                let source = &topic_ids[i];
                                let target = &topic_ids[j];
                                // Check if edge exists (edges are undirected,
                                // so both orientations are matched)
                                if let Some(edge) = topic_graph.edges.iter_mut()
                                    .find(|e| (e.source == *source && e.target == *target) ||
                                              (e.source == *target && e.target == *source)) {
                                    edge.weight += 1.0;
                                } else {
                                    topic_graph.edges.push(TopicEdge {
                                        source: source.clone(),
                                        target: target.clone(),
                                        weight: 1.0,
                                        citation_flow: 0,
                                    });
                                }
                            }
                        }
                    }
                }
                println!(" Built topic graph: {} nodes, {} edges",
                    topic_graph.nodes.len(), topic_graph.edges.len());
                // Add snapshot to radar
                radar.add_snapshot(Utc::now(), topic_graph.clone());
                // Analyze for emerging frontiers (need at least 2 snapshots for delta)
                // For demo, we analyze the single snapshot structure
                let discoveries = analyze_frontier_structure(&topic_graph, domain_name);
                if !discoveries.is_empty() {
                    println!("\n 🌟 Potential Frontiers Detected:\n");
                    for discovery in &discoveries {
                        all_discoveries.push(discovery.clone());
                        println!(" {}", discovery);
                    }
                } else {
                    println!(" 📊 No clear frontier signals in current snapshot");
                }
            }
            Err(e) => {
                println!(" ❌ Error fetching papers: {}", e);
            }
        }
        println!();
    }
    // Cross-domain bridge analysis
    println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    println!("🌉 Cross-Domain Bridge Analysis");
    println!();
    // Look for papers bridging multiple domains
    let bridge_query = "(quantum AND machine learning) OR (biology AND AI AND materials)";
    match client.search_works(bridge_query, Some(cutoff_date)).await {
        Ok(bridge_works) => {
            println!(" Found {} potential bridge papers\n", bridge_works.len());
            // Analyze bridge patterns
            let bridges = analyze_bridge_papers(&bridge_works);
            for bridge in &bridges {
                println!(" {}", bridge);
                all_discoveries.push(bridge.clone());
            }
        }
        Err(e) => {
            println!(" ❌ Error: {}", e);
        }
    }
    // Summary
    println!("\n╔══════════════════════════════════════════════════════════════╗");
    println!("║ Discovery Summary ║");
    println!("╚══════════════════════════════════════════════════════════════╝");
    println!();
    println!("Total potential frontiers identified: {}", all_discoveries.len());
    println!();
    // Rank discoveries by potential
    // (NOTE(review): list is in insertion order, not actually ranked)
    println!("📈 Top Emerging Areas (by structural signals):\n");
    for (i, discovery) in all_discoveries.iter().take(5).enumerate() {
        println!(" {}. {}", i + 1, discovery);
    }
    Ok(())
}
/// Analyze topic graph structure for frontier signals.
///
/// Emits human-readable signal strings for three structural patterns:
/// well-connected but paper-sparse topics, topics with high citations per
/// paper, and weak edges joining otherwise well-connected topics.
fn analyze_frontier_structure(graph: &TopicGraph, domain: &str) -> Vec<String> {
    let mut signals = Vec::new();
    // 1. Tally how many edges touch each topic (undirected degree).
    let mut degree_of: HashMap<&str, usize> = HashMap::new();
    for edge in &graph.edges {
        *degree_of.entry(edge.source.as_str()).or_default() += 1;
        *degree_of.entry(edge.target.as_str()).or_default() += 1;
    }
    for node in &graph.nodes {
        let connections = degree_of.get(node.id.as_str()).copied().unwrap_or(0);
        // A topic that connects many others while having few papers and
        // sitting deep in the taxonomy suggests an emerging organizing concept.
        if connections > 3 && node.paper_count < 20 && node.level >= 2 {
            signals.push(format!(
                "🔺 [{}] '{}' - High connectivity ({} connections), only {} papers - potential emerging organizer",
                domain, node.name, connections, node.paper_count
            ));
        }
        // Citations per paper, only meaningful once a topic has a few papers.
        if node.paper_count > 5 {
            let velocity = node.citation_count as f64 / node.paper_count as f64;
            if velocity > 20.0 {
                signals.push(format!(
                    "🔥 [{}] '{}' - High citation velocity ({:.1} citations/paper) - gaining attention",
                    domain, node.name, velocity
                ));
            }
        }
    }
    // 2. Weakly connected clusters: thin edges between hub topics mark
    //    potential specialization frontiers.
    for (left, right, strength) in find_weak_bridges(graph) {
        if strength > 0.0 && strength < 3.0 {
            signals.push(format!(
                "🌉 [{}] Weak bridge between '{}' and '{}' (strength {:.1}) - potential specialization point",
                domain, left, right, strength
            ));
        }
    }
    signals
}
/// Find weak bridges between topic clusters.
///
/// Heuristic: an edge of weight < 3 whose endpoints each touch more than
/// three other edges is treated as a bridge between two hubs. Returns
/// `(source name, target name, edge weight)` triples in edge order.
fn find_weak_bridges(graph: &TopicGraph) -> Vec<(String, String, f64)> {
    // Undirected degree per topic id.
    let mut degree: HashMap<&str, usize> = HashMap::new();
    for edge in &graph.edges {
        *degree.entry(edge.source.as_str()).or_default() += 1;
        *degree.entry(edge.target.as_str()).or_default() += 1;
    }
    // Resolve a topic id to its display name, falling back to the raw id.
    let display_name = |id: &String| -> String {
        graph
            .nodes
            .iter()
            .find(|n| &n.id == id)
            .map(|n| n.name.clone())
            .unwrap_or_else(|| id.clone())
    };
    graph
        .edges
        .iter()
        .filter(|edge| {
            let src = degree.get(edge.source.as_str()).copied().unwrap_or(0);
            let tgt = degree.get(edge.target.as_str()).copied().unwrap_or(0);
            // High-degree nodes connected by a weak edge = potential bridge.
            src > 3 && tgt > 3 && edge.weight < 3.0
        })
        .map(|edge| (display_name(&edge.source), display_name(&edge.target), edge.weight))
        .collect()
}
/// Analyze papers that bridge multiple research domains.
///
/// Groups works by their pair of dominant high-level concepts (taxonomy
/// level <= 1, score > 0.4) and reports combinations that are both recurrent
/// (>= 3 papers) and well cited (> 10 average citations).
fn analyze_bridge_papers(works: &[ruvector_data_openalex::Work]) -> Vec<String> {
    let mut discoveries = Vec::new();
    // Group papers by their concept combinations
    let mut concept_combos: HashMap<String, Vec<&ruvector_data_openalex::Work>> = HashMap::new();
    for work in works {
        if let Some(concepts) = &work.concepts {
            // Get high-level concepts (level 0-1)
            let high_level: Vec<String> = concepts.iter()
                .filter(|c| c.level <= 1 && c.score > 0.4)
                .map(|c| c.display_name.clone())
                .collect();
            if high_level.len() >= 2 {
                // BUG FIX: the key used to be the two names concatenated with
                // no separator, which is ambiguous ("AB"+"C" collides with
                // "A"+"BC") and made the reported combo unreadable. Sorting
                // the pair also makes the grouping independent of the order
                // in which the API returns concepts.
                let (first, second) = if high_level[0] <= high_level[1] {
                    (&high_level[0], &high_level[1])
                } else {
                    (&high_level[1], &high_level[0])
                };
                let key = format!("{} + {}", first, second);
                concept_combos.entry(key).or_default().push(work);
            }
        }
    }
    // Report unusual combinations with high citations
    for (combo, papers) in &concept_combos {
        let total_citations: i32 = papers.iter()
            .filter_map(|w| w.cited_by_count)
            .sum();
        let avg_citations = total_citations as f64 / papers.len() as f64;
        if papers.len() >= 3 && avg_citations > 10.0 {
            discoveries.push(format!(
                "🔗 Bridge area: {} ({} papers, {:.0} avg citations) - cross-domain synthesis",
                combo, papers.len(), avg_citations
            ));
        }
    }
    discoveries
}

View File

@@ -0,0 +1,267 @@
//! OpenAlex API client
use std::time::Duration;
use reqwest::{Client, StatusCode};
use serde::Deserialize;
use crate::{OpenAlexError, Work};
/// OpenAlex API client
///
/// Thin wrapper around a pooled `reqwest::Client` configured with a 30s
/// timeout, gzip decompression, and a RuVector user agent.
pub struct OpenAlexClient {
    // Underlying HTTP client (connection pooling, timeout, gzip).
    client: Client,
    // API root, e.g. "https://api.openalex.org"; overridable for tests
    // via `with_base_url`.
    base_url: String,
    // Contact email; when set, appended as `mailto=` to request URLs,
    // which places requests in OpenAlex's "polite pool".
    email: Option<String>,
}
/// API response wrapper
///
/// Generic envelope OpenAlex returns for list endpoints: a `meta` block
/// plus the page of `results`.
#[derive(Debug, Deserialize)]
pub struct ApiResponse<T> {
    /// Metadata (total count, paging, next cursor)
    pub meta: ApiMeta,
    /// Results for the current page
    pub results: Vec<T>,
}
/// API metadata
///
/// Paging information accompanying every list response. `page`/`per_page`
/// apply to offset paging; `next_cursor` to cursor-based paging.
#[derive(Debug, Deserialize)]
pub struct ApiMeta {
    /// Total count of matching records
    pub count: u64,
    /// Current page (offset pagination)
    pub page: Option<u32>,
    /// Results per page
    pub per_page: Option<u32>,
    /// Next cursor (for cursor-based pagination); `None` when exhausted
    pub next_cursor: Option<String>,
}
impl OpenAlexClient {
    /// Create a new OpenAlex client
    ///
    /// Providing an email enables the "polite pool" with higher rate limits.
    pub fn new(email: Option<String>) -> Self {
        let client = Client::builder()
            .timeout(Duration::from_secs(30))
            .user_agent("RuVector/0.1.0")
            .gzip(true)
            .build()
            // Builder only fails on invalid TLS/config; treated as a bug.
            .expect("Failed to build HTTP client");
        Self {
            client,
            base_url: "https://api.openalex.org".to_string(),
            email,
        }
    }

    /// Set custom base URL (for testing)
    pub fn with_base_url(mut self, url: &str) -> Self {
        self.base_url = url.to_string();
        self
    }

    /// Build URL with email parameter
    ///
    /// `params` is a pre-encoded query string and may be empty; when an
    /// email is configured, `mailto=<email>` is appended (with `&` only if
    /// other params exist).
    fn build_url(&self, endpoint: &str, params: &str) -> String {
        let mut url = format!("{}/{}?{}", self.base_url, endpoint, params);
        if let Some(ref email) = self.email {
            if !params.is_empty() {
                url.push('&');
            }
            url.push_str(&format!("mailto={}", email));
        }
        url
    }

    /// Health check - verify API is accessible
    ///
    /// Issues a minimal one-result query; returns `Ok(true)` on any 2xx.
    pub async fn health_check(&self) -> Result<bool, OpenAlexError> {
        let url = format!("{}/works?per_page=1", self.base_url);
        let response = self.client.get(&url).send().await?;
        Ok(response.status().is_success())
    }

    /// Fetch a page of works with pagination
    ///
    /// `filter` is a pre-encoded query fragment (e.g. `filter=cites:W123`)
    /// or empty. Passing `cursor: None` starts cursor pagination
    /// (`cursor=*`); feed the returned cursor back in to get the next page.
    /// Returns the page of works and the next cursor (`None` when done).
    pub async fn fetch_works_page(
        &self,
        filter: &str,
        cursor: Option<String>,
        per_page: usize,
    ) -> Result<(Vec<Work>, Option<String>), OpenAlexError> {
        let mut params = format!("per_page={}", per_page);
        if !filter.is_empty() {
            params.push_str(&format!("&{}", filter));
        }
        if let Some(c) = cursor {
            params.push_str(&format!("&cursor={}", c));
        } else {
            // Use cursor-based pagination for bulk
            params.push_str("&cursor=*");
        }
        let url = self.build_url("works", &params);
        let response = self.client.get(&url).send().await?;
        match response.status() {
            StatusCode::OK => {
                let api_response: ApiResponse<Work> = response.json().await?;
                Ok((api_response.results, api_response.meta.next_cursor))
            }
            StatusCode::TOO_MANY_REQUESTS => {
                // Honor the server's retry-after header, defaulting to 60s
                // when missing or unparseable.
                let retry_after = response
                    .headers()
                    .get("retry-after")
                    .and_then(|v| v.to_str().ok())
                    .and_then(|s| s.parse().ok())
                    .unwrap_or(60);
                Err(OpenAlexError::RateLimited(retry_after))
            }
            status => Err(OpenAlexError::Api(format!(
                "Unexpected status: {}",
                status
            ))),
        }
    }

    /// Fetch a single work by ID
    ///
    /// Accepts either a full `https://...` URL or a bare `W...` id; any
    /// other shape is rejected as `InvalidId`.
    pub async fn get_work(&self, id: &str) -> Result<Work, OpenAlexError> {
        // Normalize ID format
        let normalized_id = if id.starts_with("https://") {
            id.to_string()
        } else if id.starts_with("W") {
            format!("https://openalex.org/{}", id)
        } else {
            return Err(OpenAlexError::InvalidId(id.to_string()));
        };
        let url = self.build_url(&format!("works/{}", normalized_id), "");
        let response = self.client.get(&url).send().await?;
        match response.status() {
            StatusCode::OK => Ok(response.json().await?),
            // 404 is reported as an invalid id rather than a generic API error.
            StatusCode::NOT_FOUND => Err(OpenAlexError::InvalidId(id.to_string())),
            status => Err(OpenAlexError::Api(format!(
                "Unexpected status: {}",
                status
            ))),
        }
    }

    /// Search works by query
    ///
    /// Full-text search; `query` is percent-encoded before being sent.
    pub async fn search_works(
        &self,
        query: &str,
        per_page: usize,
    ) -> Result<Vec<Work>, OpenAlexError> {
        let params = format!("search={}&per_page={}", urlencoding::encode(query), per_page);
        let url = self.build_url("works", &params);
        let response = self.client.get(&url).send().await?;
        match response.status() {
            StatusCode::OK => {
                let api_response: ApiResponse<Work> = response.json().await?;
                Ok(api_response.results)
            }
            status => Err(OpenAlexError::Api(format!(
                "Unexpected status: {}",
                status
            ))),
        }
    }

    /// Fetch works by topic (first page only; use `fetch_works_page` to paginate)
    pub async fn works_by_topic(
        &self,
        topic_id: &str,
        per_page: usize,
    ) -> Result<Vec<Work>, OpenAlexError> {
        let filter = format!("filter=primary_topic.id:{}", topic_id);
        let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
        Ok(works)
    }

    /// Fetch works by author (first page only)
    pub async fn works_by_author(
        &self,
        author_id: &str,
        per_page: usize,
    ) -> Result<Vec<Work>, OpenAlexError> {
        let filter = format!("filter=authorships.author.id:{}", author_id);
        let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
        Ok(works)
    }

    /// Fetch works by institution (first page only)
    pub async fn works_by_institution(
        &self,
        institution_id: &str,
        per_page: usize,
    ) -> Result<Vec<Work>, OpenAlexError> {
        let filter = format!(
            "filter=authorships.institutions.id:{}",
            institution_id
        );
        let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
        Ok(works)
    }

    /// Fetch works citing a specific work (first page only)
    pub async fn citing_works(
        &self,
        work_id: &str,
        per_page: usize,
    ) -> Result<Vec<Work>, OpenAlexError> {
        let filter = format!("filter=cites:{}", work_id);
        let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
        Ok(works)
    }

    /// Fetch works cited by a specific work
    ///
    /// Resolves each referenced work with an individual request (capped at
    /// 100 to bound request volume); failed lookups are silently skipped,
    /// so the result may be shorter than the reference list.
    pub async fn cited_by_work(&self, work_id: &str) -> Result<Vec<Work>, OpenAlexError> {
        let work = self.get_work(work_id).await?;
        // Fetch referenced works
        let mut cited_works = Vec::new();
        for ref_id in work.referenced_works.iter().take(100) {
            // Limit to avoid too many requests
            if let Ok(cited) = self.get_work(ref_id).await {
                cited_works.push(cited);
            }
        }
        Ok(cited_works)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default client must target the public OpenAlex API root.
    #[test]
    fn test_client_creation() {
        let client = OpenAlexClient::new(None);
        assert_eq!(client.base_url, "https://api.openalex.org");
    }

    /// When an email is configured it appears as a `mailto` parameter.
    #[test]
    fn test_client_with_email() {
        let client = OpenAlexClient::new(Some(String::from("test@example.com")));
        let built = client.build_url("works", "per_page=10");
        assert!(built.contains("mailto=test@example.com"));
    }

    /// Built URLs are rooted at the base URL and carry the given params.
    #[test]
    fn test_url_building() {
        let built = OpenAlexClient::new(None).build_url("works", "filter=publication_year:2023");
        assert!(built.starts_with("https://api.openalex.org/works"));
        assert!(built.contains("filter=publication_year:2023"));
    }
}

View File

@@ -0,0 +1,518 @@
//! Research frontier detection using coherence signals
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::{TopicEdge, TopicGraph, TopicNode, Work};
/// An emerging research frontier
///
/// Produced by `FrontierRadar::detect_frontiers` when a topic shows both
/// rapid work-count growth and a shift in its local coherence between
/// consecutive snapshots.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmergingFrontier {
    /// Frontier identifier (sequential, e.g. "frontier_0")
    pub id: String,
    /// Primary topic name
    pub name: String,
    /// Related topic names (neighbors in the topic graph)
    pub related_topics: Vec<String>,
    /// Growth rate (works per year)
    pub growth_rate: f64,
    /// Coherence delta (change in min-cut boundary)
    pub coherence_delta: f64,
    /// Citation momentum (trend in citation rates)
    pub citation_momentum: f64,
    /// Detected boundary nodes (topics at the frontier edge)
    pub boundary_topics: Vec<String>,
    /// First detected (timestamp of the snapshot that triggered detection)
    pub detected_at: DateTime<Utc>,
    /// Confidence score (0-1)
    pub confidence: f64,
    /// Evidence supporting this frontier
    pub evidence: Vec<FrontierEvidence>,
}
/// Evidence for a frontier detection
///
/// One quantitative signal backing an [`EmergingFrontier`], e.g.
/// "growth_rate", "coherence_delta", or "citation_momentum".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrontierEvidence {
    /// Evidence type (machine-readable tag)
    pub evidence_type: String,
    /// Raw value of the signal
    pub value: f64,
    /// Human-readable explanation
    pub explanation: String,
}
/// A cross-domain bridge connecting two research areas
///
/// Produced by `FrontierRadar::detect_bridges` from edges whose endpoints
/// fall in different (heuristically derived) domains.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossDomainBridge {
    /// Bridge identifier (sequential, e.g. "bridge_0")
    pub id: String,
    /// Source domain/topic
    pub source_domain: String,
    /// Target domain/topic
    pub target_domain: String,
    /// Bridge topics (connector nodes)
    pub bridge_topics: Vec<String>,
    /// Citation flow (source → target)
    pub citation_flow: f64,
    /// Reverse flow (target → source)
    pub reverse_flow: f64,
    /// Bridge strength (combined normalized flow)
    pub strength: f64,
    /// Is this a new connection?
    pub is_emerging: bool,
    /// First observed
    pub first_observed: DateTime<Utc>,
    /// Key papers establishing the bridge
    pub key_works: Vec<String>,
}
/// Research frontier radar for detecting emerging fields
///
/// Accumulates time-ordered topic-graph snapshots and compares consecutive
/// pairs to surface fast-growing, structurally shifting topics.
pub struct FrontierRadar {
    /// Topic graph snapshots over time (kept sorted by timestamp)
    snapshots: Vec<(DateTime<Utc>, TopicGraph)>,
    /// Minimum relative growth rate to consider a topic
    min_growth_rate: f64,
    /// Minimum absolute coherence shift to detect
    min_coherence_shift: f64,
    /// Detected frontiers (cached by `detect_frontiers`)
    frontiers: Vec<EmergingFrontier>,
    /// Detected bridges (cached by `detect_bridges`)
    bridges: Vec<CrossDomainBridge>,
}
impl FrontierRadar {
    /// Create a new frontier radar
    ///
    /// `min_growth_rate`: minimum relative work-count growth between two
    /// consecutive snapshots. `min_coherence_shift`: minimum absolute change
    /// in a topic's local coherence to flag it.
    pub fn new(min_growth_rate: f64, min_coherence_shift: f64) -> Self {
        Self {
            snapshots: Vec::new(),
            min_growth_rate,
            min_coherence_shift,
            frontiers: Vec::new(),
            bridges: Vec::new(),
        }
    }

    /// Add a topic graph snapshot
    ///
    /// Snapshots are re-sorted by timestamp after every insert so that
    /// consecutive pairs can be compared chronologically.
    pub fn add_snapshot(&mut self, timestamp: DateTime<Utc>, graph: TopicGraph) {
        self.snapshots.push((timestamp, graph));
        self.snapshots.sort_by_key(|(ts, _)| *ts);
    }

    /// Build snapshots from works partitioned by time
    ///
    /// Splits the publication-date range into consecutive windows of
    /// `window_days`; each non-empty window becomes one snapshot. Works
    /// without a publication date are ignored.
    pub fn build_from_works(&mut self, works: &[Work], window_days: i64) {
        if works.is_empty() {
            return;
        }
        // Find time range
        let mut min_date = Utc::now();
        let mut max_date = DateTime::<Utc>::MIN_UTC;
        for work in works {
            if let Some(date) = work.publication_date {
                if date < min_date {
                    min_date = date;
                }
                if date > max_date {
                    max_date = date;
                }
            }
        }
        // Partition works into time windows
        let window_duration = chrono::Duration::days(window_days);
        let mut current_start = min_date;
        while current_start < max_date {
            let current_end = current_start + window_duration;
            // Works whose date falls in [current_start, current_end)
            let window_works: Vec<_> = works
                .iter()
                .filter(|w| {
                    w.publication_date
                        .map(|d| d >= current_start && d < current_end)
                        .unwrap_or(false)
                })
                .cloned()
                .collect();
            if !window_works.is_empty() {
                let graph = TopicGraph::from_works(&window_works);
                self.add_snapshot(current_start, graph);
            }
            current_start = current_end;
        }
    }

    /// Detect emerging frontiers from snapshots
    ///
    /// Compares each consecutive snapshot pair. A topic is flagged when its
    /// work count grows faster than `min_growth_rate` AND its local
    /// coherence shifts by more than `min_coherence_shift`; detections with
    /// confidence below 0.3 are dropped. Results are sorted by descending
    /// confidence and also cached on `self` (see [`Self::frontiers`]).
    /// Returns an empty vec when fewer than two snapshots are loaded.
    pub fn detect_frontiers(&mut self) -> Vec<EmergingFrontier> {
        if self.snapshots.len() < 2 {
            return vec![];
        }
        let mut frontiers = Vec::new();
        let mut frontier_counter = 0;
        // Compare consecutive snapshots
        for i in 1..self.snapshots.len() {
            let (prev_ts, prev_graph) = &self.snapshots[i - 1];
            let (curr_ts, curr_graph) = &self.snapshots[i];
            // Find topics with significant growth
            for (topic_id, curr_node) in &curr_graph.topics {
                let prev_node = prev_graph.topics.get(topic_id);
                // Relative growth; topics new in this snapshot (or previously
                // empty) count as infinite growth and always pass the gate.
                let growth = if let Some(prev) = prev_node {
                    if prev.work_count > 0 {
                        (curr_node.work_count as f64 - prev.work_count as f64)
                            / prev.work_count as f64
                    } else {
                        f64::INFINITY
                    }
                } else {
                    // New topic
                    f64::INFINITY
                };
                if growth > self.min_growth_rate {
                    // Calculate coherence shift
                    let coherence_delta = self.compute_topic_coherence_delta(
                        topic_id,
                        prev_graph,
                        curr_graph,
                    );
                    if coherence_delta.abs() > self.min_coherence_shift {
                        // Calculate citation momentum (change in avg citations;
                        // 0.0 assumed when the topic is new)
                        let citation_momentum = curr_node.avg_citations
                            - prev_node.map(|n| n.avg_citations).unwrap_or(0.0);
                        // Find boundary topics
                        let boundary_topics = self.find_boundary_topics(topic_id, curr_graph);
                        // Build evidence
                        let mut evidence = vec![
                            FrontierEvidence {
                                evidence_type: "growth_rate".to_string(),
                                value: growth,
                                explanation: format!(
                                    "{:.0}% increase in works",
                                    growth * 100.0
                                ),
                            },
                            FrontierEvidence {
                                evidence_type: "coherence_delta".to_string(),
                                value: coherence_delta,
                                explanation: format!(
                                    "Coherence {} by {:.2}",
                                    if coherence_delta > 0.0 {
                                        "increased"
                                    } else {
                                        "decreased"
                                    },
                                    coherence_delta.abs()
                                ),
                            },
                        ];
                        // Citation momentum is only cited as evidence when positive
                        if citation_momentum > 0.0 {
                            evidence.push(FrontierEvidence {
                                evidence_type: "citation_momentum".to_string(),
                                value: citation_momentum,
                                explanation: format!(
                                    "+{:.1} avg citations",
                                    citation_momentum
                                ),
                            });
                        }
                        // Calculate confidence based on evidence strength
                        let confidence = self.calculate_confidence(growth, coherence_delta, citation_momentum);
                        if confidence >= 0.3 {
                            frontiers.push(EmergingFrontier {
                                id: format!("frontier_{}", frontier_counter),
                                name: curr_node.name.clone(),
                                related_topics: self.find_related_topics(topic_id, curr_graph),
                                growth_rate: curr_node.growth_rate,
                                coherence_delta,
                                citation_momentum,
                                boundary_topics,
                                detected_at: *curr_ts,
                                confidence,
                                evidence,
                            });
                            frontier_counter += 1;
                        }
                    }
                }
            }
        }
        // Sort by confidence (descending; NaN-safe via Ordering::Equal)
        frontiers.sort_by(|a, b| {
            b.confidence
                .partial_cmp(&a.confidence)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        self.frontiers = frontiers.clone();
        frontiers
    }

    /// Detect cross-domain bridges
    ///
    /// Operates on the latest snapshot only. Domains are derived from the
    /// first word of each topic name (see [`Self::get_domain`]); edges whose
    /// endpoints map to different domains are aggregated per domain pair.
    /// Results are sorted by descending strength and cached on `self`.
    pub fn detect_bridges(&mut self) -> Vec<CrossDomainBridge> {
        if self.snapshots.is_empty() {
            return vec![];
        }
        let mut bridges = Vec::new();
        let mut bridge_counter = 0;
        let (curr_ts, curr_graph) = self.snapshots.last().unwrap();
        // Build domain → topics mapping (simplified: use top-level grouping)
        // NOTE(review): `domain_topics` is populated but never read below —
        // candidate for removal.
        let mut domain_topics: HashMap<String, Vec<String>> = HashMap::new();
        for (topic_id, node) in &curr_graph.topics {
            // Use first word as domain (simplified)
            let domain = node
                .name
                .split_whitespace()
                .next()
                .unwrap_or("Unknown")
                .to_string();
            domain_topics
                .entry(domain.clone())
                .or_default()
                .push(topic_id.clone());
        }
        // Find cross-domain edges, grouped by (source domain, target domain)
        let mut domain_flows: HashMap<(String, String), Vec<&TopicEdge>> = HashMap::new();
        for edge in &curr_graph.edges {
            let src_domain = self.get_domain(&edge.source, curr_graph);
            let tgt_domain = self.get_domain(&edge.target, curr_graph);
            if src_domain != tgt_domain {
                domain_flows
                    .entry((src_domain.clone(), tgt_domain.clone()))
                    .or_default()
                    .push(edge);
            }
        }
        // Create bridge records
        for ((src_domain, tgt_domain), edges) in domain_flows {
            let total_flow: f64 = edges.iter().map(|e| e.weight).sum();
            let citation_count: usize = edges.iter().map(|e| e.citation_count).sum();
            if citation_count >= 5 {
                // Minimum threshold
                // Unique endpoint topics of the bridging edges
                let bridge_topics: Vec<String> = edges
                    .iter()
                    .flat_map(|e| vec![e.source.clone(), e.target.clone()])
                    .collect::<std::collections::HashSet<_>>()
                    .into_iter()
                    .collect();
                // Check if this is emerging (compare with previous snapshot)
                let is_emerging = if self.snapshots.len() >= 2 {
                    let (_, prev_graph) = &self.snapshots[self.snapshots.len() - 2];
                    let prev_flow: f64 = prev_graph
                        .edges
                        .iter()
                        .filter(|e| {
                            self.get_domain(&e.source, prev_graph) == src_domain
                                && self.get_domain(&e.target, prev_graph) == tgt_domain
                        })
                        .map(|e| e.weight)
                        .sum();
                    total_flow > prev_flow * 1.5 // 50% growth
                } else {
                    // With a single snapshot every bridge counts as emerging
                    true
                };
                bridges.push(CrossDomainBridge {
                    id: format!("bridge_{}", bridge_counter),
                    source_domain: src_domain.clone(),
                    target_domain: tgt_domain.clone(),
                    bridge_topics,
                    citation_flow: total_flow,
                    reverse_flow: 0.0, // Would need to compute reverse direction
                    strength: total_flow / citation_count as f64,
                    is_emerging,
                    first_observed: *curr_ts,
                    key_works: vec![], // Would need work-level data
                });
                bridge_counter += 1;
            }
        }
        // Sort by strength (descending; NaN-safe via Ordering::Equal)
        bridges.sort_by(|a, b| {
            b.strength
                .partial_cmp(&a.strength)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        self.bridges = bridges.clone();
        bridges
    }

    /// Compute coherence delta for a topic between snapshots
    fn compute_topic_coherence_delta(
        &self,
        topic_id: &str,
        prev_graph: &TopicGraph,
        curr_graph: &TopicGraph,
    ) -> f64 {
        // Compute local coherence as ratio of intra-topic to inter-topic edges
        let prev_coherence = self.compute_local_coherence(topic_id, prev_graph);
        let curr_coherence = self.compute_local_coherence(topic_id, curr_graph);
        curr_coherence - prev_coherence
    }

    /// Compute local coherence for a topic
    ///
    /// Defined here as the mean weight of the topic's incident edges;
    /// 0.0 for isolated topics.
    fn compute_local_coherence(&self, topic_id: &str, graph: &TopicGraph) -> f64 {
        // Find edges involving this topic
        let edges: Vec<_> = graph
            .edges
            .iter()
            .filter(|e| e.source == topic_id || e.target == topic_id)
            .collect();
        if edges.is_empty() {
            return 0.0;
        }
        // Coherence = sum of weights
        edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64
    }

    /// Find topics at the boundary (connected to other clusters)
    ///
    /// NOTE(review): only outgoing edges (`source == topic_id`) are
    /// considered here, unlike `find_related_topics` which checks both
    /// directions — confirm this asymmetry is intentional.
    fn find_boundary_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec<String> {
        // Find topics connected to this topic that have high connectivity elsewhere
        graph
            .edges
            .iter()
            .filter(|e| e.source == topic_id)
            .map(|e| e.target.clone())
            .take(5)
            .collect()
    }

    /// Find related topics
    ///
    /// Returns up to 10 neighbors of `topic_id` (either edge direction),
    /// in edge order.
    fn find_related_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec<String> {
        graph
            .edges
            .iter()
            .filter(|e| e.source == topic_id || e.target == topic_id)
            .flat_map(|e| {
                if e.source == topic_id {
                    vec![e.target.clone()]
                } else {
                    vec![e.source.clone()]
                }
            })
            .take(10)
            .collect()
    }

    /// Get domain for a topic (simplified)
    ///
    /// Uses the first whitespace-separated word of the topic name;
    /// "Unknown" for missing topics or empty names.
    fn get_domain(&self, topic_id: &str, graph: &TopicGraph) -> String {
        graph
            .topics
            .get(topic_id)
            .map(|n| {
                n.name
                    .split_whitespace()
                    .next()
                    .unwrap_or("Unknown")
                    .to_string()
            })
            .unwrap_or_else(|| "Unknown".to_string())
    }

    /// Calculate confidence score
    ///
    /// Weighted blend of the three clamped signals: 40% growth (capped at
    /// 5x), 40% |coherence delta| (capped at 1), 20% citation momentum
    /// (scaled by 1/10, capped at 1); result is clamped to [0, 1].
    fn calculate_confidence(
        &self,
        growth: f64,
        coherence_delta: f64,
        citation_momentum: f64,
    ) -> f64 {
        let growth_score = (growth.min(5.0) / 5.0).max(0.0);
        let coherence_score = (coherence_delta.abs().min(1.0)).max(0.0);
        let citation_score = (citation_momentum / 10.0).min(1.0).max(0.0);
        (growth_score * 0.4 + coherence_score * 0.4 + citation_score * 0.2).min(1.0)
    }

    /// Get detected frontiers (as cached by the last `detect_frontiers` call)
    pub fn frontiers(&self) -> &[EmergingFrontier] {
        &self.frontiers
    }

    /// Get detected bridges (as cached by the last `detect_bridges` call)
    pub fn bridges(&self) -> &[CrossDomainBridge] {
        &self.bridges
    }

    /// Get highest confidence frontiers
    ///
    /// Relies on the cached list already being sorted by confidence.
    pub fn top_frontiers(&self, n: usize) -> Vec<&EmergingFrontier> {
        self.frontiers.iter().take(n).collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed radar has no cached detections.
    #[test]
    fn test_frontier_radar_creation() {
        let radar = FrontierRadar::new(0.1, 0.2);
        assert!(radar.frontiers().is_empty());
        assert!(radar.bridges().is_empty());
    }

    /// Confidence blends growth, coherence shift and citation momentum.
    #[test]
    fn test_confidence_calculation() {
        let radar = FrontierRadar::new(0.1, 0.2);
        // Strong signals across the board → confident detection.
        assert!(radar.calculate_confidence(2.0, 0.5, 5.0) > 0.5);
        // Weak signals everywhere → below the 0.3 acceptance floor.
        assert!(radar.calculate_confidence(0.05, 0.01, 0.1) < 0.3);
    }
}

View File

@@ -0,0 +1,476 @@
//! # RuVector OpenAlex Integration
//!
//! Integration with OpenAlex, the open catalog of scholarly works, authors,
//! institutions, and topics. Enables novel discovery through:
//!
//! - **Emerging Field Detection**: Find topic splits/merges as cut boundaries shift
//! - **Cross-Domain Bridges**: Identify connector subgraphs between disciplines
//! - **Funding-to-Output Causality**: Map funder → lab → venue → citation chains
//!
//! ## OpenAlex Data Model
//!
//! OpenAlex provides a rich graph structure:
//! - **Works**: 250M+ scholarly publications
//! - **Authors**: 90M+ researchers with affiliations
//! - **Institutions**: 100K+ universities, labs, companies
//! - **Topics**: Hierarchical concept taxonomy
//! - **Funders**: Research funding organizations
//! - **Sources**: Journals, conferences, repositories
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use ruvector_data_openalex::{OpenAlexClient, FrontierRadar, TopicGraph};
//!
//! // Initialize client
//! let client = OpenAlexClient::new(Some("your-email@example.com".to_string()));
//!
//! // Fetch recent works for a topic and window them into snapshots
//! let works = client.works_by_topic("T10017", 200).await?;
//!
//! // Detect emerging research frontiers
//! let mut radar = FrontierRadar::new(0.15, 0.2);
//! radar.build_from_works(&works, 180);
//! let frontiers = radar.detect_frontiers();
//!
//! for frontier in frontiers {
//!     println!("Emerging: {} (coherence shift: {:.2})",
//!         frontier.name, frontier.coherence_delta);
//! }
//! ```
#![warn(missing_docs)]
#![warn(clippy::all)]
pub mod client;
pub mod frontier;
pub mod schema;
use std::collections::HashMap;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use thiserror::Error;
pub use client::OpenAlexClient;
pub use frontier::{CrossDomainBridge, EmergingFrontier, FrontierRadar};
pub use schema::{
Author, AuthorPosition, Authorship, Concept, Funder, Institution, Source, Topic, Work,
};
use ruvector_data_framework::{DataRecord, DataSource, FrameworkError, Relationship, Result};
/// OpenAlex-specific error types
///
/// Converted into `FrameworkError::Ingestion` at the framework boundary.
#[derive(Error, Debug)]
pub enum OpenAlexError {
    /// API request failed (unexpected HTTP status or malformed response)
    #[error("API error: {0}")]
    Api(String),
    /// Rate limit exceeded; payload is the suggested retry delay in seconds
    #[error("Rate limit exceeded, retry after {0}s")]
    RateLimited(u64),
    /// Invalid entity ID (bad format or 404 from the API)
    #[error("Invalid OpenAlex ID: {0}")]
    InvalidId(String),
    /// Parsing failed
    #[error("Parse error: {0}")]
    Parse(String),
    /// Network error (propagated from reqwest)
    #[error("Network error: {0}")]
    Network(#[from] reqwest::Error),
}
impl From<OpenAlexError> for FrameworkError {
    /// Map any OpenAlex failure onto the framework's ingestion error,
    /// preserving the original message via its `Display` rendering.
    fn from(err: OpenAlexError) -> Self {
        FrameworkError::Ingestion(format!("{}", err))
    }
}
/// Configuration for OpenAlex data source
///
/// See [`OpenAlexConfig::default`] for the standard settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAlexConfig {
    /// API base URL
    pub base_url: String,
    /// Email for polite pool (faster rate limits)
    pub email: Option<String>,
    /// Maximum results per page
    pub per_page: usize,
    /// Enable cursor-based pagination for bulk
    pub use_cursor: bool,
    /// Filter to specific entity types
    pub entity_types: Vec<EntityType>,
}
impl Default for OpenAlexConfig {
fn default() -> Self {
Self {
base_url: "https://api.openalex.org".to_string(),
email: None,
per_page: 200,
use_cursor: true,
entity_types: vec![EntityType::Work],
}
}
}
/// OpenAlex entity types
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum EntityType {
    /// Scholarly works (papers, books, datasets, ...)
    Work,
    /// Authors
    Author,
    /// Institutions
    Institution,
    /// Topics/concepts
    Topic,
    /// Funding sources
    Funder,
    /// Publication venues (journals, conferences, repositories)
    Source,
}

impl EntityType {
    /// Get the REST API endpoint path segment for this entity type.
    ///
    /// Returns `&'static str` because every endpoint name is a compile-time
    /// string literal; this widens the previous `&str` (lifetime tied to
    /// `&self`) return in a backward-compatible way, letting callers keep
    /// the endpoint independently of the `EntityType` borrow.
    pub fn endpoint(&self) -> &'static str {
        match self {
            EntityType::Work => "works",
            EntityType::Author => "authors",
            EntityType::Institution => "institutions",
            EntityType::Topic => "topics",
            EntityType::Funder => "funders",
            EntityType::Source => "sources",
        }
    }
}
/// OpenAlex data source for the framework
pub struct OpenAlexSource {
    /// HTTP client used for all API calls
    client: OpenAlexClient,
    /// Source configuration (base URL, paging, entity types)
    config: OpenAlexConfig,
    /// API filter key/value pairs applied to every fetch
    filters: HashMap<String, String>,
}

impl OpenAlexSource {
    /// Create a new OpenAlex data source from the given configuration.
    pub fn new(config: OpenAlexConfig) -> Self {
        Self {
            client: OpenAlexClient::new(config.email.clone()),
            config,
            filters: HashMap::new(),
        }
    }

    /// Add a raw API filter (e.g. `"publication_year"` => `"2023"`).
    pub fn with_filter(mut self, key: &str, value: &str) -> Self {
        self.filters.insert(key.to_owned(), value.to_owned());
        self
    }

    /// Restrict results to an inclusive publication-year range.
    pub fn with_year_range(self, start: i32, end: i32) -> Self {
        let range = format!("{}-{}", start, end);
        self.with_filter("publication_year", &range)
    }

    /// Restrict results to a single primary topic.
    pub fn with_topic(self, topic_id: &str) -> Self {
        self.with_filter("primary_topic.id", topic_id)
    }

    /// Restrict results to open-access works only.
    pub fn open_access_only(self) -> Self {
        self.with_filter("open_access.is_oa", "true")
    }
}
#[async_trait]
impl DataSource for OpenAlexSource {
    fn source_id(&self) -> &str {
        "openalex"
    }

    /// Fetch one page of works, applying the configured filters.
    ///
    /// Returns the converted records plus the cursor for the next page
    /// (`None` when the result set is exhausted).
    async fn fetch_batch(
        &self,
        cursor: Option<String>,
        batch_size: usize,
    ) -> Result<(Vec<DataRecord>, Option<String>)> {
        // Build the `filter=` query-string fragment from configured filters.
        // (Was `let mut …`, but the vector is never mutated after `collect`,
        // which triggered `unused_mut` under `#![warn(clippy::all)]`.)
        let query_parts: Vec<String> = self
            .filters
            .iter()
            .map(|(k, v)| format!("{}:{}", k, v))
            .collect();
        let filter_str = if query_parts.is_empty() {
            String::new()
        } else {
            format!("filter={}", query_parts.join(","))
        };
        // Fetch works from the API, capping the page size at the configured
        // per-page maximum.
        let (works, next_cursor) = self
            .client
            .fetch_works_page(&filter_str, cursor, batch_size.min(self.config.per_page))
            .await
            .map_err(|e| FrameworkError::Ingestion(e.to_string()))?;
        // Convert to framework DataRecords.
        let records: Vec<DataRecord> = works.into_iter().map(work_to_record).collect();
        Ok((records, next_cursor))
    }

    async fn total_count(&self) -> Result<Option<u64>> {
        // OpenAlex reports a total in its `meta` object, but retrieving it
        // would require a separate API call; report "unknown" instead.
        Ok(None)
    }

    async fn health_check(&self) -> Result<bool> {
        self.client.health_check().await.map_err(|e| e.into())
    }
}
/// Convert an OpenAlex Work to a DataRecord
fn work_to_record(work: Work) -> DataRecord {
let mut relationships = Vec::new();
// Citations as relationships
for cited_id in &work.referenced_works {
relationships.push(Relationship {
target_id: cited_id.clone(),
rel_type: "cites".to_string(),
weight: 1.0,
properties: HashMap::new(),
});
}
// Author relationships
for authorship in &work.authorships {
relationships.push(Relationship {
target_id: authorship.author.id.clone(),
rel_type: "authored_by".to_string(),
weight: 1.0 / work.authorships.len() as f64,
properties: HashMap::new(),
});
// Institution relationships
for inst in &authorship.institutions {
relationships.push(Relationship {
target_id: inst.id.clone(),
rel_type: "affiliated_with".to_string(),
weight: 0.5,
properties: HashMap::new(),
});
}
}
// Topic relationships
if let Some(ref topic) = work.primary_topic {
relationships.push(Relationship {
target_id: topic.id.clone(),
rel_type: "primary_topic".to_string(),
weight: topic.score,
properties: HashMap::new(),
});
}
DataRecord {
id: work.id.clone(),
source: "openalex".to_string(),
record_type: "work".to_string(),
timestamp: work.publication_date.unwrap_or_else(Utc::now),
data: serde_json::to_value(&work).unwrap_or_default(),
embedding: None, // Would compute from title/abstract
relationships,
}
}
/// Topic-based citation graph for frontier detection
pub struct TopicGraph {
    /// Topics as nodes, keyed by OpenAlex topic ID
    pub topics: HashMap<String, TopicNode>,
    /// Topic-to-topic edges (via citations crossing topic boundaries)
    pub edges: Vec<TopicEdge>,
    /// Time window covered by the underlying works: (earliest, latest)
    pub time_window: (DateTime<Utc>, DateTime<Utc>),
}
/// A topic node in the graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicNode {
    /// OpenAlex topic ID
    pub id: String,
    /// Topic display name
    pub name: String,
    /// Number of works in this topic
    pub work_count: usize,
    /// Average citation count across this topic's works
    pub avg_citations: f64,
    /// Growth rate (works per year over the graph's time window)
    pub growth_rate: f64,
}
/// An edge between topics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicEdge {
    /// Source topic ID
    pub source: String,
    /// Target topic ID
    pub target: String,
    /// Number of citations across the topic boundary
    pub citation_count: usize,
    /// Weight normalized by the product of the two topics' work counts
    pub weight: f64,
}
impl TopicGraph {
    /// Build a topic graph from works.
    ///
    /// Each work contributes to its primary topic's node statistics (work
    /// count and a running average of citation counts) and to the overall
    /// time window. Cross-topic citation edges would require the topics of
    /// each referenced work, which are not available from the works alone,
    /// so in this simplified model `edges` ends up empty.
    pub fn from_works(works: &[Work]) -> Self {
        let mut topics: HashMap<String, TopicNode> = HashMap::new();
        // Placeholder for cross-topic citation tallies; never populated in
        // the simplified model. (Was `let mut …`, which triggered an
        // `unused_mut` warning under `#![warn(clippy::all)]`.)
        let edge_counts: HashMap<(String, String), usize> = HashMap::new();
        // Time-window trackers. If no work carries a date these remain
        // inverted (now, MIN_UTC); the growth-rate guard below then yields
        // 0.0 because the span is non-positive.
        let mut min_date = Utc::now();
        let mut max_date = DateTime::<Utc>::MIN_UTC;
        for work in works {
            if let Some(date) = work.publication_date {
                if date < min_date {
                    min_date = date;
                }
                if date > max_date {
                    max_date = date;
                }
            }
            // Works without a primary topic contribute only to the window.
            let source_topic = match &work.primary_topic {
                Some(t) => t.id.clone(),
                None => continue,
            };
            // Update or create the topic node.
            let node = topics.entry(source_topic.clone()).or_insert_with(|| TopicNode {
                id: source_topic.clone(),
                name: work
                    .primary_topic
                    .as_ref()
                    .map(|t| t.display_name.clone())
                    .unwrap_or_default(),
                work_count: 0,
                avg_citations: 0.0,
                growth_rate: 0.0,
            });
            node.work_count += 1;
            // Incremental (running) mean of citation counts.
            node.avg_citations = (node.avg_citations * (node.work_count - 1) as f64
                + work.cited_by_count as f64)
                / node.work_count as f64;
        }
        // Growth rate = works per year over the observed time window.
        let time_span_years = (max_date - min_date).num_days() as f64 / 365.0;
        for node in topics.values_mut() {
            node.growth_rate = if time_span_years > 0.0 {
                node.work_count as f64 / time_span_years
            } else {
                0.0
            };
        }
        // Build edges, normalizing each citation count by the product of the
        // two endpoint topics' work counts.
        let edges: Vec<TopicEdge> = edge_counts
            .into_iter()
            .map(|((src, tgt), count)| {
                let src_count = topics.get(&src).map(|n| n.work_count).unwrap_or(1);
                let tgt_count = topics.get(&tgt).map(|n| n.work_count).unwrap_or(1);
                let weight = count as f64 / (src_count * tgt_count) as f64;
                TopicEdge {
                    source: src,
                    target: tgt,
                    citation_count: count,
                    weight,
                }
            })
            .collect();
        Self {
            topics,
            edges,
            time_window: (min_date, max_date),
        }
    }

    /// Number of topic nodes in the graph.
    pub fn topic_count(&self) -> usize {
        self.topics.len()
    }

    /// Number of topic-to-topic edges.
    pub fn edge_count(&self) -> usize {
        self.edges.len()
    }

    /// The `top_k` topics with the highest growth rate, in descending order.
    pub fn fastest_growing(&self, top_k: usize) -> Vec<&TopicNode> {
        let mut nodes: Vec<_> = self.topics.values().collect();
        // `f64::total_cmp` is a proper total order (NaN-safe), replacing the
        // `partial_cmp().unwrap_or(Equal)` fallback; stability is irrelevant
        // for ranking, so the allocation-free unstable sort is used.
        nodes.sort_unstable_by(|a, b| b.growth_rate.total_cmp(&a.growth_rate));
        nodes.truncate(top_k);
        nodes
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Endpoint mapping is a pure function of the enum variant.
    #[test]
    fn test_entity_endpoints() {
        assert_eq!(EntityType::Work.endpoint(), "works");
        assert_eq!(EntityType::Author.endpoint(), "authors");
        assert_eq!(EntityType::Topic.endpoint(), "topics");
    }

    // Defaults must target the public API with cursor paging enabled.
    #[test]
    fn test_default_config() {
        let config = OpenAlexConfig::default();
        assert_eq!(config.base_url, "https://api.openalex.org");
        assert!(config.use_cursor);
    }

    // Builder-style filter methods accumulate entries in the `filters` map.
    #[test]
    fn test_source_with_filters() {
        let config = OpenAlexConfig::default();
        let source = OpenAlexSource::new(config)
            .with_year_range(2020, 2024)
            .open_access_only();
        assert!(source.filters.contains_key("publication_year"));
        assert!(source.filters.contains_key("open_access.is_oa"));
    }
}

View File

@@ -0,0 +1,627 @@
//! OpenAlex entity schemas
//!
//! Represents the core entity types from OpenAlex:
//! - Works (publications)
//! - Authors
//! - Institutions
//! - Topics/Concepts
//! - Funders
//! - Sources (journals, conferences)
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// A scholarly work (paper, book, dataset, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Work {
    /// OpenAlex ID (e.g., "W2741809807")
    pub id: String,
    /// DOI (if available)
    pub doi: Option<String>,
    /// Work title
    pub title: String,
    /// Publication date
    pub publication_date: Option<DateTime<Utc>>,
    /// Publication year
    pub publication_year: Option<i32>,
    /// Work type (article, book, dataset, etc.)
    #[serde(rename = "type")]
    pub work_type: Option<String>,
    /// Open access status
    pub open_access: Option<OpenAccessStatus>,
    /// Citation count (how many works cite this one)
    pub cited_by_count: u64,
    /// Authors and their affiliations
    #[serde(default)]
    pub authorships: Vec<Authorship>,
    /// Primary topic
    pub primary_topic: Option<TopicReference>,
    /// All associated topics
    #[serde(default)]
    pub topics: Vec<TopicReference>,
    /// Legacy concepts (deprecated upstream but still returned by the API)
    #[serde(default)]
    pub concepts: Vec<ConceptReference>,
    /// OpenAlex IDs of works this work cites (outgoing citations)
    #[serde(default)]
    pub referenced_works: Vec<String>,
    /// OpenAlex IDs of related works
    #[serde(default)]
    pub related_works: Vec<String>,
    /// Abstract as returned by the API — presumably an inverted index
    /// mapping words to positions; kept as raw JSON here (confirm format
    /// against the OpenAlex docs before parsing)
    pub abstract_inverted_index: Option<serde_json::Value>,
    /// Publication venue
    pub primary_location: Option<Location>,
    /// Grants/funding
    #[serde(default)]
    pub grants: Vec<Grant>,
    /// Bibliographic info (volume, issue, pages)
    pub biblio: Option<Biblio>,
    /// Last update time
    pub updated_date: Option<DateTime<Utc>>,
}
/// Open access status
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAccessStatus {
    /// Is this work open access?
    pub is_oa: bool,
    /// OA status type (gold, green, hybrid, bronze)
    pub oa_status: Option<String>,
    /// OA URL if available
    pub oa_url: Option<String>,
}
/// Author and affiliation information for a single work
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Authorship {
    /// Author position (first, middle, last)
    pub author_position: AuthorPosition,
    /// Author details
    pub author: AuthorReference,
    /// Institutions the author was affiliated with at time of publication
    #[serde(default)]
    pub institutions: Vec<InstitutionReference>,
    /// Country codes associated with the affiliations
    #[serde(default)]
    pub countries: Vec<String>,
    /// Is this the corresponding author?
    #[serde(default)]
    pub is_corresponding: bool,
    /// Raw affiliation string as it appeared on the work
    pub raw_affiliation_string: Option<String>,
}
/// Author position in the author list (serialized lowercase: "first", …)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AuthorPosition {
    /// First author
    First,
    /// Middle author
    Middle,
    /// Last author
    Last,
}
/// Lightweight reference to an author (as embedded in a work's authorships)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorReference {
    /// OpenAlex author ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// ORCID (if available)
    pub orcid: Option<String>,
}
/// Lightweight reference to an institution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstitutionReference {
    /// OpenAlex institution ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Institution type (education, company, etc.)
    #[serde(rename = "type")]
    pub institution_type: Option<String>,
    /// Country code
    pub country_code: Option<String>,
    /// ROR ID (Research Organization Registry)
    pub ror: Option<String>,
}
/// Lightweight reference to a topic
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicReference {
    /// OpenAlex topic ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Relevance score (0-1); defaults to 0.0 when absent
    #[serde(default)]
    pub score: f64,
    /// Subfield (one level above the topic in the hierarchy)
    pub subfield: Option<FieldReference>,
    /// Field
    pub field: Option<FieldReference>,
    /// Domain (top of the hierarchy)
    pub domain: Option<FieldReference>,
}
/// Lightweight reference to a concept (legacy taxonomy)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConceptReference {
    /// OpenAlex concept ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Wikidata ID
    pub wikidata: Option<String>,
    /// Relevance score; defaults to 0.0 when absent
    #[serde(default)]
    pub score: f64,
    /// Hierarchy level (0 = root)
    #[serde(default)]
    pub level: u32,
}
/// Reference to a field/domain in the topic hierarchy
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldReference {
    /// OpenAlex ID
    pub id: String,
    /// Display name
    pub display_name: String,
}
/// Where a work was published or hosted
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Location {
    /// Is this the work's primary location?
    #[serde(default)]
    pub is_primary: bool,
    /// Landing page URL
    pub landing_page_url: Option<String>,
    /// PDF URL
    pub pdf_url: Option<String>,
    /// Source (journal/conference) hosting this location
    pub source: Option<SourceReference>,
    /// License
    pub license: Option<String>,
    /// Version (e.g. submitted/accepted/published — confirm against API)
    pub version: Option<String>,
}
/// Lightweight reference to a source (journal, conference, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceReference {
    /// OpenAlex source ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Linking ISSN (ISSN-L)
    pub issn_l: Option<String>,
    /// Source type
    #[serde(rename = "type")]
    pub source_type: Option<String>,
    /// Is this an Open Access journal?
    #[serde(default)]
    pub is_oa: bool,
    /// Host organization (publisher)
    pub host_organization: Option<String>,
}
/// Grant/funding information attached to a work
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Grant {
    /// Funder
    pub funder: Option<FunderReference>,
    /// Funder display name
    pub funder_display_name: Option<String>,
    /// Award ID
    pub award_id: Option<String>,
}
/// Lightweight reference to a funder
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunderReference {
    /// OpenAlex funder ID
    pub id: String,
    /// Display name
    pub display_name: String,
}
/// Bibliographic details (all kept as strings to match the API)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Biblio {
    /// Volume
    pub volume: Option<String>,
    /// Issue
    pub issue: Option<String>,
    /// First page
    pub first_page: Option<String>,
    /// Last page
    pub last_page: Option<String>,
}
/// Full author entity (as returned by the `/authors` endpoint)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
    /// OpenAlex author ID
    pub id: String,
    /// ORCID
    pub orcid: Option<String>,
    /// Display name
    pub display_name: String,
    /// Alternative names
    #[serde(default)]
    pub display_name_alternatives: Vec<String>,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// Summary statistics (h-index, i10-index, …)
    pub summary_stats: Option<AuthorStats>,
    /// Most recent institution
    pub last_known_institution: Option<InstitutionReference>,
    /// All affiliations
    #[serde(default)]
    pub affiliations: Vec<Affiliation>,
    /// Topic areas
    #[serde(default)]
    pub topics: Vec<TopicReference>,
    /// API URL listing this author's works
    pub works_api_url: Option<String>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
/// Author summary statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorStats {
    /// H-index
    pub h_index: Option<u32>,
    /// i10-index
    pub i10_index: Option<u32>,
    /// Two-year mean citedness (impact-factor-like metric)
    #[serde(rename = "2yr_mean_citedness")]
    pub two_year_mean_citedness: Option<f64>,
}
/// An author's affiliation with an institution over time
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Affiliation {
    /// Institution
    pub institution: InstitutionReference,
    /// Years affiliated
    #[serde(default)]
    pub years: Vec<i32>,
}
/// Full institution entity (as returned by the `/institutions` endpoint)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Institution {
    /// OpenAlex institution ID
    pub id: String,
    /// ROR ID
    pub ror: Option<String>,
    /// Display name
    pub display_name: String,
    /// Country code
    pub country_code: Option<String>,
    /// Institution type
    #[serde(rename = "type")]
    pub institution_type: Option<String>,
    /// Homepage URL
    pub homepage_url: Option<String>,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// Geographic info
    pub geo: Option<GeoLocation>,
    /// Parent institutions (IDs, root first — confirm ordering against API)
    #[serde(default)]
    pub lineage: Vec<String>,
    /// Associated institutions
    #[serde(default)]
    pub associated_institutions: Vec<InstitutionReference>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
/// Geographic location of an institution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeoLocation {
    /// City
    pub city: Option<String>,
    /// Region/state
    pub region: Option<String>,
    /// Country
    pub country: Option<String>,
    /// Country code
    pub country_code: Option<String>,
    /// Latitude
    pub latitude: Option<f64>,
    /// Longitude
    pub longitude: Option<f64>,
}
/// Full topic entity (as returned by the `/topics` endpoint)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Topic {
    /// OpenAlex topic ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Description
    pub description: Option<String>,
    /// Keywords
    #[serde(default)]
    pub keywords: Vec<String>,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// Subfield
    pub subfield: Option<FieldReference>,
    /// Field
    pub field: Option<FieldReference>,
    /// Domain
    pub domain: Option<FieldReference>,
    /// Sibling topics (same subfield)
    #[serde(default)]
    pub siblings: Vec<TopicReference>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
/// Legacy concept entity (superseded by topics, still served by the API)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Concept {
    /// OpenAlex concept ID
    pub id: String,
    /// Wikidata ID
    pub wikidata: Option<String>,
    /// Display name
    pub display_name: String,
    /// Description
    pub description: Option<String>,
    /// Hierarchy level (0 = root)
    pub level: u32,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// Parent concepts
    #[serde(default)]
    pub ancestors: Vec<ConceptReference>,
    /// Related concepts (despite the original "child concepts" comment,
    /// the field name says related — confirm semantics against the API)
    #[serde(default)]
    pub related_concepts: Vec<ConceptReference>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
/// Full source entity (journal, conference, repository, …)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Source {
    /// OpenAlex source ID
    pub id: String,
    /// Linking ISSN (ISSN-L)
    pub issn_l: Option<String>,
    /// All ISSNs
    #[serde(default)]
    pub issn: Vec<String>,
    /// Display name
    pub display_name: String,
    /// Publisher / host organization
    pub host_organization: Option<String>,
    /// Source type (journal, conference, etc.)
    #[serde(rename = "type")]
    pub source_type: Option<String>,
    /// Is this an Open Access source?
    #[serde(default)]
    pub is_oa: bool,
    /// Homepage URL
    pub homepage_url: Option<String>,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// Topics commonly published in this source
    #[serde(default)]
    pub topics: Vec<TopicReference>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
/// Full funder entity (as returned by the `/funders` endpoint)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Funder {
    /// OpenAlex funder ID
    pub id: String,
    /// Display name
    pub display_name: String,
    /// Alternative names
    #[serde(default)]
    pub alternate_titles: Vec<String>,
    /// Country code
    pub country_code: Option<String>,
    /// Description
    pub description: Option<String>,
    /// Homepage URL
    pub homepage_url: Option<String>,
    /// Grants count
    pub grants_count: u64,
    /// Works count
    pub works_count: u64,
    /// Citation count
    pub cited_by_count: u64,
    /// ROR ID
    pub ror: Option<String>,
    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal JSON payload must deserialize; all Vec fields rely on
    // `#[serde(default)]` or are present empty here.
    #[test]
    fn test_work_deserialization() {
        let json = r#"{
            "id": "W123",
            "title": "Test Paper",
            "cited_by_count": 10,
            "authorships": [],
            "topics": [],
            "concepts": [],
            "referenced_works": [],
            "related_works": [],
            "grants": []
        }"#;
        let work: Work = serde_json::from_str(json).unwrap();
        assert_eq!(work.id, "W123");
        assert_eq!(work.title, "Test Paper");
        assert_eq!(work.cited_by_count, 10);
    }

    // Variants round-trip from their lowercase serde representation.
    #[test]
    fn test_author_position() {
        let first = serde_json::from_str::<AuthorPosition>(r#""first""#).unwrap();
        assert_eq!(first, AuthorPosition::First);
        let last = serde_json::from_str::<AuthorPosition>(r#""last""#).unwrap();
        assert_eq!(last, AuthorPosition::Last);
    }
}