Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
267
vendor/ruvector/examples/data/openalex/src/client.rs
vendored
Normal file
267
vendor/ruvector/examples/data/openalex/src/client.rs
vendored
Normal file
@@ -0,0 +1,267 @@
|
||||
//! OpenAlex API client
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::{Client, StatusCode};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::{OpenAlexError, Work};
|
||||
|
||||
/// OpenAlex API client
|
||||
pub struct OpenAlexClient {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
email: Option<String>,
|
||||
}
|
||||
|
||||
/// API response wrapper
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ApiResponse<T> {
|
||||
/// Metadata
|
||||
pub meta: ApiMeta,
|
||||
|
||||
/// Results
|
||||
pub results: Vec<T>,
|
||||
}
|
||||
|
||||
/// API metadata
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ApiMeta {
|
||||
/// Total count
|
||||
pub count: u64,
|
||||
|
||||
/// Current page
|
||||
pub page: Option<u32>,
|
||||
|
||||
/// Results per page
|
||||
pub per_page: Option<u32>,
|
||||
|
||||
/// Next cursor (for cursor-based pagination)
|
||||
pub next_cursor: Option<String>,
|
||||
}
|
||||
|
||||
impl OpenAlexClient {
|
||||
/// Create a new OpenAlex client
|
||||
///
|
||||
/// Providing an email enables the "polite pool" with higher rate limits.
|
||||
pub fn new(email: Option<String>) -> Self {
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.user_agent("RuVector/0.1.0")
|
||||
.gzip(true)
|
||||
.build()
|
||||
.expect("Failed to build HTTP client");
|
||||
|
||||
Self {
|
||||
client,
|
||||
base_url: "https://api.openalex.org".to_string(),
|
||||
email,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set custom base URL (for testing)
|
||||
pub fn with_base_url(mut self, url: &str) -> Self {
|
||||
self.base_url = url.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Build URL with email parameter
|
||||
fn build_url(&self, endpoint: &str, params: &str) -> String {
|
||||
let mut url = format!("{}/{}?{}", self.base_url, endpoint, params);
|
||||
|
||||
if let Some(ref email) = self.email {
|
||||
if !params.is_empty() {
|
||||
url.push('&');
|
||||
}
|
||||
url.push_str(&format!("mailto={}", email));
|
||||
}
|
||||
|
||||
url
|
||||
}
|
||||
|
||||
/// Health check - verify API is accessible
|
||||
pub async fn health_check(&self) -> Result<bool, OpenAlexError> {
|
||||
let url = format!("{}/works?per_page=1", self.base_url);
|
||||
let response = self.client.get(&url).send().await?;
|
||||
Ok(response.status().is_success())
|
||||
}
|
||||
|
||||
/// Fetch a page of works with pagination
|
||||
pub async fn fetch_works_page(
|
||||
&self,
|
||||
filter: &str,
|
||||
cursor: Option<String>,
|
||||
per_page: usize,
|
||||
) -> Result<(Vec<Work>, Option<String>), OpenAlexError> {
|
||||
let mut params = format!("per_page={}", per_page);
|
||||
|
||||
if !filter.is_empty() {
|
||||
params.push_str(&format!("&{}", filter));
|
||||
}
|
||||
|
||||
if let Some(c) = cursor {
|
||||
params.push_str(&format!("&cursor={}", c));
|
||||
} else {
|
||||
// Use cursor-based pagination for bulk
|
||||
params.push_str("&cursor=*");
|
||||
}
|
||||
|
||||
let url = self.build_url("works", ¶ms);
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
match response.status() {
|
||||
StatusCode::OK => {
|
||||
let api_response: ApiResponse<Work> = response.json().await?;
|
||||
Ok((api_response.results, api_response.meta.next_cursor))
|
||||
}
|
||||
StatusCode::TOO_MANY_REQUESTS => {
|
||||
let retry_after = response
|
||||
.headers()
|
||||
.get("retry-after")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(60);
|
||||
Err(OpenAlexError::RateLimited(retry_after))
|
||||
}
|
||||
status => Err(OpenAlexError::Api(format!(
|
||||
"Unexpected status: {}",
|
||||
status
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch a single work by ID
|
||||
pub async fn get_work(&self, id: &str) -> Result<Work, OpenAlexError> {
|
||||
// Normalize ID format
|
||||
let normalized_id = if id.starts_with("https://") {
|
||||
id.to_string()
|
||||
} else if id.starts_with("W") {
|
||||
format!("https://openalex.org/{}", id)
|
||||
} else {
|
||||
return Err(OpenAlexError::InvalidId(id.to_string()));
|
||||
};
|
||||
|
||||
let url = self.build_url(&format!("works/{}", normalized_id), "");
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
match response.status() {
|
||||
StatusCode::OK => Ok(response.json().await?),
|
||||
StatusCode::NOT_FOUND => Err(OpenAlexError::InvalidId(id.to_string())),
|
||||
status => Err(OpenAlexError::Api(format!(
|
||||
"Unexpected status: {}",
|
||||
status
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Search works by query
|
||||
pub async fn search_works(
|
||||
&self,
|
||||
query: &str,
|
||||
per_page: usize,
|
||||
) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let params = format!("search={}&per_page={}", urlencoding::encode(query), per_page);
|
||||
let url = self.build_url("works", ¶ms);
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
match response.status() {
|
||||
StatusCode::OK => {
|
||||
let api_response: ApiResponse<Work> = response.json().await?;
|
||||
Ok(api_response.results)
|
||||
}
|
||||
status => Err(OpenAlexError::Api(format!(
|
||||
"Unexpected status: {}",
|
||||
status
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch works by topic
|
||||
pub async fn works_by_topic(
|
||||
&self,
|
||||
topic_id: &str,
|
||||
per_page: usize,
|
||||
) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let filter = format!("filter=primary_topic.id:{}", topic_id);
|
||||
let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
|
||||
Ok(works)
|
||||
}
|
||||
|
||||
/// Fetch works by author
|
||||
pub async fn works_by_author(
|
||||
&self,
|
||||
author_id: &str,
|
||||
per_page: usize,
|
||||
) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let filter = format!("filter=authorships.author.id:{}", author_id);
|
||||
let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
|
||||
Ok(works)
|
||||
}
|
||||
|
||||
/// Fetch works by institution
|
||||
pub async fn works_by_institution(
|
||||
&self,
|
||||
institution_id: &str,
|
||||
per_page: usize,
|
||||
) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let filter = format!(
|
||||
"filter=authorships.institutions.id:{}",
|
||||
institution_id
|
||||
);
|
||||
let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
|
||||
Ok(works)
|
||||
}
|
||||
|
||||
/// Fetch works citing a specific work
|
||||
pub async fn citing_works(
|
||||
&self,
|
||||
work_id: &str,
|
||||
per_page: usize,
|
||||
) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let filter = format!("filter=cites:{}", work_id);
|
||||
let (works, _) = self.fetch_works_page(&filter, None, per_page).await?;
|
||||
Ok(works)
|
||||
}
|
||||
|
||||
/// Fetch works cited by a specific work
|
||||
pub async fn cited_by_work(&self, work_id: &str) -> Result<Vec<Work>, OpenAlexError> {
|
||||
let work = self.get_work(work_id).await?;
|
||||
|
||||
// Fetch referenced works
|
||||
let mut cited_works = Vec::new();
|
||||
for ref_id in work.referenced_works.iter().take(100) {
|
||||
// Limit to avoid too many requests
|
||||
if let Ok(cited) = self.get_work(ref_id).await {
|
||||
cited_works.push(cited);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(cited_works)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A client built without an email points at the public API root.
    #[test]
    fn test_client_creation() {
        let client = OpenAlexClient::new(None);
        assert_eq!(client.base_url, "https://api.openalex.org");
    }

    /// Supplying an email must add the polite-pool `mailto` parameter.
    #[test]
    fn test_client_with_email() {
        let client = OpenAlexClient::new(Some("test@example.com".to_owned()));
        let url = client.build_url("works", "per_page=10");
        assert!(url.contains("mailto=test@example.com"));
    }

    /// URLs are rooted at the base URL and carry the supplied filter.
    #[test]
    fn test_url_building() {
        let client = OpenAlexClient::new(None);
        let url = client.build_url("works", "filter=publication_year:2023");
        assert!(url.starts_with("https://api.openalex.org/works"));
        assert!(url.contains("filter=publication_year:2023"));
    }
}
|
||||
518
vendor/ruvector/examples/data/openalex/src/frontier.rs
vendored
Normal file
518
vendor/ruvector/examples/data/openalex/src/frontier.rs
vendored
Normal file
@@ -0,0 +1,518 @@
|
||||
//! Research frontier detection using coherence signals
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{TopicEdge, TopicGraph, TopicNode, Work};
|
||||
|
||||
/// An emerging research frontier
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EmergingFrontier {
|
||||
/// Frontier identifier
|
||||
pub id: String,
|
||||
|
||||
/// Primary topic name
|
||||
pub name: String,
|
||||
|
||||
/// Related topic names
|
||||
pub related_topics: Vec<String>,
|
||||
|
||||
/// Growth rate (works per year)
|
||||
pub growth_rate: f64,
|
||||
|
||||
/// Coherence delta (change in min-cut boundary)
|
||||
pub coherence_delta: f64,
|
||||
|
||||
/// Citation momentum (trend in citation rates)
|
||||
pub citation_momentum: f64,
|
||||
|
||||
/// Detected boundary nodes (topics at the frontier edge)
|
||||
pub boundary_topics: Vec<String>,
|
||||
|
||||
/// First detected
|
||||
pub detected_at: DateTime<Utc>,
|
||||
|
||||
/// Confidence score (0-1)
|
||||
pub confidence: f64,
|
||||
|
||||
/// Evidence supporting this frontier
|
||||
pub evidence: Vec<FrontierEvidence>,
|
||||
}
|
||||
|
||||
/// Evidence for a frontier detection
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FrontierEvidence {
|
||||
/// Evidence type
|
||||
pub evidence_type: String,
|
||||
|
||||
/// Value
|
||||
pub value: f64,
|
||||
|
||||
/// Explanation
|
||||
pub explanation: String,
|
||||
}
|
||||
|
||||
/// A cross-domain bridge connecting two research areas
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrossDomainBridge {
|
||||
/// Bridge identifier
|
||||
pub id: String,
|
||||
|
||||
/// Source domain/topic
|
||||
pub source_domain: String,
|
||||
|
||||
/// Target domain/topic
|
||||
pub target_domain: String,
|
||||
|
||||
/// Bridge topics (connector nodes)
|
||||
pub bridge_topics: Vec<String>,
|
||||
|
||||
/// Citation flow (source → target)
|
||||
pub citation_flow: f64,
|
||||
|
||||
/// Reverse flow (target → source)
|
||||
pub reverse_flow: f64,
|
||||
|
||||
/// Bridge strength (combined normalized flow)
|
||||
pub strength: f64,
|
||||
|
||||
/// Is this a new connection?
|
||||
pub is_emerging: bool,
|
||||
|
||||
/// First observed
|
||||
pub first_observed: DateTime<Utc>,
|
||||
|
||||
/// Key papers establishing the bridge
|
||||
pub key_works: Vec<String>,
|
||||
}
|
||||
|
||||
/// Research frontier radar for detecting emerging fields
|
||||
pub struct FrontierRadar {
|
||||
/// Topic graph snapshots over time
|
||||
snapshots: Vec<(DateTime<Utc>, TopicGraph)>,
|
||||
|
||||
/// Minimum growth rate to consider
|
||||
min_growth_rate: f64,
|
||||
|
||||
/// Minimum coherence shift to detect
|
||||
min_coherence_shift: f64,
|
||||
|
||||
/// Detected frontiers
|
||||
frontiers: Vec<EmergingFrontier>,
|
||||
|
||||
/// Detected bridges
|
||||
bridges: Vec<CrossDomainBridge>,
|
||||
}
|
||||
|
||||
impl FrontierRadar {
|
||||
/// Create a new frontier radar
|
||||
pub fn new(min_growth_rate: f64, min_coherence_shift: f64) -> Self {
|
||||
Self {
|
||||
snapshots: Vec::new(),
|
||||
min_growth_rate,
|
||||
min_coherence_shift,
|
||||
frontiers: Vec::new(),
|
||||
bridges: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a topic graph snapshot
|
||||
pub fn add_snapshot(&mut self, timestamp: DateTime<Utc>, graph: TopicGraph) {
|
||||
self.snapshots.push((timestamp, graph));
|
||||
self.snapshots.sort_by_key(|(ts, _)| *ts);
|
||||
}
|
||||
|
||||
/// Build snapshots from works partitioned by time
|
||||
pub fn build_from_works(&mut self, works: &[Work], window_days: i64) {
|
||||
if works.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Find time range
|
||||
let mut min_date = Utc::now();
|
||||
let mut max_date = DateTime::<Utc>::MIN_UTC;
|
||||
|
||||
for work in works {
|
||||
if let Some(date) = work.publication_date {
|
||||
if date < min_date {
|
||||
min_date = date;
|
||||
}
|
||||
if date > max_date {
|
||||
max_date = date;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Partition works into time windows
|
||||
let window_duration = chrono::Duration::days(window_days);
|
||||
let mut current_start = min_date;
|
||||
|
||||
while current_start < max_date {
|
||||
let current_end = current_start + window_duration;
|
||||
|
||||
let window_works: Vec<_> = works
|
||||
.iter()
|
||||
.filter(|w| {
|
||||
w.publication_date
|
||||
.map(|d| d >= current_start && d < current_end)
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if !window_works.is_empty() {
|
||||
let graph = TopicGraph::from_works(&window_works);
|
||||
self.add_snapshot(current_start, graph);
|
||||
}
|
||||
|
||||
current_start = current_end;
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect emerging frontiers from snapshots
|
||||
pub fn detect_frontiers(&mut self) -> Vec<EmergingFrontier> {
|
||||
if self.snapshots.len() < 2 {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut frontiers = Vec::new();
|
||||
let mut frontier_counter = 0;
|
||||
|
||||
// Compare consecutive snapshots
|
||||
for i in 1..self.snapshots.len() {
|
||||
let (prev_ts, prev_graph) = &self.snapshots[i - 1];
|
||||
let (curr_ts, curr_graph) = &self.snapshots[i];
|
||||
|
||||
// Find topics with significant growth
|
||||
for (topic_id, curr_node) in &curr_graph.topics {
|
||||
let prev_node = prev_graph.topics.get(topic_id);
|
||||
|
||||
let growth = if let Some(prev) = prev_node {
|
||||
if prev.work_count > 0 {
|
||||
(curr_node.work_count as f64 - prev.work_count as f64)
|
||||
/ prev.work_count as f64
|
||||
} else {
|
||||
f64::INFINITY
|
||||
}
|
||||
} else {
|
||||
// New topic
|
||||
f64::INFINITY
|
||||
};
|
||||
|
||||
if growth > self.min_growth_rate {
|
||||
// Calculate coherence shift
|
||||
let coherence_delta = self.compute_topic_coherence_delta(
|
||||
topic_id,
|
||||
prev_graph,
|
||||
curr_graph,
|
||||
);
|
||||
|
||||
if coherence_delta.abs() > self.min_coherence_shift {
|
||||
// Calculate citation momentum
|
||||
let citation_momentum = curr_node.avg_citations
|
||||
- prev_node.map(|n| n.avg_citations).unwrap_or(0.0);
|
||||
|
||||
// Find boundary topics
|
||||
let boundary_topics = self.find_boundary_topics(topic_id, curr_graph);
|
||||
|
||||
// Build evidence
|
||||
let mut evidence = vec![
|
||||
FrontierEvidence {
|
||||
evidence_type: "growth_rate".to_string(),
|
||||
value: growth,
|
||||
explanation: format!(
|
||||
"{:.0}% increase in works",
|
||||
growth * 100.0
|
||||
),
|
||||
},
|
||||
FrontierEvidence {
|
||||
evidence_type: "coherence_delta".to_string(),
|
||||
value: coherence_delta,
|
||||
explanation: format!(
|
||||
"Coherence {} by {:.2}",
|
||||
if coherence_delta > 0.0 {
|
||||
"increased"
|
||||
} else {
|
||||
"decreased"
|
||||
},
|
||||
coherence_delta.abs()
|
||||
),
|
||||
},
|
||||
];
|
||||
|
||||
if citation_momentum > 0.0 {
|
||||
evidence.push(FrontierEvidence {
|
||||
evidence_type: "citation_momentum".to_string(),
|
||||
value: citation_momentum,
|
||||
explanation: format!(
|
||||
"+{:.1} avg citations",
|
||||
citation_momentum
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Calculate confidence based on evidence strength
|
||||
let confidence = self.calculate_confidence(growth, coherence_delta, citation_momentum);
|
||||
|
||||
if confidence >= 0.3 {
|
||||
frontiers.push(EmergingFrontier {
|
||||
id: format!("frontier_{}", frontier_counter),
|
||||
name: curr_node.name.clone(),
|
||||
related_topics: self.find_related_topics(topic_id, curr_graph),
|
||||
growth_rate: curr_node.growth_rate,
|
||||
coherence_delta,
|
||||
citation_momentum,
|
||||
boundary_topics,
|
||||
detected_at: *curr_ts,
|
||||
confidence,
|
||||
evidence,
|
||||
});
|
||||
frontier_counter += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by confidence
|
||||
frontiers.sort_by(|a, b| {
|
||||
b.confidence
|
||||
.partial_cmp(&a.confidence)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
self.frontiers = frontiers.clone();
|
||||
frontiers
|
||||
}
|
||||
|
||||
/// Detect cross-domain bridges
|
||||
pub fn detect_bridges(&mut self) -> Vec<CrossDomainBridge> {
|
||||
if self.snapshots.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut bridges = Vec::new();
|
||||
let mut bridge_counter = 0;
|
||||
|
||||
let (curr_ts, curr_graph) = self.snapshots.last().unwrap();
|
||||
|
||||
// Build domain → topics mapping (simplified: use top-level grouping)
|
||||
let mut domain_topics: HashMap<String, Vec<String>> = HashMap::new();
|
||||
for (topic_id, node) in &curr_graph.topics {
|
||||
// Use first word as domain (simplified)
|
||||
let domain = node
|
||||
.name
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.unwrap_or("Unknown")
|
||||
.to_string();
|
||||
domain_topics
|
||||
.entry(domain.clone())
|
||||
.or_default()
|
||||
.push(topic_id.clone());
|
||||
}
|
||||
|
||||
// Find cross-domain edges
|
||||
let mut domain_flows: HashMap<(String, String), Vec<&TopicEdge>> = HashMap::new();
|
||||
|
||||
for edge in &curr_graph.edges {
|
||||
let src_domain = self.get_domain(&edge.source, curr_graph);
|
||||
let tgt_domain = self.get_domain(&edge.target, curr_graph);
|
||||
|
||||
if src_domain != tgt_domain {
|
||||
domain_flows
|
||||
.entry((src_domain.clone(), tgt_domain.clone()))
|
||||
.or_default()
|
||||
.push(edge);
|
||||
}
|
||||
}
|
||||
|
||||
// Create bridge records
|
||||
for ((src_domain, tgt_domain), edges) in domain_flows {
|
||||
let total_flow: f64 = edges.iter().map(|e| e.weight).sum();
|
||||
let citation_count: usize = edges.iter().map(|e| e.citation_count).sum();
|
||||
|
||||
if citation_count >= 5 {
|
||||
// Minimum threshold
|
||||
let bridge_topics: Vec<String> = edges
|
||||
.iter()
|
||||
.flat_map(|e| vec![e.source.clone(), e.target.clone()])
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// Check if this is emerging (compare with previous snapshot)
|
||||
let is_emerging = if self.snapshots.len() >= 2 {
|
||||
let (_, prev_graph) = &self.snapshots[self.snapshots.len() - 2];
|
||||
let prev_flow: f64 = prev_graph
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| {
|
||||
self.get_domain(&e.source, prev_graph) == src_domain
|
||||
&& self.get_domain(&e.target, prev_graph) == tgt_domain
|
||||
})
|
||||
.map(|e| e.weight)
|
||||
.sum();
|
||||
total_flow > prev_flow * 1.5 // 50% growth
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
bridges.push(CrossDomainBridge {
|
||||
id: format!("bridge_{}", bridge_counter),
|
||||
source_domain: src_domain.clone(),
|
||||
target_domain: tgt_domain.clone(),
|
||||
bridge_topics,
|
||||
citation_flow: total_flow,
|
||||
reverse_flow: 0.0, // Would need to compute reverse direction
|
||||
strength: total_flow / citation_count as f64,
|
||||
is_emerging,
|
||||
first_observed: *curr_ts,
|
||||
key_works: vec![], // Would need work-level data
|
||||
});
|
||||
bridge_counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by strength
|
||||
bridges.sort_by(|a, b| {
|
||||
b.strength
|
||||
.partial_cmp(&a.strength)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
self.bridges = bridges.clone();
|
||||
bridges
|
||||
}
|
||||
|
||||
/// Compute coherence delta for a topic between snapshots
|
||||
fn compute_topic_coherence_delta(
|
||||
&self,
|
||||
topic_id: &str,
|
||||
prev_graph: &TopicGraph,
|
||||
curr_graph: &TopicGraph,
|
||||
) -> f64 {
|
||||
// Compute local coherence as ratio of intra-topic to inter-topic edges
|
||||
let prev_coherence = self.compute_local_coherence(topic_id, prev_graph);
|
||||
let curr_coherence = self.compute_local_coherence(topic_id, curr_graph);
|
||||
|
||||
curr_coherence - prev_coherence
|
||||
}
|
||||
|
||||
/// Compute local coherence for a topic
|
||||
fn compute_local_coherence(&self, topic_id: &str, graph: &TopicGraph) -> f64 {
|
||||
// Find edges involving this topic
|
||||
let edges: Vec<_> = graph
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.source == topic_id || e.target == topic_id)
|
||||
.collect();
|
||||
|
||||
if edges.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Coherence = sum of weights
|
||||
edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64
|
||||
}
|
||||
|
||||
/// Find topics at the boundary (connected to other clusters)
|
||||
fn find_boundary_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec<String> {
|
||||
// Find topics connected to this topic that have high connectivity elsewhere
|
||||
graph
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.source == topic_id)
|
||||
.map(|e| e.target.clone())
|
||||
.take(5)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Find related topics
|
||||
fn find_related_topics(&self, topic_id: &str, graph: &TopicGraph) -> Vec<String> {
|
||||
graph
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.source == topic_id || e.target == topic_id)
|
||||
.flat_map(|e| {
|
||||
if e.source == topic_id {
|
||||
vec![e.target.clone()]
|
||||
} else {
|
||||
vec![e.source.clone()]
|
||||
}
|
||||
})
|
||||
.take(10)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get domain for a topic (simplified)
|
||||
fn get_domain(&self, topic_id: &str, graph: &TopicGraph) -> String {
|
||||
graph
|
||||
.topics
|
||||
.get(topic_id)
|
||||
.map(|n| {
|
||||
n.name
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.unwrap_or("Unknown")
|
||||
.to_string()
|
||||
})
|
||||
.unwrap_or_else(|| "Unknown".to_string())
|
||||
}
|
||||
|
||||
/// Calculate confidence score
|
||||
fn calculate_confidence(
|
||||
&self,
|
||||
growth: f64,
|
||||
coherence_delta: f64,
|
||||
citation_momentum: f64,
|
||||
) -> f64 {
|
||||
let growth_score = (growth.min(5.0) / 5.0).max(0.0);
|
||||
let coherence_score = (coherence_delta.abs().min(1.0)).max(0.0);
|
||||
let citation_score = (citation_momentum / 10.0).min(1.0).max(0.0);
|
||||
|
||||
(growth_score * 0.4 + coherence_score * 0.4 + citation_score * 0.2).min(1.0)
|
||||
}
|
||||
|
||||
/// Get detected frontiers
|
||||
pub fn frontiers(&self) -> &[EmergingFrontier] {
|
||||
&self.frontiers
|
||||
}
|
||||
|
||||
/// Get detected bridges
|
||||
pub fn bridges(&self) -> &[CrossDomainBridge] {
|
||||
&self.bridges
|
||||
}
|
||||
|
||||
/// Get highest confidence frontiers
|
||||
pub fn top_frontiers(&self, n: usize) -> Vec<&EmergingFrontier> {
|
||||
self.frontiers.iter().take(n).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed radar has no detections yet.
    #[test]
    fn test_frontier_radar_creation() {
        let radar = FrontierRadar::new(0.1, 0.2);
        assert!(radar.frontiers().is_empty());
        assert!(radar.bridges().is_empty());
    }

    /// Strong signals score high confidence; weak signals stay low.
    #[test]
    fn test_confidence_calculation() {
        let radar = FrontierRadar::new(0.1, 0.2);

        let high = radar.calculate_confidence(2.0, 0.5, 5.0);
        assert!(high > 0.5);

        let low = radar.calculate_confidence(0.05, 0.01, 0.1);
        assert!(low < 0.3);
    }
}
|
||||
476
vendor/ruvector/examples/data/openalex/src/lib.rs
vendored
Normal file
476
vendor/ruvector/examples/data/openalex/src/lib.rs
vendored
Normal file
@@ -0,0 +1,476 @@
|
||||
//! # RuVector OpenAlex Integration
|
||||
//!
|
||||
//! Integration with OpenAlex, the open catalog of scholarly works, authors,
|
||||
//! institutions, and topics. Enables novel discovery through:
|
||||
//!
|
||||
//! - **Emerging Field Detection**: Find topic splits/merges as cut boundaries shift
|
||||
//! - **Cross-Domain Bridges**: Identify connector subgraphs between disciplines
|
||||
//! - **Funding-to-Output Causality**: Map funder → lab → venue → citation chains
|
||||
//!
|
||||
//! ## OpenAlex Data Model
|
||||
//!
|
||||
//! OpenAlex provides a rich graph structure:
|
||||
//! - **Works**: 250M+ scholarly publications
|
||||
//! - **Authors**: 90M+ researchers with affiliations
|
||||
//! - **Institutions**: 100K+ universities, labs, companies
|
||||
//! - **Topics**: Hierarchical concept taxonomy
|
||||
//! - **Funders**: Research funding organizations
|
||||
//! - **Sources**: Journals, conferences, repositories
|
||||
//!
|
||||
//! ## Quick Start
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use ruvector_data_openalex::{OpenAlexClient, FrontierRadar, TopicGraph};
|
||||
//!
|
||||
//! // Initialize client
|
||||
//! let client = OpenAlexClient::new(Some("your-email@example.com"));
|
||||
//!
|
||||
//! // Build topic citation graph
|
||||
//! let graph = TopicGraph::build_from_works(
|
||||
//! client.works_by_topic("machine learning", 2020..2024).await?
|
||||
//! )?;
|
||||
//!
|
||||
//! // Detect emerging research frontiers
|
||||
//! let radar = FrontierRadar::new(graph);
|
||||
//! let frontiers = radar.detect_emerging_fields(0.3).await?;
|
||||
//!
|
||||
//! for frontier in frontiers {
|
||||
//! println!("Emerging: {} (coherence shift: {:.2})",
|
||||
//! frontier.name, frontier.coherence_delta);
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
|
||||
pub mod client;
|
||||
pub mod frontier;
|
||||
pub mod schema;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
pub use client::OpenAlexClient;
|
||||
pub use frontier::{CrossDomainBridge, EmergingFrontier, FrontierRadar};
|
||||
pub use schema::{
|
||||
Author, AuthorPosition, Authorship, Concept, Funder, Institution, Source, Topic, Work,
|
||||
};
|
||||
|
||||
use ruvector_data_framework::{DataRecord, DataSource, FrameworkError, Relationship, Result};
|
||||
|
||||
/// OpenAlex-specific error types
|
||||
#[derive(Error, Debug)]
|
||||
pub enum OpenAlexError {
|
||||
/// API request failed
|
||||
#[error("API error: {0}")]
|
||||
Api(String),
|
||||
|
||||
/// Rate limit exceeded
|
||||
#[error("Rate limit exceeded, retry after {0}s")]
|
||||
RateLimited(u64),
|
||||
|
||||
/// Invalid entity ID
|
||||
#[error("Invalid OpenAlex ID: {0}")]
|
||||
InvalidId(String),
|
||||
|
||||
/// Parsing failed
|
||||
#[error("Parse error: {0}")]
|
||||
Parse(String),
|
||||
|
||||
/// Network error
|
||||
#[error("Network error: {0}")]
|
||||
Network(#[from] reqwest::Error),
|
||||
}
|
||||
|
||||
impl From<OpenAlexError> for FrameworkError {
|
||||
fn from(e: OpenAlexError) -> Self {
|
||||
FrameworkError::Ingestion(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for OpenAlex data source
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct OpenAlexConfig {
|
||||
/// API base URL
|
||||
pub base_url: String,
|
||||
|
||||
/// Email for polite pool (faster rate limits)
|
||||
pub email: Option<String>,
|
||||
|
||||
/// Maximum results per page
|
||||
pub per_page: usize,
|
||||
|
||||
/// Enable cursor-based pagination for bulk
|
||||
pub use_cursor: bool,
|
||||
|
||||
/// Filter to specific entity types
|
||||
pub entity_types: Vec<EntityType>,
|
||||
}
|
||||
|
||||
impl Default for OpenAlexConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
base_url: "https://api.openalex.org".to_string(),
|
||||
email: None,
|
||||
per_page: 200,
|
||||
use_cursor: true,
|
||||
entity_types: vec![EntityType::Work],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// OpenAlex entity types
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum EntityType {
|
||||
/// Scholarly works
|
||||
Work,
|
||||
/// Authors
|
||||
Author,
|
||||
/// Institutions
|
||||
Institution,
|
||||
/// Topics/concepts
|
||||
Topic,
|
||||
/// Funding sources
|
||||
Funder,
|
||||
/// Publication venues
|
||||
Source,
|
||||
}
|
||||
|
||||
impl EntityType {
|
||||
/// Get the API endpoint for this entity type
|
||||
pub fn endpoint(&self) -> &str {
|
||||
match self {
|
||||
EntityType::Work => "works",
|
||||
EntityType::Author => "authors",
|
||||
EntityType::Institution => "institutions",
|
||||
EntityType::Topic => "topics",
|
||||
EntityType::Funder => "funders",
|
||||
EntityType::Source => "sources",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// OpenAlex data source for the framework
|
||||
pub struct OpenAlexSource {
|
||||
client: OpenAlexClient,
|
||||
config: OpenAlexConfig,
|
||||
filters: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl OpenAlexSource {
|
||||
/// Create a new OpenAlex data source
|
||||
pub fn new(config: OpenAlexConfig) -> Self {
|
||||
let client = OpenAlexClient::new(config.email.clone());
|
||||
Self {
|
||||
client,
|
||||
config,
|
||||
filters: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a filter (e.g., "publication_year" => "2023")
|
||||
pub fn with_filter(mut self, key: &str, value: &str) -> Self {
|
||||
self.filters.insert(key.to_string(), value.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Filter to a specific year range
|
||||
pub fn with_year_range(self, start: i32, end: i32) -> Self {
|
||||
self.with_filter("publication_year", &format!("{}-{}", start, end))
|
||||
}
|
||||
|
||||
/// Filter to a specific topic
|
||||
pub fn with_topic(self, topic_id: &str) -> Self {
|
||||
self.with_filter("primary_topic.id", topic_id)
|
||||
}
|
||||
|
||||
/// Filter to open access works
|
||||
pub fn open_access_only(self) -> Self {
|
||||
self.with_filter("open_access.is_oa", "true")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl DataSource for OpenAlexSource {
|
||||
fn source_id(&self) -> &str {
|
||||
"openalex"
|
||||
}
|
||||
|
||||
async fn fetch_batch(
|
||||
&self,
|
||||
cursor: Option<String>,
|
||||
batch_size: usize,
|
||||
) -> Result<(Vec<DataRecord>, Option<String>)> {
|
||||
// Build query URL with filters
|
||||
let mut query_parts: Vec<String> = self
|
||||
.filters
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}:{}", k, v))
|
||||
.collect();
|
||||
|
||||
let filter_str = if query_parts.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("filter={}", query_parts.join(","))
|
||||
};
|
||||
|
||||
// Fetch works from API
|
||||
let (works, next_cursor) = self
|
||||
.client
|
||||
.fetch_works_page(&filter_str, cursor, batch_size.min(self.config.per_page))
|
||||
.await
|
||||
.map_err(|e| FrameworkError::Ingestion(e.to_string()))?;
|
||||
|
||||
// Convert to DataRecords
|
||||
let records: Vec<DataRecord> = works.into_iter().map(work_to_record).collect();
|
||||
|
||||
Ok((records, next_cursor))
|
||||
}
|
||||
|
||||
async fn total_count(&self) -> Result<Option<u64>> {
|
||||
// OpenAlex returns count in meta
|
||||
Ok(None) // Would require separate API call
|
||||
}
|
||||
|
||||
async fn health_check(&self) -> Result<bool> {
|
||||
self.client.health_check().await.map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an OpenAlex Work to a DataRecord
|
||||
fn work_to_record(work: Work) -> DataRecord {
|
||||
let mut relationships = Vec::new();
|
||||
|
||||
// Citations as relationships
|
||||
for cited_id in &work.referenced_works {
|
||||
relationships.push(Relationship {
|
||||
target_id: cited_id.clone(),
|
||||
rel_type: "cites".to_string(),
|
||||
weight: 1.0,
|
||||
properties: HashMap::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// Author relationships
|
||||
for authorship in &work.authorships {
|
||||
relationships.push(Relationship {
|
||||
target_id: authorship.author.id.clone(),
|
||||
rel_type: "authored_by".to_string(),
|
||||
weight: 1.0 / work.authorships.len() as f64,
|
||||
properties: HashMap::new(),
|
||||
});
|
||||
|
||||
// Institution relationships
|
||||
for inst in &authorship.institutions {
|
||||
relationships.push(Relationship {
|
||||
target_id: inst.id.clone(),
|
||||
rel_type: "affiliated_with".to_string(),
|
||||
weight: 0.5,
|
||||
properties: HashMap::new(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Topic relationships
|
||||
if let Some(ref topic) = work.primary_topic {
|
||||
relationships.push(Relationship {
|
||||
target_id: topic.id.clone(),
|
||||
rel_type: "primary_topic".to_string(),
|
||||
weight: topic.score,
|
||||
properties: HashMap::new(),
|
||||
});
|
||||
}
|
||||
|
||||
DataRecord {
|
||||
id: work.id.clone(),
|
||||
source: "openalex".to_string(),
|
||||
record_type: "work".to_string(),
|
||||
timestamp: work.publication_date.unwrap_or_else(Utc::now),
|
||||
data: serde_json::to_value(&work).unwrap_or_default(),
|
||||
embedding: None, // Would compute from title/abstract
|
||||
relationships,
|
||||
}
|
||||
}
|
||||
|
||||
/// Topic-based citation graph for frontier detection
|
||||
pub struct TopicGraph {
|
||||
/// Topics as nodes
|
||||
pub topics: HashMap<String, TopicNode>,
|
||||
|
||||
/// Topic-to-topic edges (via citations)
|
||||
pub edges: Vec<TopicEdge>,
|
||||
|
||||
/// Time window
|
||||
pub time_window: (DateTime<Utc>, DateTime<Utc>),
|
||||
}
|
||||
|
||||
/// A topic node in the graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicNode {
    /// OpenAlex topic ID
    pub id: String,

    /// Topic display name
    pub name: String,

    /// Number of works assigned to this topic in the analyzed set
    pub work_count: usize,

    /// Average citation count (running mean of `cited_by_count` over those works)
    pub avg_citations: f64,

    /// Growth rate (works per year over the graph's time window)
    pub growth_rate: f64,
}
|
||||
|
||||
/// An edge between topics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicEdge {
    /// Source topic ID
    pub source: String,

    /// Target topic ID
    pub target: String,

    /// Number of citations crossing the topic boundary
    pub citation_count: usize,

    /// Normalized weight (citation count scaled by endpoint work counts)
    pub weight: f64,
}
|
||||
|
||||
impl TopicGraph {
|
||||
/// Build topic graph from works
|
||||
pub fn from_works(works: &[Work]) -> Self {
|
||||
let mut topics: HashMap<String, TopicNode> = HashMap::new();
|
||||
let mut edge_counts: HashMap<(String, String), usize> = HashMap::new();
|
||||
|
||||
let mut min_date = Utc::now();
|
||||
let mut max_date = DateTime::<Utc>::MIN_UTC;
|
||||
|
||||
for work in works {
|
||||
if let Some(date) = work.publication_date {
|
||||
if date < min_date {
|
||||
min_date = date;
|
||||
}
|
||||
if date > max_date {
|
||||
max_date = date;
|
||||
}
|
||||
}
|
||||
|
||||
// Get work's primary topic
|
||||
let source_topic = match &work.primary_topic {
|
||||
Some(t) => t.id.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Update or create topic node
|
||||
let node = topics.entry(source_topic.clone()).or_insert_with(|| TopicNode {
|
||||
id: source_topic.clone(),
|
||||
name: work
|
||||
.primary_topic
|
||||
.as_ref()
|
||||
.map(|t| t.display_name.clone())
|
||||
.unwrap_or_default(),
|
||||
work_count: 0,
|
||||
avg_citations: 0.0,
|
||||
growth_rate: 0.0,
|
||||
});
|
||||
node.work_count += 1;
|
||||
node.avg_citations = (node.avg_citations * (node.work_count - 1) as f64
|
||||
+ work.cited_by_count as f64)
|
||||
/ node.work_count as f64;
|
||||
|
||||
// For simplicity, we'd need referenced works' topics
|
||||
// This is a simplified model
|
||||
}
|
||||
|
||||
// Calculate growth rates
|
||||
let time_span_years = (max_date - min_date).num_days() as f64 / 365.0;
|
||||
for node in topics.values_mut() {
|
||||
node.growth_rate = if time_span_years > 0.0 {
|
||||
node.work_count as f64 / time_span_years
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
}
|
||||
|
||||
// Build edges
|
||||
let edges: Vec<TopicEdge> = edge_counts
|
||||
.into_iter()
|
||||
.map(|((src, tgt), count)| {
|
||||
let src_count = topics.get(&src).map(|n| n.work_count).unwrap_or(1);
|
||||
let tgt_count = topics.get(&tgt).map(|n| n.work_count).unwrap_or(1);
|
||||
let weight = count as f64 / (src_count * tgt_count) as f64;
|
||||
|
||||
TopicEdge {
|
||||
source: src,
|
||||
target: tgt,
|
||||
citation_count: count,
|
||||
weight,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
topics,
|
||||
edges,
|
||||
time_window: (min_date, max_date),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get number of topics
|
||||
pub fn topic_count(&self) -> usize {
|
||||
self.topics.len()
|
||||
}
|
||||
|
||||
/// Get number of edges
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
|
||||
/// Get topics by growth rate
|
||||
pub fn fastest_growing(&self, top_k: usize) -> Vec<&TopicNode> {
|
||||
let mut nodes: Vec<_> = self.topics.values().collect();
|
||||
nodes.sort_by(|a, b| {
|
||||
b.growth_rate
|
||||
.partial_cmp(&a.growth_rate)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
nodes.into_iter().take(top_k).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Endpoint paths must match the plural entity names used in OpenAlex URLs.
    #[test]
    fn test_entity_endpoints() {
        assert_eq!(EntityType::Work.endpoint(), "works");
        assert_eq!(EntityType::Author.endpoint(), "authors");
        assert_eq!(EntityType::Topic.endpoint(), "topics");
    }

    /// The default config points at the public API and uses cursor pagination.
    #[test]
    fn test_default_config() {
        let config = OpenAlexConfig::default();
        assert_eq!(config.base_url, "https://api.openalex.org");
        assert!(config.use_cursor);
    }

    /// Builder-style filter methods record entries under their OpenAlex filter keys.
    #[test]
    fn test_source_with_filters() {
        let config = OpenAlexConfig::default();
        let source = OpenAlexSource::new(config)
            .with_year_range(2020, 2024)
            .open_access_only();

        assert!(source.filters.contains_key("publication_year"));
        assert!(source.filters.contains_key("open_access.is_oa"));
    }
}
|
||||
627
vendor/ruvector/examples/data/openalex/src/schema.rs
vendored
Normal file
627
vendor/ruvector/examples/data/openalex/src/schema.rs
vendored
Normal file
@@ -0,0 +1,627 @@
|
||||
//! OpenAlex entity schemas
|
||||
//!
|
||||
//! Represents the core entity types from OpenAlex:
|
||||
//! - Works (publications)
|
||||
//! - Authors
|
||||
//! - Institutions
|
||||
//! - Topics/Concepts
|
||||
//! - Funders
|
||||
//! - Sources (journals, conferences)
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A scholarly work (paper, book, dataset, etc.)
///
/// Mirrors the OpenAlex `works` entity. Fields marked `#[serde(default)]`
/// deserialize to empty collections when absent from the API response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Work {
    /// OpenAlex ID (e.g., "W2741809807")
    pub id: String,

    /// DOI (if available)
    pub doi: Option<String>,

    /// Work title
    // NOTE(review): the OpenAlex API can return a null title for some
    // records; a non-optional String would fail to deserialize those — confirm.
    pub title: String,

    /// Publication date
    // NOTE(review): OpenAlex serves `publication_date` as a plain date
    // ("YYYY-MM-DD"), not an RFC 3339 datetime — verify this parses as
    // DateTime<Utc>, or consider chrono::NaiveDate.
    pub publication_date: Option<DateTime<Utc>>,

    /// Publication year
    pub publication_year: Option<i32>,

    /// Work type (article, book, dataset, etc.); renamed because `type`
    /// is a Rust keyword.
    #[serde(rename = "type")]
    pub work_type: Option<String>,

    /// Open access status
    pub open_access: Option<OpenAccessStatus>,

    /// Citation count
    pub cited_by_count: u64,

    /// Authors and their affiliations
    #[serde(default)]
    pub authorships: Vec<Authorship>,

    /// Primary topic
    pub primary_topic: Option<TopicReference>,

    /// All associated topics
    #[serde(default)]
    pub topics: Vec<TopicReference>,

    /// Legacy concepts (deprecated but still in API)
    #[serde(default)]
    pub concepts: Vec<ConceptReference>,

    /// Referenced works (citations), as OpenAlex work IDs
    #[serde(default)]
    pub referenced_works: Vec<String>,

    /// Related works, as OpenAlex work IDs
    #[serde(default)]
    pub related_works: Vec<String>,

    /// Abstract (inverted index format in API); kept as raw JSON since the
    /// word-position structure is only needed when reconstructing the text
    pub abstract_inverted_index: Option<serde_json::Value>,

    /// Publication venue
    pub primary_location: Option<Location>,

    /// Grants/funding
    #[serde(default)]
    pub grants: Vec<Grant>,

    /// Bibliographic info (volume, issue, pages)
    pub biblio: Option<Biblio>,

    /// Last update time
    pub updated_date: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Open access status
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAccessStatus {
    /// Is this work open access?
    pub is_oa: bool,

    /// OA status type (gold, green, hybrid, bronze)
    pub oa_status: Option<String>,

    /// OA URL if available
    pub oa_url: Option<String>,
}

/// Author and affiliation information for a work
///
/// One `Authorship` per author; collections default to empty and
/// `is_corresponding` to `false` when missing from the response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Authorship {
    /// Author position (first, middle, last)
    pub author_position: AuthorPosition,

    /// Author details
    pub author: AuthorReference,

    /// Institutions at time of publication
    #[serde(default)]
    pub institutions: Vec<InstitutionReference>,

    /// Country codes associated with this authorship
    #[serde(default)]
    pub countries: Vec<String>,

    /// Is corresponding author
    #[serde(default)]
    pub is_corresponding: bool,

    /// Raw affiliation string as provided by the publisher
    pub raw_affiliation_string: Option<String>,
}

/// Author position in author list
///
/// Serialized in lowercase ("first", "middle", "last") to match the API.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AuthorPosition {
    /// First author
    First,
    /// Middle author
    Middle,
    /// Last author
    Last,
}
|
||||
|
||||
/// Reference to an author
///
/// Lightweight "dehydrated" form embedded in other entities (e.g. works).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorReference {
    /// OpenAlex author ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// ORCID (if available)
    pub orcid: Option<String>,
}

/// Reference to an institution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstitutionReference {
    /// OpenAlex institution ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Institution type (education, company, etc.); renamed because `type`
    /// is a Rust keyword.
    #[serde(rename = "type")]
    pub institution_type: Option<String>,

    /// Country code
    pub country_code: Option<String>,

    /// ROR ID (Research Organization Registry)
    pub ror: Option<String>,
}

/// Reference to a topic
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicReference {
    /// OpenAlex topic ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Relevance score (0-1); defaults to 0.0 when absent
    #[serde(default)]
    pub score: f64,

    /// Subfield within the topic hierarchy
    pub subfield: Option<FieldReference>,

    /// Field within the topic hierarchy
    pub field: Option<FieldReference>,

    /// Top-level domain within the topic hierarchy
    pub domain: Option<FieldReference>,
}

/// Reference to a concept (legacy, superseded by topics)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConceptReference {
    /// OpenAlex concept ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Wikidata ID
    pub wikidata: Option<String>,

    /// Relevance score; defaults to 0.0 when absent
    #[serde(default)]
    pub score: f64,

    /// Hierarchy level (0 = root); defaults to 0 when absent
    #[serde(default)]
    pub level: u32,
}

/// Reference to a field/domain in the topic hierarchy
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldReference {
    /// OpenAlex ID
    pub id: String,

    /// Display name
    pub display_name: String,
}
|
||||
|
||||
/// Publication location
///
/// Where a version of a work is hosted (journal page, repository, etc.).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Location {
    /// Is this the primary location; defaults to `false` when absent
    #[serde(default)]
    pub is_primary: bool,

    /// Landing page URL
    pub landing_page_url: Option<String>,

    /// PDF URL
    pub pdf_url: Option<String>,

    /// Source (journal/conference) hosting this location
    pub source: Option<SourceReference>,

    /// License identifier
    pub license: Option<String>,

    /// Version (e.g. submitted/accepted/published)
    pub version: Option<String>,
}

/// Reference to a source (journal, conference, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceReference {
    /// OpenAlex source ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Linking ISSN (ISSN-L)
    pub issn_l: Option<String>,

    /// Source type; renamed because `type` is a Rust keyword
    #[serde(rename = "type")]
    pub source_type: Option<String>,

    /// Is Open Access journal; defaults to `false` when absent
    #[serde(default)]
    pub is_oa: bool,

    /// Host organization (publisher) ID
    pub host_organization: Option<String>,
}
|
||||
|
||||
/// Grant/funding information attached to a work
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Grant {
    /// Funder reference (ID + name)
    pub funder: Option<FunderReference>,

    /// Funder display name (also provided flat by the API)
    pub funder_display_name: Option<String>,

    /// Award ID
    pub award_id: Option<String>,
}

/// Reference to a funder
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunderReference {
    /// OpenAlex funder ID
    pub id: String,

    /// Display name
    pub display_name: String,
}

/// Bibliographic details (volume/issue/pages)
///
/// All fields are strings because page "numbers" may be non-numeric
/// (e.g. "e12345", "S1").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Biblio {
    /// Volume
    pub volume: Option<String>,

    /// Issue
    pub issue: Option<String>,

    /// First page
    pub first_page: Option<String>,

    /// Last page
    pub last_page: Option<String>,
}
|
||||
|
||||
/// Full author entity
///
/// The complete `authors` record, as opposed to the dehydrated
/// [`AuthorReference`] embedded in works.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
    /// OpenAlex author ID
    pub id: String,

    /// ORCID
    pub orcid: Option<String>,

    /// Display name
    pub display_name: String,

    /// Alternative names
    #[serde(default)]
    pub display_name_alternatives: Vec<String>,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// Summary statistics (h-index, i10-index, mean citedness)
    pub summary_stats: Option<AuthorStats>,

    /// Most recent institution
    pub last_known_institution: Option<InstitutionReference>,

    /// All affiliations
    #[serde(default)]
    pub affiliations: Vec<Affiliation>,

    /// Topic areas
    #[serde(default)]
    pub topics: Vec<TopicReference>,

    /// API URL listing this author's works
    pub works_api_url: Option<String>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}

/// Author summary statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorStats {
    /// H-index
    pub h_index: Option<u32>,

    /// i10-index
    pub i10_index: Option<u32>,

    /// Two-year mean citedness; renamed because Rust identifiers cannot
    /// start with a digit
    #[serde(rename = "2yr_mean_citedness")]
    pub two_year_mean_citedness: Option<f64>,
}

/// Author affiliation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Affiliation {
    /// Institution
    pub institution: InstitutionReference,

    /// Years affiliated
    #[serde(default)]
    pub years: Vec<i32>,
}
|
||||
|
||||
/// Full institution entity
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Institution {
    /// OpenAlex institution ID
    pub id: String,

    /// ROR ID (Research Organization Registry)
    pub ror: Option<String>,

    /// Display name
    pub display_name: String,

    /// Country code
    pub country_code: Option<String>,

    /// Institution type; renamed because `type` is a Rust keyword
    #[serde(rename = "type")]
    pub institution_type: Option<String>,

    /// Homepage URL
    pub homepage_url: Option<String>,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// Geographic info
    pub geo: Option<GeoLocation>,

    /// Parent institutions, as OpenAlex institution IDs
    #[serde(default)]
    pub lineage: Vec<String>,

    /// Associated institutions
    #[serde(default)]
    pub associated_institutions: Vec<InstitutionReference>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}

/// Geographic location of an institution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeoLocation {
    /// City
    pub city: Option<String>,

    /// Region/state
    pub region: Option<String>,

    /// Country
    pub country: Option<String>,

    /// Country code
    pub country_code: Option<String>,

    /// Latitude
    pub latitude: Option<f64>,

    /// Longitude
    pub longitude: Option<f64>,
}
|
||||
|
||||
/// Full topic entity
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Topic {
    /// OpenAlex topic ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Description
    pub description: Option<String>,

    /// Keywords
    #[serde(default)]
    pub keywords: Vec<String>,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// Subfield within the topic hierarchy
    pub subfield: Option<FieldReference>,

    /// Field within the topic hierarchy
    pub field: Option<FieldReference>,

    /// Top-level domain within the topic hierarchy
    pub domain: Option<FieldReference>,

    /// Sibling topics
    #[serde(default)]
    pub siblings: Vec<TopicReference>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}

/// Legacy concept entity (superseded by topics, still served by the API)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Concept {
    /// OpenAlex concept ID
    pub id: String,

    /// Wikidata ID
    pub wikidata: Option<String>,

    /// Display name
    pub display_name: String,

    /// Description
    pub description: Option<String>,

    /// Hierarchy level (0 = root)
    pub level: u32,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// Parent concepts
    #[serde(default)]
    pub ancestors: Vec<ConceptReference>,

    /// Related concepts
    #[serde(default)]
    pub related_concepts: Vec<ConceptReference>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Full source entity (journal, conference, repository, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Source {
    /// OpenAlex source ID
    pub id: String,

    /// Linking ISSN (ISSN-L)
    pub issn_l: Option<String>,

    /// All ISSNs
    #[serde(default)]
    pub issn: Vec<String>,

    /// Display name
    pub display_name: String,

    /// Publisher (host organization ID)
    pub host_organization: Option<String>,

    /// Source type (journal, conference, etc.); renamed because `type`
    /// is a Rust keyword
    #[serde(rename = "type")]
    pub source_type: Option<String>,

    /// Is Open Access; defaults to `false` when absent
    #[serde(default)]
    pub is_oa: bool,

    /// Homepage URL
    pub homepage_url: Option<String>,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// Topics published in this source
    #[serde(default)]
    pub topics: Vec<TopicReference>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}

/// Full funder entity
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Funder {
    /// OpenAlex funder ID
    pub id: String,

    /// Display name
    pub display_name: String,

    /// Alternative names
    #[serde(default)]
    pub alternate_titles: Vec<String>,

    /// Country code
    pub country_code: Option<String>,

    /// Description
    pub description: Option<String>,

    /// Homepage URL
    pub homepage_url: Option<String>,

    /// Grants count
    pub grants_count: u64,

    /// Works count
    pub works_count: u64,

    /// Citation count
    pub cited_by_count: u64,

    /// ROR ID (Research Organization Registry)
    pub ror: Option<String>,

    /// Updated date
    pub updated_date: Option<DateTime<Utc>>,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal works payload (only required fields plus empty defaulted
    /// collections) must deserialize; all Option fields may be omitted.
    #[test]
    fn test_work_deserialization() {
        let json = r#"{
            "id": "W123",
            "title": "Test Paper",
            "cited_by_count": 10,
            "authorships": [],
            "topics": [],
            "concepts": [],
            "referenced_works": [],
            "related_works": [],
            "grants": []
        }"#;

        let work: Work = serde_json::from_str(json).unwrap();
        assert_eq!(work.id, "W123");
        assert_eq!(work.title, "Test Paper");
        assert_eq!(work.cited_by_count, 10);
    }

    /// AuthorPosition deserializes from the API's lowercase strings
    /// (via `#[serde(rename_all = "lowercase")]`).
    #[test]
    fn test_author_position() {
        let first = serde_json::from_str::<AuthorPosition>(r#""first""#).unwrap();
        assert_eq!(first, AuthorPosition::First);

        let last = serde_json::from_str::<AuthorPosition>(r#""last""#).unwrap();
        assert_eq!(last, AuthorPosition::Last);
    }
}
|
||||
Reference in New Issue
Block a user