Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,327 @@
//! SEC EDGAR API client
use std::time::Duration;
use chrono::NaiveDate;
use reqwest::{Client, StatusCode};
use serde::Deserialize;
use crate::{Company, EdgarError, Filing, FilingType, Sector};
/// SEC EDGAR API client
///
/// Wraps a `reqwest::Client` configured with the SEC-required User-Agent
/// (see `EdgarClient::new`) and holds the endpoint roots used by the
/// request methods.
pub struct EdgarClient {
    // Underlying HTTP client; built with a 30s timeout and the combined
    // User-Agent string in `new`.
    client: Client,
    // Root of the JSON data API ("https://data.sec.gov").
    base_url: String,
    // Root of the browse-edgar CGI endpoint. NOTE(review): not referenced
    // by any method in this file — presumably reserved for future bulk
    // queries; confirm before removing.
    bulk_url: String,
}
/// Company tickers response
///
/// The SEC ticker index (`/files/company_tickers.json`) is a JSON object
/// keyed by arbitrary numeric index strings, so entries are captured via a
/// flattened map rather than named fields.
#[derive(Debug, Deserialize)]
struct CompanyTickersResponse {
    #[serde(flatten)]
    companies: std::collections::HashMap<String, CompanyEntry>,
}
/// One entry in the SEC ticker index.
#[derive(Debug, Deserialize)]
struct CompanyEntry {
    // NOTE(review): the live SEC index serves `cik_str` as a JSON number,
    // not a string — verify this field deserializes against real responses.
    cik_str: String,
    // Exchange ticker symbol, e.g. "AAPL"; matched case-insensitively in
    // `ticker_to_cik`.
    ticker: String,
    // Registrant display name.
    title: String,
}
/// Company facts response
///
/// Shape of the `/api/xbrl/companyfacts/CIK{cik}.json` payload: entity
/// identification plus the XBRL fact taxonomy.
#[derive(Debug, Deserialize)]
struct CompanyFactsResponse {
    cik: u64,
    #[serde(rename = "entityName")]
    entity_name: String,
    // Absent for registrants with no XBRL facts on file.
    facts: Option<Facts>,
}
/// XBRL facts, grouped by taxonomy.
#[derive(Debug, Deserialize)]
struct Facts {
    // US-GAAP taxonomy concepts keyed by concept name (e.g. "Revenues").
    #[serde(rename = "us-gaap")]
    us_gaap: Option<std::collections::HashMap<String, Concept>>,
}
/// A single XBRL concept with its reported values.
#[derive(Debug, Deserialize)]
struct Concept {
    label: String,
    description: Option<String>,
    // Reported values keyed by unit (e.g. "USD", "shares").
    units: std::collections::HashMap<String, Vec<UnitValue>>,
}
/// One reported value of a concept.
#[derive(Debug, Deserialize)]
struct UnitValue {
    // Period end date as "YYYY-MM-DD"; parsed in `get_financial_data`.
    #[serde(rename = "end")]
    end_date: String,
    val: f64,
    // Accession number of the filing that reported this value.
    accn: String,
    // Fiscal year / fiscal period (e.g. "Q1"), when provided.
    fy: Option<i32>,
    fp: Option<String>,
    // Form that reported this value (e.g. "10-K").
    form: String,
    filed: String,
}
/// Submissions response
///
/// Shape of the `/submissions/CIK{cik}.json` payload: company metadata plus
/// the recent-filings index.
#[derive(Debug, Deserialize)]
struct SubmissionsResponse {
    cik: String,
    name: String,
    // Standard Industrial Classification code, when assigned.
    sic: Option<String>,
    #[serde(rename = "sicDescription")]
    sic_description: Option<String>,
    #[serde(rename = "stateOfIncorporation")]
    state: Option<String>,
    #[serde(rename = "fiscalYearEnd")]
    fiscal_year_end: Option<String>,
    filings: FilingsData,
}
/// Container for the filings index.
#[derive(Debug, Deserialize)]
struct FilingsData {
    recent: RecentFilings,
}
/// Recent filings, encoded by EDGAR as parallel arrays: index `i` of each
/// vector describes the same filing.
#[derive(Debug, Deserialize)]
struct RecentFilings {
    #[serde(rename = "accessionNumber")]
    accession_numbers: Vec<String>,
    // Filing dates as "YYYY-MM-DD" strings.
    #[serde(rename = "filingDate")]
    filing_dates: Vec<String>,
    // SEC form names (e.g. "10-K", "8-K").
    form: Vec<String>,
    #[serde(rename = "primaryDocument")]
    primary_documents: Vec<String>,
    #[serde(rename = "primaryDocDescription")]
    descriptions: Vec<String>,
}
impl EdgarClient {
/// Create a new EDGAR client
///
/// SEC requires user agent with company/contact info
pub fn new(user_agent: &str, company: &str, email: &str) -> Self {
let full_agent = format!("{} ({}, {})", user_agent, company, email);
let client = Client::builder()
.timeout(Duration::from_secs(30))
.user_agent(full_agent)
.build()
.expect("Failed to build HTTP client");
Self {
client,
base_url: "https://data.sec.gov".to_string(),
bulk_url: "https://www.sec.gov/cgi-bin/browse-edgar".to_string(),
}
}
/// Health check
pub async fn health_check(&self) -> Result<bool, EdgarError> {
let url = format!("{}/submissions/CIK0000320193.json", self.base_url);
let response = self.client.get(&url).send().await?;
Ok(response.status().is_success())
}
/// Convert ticker to CIK
pub async fn ticker_to_cik(&self, ticker: &str) -> Result<String, EdgarError> {
let url = format!("{}/files/company_tickers.json", self.base_url);
let response = self.client.get(&url).send().await?;
if !response.status().is_success() {
return Err(EdgarError::Api("Failed to fetch company tickers".to_string()));
}
let data: CompanyTickersResponse = response.json().await?;
for entry in data.companies.values() {
if entry.ticker.eq_ignore_ascii_case(ticker) {
return Ok(entry.cik_str.clone());
}
}
Err(EdgarError::InvalidCik(format!("Ticker not found: {}", ticker)))
}
/// Get company info
pub async fn get_company(&self, cik: &str) -> Result<Company, EdgarError> {
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
let url = format!("{}/submissions/CIK{}.json", self.base_url, padded_cik);
let response = self.client.get(&url).send().await?;
match response.status() {
StatusCode::OK => {
let data: SubmissionsResponse = response.json().await?;
Ok(Company {
cik: data.cik,
name: data.name,
ticker: None, // Would need to look up
sic_code: data.sic,
sic_description: data.sic_description,
state: data.state,
fiscal_year_end: data.fiscal_year_end,
latest_filing: data.filings.recent.filing_dates.first()
.and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok()),
})
}
StatusCode::NOT_FOUND => Err(EdgarError::InvalidCik(cik.to_string())),
status => Err(EdgarError::Api(format!("Unexpected status: {}", status))),
}
}
/// Get filings for a company
pub async fn get_filings(
&self,
cik: &str,
filing_types: &[FilingType],
) -> Result<Vec<Filing>, EdgarError> {
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
let url = format!("{}/submissions/CIK{}.json", self.base_url, padded_cik);
let response = self.client.get(&url).send().await?;
if !response.status().is_success() {
return Err(EdgarError::Api(format!(
"Failed to fetch submissions: {}",
response.status()
)));
}
let data: SubmissionsResponse = response.json().await?;
let mut filings = Vec::new();
for i in 0..data.filings.recent.accession_numbers.len() {
let form = &data.filings.recent.form[i];
let filing_type = FilingType::from_form(form);
if filing_types.contains(&filing_type) {
let filed_date = NaiveDate::parse_from_str(
&data.filings.recent.filing_dates[i],
"%Y-%m-%d",
)
.unwrap_or(NaiveDate::from_ymd_opt(2000, 1, 1).unwrap());
filings.push(Filing {
accession_number: data.filings.recent.accession_numbers[i].clone(),
cik: cik.to_string(),
filing_type,
filed_date,
document_url: format!(
"https://www.sec.gov/Archives/edgar/data/{}/{}/{}",
cik,
data.filings.recent.accession_numbers[i].replace("-", ""),
data.filings.recent.primary_documents[i]
),
description: data.filings.recent.descriptions.get(i).cloned(),
});
}
}
Ok(filings)
}
/// Get company facts (XBRL financial data)
pub async fn get_company_facts(&self, cik: &str) -> Result<CompanyFactsResponse, EdgarError> {
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
let url = format!(
"{}/api/xbrl/companyfacts/CIK{}.json",
self.base_url, padded_cik
);
let response = self.client.get(&url).send().await?;
match response.status() {
StatusCode::OK => Ok(response.json().await?),
StatusCode::NOT_FOUND => Err(EdgarError::InvalidCik(cik.to_string())),
status => Err(EdgarError::Api(format!("Unexpected status: {}", status))),
}
}
/// Get companies by sector
pub async fn get_companies_by_sector(&self, sector: &Sector) -> Result<Vec<Company>, EdgarError> {
// Note: This is a simplified implementation
// Real implementation would use bulk data or SIC code search
let sic_prefix = match sector {
Sector::Technology => "73",
Sector::Healthcare => "80",
Sector::Financials => "60",
Sector::ConsumerDiscretionary => "57",
Sector::ConsumerStaples => "20",
Sector::Energy => "13",
Sector::Materials => "28",
Sector::Industrials => "35",
Sector::Utilities => "49",
Sector::RealEstate => "65",
Sector::CommunicationServices => "48",
Sector::Other => "99",
};
// Return placeholder - would implement full sector search
Ok(vec![])
}
/// Get XBRL financial statement data
pub async fn get_financial_data(
&self,
cik: &str,
metrics: &[&str],
) -> Result<std::collections::HashMap<String, Vec<(NaiveDate, f64)>>, EdgarError> {
let facts = self.get_company_facts(cik).await?;
let mut result = std::collections::HashMap::new();
if let Some(facts) = facts.facts {
if let Some(us_gaap) = facts.us_gaap {
for metric in metrics {
if let Some(concept) = us_gaap.get(*metric) {
let mut values = Vec::new();
for (_, unit_values) in &concept.units {
for uv in unit_values {
if let Ok(date) = NaiveDate::parse_from_str(&uv.end_date, "%Y-%m-%d") {
values.push((date, uv.val));
}
}
}
values.sort_by_key(|(d, _)| *d);
result.insert(metric.to_string(), values);
}
}
}
}
Ok(result)
}
/// Download filing document
pub async fn download_filing(&self, url: &str) -> Result<String, EdgarError> {
let response = self.client.get(url).send().await?;
if !response.status().is_success() {
return Err(EdgarError::FilingNotFound(url.to_string()));
}
Ok(response.text().await?)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Constructor smoke test: verifies the default base URL points at the
    // SEC data host. No network access is performed.
    #[test]
    fn test_client_creation() {
        let client = EdgarClient::new("TestAgent/1.0", "Test Corp", "test@example.com");
        assert!(client.base_url.contains("data.sec.gov"));
    }
}

View File

@@ -0,0 +1,483 @@
//! Financial coherence analysis using RuVector's min-cut
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::{Company, Filing, FilingAnalyzer, FinancialStatement, PeerNetwork, XbrlParser, xbrl::statement_to_embedding};
use crate::filings::{NarrativeExtractor, FilingAnalysis};
/// A coherence alert
///
/// Emitted by `CoherenceWatch::analyze_company` when the coherence score
/// (agreement between fundamental and narrative embeddings) changes by more
/// than the configured divergence threshold between observations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoherenceAlert {
    /// Alert identifier (derived from CIK and creation timestamp)
    pub id: String,
    /// Company CIK
    pub company_cik: String,
    /// Company name
    pub company_name: String,
    /// Alert timestamp
    pub timestamp: DateTime<Utc>,
    /// Alert severity
    pub severity: AlertSeverity,
    /// Divergence type
    pub divergence_type: DivergenceType,
    /// Coherence score before the change (0-1)
    pub coherence_before: f64,
    /// Coherence score after the change (0-1)
    pub coherence_after: f64,
    /// Magnitude of change (absolute difference of the two scores)
    pub magnitude: f64,
    /// Fundamental vector component (mean of the fundamental embedding)
    pub fundamental_score: f64,
    /// Narrative vector component (filing sentiment)
    pub narrative_score: f64,
    /// Peer comparison (z-score of coherence vs. peer group)
    pub peer_z_score: f64,
    /// Related companies (up to 5 peers from the network)
    pub related_companies: Vec<String>,
    /// Human-readable interpretation of the divergence
    pub interpretation: String,
    /// Supporting evidence items
    pub evidence: Vec<AlertEvidence>,
}
/// Alert severity levels
///
/// Variants are declared in ascending order of concern; the derived
/// `Ord`/`PartialOrd` follow declaration order, which
/// `CoherenceWatch::alerts_by_severity` relies on for `>=` filtering.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
pub enum AlertSeverity {
    /// Informational
    Info,
    /// Low concern
    Low,
    /// Moderate concern
    Medium,
    /// High concern
    High,
    /// Critical concern
    Critical,
}
impl AlertSeverity {
    /// Map a coherence-change magnitude onto a severity bucket.
    ///
    /// Buckets: < 0.1 → `Info`, < 0.2 → `Low`, < 0.3 → `Medium`,
    /// < 0.5 → `High`, anything else (including NaN) → `Critical`.
    pub fn from_magnitude(magnitude: f64) -> Self {
        match magnitude {
            m if m < 0.1 => AlertSeverity::Info,
            m if m < 0.2 => AlertSeverity::Low,
            m if m < 0.3 => AlertSeverity::Medium,
            m if m < 0.5 => AlertSeverity::High,
            _ => AlertSeverity::Critical,
        }
    }
}
/// Type of divergence detected
///
/// Classified in `CoherenceWatch::create_alert` from the signs of the
/// fundamental and narrative scores.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum DivergenceType {
    /// Fundamentals improving, narrative pessimistic
    FundamentalOutpacing,
    /// Narrative optimistic, fundamentals declining
    NarrativeLeading,
    /// Company diverging from peer group
    PeerDivergence,
    /// Sector-wide pattern change
    SectorShift,
    /// Unusual cross-metric divergence
    MetricAnomaly,
    /// Historical pattern break (default classification when neither
    /// score-sign pattern applies)
    PatternBreak,
}
/// A single piece of supporting evidence attached to an alert.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertEvidence {
    /// Evidence type (e.g. "coherence_change", "narrative_sentiment")
    pub evidence_type: String,
    /// Numeric value backing the evidence
    pub value: f64,
    /// Human-readable explanation
    pub explanation: String,
}
/// Coherence watch for financial monitoring
///
/// Stateful monitor: feeds filings and statements through the analyzers,
/// tracks per-company coherence over time, and accumulates alerts when
/// scores shift beyond the configured threshold.
pub struct CoherenceWatch {
    /// Configuration
    config: WatchConfig,
    /// Peer network (used for related-company lookups)
    network: PeerNetwork,
    /// Historical coherence by company CIK, in observation order
    coherence_history: HashMap<String, Vec<(DateTime<Utc>, f64)>>,
    /// Detected alerts
    alerts: Vec<CoherenceAlert>,
    /// Filing analyzer (text sections, sentiment, risks)
    filing_analyzer: FilingAnalyzer,
    /// XBRL parser
    xbrl_parser: XbrlParser,
    /// Narrative extractor (filing analysis → embedding)
    narrative_extractor: NarrativeExtractor,
}
/// Watch configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WatchConfig {
    /// Weight for fundamental metrics
    pub fundamental_weight: f64,
    /// Weight for narrative analysis
    pub narrative_weight: f64,
    /// Weight for peer comparison
    pub peer_weight: f64,
    /// Minimum coherence-score change required to raise an alert
    pub divergence_threshold: f64,
    /// Lookback quarters for trend analysis
    pub lookback_quarters: usize,
    /// Enable peer comparison
    pub compare_peers: bool,
    /// Alert on sector-wide shifts
    pub sector_alerts: bool,
}
impl Default for WatchConfig {
fn default() -> Self {
Self {
fundamental_weight: 0.4,
narrative_weight: 0.3,
peer_weight: 0.3,
divergence_threshold: 0.2,
lookback_quarters: 8,
compare_peers: true,
sector_alerts: true,
}
}
}
impl CoherenceWatch {
/// Create a new coherence watch
pub fn new(network: PeerNetwork, config: WatchConfig) -> Self {
Self {
config,
network,
coherence_history: HashMap::new(),
alerts: Vec::new(),
filing_analyzer: FilingAnalyzer::new(Default::default()),
xbrl_parser: XbrlParser::new(Default::default()),
narrative_extractor: NarrativeExtractor::new(Default::default()),
}
}
/// Analyze a company for coherence
pub fn analyze_company(
&mut self,
company: &Company,
filings: &[Filing],
statements: &[FinancialStatement],
filing_contents: &HashMap<String, String>,
) -> Option<CoherenceAlert> {
if filings.is_empty() || statements.is_empty() {
return None;
}
// Compute fundamental vector
let latest_statement = statements.last()?;
let fundamental_embedding = statement_to_embedding(latest_statement);
// Compute narrative vector
let latest_filing = filings.last()?;
let content = filing_contents.get(&latest_filing.accession_number)?;
let analysis = self.filing_analyzer.analyze(content, latest_filing);
let narrative_embedding = self.narrative_extractor.extract_embedding(&analysis);
// Compute coherence score
let coherence = self.compute_coherence(&fundamental_embedding, &narrative_embedding);
// Get historical coherence to check for significant change
let cik = &company.cik;
let should_alert = {
let history = self.coherence_history.entry(cik.clone()).or_default();
if !history.is_empty() {
let prev_coherence = history.last()?.1;
let delta = (coherence - prev_coherence).abs();
if delta > self.config.divergence_threshold {
Some(prev_coherence)
} else {
None
}
} else {
None
}
};
// Create alert if needed (outside the mutable borrow scope)
let alert = should_alert.map(|prev_coherence| {
self.create_alert(
company,
prev_coherence,
coherence,
&fundamental_embedding,
&narrative_embedding,
&analysis,
)
});
// Update history
self.coherence_history
.entry(cik.clone())
.or_default()
.push((Utc::now(), coherence));
alert
}
/// Compute coherence between fundamental and narrative vectors
fn compute_coherence(&self, fundamental: &[f32], narrative: &[f32]) -> f64 {
// Cosine similarity
let dot_product: f32 = fundamental.iter()
.zip(narrative.iter())
.map(|(a, b)| a * b)
.sum();
let norm_f: f32 = fundamental.iter().map(|x| x * x).sum::<f32>().sqrt();
let norm_n: f32 = narrative.iter().map(|x| x * x).sum::<f32>().sqrt();
if norm_f > 0.0 && norm_n > 0.0 {
((dot_product / (norm_f * norm_n) + 1.0) / 2.0) as f64 // Scale to 0-1
} else {
0.5
}
}
/// Create an alert from analysis
fn create_alert(
&self,
company: &Company,
prev_coherence: f64,
curr_coherence: f64,
fundamental: &[f32],
narrative: &[f32],
analysis: &FilingAnalysis,
) -> CoherenceAlert {
let magnitude = (curr_coherence - prev_coherence).abs();
let severity = AlertSeverity::from_magnitude(magnitude);
// Determine divergence type
let fundamental_score: f64 = fundamental.iter().map(|x| *x as f64).sum::<f64>() / fundamental.len() as f64;
let narrative_score = analysis.sentiment.unwrap_or(0.0);
let divergence_type = if fundamental_score > 0.0 && narrative_score < 0.0 {
DivergenceType::FundamentalOutpacing
} else if narrative_score > 0.0 && fundamental_score < 0.0 {
DivergenceType::NarrativeLeading
} else {
DivergenceType::PatternBreak
};
// Compute peer z-score (simplified)
let peer_z_score = self.compute_peer_z_score(&company.cik, curr_coherence);
// Build evidence
let evidence = vec![
AlertEvidence {
evidence_type: "coherence_change".to_string(),
value: magnitude,
explanation: format!(
"Coherence {} by {:.1}%",
if curr_coherence > prev_coherence { "increased" } else { "decreased" },
magnitude * 100.0
),
},
AlertEvidence {
evidence_type: "fundamental_score".to_string(),
value: fundamental_score,
explanation: format!("Fundamental metric score: {:.3}", fundamental_score),
},
AlertEvidence {
evidence_type: "narrative_sentiment".to_string(),
value: narrative_score,
explanation: format!("Narrative sentiment: {:.3}", narrative_score),
},
];
let interpretation = self.interpret_divergence(divergence_type, severity, peer_z_score);
CoherenceAlert {
id: format!("alert_{}_{}", company.cik, Utc::now().timestamp()),
company_cik: company.cik.clone(),
company_name: company.name.clone(),
timestamp: Utc::now(),
severity,
divergence_type,
coherence_before: prev_coherence,
coherence_after: curr_coherence,
magnitude,
fundamental_score,
narrative_score,
peer_z_score,
related_companies: self.find_related_companies(&company.cik),
interpretation,
evidence,
}
}
/// Compute peer group z-score
fn compute_peer_z_score(&self, cik: &str, coherence: f64) -> f64 {
let peer_coherences: Vec<f64> = self.coherence_history
.iter()
.filter(|(k, _)| *k != cik)
.filter_map(|(_, history)| history.last().map(|(_, c)| *c))
.collect();
if peer_coherences.len() < 2 {
return 0.0;
}
let mean: f64 = peer_coherences.iter().sum::<f64>() / peer_coherences.len() as f64;
let variance: f64 = peer_coherences.iter().map(|c| (c - mean).powi(2)).sum::<f64>()
/ peer_coherences.len() as f64;
let std_dev = variance.sqrt();
if std_dev > 0.0 {
(coherence - mean) / std_dev
} else {
0.0
}
}
/// Find related companies from network
fn find_related_companies(&self, cik: &str) -> Vec<String> {
self.network.get_peers(cik)
.iter()
.take(5)
.map(|p| p.to_string())
.collect()
}
/// Interpret divergence
fn interpret_divergence(
&self,
divergence_type: DivergenceType,
severity: AlertSeverity,
peer_z_score: f64,
) -> String {
let severity_str = match severity {
AlertSeverity::Info => "Minor",
AlertSeverity::Low => "Notable",
AlertSeverity::Medium => "Significant",
AlertSeverity::High => "Major",
AlertSeverity::Critical => "Critical",
};
let divergence_str = match divergence_type {
DivergenceType::FundamentalOutpacing =>
"Fundamentals improving faster than narrative suggests",
DivergenceType::NarrativeLeading =>
"Narrative more optimistic than fundamentals support",
DivergenceType::PeerDivergence =>
"Company diverging from peer group pattern",
DivergenceType::SectorShift =>
"Sector-wide coherence shift detected",
DivergenceType::MetricAnomaly =>
"Unusual cross-metric relationship detected",
DivergenceType::PatternBreak =>
"Historical coherence pattern broken",
};
let peer_context = if peer_z_score.abs() > 2.0 {
format!(". Company is {:.1} std devs from peer mean", peer_z_score)
} else {
String::new()
};
format!("{} divergence: {}{}", severity_str, divergence_str, peer_context)
}
/// Detect sector-wide coherence shifts
pub fn detect_sector_shifts(&self) -> Vec<CoherenceAlert> {
// Would analyze all companies in sector using min-cut on peer network
vec![]
}
/// Get all alerts
pub fn alerts(&self) -> &[CoherenceAlert] {
&self.alerts
}
/// Get alerts by severity
pub fn alerts_by_severity(&self, min_severity: AlertSeverity) -> Vec<&CoherenceAlert> {
self.alerts
.iter()
.filter(|a| a.severity >= min_severity)
.collect()
}
/// Get company coherence history
pub fn coherence_history(&self, cik: &str) -> Option<&Vec<(DateTime<Utc>, f64)>> {
self.coherence_history.get(cik)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::network::PeerNetworkBuilder;
    // Verify the severity buckets at representative magnitudes.
    #[test]
    fn test_alert_severity() {
        assert_eq!(AlertSeverity::from_magnitude(0.05), AlertSeverity::Info);
        assert_eq!(AlertSeverity::from_magnitude(0.15), AlertSeverity::Low);
        assert_eq!(AlertSeverity::from_magnitude(0.25), AlertSeverity::Medium);
        assert_eq!(AlertSeverity::from_magnitude(0.4), AlertSeverity::High);
        assert_eq!(AlertSeverity::from_magnitude(0.6), AlertSeverity::Critical);
    }
    // Identical vectors should score ~1.0 coherence; opposed vectors ~0.0
    // (cosine similarity rescaled from [-1, 1] to [0, 1]).
    #[test]
    fn test_coherence_computation() {
        let network = PeerNetworkBuilder::new().build();
        let config = WatchConfig::default();
        let watch = CoherenceWatch::new(network, config);
        let vec_a = vec![1.0, 0.0, 0.0];
        let vec_b = vec![1.0, 0.0, 0.0];
        let coherence = watch.compute_coherence(&vec_a, &vec_b);
        assert!((coherence - 1.0).abs() < 0.001);
        let vec_c = vec![-1.0, 0.0, 0.0];
        let coherence_neg = watch.compute_coherence(&vec_a, &vec_c);
        assert!((coherence_neg - 0.0).abs() < 0.001);
    }
}

View File

@@ -0,0 +1,508 @@
//! SEC filing types and analysis
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// SEC filing types
///
/// Canonical form names are produced by `form_name`; parsing from a raw
/// SEC form string is handled by `from_form`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum FilingType {
    /// Annual report (10-K)
    TenK,
    /// Quarterly report (10-Q)
    TenQ,
    /// Current report for material events (8-K)
    EightK,
    /// Proxy statement (DEF 14A)
    DefFourteen,
    /// Insider trading report (Form 4)
    FormFour,
    /// Institutional holdings report (13F-HR)
    ThirteenF,
    /// Registration statement (S-1)
    S1,
    /// Any other filing type
    Other,
}
impl FilingType {
    /// Parse a SEC form name (case-insensitive) into a `FilingType`.
    ///
    /// Amended variants with a "/A" suffix map to the same type as the
    /// base form; anything unrecognized becomes `FilingType::Other`.
    pub fn from_form(form: &str) -> Self {
        let normalized = form.to_uppercase();
        match normalized.as_str() {
            "S-1" | "S-1/A" => FilingType::S1,
            "4" | "4/A" => FilingType::FormFour,
            "8-K" | "8-K/A" => FilingType::EightK,
            "10-K" | "10-K/A" => FilingType::TenK,
            "10-Q" | "10-Q/A" => FilingType::TenQ,
            "13F-HR" | "13F-HR/A" => FilingType::ThirteenF,
            "DEF 14A" | "DEFA14A" => FilingType::DefFourteen,
            _ => FilingType::Other,
        }
    }
    /// Canonical SEC form name for this filing type.
    pub fn form_name(&self) -> &str {
        match self {
            Self::TenK => "10-K",
            Self::TenQ => "10-Q",
            Self::EightK => "8-K",
            Self::DefFourteen => "DEF 14A",
            Self::FormFour => "4",
            Self::ThirteenF => "13F-HR",
            Self::S1 => "S-1",
            Self::Other => "Other",
        }
    }
}
/// A SEC filing
///
/// One entry from a company's submissions index, with a constructed URL
/// to the primary document in the EDGAR archive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Filing {
    /// Accession number (unique identifier)
    pub accession_number: String,
    /// Company CIK
    pub cik: String,
    /// Filing type
    pub filing_type: FilingType,
    /// Date filed
    pub filed_date: NaiveDate,
    /// Primary document URL on www.sec.gov/Archives
    pub document_url: String,
    /// Description of the primary document, when provided
    pub description: Option<String>,
}
/// Filing analyzer for extracting insights
///
/// Runs configurable heuristic stages (sections, sentiment, risk factors,
/// forward-looking statements, key phrases) over raw filing text.
pub struct FilingAnalyzer {
    /// Configuration controlling which stages run
    config: AnalyzerConfig,
}
/// Analyzer configuration
///
/// Each flag toggles one analysis stage; section extraction always runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzerConfig {
    /// Extract key phrases
    pub extract_phrases: bool,
    /// Sentiment analysis
    pub analyze_sentiment: bool,
    /// Risk factor extraction
    pub extract_risks: bool,
    /// Forward-looking statement extraction
    pub extract_fls: bool,
}
impl Default for AnalyzerConfig {
fn default() -> Self {
Self {
extract_phrases: true,
analyze_sentiment: true,
extract_risks: true,
extract_fls: true,
}
}
}
impl FilingAnalyzer {
    /// Create a new analyzer with the given configuration.
    pub fn new(config: AnalyzerConfig) -> Self {
        Self { config }
    }
    /// Analyze a filing document.
    ///
    /// Runs each extraction stage enabled in the configuration (section
    /// extraction always runs) and bundles the results into a
    /// `FilingAnalysis` keyed by the filing's accession number.
    pub fn analyze(&self, content: &str, filing: &Filing) -> FilingAnalysis {
        let sections = self.extract_sections(content, &filing.filing_type);
        let sentiment = if self.config.analyze_sentiment {
            Some(self.compute_sentiment(content))
        } else {
            None
        };
        let risk_factors = if self.config.extract_risks {
            self.extract_risk_factors(content)
        } else {
            vec![]
        };
        let forward_looking = if self.config.extract_fls {
            self.extract_forward_looking(content)
        } else {
            vec![]
        };
        let key_phrases = if self.config.extract_phrases {
            self.extract_key_phrases(content)
        } else {
            vec![]
        };
        FilingAnalysis {
            accession_number: filing.accession_number.clone(),
            sections,
            sentiment,
            risk_factors,
            forward_looking,
            key_phrases,
            word_count: content.split_whitespace().count(),
        }
    }
    /// Extract standard sections from a filing.
    ///
    /// Searches for the "Item N" markers appropriate to the filing type and
    /// captures up to ~5000 bytes of text following each marker. This is a
    /// simplified heuristic; a production implementation would use real
    /// document segmentation.
    fn extract_sections(&self, content: &str, filing_type: &FilingType) -> HashMap<String, String> {
        let mut sections = HashMap::new();
        // Section markers vary by filing type
        let section_patterns = match filing_type {
            FilingType::TenK => vec![
                ("Business", "Item 1"),
                ("RiskFactors", "Item 1A"),
                ("Properties", "Item 2"),
                ("Legal", "Item 3"),
                ("MDA", "Item 7"),
                ("Financials", "Item 8"),
            ],
            FilingType::TenQ => vec![
                ("Financials", "Part I"),
                ("MDA", "Item 2"),
                ("Controls", "Item 4"),
            ],
            FilingType::EightK => vec![
                ("Item", "Item"),
            ],
            _ => vec![],
        };
        for (name, marker) in section_patterns {
            if let Some(idx) = content.find(marker) {
                let section_text = &content[idx..];
                // Cap each section at 5000 bytes, backing off to the nearest
                // UTF-8 character boundary so the slice below cannot panic
                // when byte 5000 falls inside a multi-byte character (the
                // previous fixed-byte cut could panic on non-ASCII filings).
                let mut end_idx = section_text.len().min(5000);
                while !section_text.is_char_boundary(end_idx) {
                    end_idx -= 1;
                }
                sections.insert(name.to_string(), section_text[..end_idx].to_string());
            }
        }
        sections
    }
    /// Compute a crude sentiment score (roughly -1 to 1).
    ///
    /// Counts hits against fixed positive/negative word lists — substring
    /// match, so e.g. "risks" counts for "risk" — and normalizes the
    /// difference by sqrt(total word count).
    fn compute_sentiment(&self, content: &str) -> f64 {
        let positive_words = [
            "growth", "profit", "increased", "strong", "improved", "successful",
            "innovative", "opportunity", "favorable", "exceeded", "achieved",
        ];
        let negative_words = [
            "loss", "decline", "decreased", "weak", "challenging", "risk",
            "uncertain", "adverse", "impairment", "litigation", "default",
        ];
        let content_lower = content.to_lowercase();
        let words: Vec<&str> = content_lower.split_whitespace().collect();
        let total_words = words.len() as f64;
        let positive_count = positive_words
            .iter()
            .map(|w| words.iter().filter(|word| word.contains(w)).count())
            .sum::<usize>() as f64;
        let negative_count = negative_words
            .iter()
            .map(|w| words.iter().filter(|word| word.contains(w)).count())
            .sum::<usize>() as f64;
        if total_words > 0.0 {
            (positive_count - negative_count) / total_words.sqrt()
        } else {
            0.0
        }
    }
    /// Extract risk factors by counting category keyword mentions.
    ///
    /// Severity is mention count capped at 1.0 (10+ mentions saturates);
    /// results are sorted by descending severity.
    fn extract_risk_factors(&self, content: &str) -> Vec<RiskFactor> {
        let mut risks = Vec::new();
        let risk_patterns = [
            ("Regulatory", "regulatory", "regulation", "compliance"),
            ("Competition", "competitive", "competition", "competitors"),
            ("Cybersecurity", "cybersecurity", "data breach", "security"),
            ("Litigation", "litigation", "lawsuit", "legal proceedings"),
            ("Economic", "economic conditions", "recession", "downturn"),
            ("Supply Chain", "supply chain", "suppliers", "logistics"),
        ];
        let content_lower = content.to_lowercase();
        for (category, pattern1, pattern2, pattern3) in risk_patterns {
            let count = [pattern1, pattern2, pattern3]
                .iter()
                .map(|p| content_lower.matches(p).count())
                .sum::<usize>();
            if count > 0 {
                risks.push(RiskFactor {
                    category: category.to_string(),
                    severity: (count as f64 / 10.0).min(1.0),
                    mentions: count,
                    sample_text: None,
                });
            }
        }
        risks.sort_by(|a, b| b.severity.partial_cmp(&a.severity).unwrap_or(std::cmp::Ordering::Equal));
        risks
    }
    /// Extract forward-looking statements.
    ///
    /// A sentence qualifies when it contains a forward-looking cue word
    /// and one of the stronger indicators ("future"/"expect"/"anticipate").
    /// At most 20 statements are returned, in document order.
    fn extract_forward_looking(&self, content: &str) -> Vec<ForwardLookingStatement> {
        let mut statements = Vec::new();
        let fls_patterns = [
            "expect", "anticipate", "believe", "estimate", "project",
            "forecast", "intend", "plan", "may", "will", "should",
        ];
        let sentences: Vec<&str> = content.split(&['.', '!', '?'][..]).collect();
        for sentence in sentences {
            let sentence_lower = sentence.to_lowercase();
            for pattern in fls_patterns {
                if sentence_lower.contains(pattern) {
                    // Require a stronger forward-looking indicator as well
                    if sentence_lower.contains("future") ||
                       sentence_lower.contains("expect") ||
                       sentence_lower.contains("anticipate") {
                        statements.push(ForwardLookingStatement {
                            text: sentence.trim().to_string(),
                            sentiment: self.compute_sentiment(sentence),
                            confidence: 0.7,
                        });
                        break;
                    }
                }
            }
        }
        // Limit to most significant
        statements.truncate(20);
        statements
    }
    /// Extract key phrases via simple bigram counting.
    ///
    /// Only words longer than 3 characters participate; bigrams occurring
    /// at least 3 times are kept, sorted by frequency, capped at 50.
    fn extract_key_phrases(&self, content: &str) -> Vec<KeyPhrase> {
        let mut phrases = HashMap::new();
        let words: Vec<&str> = content
            .split_whitespace()
            .filter(|w| w.len() > 3)
            .collect();
        // Bigrams
        for window in words.windows(2) {
            let phrase = format!("{} {}", window[0].to_lowercase(), window[1].to_lowercase());
            if self.is_meaningful_phrase(&phrase) {
                *phrases.entry(phrase).or_insert(0) += 1;
            }
        }
        let mut result: Vec<KeyPhrase> = phrases
            .into_iter()
            .filter(|(_, count)| *count >= 3)
            .map(|(phrase, count)| KeyPhrase {
                phrase,
                frequency: count,
                importance: count as f64 / words.len() as f64,
            })
            .collect();
        result.sort_by(|a, b| b.frequency.cmp(&a.frequency));
        result.truncate(50);
        result
    }
    /// Reject phrases that begin with a stop word.
    ///
    /// NOTE(review): `starts_with` is a prefix test, so this also rejects
    /// phrases like "therefore ..." or "this ..." — confirm whether that
    /// over-filtering is acceptable before tightening to a whole-word check.
    fn is_meaningful_phrase(&self, phrase: &str) -> bool {
        let stop_phrases = ["the", "and", "for", "this", "that", "with"];
        !stop_phrases.iter().any(|s| phrase.starts_with(s))
    }
}
/// Analysis result
///
/// Aggregated output of `FilingAnalyzer::analyze` for one filing. Stages
/// disabled in `AnalyzerConfig` yield `None`/empty collections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilingAnalysis {
    /// Filing accession number
    pub accession_number: String,
    /// Extracted sections, keyed by section name (e.g. "MDA")
    pub sections: HashMap<String, String>,
    /// Overall sentiment score (None if sentiment analysis disabled)
    pub sentiment: Option<f64>,
    /// Risk factors, sorted by descending severity
    pub risk_factors: Vec<RiskFactor>,
    /// Forward-looking statements (at most 20)
    pub forward_looking: Vec<ForwardLookingStatement>,
    /// Key phrases (at most 50, by descending frequency)
    pub key_phrases: Vec<KeyPhrase>,
    /// Total word count of the document
    pub word_count: usize,
}
/// A risk factor
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RiskFactor {
    /// Risk category (e.g. "Regulatory", "Cybersecurity")
    pub category: String,
    /// Severity score (0-1, saturating at 10 mentions)
    pub severity: f64,
    /// Number of keyword mentions found
    pub mentions: usize,
    /// Sample text (currently never populated by the analyzer)
    pub sample_text: Option<String>,
}
/// A forward-looking statement
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForwardLookingStatement {
    /// Statement text (trimmed sentence)
    pub text: String,
    /// Sentiment score of the sentence
    pub sentiment: f64,
    /// Confidence that this is a forward-looking statement
    pub confidence: f64,
}
/// A key phrase
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyPhrase {
    /// Phrase text (lowercased bigram)
    pub phrase: String,
    /// Frequency count in the document
    pub frequency: usize,
    /// Importance score (frequency relative to word count)
    pub importance: f64,
}
/// Narrative extractor for text-to-vector
///
/// Converts a `FilingAnalysis` into a fixed-length numeric embedding for
/// coherence comparison against fundamental embeddings.
pub struct NarrativeExtractor {
    /// Configuration
    config: ExtractorConfig,
}
/// Extractor configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractorConfig {
    /// Target embedding dimension (zero-padded up to this length)
    pub embedding_dim: usize,
    /// Use TF-IDF weighting. NOTE(review): not referenced by
    /// `extract_embedding` yet — presumably reserved for a future
    /// implementation; confirm before relying on it.
    pub use_tfidf: bool,
    /// L2-normalize the produced embeddings
    pub normalize: bool,
}
impl Default for ExtractorConfig {
fn default() -> Self {
Self {
embedding_dim: 128,
use_tfidf: true,
normalize: true,
}
}
}
impl NarrativeExtractor {
    /// Build an extractor with the given configuration.
    pub fn new(config: ExtractorConfig) -> Self {
        Self { config }
    }
    /// Convert a `FilingAnalysis` into a fixed-length feature embedding.
    ///
    /// The first five slots carry hand-crafted features: sentiment,
    /// word count scaled by 100k (capped at 1), aggregate risk severity
    /// scaled by 5 (capped at 1), mean forward-looking sentiment, and
    /// key-phrase count scaled by 100 (capped at 1). Remaining slots are
    /// zero-padded up to `embedding_dim`, then optionally L2-normalized.
    pub fn extract_embedding(&self, analysis: &FilingAnalysis) -> Vec<f32> {
        let dim = self.config.embedding_dim;
        let sentiment = analysis.sentiment.unwrap_or(0.0) as f32;
        let length_feature = (analysis.word_count as f64 / 100000.0).min(1.0) as f32;
        let risk_total: f64 = analysis.risk_factors.iter().map(|r| r.severity).sum();
        let risk_feature = (risk_total / 5.0).min(1.0) as f32;
        // Mean FLS sentiment; the max(1) guard avoids division by zero
        let fls_count = analysis.forward_looking.len().max(1) as f64;
        let fls_feature =
            (analysis.forward_looking.iter().map(|f| f.sentiment).sum::<f64>() / fls_count) as f32;
        let diversity_feature = (analysis.key_phrases.len() as f64 / 100.0).min(1.0) as f32;
        let mut embedding = vec![
            sentiment,
            length_feature,
            risk_feature,
            fls_feature,
            diversity_feature,
        ];
        // Zero-pad up to the configured dimension (never truncate)
        if embedding.len() < dim {
            embedding.resize(dim, 0.0);
        }
        if self.config.normalize {
            let norm = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
            if norm > 0.0 {
                embedding.iter_mut().for_each(|x| *x /= norm);
            }
        }
        embedding
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Verify form-name parsing for the three most common filing types.
    #[test]
    fn test_filing_type_from_form() {
        assert_eq!(FilingType::from_form("10-K"), FilingType::TenK);
        assert_eq!(FilingType::from_form("10-Q"), FilingType::TenQ);
        assert_eq!(FilingType::from_form("8-K"), FilingType::EightK);
    }
    // Sentiment should be positive for upbeat text and negative for text
    // dominated by the negative word list.
    #[test]
    fn test_sentiment_analysis() {
        let config = AnalyzerConfig::default();
        let analyzer = FilingAnalyzer::new(config);
        let positive_text = "Growth and profit increased significantly. Strong performance exceeded expectations.";
        let sentiment = analyzer.compute_sentiment(positive_text);
        assert!(sentiment > 0.0);
        let negative_text = "Loss and decline due to challenging conditions. Risk of default increased.";
        let sentiment = analyzer.compute_sentiment(negative_text);
        assert!(sentiment < 0.0);
    }
}

View File

@@ -0,0 +1,601 @@
//! # RuVector SEC EDGAR Integration
//!
//! Integration with SEC EDGAR for financial intelligence, peer group coherence
//! analysis, and narrative drift detection.
//!
//! ## Core Capabilities
//!
//! - **Peer Network Graph**: Model company relationships via shared investors, sectors
//! - **Coherence Watch**: Detect when fundamentals diverge from narrative (10-K text)
//! - **Risk Signal Detection**: Use min-cut for structural discontinuities
//! - **Cross-Company Analysis**: Track contagion and sector-wide patterns
//!
//! ## Data Sources
//!
//! ### SEC EDGAR
//! - **XBRL Financial Statements**: Standardized accounting data (2009-present)
//! - **10-K/10-Q Filings**: Annual/quarterly reports with narrative
//! - **Form 4**: Insider trading disclosures
//! - **13F**: Institutional holdings
//! - **8-K**: Material events
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use ruvector_data_edgar::{
//! EdgarClient, PeerNetwork, CoherenceWatch, XbrlParser, FilingAnalyzer,
//! };
//!
//! // Build peer network from 13F holdings
//! let network = PeerNetwork::from_sector("technology")
//! .with_min_market_cap(1_000_000_000)
//! .build()
//! .await?;
//!
//! // Create coherence watch
//! let watch = CoherenceWatch::new(network);
//!
//! // Analyze for divergence
//! let alerts = watch
//!     .detect_divergence(0.4, 8) // narrative_weight, lookback_quarters
//!     .await?;
//!
//! for alert in alerts {
//! println!("{}: {}", alert.company, alert.interpretation);
//! }
//! ```
#![warn(missing_docs)]
#![warn(clippy::all)]
pub mod client;
pub mod xbrl;
pub mod filings;
pub mod coherence;
pub mod network;
use std::collections::HashMap;
use async_trait::async_trait;
use chrono::{DateTime, NaiveDate, Utc};
use serde::{Deserialize, Serialize};
use thiserror::Error;
pub use client::EdgarClient;
pub use xbrl::{XbrlParser, FinancialStatement, XbrlFact, XbrlContext};
pub use filings::{Filing, FilingType, FilingAnalyzer, NarrativeExtractor};
pub use coherence::{CoherenceWatch, CoherenceAlert, AlertSeverity, DivergenceType};
pub use network::{PeerNetwork, PeerNetworkBuilder, CompanyNode, PeerEdge};
use ruvector_data_framework::{DataRecord, DataSource, FrameworkError, Relationship, Result};
/// EDGAR-specific error types.
///
/// Converted into `FrameworkError::Ingestion` at the framework boundary
/// (see the `From` impl below).
#[derive(Error, Debug)]
pub enum EdgarError {
    /// The EDGAR API returned an unexpected or failing response
    #[error("API error: {0}")]
    Api(String),
    /// A CIK (Central Index Key) was malformed or unknown
    #[error("Invalid CIK: {0}")]
    InvalidCik(String),
    /// An XBRL document could not be parsed
    #[error("XBRL parse error: {0}")]
    XbrlParse(String),
    /// The requested filing does not exist
    #[error("Filing not found: {0}")]
    FilingNotFound(String),
    /// Underlying HTTP/transport failure (from `reqwest`)
    #[error("Network error: {0}")]
    Network(#[from] reqwest::Error),
    /// A response payload did not match the expected format
    #[error("Data format error: {0}")]
    DataFormat(String),
}
impl From<EdgarError> for FrameworkError {
fn from(e: EdgarError) -> Self {
FrameworkError::Ingestion(e.to_string())
}
}
/// Configuration for EDGAR data source.
///
/// The SEC requires a descriptive `User-Agent` with contact details and
/// enforces a fair-access rate limit (10 requests/second at time of writing).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgarConfig {
    /// User agent string sent on every request (required by SEC)
    pub user_agent: String,
    /// Company/organization name identifying the requester
    pub company_name: String,
    /// Contact email (required by SEC fair-access policy)
    pub contact_email: String,
    /// Maximum requests per second issued against EDGAR
    pub rate_limit: u32,
    /// Whether to also fetch historical (non-recent) data
    pub include_historical: bool,
    /// Filing form types to fetch (e.g. 10-K, 10-Q)
    pub filing_types: Vec<FilingType>,
}
impl Default for EdgarConfig {
fn default() -> Self {
Self {
user_agent: "RuVector/0.1.0".to_string(),
company_name: "Research Project".to_string(),
contact_email: "contact@example.com".to_string(),
rate_limit: 10, // SEC allows 10 requests/second
include_historical: true,
filing_types: vec![FilingType::TenK, FilingType::TenQ],
}
}
}
/// A company entity as identified in SEC EDGAR.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Company {
    /// CIK (Central Index Key) — the SEC's unique company identifier
    pub cik: String,
    /// Registered company name
    pub name: String,
    /// Ticker symbol, if the company is listed
    pub ticker: Option<String>,
    /// SIC (Standard Industrial Classification) code
    pub sic_code: Option<String>,
    /// Human-readable SIC description
    pub sic_description: Option<String>,
    /// State of incorporation
    pub state: Option<String>,
    /// Fiscal year end as reported by EDGAR (presumably "MMDD" — TODO confirm)
    pub fiscal_year_end: Option<String>,
    /// Date of the most recent filing, if known
    pub latest_filing: Option<NaiveDate>,
}
/// A single numeric financial metric extracted from a filing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinancialMetric {
    /// CIK of the reporting company
    pub cik: String,
    /// Accession number of the filing the value came from
    pub accession: String,
    /// Period-end date the value refers to
    pub report_date: NaiveDate,
    /// Metric name (XBRL concept tag)
    pub metric_name: String,
    /// Reported numeric value
    pub value: f64,
    /// Unit of measure (e.g. USD, shares)
    pub unit: String,
    /// Whether the figure comes from an audited report
    pub audited: bool,
    /// Reporting context (annual, quarterly, etc.)
    pub context: String,
}
/// Standard financial ratios.
///
/// Only a subset currently has a `compute` implementation; the remaining
/// variants return `None` until their inputs are wired up.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum FinancialRatio {
    /// Current ratio (current assets / current liabilities)
    CurrentRatio,
    /// Quick ratio ((current assets - inventory) / current liabilities)
    QuickRatio,
    /// Debt to equity (total debt / stockholders equity)
    DebtToEquity,
    /// Return on equity (net income / stockholders equity)
    ReturnOnEquity,
    /// Return on assets (net income / total assets)
    ReturnOnAssets,
    /// Gross margin
    GrossMargin,
    /// Operating margin
    OperatingMargin,
    /// Net margin (net income / revenue)
    NetMargin,
    /// Asset turnover
    AssetTurnover,
    /// Inventory turnover
    InventoryTurnover,
    /// Price to earnings
    PriceToEarnings,
    /// Price to book
    PriceToBook,
}
impl FinancialRatio {
    /// Compute this ratio from a map of financial line items.
    ///
    /// Returns `None` when either input is missing, the denominator is
    /// zero, or the ratio is not yet implemented (variants other than
    /// current ratio, debt/equity, net margin, ROE, ROA).
    pub fn compute(&self, data: &HashMap<String, f64>) -> Option<f64> {
        match self {
            FinancialRatio::CurrentRatio => {
                Self::div(data, "Assets Current", "Liabilities Current")
            }
            FinancialRatio::DebtToEquity => Self::div(data, "Debt", "Stockholders Equity"),
            FinancialRatio::NetMargin => Self::div(data, "Net Income", "Revenue"),
            FinancialRatio::ReturnOnEquity => {
                Self::div(data, "Net Income", "Stockholders Equity")
            }
            FinancialRatio::ReturnOnAssets => Self::div(data, "Net Income", "Assets"),
            _ => None, // Add more implementations as needed
        }
    }

    /// Safe quotient of two named line items: `None` if either is missing
    /// or the denominator is exactly zero.
    fn div(data: &HashMap<String, f64>, numerator: &str, denominator: &str) -> Option<f64> {
        let n = data.get(numerator)?;
        let d = data.get(denominator)?;
        (*d != 0.0).then(|| n / d)
    }
}
/// Coarse sector classification.
///
/// Derivable from the leading digit of an SIC code via [`Sector::from_sic`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Sector {
    /// Technology
    Technology,
    /// Healthcare
    Healthcare,
    /// Financial services
    Financials,
    /// Consumer discretionary
    ConsumerDiscretionary,
    /// Consumer staples
    ConsumerStaples,
    /// Energy
    Energy,
    /// Materials
    Materials,
    /// Industrials
    Industrials,
    /// Utilities
    Utilities,
    /// Real estate
    RealEstate,
    /// Communication services
    CommunicationServices,
    /// Other / unknown sector
    Other,
}
impl Sector {
    /// Map an SIC code to a coarse sector using its leading digit
    /// (the SIC "division"). Unknown or empty codes map to `Other`.
    pub fn from_sic(sic: &str) -> Self {
        let division = match sic.chars().next() {
            Some(d) => d,
            None => return Sector::Other,
        };
        match division {
            '1' => Sector::Energy,
            '2' => Sector::ConsumerStaples,
            '3' => Sector::Materials,
            '4' => Sector::Industrials,
            '5' => Sector::ConsumerDiscretionary,
            '6' => Sector::Financials,
            '7' => Sector::Technology,
            '8' => Sector::Healthcare,
            _ => Sector::Other,
        }
    }
}
/// EDGAR data source for the framework.
///
/// Wraps an [`EdgarClient`] plus the list of target CIKs and implements
/// [`DataSource`] so filings can be ingested in batches.
pub struct EdgarSource {
    // HTTP client for the SEC EDGAR API
    client: EdgarClient,
    // Source configuration (rate limit, filing types, ...)
    config: EdgarConfig,
    // CIKs of the companies to fetch
    ciks: Vec<String>,
}
impl EdgarSource {
    /// Create a new EDGAR data source from a configuration.
    pub fn new(config: EdgarConfig) -> Self {
        let client = EdgarClient::new(
            &config.user_agent,
            &config.company_name,
            &config.contact_email,
        );
        Self {
            client,
            config,
            ciks: Vec::new(),
        }
    }

    /// Set the list of CIKs to fetch (replaces any previous list).
    pub fn with_ciks(mut self, ciks: Vec<String>) -> Self {
        self.ciks = ciks;
        self
    }

    /// Resolve tickers to CIKs and add them.
    ///
    /// Best-effort: tickers that fail to resolve are skipped silently.
    pub async fn with_tickers(mut self, tickers: &[&str]) -> Result<Self> {
        for &ticker in tickers {
            if let Ok(cik) = self.client.ticker_to_cik(ticker).await {
                self.ciks.push(cik);
            }
        }
        Ok(self)
    }

    /// Add every company belonging to the given sector.
    pub async fn with_sector(mut self, sector: Sector) -> Result<Self> {
        let companies = self.client.get_companies_by_sector(&sector).await?;
        self.ciks.extend(companies.into_iter().map(|company| company.cik));
        Ok(self)
    }
}
#[async_trait]
impl DataSource for EdgarSource {
    fn source_id(&self) -> &str {
        "edgar"
    }

    /// Fetch the next batch of filings.
    ///
    /// The cursor is the string-encoded index of the next CIK to process;
    /// `None` (or an unparseable cursor) starts from the beginning.
    /// Per-CIK failures are logged and skipped so one bad company does
    /// not abort the whole batch.
    async fn fetch_batch(
        &self,
        cursor: Option<String>,
        batch_size: usize,
    ) -> Result<(Vec<DataRecord>, Option<String>)> {
        // Clamp the start index so a stale or corrupt cursor cannot cause
        // an out-of-bounds slice panic below.
        let start_idx: usize = cursor
            .as_ref()
            .and_then(|c| c.parse().ok())
            .unwrap_or(0)
            .min(self.ciks.len());
        let end_idx = (start_idx + batch_size).min(self.ciks.len());
        let mut records = Vec::new();
        for cik in &self.ciks[start_idx..end_idx] {
            // Fetch filings for this CIK
            match self.client.get_filings(cik, &self.config.filing_types).await {
                Ok(filings) => {
                    for filing in filings {
                        records.push(filing_to_record(filing));
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to fetch filings for CIK {}: {}", cik, e);
                }
            }
            // Respect the SEC rate limit between requests
            if self.config.rate_limit > 0 {
                let delay = 1000 / self.config.rate_limit as u64;
                tokio::time::sleep(tokio::time::Duration::from_millis(delay)).await;
            }
        }
        // Advance the cursor, or signal completion with None
        let next_cursor = if end_idx < self.ciks.len() {
            Some(end_idx.to_string())
        } else {
            None
        };
        Ok((records, next_cursor))
    }

    async fn total_count(&self) -> Result<Option<u64>> {
        Ok(Some(self.ciks.len() as u64))
    }

    async fn health_check(&self) -> Result<bool> {
        self.client.health_check().await.map_err(|e| e.into())
    }
}
/// Convert a filing into a framework `DataRecord`.
fn filing_to_record(filing: Filing) -> DataRecord {
    // Link the record back to the company that filed it.
    let filed_by = Relationship {
        target_id: filing.cik.clone(),
        rel_type: "filed_by".to_string(),
        weight: 1.0,
        properties: HashMap::new(),
    };
    // Filing dates carry no time-of-day; use midnight UTC, falling back
    // to "now" if the date cannot be converted.
    let timestamp = filing
        .filed_date
        .and_hms_opt(0, 0, 0)
        .map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
        .unwrap_or_else(Utc::now);
    DataRecord {
        id: filing.accession_number.clone(),
        source: "edgar".to_string(),
        record_type: format!("{:?}", filing.filing_type).to_lowercase(),
        timestamp,
        data: serde_json::to_value(&filing).unwrap_or_default(),
        embedding: None,
        relationships: vec![filed_by],
    }
}
/// Fundamental vs Narrative analyzer.
///
/// Detects divergence between quantitative financial data and the
/// qualitative narrative in filings — e.g. improving fundamentals paired
/// with a pessimistic 10-K tone, or vice versa.
pub struct FundamentalNarrativeAnalyzer {
    // Weights, divergence threshold and lookback window
    config: AnalyzerConfig,
}
/// Configuration for [`FundamentalNarrativeAnalyzer`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzerConfig {
    /// Weight given to fundamental (quantitative) metrics
    pub fundamental_weight: f64,
    /// Weight given to narrative sentiment
    pub narrative_weight: f64,
    /// Minimum |fundamental - narrative| gap required to flag a company
    pub divergence_threshold: f64,
    /// Number of past reporting periods to consider
    pub lookback_periods: usize,
}
impl Default for AnalyzerConfig {
fn default() -> Self {
Self {
fundamental_weight: 0.6,
narrative_weight: 0.4,
divergence_threshold: 0.3,
lookback_periods: 4,
}
}
}
impl FundamentalNarrativeAnalyzer {
    /// Create a new analyzer with the given configuration.
    pub fn new(config: AnalyzerConfig) -> Self {
        Self { config }
    }

    /// Compare fundamental and narrative trends for one company.
    ///
    /// Returns `Some` only when at least two filings are available and the
    /// absolute gap between the two trends exceeds the configured threshold.
    pub fn analyze(&self, company: &Company, filings: &[Filing]) -> Option<DivergenceResult> {
        if filings.len() < 2 {
            return None;
        }
        let fundamental_trend = self.compute_fundamental_trend(filings);
        let narrative_trend = self.compute_narrative_trend(filings);
        let divergence = (fundamental_trend - narrative_trend).abs();
        if divergence <= self.config.divergence_threshold {
            return None;
        }
        Some(DivergenceResult {
            company_cik: company.cik.clone(),
            company_name: company.name.clone(),
            fundamental_trend,
            narrative_trend,
            divergence_score: divergence,
            interpretation: self.interpret_divergence(fundamental_trend, narrative_trend),
        })
    }

    /// Trend in quantitative fundamentals: positive = improving.
    ///
    /// NOTE(review): placeholder returning 0.0 — the real implementation
    /// should derive this from XBRL data.
    fn compute_fundamental_trend(&self, _filings: &[Filing]) -> f64 {
        0.0
    }

    /// Trend in narrative sentiment: positive = optimistic.
    ///
    /// NOTE(review): placeholder returning 0.0 — the real implementation
    /// should analyze filing text sentiment.
    fn compute_narrative_trend(&self, _filings: &[Filing]) -> f64 {
        0.0
    }

    /// Produce a human-readable reading of the trend pair.
    fn interpret_divergence(&self, fundamental: f64, narrative: f64) -> String {
        let text = if fundamental > 0.0 && narrative < 0.0 {
            "Fundamentals improving but narrative pessimistic - potential undervaluation"
        } else if fundamental < 0.0 && narrative > 0.0 {
            "Fundamentals declining but narrative optimistic - potential risk"
        } else if fundamental > narrative {
            "Narrative lagging behind fundamental improvement"
        } else {
            "Narrative ahead of fundamental reality"
        };
        text.to_string()
    }
}
/// Result of divergence analysis for a single company.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DivergenceResult {
    /// Company CIK
    pub company_cik: String,
    /// Company name
    pub company_name: String,
    /// Fundamental trend (-1 to 1; positive = improving financials)
    pub fundamental_trend: f64,
    /// Narrative trend (-1 to 1; positive = optimistic tone)
    pub narrative_trend: f64,
    /// Divergence score: |fundamental - narrative| (0 to 2)
    pub divergence_score: f64,
    /// Human-readable interpretation of the trend pair
    pub interpretation: String,
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sector_from_sic() {
        // The first SIC digit drives the mapping:
        // 7xxx -> Technology, 6xxx -> Financials.
        let sector_of = |sic: &str| Sector::from_sic(sic);
        assert_eq!(sector_of("7370"), Sector::Technology);
        assert_eq!(sector_of("6000"), Sector::Financials);
    }

    #[test]
    fn test_default_config() {
        // Default rate limit matches the SEC's 10 req/s ceiling.
        assert_eq!(EdgarConfig::default().rate_limit, 10);
    }

    #[test]
    fn test_financial_ratio_compute() {
        let mut data = HashMap::new();
        data.insert("Assets Current".to_string(), 100.0);
        data.insert("Liabilities Current".to_string(), 50.0);
        // 100 / 50 is exactly representable, so compare directly.
        assert_eq!(FinancialRatio::CurrentRatio.compute(&data), Some(2.0));
    }
}

View File

@@ -0,0 +1,469 @@
//! Peer network construction for financial coherence analysis
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::{Company, Sector};
/// A company node in the peer network.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyNode {
    /// Company CIK (also the node's key in the network map)
    pub cik: String,
    /// Company name
    pub name: String,
    /// Ticker symbol, if listed
    pub ticker: Option<String>,
    /// Coarse sector classification
    pub sector: Sector,
    /// Market cap, if known
    pub market_cap: Option<f64>,
    /// Number of incident peer edges (filled in by the builder)
    pub peer_count: usize,
    /// Mean similarity of incident edges (filled in by the builder)
    pub avg_peer_similarity: f64,
}
/// An undirected edge between peer companies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerEdge {
    /// Source company CIK
    pub source: String,
    /// Target company CIK
    pub target: String,
    /// Similarity score (0-1)
    pub similarity: f64,
    /// Dominant relationship type behind this edge
    pub relationship_type: PeerRelationType,
    /// Edge weight used for min-cut analysis
    pub weight: f64,
    /// Human-readable evidence supporting the relationship
    pub evidence: Vec<String>,
}
/// Type of peer relationship backing an edge.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum PeerRelationType {
    /// Same sector/industry
    SameSector,
    /// Shared institutional investors (e.g. 13F overlap)
    SharedInvestors,
    /// Similar size (market cap)
    SimilarSize,
    /// Supply chain relationship
    SupplyChain,
    /// Direct competitor
    Competitor,
    /// More than one relationship type applies
    Multiple,
}
/// Peer network graph: companies as nodes, peer relationships as edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerNetwork {
    /// Network identifier
    pub id: String,
    /// Nodes (companies), keyed by CIK
    pub nodes: HashMap<String, CompanyNode>,
    /// Edges (peer relationships)
    pub edges: Vec<PeerEdge>,
    /// Creation time
    pub created_at: DateTime<Utc>,
    /// Aggregate network statistics (kept current on every mutation)
    pub stats: NetworkStats,
}
/// Aggregate statistics of a peer network.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NetworkStats {
    /// Number of nodes
    pub node_count: usize,
    /// Number of edges
    pub edge_count: usize,
    /// Mean edge similarity
    pub avg_similarity: f64,
    /// Edge count relative to the maximum possible (undirected, no loops)
    pub density: f64,
    /// Mean node degree
    pub avg_degree: f64,
    /// Number of connected components
    /// (not maintained by `update_stats`; filled in by external analysis)
    pub num_components: usize,
    /// Computed min-cut value
    /// (not maintained by `update_stats`; filled in by external analysis)
    pub min_cut_value: Option<f64>,
}
impl PeerNetwork {
    /// Create an empty network with the given identifier.
    pub fn new(id: &str) -> Self {
        Self {
            id: id.to_string(),
            nodes: HashMap::new(),
            edges: Vec::new(),
            created_at: Utc::now(),
            stats: NetworkStats::default(),
        }
    }

    /// Add a company node (keyed by CIK) and refresh statistics.
    pub fn add_node(&mut self, node: CompanyNode) {
        self.nodes.insert(node.cik.clone(), node);
        self.update_stats();
    }

    /// Add a peer edge and refresh statistics.
    pub fn add_edge(&mut self, edge: PeerEdge) {
        self.edges.push(edge);
        self.update_stats();
    }

    /// Get a node by CIK.
    pub fn get_node(&self, cik: &str) -> Option<&CompanyNode> {
        self.nodes.get(cik)
    }

    /// CIKs of all companies sharing an edge with `cik`.
    pub fn get_peers(&self, cik: &str) -> Vec<&str> {
        self.edges
            .iter()
            .filter_map(|e| {
                if e.source == cik {
                    Some(e.target.as_str())
                } else if e.target == cik {
                    Some(e.source.as_str())
                } else {
                    None
                }
            })
            .collect()
    }

    /// All edges incident to `cik`.
    pub fn get_edges_for_company(&self, cik: &str) -> Vec<&PeerEdge> {
        self.edges
            .iter()
            .filter(|e| e.source == cik || e.target == cik)
            .collect()
    }

    /// Recompute aggregate statistics after a mutation.
    fn update_stats(&mut self) {
        self.stats.node_count = self.nodes.len();
        self.stats.edge_count = self.edges.len();
        if !self.edges.is_empty() {
            self.stats.avg_similarity = self.edges.iter().map(|e| e.similarity).sum::<f64>()
                / self.edges.len() as f64;
        }
        // Density relative to the maximum undirected simple-graph edge count
        let max_edges = if self.nodes.len() > 1 {
            self.nodes.len() * (self.nodes.len() - 1) / 2
        } else {
            1
        };
        self.stats.density = self.edges.len() as f64 / max_edges as f64;
        if !self.nodes.is_empty() {
            self.stats.avg_degree = (2 * self.edges.len()) as f64 / self.nodes.len() as f64;
        }
    }

    /// Stable node ordering: CIKs sorted lexicographically.
    ///
    /// `HashMap` iteration order is unspecified, so both integer-ID views
    /// below derive their IDs from this sorted order. This guarantees that
    /// `to_mincut_edges` and `node_id_mapping` always agree and that the
    /// assignment is reproducible across runs.
    fn sorted_ciks(&self) -> Vec<&String> {
        let mut ciks: Vec<&String> = self.nodes.keys().collect();
        ciks.sort();
        ciks
    }

    /// Convert to `(source_id, target_id, weight)` triples for RuVector
    /// min-cut. IDs are assigned in sorted-CIK order; use
    /// [`Self::node_id_mapping`] to translate them back.
    pub fn to_mincut_edges(&self) -> Vec<(u64, u64, f64)> {
        let node_ids: HashMap<&str, u64> = self
            .sorted_ciks()
            .into_iter()
            .enumerate()
            .map(|(i, cik)| (cik.as_str(), i as u64))
            .collect();
        self.edges
            .iter()
            .filter_map(|e| {
                let src_id = node_ids.get(e.source.as_str())?;
                let tgt_id = node_ids.get(e.target.as_str())?;
                Some((*src_id, *tgt_id, e.weight))
            })
            .collect()
    }

    /// Integer-ID -> CIK mapping, consistent with [`Self::to_mincut_edges`].
    pub fn node_id_mapping(&self) -> HashMap<u64, String> {
        self.sorted_ciks()
            .into_iter()
            .enumerate()
            .map(|(i, cik)| (i as u64, cik.clone()))
            .collect()
    }
}
/// Builder for peer networks.
pub struct PeerNetworkBuilder {
    // Identifier assigned to the resulting network
    id: String,
    // Candidate companies to turn into nodes
    companies: Vec<Company>,
    // Minimum similarity for an edge to be created
    min_similarity: f64,
    // Maximum peers per company
    // NOTE(review): stored but not enforced by `build()` — confirm intent
    max_peers: usize,
    // Relationship types considered when scoring pairs
    relationship_types: Vec<PeerRelationType>,
}
impl PeerNetworkBuilder {
    /// Create a builder with defaults: timestamp-based ID, similarity
    /// threshold 0.3, up to 20 peers, sector + size relationships.
    pub fn new() -> Self {
        Self {
            id: format!("network_{}", Utc::now().timestamp()),
            companies: Vec::new(),
            min_similarity: 0.3,
            max_peers: 20,
            relationship_types: vec![
                PeerRelationType::SameSector,
                PeerRelationType::SimilarSize,
            ],
        }
    }
    /// Set the network ID
    pub fn with_id(mut self, id: &str) -> Self {
        self.id = id.to_string();
        self
    }
    /// Append companies to the candidate pool
    pub fn add_companies(mut self, companies: Vec<Company>) -> Self {
        self.companies.extend(companies);
        self
    }
    /// Set the minimum similarity required to create an edge
    pub fn min_similarity(mut self, min: f64) -> Self {
        self.min_similarity = min;
        self
    }
    /// Set maximum peers per company
    ///
    /// NOTE(review): this limit is currently NOT enforced by `build()` —
    /// confirm whether low-similarity edges should be trimmed per node.
    pub fn max_peers(mut self, max: usize) -> Self {
        self.max_peers = max;
        self
    }
    /// Set relationship types to consider when scoring pairs
    pub fn relationship_types(mut self, types: Vec<PeerRelationType>) -> Self {
        self.relationship_types = types;
        self
    }
    /// Build the network: one node per company, one edge per unordered
    /// company pair whose similarity clears `min_similarity`.
    ///
    /// All O(n^2) pairs are compared, then a second pass fills in
    /// per-node peer statistics (degree and mean incident similarity).
    pub fn build(self) -> PeerNetwork {
        let mut network = PeerNetwork::new(&self.id);
        // Add nodes; sector is derived from the SIC code when available
        for company in &self.companies {
            let sector = company.sic_code
                .as_ref()
                .map(|s| Sector::from_sic(s))
                .unwrap_or(Sector::Other);
            let node = CompanyNode {
                cik: company.cik.clone(),
                name: company.name.clone(),
                ticker: company.ticker.clone(),
                sector,
                market_cap: None,
                peer_count: 0,
                avg_peer_similarity: 0.0,
            };
            network.add_node(node);
        }
        // Add edges based on relationships (each unordered pair once)
        for i in 0..self.companies.len() {
            for j in (i + 1)..self.companies.len() {
                let company_i = &self.companies[i];
                let company_j = &self.companies[j];
                let (similarity, rel_type) = self.compute_similarity(company_i, company_j);
                if similarity >= self.min_similarity {
                    let edge = PeerEdge {
                        source: company_i.cik.clone(),
                        target: company_j.cik.clone(),
                        similarity,
                        relationship_type: rel_type,
                        // min-cut weight mirrors the similarity score
                        weight: similarity,
                        evidence: self.collect_evidence(company_i, company_j),
                    };
                    network.add_edge(edge);
                }
            }
        }
        // Update node statistics (degree and mean incident similarity)
        for (cik, node) in network.nodes.iter_mut() {
            let edges = network.edges
                .iter()
                .filter(|e| e.source == *cik || e.target == *cik)
                .collect::<Vec<_>>();
            node.peer_count = edges.len();
            if !edges.is_empty() {
                node.avg_peer_similarity = edges.iter().map(|e| e.similarity).sum::<f64>()
                    / edges.len() as f64;
            }
        }
        network
    }
    /// Score a company pair.
    ///
    /// The score is the mean of the matched signals: same sector 0.5,
    /// same SIC division (first digit only) 0.3, same state 0.2. The
    /// returned `rel_type` only distinguishes a single match
    /// (`SameSector`) from several (`Multiple`).
    fn compute_similarity(&self, a: &Company, b: &Company) -> (f64, PeerRelationType) {
        let mut total_similarity = 0.0;
        let mut relationship_count = 0;
        let mut rel_type = PeerRelationType::SameSector;
        // Sector similarity (only when SameSector scoring is enabled)
        if self.relationship_types.contains(&PeerRelationType::SameSector) {
            let sector_a = a.sic_code.as_ref().map(|s| Sector::from_sic(s));
            let sector_b = b.sic_code.as_ref().map(|s| Sector::from_sic(s));
            if sector_a.is_some() && sector_a == sector_b {
                total_similarity += 0.5;
                relationship_count += 1;
            } else if a.sic_code.is_some() && b.sic_code.is_some() {
                // Weaker signal: same SIC division (first digit)
                let sic_a = a.sic_code.as_ref().unwrap();
                let sic_b = b.sic_code.as_ref().unwrap();
                if !sic_a.is_empty() && !sic_b.is_empty() &&
                    sic_a.chars().next() == sic_b.chars().next() {
                    total_similarity += 0.3;
                    relationship_count += 1;
                }
            }
        }
        // Same state of incorporation
        if a.state.is_some() && a.state == b.state {
            total_similarity += 0.2;
            relationship_count += 1;
        }
        let similarity = if relationship_count > 0 {
            total_similarity / relationship_count as f64
        } else {
            0.0
        };
        if relationship_count > 1 {
            rel_type = PeerRelationType::Multiple;
        }
        (similarity, rel_type)
    }
    /// Human-readable evidence strings for an edge between `a` and `b`.
    fn collect_evidence(&self, a: &Company, b: &Company) -> Vec<String> {
        let mut evidence = Vec::new();
        let sector_a = a.sic_code.as_ref().map(|s| Sector::from_sic(s));
        let sector_b = b.sic_code.as_ref().map(|s| Sector::from_sic(s));
        if sector_a.is_some() && sector_a == sector_b {
            evidence.push(format!("Same sector: {:?}", sector_a.unwrap()));
        }
        if a.state.is_some() && a.state == b.state {
            evidence.push(format!("Same state: {}", a.state.as_ref().unwrap()));
        }
        evidence
    }
}
impl Default for PeerNetworkBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: a minimal technology-sector node with the given CIK/name.
    fn node(cik: &str, name: &str) -> CompanyNode {
        CompanyNode {
            cik: cik.to_string(),
            name: name.to_string(),
            ticker: None,
            sector: Sector::Technology,
            market_cap: None,
            peer_count: 0,
            avg_peer_similarity: 0.0,
        }
    }

    #[test]
    fn test_empty_network() {
        let network = PeerNetwork::new("test");
        assert_eq!(network.stats.node_count, 0);
        assert_eq!(network.stats.edge_count, 0);
    }

    #[test]
    fn test_builder() {
        // Building with no companies yields an empty network.
        let network = PeerNetworkBuilder::new()
            .min_similarity(0.5)
            .max_peers(10)
            .build();
        assert!(network.nodes.is_empty());
    }

    #[test]
    fn test_get_peers() {
        let mut network = PeerNetwork::new("test");
        network.add_node(node("A", "Company A"));
        network.add_node(node("B", "Company B"));
        network.add_edge(PeerEdge {
            source: "A".to_string(),
            target: "B".to_string(),
            similarity: 0.8,
            relationship_type: PeerRelationType::SameSector,
            weight: 0.8,
            evidence: vec![],
        });
        // Edges are undirected for peer lookup: B is A's peer.
        assert_eq!(network.get_peers("A"), vec!["B"]);
    }
}

View File

@@ -0,0 +1,338 @@
//! XBRL parsing for financial statement extraction
use std::collections::HashMap;
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use crate::EdgarError;
/// XBRL parser for extracting financial statements from filings.
pub struct XbrlParser {
    // Parser behavior flags and fact filters
    // NOTE(review): the visible implementation never reads this config —
    // confirm whether filtering is meant to be applied in `parse`.
    config: ParserConfig,
}
/// Configuration for [`XbrlParser`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParserConfig {
    /// Keep every numeric fact instead of only filtered ones
    pub include_all_facts: bool,
    /// Fact name filters (regex patterns)
    /// NOTE(review): not applied by the placeholder parser — confirm use
    pub fact_filters: Vec<String>,
    /// Merge duplicate contexts into one
    pub merge_contexts: bool,
}
impl Default for ParserConfig {
    fn default() -> Self {
        // By default only the headline statement concepts are kept.
        let fact_filters = [
            "Revenue",
            "NetIncome",
            "Assets",
            "Liabilities",
            "StockholdersEquity",
        ]
        .iter()
        .map(|s| s.to_string())
        .collect();
        Self {
            include_all_facts: false,
            fact_filters,
            merge_contexts: true,
        }
    }
}
/// Parsed financial statement, with facts routed to their statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinancialStatement {
    /// Company CIK
    pub cik: String,
    /// Filing accession number
    pub accession: String,
    /// Report type ("10-K" for annual, "10-Q" for quarterly)
    pub report_type: String,
    /// Period end date
    pub period_end: NaiveDate,
    /// Whether this is an annual report (vs quarterly)
    pub is_annual: bool,
    /// Balance sheet items, keyed by concept name
    pub balance_sheet: HashMap<String, f64>,
    /// Income statement items, keyed by concept name
    pub income_statement: HashMap<String, f64>,
    /// Cash flow items, keyed by concept name
    pub cash_flow: HashMap<String, f64>,
    /// Every parsed fact, including uncategorized ones
    pub all_facts: Vec<XbrlFact>,
    /// Reporting contexts found in the document
    pub contexts: Vec<XbrlContext>,
}
/// A single numeric XBRL fact.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct XbrlFact {
    /// Concept (tag) name
    pub name: String,
    /// Numeric value
    pub value: f64,
    /// Unit of measure
    pub unit: String,
    /// ID of the context this fact belongs to
    pub context_ref: String,
    /// Decimals precision attribute, when present
    pub decimals: Option<i32>,
    /// Whether the value carries a negation
    /// (presumably from a sign attribute — TODO confirm semantics)
    pub is_negated: bool,
}
/// An XBRL reporting context (a period or an instant, plus dimensions).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct XbrlContext {
    /// Context ID referenced by facts
    pub id: String,
    /// Period start date (None for instant contexts)
    pub start_date: Option<NaiveDate>,
    /// Period end date, or the instant itself
    pub end_date: NaiveDate,
    /// True when this context is an instant rather than a duration
    pub is_instant: bool,
    /// Segment/scenario dimensions attached to the context
    pub dimensions: HashMap<String, String>,
}
impl XbrlParser {
    /// Create a new parser with the given configuration.
    pub fn new(config: ParserConfig) -> Self {
        Self { config }
    }

    /// Parse an XBRL document into a categorized financial statement.
    ///
    /// NOTE: this is a simplified placeholder pipeline; a production
    /// implementation would parse the XML properly (e.g. with quick-xml).
    ///
    /// # Errors
    /// Returns [`EdgarError::XbrlParse`] when no reporting context can be
    /// determined.
    pub fn parse(&self, content: &str, cik: &str, accession: &str) -> Result<FinancialStatement, EdgarError> {
        let contexts = self.parse_contexts(content)?;
        let facts = self.parse_facts(content)?;
        // Determine period end and whether this is an annual report
        let (period_end, is_annual) = self.determine_period(&contexts)?;
        // Route each fact to its statement. Order matters: balance sheet
        // is checked first, then income statement, then cash flow — a
        // concept matching several pattern lists lands in the first one.
        let mut balance_sheet = HashMap::new();
        let mut income_statement = HashMap::new();
        let mut cash_flow = HashMap::new();
        for fact in &facts {
            if self.is_balance_sheet_item(&fact.name) {
                balance_sheet.insert(fact.name.clone(), fact.value);
            } else if self.is_income_statement_item(&fact.name) {
                income_statement.insert(fact.name.clone(), fact.value);
            } else if self.is_cash_flow_item(&fact.name) {
                cash_flow.insert(fact.name.clone(), fact.value);
            }
        }
        Ok(FinancialStatement {
            cik: cik.to_string(),
            accession: accession.to_string(),
            report_type: if is_annual { "10-K".to_string() } else { "10-Q".to_string() },
            period_end,
            is_annual,
            balance_sheet,
            income_statement,
            cash_flow,
            all_facts: facts,
            contexts,
        })
    }

    /// Parse contexts from the document.
    ///
    /// Placeholder: ignores the input and returns a single FY2023 duration
    /// context until real XML parsing is implemented.
    fn parse_contexts(&self, _content: &str) -> Result<Vec<XbrlContext>, EdgarError> {
        Ok(vec![XbrlContext {
            id: "FY2023".to_string(),
            start_date: Some(NaiveDate::from_ymd_opt(2023, 1, 1).unwrap()),
            end_date: NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(),
            is_instant: false,
            dimensions: HashMap::new(),
        }])
    }

    /// Parse facts from the document.
    ///
    /// Placeholder: returns no facts until real XML parsing is implemented.
    fn parse_facts(&self, _content: &str) -> Result<Vec<XbrlFact>, EdgarError> {
        Ok(Vec::new())
    }

    /// Determine the period end date and whether the report is annual.
    ///
    /// Prefers the first duration (non-instant) context, classifying it
    /// as annual when it spans more than 300 days; otherwise falls back
    /// to the last instant context (assumed annual).
    ///
    /// # Errors
    /// Returns [`EdgarError::XbrlParse`] when `contexts` is empty.
    fn determine_period(&self, contexts: &[XbrlContext]) -> Result<(NaiveDate, bool), EdgarError> {
        for ctx in contexts {
            if !ctx.is_instant {
                let duration_days = ctx.start_date
                    .map(|s| (ctx.end_date - s).num_days())
                    .unwrap_or(0);
                let is_annual = duration_days > 300;
                return Ok((ctx.end_date, is_annual));
            }
        }
        if let Some(ctx) = contexts.last() {
            return Ok((ctx.end_date, true));
        }
        Err(EdgarError::XbrlParse("No valid context found".to_string()))
    }

    /// True when the concept name looks like a balance-sheet item
    /// (substring match against a fixed pattern list).
    fn is_balance_sheet_item(&self, name: &str) -> bool {
        const PATTERNS: [&str; 10] = [
            "Assets", "Liabilities", "Equity", "Cash", "Inventory",
            "Receivable", "Payable", "Debt", "Property", "Goodwill",
        ];
        PATTERNS.iter().any(|p| name.contains(p))
    }

    /// True when the concept name looks like an income-statement item.
    fn is_income_statement_item(&self, name: &str) -> bool {
        const PATTERNS: [&str; 10] = [
            "Revenue", "Sales", "Cost", "Expense", "Income",
            "Profit", "Loss", "Earnings", "EBITDA", "Margin",
        ];
        PATTERNS.iter().any(|p| name.contains(p))
    }

    /// True when the concept name looks like a cash-flow item.
    fn is_cash_flow_item(&self, name: &str) -> bool {
        const PATTERNS: [&str; 7] = [
            "CashFlow", "Operating", "Investing", "Financing",
            "Depreciation", "Amortization", "CapitalExpenditure",
        ];
        PATTERNS.iter().any(|p| name.contains(p))
    }
}
/// Convert a financial statement to a fixed-size (64-dim), L2-normalized
/// embedding of key balance-sheet and profitability ratios.
///
/// Missing line items fall back to neutral defaults (denominators default
/// to 1.0 so ratios degrade gracefully). An explicitly-zero denominator
/// yields 0.0 for that ratio instead of +/-inf — an infinite component
/// would otherwise poison the normalization step with NaNs.
pub fn statement_to_embedding(statement: &FinancialStatement) -> Vec<f32> {
    // Safe ratio: 0.0 when the denominator is exactly zero.
    fn ratio(numerator: f64, denominator: f64) -> f32 {
        if denominator != 0.0 {
            (numerator / denominator) as f32
        } else {
            0.0
        }
    }
    let mut embedding = Vec::with_capacity(64);
    // Balance sheet ratios
    let total_assets = statement.balance_sheet.get("Assets").copied().unwrap_or(1.0);
    let total_liabilities = statement.balance_sheet.get("Liabilities").copied().unwrap_or(0.0);
    let equity = statement.balance_sheet.get("StockholdersEquity").copied().unwrap_or(1.0);
    let cash = statement.balance_sheet.get("Cash").copied().unwrap_or(0.0);
    embedding.push(ratio(total_liabilities, total_assets)); // Debt ratio
    embedding.push(ratio(cash, total_assets)); // Cash ratio
    embedding.push(ratio(equity, total_assets)); // Equity ratio
    // Income statement ratios
    let revenue = statement.income_statement.get("Revenue").copied().unwrap_or(1.0);
    let net_income = statement.income_statement.get("NetIncome").copied().unwrap_or(0.0);
    let operating_income = statement.income_statement.get("OperatingIncome").copied().unwrap_or(0.0);
    embedding.push(ratio(net_income, revenue)); // Net margin
    embedding.push(ratio(operating_income, revenue)); // Operating margin
    embedding.push(ratio(net_income, equity)); // ROE
    embedding.push(ratio(net_income, total_assets)); // ROA
    // Pad to the fixed dimension
    embedding.resize(64, 0.0);
    // L2 normalize; an all-zero vector is left untouched
    let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        for x in &mut embedding {
            *x /= norm;
        }
    }
    embedding
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: parser with the default configuration.
    fn default_parser() -> XbrlParser {
        XbrlParser::new(ParserConfig::default())
    }

    #[test]
    fn test_parser_creation() {
        // Default config keeps only filtered facts.
        assert!(!default_parser().config.include_all_facts);
    }

    #[test]
    fn test_balance_sheet_detection() {
        let parser = default_parser();
        assert!(parser.is_balance_sheet_item("TotalAssets"));
        assert!(parser.is_balance_sheet_item("CashAndCashEquivalents"));
        assert!(!parser.is_balance_sheet_item("Revenue"));
    }

    #[test]
    fn test_income_statement_detection() {
        let parser = default_parser();
        assert!(parser.is_income_statement_item("Revenue"));
        assert!(parser.is_income_statement_item("NetIncome"));
        assert!(!parser.is_income_statement_item("TotalAssets"));
    }
}