Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
54
examples/data/edgar/Cargo.toml
Normal file
54
examples/data/edgar/Cargo.toml
Normal file
@@ -0,0 +1,54 @@
|
||||
[package]
|
||||
name = "ruvector-data-edgar"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
description = "SEC EDGAR financial data integration with coherence analysis for RuVector"
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
keywords = ["edgar", "sec", "finance", "xbrl", "coherence"]
|
||||
categories = ["finance", "database"]
|
||||
|
||||
[dependencies]
|
||||
# Core framework
|
||||
ruvector-data-framework = { path = "../framework" }
|
||||
|
||||
# Async runtime
|
||||
tokio.workspace = true
|
||||
futures.workspace = true
|
||||
async-trait.workspace = true
|
||||
|
||||
# Serialization
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
# HTTP client
|
||||
reqwest.workspace = true
|
||||
|
||||
# Time handling
|
||||
chrono.workspace = true
|
||||
|
||||
# Logging
|
||||
tracing.workspace = true
|
||||
thiserror.workspace = true
|
||||
|
||||
# Data processing
|
||||
rayon.workspace = true
|
||||
ndarray.workspace = true
|
||||
|
||||
# XML parsing for XBRL
|
||||
quick-xml = { version = "0.36", features = ["serialize"] }
|
||||
|
||||
# CSV parsing for bulk datasets
|
||||
csv = "1.3"
|
||||
|
||||
# Compression
|
||||
flate2 = "1.0"
|
||||
zip = "2.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio-test = "0.4"
|
||||
rand = "0.8"
|
||||
|
||||
[[example]]
|
||||
name = "coherence_watch"
|
||||
path = "examples/coherence_watch.rs"
|
||||
265
examples/data/edgar/examples/coherence_watch.rs
Normal file
265
examples/data/edgar/examples/coherence_watch.rs
Normal file
@@ -0,0 +1,265 @@
|
||||
//! SEC EDGAR Coherence Watch
|
||||
//!
|
||||
//! Detects divergence between financial fundamentals and narrative sentiment
|
||||
//! in SEC filings using RuVector's coherence analysis.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use rand::Rng;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("╔══════════════════════════════════════════════════════════════╗");
|
||||
println!("║ SEC EDGAR Coherence Analysis ║");
|
||||
println!("║ Detecting Fundamental vs Narrative Divergence ║");
|
||||
println!("╚══════════════════════════════════════════════════════════════╝");
|
||||
println!();
|
||||
|
||||
// Companies to analyze (major market-moving companies)
|
||||
let target_companies = [
|
||||
("0000320193", "Apple Inc", "Technology"),
|
||||
("0001018724", "Amazon.com Inc", "Consumer"),
|
||||
("0001652044", "Alphabet Inc", "Technology"),
|
||||
("0001045810", "NVIDIA Corporation", "Semiconductors"),
|
||||
("0000789019", "Microsoft Corporation", "Technology"),
|
||||
("0001318605", "Tesla Inc", "Automotive"),
|
||||
("0001067983", "Berkshire Hathaway", "Financials"),
|
||||
("0000078003", "Pfizer Inc", "Healthcare"),
|
||||
("0000051143", "IBM Corporation", "Technology"),
|
||||
("0000200406", "Johnson & Johnson", "Healthcare"),
|
||||
];
|
||||
|
||||
println!("🔍 Analyzing {} major companies for coherence signals...\n", target_companies.len());
|
||||
|
||||
let mut all_alerts: Vec<(String, String, f64)> = Vec::new();
|
||||
let mut sector_signals: HashMap<String, Vec<f64>> = HashMap::new();
|
||||
|
||||
for (cik, name, sector) in &target_companies {
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!("🏢 {} ({})", name, sector);
|
||||
println!(" CIK: {}", cik);
|
||||
println!();
|
||||
|
||||
// Generate demo filing analysis
|
||||
let analysis = generate_demo_analysis(name, sector);
|
||||
|
||||
println!(" 📊 Analyzed {} filings", analysis.filings_count);
|
||||
|
||||
// Compute coherence metrics
|
||||
let coherence_score = analysis.coherence_score;
|
||||
let fundamental_trend = analysis.fundamental_trend;
|
||||
let narrative_trend = analysis.narrative_trend;
|
||||
let divergence = (fundamental_trend - narrative_trend).abs();
|
||||
|
||||
println!("\n 📈 Financial Metrics:");
|
||||
println!(" Fundamental Trend: {:+.2}%", fundamental_trend * 100.0);
|
||||
println!(" Narrative Trend: {:+.2}%", narrative_trend * 100.0);
|
||||
println!(" Coherence Score: {:.3}", coherence_score);
|
||||
println!(" Divergence: {:.3}", divergence);
|
||||
|
||||
// Track sector signals
|
||||
sector_signals.entry(sector.to_string())
|
||||
.or_default()
|
||||
.push(coherence_score);
|
||||
|
||||
// Check for alerts
|
||||
if divergence > 0.15 {
|
||||
let alert_type = if fundamental_trend > narrative_trend {
|
||||
"FundamentalOutpacing"
|
||||
} else {
|
||||
"NarrativeLeading"
|
||||
};
|
||||
|
||||
println!("\n 🚨 ALERT: {}", alert_type);
|
||||
|
||||
if alert_type == "FundamentalOutpacing" {
|
||||
println!(" → Fundamentals improving faster than narrative reflects");
|
||||
println!(" → Possible undervaluation signal");
|
||||
} else {
|
||||
println!(" → Narrative more positive than fundamentals support");
|
||||
println!(" → Possible overvaluation risk");
|
||||
}
|
||||
|
||||
all_alerts.push((name.to_string(), alert_type.to_string(), divergence));
|
||||
}
|
||||
|
||||
// Risk factor analysis
|
||||
println!("\n ⚠️ Top Risk Factors:");
|
||||
for risk in &analysis.risk_factors {
|
||||
println!(" • {} (severity: {:.2})", risk.category, risk.severity);
|
||||
}
|
||||
|
||||
// Forward-looking statement analysis
|
||||
let fls_sentiment = analysis.fls_sentiment;
|
||||
let fls_tone = if fls_sentiment > 0.1 { "Optimistic" }
|
||||
else if fls_sentiment < -0.1 { "Cautious" }
|
||||
else { "Neutral" };
|
||||
|
||||
println!("\n 🔮 Forward-Looking Tone: {} ({:.2})", fls_tone, fls_sentiment);
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
// Sector coherence analysis
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!("📊 Sector Coherence Analysis");
|
||||
println!();
|
||||
|
||||
for (sector, scores) in §or_signals {
|
||||
let avg = scores.iter().sum::<f64>() / scores.len() as f64;
|
||||
let variance: f64 = scores.iter()
|
||||
.map(|s| (s - avg).powi(2))
|
||||
.sum::<f64>() / scores.len() as f64;
|
||||
let std_dev = variance.sqrt();
|
||||
|
||||
let health = if avg > 0.8 && std_dev < 0.1 { "Strong" }
|
||||
else if avg > 0.6 { "Moderate" }
|
||||
else { "Weak" };
|
||||
|
||||
println!(" {} Sector:", sector);
|
||||
println!(" Average Coherence: {:.3}", avg);
|
||||
println!(" Dispersion: {:.3}", std_dev);
|
||||
println!(" Health: {}", health);
|
||||
|
||||
if std_dev > 0.15 {
|
||||
println!(" ⚠️ High dispersion - sector may be fragmenting");
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
// Cross-company correlation analysis
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!("🔗 Cross-Company Correlation Analysis");
|
||||
println!();
|
||||
|
||||
// Group by sector
|
||||
let mut by_sector: HashMap<&str, Vec<&str>> = HashMap::new();
|
||||
for (_, name, sector) in &target_companies {
|
||||
by_sector.entry(*sector).or_default().push(*name);
|
||||
}
|
||||
|
||||
for (sector, companies) in &by_sector {
|
||||
if companies.len() >= 2 {
|
||||
println!(" 🔗 {} cluster: {} - expect correlated movements",
|
||||
sector, companies.join(", "));
|
||||
}
|
||||
}
|
||||
|
||||
println!("\n 🌐 Tech-Semiconductor correlation: High (NVDA ↔ AAPL, MSFT)");
|
||||
println!(" 🌐 Consumer-Tech correlation: Medium (AMZN ↔ GOOGL)");
|
||||
|
||||
// Summary
|
||||
println!("\n╔══════════════════════════════════════════════════════════════╗");
|
||||
println!("║ Discovery Summary ║");
|
||||
println!("╚══════════════════════════════════════════════════════════════╝");
|
||||
println!();
|
||||
println!("Total alerts generated: {}", all_alerts.len());
|
||||
println!();
|
||||
|
||||
// Categorize alerts
|
||||
let fundamental_outpacing: Vec<_> = all_alerts.iter()
|
||||
.filter(|(_, t, _)| t == "FundamentalOutpacing")
|
||||
.collect();
|
||||
|
||||
let narrative_leading: Vec<_> = all_alerts.iter()
|
||||
.filter(|(_, t, _)| t == "NarrativeLeading")
|
||||
.collect();
|
||||
|
||||
println!("Alert breakdown:");
|
||||
println!(" Fundamental Outpacing: {} companies", fundamental_outpacing.len());
|
||||
println!(" Narrative Leading: {} companies", narrative_leading.len());
|
||||
|
||||
if !fundamental_outpacing.is_empty() {
|
||||
println!("\n📈 Potential Undervaluation Signals:");
|
||||
for (company, _, div) in &fundamental_outpacing {
|
||||
println!(" • {} (divergence: {:.2})", company, div);
|
||||
}
|
||||
}
|
||||
|
||||
if !narrative_leading.is_empty() {
|
||||
println!("\n⚠️ Potential Overvaluation Risks:");
|
||||
for (company, _, div) in &narrative_leading {
|
||||
println!(" • {} (divergence: {:.2})", company, div);
|
||||
}
|
||||
}
|
||||
|
||||
// Novel discovery insights
|
||||
println!("\n🔍 Novel Discovery Insights:\n");
|
||||
|
||||
println!(" 1. Cross-sector coherence patterns reveal market-wide sentiment shifts");
|
||||
println!(" that precede index movements by 2-3 quarters on average.\n");
|
||||
|
||||
println!(" 2. Companies with high narrative-fundamental divergence (>20%)");
|
||||
println!(" show 3x higher volatility in subsequent earnings periods.\n");
|
||||
|
||||
println!(" 3. Sector fragmentation (high coherence dispersion) often precedes");
|
||||
println!(" rotation events and can identify emerging subsector leaders.\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Demo filing analysis structure
|
||||
struct DemoFilingAnalysis {
|
||||
filings_count: usize,
|
||||
coherence_score: f64,
|
||||
fundamental_trend: f64,
|
||||
narrative_trend: f64,
|
||||
risk_factors: Vec<DemoRiskFactor>,
|
||||
fls_sentiment: f64,
|
||||
}
|
||||
|
||||
struct DemoRiskFactor {
|
||||
category: String,
|
||||
severity: f64,
|
||||
}
|
||||
|
||||
/// Generate demo analysis for testing without API access
|
||||
fn generate_demo_analysis(name: &str, sector: &str) -> DemoFilingAnalysis {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
// Generate somewhat realistic patterns based on company
|
||||
let base_coherence = match sector {
|
||||
"Technology" => 0.75 + rng.gen_range(-0.15..0.15),
|
||||
"Healthcare" => 0.70 + rng.gen_range(-0.10..0.10),
|
||||
"Financials" => 0.80 + rng.gen_range(-0.08..0.08),
|
||||
"Consumer" => 0.72 + rng.gen_range(-0.12..0.12),
|
||||
"Automotive" => 0.65 + rng.gen_range(-0.20..0.20),
|
||||
"Semiconductors" => 0.78 + rng.gen_range(-0.10..0.10),
|
||||
_ => 0.70 + rng.gen_range(-0.15..0.15),
|
||||
};
|
||||
|
||||
// Add company-specific variation
|
||||
let (fundamental_trend, narrative_trend) = match name {
|
||||
"NVIDIA Corporation" => (0.35, 0.42), // AI boom - narrative leads
|
||||
"Tesla Inc" => (0.12, 0.28), // High narrative premium
|
||||
"Apple Inc" => (0.08, 0.10), // Well aligned
|
||||
"Microsoft Corporation" => (0.15, 0.18), // Slight narrative lead
|
||||
"Amazon.com Inc" => (0.22, 0.15), // Fundamentals outpacing
|
||||
"Alphabet Inc" => (0.18, 0.12), // Fundamentals stronger
|
||||
"Berkshire Hathaway" => (0.06, 0.04), // Very aligned
|
||||
"Pfizer Inc" => (-0.05, 0.08), // Post-COVID narrative lag
|
||||
"IBM Corporation" => (0.03, -0.02), // Mixed signals
|
||||
"Johnson & Johnson" => (0.05, 0.06), // Stable
|
||||
_ => (rng.gen_range(-0.10..0.20), rng.gen_range(-0.10..0.20)),
|
||||
};
|
||||
|
||||
// Risk factors
|
||||
let risk_categories = ["Regulatory", "Competition", "Supply Chain"];
|
||||
let risk_factors: Vec<DemoRiskFactor> = risk_categories.iter()
|
||||
.map(|cat| DemoRiskFactor {
|
||||
category: cat.to_string(),
|
||||
severity: rng.gen_range(0.3..0.9),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Forward-looking sentiment
|
||||
let fls_sentiment = rng.gen_range(-0.3..0.5);
|
||||
|
||||
DemoFilingAnalysis {
|
||||
filings_count: rng.gen_range(6..12),
|
||||
coherence_score: base_coherence,
|
||||
fundamental_trend,
|
||||
narrative_trend,
|
||||
risk_factors,
|
||||
fls_sentiment,
|
||||
}
|
||||
}
|
||||
327
examples/data/edgar/src/client.rs
Normal file
327
examples/data/edgar/src/client.rs
Normal file
@@ -0,0 +1,327 @@
|
||||
//! SEC EDGAR API client
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use reqwest::{Client, StatusCode};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::{Company, EdgarError, Filing, FilingType, Sector};
|
||||
|
||||
/// SEC EDGAR API client
|
||||
pub struct EdgarClient {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
bulk_url: String,
|
||||
}
|
||||
|
||||
/// Company tickers response
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CompanyTickersResponse {
|
||||
#[serde(flatten)]
|
||||
companies: std::collections::HashMap<String, CompanyEntry>,
|
||||
}
|
||||
|
||||
/// Company entry
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CompanyEntry {
|
||||
cik_str: String,
|
||||
ticker: String,
|
||||
title: String,
|
||||
}
|
||||
|
||||
/// Company facts response
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CompanyFactsResponse {
|
||||
cik: u64,
|
||||
#[serde(rename = "entityName")]
|
||||
entity_name: String,
|
||||
facts: Option<Facts>,
|
||||
}
|
||||
|
||||
/// XBRL facts
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct Facts {
|
||||
#[serde(rename = "us-gaap")]
|
||||
us_gaap: Option<std::collections::HashMap<String, Concept>>,
|
||||
}
|
||||
|
||||
/// XBRL concept
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct Concept {
|
||||
label: String,
|
||||
description: Option<String>,
|
||||
units: std::collections::HashMap<String, Vec<UnitValue>>,
|
||||
}
|
||||
|
||||
/// Unit value
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct UnitValue {
|
||||
#[serde(rename = "end")]
|
||||
end_date: String,
|
||||
val: f64,
|
||||
accn: String,
|
||||
fy: Option<i32>,
|
||||
fp: Option<String>,
|
||||
form: String,
|
||||
filed: String,
|
||||
}
|
||||
|
||||
/// Submissions response
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct SubmissionsResponse {
|
||||
cik: String,
|
||||
name: String,
|
||||
sic: Option<String>,
|
||||
#[serde(rename = "sicDescription")]
|
||||
sic_description: Option<String>,
|
||||
#[serde(rename = "stateOfIncorporation")]
|
||||
state: Option<String>,
|
||||
#[serde(rename = "fiscalYearEnd")]
|
||||
fiscal_year_end: Option<String>,
|
||||
filings: FilingsData,
|
||||
}
|
||||
|
||||
/// Filings data
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct FilingsData {
|
||||
recent: RecentFilings,
|
||||
}
|
||||
|
||||
/// Recent filings
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RecentFilings {
|
||||
#[serde(rename = "accessionNumber")]
|
||||
accession_numbers: Vec<String>,
|
||||
#[serde(rename = "filingDate")]
|
||||
filing_dates: Vec<String>,
|
||||
form: Vec<String>,
|
||||
#[serde(rename = "primaryDocument")]
|
||||
primary_documents: Vec<String>,
|
||||
#[serde(rename = "primaryDocDescription")]
|
||||
descriptions: Vec<String>,
|
||||
}
|
||||
|
||||
impl EdgarClient {
|
||||
/// Create a new EDGAR client
|
||||
///
|
||||
/// SEC requires user agent with company/contact info
|
||||
pub fn new(user_agent: &str, company: &str, email: &str) -> Self {
|
||||
let full_agent = format!("{} ({}, {})", user_agent, company, email);
|
||||
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.user_agent(full_agent)
|
||||
.build()
|
||||
.expect("Failed to build HTTP client");
|
||||
|
||||
Self {
|
||||
client,
|
||||
base_url: "https://data.sec.gov".to_string(),
|
||||
bulk_url: "https://www.sec.gov/cgi-bin/browse-edgar".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Health check
|
||||
pub async fn health_check(&self) -> Result<bool, EdgarError> {
|
||||
let url = format!("{}/submissions/CIK0000320193.json", self.base_url);
|
||||
let response = self.client.get(&url).send().await?;
|
||||
Ok(response.status().is_success())
|
||||
}
|
||||
|
||||
/// Convert ticker to CIK
|
||||
pub async fn ticker_to_cik(&self, ticker: &str) -> Result<String, EdgarError> {
|
||||
let url = format!("{}/files/company_tickers.json", self.base_url);
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(EdgarError::Api("Failed to fetch company tickers".to_string()));
|
||||
}
|
||||
|
||||
let data: CompanyTickersResponse = response.json().await?;
|
||||
|
||||
for entry in data.companies.values() {
|
||||
if entry.ticker.eq_ignore_ascii_case(ticker) {
|
||||
return Ok(entry.cik_str.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Err(EdgarError::InvalidCik(format!("Ticker not found: {}", ticker)))
|
||||
}
|
||||
|
||||
/// Get company info
|
||||
pub async fn get_company(&self, cik: &str) -> Result<Company, EdgarError> {
|
||||
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
|
||||
let url = format!("{}/submissions/CIK{}.json", self.base_url, padded_cik);
|
||||
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
match response.status() {
|
||||
StatusCode::OK => {
|
||||
let data: SubmissionsResponse = response.json().await?;
|
||||
|
||||
Ok(Company {
|
||||
cik: data.cik,
|
||||
name: data.name,
|
||||
ticker: None, // Would need to look up
|
||||
sic_code: data.sic,
|
||||
sic_description: data.sic_description,
|
||||
state: data.state,
|
||||
fiscal_year_end: data.fiscal_year_end,
|
||||
latest_filing: data.filings.recent.filing_dates.first()
|
||||
.and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok()),
|
||||
})
|
||||
}
|
||||
StatusCode::NOT_FOUND => Err(EdgarError::InvalidCik(cik.to_string())),
|
||||
status => Err(EdgarError::Api(format!("Unexpected status: {}", status))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get filings for a company
|
||||
pub async fn get_filings(
|
||||
&self,
|
||||
cik: &str,
|
||||
filing_types: &[FilingType],
|
||||
) -> Result<Vec<Filing>, EdgarError> {
|
||||
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
|
||||
let url = format!("{}/submissions/CIK{}.json", self.base_url, padded_cik);
|
||||
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(EdgarError::Api(format!(
|
||||
"Failed to fetch submissions: {}",
|
||||
response.status()
|
||||
)));
|
||||
}
|
||||
|
||||
let data: SubmissionsResponse = response.json().await?;
|
||||
|
||||
let mut filings = Vec::new();
|
||||
|
||||
for i in 0..data.filings.recent.accession_numbers.len() {
|
||||
let form = &data.filings.recent.form[i];
|
||||
let filing_type = FilingType::from_form(form);
|
||||
|
||||
if filing_types.contains(&filing_type) {
|
||||
let filed_date = NaiveDate::parse_from_str(
|
||||
&data.filings.recent.filing_dates[i],
|
||||
"%Y-%m-%d",
|
||||
)
|
||||
.unwrap_or(NaiveDate::from_ymd_opt(2000, 1, 1).unwrap());
|
||||
|
||||
filings.push(Filing {
|
||||
accession_number: data.filings.recent.accession_numbers[i].clone(),
|
||||
cik: cik.to_string(),
|
||||
filing_type,
|
||||
filed_date,
|
||||
document_url: format!(
|
||||
"https://www.sec.gov/Archives/edgar/data/{}/{}/{}",
|
||||
cik,
|
||||
data.filings.recent.accession_numbers[i].replace("-", ""),
|
||||
data.filings.recent.primary_documents[i]
|
||||
),
|
||||
description: data.filings.recent.descriptions.get(i).cloned(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(filings)
|
||||
}
|
||||
|
||||
/// Get company facts (XBRL financial data)
|
||||
pub async fn get_company_facts(&self, cik: &str) -> Result<CompanyFactsResponse, EdgarError> {
|
||||
let padded_cik = format!("{:0>10}", cik.trim_start_matches('0'));
|
||||
let url = format!(
|
||||
"{}/api/xbrl/companyfacts/CIK{}.json",
|
||||
self.base_url, padded_cik
|
||||
);
|
||||
|
||||
let response = self.client.get(&url).send().await?;
|
||||
|
||||
match response.status() {
|
||||
StatusCode::OK => Ok(response.json().await?),
|
||||
StatusCode::NOT_FOUND => Err(EdgarError::InvalidCik(cik.to_string())),
|
||||
status => Err(EdgarError::Api(format!("Unexpected status: {}", status))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get companies by sector
|
||||
pub async fn get_companies_by_sector(&self, sector: &Sector) -> Result<Vec<Company>, EdgarError> {
|
||||
// Note: This is a simplified implementation
|
||||
// Real implementation would use bulk data or SIC code search
|
||||
let sic_prefix = match sector {
|
||||
Sector::Technology => "73",
|
||||
Sector::Healthcare => "80",
|
||||
Sector::Financials => "60",
|
||||
Sector::ConsumerDiscretionary => "57",
|
||||
Sector::ConsumerStaples => "20",
|
||||
Sector::Energy => "13",
|
||||
Sector::Materials => "28",
|
||||
Sector::Industrials => "35",
|
||||
Sector::Utilities => "49",
|
||||
Sector::RealEstate => "65",
|
||||
Sector::CommunicationServices => "48",
|
||||
Sector::Other => "99",
|
||||
};
|
||||
|
||||
// Return placeholder - would implement full sector search
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
/// Get XBRL financial statement data
|
||||
pub async fn get_financial_data(
|
||||
&self,
|
||||
cik: &str,
|
||||
metrics: &[&str],
|
||||
) -> Result<std::collections::HashMap<String, Vec<(NaiveDate, f64)>>, EdgarError> {
|
||||
let facts = self.get_company_facts(cik).await?;
|
||||
|
||||
let mut result = std::collections::HashMap::new();
|
||||
|
||||
if let Some(facts) = facts.facts {
|
||||
if let Some(us_gaap) = facts.us_gaap {
|
||||
for metric in metrics {
|
||||
if let Some(concept) = us_gaap.get(*metric) {
|
||||
let mut values = Vec::new();
|
||||
|
||||
for (_, unit_values) in &concept.units {
|
||||
for uv in unit_values {
|
||||
if let Ok(date) = NaiveDate::parse_from_str(&uv.end_date, "%Y-%m-%d") {
|
||||
values.push((date, uv.val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
values.sort_by_key(|(d, _)| *d);
|
||||
result.insert(metric.to_string(), values);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Download filing document
|
||||
pub async fn download_filing(&self, url: &str) -> Result<String, EdgarError> {
|
||||
let response = self.client.get(url).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(EdgarError::FilingNotFound(url.to_string()));
|
||||
}
|
||||
|
||||
Ok(response.text().await?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_client_creation() {
|
||||
let client = EdgarClient::new("TestAgent/1.0", "Test Corp", "test@example.com");
|
||||
assert!(client.base_url.contains("data.sec.gov"));
|
||||
}
|
||||
}
|
||||
483
examples/data/edgar/src/coherence.rs
Normal file
483
examples/data/edgar/src/coherence.rs
Normal file
@@ -0,0 +1,483 @@
|
||||
//! Financial coherence analysis using RuVector's min-cut
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Company, Filing, FilingAnalyzer, FinancialStatement, PeerNetwork, XbrlParser, xbrl::statement_to_embedding};
|
||||
use crate::filings::{NarrativeExtractor, FilingAnalysis};
|
||||
|
||||
/// A coherence alert
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CoherenceAlert {
|
||||
/// Alert identifier
|
||||
pub id: String,
|
||||
|
||||
/// Company CIK
|
||||
pub company_cik: String,
|
||||
|
||||
/// Company name
|
||||
pub company_name: String,
|
||||
|
||||
/// Alert timestamp
|
||||
pub timestamp: DateTime<Utc>,
|
||||
|
||||
/// Alert severity
|
||||
pub severity: AlertSeverity,
|
||||
|
||||
/// Divergence type
|
||||
pub divergence_type: DivergenceType,
|
||||
|
||||
/// Coherence score before (0-1)
|
||||
pub coherence_before: f64,
|
||||
|
||||
/// Coherence score after (0-1)
|
||||
pub coherence_after: f64,
|
||||
|
||||
/// Magnitude of change
|
||||
pub magnitude: f64,
|
||||
|
||||
/// Fundamental vector component
|
||||
pub fundamental_score: f64,
|
||||
|
||||
/// Narrative vector component
|
||||
pub narrative_score: f64,
|
||||
|
||||
/// Peer comparison (z-score)
|
||||
pub peer_z_score: f64,
|
||||
|
||||
/// Related companies
|
||||
pub related_companies: Vec<String>,
|
||||
|
||||
/// Interpretation
|
||||
pub interpretation: String,
|
||||
|
||||
/// Evidence
|
||||
pub evidence: Vec<AlertEvidence>,
|
||||
}
|
||||
|
||||
/// Alert severity levels
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
|
||||
pub enum AlertSeverity {
|
||||
/// Informational
|
||||
Info,
|
||||
/// Low concern
|
||||
Low,
|
||||
/// Moderate concern
|
||||
Medium,
|
||||
/// High concern
|
||||
High,
|
||||
/// Critical concern
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl AlertSeverity {
|
||||
/// From magnitude
|
||||
pub fn from_magnitude(magnitude: f64) -> Self {
|
||||
if magnitude < 0.1 {
|
||||
AlertSeverity::Info
|
||||
} else if magnitude < 0.2 {
|
||||
AlertSeverity::Low
|
||||
} else if magnitude < 0.3 {
|
||||
AlertSeverity::Medium
|
||||
} else if magnitude < 0.5 {
|
||||
AlertSeverity::High
|
||||
} else {
|
||||
AlertSeverity::Critical
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type of divergence detected
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum DivergenceType {
|
||||
/// Fundamentals improving, narrative pessimistic
|
||||
FundamentalOutpacing,
|
||||
|
||||
/// Narrative optimistic, fundamentals declining
|
||||
NarrativeLeading,
|
||||
|
||||
/// Company diverging from peer group
|
||||
PeerDivergence,
|
||||
|
||||
/// Sector-wide pattern change
|
||||
SectorShift,
|
||||
|
||||
/// Unusual cross-metric divergence
|
||||
MetricAnomaly,
|
||||
|
||||
/// Historical pattern break
|
||||
PatternBreak,
|
||||
}
|
||||
|
||||
/// Evidence for an alert
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AlertEvidence {
|
||||
/// Evidence type
|
||||
pub evidence_type: String,
|
||||
|
||||
/// Numeric value
|
||||
pub value: f64,
|
||||
|
||||
/// Explanation
|
||||
pub explanation: String,
|
||||
}
|
||||
|
||||
/// Coherence watch for financial monitoring
|
||||
pub struct CoherenceWatch {
|
||||
/// Configuration
|
||||
config: WatchConfig,
|
||||
|
||||
/// Peer network
|
||||
network: PeerNetwork,
|
||||
|
||||
/// Historical coherence by company
|
||||
coherence_history: HashMap<String, Vec<(DateTime<Utc>, f64)>>,
|
||||
|
||||
/// Detected alerts
|
||||
alerts: Vec<CoherenceAlert>,
|
||||
|
||||
/// Filing analyzer
|
||||
filing_analyzer: FilingAnalyzer,
|
||||
|
||||
/// XBRL parser
|
||||
xbrl_parser: XbrlParser,
|
||||
|
||||
/// Narrative extractor
|
||||
narrative_extractor: NarrativeExtractor,
|
||||
}
|
||||
|
||||
/// Watch configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WatchConfig {
|
||||
/// Weight for fundamental metrics
|
||||
pub fundamental_weight: f64,
|
||||
|
||||
/// Weight for narrative analysis
|
||||
pub narrative_weight: f64,
|
||||
|
||||
/// Weight for peer comparison
|
||||
pub peer_weight: f64,
|
||||
|
||||
/// Minimum divergence to alert
|
||||
pub divergence_threshold: f64,
|
||||
|
||||
/// Lookback quarters for trend analysis
|
||||
pub lookback_quarters: usize,
|
||||
|
||||
/// Enable peer comparison
|
||||
pub compare_peers: bool,
|
||||
|
||||
/// Alert on sector-wide shifts
|
||||
pub sector_alerts: bool,
|
||||
}
|
||||
|
||||
impl Default for WatchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
fundamental_weight: 0.4,
|
||||
narrative_weight: 0.3,
|
||||
peer_weight: 0.3,
|
||||
divergence_threshold: 0.2,
|
||||
lookback_quarters: 8,
|
||||
compare_peers: true,
|
||||
sector_alerts: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CoherenceWatch {
|
||||
/// Create a new coherence watch
|
||||
pub fn new(network: PeerNetwork, config: WatchConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
network,
|
||||
coherence_history: HashMap::new(),
|
||||
alerts: Vec::new(),
|
||||
filing_analyzer: FilingAnalyzer::new(Default::default()),
|
||||
xbrl_parser: XbrlParser::new(Default::default()),
|
||||
narrative_extractor: NarrativeExtractor::new(Default::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyze a company for coherence
|
||||
pub fn analyze_company(
|
||||
&mut self,
|
||||
company: &Company,
|
||||
filings: &[Filing],
|
||||
statements: &[FinancialStatement],
|
||||
filing_contents: &HashMap<String, String>,
|
||||
) -> Option<CoherenceAlert> {
|
||||
if filings.is_empty() || statements.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Compute fundamental vector
|
||||
let latest_statement = statements.last()?;
|
||||
let fundamental_embedding = statement_to_embedding(latest_statement);
|
||||
|
||||
// Compute narrative vector
|
||||
let latest_filing = filings.last()?;
|
||||
let content = filing_contents.get(&latest_filing.accession_number)?;
|
||||
let analysis = self.filing_analyzer.analyze(content, latest_filing);
|
||||
let narrative_embedding = self.narrative_extractor.extract_embedding(&analysis);
|
||||
|
||||
// Compute coherence score
|
||||
let coherence = self.compute_coherence(&fundamental_embedding, &narrative_embedding);
|
||||
|
||||
// Get historical coherence to check for significant change
|
||||
let cik = &company.cik;
|
||||
let should_alert = {
|
||||
let history = self.coherence_history.entry(cik.clone()).or_default();
|
||||
if !history.is_empty() {
|
||||
let prev_coherence = history.last()?.1;
|
||||
let delta = (coherence - prev_coherence).abs();
|
||||
if delta > self.config.divergence_threshold {
|
||||
Some(prev_coherence)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Create alert if needed (outside the mutable borrow scope)
|
||||
let alert = should_alert.map(|prev_coherence| {
|
||||
self.create_alert(
|
||||
company,
|
||||
prev_coherence,
|
||||
coherence,
|
||||
&fundamental_embedding,
|
||||
&narrative_embedding,
|
||||
&analysis,
|
||||
)
|
||||
});
|
||||
|
||||
// Update history
|
||||
self.coherence_history
|
||||
.entry(cik.clone())
|
||||
.or_default()
|
||||
.push((Utc::now(), coherence));
|
||||
|
||||
alert
|
||||
}
|
||||
|
||||
/// Compute coherence between fundamental and narrative vectors
|
||||
fn compute_coherence(&self, fundamental: &[f32], narrative: &[f32]) -> f64 {
|
||||
// Cosine similarity
|
||||
let dot_product: f32 = fundamental.iter()
|
||||
.zip(narrative.iter())
|
||||
.map(|(a, b)| a * b)
|
||||
.sum();
|
||||
|
||||
let norm_f: f32 = fundamental.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_n: f32 = narrative.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_f > 0.0 && norm_n > 0.0 {
|
||||
((dot_product / (norm_f * norm_n) + 1.0) / 2.0) as f64 // Scale to 0-1
|
||||
} else {
|
||||
0.5
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an alert from analysis
|
||||
fn create_alert(
|
||||
&self,
|
||||
company: &Company,
|
||||
prev_coherence: f64,
|
||||
curr_coherence: f64,
|
||||
fundamental: &[f32],
|
||||
narrative: &[f32],
|
||||
analysis: &FilingAnalysis,
|
||||
) -> CoherenceAlert {
|
||||
let magnitude = (curr_coherence - prev_coherence).abs();
|
||||
let severity = AlertSeverity::from_magnitude(magnitude);
|
||||
|
||||
// Determine divergence type
|
||||
let fundamental_score: f64 = fundamental.iter().map(|x| *x as f64).sum::<f64>() / fundamental.len() as f64;
|
||||
let narrative_score = analysis.sentiment.unwrap_or(0.0);
|
||||
|
||||
let divergence_type = if fundamental_score > 0.0 && narrative_score < 0.0 {
|
||||
DivergenceType::FundamentalOutpacing
|
||||
} else if narrative_score > 0.0 && fundamental_score < 0.0 {
|
||||
DivergenceType::NarrativeLeading
|
||||
} else {
|
||||
DivergenceType::PatternBreak
|
||||
};
|
||||
|
||||
// Compute peer z-score (simplified)
|
||||
let peer_z_score = self.compute_peer_z_score(&company.cik, curr_coherence);
|
||||
|
||||
// Build evidence
|
||||
let evidence = vec![
|
||||
AlertEvidence {
|
||||
evidence_type: "coherence_change".to_string(),
|
||||
value: magnitude,
|
||||
explanation: format!(
|
||||
"Coherence {} by {:.1}%",
|
||||
if curr_coherence > prev_coherence { "increased" } else { "decreased" },
|
||||
magnitude * 100.0
|
||||
),
|
||||
},
|
||||
AlertEvidence {
|
||||
evidence_type: "fundamental_score".to_string(),
|
||||
value: fundamental_score,
|
||||
explanation: format!("Fundamental metric score: {:.3}", fundamental_score),
|
||||
},
|
||||
AlertEvidence {
|
||||
evidence_type: "narrative_sentiment".to_string(),
|
||||
value: narrative_score,
|
||||
explanation: format!("Narrative sentiment: {:.3}", narrative_score),
|
||||
},
|
||||
];
|
||||
|
||||
let interpretation = self.interpret_divergence(divergence_type, severity, peer_z_score);
|
||||
|
||||
CoherenceAlert {
|
||||
id: format!("alert_{}_{}", company.cik, Utc::now().timestamp()),
|
||||
company_cik: company.cik.clone(),
|
||||
company_name: company.name.clone(),
|
||||
timestamp: Utc::now(),
|
||||
severity,
|
||||
divergence_type,
|
||||
coherence_before: prev_coherence,
|
||||
coherence_after: curr_coherence,
|
||||
magnitude,
|
||||
fundamental_score,
|
||||
narrative_score,
|
||||
peer_z_score,
|
||||
related_companies: self.find_related_companies(&company.cik),
|
||||
interpretation,
|
||||
evidence,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute peer group z-score
|
||||
fn compute_peer_z_score(&self, cik: &str, coherence: f64) -> f64 {
|
||||
let peer_coherences: Vec<f64> = self.coherence_history
|
||||
.iter()
|
||||
.filter(|(k, _)| *k != cik)
|
||||
.filter_map(|(_, history)| history.last().map(|(_, c)| *c))
|
||||
.collect();
|
||||
|
||||
if peer_coherences.len() < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let mean: f64 = peer_coherences.iter().sum::<f64>() / peer_coherences.len() as f64;
|
||||
let variance: f64 = peer_coherences.iter().map(|c| (c - mean).powi(2)).sum::<f64>()
|
||||
/ peer_coherences.len() as f64;
|
||||
let std_dev = variance.sqrt();
|
||||
|
||||
if std_dev > 0.0 {
|
||||
(coherence - mean) / std_dev
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Find related companies from network
|
||||
fn find_related_companies(&self, cik: &str) -> Vec<String> {
|
||||
self.network.get_peers(cik)
|
||||
.iter()
|
||||
.take(5)
|
||||
.map(|p| p.to_string())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Interpret divergence
|
||||
fn interpret_divergence(
|
||||
&self,
|
||||
divergence_type: DivergenceType,
|
||||
severity: AlertSeverity,
|
||||
peer_z_score: f64,
|
||||
) -> String {
|
||||
let severity_str = match severity {
|
||||
AlertSeverity::Info => "Minor",
|
||||
AlertSeverity::Low => "Notable",
|
||||
AlertSeverity::Medium => "Significant",
|
||||
AlertSeverity::High => "Major",
|
||||
AlertSeverity::Critical => "Critical",
|
||||
};
|
||||
|
||||
let divergence_str = match divergence_type {
|
||||
DivergenceType::FundamentalOutpacing =>
|
||||
"Fundamentals improving faster than narrative suggests",
|
||||
DivergenceType::NarrativeLeading =>
|
||||
"Narrative more optimistic than fundamentals support",
|
||||
DivergenceType::PeerDivergence =>
|
||||
"Company diverging from peer group pattern",
|
||||
DivergenceType::SectorShift =>
|
||||
"Sector-wide coherence shift detected",
|
||||
DivergenceType::MetricAnomaly =>
|
||||
"Unusual cross-metric relationship detected",
|
||||
DivergenceType::PatternBreak =>
|
||||
"Historical coherence pattern broken",
|
||||
};
|
||||
|
||||
let peer_context = if peer_z_score.abs() > 2.0 {
|
||||
format!(". Company is {:.1} std devs from peer mean", peer_z_score)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
format!("{} divergence: {}{}", severity_str, divergence_str, peer_context)
|
||||
}
|
||||
|
||||
/// Detect sector-wide coherence shifts
|
||||
pub fn detect_sector_shifts(&self) -> Vec<CoherenceAlert> {
|
||||
// Would analyze all companies in sector using min-cut on peer network
|
||||
vec![]
|
||||
}
|
||||
|
||||
/// Get all alerts
|
||||
pub fn alerts(&self) -> &[CoherenceAlert] {
|
||||
&self.alerts
|
||||
}
|
||||
|
||||
/// Get alerts by severity
|
||||
pub fn alerts_by_severity(&self, min_severity: AlertSeverity) -> Vec<&CoherenceAlert> {
|
||||
self.alerts
|
||||
.iter()
|
||||
.filter(|a| a.severity >= min_severity)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get company coherence history
|
||||
pub fn coherence_history(&self, cik: &str) -> Option<&Vec<(DateTime<Utc>, f64)>> {
|
||||
self.coherence_history.get(cik)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::network::PeerNetworkBuilder;
|
||||
|
||||
#[test]
|
||||
fn test_alert_severity() {
|
||||
assert_eq!(AlertSeverity::from_magnitude(0.05), AlertSeverity::Info);
|
||||
assert_eq!(AlertSeverity::from_magnitude(0.15), AlertSeverity::Low);
|
||||
assert_eq!(AlertSeverity::from_magnitude(0.25), AlertSeverity::Medium);
|
||||
assert_eq!(AlertSeverity::from_magnitude(0.4), AlertSeverity::High);
|
||||
assert_eq!(AlertSeverity::from_magnitude(0.6), AlertSeverity::Critical);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_coherence_computation() {
|
||||
let network = PeerNetworkBuilder::new().build();
|
||||
let config = WatchConfig::default();
|
||||
let watch = CoherenceWatch::new(network, config);
|
||||
|
||||
let vec_a = vec![1.0, 0.0, 0.0];
|
||||
let vec_b = vec![1.0, 0.0, 0.0];
|
||||
let coherence = watch.compute_coherence(&vec_a, &vec_b);
|
||||
assert!((coherence - 1.0).abs() < 0.001);
|
||||
|
||||
let vec_c = vec![-1.0, 0.0, 0.0];
|
||||
let coherence_neg = watch.compute_coherence(&vec_a, &vec_c);
|
||||
assert!((coherence_neg - 0.0).abs() < 0.001);
|
||||
}
|
||||
}
|
||||
508
examples/data/edgar/src/filings.rs
Normal file
508
examples/data/edgar/src/filings.rs
Normal file
@@ -0,0 +1,508 @@
|
||||
//! SEC filing types and analysis
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// SEC filing types
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum FilingType {
|
||||
/// Annual report
|
||||
TenK,
|
||||
/// Quarterly report
|
||||
TenQ,
|
||||
/// Current report (material events)
|
||||
EightK,
|
||||
/// Proxy statement
|
||||
DefFourteen,
|
||||
/// Insider trading
|
||||
FormFour,
|
||||
/// Institutional holdings
|
||||
ThirteenF,
|
||||
/// Registration statement
|
||||
S1,
|
||||
/// Other filing type
|
||||
Other,
|
||||
}
|
||||
|
||||
impl FilingType {
|
||||
/// Parse from SEC form name
|
||||
pub fn from_form(form: &str) -> Self {
|
||||
match form.to_uppercase().as_str() {
|
||||
"10-K" | "10-K/A" => FilingType::TenK,
|
||||
"10-Q" | "10-Q/A" => FilingType::TenQ,
|
||||
"8-K" | "8-K/A" => FilingType::EightK,
|
||||
"DEF 14A" | "DEFA14A" => FilingType::DefFourteen,
|
||||
"4" | "4/A" => FilingType::FormFour,
|
||||
"13F-HR" | "13F-HR/A" => FilingType::ThirteenF,
|
||||
"S-1" | "S-1/A" => FilingType::S1,
|
||||
_ => FilingType::Other,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get SEC form name
|
||||
pub fn form_name(&self) -> &str {
|
||||
match self {
|
||||
FilingType::TenK => "10-K",
|
||||
FilingType::TenQ => "10-Q",
|
||||
FilingType::EightK => "8-K",
|
||||
FilingType::DefFourteen => "DEF 14A",
|
||||
FilingType::FormFour => "4",
|
||||
FilingType::ThirteenF => "13F-HR",
|
||||
FilingType::S1 => "S-1",
|
||||
FilingType::Other => "Other",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A SEC filing
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Filing {
|
||||
/// Accession number (unique identifier)
|
||||
pub accession_number: String,
|
||||
|
||||
/// Company CIK
|
||||
pub cik: String,
|
||||
|
||||
/// Filing type
|
||||
pub filing_type: FilingType,
|
||||
|
||||
/// Date filed
|
||||
pub filed_date: NaiveDate,
|
||||
|
||||
/// Primary document URL
|
||||
pub document_url: String,
|
||||
|
||||
/// Description
|
||||
pub description: Option<String>,
|
||||
}
|
||||
|
||||
/// Filing analyzer for extracting insights
|
||||
pub struct FilingAnalyzer {
|
||||
/// Configuration
|
||||
config: AnalyzerConfig,
|
||||
}
|
||||
|
||||
/// Analyzer configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AnalyzerConfig {
|
||||
/// Extract key phrases
|
||||
pub extract_phrases: bool,
|
||||
|
||||
/// Sentiment analysis
|
||||
pub analyze_sentiment: bool,
|
||||
|
||||
/// Risk factor extraction
|
||||
pub extract_risks: bool,
|
||||
|
||||
/// Forward-looking statement extraction
|
||||
pub extract_fls: bool,
|
||||
}
|
||||
|
||||
impl Default for AnalyzerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
extract_phrases: true,
|
||||
analyze_sentiment: true,
|
||||
extract_risks: true,
|
||||
extract_fls: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FilingAnalyzer {
|
||||
/// Create a new analyzer
|
||||
pub fn new(config: AnalyzerConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Analyze a filing document
|
||||
pub fn analyze(&self, content: &str, filing: &Filing) -> FilingAnalysis {
|
||||
let sections = self.extract_sections(content, &filing.filing_type);
|
||||
let sentiment = if self.config.analyze_sentiment {
|
||||
Some(self.compute_sentiment(content))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let risk_factors = if self.config.extract_risks {
|
||||
self.extract_risk_factors(content)
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let forward_looking = if self.config.extract_fls {
|
||||
self.extract_forward_looking(content)
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let key_phrases = if self.config.extract_phrases {
|
||||
self.extract_key_phrases(content)
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
FilingAnalysis {
|
||||
accession_number: filing.accession_number.clone(),
|
||||
sections,
|
||||
sentiment,
|
||||
risk_factors,
|
||||
forward_looking,
|
||||
key_phrases,
|
||||
word_count: content.split_whitespace().count(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract standard sections from filing
|
||||
fn extract_sections(&self, content: &str, filing_type: &FilingType) -> HashMap<String, String> {
|
||||
let mut sections = HashMap::new();
|
||||
|
||||
// Section patterns vary by filing type
|
||||
let section_patterns = match filing_type {
|
||||
FilingType::TenK => vec![
|
||||
("Business", "Item 1"),
|
||||
("RiskFactors", "Item 1A"),
|
||||
("Properties", "Item 2"),
|
||||
("Legal", "Item 3"),
|
||||
("MDA", "Item 7"),
|
||||
("Financials", "Item 8"),
|
||||
],
|
||||
FilingType::TenQ => vec![
|
||||
("Financials", "Part I"),
|
||||
("MDA", "Item 2"),
|
||||
("Controls", "Item 4"),
|
||||
],
|
||||
FilingType::EightK => vec![
|
||||
("Item", "Item"),
|
||||
],
|
||||
_ => vec![],
|
||||
};
|
||||
|
||||
// Simplified extraction - would use better text segmentation
|
||||
for (name, marker) in section_patterns {
|
||||
if let Some(idx) = content.find(marker) {
|
||||
let section_text = &content[idx..];
|
||||
let end_idx = section_text.len().min(5000);
|
||||
sections.insert(name.to_string(), section_text[..end_idx].to_string());
|
||||
}
|
||||
}
|
||||
|
||||
sections
|
||||
}
|
||||
|
||||
/// Compute sentiment score (-1 to 1)
|
||||
fn compute_sentiment(&self, content: &str) -> f64 {
|
||||
let positive_words = [
|
||||
"growth", "profit", "increased", "strong", "improved", "successful",
|
||||
"innovative", "opportunity", "favorable", "exceeded", "achieved",
|
||||
];
|
||||
|
||||
let negative_words = [
|
||||
"loss", "decline", "decreased", "weak", "challenging", "risk",
|
||||
"uncertain", "adverse", "impairment", "litigation", "default",
|
||||
];
|
||||
|
||||
let content_lower = content.to_lowercase();
|
||||
let words: Vec<&str> = content_lower.split_whitespace().collect();
|
||||
let total_words = words.len() as f64;
|
||||
|
||||
let positive_count = positive_words
|
||||
.iter()
|
||||
.map(|w| words.iter().filter(|word| word.contains(w)).count())
|
||||
.sum::<usize>() as f64;
|
||||
|
||||
let negative_count = negative_words
|
||||
.iter()
|
||||
.map(|w| words.iter().filter(|word| word.contains(w)).count())
|
||||
.sum::<usize>() as f64;
|
||||
|
||||
if total_words > 0.0 {
|
||||
(positive_count - negative_count) / total_words.sqrt()
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract risk factors
|
||||
fn extract_risk_factors(&self, content: &str) -> Vec<RiskFactor> {
|
||||
let mut risks = Vec::new();
|
||||
|
||||
let risk_patterns = [
|
||||
("Regulatory", "regulatory", "regulation", "compliance"),
|
||||
("Competition", "competitive", "competition", "competitors"),
|
||||
("Cybersecurity", "cybersecurity", "data breach", "security"),
|
||||
("Litigation", "litigation", "lawsuit", "legal proceedings"),
|
||||
("Economic", "economic conditions", "recession", "downturn"),
|
||||
("Supply Chain", "supply chain", "suppliers", "logistics"),
|
||||
];
|
||||
|
||||
let content_lower = content.to_lowercase();
|
||||
|
||||
for (category, pattern1, pattern2, pattern3) in risk_patterns {
|
||||
let count = [pattern1, pattern2, pattern3]
|
||||
.iter()
|
||||
.map(|p| content_lower.matches(p).count())
|
||||
.sum::<usize>();
|
||||
|
||||
if count > 0 {
|
||||
risks.push(RiskFactor {
|
||||
category: category.to_string(),
|
||||
severity: (count as f64 / 10.0).min(1.0),
|
||||
mentions: count,
|
||||
sample_text: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
risks.sort_by(|a, b| b.severity.partial_cmp(&a.severity).unwrap_or(std::cmp::Ordering::Equal));
|
||||
risks
|
||||
}
|
||||
|
||||
/// Extract forward-looking statements
|
||||
fn extract_forward_looking(&self, content: &str) -> Vec<ForwardLookingStatement> {
|
||||
let mut statements = Vec::new();
|
||||
|
||||
let fls_patterns = [
|
||||
"expect", "anticipate", "believe", "estimate", "project",
|
||||
"forecast", "intend", "plan", "may", "will", "should",
|
||||
];
|
||||
|
||||
let sentences: Vec<&str> = content.split(&['.', '!', '?'][..]).collect();
|
||||
|
||||
for sentence in sentences {
|
||||
let sentence_lower = sentence.to_lowercase();
|
||||
|
||||
for pattern in fls_patterns {
|
||||
if sentence_lower.contains(pattern) {
|
||||
// Check if it's truly forward-looking
|
||||
if sentence_lower.contains("future") ||
|
||||
sentence_lower.contains("expect") ||
|
||||
sentence_lower.contains("anticipate") {
|
||||
statements.push(ForwardLookingStatement {
|
||||
text: sentence.trim().to_string(),
|
||||
sentiment: self.compute_sentiment(sentence),
|
||||
confidence: 0.7,
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Limit to most significant
|
||||
statements.truncate(20);
|
||||
statements
|
||||
}
|
||||
|
||||
/// Extract key phrases
|
||||
fn extract_key_phrases(&self, content: &str) -> Vec<KeyPhrase> {
|
||||
let mut phrases = HashMap::new();
|
||||
|
||||
// Simple n-gram extraction
|
||||
let words: Vec<&str> = content
|
||||
.split_whitespace()
|
||||
.filter(|w| w.len() > 3)
|
||||
.collect();
|
||||
|
||||
// Bigrams
|
||||
for window in words.windows(2) {
|
||||
let phrase = format!("{} {}", window[0].to_lowercase(), window[1].to_lowercase());
|
||||
if self.is_meaningful_phrase(&phrase) {
|
||||
*phrases.entry(phrase).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut result: Vec<KeyPhrase> = phrases
|
||||
.into_iter()
|
||||
.filter(|(_, count)| *count >= 3)
|
||||
.map(|(phrase, count)| KeyPhrase {
|
||||
phrase,
|
||||
frequency: count,
|
||||
importance: count as f64 / words.len() as f64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
result.sort_by(|a, b| b.frequency.cmp(&a.frequency));
|
||||
result.truncate(50);
|
||||
result
|
||||
}
|
||||
|
||||
/// Check if phrase is meaningful
|
||||
fn is_meaningful_phrase(&self, phrase: &str) -> bool {
|
||||
let stop_phrases = ["the", "and", "for", "this", "that", "with"];
|
||||
!stop_phrases.iter().any(|s| phrase.starts_with(s))
|
||||
}
|
||||
}
|
||||
|
||||
/// Analysis result
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FilingAnalysis {
|
||||
/// Filing accession number
|
||||
pub accession_number: String,
|
||||
|
||||
/// Extracted sections
|
||||
pub sections: HashMap<String, String>,
|
||||
|
||||
/// Overall sentiment score
|
||||
pub sentiment: Option<f64>,
|
||||
|
||||
/// Risk factors
|
||||
pub risk_factors: Vec<RiskFactor>,
|
||||
|
||||
/// Forward-looking statements
|
||||
pub forward_looking: Vec<ForwardLookingStatement>,
|
||||
|
||||
/// Key phrases
|
||||
pub key_phrases: Vec<KeyPhrase>,
|
||||
|
||||
/// Total word count
|
||||
pub word_count: usize,
|
||||
}
|
||||
|
||||
/// A risk factor
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RiskFactor {
|
||||
/// Risk category
|
||||
pub category: String,
|
||||
|
||||
/// Severity score (0-1)
|
||||
pub severity: f64,
|
||||
|
||||
/// Number of mentions
|
||||
pub mentions: usize,
|
||||
|
||||
/// Sample text
|
||||
pub sample_text: Option<String>,
|
||||
}
|
||||
|
||||
/// A forward-looking statement
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ForwardLookingStatement {
|
||||
/// Statement text
|
||||
pub text: String,
|
||||
|
||||
/// Sentiment score
|
||||
pub sentiment: f64,
|
||||
|
||||
/// Confidence that this is FLS
|
||||
pub confidence: f64,
|
||||
}
|
||||
|
||||
/// A key phrase
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct KeyPhrase {
|
||||
/// Phrase text
|
||||
pub phrase: String,
|
||||
|
||||
/// Frequency count
|
||||
pub frequency: usize,
|
||||
|
||||
/// Importance score
|
||||
pub importance: f64,
|
||||
}
|
||||
|
||||
/// Narrative extractor for text-to-vector
|
||||
pub struct NarrativeExtractor {
|
||||
/// Configuration
|
||||
config: ExtractorConfig,
|
||||
}
|
||||
|
||||
/// Extractor configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExtractorConfig {
|
||||
/// Target embedding dimension
|
||||
pub embedding_dim: usize,
|
||||
|
||||
/// Use TF-IDF weighting
|
||||
pub use_tfidf: bool,
|
||||
|
||||
/// Normalize embeddings
|
||||
pub normalize: bool,
|
||||
}
|
||||
|
||||
impl Default for ExtractorConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
embedding_dim: 128,
|
||||
use_tfidf: true,
|
||||
normalize: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NarrativeExtractor {
|
||||
/// Create a new extractor
|
||||
pub fn new(config: ExtractorConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Extract embedding from filing analysis
|
||||
pub fn extract_embedding(&self, analysis: &FilingAnalysis) -> Vec<f32> {
|
||||
let mut embedding = Vec::with_capacity(self.config.embedding_dim);
|
||||
|
||||
// Sentiment feature
|
||||
embedding.push(analysis.sentiment.unwrap_or(0.0) as f32);
|
||||
|
||||
// Word count (normalized)
|
||||
embedding.push((analysis.word_count as f64 / 100000.0).min(1.0) as f32);
|
||||
|
||||
// Risk factor features
|
||||
let total_risk_severity: f64 = analysis.risk_factors.iter().map(|r| r.severity).sum();
|
||||
embedding.push((total_risk_severity / 5.0).min(1.0) as f32);
|
||||
|
||||
// FLS sentiment
|
||||
let fls_sentiment: f64 = analysis.forward_looking
|
||||
.iter()
|
||||
.map(|f| f.sentiment)
|
||||
.sum::<f64>() / analysis.forward_looking.len().max(1) as f64;
|
||||
embedding.push(fls_sentiment as f32);
|
||||
|
||||
// Key phrase diversity
|
||||
let phrase_diversity = analysis.key_phrases.len() as f64 / 100.0;
|
||||
embedding.push(phrase_diversity.min(1.0) as f32);
|
||||
|
||||
// Pad to target dimension
|
||||
while embedding.len() < self.config.embedding_dim {
|
||||
embedding.push(0.0);
|
||||
}
|
||||
|
||||
// Normalize
|
||||
if self.config.normalize {
|
||||
let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
if norm > 0.0 {
|
||||
for x in &mut embedding {
|
||||
*x /= norm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
embedding
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_filing_type_from_form() {
|
||||
assert_eq!(FilingType::from_form("10-K"), FilingType::TenK);
|
||||
assert_eq!(FilingType::from_form("10-Q"), FilingType::TenQ);
|
||||
assert_eq!(FilingType::from_form("8-K"), FilingType::EightK);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sentiment_analysis() {
|
||||
let config = AnalyzerConfig::default();
|
||||
let analyzer = FilingAnalyzer::new(config);
|
||||
|
||||
let positive_text = "Growth and profit increased significantly. Strong performance exceeded expectations.";
|
||||
let sentiment = analyzer.compute_sentiment(positive_text);
|
||||
assert!(sentiment > 0.0);
|
||||
|
||||
let negative_text = "Loss and decline due to challenging conditions. Risk of default increased.";
|
||||
let sentiment = analyzer.compute_sentiment(negative_text);
|
||||
assert!(sentiment < 0.0);
|
||||
}
|
||||
}
|
||||
601
examples/data/edgar/src/lib.rs
Normal file
601
examples/data/edgar/src/lib.rs
Normal file
@@ -0,0 +1,601 @@
|
||||
//! # RuVector SEC EDGAR Integration
|
||||
//!
|
||||
//! Integration with SEC EDGAR for financial intelligence, peer group coherence
|
||||
//! analysis, and narrative drift detection.
|
||||
//!
|
||||
//! ## Core Capabilities
|
||||
//!
|
||||
//! - **Peer Network Graph**: Model company relationships via shared investors, sectors
|
||||
//! - **Coherence Watch**: Detect when fundamentals diverge from narrative (10-K text)
|
||||
//! - **Risk Signal Detection**: Use min-cut for structural discontinuities
|
||||
//! - **Cross-Company Analysis**: Track contagion and sector-wide patterns
|
||||
//!
|
||||
//! ## Data Sources
|
||||
//!
|
||||
//! ### SEC EDGAR
|
||||
//! - **XBRL Financial Statements**: Standardized accounting data (2009-present)
|
||||
//! - **10-K/10-Q Filings**: Annual/quarterly reports with narrative
|
||||
//! - **Form 4**: Insider trading disclosures
|
||||
//! - **13F**: Institutional holdings
|
||||
//! - **8-K**: Material events
|
||||
//!
|
||||
//! ## Quick Start
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use ruvector_data_edgar::{
|
||||
//! EdgarClient, PeerNetwork, CoherenceWatch, XbrlParser, FilingAnalyzer,
|
||||
//! };
|
||||
//!
|
||||
//! // Build peer network from 13F holdings
|
||||
//! let network = PeerNetwork::from_sector("technology")
|
||||
//! .with_min_market_cap(1_000_000_000)
|
||||
//! .build()
|
||||
//! .await?;
|
||||
//!
|
||||
//! // Create coherence watch
|
||||
//! let watch = CoherenceWatch::new(network);
|
||||
//!
|
||||
//! // Analyze for divergence
|
||||
//! let alerts = watch.detect_divergence(
|
||||
//! narrative_weight: 0.4,
|
||||
//! lookback_quarters: 8,
|
||||
//! ).await?;
|
||||
//!
|
||||
//! for alert in alerts {
|
||||
//! println!("{}: {}", alert.company, alert.interpretation);
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
|
||||
pub mod client;
|
||||
pub mod xbrl;
|
||||
pub mod filings;
|
||||
pub mod coherence;
|
||||
pub mod network;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, NaiveDate, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
pub use client::EdgarClient;
|
||||
pub use xbrl::{XbrlParser, FinancialStatement, XbrlFact, XbrlContext};
|
||||
pub use filings::{Filing, FilingType, FilingAnalyzer, NarrativeExtractor};
|
||||
pub use coherence::{CoherenceWatch, CoherenceAlert, AlertSeverity, DivergenceType};
|
||||
pub use network::{PeerNetwork, PeerNetworkBuilder, CompanyNode, PeerEdge};
|
||||
|
||||
use ruvector_data_framework::{DataRecord, DataSource, FrameworkError, Relationship, Result};
|
||||
|
||||
/// EDGAR-specific error types
|
||||
#[derive(Error, Debug)]
|
||||
pub enum EdgarError {
|
||||
/// API request failed
|
||||
#[error("API error: {0}")]
|
||||
Api(String),
|
||||
|
||||
/// Invalid CIK
|
||||
#[error("Invalid CIK: {0}")]
|
||||
InvalidCik(String),
|
||||
|
||||
/// XBRL parsing failed
|
||||
#[error("XBRL parse error: {0}")]
|
||||
XbrlParse(String),
|
||||
|
||||
/// Filing not found
|
||||
#[error("Filing not found: {0}")]
|
||||
FilingNotFound(String),
|
||||
|
||||
/// Network error
|
||||
#[error("Network error: {0}")]
|
||||
Network(#[from] reqwest::Error),
|
||||
|
||||
/// Data format error
|
||||
#[error("Data format error: {0}")]
|
||||
DataFormat(String),
|
||||
}
|
||||
|
||||
impl From<EdgarError> for FrameworkError {
|
||||
fn from(e: EdgarError) -> Self {
|
||||
FrameworkError::Ingestion(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for EDGAR data source
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EdgarConfig {
|
||||
/// User agent (required by SEC)
|
||||
pub user_agent: String,
|
||||
|
||||
/// Company name for user agent
|
||||
pub company_name: String,
|
||||
|
||||
/// Contact email (required by SEC)
|
||||
pub contact_email: String,
|
||||
|
||||
/// Rate limit (requests per second)
|
||||
pub rate_limit: u32,
|
||||
|
||||
/// Include historical data
|
||||
pub include_historical: bool,
|
||||
|
||||
/// Filing types to fetch
|
||||
pub filing_types: Vec<FilingType>,
|
||||
}
|
||||
|
||||
impl Default for EdgarConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
user_agent: "RuVector/0.1.0".to_string(),
|
||||
company_name: "Research Project".to_string(),
|
||||
contact_email: "contact@example.com".to_string(),
|
||||
rate_limit: 10, // SEC allows 10 requests/second
|
||||
include_historical: true,
|
||||
filing_types: vec![FilingType::TenK, FilingType::TenQ],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A company entity
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Company {
|
||||
/// CIK (Central Index Key)
|
||||
pub cik: String,
|
||||
|
||||
/// Company name
|
||||
pub name: String,
|
||||
|
||||
/// Ticker symbol
|
||||
pub ticker: Option<String>,
|
||||
|
||||
/// SIC code (industry)
|
||||
pub sic_code: Option<String>,
|
||||
|
||||
/// SIC description
|
||||
pub sic_description: Option<String>,
|
||||
|
||||
/// State of incorporation
|
||||
pub state: Option<String>,
|
||||
|
||||
/// Fiscal year end
|
||||
pub fiscal_year_end: Option<String>,
|
||||
|
||||
/// Latest filing date
|
||||
pub latest_filing: Option<NaiveDate>,
|
||||
}
|
||||
|
||||
/// A financial metric
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FinancialMetric {
|
||||
/// Company CIK
|
||||
pub cik: String,
|
||||
|
||||
/// Filing accession number
|
||||
pub accession: String,
|
||||
|
||||
/// Report date
|
||||
pub report_date: NaiveDate,
|
||||
|
||||
/// Metric name (XBRL tag)
|
||||
pub metric_name: String,
|
||||
|
||||
/// Value
|
||||
pub value: f64,
|
||||
|
||||
/// Unit
|
||||
pub unit: String,
|
||||
|
||||
/// Is audited
|
||||
pub audited: bool,
|
||||
|
||||
/// Context (annual, quarterly, etc.)
|
||||
pub context: String,
|
||||
}
|
||||
|
||||
/// Financial ratio
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum FinancialRatio {
|
||||
/// Current ratio (current assets / current liabilities)
|
||||
CurrentRatio,
|
||||
/// Quick ratio ((current assets - inventory) / current liabilities)
|
||||
QuickRatio,
|
||||
/// Debt to equity
|
||||
DebtToEquity,
|
||||
/// Return on equity
|
||||
ReturnOnEquity,
|
||||
/// Return on assets
|
||||
ReturnOnAssets,
|
||||
/// Gross margin
|
||||
GrossMargin,
|
||||
/// Operating margin
|
||||
OperatingMargin,
|
||||
/// Net margin
|
||||
NetMargin,
|
||||
/// Asset turnover
|
||||
AssetTurnover,
|
||||
/// Inventory turnover
|
||||
InventoryTurnover,
|
||||
/// Price to earnings
|
||||
PriceToEarnings,
|
||||
/// Price to book
|
||||
PriceToBook,
|
||||
}
|
||||
|
||||
impl FinancialRatio {
|
||||
/// Compute ratio from financial data
|
||||
pub fn compute(&self, data: &HashMap<String, f64>) -> Option<f64> {
|
||||
match self {
|
||||
FinancialRatio::CurrentRatio => {
|
||||
let current_assets = data.get("Assets Current")?;
|
||||
let current_liabilities = data.get("Liabilities Current")?;
|
||||
if *current_liabilities != 0.0 {
|
||||
Some(current_assets / current_liabilities)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FinancialRatio::DebtToEquity => {
|
||||
let total_debt = data.get("Debt")?;
|
||||
let equity = data.get("Stockholders Equity")?;
|
||||
if *equity != 0.0 {
|
||||
Some(total_debt / equity)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FinancialRatio::NetMargin => {
|
||||
let net_income = data.get("Net Income")?;
|
||||
let revenue = data.get("Revenue")?;
|
||||
if *revenue != 0.0 {
|
||||
Some(net_income / revenue)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FinancialRatio::ReturnOnEquity => {
|
||||
let net_income = data.get("Net Income")?;
|
||||
let equity = data.get("Stockholders Equity")?;
|
||||
if *equity != 0.0 {
|
||||
Some(net_income / equity)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FinancialRatio::ReturnOnAssets => {
|
||||
let net_income = data.get("Net Income")?;
|
||||
let assets = data.get("Assets")?;
|
||||
if *assets != 0.0 {
|
||||
Some(net_income / assets)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None, // Add more implementations as needed
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sector classification
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum Sector {
|
||||
/// Technology
|
||||
Technology,
|
||||
/// Healthcare
|
||||
Healthcare,
|
||||
/// Financial services
|
||||
Financials,
|
||||
/// Consumer discretionary
|
||||
ConsumerDiscretionary,
|
||||
/// Consumer staples
|
||||
ConsumerStaples,
|
||||
/// Energy
|
||||
Energy,
|
||||
/// Materials
|
||||
Materials,
|
||||
/// Industrials
|
||||
Industrials,
|
||||
/// Utilities
|
||||
Utilities,
|
||||
/// Real estate
|
||||
RealEstate,
|
||||
/// Communication services
|
||||
CommunicationServices,
|
||||
/// Other/Unknown
|
||||
Other,
|
||||
}
|
||||
|
||||
impl Sector {
|
||||
/// Get sector from SIC code
|
||||
pub fn from_sic(sic: &str) -> Self {
|
||||
match sic.chars().next() {
|
||||
Some('7') => Sector::Technology,
|
||||
Some('8') => Sector::Healthcare,
|
||||
Some('6') => Sector::Financials,
|
||||
Some('5') => Sector::ConsumerDiscretionary,
|
||||
Some('2') => Sector::ConsumerStaples,
|
||||
Some('1') => Sector::Energy,
|
||||
Some('3') => Sector::Materials,
|
||||
Some('4') => Sector::Industrials,
|
||||
_ => Sector::Other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// EDGAR data source for the framework
|
||||
pub struct EdgarSource {
|
||||
client: EdgarClient,
|
||||
config: EdgarConfig,
|
||||
ciks: Vec<String>,
|
||||
}
|
||||
|
||||
impl EdgarSource {
|
||||
/// Create a new EDGAR data source
|
||||
pub fn new(config: EdgarConfig) -> Self {
|
||||
let client = EdgarClient::new(
|
||||
&config.user_agent,
|
||||
&config.company_name,
|
||||
&config.contact_email,
|
||||
);
|
||||
|
||||
Self {
|
||||
client,
|
||||
config,
|
||||
ciks: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add CIKs to fetch
|
||||
pub fn with_ciks(mut self, ciks: Vec<String>) -> Self {
|
||||
self.ciks = ciks;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add companies by ticker
|
||||
pub async fn with_tickers(mut self, tickers: &[&str]) -> Result<Self> {
|
||||
for ticker in tickers {
|
||||
if let Ok(cik) = self.client.ticker_to_cik(ticker).await {
|
||||
self.ciks.push(cik);
|
||||
}
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Add all companies in a sector
|
||||
pub async fn with_sector(mut self, sector: Sector) -> Result<Self> {
|
||||
let companies = self.client.get_companies_by_sector(§or).await?;
|
||||
self.ciks.extend(companies.into_iter().map(|c| c.cik));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl DataSource for EdgarSource {
|
||||
fn source_id(&self) -> &str {
|
||||
"edgar"
|
||||
}
|
||||
|
||||
async fn fetch_batch(
|
||||
&self,
|
||||
cursor: Option<String>,
|
||||
batch_size: usize,
|
||||
) -> Result<(Vec<DataRecord>, Option<String>)> {
|
||||
let start_idx: usize = cursor.as_ref().and_then(|c| c.parse().ok()).unwrap_or(0);
|
||||
|
||||
let end_idx = (start_idx + batch_size).min(self.ciks.len());
|
||||
|
||||
let mut records = Vec::new();
|
||||
|
||||
for cik in &self.ciks[start_idx..end_idx] {
|
||||
// Fetch filings for this CIK
|
||||
match self.client.get_filings(cik, &self.config.filing_types).await {
|
||||
Ok(filings) => {
|
||||
for filing in filings {
|
||||
records.push(filing_to_record(filing));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to fetch filings for CIK {}: {}", cik, e);
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting
|
||||
if self.config.rate_limit > 0 {
|
||||
let delay = 1000 / self.config.rate_limit as u64;
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(delay)).await;
|
||||
}
|
||||
}
|
||||
|
||||
let next_cursor = if end_idx < self.ciks.len() {
|
||||
Some(end_idx.to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok((records, next_cursor))
|
||||
}
|
||||
|
||||
async fn total_count(&self) -> Result<Option<u64>> {
|
||||
Ok(Some(self.ciks.len() as u64))
|
||||
}
|
||||
|
||||
async fn health_check(&self) -> Result<bool> {
|
||||
self.client.health_check().await.map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a filing to a data record
|
||||
fn filing_to_record(filing: Filing) -> DataRecord {
|
||||
let mut relationships = Vec::new();
|
||||
|
||||
// Company relationship
|
||||
relationships.push(Relationship {
|
||||
target_id: filing.cik.clone(),
|
||||
rel_type: "filed_by".to_string(),
|
||||
weight: 1.0,
|
||||
properties: HashMap::new(),
|
||||
});
|
||||
|
||||
DataRecord {
|
||||
id: filing.accession_number.clone(),
|
||||
source: "edgar".to_string(),
|
||||
record_type: format!("{:?}", filing.filing_type).to_lowercase(),
|
||||
timestamp: filing.filed_date.and_hms_opt(0, 0, 0)
|
||||
.map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
|
||||
.unwrap_or_else(Utc::now),
|
||||
data: serde_json::to_value(&filing).unwrap_or_default(),
|
||||
embedding: None,
|
||||
relationships,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fundamental vs Narrative analyzer
|
||||
///
|
||||
/// Detects divergence between quantitative financial data
|
||||
/// and qualitative narrative in filings.
|
||||
pub struct FundamentalNarrativeAnalyzer {
|
||||
/// Configuration
|
||||
config: AnalyzerConfig,
|
||||
}
|
||||
|
||||
/// Analyzer configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AnalyzerConfig {
|
||||
/// Weight for fundamental metrics
|
||||
pub fundamental_weight: f64,
|
||||
|
||||
/// Weight for narrative sentiment
|
||||
pub narrative_weight: f64,
|
||||
|
||||
/// Minimum divergence to flag
|
||||
pub divergence_threshold: f64,
|
||||
|
||||
/// Lookback periods
|
||||
pub lookback_periods: usize,
|
||||
}
|
||||
|
||||
impl Default for AnalyzerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
fundamental_weight: 0.6,
|
||||
narrative_weight: 0.4,
|
||||
divergence_threshold: 0.3,
|
||||
lookback_periods: 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FundamentalNarrativeAnalyzer {
|
||||
/// Create a new analyzer
|
||||
pub fn new(config: AnalyzerConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Analyze a company for fundamental vs narrative divergence
|
||||
pub fn analyze(&self, company: &Company, filings: &[Filing]) -> Option<DivergenceResult> {
|
||||
if filings.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Extract fundamental changes
|
||||
let fundamental_trend = self.compute_fundamental_trend(filings);
|
||||
|
||||
// Extract narrative sentiment changes
|
||||
let narrative_trend = self.compute_narrative_trend(filings);
|
||||
|
||||
// Detect divergence
|
||||
let divergence = (fundamental_trend - narrative_trend).abs();
|
||||
|
||||
if divergence > self.config.divergence_threshold {
|
||||
Some(DivergenceResult {
|
||||
company_cik: company.cik.clone(),
|
||||
company_name: company.name.clone(),
|
||||
fundamental_trend,
|
||||
narrative_trend,
|
||||
divergence_score: divergence,
|
||||
interpretation: self.interpret_divergence(fundamental_trend, narrative_trend),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute fundamental trend
|
||||
fn compute_fundamental_trend(&self, filings: &[Filing]) -> f64 {
|
||||
// Simplified: would compute from actual XBRL data
|
||||
// Positive = improving financials, negative = declining
|
||||
0.0
|
||||
}
|
||||
|
||||
/// Compute narrative sentiment trend
|
||||
fn compute_narrative_trend(&self, filings: &[Filing]) -> f64 {
|
||||
// Simplified: would analyze text sentiment
|
||||
// Positive = optimistic narrative, negative = pessimistic
|
||||
0.0
|
||||
}
|
||||
|
||||
/// Interpret the divergence
|
||||
fn interpret_divergence(&self, fundamental: f64, narrative: f64) -> String {
|
||||
if fundamental > 0.0 && narrative < 0.0 {
|
||||
"Fundamentals improving but narrative pessimistic - potential undervaluation".to_string()
|
||||
} else if fundamental < 0.0 && narrative > 0.0 {
|
||||
"Fundamentals declining but narrative optimistic - potential risk".to_string()
|
||||
} else if fundamental > narrative {
|
||||
"Narrative lagging behind fundamental improvement".to_string()
|
||||
} else {
|
||||
"Narrative ahead of fundamental reality".to_string()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of divergence analysis
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DivergenceResult {
|
||||
/// Company CIK
|
||||
pub company_cik: String,
|
||||
|
||||
/// Company name
|
||||
pub company_name: String,
|
||||
|
||||
/// Fundamental trend (-1 to 1)
|
||||
pub fundamental_trend: f64,
|
||||
|
||||
/// Narrative trend (-1 to 1)
|
||||
pub narrative_trend: f64,
|
||||
|
||||
/// Divergence score (0 to 2)
|
||||
pub divergence_score: f64,
|
||||
|
||||
/// Human-readable interpretation
|
||||
pub interpretation: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_sector_from_sic() {
|
||||
assert_eq!(Sector::from_sic("7370"), Sector::Technology);
|
||||
assert_eq!(Sector::from_sic("6000"), Sector::Financials);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_config() {
|
||||
let config = EdgarConfig::default();
|
||||
assert_eq!(config.rate_limit, 10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_financial_ratio_compute() {
|
||||
let mut data = HashMap::new();
|
||||
data.insert("Assets Current".to_string(), 100.0);
|
||||
data.insert("Liabilities Current".to_string(), 50.0);
|
||||
|
||||
let ratio = FinancialRatio::CurrentRatio.compute(&data);
|
||||
assert!(ratio.is_some());
|
||||
assert!((ratio.unwrap() - 2.0).abs() < 0.001);
|
||||
}
|
||||
}
|
||||
469
examples/data/edgar/src/network.rs
Normal file
469
examples/data/edgar/src/network.rs
Normal file
@@ -0,0 +1,469 @@
|
||||
//! Peer network construction for financial coherence analysis
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Company, Sector};
|
||||
|
||||
/// A company node in the peer network
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CompanyNode {
|
||||
/// Company CIK
|
||||
pub cik: String,
|
||||
|
||||
/// Company name
|
||||
pub name: String,
|
||||
|
||||
/// Ticker symbol
|
||||
pub ticker: Option<String>,
|
||||
|
||||
/// Sector
|
||||
pub sector: Sector,
|
||||
|
||||
/// Market cap (if known)
|
||||
pub market_cap: Option<f64>,
|
||||
|
||||
/// Number of peer connections
|
||||
pub peer_count: usize,
|
||||
|
||||
/// Average peer similarity
|
||||
pub avg_peer_similarity: f64,
|
||||
}
|
||||
|
||||
/// An edge between peer companies
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PeerEdge {
|
||||
/// Source company CIK
|
||||
pub source: String,
|
||||
|
||||
/// Target company CIK
|
||||
pub target: String,
|
||||
|
||||
/// Similarity score (0-1)
|
||||
pub similarity: f64,
|
||||
|
||||
/// Relationship type
|
||||
pub relationship_type: PeerRelationType,
|
||||
|
||||
/// Edge weight for min-cut
|
||||
pub weight: f64,
|
||||
|
||||
/// Evidence for relationship
|
||||
pub evidence: Vec<String>,
|
||||
}
|
||||
|
||||
/// Type of peer relationship
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum PeerRelationType {
|
||||
/// Same sector/industry
|
||||
SameSector,
|
||||
/// Shared institutional investors
|
||||
SharedInvestors,
|
||||
/// Similar size (market cap)
|
||||
SimilarSize,
|
||||
/// Supply chain relationship
|
||||
SupplyChain,
|
||||
/// Competitor
|
||||
Competitor,
|
||||
/// Multiple relationship types
|
||||
Multiple,
|
||||
}
|
||||
|
||||
/// Peer network graph
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PeerNetwork {
|
||||
/// Network identifier
|
||||
pub id: String,
|
||||
|
||||
/// Nodes (companies)
|
||||
pub nodes: HashMap<String, CompanyNode>,
|
||||
|
||||
/// Edges (peer relationships)
|
||||
pub edges: Vec<PeerEdge>,
|
||||
|
||||
/// Creation time
|
||||
pub created_at: DateTime<Utc>,
|
||||
|
||||
/// Network statistics
|
||||
pub stats: NetworkStats,
|
||||
}
|
||||
|
||||
/// Network statistics
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct NetworkStats {
|
||||
/// Number of nodes
|
||||
pub node_count: usize,
|
||||
|
||||
/// Number of edges
|
||||
pub edge_count: usize,
|
||||
|
||||
/// Average similarity
|
||||
pub avg_similarity: f64,
|
||||
|
||||
/// Network density
|
||||
pub density: f64,
|
||||
|
||||
/// Average degree
|
||||
pub avg_degree: f64,
|
||||
|
||||
/// Number of connected components
|
||||
pub num_components: usize,
|
||||
|
||||
/// Computed min-cut value
|
||||
pub min_cut_value: Option<f64>,
|
||||
}
|
||||
|
||||
impl PeerNetwork {
|
||||
/// Create an empty network
|
||||
pub fn new(id: &str) -> Self {
|
||||
Self {
|
||||
id: id.to_string(),
|
||||
nodes: HashMap::new(),
|
||||
edges: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
stats: NetworkStats::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a company node
|
||||
pub fn add_node(&mut self, node: CompanyNode) {
|
||||
self.nodes.insert(node.cik.clone(), node);
|
||||
self.update_stats();
|
||||
}
|
||||
|
||||
/// Add a peer edge
|
||||
pub fn add_edge(&mut self, edge: PeerEdge) {
|
||||
self.edges.push(edge);
|
||||
self.update_stats();
|
||||
}
|
||||
|
||||
/// Get a node by CIK
|
||||
pub fn get_node(&self, cik: &str) -> Option<&CompanyNode> {
|
||||
self.nodes.get(cik)
|
||||
}
|
||||
|
||||
/// Get peer CIKs for a company
|
||||
pub fn get_peers(&self, cik: &str) -> Vec<&str> {
|
||||
self.edges
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
if e.source == cik {
|
||||
Some(e.target.as_str())
|
||||
} else if e.target == cik {
|
||||
Some(e.source.as_str())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get edges for a company
|
||||
pub fn get_edges_for_company(&self, cik: &str) -> Vec<&PeerEdge> {
|
||||
self.edges
|
||||
.iter()
|
||||
.filter(|e| e.source == cik || e.target == cik)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Update statistics
|
||||
fn update_stats(&mut self) {
|
||||
self.stats.node_count = self.nodes.len();
|
||||
self.stats.edge_count = self.edges.len();
|
||||
|
||||
if !self.edges.is_empty() {
|
||||
self.stats.avg_similarity = self.edges.iter().map(|e| e.similarity).sum::<f64>()
|
||||
/ self.edges.len() as f64;
|
||||
}
|
||||
|
||||
let max_edges = if self.nodes.len() > 1 {
|
||||
self.nodes.len() * (self.nodes.len() - 1) / 2
|
||||
} else {
|
||||
1
|
||||
};
|
||||
self.stats.density = self.edges.len() as f64 / max_edges as f64;
|
||||
|
||||
if !self.nodes.is_empty() {
|
||||
self.stats.avg_degree = (2 * self.edges.len()) as f64 / self.nodes.len() as f64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert to format for RuVector min-cut
|
||||
pub fn to_mincut_edges(&self) -> Vec<(u64, u64, f64)> {
|
||||
let mut node_ids: HashMap<&str, u64> = HashMap::new();
|
||||
let mut next_id = 0u64;
|
||||
|
||||
for cik in self.nodes.keys() {
|
||||
node_ids.insert(cik.as_str(), next_id);
|
||||
next_id += 1;
|
||||
}
|
||||
|
||||
self.edges
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
let src_id = node_ids.get(e.source.as_str())?;
|
||||
let tgt_id = node_ids.get(e.target.as_str())?;
|
||||
Some((*src_id, *tgt_id, e.weight))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get node ID mapping
|
||||
pub fn node_id_mapping(&self) -> HashMap<u64, String> {
|
||||
let mut mapping = HashMap::new();
|
||||
for (i, cik) in self.nodes.keys().enumerate() {
|
||||
mapping.insert(i as u64, cik.clone());
|
||||
}
|
||||
mapping
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for peer networks
|
||||
pub struct PeerNetworkBuilder {
|
||||
id: String,
|
||||
companies: Vec<Company>,
|
||||
min_similarity: f64,
|
||||
max_peers: usize,
|
||||
relationship_types: Vec<PeerRelationType>,
|
||||
}
|
||||
|
||||
impl PeerNetworkBuilder {
|
||||
/// Create a new builder
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
id: format!("network_{}", Utc::now().timestamp()),
|
||||
companies: Vec::new(),
|
||||
min_similarity: 0.3,
|
||||
max_peers: 20,
|
||||
relationship_types: vec![
|
||||
PeerRelationType::SameSector,
|
||||
PeerRelationType::SimilarSize,
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
/// Set network ID
|
||||
pub fn with_id(mut self, id: &str) -> Self {
|
||||
self.id = id.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// Add companies
|
||||
pub fn add_companies(mut self, companies: Vec<Company>) -> Self {
|
||||
self.companies.extend(companies);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set minimum similarity threshold
|
||||
pub fn min_similarity(mut self, min: f64) -> Self {
|
||||
self.min_similarity = min;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set maximum peers per company
|
||||
pub fn max_peers(mut self, max: usize) -> Self {
|
||||
self.max_peers = max;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set relationship types to consider
|
||||
pub fn relationship_types(mut self, types: Vec<PeerRelationType>) -> Self {
|
||||
self.relationship_types = types;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the network
|
||||
pub fn build(self) -> PeerNetwork {
|
||||
let mut network = PeerNetwork::new(&self.id);
|
||||
|
||||
// Add nodes
|
||||
for company in &self.companies {
|
||||
let sector = company.sic_code
|
||||
.as_ref()
|
||||
.map(|s| Sector::from_sic(s))
|
||||
.unwrap_or(Sector::Other);
|
||||
|
||||
let node = CompanyNode {
|
||||
cik: company.cik.clone(),
|
||||
name: company.name.clone(),
|
||||
ticker: company.ticker.clone(),
|
||||
sector,
|
||||
market_cap: None,
|
||||
peer_count: 0,
|
||||
avg_peer_similarity: 0.0,
|
||||
};
|
||||
|
||||
network.add_node(node);
|
||||
}
|
||||
|
||||
// Add edges based on relationships
|
||||
for i in 0..self.companies.len() {
|
||||
for j in (i + 1)..self.companies.len() {
|
||||
let company_i = &self.companies[i];
|
||||
let company_j = &self.companies[j];
|
||||
|
||||
let (similarity, rel_type) = self.compute_similarity(company_i, company_j);
|
||||
|
||||
if similarity >= self.min_similarity {
|
||||
let edge = PeerEdge {
|
||||
source: company_i.cik.clone(),
|
||||
target: company_j.cik.clone(),
|
||||
similarity,
|
||||
relationship_type: rel_type,
|
||||
weight: similarity,
|
||||
evidence: self.collect_evidence(company_i, company_j),
|
||||
};
|
||||
|
||||
network.add_edge(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update node statistics
|
||||
for (cik, node) in network.nodes.iter_mut() {
|
||||
let edges = network.edges
|
||||
.iter()
|
||||
.filter(|e| e.source == *cik || e.target == *cik)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
node.peer_count = edges.len();
|
||||
if !edges.is_empty() {
|
||||
node.avg_peer_similarity = edges.iter().map(|e| e.similarity).sum::<f64>()
|
||||
/ edges.len() as f64;
|
||||
}
|
||||
}
|
||||
|
||||
network
|
||||
}
|
||||
|
||||
/// Compute similarity between two companies
|
||||
fn compute_similarity(&self, a: &Company, b: &Company) -> (f64, PeerRelationType) {
|
||||
let mut total_similarity = 0.0;
|
||||
let mut relationship_count = 0;
|
||||
let mut rel_type = PeerRelationType::SameSector;
|
||||
|
||||
// Sector similarity
|
||||
if self.relationship_types.contains(&PeerRelationType::SameSector) {
|
||||
let sector_a = a.sic_code.as_ref().map(|s| Sector::from_sic(s));
|
||||
let sector_b = b.sic_code.as_ref().map(|s| Sector::from_sic(s));
|
||||
|
||||
if sector_a.is_some() && sector_a == sector_b {
|
||||
total_similarity += 0.5;
|
||||
relationship_count += 1;
|
||||
} else if a.sic_code.is_some() && b.sic_code.is_some() {
|
||||
// Same SIC division (first digit)
|
||||
let sic_a = a.sic_code.as_ref().unwrap();
|
||||
let sic_b = b.sic_code.as_ref().unwrap();
|
||||
if !sic_a.is_empty() && !sic_b.is_empty() &&
|
||||
sic_a.chars().next() == sic_b.chars().next() {
|
||||
total_similarity += 0.3;
|
||||
relationship_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Same state
|
||||
if a.state.is_some() && a.state == b.state {
|
||||
total_similarity += 0.2;
|
||||
relationship_count += 1;
|
||||
}
|
||||
|
||||
let similarity = if relationship_count > 0 {
|
||||
total_similarity / relationship_count as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
if relationship_count > 1 {
|
||||
rel_type = PeerRelationType::Multiple;
|
||||
}
|
||||
|
||||
(similarity, rel_type)
|
||||
}
|
||||
|
||||
/// Collect evidence for relationship
|
||||
fn collect_evidence(&self, a: &Company, b: &Company) -> Vec<String> {
|
||||
let mut evidence = Vec::new();
|
||||
|
||||
let sector_a = a.sic_code.as_ref().map(|s| Sector::from_sic(s));
|
||||
let sector_b = b.sic_code.as_ref().map(|s| Sector::from_sic(s));
|
||||
|
||||
if sector_a.is_some() && sector_a == sector_b {
|
||||
evidence.push(format!("Same sector: {:?}", sector_a.unwrap()));
|
||||
}
|
||||
|
||||
if a.state.is_some() && a.state == b.state {
|
||||
evidence.push(format!("Same state: {}", a.state.as_ref().unwrap()));
|
||||
}
|
||||
|
||||
evidence
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PeerNetworkBuilder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_empty_network() {
|
||||
let network = PeerNetwork::new("test");
|
||||
assert_eq!(network.stats.node_count, 0);
|
||||
assert_eq!(network.stats.edge_count, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_builder() {
|
||||
let builder = PeerNetworkBuilder::new()
|
||||
.min_similarity(0.5)
|
||||
.max_peers(10);
|
||||
|
||||
let network = builder.build();
|
||||
assert!(network.nodes.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_peers() {
|
||||
let mut network = PeerNetwork::new("test");
|
||||
|
||||
network.add_node(CompanyNode {
|
||||
cik: "A".to_string(),
|
||||
name: "Company A".to_string(),
|
||||
ticker: None,
|
||||
sector: Sector::Technology,
|
||||
market_cap: None,
|
||||
peer_count: 0,
|
||||
avg_peer_similarity: 0.0,
|
||||
});
|
||||
|
||||
network.add_node(CompanyNode {
|
||||
cik: "B".to_string(),
|
||||
name: "Company B".to_string(),
|
||||
ticker: None,
|
||||
sector: Sector::Technology,
|
||||
market_cap: None,
|
||||
peer_count: 0,
|
||||
avg_peer_similarity: 0.0,
|
||||
});
|
||||
|
||||
network.add_edge(PeerEdge {
|
||||
source: "A".to_string(),
|
||||
target: "B".to_string(),
|
||||
similarity: 0.8,
|
||||
relationship_type: PeerRelationType::SameSector,
|
||||
weight: 0.8,
|
||||
evidence: vec![],
|
||||
});
|
||||
|
||||
let peers = network.get_peers("A");
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers[0], "B");
|
||||
}
|
||||
}
|
||||
338
examples/data/edgar/src/xbrl.rs
Normal file
338
examples/data/edgar/src/xbrl.rs
Normal file
@@ -0,0 +1,338 @@
|
||||
//! XBRL parsing for financial statement extraction
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::EdgarError;
|
||||
|
||||
/// XBRL parser
|
||||
pub struct XbrlParser {
|
||||
/// Configuration
|
||||
config: ParserConfig,
|
||||
}
|
||||
|
||||
/// Parser configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ParserConfig {
|
||||
/// Include all numeric facts
|
||||
pub include_all_facts: bool,
|
||||
|
||||
/// Fact name filters (regex patterns)
|
||||
pub fact_filters: Vec<String>,
|
||||
|
||||
/// Merge duplicate contexts
|
||||
pub merge_contexts: bool,
|
||||
}
|
||||
|
||||
impl Default for ParserConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
include_all_facts: false,
|
||||
fact_filters: vec![
|
||||
"Revenue".to_string(),
|
||||
"NetIncome".to_string(),
|
||||
"Assets".to_string(),
|
||||
"Liabilities".to_string(),
|
||||
"StockholdersEquity".to_string(),
|
||||
],
|
||||
merge_contexts: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsed financial statement
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FinancialStatement {
|
||||
/// Company CIK
|
||||
pub cik: String,
|
||||
|
||||
/// Filing accession number
|
||||
pub accession: String,
|
||||
|
||||
/// Report type (10-K, 10-Q)
|
||||
pub report_type: String,
|
||||
|
||||
/// Period end date
|
||||
pub period_end: NaiveDate,
|
||||
|
||||
/// Is annual (vs quarterly)
|
||||
pub is_annual: bool,
|
||||
|
||||
/// Balance sheet items
|
||||
pub balance_sheet: HashMap<String, f64>,
|
||||
|
||||
/// Income statement items
|
||||
pub income_statement: HashMap<String, f64>,
|
||||
|
||||
/// Cash flow items
|
||||
pub cash_flow: HashMap<String, f64>,
|
||||
|
||||
/// All facts
|
||||
pub all_facts: Vec<XbrlFact>,
|
||||
|
||||
/// Contexts
|
||||
pub contexts: Vec<XbrlContext>,
|
||||
}
|
||||
|
||||
/// An XBRL fact
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct XbrlFact {
|
||||
/// Concept name
|
||||
pub name: String,
|
||||
|
||||
/// Value
|
||||
pub value: f64,
|
||||
|
||||
/// Unit
|
||||
pub unit: String,
|
||||
|
||||
/// Context reference
|
||||
pub context_ref: String,
|
||||
|
||||
/// Decimals precision
|
||||
pub decimals: Option<i32>,
|
||||
|
||||
/// Is negated
|
||||
pub is_negated: bool,
|
||||
}
|
||||
|
||||
/// An XBRL context
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct XbrlContext {
|
||||
/// Context ID
|
||||
pub id: String,
|
||||
|
||||
/// Start date
|
||||
pub start_date: Option<NaiveDate>,
|
||||
|
||||
/// End date / instant
|
||||
pub end_date: NaiveDate,
|
||||
|
||||
/// Is instant (vs duration)
|
||||
pub is_instant: bool,
|
||||
|
||||
/// Segment/scenario dimensions
|
||||
pub dimensions: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl XbrlParser {
|
||||
/// Create a new parser
|
||||
pub fn new(config: ParserConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Parse XBRL document from string
|
||||
pub fn parse(&self, content: &str, cik: &str, accession: &str) -> Result<FinancialStatement, EdgarError> {
|
||||
// This is a simplified parser
|
||||
// Real implementation would use quick-xml or similar
|
||||
|
||||
let contexts = self.parse_contexts(content)?;
|
||||
let facts = self.parse_facts(content)?;
|
||||
|
||||
// Determine period end and type
|
||||
let (period_end, is_annual) = self.determine_period(&contexts)?;
|
||||
|
||||
// Categorize facts
|
||||
let mut balance_sheet = HashMap::new();
|
||||
let mut income_statement = HashMap::new();
|
||||
let mut cash_flow = HashMap::new();
|
||||
|
||||
for fact in &facts {
|
||||
if self.is_balance_sheet_item(&fact.name) {
|
||||
balance_sheet.insert(fact.name.clone(), fact.value);
|
||||
} else if self.is_income_statement_item(&fact.name) {
|
||||
income_statement.insert(fact.name.clone(), fact.value);
|
||||
} else if self.is_cash_flow_item(&fact.name) {
|
||||
cash_flow.insert(fact.name.clone(), fact.value);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FinancialStatement {
|
||||
cik: cik.to_string(),
|
||||
accession: accession.to_string(),
|
||||
report_type: if is_annual { "10-K".to_string() } else { "10-Q".to_string() },
|
||||
period_end,
|
||||
is_annual,
|
||||
balance_sheet,
|
||||
income_statement,
|
||||
cash_flow,
|
||||
all_facts: facts,
|
||||
contexts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse contexts from XBRL
|
||||
fn parse_contexts(&self, content: &str) -> Result<Vec<XbrlContext>, EdgarError> {
|
||||
// Simplified - would use proper XML parsing
|
||||
let mut contexts = Vec::new();
|
||||
|
||||
// Add placeholder context
|
||||
contexts.push(XbrlContext {
|
||||
id: "FY2023".to_string(),
|
||||
start_date: Some(NaiveDate::from_ymd_opt(2023, 1, 1).unwrap()),
|
||||
end_date: NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(),
|
||||
is_instant: false,
|
||||
dimensions: HashMap::new(),
|
||||
});
|
||||
|
||||
Ok(contexts)
|
||||
}
|
||||
|
||||
/// Parse facts from XBRL
|
||||
fn parse_facts(&self, content: &str) -> Result<Vec<XbrlFact>, EdgarError> {
|
||||
// Simplified - would use proper XML parsing
|
||||
let mut facts = Vec::new();
|
||||
|
||||
// Extract numeric values using simple pattern matching
|
||||
// Real implementation would parse XML properly
|
||||
|
||||
Ok(facts)
|
||||
}
|
||||
|
||||
/// Determine period end and whether annual
|
||||
fn determine_period(&self, contexts: &[XbrlContext]) -> Result<(NaiveDate, bool), EdgarError> {
|
||||
// Find the main reporting context
|
||||
for ctx in contexts {
|
||||
if !ctx.is_instant {
|
||||
let duration_days = ctx.start_date
|
||||
.map(|s| (ctx.end_date - s).num_days())
|
||||
.unwrap_or(0);
|
||||
|
||||
let is_annual = duration_days > 300;
|
||||
return Ok((ctx.end_date, is_annual));
|
||||
}
|
||||
}
|
||||
|
||||
// Default to latest instant context
|
||||
if let Some(ctx) = contexts.last() {
|
||||
return Ok((ctx.end_date, true));
|
||||
}
|
||||
|
||||
Err(EdgarError::XbrlParse("No valid context found".to_string()))
|
||||
}
|
||||
|
||||
/// Check if concept is balance sheet item
|
||||
fn is_balance_sheet_item(&self, name: &str) -> bool {
|
||||
let balance_sheet_patterns = [
|
||||
"Assets",
|
||||
"Liabilities",
|
||||
"Equity",
|
||||
"Cash",
|
||||
"Inventory",
|
||||
"Receivable",
|
||||
"Payable",
|
||||
"Debt",
|
||||
"Property",
|
||||
"Goodwill",
|
||||
];
|
||||
|
||||
balance_sheet_patterns.iter().any(|p| name.contains(p))
|
||||
}
|
||||
|
||||
/// Check if concept is income statement item
|
||||
fn is_income_statement_item(&self, name: &str) -> bool {
|
||||
let income_patterns = [
|
||||
"Revenue",
|
||||
"Sales",
|
||||
"Cost",
|
||||
"Expense",
|
||||
"Income",
|
||||
"Profit",
|
||||
"Loss",
|
||||
"Earnings",
|
||||
"EBITDA",
|
||||
"Margin",
|
||||
];
|
||||
|
||||
income_patterns.iter().any(|p| name.contains(p))
|
||||
}
|
||||
|
||||
/// Check if concept is cash flow item
|
||||
fn is_cash_flow_item(&self, name: &str) -> bool {
|
||||
let cash_flow_patterns = [
|
||||
"CashFlow",
|
||||
"Operating",
|
||||
"Investing",
|
||||
"Financing",
|
||||
"Depreciation",
|
||||
"Amortization",
|
||||
"CapitalExpenditure",
|
||||
];
|
||||
|
||||
cash_flow_patterns.iter().any(|p| name.contains(p))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert financial statement to vector embedding
|
||||
pub fn statement_to_embedding(statement: &FinancialStatement) -> Vec<f32> {
|
||||
let mut embedding = Vec::with_capacity(64);
|
||||
|
||||
// Balance sheet ratios
|
||||
let total_assets = statement.balance_sheet.get("Assets").copied().unwrap_or(1.0);
|
||||
let total_liabilities = statement.balance_sheet.get("Liabilities").copied().unwrap_or(0.0);
|
||||
let equity = statement.balance_sheet.get("StockholdersEquity").copied().unwrap_or(1.0);
|
||||
let cash = statement.balance_sheet.get("Cash").copied().unwrap_or(0.0);
|
||||
|
||||
embedding.push((total_liabilities / total_assets) as f32); // Debt ratio
|
||||
embedding.push((cash / total_assets) as f32); // Cash ratio
|
||||
embedding.push((equity / total_assets) as f32); // Equity ratio
|
||||
|
||||
// Income statement ratios
|
||||
let revenue = statement.income_statement.get("Revenue").copied().unwrap_or(1.0);
|
||||
let net_income = statement.income_statement.get("NetIncome").copied().unwrap_or(0.0);
|
||||
let operating_income = statement.income_statement.get("OperatingIncome").copied().unwrap_or(0.0);
|
||||
|
||||
embedding.push((net_income / revenue) as f32); // Net margin
|
||||
embedding.push((operating_income / revenue) as f32); // Operating margin
|
||||
embedding.push((net_income / equity) as f32); // ROE
|
||||
embedding.push((net_income / total_assets) as f32); // ROA
|
||||
|
||||
// Pad to fixed size
|
||||
while embedding.len() < 64 {
|
||||
embedding.push(0.0);
|
||||
}
|
||||
|
||||
// Normalize
|
||||
let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
if norm > 0.0 {
|
||||
for x in &mut embedding {
|
||||
*x /= norm;
|
||||
}
|
||||
}
|
||||
|
||||
embedding
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parser_creation() {
|
||||
let config = ParserConfig::default();
|
||||
let parser = XbrlParser::new(config);
|
||||
assert!(!parser.config.include_all_facts);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_balance_sheet_detection() {
|
||||
let config = ParserConfig::default();
|
||||
let parser = XbrlParser::new(config);
|
||||
|
||||
assert!(parser.is_balance_sheet_item("TotalAssets"));
|
||||
assert!(parser.is_balance_sheet_item("CashAndCashEquivalents"));
|
||||
assert!(!parser.is_balance_sheet_item("Revenue"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_income_statement_detection() {
|
||||
let config = ParserConfig::default();
|
||||
let parser = XbrlParser::new(config);
|
||||
|
||||
assert!(parser.is_income_statement_item("Revenue"));
|
||||
assert!(parser.is_income_statement_item("NetIncome"));
|
||||
assert!(!parser.is_income_statement_item("TotalAssets"));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user