Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
//! Application layer for the Interpretation bounded context.
|
||||
//!
|
||||
//! Contains services for orchestrating interpretation workflows.
|
||||
|
||||
pub mod services;
|
||||
@@ -0,0 +1,769 @@
|
||||
//! Application services for RAB interpretation.
|
||||
//!
|
||||
//! The `InterpretationService` orchestrates the building of evidence packs
|
||||
//! and generation of interpretations with cited claims.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use tracing::{debug, info, instrument, warn};
|
||||
|
||||
use crate::domain::entities::{
|
||||
Claim, ClusterContext, EmbeddingId, EvidencePack, EvidenceRef, EvidenceRefType,
|
||||
Interpretation, NeighborEvidence, RecordingMetadata, SequenceContext, SegmentId,
|
||||
};
|
||||
use crate::domain::repository::{ClusterRepository, EvidencePackRepository};
|
||||
use crate::infrastructure::claim_generator::ClaimGenerator;
|
||||
use crate::infrastructure::evidence_builder::EvidenceBuilder;
|
||||
use crate::templates::InterpretationTemplates;
|
||||
use crate::{Error, Result};
|
||||
|
||||
/// Configuration for the interpretation service.
///
/// Plain data: construct via `Default`, a struct literal, or
/// [`InterpretationConfig::builder`].
#[derive(Debug, Clone)]
pub struct InterpretationConfig {
    /// Maximum number of neighbors to include in evidence packs.
    pub max_neighbors: usize,
    /// Whether to include spectrogram URLs in evidence.
    pub include_spectrograms: bool,
    /// Minimum confidence threshold for claims.
    pub min_claim_confidence: f32,
    /// Maximum number of claims per interpretation.
    pub max_claims: usize,
    /// Whether to include sequence context.
    pub include_sequence_context: bool,
    /// Number of preceding/following segments to include.
    pub sequence_context_window: usize,
    /// Minimum overall confidence to accept an interpretation.
    pub min_interpretation_confidence: f32,
}

impl Default for InterpretationConfig {
    /// Balanced defaults: up to 10 neighbors and 10 claims, spectrograms and
    /// sequence context enabled with a +/-3 segment window, claim threshold 0.5,
    /// interpretation threshold 0.3.
    fn default() -> Self {
        Self {
            max_neighbors: 10,
            include_spectrograms: true,
            min_claim_confidence: 0.5,
            max_claims: 10,
            include_sequence_context: true,
            sequence_context_window: 3,
            min_interpretation_confidence: 0.3,
        }
    }
}
|
||||
|
||||
impl InterpretationConfig {
|
||||
/// Create a new configuration builder
|
||||
pub fn builder() -> InterpretationConfigBuilder {
|
||||
InterpretationConfigBuilder::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for InterpretationConfig
|
||||
#[derive(Debug, Default)]
|
||||
pub struct InterpretationConfigBuilder {
|
||||
config: InterpretationConfig,
|
||||
}
|
||||
|
||||
impl InterpretationConfigBuilder {
|
||||
pub fn max_neighbors(mut self, n: usize) -> Self {
|
||||
self.config.max_neighbors = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn include_spectrograms(mut self, include: bool) -> Self {
|
||||
self.config.include_spectrograms = include;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn min_claim_confidence(mut self, confidence: f32) -> Self {
|
||||
self.config.min_claim_confidence = confidence;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_claims(mut self, n: usize) -> Self {
|
||||
self.config.max_claims = n;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn include_sequence_context(mut self, include: bool) -> Self {
|
||||
self.config.include_sequence_context = include;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn sequence_context_window(mut self, window: usize) -> Self {
|
||||
self.config.sequence_context_window = window;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn min_interpretation_confidence(mut self, confidence: f32) -> Self {
|
||||
self.config.min_interpretation_confidence = confidence;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> InterpretationConfig {
|
||||
self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Neighbor data from vector search (simplified interface).
|
||||
///
|
||||
/// This represents the data returned from the vector space service.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Neighbor {
|
||||
/// The embedding ID of the neighbor
|
||||
pub embedding_id: EmbeddingId,
|
||||
/// Distance from the query (lower = more similar)
|
||||
pub distance: f32,
|
||||
/// Optional metadata about the neighbor
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
impl Neighbor {
|
||||
/// Create a new neighbor
|
||||
pub fn new(embedding_id: EmbeddingId, distance: f32) -> Self {
|
||||
Self {
|
||||
embedding_id,
|
||||
distance,
|
||||
metadata: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add metadata
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = Some(metadata);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for vector space operations needed by the interpretation service.
///
/// This abstracts the vector search operations from sevensense-vector so the
/// interpretation context can be tested with mocks and stays decoupled from
/// the concrete index implementation.
#[async_trait::async_trait]
pub trait VectorSpaceService: Send + Sync {
    /// Find the `k` nearest neighbors for an embedding.
    ///
    /// Neighbors carry a `distance` where lower means more similar.
    async fn find_neighbors(&self, embedding_id: &EmbeddingId, k: usize) -> Result<Vec<Neighbor>>;

    /// Get the embedding vector for an ID; `Ok(None)` when the ID is unknown.
    async fn get_embedding(&self, embedding_id: &EmbeddingId) -> Result<Option<Vec<f32>>>;

    /// Calculate similarity between two embeddings.
    // NOTE(review): the similarity scale (e.g. cosine in [0, 1]) is not
    // specified here — confirm against the implementing service.
    async fn calculate_similarity(
        &self,
        embedding_id_a: &EmbeddingId,
        embedding_id_b: &EmbeddingId,
    ) -> Result<f32>;
}
|
||||
|
||||
/// Trait for sequence operations needed by the interpretation service.
///
/// This abstracts sequence analysis operations from sevensense-analysis.
/// It is an optional collaborator: when absent, interpretations are built
/// without temporal context.
#[async_trait::async_trait]
pub trait SequenceService: Send + Sync {
    /// Get up to `count` segments preceding the given segment in time.
    async fn get_preceding_segments(
        &self,
        segment_id: &SegmentId,
        count: usize,
    ) -> Result<Vec<SegmentId>>;

    /// Get up to `count` segments following the given segment in time.
    async fn get_following_segments(
        &self,
        segment_id: &SegmentId,
        count: usize,
    ) -> Result<Vec<SegmentId>>;

    /// Detect motif patterns in a sequence; `Ok(None)` when no motif is found.
    async fn detect_motif(&self, segment_ids: &[SegmentId]) -> Result<Option<String>>;
}
|
||||
|
||||
/// Trait for metadata lookup operations.
///
/// Resolves embedding IDs to their source-recording metadata, spectrogram
/// visualizations, and (when applicable) the audio segment they represent.
#[async_trait::async_trait]
pub trait MetadataService: Send + Sync {
    /// Get recording metadata for an embedding.
    async fn get_recording_metadata(
        &self,
        embedding_id: &EmbeddingId,
    ) -> Result<RecordingMetadata>;

    /// Get a spectrogram URL for an embedding; `Ok(None)` when unavailable.
    async fn get_spectrogram_url(&self, embedding_id: &EmbeddingId) -> Result<Option<String>>;

    /// Get the segment ID for an embedding (if it represents a segment).
    async fn get_segment_id(&self, embedding_id: &EmbeddingId) -> Result<Option<SegmentId>>;
}
|
||||
|
||||
/// Service for building evidence packs and generating interpretations.
///
/// This is the main entry point for the interpretation bounded context.
pub struct InterpretationService {
    // Required collaborators, injected at construction.
    vector_service: Arc<dyn VectorSpaceService>,
    cluster_repo: Arc<dyn ClusterRepository>,
    metadata_service: Arc<dyn MetadataService>,
    // Optional collaborators, attached via `with_sequence_service` / `with_repository`.
    sequence_service: Option<Arc<dyn SequenceService>>,
    evidence_pack_repo: Option<Arc<dyn EvidencePackRepository>>,
    // Helpers derived from `config` at construction time.
    // NOTE(review): `evidence_builder` is constructed but never referenced in
    // this file's impl — verify it is used elsewhere or remove it.
    evidence_builder: EvidenceBuilder,
    claim_generator: ClaimGenerator,
    config: InterpretationConfig,
}
|
||||
|
||||
impl InterpretationService {
    /// Create a new interpretation service.
    ///
    /// The sequence service and evidence-pack repository are optional and can
    /// be attached afterwards via `with_sequence_service` / `with_repository`.
    pub fn new(
        vector_service: Arc<dyn VectorSpaceService>,
        cluster_repo: Arc<dyn ClusterRepository>,
        metadata_service: Arc<dyn MetadataService>,
        config: InterpretationConfig,
    ) -> Self {
        // Both helpers are parameterized by the same config snapshot.
        let evidence_builder = EvidenceBuilder::new(&config);
        let claim_generator = ClaimGenerator::new(&config);

        Self {
            vector_service,
            cluster_repo,
            metadata_service,
            sequence_service: None,
            evidence_pack_repo: None,
            evidence_builder,
            claim_generator,
            config,
        }
    }

    /// Add a sequence service so evidence packs can include temporal context.
    pub fn with_sequence_service(mut self, service: Arc<dyn SequenceService>) -> Self {
        self.sequence_service = Some(service);
        self
    }

    /// Add an evidence pack repository; packs are then persisted after building.
    pub fn with_repository(mut self, repo: Arc<dyn EvidencePackRepository>) -> Self {
        self.evidence_pack_repo = Some(repo);
        self
    }

    /// Build an evidence pack for a query embedding.
    ///
    /// This gathers all relevant evidence (neighbors, cluster context, sequence context)
    /// and generates an interpretation with cited claims. If a repository was
    /// attached, the finished pack is persisted before being returned.
    ///
    /// # Errors
    /// Fails if neighbor search, cluster lookup, claim generation, or
    /// persistence fails.
    // NOTE(review): `config.min_interpretation_confidence` is never consulted
    // here — packs are returned regardless of overall confidence. Confirm
    // whether low-confidence packs should be rejected or flagged.
    #[instrument(skip(self), fields(query_id = %query_id))]
    pub async fn build_evidence_pack(&self, query_id: &EmbeddingId) -> Result<EvidencePack> {
        info!("Building evidence pack for query: {}", query_id);

        // Step 1: Find neighbors via the vector index.
        let neighbors = self.vector_service
            .find_neighbors(query_id, self.config.max_neighbors)
            .await
            .map_err(|e| Error::VectorServiceError(e.to_string()))?;

        debug!("Found {} neighbors", neighbors.len());

        // Step 2: Enrich each neighbor with recording metadata, cluster
        // assignment, and (optionally) a spectrogram URL.
        let neighbor_evidence = self
            .collect_neighbor_evidence(&neighbors)
            .await?;

        // Step 3: Build cluster context for the query itself.
        let cluster_context = self.build_cluster_context(query_id).await?;

        debug!(
            "Cluster context: assigned={}, confidence={}",
            cluster_context.has_cluster(),
            cluster_context.confidence
        );

        // Step 4: Build sequence context (only if enabled in config AND a
        // sequence service is attached AND the embedding maps to a segment).
        let sequence_context = if self.config.include_sequence_context {
            self.build_sequence_context(query_id).await?
        } else {
            None
        };

        // Step 5: Generate the interpretation (claims + structural description).
        let interpretation = self
            .generate_interpretation_internal(
                query_id,
                &neighbor_evidence,
                &cluster_context,
                &sequence_context,
            )
            .await?;

        // Step 6: Assemble the evidence pack artifact.
        let evidence_pack = EvidencePack::new(
            query_id.clone(),
            neighbor_evidence,
            cluster_context,
            sequence_context,
            interpretation,
        );

        info!(
            "Built evidence pack {} with {} neighbors, confidence={}",
            evidence_pack.id,
            evidence_pack.neighbors.len(),
            evidence_pack.overall_confidence()
        );

        // Step 7: Persist if a repository was attached.
        if let Some(repo) = &self.evidence_pack_repo {
            repo.save(&evidence_pack).await?;
            debug!("Persisted evidence pack {}", evidence_pack.id);
        }

        Ok(evidence_pack)
    }

    /// Generate an interpretation for an existing evidence pack.
    ///
    /// Useful for regenerating interpretations with different parameters
    /// without re-running the (more expensive) evidence gathering.
    #[instrument(skip(self, evidence))]
    pub async fn generate_interpretation(
        &self,
        evidence: &EvidencePack,
    ) -> Result<Interpretation> {
        self.generate_interpretation_internal(
            &evidence.query_embedding_id,
            &evidence.neighbors,
            &evidence.cluster_context,
            &evidence.sequence_context,
        )
        .await
    }

    /// Validate claims against evidence.
    ///
    /// Returns each claim paired with a boolean indicating if it's well-supported.
    /// Validation is sequential; order of the output matches the input.
    #[instrument(skip(self, claims))]
    pub async fn validate_claims(&self, claims: &[Claim]) -> Result<Vec<(Claim, bool)>> {
        let mut results = Vec::with_capacity(claims.len());

        for claim in claims {
            let is_valid = self.validate_single_claim(claim).await?;
            results.push((claim.clone(), is_valid));
        }

        let valid_count = results.iter().filter(|(_, v)| *v).count();
        info!(
            "Validated {} claims: {} valid, {} invalid",
            claims.len(),
            valid_count,
            claims.len() - valid_count
        );

        Ok(results)
    }

    /// Collect neighbor evidence with metadata.
    ///
    /// Per-neighbor lookups are best-effort: metadata falls back to an
    /// "unknown" recording and cluster context falls back to empty rather
    /// than failing the whole pack.
    async fn collect_neighbor_evidence(
        &self,
        neighbors: &[Neighbor],
    ) -> Result<Vec<NeighborEvidence>> {
        let mut evidence = Vec::with_capacity(neighbors.len());

        for neighbor in neighbors {
            // Best-effort metadata lookup; errors degrade to a placeholder.
            let metadata = self
                .metadata_service
                .get_recording_metadata(&neighbor.embedding_id)
                .await
                .unwrap_or_else(|_| RecordingMetadata::new("unknown"));

            let mut neighbor_ev = NeighborEvidence::new(
                neighbor.embedding_id.clone(),
                neighbor.distance,
                metadata,
            );

            // Best-effort cluster lookup; errors degrade to "no cluster".
            let cluster_ctx = self
                .cluster_repo
                .get_cluster_context(&neighbor.embedding_id)
                .await
                .unwrap_or_else(|_| ClusterContext::empty());

            if let Some(cluster_id) = cluster_ctx.assigned_cluster {
                neighbor_ev = neighbor_ev.with_cluster(cluster_id);
            }

            // Spectrogram URL is attached only when enabled AND the lookup
            // succeeds with a URL; failures are silently skipped.
            if self.config.include_spectrograms {
                if let Ok(Some(url)) = self
                    .metadata_service
                    .get_spectrogram_url(&neighbor.embedding_id)
                    .await
                {
                    neighbor_ev = neighbor_ev.with_spectrogram(url);
                }
            }

            evidence.push(neighbor_ev);
        }

        Ok(evidence)
    }

    /// Build cluster context for an embedding.
    ///
    /// Unlike neighbor lookups, a repository failure here is propagated as
    /// `Error::ClusterServiceError`.
    async fn build_cluster_context(&self, embedding_id: &EmbeddingId) -> Result<ClusterContext> {
        self.cluster_repo
            .get_cluster_context(embedding_id)
            .await
            .map_err(|e| Error::ClusterServiceError(e.to_string()))
    }

    /// Build sequence context if a sequence service is available.
    ///
    /// Returns `Ok(None)` when: no sequence service is attached, the embedding
    /// does not map to a segment, or no preceding/following segments exist.
    async fn build_sequence_context(
        &self,
        embedding_id: &EmbeddingId,
    ) -> Result<Option<SequenceContext>> {
        // Without a sequence service there is nothing to do.
        let sequence_service = match &self.sequence_service {
            Some(s) => s,
            None => return Ok(None),
        };

        // Sequence context only makes sense for embeddings that are segments.
        let segment_id = match self.metadata_service.get_segment_id(embedding_id).await? {
            Some(id) => id,
            None => return Ok(None),
        };

        let window = self.config.sequence_context_window;

        // Neighbor-in-time lookups are best-effort; errors become empty lists.
        let preceding = sequence_service
            .get_preceding_segments(&segment_id, window)
            .await
            .unwrap_or_default();

        let following = sequence_service
            .get_following_segments(&segment_id, window)
            .await
            .unwrap_or_default();

        if preceding.is_empty() && following.is_empty() {
            return Ok(None);
        }

        // Motif detection runs over preceding + current + following, in order.
        let mut all_segments = preceding.clone();
        all_segments.push(segment_id);
        all_segments.extend(following.clone());

        // Errors from motif detection are treated as "no motif".
        let motif = sequence_service.detect_motif(&all_segments).await.ok().flatten();

        let context = SequenceContext::new(preceding, following);
        let context = if let Some(m) = motif {
            context.with_motif(m)
        } else {
            context
        };

        Ok(Some(context))
    }

    /// Internal implementation of interpretation generation.
    ///
    /// Produces a structural description plus claims filtered by
    /// `min_claim_confidence` and capped at `max_claims`; the overall
    /// confidence is the mean confidence of the surviving claims (0.0 if none).
    async fn generate_interpretation_internal(
        &self,
        query_id: &EmbeddingId,
        neighbors: &[NeighborEvidence],
        cluster_context: &ClusterContext,
        sequence_context: &Option<SequenceContext>,
    ) -> Result<Interpretation> {
        // Generate structural description from the available evidence.
        let structural_description = self
            .generate_structural_description(neighbors, cluster_context, sequence_context);

        // Generate claims with evidence citations.
        let claims = self
            .claim_generator
            .generate_claims(query_id, neighbors, cluster_context, sequence_context)
            .await?;

        // Keep only sufficiently-confident claims, up to the configured cap.
        // NOTE(review): claims are truncated in generator order, not sorted by
        // confidence first — confirm the generator emits best-first.
        let claims: Vec<Claim> = claims
            .into_iter()
            .filter(|c| c.confidence >= self.config.min_claim_confidence)
            .take(self.config.max_claims)
            .collect();

        // Overall confidence = arithmetic mean of surviving claim confidences.
        let confidence = if claims.is_empty() {
            0.0
        } else {
            let sum: f32 = claims.iter().map(|c| c.confidence).sum();
            sum / claims.len() as f32
        };

        Ok(Interpretation::new(structural_description, claims, confidence))
    }

    /// Generate a structural description of the acoustic signal.
    ///
    /// Concatenates template-rendered fragments for whichever evidence kinds
    /// are present (neighbors, taxa, cluster, sequence); returns a fixed
    /// fallback sentence when no evidence is available.
    fn generate_structural_description(
        &self,
        neighbors: &[NeighborEvidence],
        cluster_context: &ClusterContext,
        sequence_context: &Option<SequenceContext>,
    ) -> String {
        let templates = InterpretationTemplates::new();

        let mut parts = Vec::new();

        // Describe based on neighbors.
        if !neighbors.is_empty() {
            // Mean distance, clamped so similarity stays in [0, 1].
            let avg_distance: f32 = neighbors.iter().map(|n| n.distance).sum::<f32>()
                / neighbors.len() as f32;
            let similarity = 1.0 - avg_distance.min(1.0);

            parts.push(templates.neighbor_description(neighbors.len(), similarity));

            // Add taxon info if any neighbor carries one (duplicates are kept).
            let taxa: Vec<&str> = neighbors
                .iter()
                .filter_map(|n| n.recording_metadata.taxon.as_deref())
                .collect();
            if !taxa.is_empty() {
                parts.push(templates.taxon_description(&taxa));
            }
        }

        // Describe cluster context.
        if cluster_context.has_cluster() {
            let label = cluster_context
                .cluster_label
                .as_deref()
                .unwrap_or("unlabeled");
            parts.push(templates.cluster_description(
                label,
                cluster_context.confidence,
                cluster_context.exemplar_similarity,
            ));
        }

        // Describe sequence context.
        if let Some(seq) = sequence_context {
            if seq.has_temporal_context() {
                parts.push(templates.sequence_description(
                    seq.sequence_length(),
                    seq.detected_motif.as_deref(),
                ));
            }
        }

        if parts.is_empty() {
            "Insufficient evidence for structural description.".to_string()
        } else {
            parts.join(" ")
        }
    }

    /// Validate a single claim against its evidence.
    ///
    /// A claim is valid when it (1) cites at least one evidence reference,
    /// (2) meets the configured confidence threshold, and (3) every cited
    /// Neighbor/Cluster reference resolves against the backing services.
    /// Sequence/Taxon references are derived evidence and accepted as-is.
    async fn validate_single_claim(&self, claim: &Claim) -> Result<bool> {
        // A claim is valid if it has evidence AND confidence is above threshold.
        if claim.evidence_refs.is_empty() {
            warn!("Claim has no evidence references: {}", claim.statement);
            return Ok(false);
        }

        if claim.confidence < self.config.min_claim_confidence {
            debug!(
                "Claim confidence {} below threshold {}: {}",
                claim.confidence, self.config.min_claim_confidence, claim.statement
            );
            return Ok(false);
        }

        // Verify each evidence reference exists; first failure short-circuits.
        for evidence_ref in &claim.evidence_refs {
            let exists = match evidence_ref.ref_type {
                EvidenceRefType::Neighbor => {
                    // Lookup errors count as "not found" rather than failing
                    // the whole validation call.
                    let emb_id = EmbeddingId::new(&evidence_ref.ref_id);
                    self.vector_service
                        .get_embedding(&emb_id)
                        .await
                        .map(|e| e.is_some())
                        .unwrap_or(false)
                }
                EvidenceRefType::Cluster => {
                    let cluster_id = crate::domain::entities::ClusterId::new(&evidence_ref.ref_id);
                    self.cluster_repo
                        .get_cluster_label(&cluster_id)
                        .await
                        .is_ok()
                }
                EvidenceRefType::Sequence | EvidenceRefType::Taxon => {
                    // These are derived evidence, assume valid if present.
                    true
                }
            };

            if !exists {
                warn!(
                    "Evidence reference not found: {} ({})",
                    evidence_ref.ref_id, evidence_ref.ref_type
                );
                return Ok(false);
            }
        }

        Ok(true)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::repository::InMemoryClusterRepository;
    use std::collections::HashMap;
    use std::sync::RwLock;

    // Mock implementations for testing.

    /// In-memory vector service: canned neighbor lists keyed by query ID and
    /// a flat embedding store.
    struct MockVectorService {
        neighbors: RwLock<HashMap<String, Vec<Neighbor>>>,
        embeddings: RwLock<HashMap<String, Vec<f32>>>,
    }

    impl MockVectorService {
        fn new() -> Self {
            Self {
                neighbors: RwLock::new(HashMap::new()),
                embeddings: RwLock::new(HashMap::new()),
            }
        }

        /// Register a neighbor to be returned for `query_id`.
        fn add_neighbor(&self, query_id: &str, neighbor: Neighbor) {
            let mut neighbors = self.neighbors.write().unwrap();
            neighbors
                .entry(query_id.to_string())
                .or_default()
                .push(neighbor);
        }

        /// Register an embedding vector under `id`.
        fn add_embedding(&self, id: &str, embedding: Vec<f32>) {
            let mut embeddings = self.embeddings.write().unwrap();
            embeddings.insert(id.to_string(), embedding);
        }
    }

    #[async_trait::async_trait]
    impl VectorSpaceService for MockVectorService {
        // Returns at most `k` of the registered neighbors, in insertion order.
        async fn find_neighbors(&self, embedding_id: &EmbeddingId, k: usize) -> Result<Vec<Neighbor>> {
            let neighbors = self.neighbors.read().unwrap();
            let result = neighbors
                .get(embedding_id.as_str())
                .map(|n| n.iter().take(k).cloned().collect())
                .unwrap_or_default();
            Ok(result)
        }

        async fn get_embedding(&self, embedding_id: &EmbeddingId) -> Result<Option<Vec<f32>>> {
            let embeddings = self.embeddings.read().unwrap();
            Ok(embeddings.get(embedding_id.as_str()).cloned())
        }

        // Fixed similarity — sufficient for tests that don't inspect the value.
        async fn calculate_similarity(
            &self,
            _embedding_id_a: &EmbeddingId,
            _embedding_id_b: &EmbeddingId,
        ) -> Result<f32> {
            Ok(0.85)
        }
    }

    /// Stateless metadata mock: synthesizes metadata/URLs from the ID and
    /// reports that no embedding maps to a segment.
    struct MockMetadataService;

    #[async_trait::async_trait]
    impl MetadataService for MockMetadataService {
        async fn get_recording_metadata(
            &self,
            embedding_id: &EmbeddingId,
        ) -> Result<RecordingMetadata> {
            Ok(RecordingMetadata::new(format!("recording-{}", embedding_id)))
        }

        async fn get_spectrogram_url(&self, embedding_id: &EmbeddingId) -> Result<Option<String>> {
            Ok(Some(format!(
                "https://spectrograms.example.com/{}",
                embedding_id
            )))
        }

        // No segment mapping => sequence context is always skipped in tests.
        async fn get_segment_id(&self, _embedding_id: &EmbeddingId) -> Result<Option<SegmentId>> {
            Ok(None)
        }
    }

    /// Happy path: two registered neighbors end up in the built pack.
    #[tokio::test]
    async fn test_interpretation_service_build_evidence_pack() {
        let vector_service = Arc::new(MockVectorService::new());
        let cluster_repo = Arc::new(InMemoryClusterRepository::new());
        let metadata_service = Arc::new(MockMetadataService);

        // Add some test data
        vector_service.add_neighbor(
            "query-1",
            Neighbor::new(EmbeddingId::new("neighbor-1"), 0.1),
        );
        vector_service.add_neighbor(
            "query-1",
            Neighbor::new(EmbeddingId::new("neighbor-2"), 0.2),
        );
        vector_service.add_embedding("neighbor-1", vec![0.1, 0.2, 0.3]);
        vector_service.add_embedding("neighbor-2", vec![0.2, 0.3, 0.4]);

        let config = InterpretationConfig::default();
        let service = InterpretationService::new(
            vector_service,
            cluster_repo,
            metadata_service,
            config,
        );

        let query_id = EmbeddingId::new("query-1");
        let result = service.build_evidence_pack(&query_id).await;

        assert!(result.is_ok());
        let pack = result.unwrap();
        assert_eq!(pack.query_embedding_id, query_id);
        assert_eq!(pack.neighbors.len(), 2);
    }

    /// A claim with a resolvable Neighbor reference validates; one with no
    /// evidence references does not.
    #[tokio::test]
    async fn test_validate_claims() {
        let vector_service = Arc::new(MockVectorService::new());
        let cluster_repo = Arc::new(InMemoryClusterRepository::new());
        let metadata_service = Arc::new(MockMetadataService);

        vector_service.add_embedding("evidence-1", vec![0.1, 0.2, 0.3]);

        let config = InterpretationConfig::default();
        let service = InterpretationService::new(
            vector_service,
            cluster_repo,
            metadata_service,
            config,
        );

        let valid_claim = Claim::new("Valid claim with evidence", 0.9)
            .with_evidence(vec![EvidenceRef::neighbor(
                &EmbeddingId::new("evidence-1"),
                "Supporting evidence",
            )]);

        let invalid_claim = Claim::new("Invalid claim without evidence", 0.9);

        let results = service
            .validate_claims(&[valid_claim, invalid_claim])
            .await
            .unwrap();

        assert_eq!(results.len(), 2);
        assert!(results[0].1); // Valid claim
        assert!(!results[1].1); // Invalid claim (no evidence)
    }
}
|
||||
691
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/entities.rs
vendored
Normal file
691
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/entities.rs
vendored
Normal file
@@ -0,0 +1,691 @@
|
||||
//! Core domain entities for the Interpretation bounded context.
|
||||
//!
|
||||
//! These entities represent RAB (Retrieval-Augmented Bioacoustics) evidence packs
|
||||
//! and their associated interpretations with cited claims.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Type alias for timestamps
|
||||
pub type Timestamp = DateTime<Utc>;
|
||||
|
||||
/// Unique identifier for embeddings
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct EmbeddingId(pub String);
|
||||
|
||||
impl EmbeddingId {
|
||||
/// Create a new embedding ID
|
||||
pub fn new(id: impl Into<String>) -> Self {
|
||||
Self(id.into())
|
||||
}
|
||||
|
||||
/// Generate a new random embedding ID
|
||||
pub fn generate() -> Self {
|
||||
Self(Uuid::new_v4().to_string())
|
||||
}
|
||||
|
||||
/// Get the inner string value
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for EmbeddingId {
|
||||
fn from(s: String) -> Self {
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for EmbeddingId {
|
||||
fn from(s: &str) -> Self {
|
||||
Self(s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for EmbeddingId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Unique identifier for clusters
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct ClusterId(pub String);
|
||||
|
||||
impl ClusterId {
|
||||
pub fn new(id: impl Into<String>) -> Self {
|
||||
Self(id.into())
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ClusterId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Unique identifier for audio segments
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct SegmentId(pub String);
|
||||
|
||||
impl SegmentId {
|
||||
pub fn new(id: impl Into<String>) -> Self {
|
||||
Self(id.into())
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Evidence pack containing all evidence for a bioacoustic query.
|
||||
///
|
||||
/// An evidence pack is the core artifact of RAB interpretation, bundling
|
||||
/// together neighbor evidence, cluster context, sequence context, and
|
||||
/// the generated interpretation with cited claims.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EvidencePack {
|
||||
/// Unique identifier for this evidence pack
|
||||
pub id: String,
|
||||
|
||||
/// The query embedding that initiated this evidence pack
|
||||
pub query_embedding_id: EmbeddingId,
|
||||
|
||||
/// Evidence from nearest neighbor search
|
||||
pub neighbors: Vec<NeighborEvidence>,
|
||||
|
||||
/// Context from cluster analysis
|
||||
pub cluster_context: ClusterContext,
|
||||
|
||||
/// Optional temporal sequence context
|
||||
pub sequence_context: Option<SequenceContext>,
|
||||
|
||||
/// Generated interpretation with claims
|
||||
pub interpretation: Interpretation,
|
||||
|
||||
/// When this evidence pack was created
|
||||
pub created_at: Timestamp,
|
||||
}
|
||||
|
||||
impl EvidencePack {
|
||||
/// Create a new evidence pack with a generated ID
|
||||
pub fn new(
|
||||
query_embedding_id: EmbeddingId,
|
||||
neighbors: Vec<NeighborEvidence>,
|
||||
cluster_context: ClusterContext,
|
||||
sequence_context: Option<SequenceContext>,
|
||||
interpretation: Interpretation,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
query_embedding_id,
|
||||
neighbors,
|
||||
cluster_context,
|
||||
sequence_context,
|
||||
interpretation,
|
||||
created_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the total confidence score for this evidence pack
|
||||
pub fn overall_confidence(&self) -> f32 {
|
||||
let neighbor_confidence = if self.neighbors.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
// Higher confidence if neighbors are close (low distance)
|
||||
let avg_distance: f32 = self.neighbors.iter().map(|n| n.distance).sum::<f32>()
|
||||
/ self.neighbors.len() as f32;
|
||||
(1.0 - avg_distance.min(1.0)).max(0.0)
|
||||
};
|
||||
|
||||
let cluster_confidence = self.cluster_context.confidence;
|
||||
let interpretation_confidence = self.interpretation.confidence;
|
||||
|
||||
// Weighted average
|
||||
(neighbor_confidence * 0.3 + cluster_confidence * 0.3 + interpretation_confidence * 0.4)
|
||||
}
|
||||
|
||||
/// Get the number of distinct evidence sources
|
||||
pub fn evidence_source_count(&self) -> usize {
|
||||
let mut count = 0;
|
||||
if !self.neighbors.is_empty() {
|
||||
count += 1;
|
||||
}
|
||||
if self.cluster_context.assigned_cluster.is_some() {
|
||||
count += 1;
|
||||
}
|
||||
if self.sequence_context.is_some() {
|
||||
count += 1;
|
||||
}
|
||||
count
|
||||
}
|
||||
}
|
||||
|
||||
/// Evidence from a single neighbor in vector space.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NeighborEvidence {
|
||||
/// The embedding ID of this neighbor
|
||||
pub embedding_id: EmbeddingId,
|
||||
|
||||
/// Distance from the query embedding (lower = more similar)
|
||||
pub distance: f32,
|
||||
|
||||
/// Cluster assignment if available
|
||||
pub cluster_id: Option<ClusterId>,
|
||||
|
||||
/// Metadata about the source recording
|
||||
pub recording_metadata: RecordingMetadata,
|
||||
|
||||
/// Optional URL to a spectrogram visualization
|
||||
pub spectrogram_url: Option<String>,
|
||||
}
|
||||
|
||||
impl NeighborEvidence {
|
||||
/// Create new neighbor evidence
|
||||
pub fn new(
|
||||
embedding_id: EmbeddingId,
|
||||
distance: f32,
|
||||
recording_metadata: RecordingMetadata,
|
||||
) -> Self {
|
||||
Self {
|
||||
embedding_id,
|
||||
distance,
|
||||
cluster_id: None,
|
||||
recording_metadata,
|
||||
spectrogram_url: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add cluster information
|
||||
pub fn with_cluster(mut self, cluster_id: ClusterId) -> Self {
|
||||
self.cluster_id = Some(cluster_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add spectrogram URL
|
||||
pub fn with_spectrogram(mut self, url: String) -> Self {
|
||||
self.spectrogram_url = Some(url);
|
||||
self
|
||||
}
|
||||
|
||||
/// Convert distance to similarity score (0.0 to 1.0)
|
||||
pub fn similarity(&self) -> f32 {
|
||||
(1.0 - self.distance).max(0.0).min(1.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata about a source recording.
///
/// All fields except the identifier are optional — evidence can be built
/// from sparsely annotated recordings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecordingMetadata {
    /// Recording identifier
    pub recording_id: String,

    /// Species or taxon if known
    pub taxon: Option<String>,

    /// Geographic location
    pub location: Option<GeoLocation>,

    /// Recording timestamp
    pub recorded_at: Option<Timestamp>,

    /// Duration in seconds
    pub duration_seconds: Option<f32>,

    /// Sample rate in Hz
    pub sample_rate: Option<u32>,

    /// Additional tags or labels
    pub tags: Vec<String>,
}
|
||||
|
||||
impl RecordingMetadata {
|
||||
/// Create minimal recording metadata
|
||||
pub fn new(recording_id: impl Into<String>) -> Self {
|
||||
Self {
|
||||
recording_id: recording_id.into(),
|
||||
taxon: None,
|
||||
location: None,
|
||||
recorded_at: None,
|
||||
duration_seconds: None,
|
||||
sample_rate: None,
|
||||
tags: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add taxon information
|
||||
pub fn with_taxon(mut self, taxon: impl Into<String>) -> Self {
|
||||
self.taxon = Some(taxon.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add location information
|
||||
pub fn with_location(mut self, location: GeoLocation) -> Self {
|
||||
self.location = Some(location);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Geographic location
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeoLocation {
    // NOTE(review): presumably decimal degrees (positive = north) — confirm
    // with the data producers.
    pub latitude: f64,
    // NOTE(review): presumably decimal degrees (positive = east) — confirm.
    pub longitude: f64,
    /// Elevation above some reference (meters), if known.
    pub elevation_meters: Option<f32>,
    /// Free-text place name, if known.
    pub locality: Option<String>,
}
|
||||
|
||||
impl GeoLocation {
|
||||
pub fn new(latitude: f64, longitude: f64) -> Self {
|
||||
Self {
|
||||
latitude,
|
||||
longitude,
|
||||
elevation_meters: None,
|
||||
locality: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Context from cluster analysis.
///
/// Describes how (and how confidently) the query embedding maps onto a
/// known cluster. An unassigned embedding is represented by
/// `assigned_cluster: None` with zeroed scores (see `ClusterContext::empty`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterContext {
    /// The cluster this embedding was assigned to
    pub assigned_cluster: Option<ClusterId>,

    /// Human-readable label for the cluster
    pub cluster_label: Option<String>,

    /// Confidence in the cluster assignment (0.0 to 1.0)
    pub confidence: f32,

    /// Similarity to the cluster exemplar (0.0 to 1.0)
    pub exemplar_similarity: f32,
}
|
||||
|
||||
impl ClusterContext {
|
||||
/// Create a new cluster context
|
||||
pub fn new(
|
||||
assigned_cluster: Option<ClusterId>,
|
||||
confidence: f32,
|
||||
exemplar_similarity: f32,
|
||||
) -> Self {
|
||||
Self {
|
||||
assigned_cluster,
|
||||
cluster_label: None,
|
||||
confidence,
|
||||
exemplar_similarity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an empty cluster context (no cluster assigned)
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
assigned_cluster: None,
|
||||
cluster_label: None,
|
||||
confidence: 0.0,
|
||||
exemplar_similarity: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a cluster label
|
||||
pub fn with_label(mut self, label: impl Into<String>) -> Self {
|
||||
self.cluster_label = Some(label.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Check if a cluster was assigned
|
||||
pub fn has_cluster(&self) -> bool {
|
||||
self.assigned_cluster.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// Temporal sequence context for understanding vocalization patterns.
///
/// Captures the query segment's temporal neighborhood: segments before and
/// after it in the recording, plus any motif detected over that window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SequenceContext {
    /// Segments that precede the query in time
    pub preceding_segments: Vec<SegmentId>,

    /// Segments that follow the query in time
    pub following_segments: Vec<SegmentId>,

    /// Detected acoustic motif pattern
    // e.g. "ABAB" — see the unit tests for an example value.
    pub detected_motif: Option<String>,
}
|
||||
|
||||
impl SequenceContext {
|
||||
/// Create a new sequence context
|
||||
pub fn new(
|
||||
preceding_segments: Vec<SegmentId>,
|
||||
following_segments: Vec<SegmentId>,
|
||||
) -> Self {
|
||||
Self {
|
||||
preceding_segments,
|
||||
following_segments,
|
||||
detected_motif: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an empty sequence context
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
preceding_segments: Vec::new(),
|
||||
following_segments: Vec::new(),
|
||||
detected_motif: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a detected motif
|
||||
pub fn with_motif(mut self, motif: impl Into<String>) -> Self {
|
||||
self.detected_motif = Some(motif.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Check if sequence context has any temporal information
|
||||
pub fn has_temporal_context(&self) -> bool {
|
||||
!self.preceding_segments.is_empty() || !self.following_segments.is_empty()
|
||||
}
|
||||
|
||||
/// Get total sequence length
|
||||
pub fn sequence_length(&self) -> usize {
|
||||
self.preceding_segments.len() + 1 + self.following_segments.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Generated interpretation of the evidence.
///
/// The narrative output of the RAB pipeline: a structural description plus
/// a set of cited claims, with an overall confidence that is recomputed as
/// the mean claim confidence whenever a claim is added.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Interpretation {
    /// Structural description of the acoustic signal
    pub structural_description: String,

    /// Claims made about the signal with evidence citations
    pub claims: Vec<Claim>,

    /// Overall confidence in the interpretation (0.0 to 1.0)
    pub confidence: f32,
}
|
||||
|
||||
impl Interpretation {
|
||||
/// Create a new interpretation
|
||||
pub fn new(structural_description: String, claims: Vec<Claim>, confidence: f32) -> Self {
|
||||
Self {
|
||||
structural_description,
|
||||
claims,
|
||||
confidence: confidence.clamp(0.0, 1.0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an empty interpretation with no claims
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
structural_description: String::new(),
|
||||
claims: Vec::new(),
|
||||
confidence: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a claim to the interpretation
|
||||
pub fn add_claim(&mut self, claim: Claim) {
|
||||
self.claims.push(claim);
|
||||
self.recalculate_confidence();
|
||||
}
|
||||
|
||||
/// Recalculate overall confidence based on claims
|
||||
fn recalculate_confidence(&mut self) {
|
||||
if self.claims.is_empty() {
|
||||
return;
|
||||
}
|
||||
let total_confidence: f32 = self.claims.iter().map(|c| c.confidence).sum();
|
||||
self.confidence = total_confidence / self.claims.len() as f32;
|
||||
}
|
||||
|
||||
/// Get claims above a confidence threshold
|
||||
pub fn high_confidence_claims(&self, threshold: f32) -> Vec<&Claim> {
|
||||
self.claims
|
||||
.iter()
|
||||
.filter(|c| c.confidence >= threshold)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get the number of evidence-backed claims
|
||||
pub fn evidenced_claim_count(&self) -> usize {
|
||||
self.claims
|
||||
.iter()
|
||||
.filter(|c| !c.evidence_refs.is_empty())
|
||||
.count()
|
||||
}
|
||||
}
|
||||
|
||||
/// A claim made about the acoustic signal with evidence citations.
///
/// Claims are the atomic, citable statements of an `Interpretation`; each
/// carries its own confidence and a list of supporting evidence references.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claim {
    /// The statement being made
    pub statement: String,

    /// References to evidence supporting this claim
    pub evidence_refs: Vec<EvidenceRef>,

    /// Confidence in this claim (0.0 to 1.0)
    pub confidence: f32,
}
|
||||
|
||||
impl Claim {
|
||||
/// Create a new claim
|
||||
pub fn new(statement: impl Into<String>, confidence: f32) -> Self {
|
||||
Self {
|
||||
statement: statement.into(),
|
||||
evidence_refs: Vec::new(),
|
||||
confidence: confidence.clamp(0.0, 1.0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an evidence reference
|
||||
pub fn add_evidence(&mut self, evidence_ref: EvidenceRef) {
|
||||
self.evidence_refs.push(evidence_ref);
|
||||
}
|
||||
|
||||
/// Create a claim with evidence references
|
||||
pub fn with_evidence(mut self, evidence_refs: Vec<EvidenceRef>) -> Self {
|
||||
self.evidence_refs = evidence_refs;
|
||||
self
|
||||
}
|
||||
|
||||
/// Check if this claim has supporting evidence
|
||||
pub fn has_evidence(&self) -> bool {
|
||||
!self.evidence_refs.is_empty()
|
||||
}
|
||||
|
||||
/// Get evidence references of a specific type
|
||||
pub fn evidence_of_type(&self, ref_type: EvidenceRefType) -> Vec<&EvidenceRef> {
|
||||
self.evidence_refs
|
||||
.iter()
|
||||
.filter(|e| e.ref_type == ref_type)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Reference to a piece of evidence supporting a claim.
///
/// A lightweight citation: the evidence type, an opaque identifier whose
/// meaning depends on that type, and a human-readable description.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvidenceRef {
    /// Type of evidence being referenced
    pub ref_type: EvidenceRefType,

    /// Identifier for the evidence (embedding ID, cluster ID, etc.)
    pub ref_id: String,

    /// Human-readable description of the evidence
    pub description: String,
}
|
||||
|
||||
impl EvidenceRef {
|
||||
/// Create a new evidence reference
|
||||
pub fn new(ref_type: EvidenceRefType, ref_id: impl Into<String>, description: impl Into<String>) -> Self {
|
||||
Self {
|
||||
ref_type,
|
||||
ref_id: ref_id.into(),
|
||||
description: description.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a neighbor evidence reference
|
||||
pub fn neighbor(embedding_id: &EmbeddingId, description: impl Into<String>) -> Self {
|
||||
Self::new(EvidenceRefType::Neighbor, embedding_id.as_str(), description)
|
||||
}
|
||||
|
||||
/// Create a cluster evidence reference
|
||||
pub fn cluster(cluster_id: &ClusterId, description: impl Into<String>) -> Self {
|
||||
Self::new(EvidenceRefType::Cluster, cluster_id.as_str(), description)
|
||||
}
|
||||
|
||||
/// Create a sequence evidence reference
|
||||
pub fn sequence(segment_id: &SegmentId, description: impl Into<String>) -> Self {
|
||||
Self::new(EvidenceRefType::Sequence, segment_id.as_str(), description)
|
||||
}
|
||||
|
||||
/// Create a taxon evidence reference
|
||||
pub fn taxon(taxon_name: impl Into<String>, description: impl Into<String>) -> Self {
|
||||
Self::new(EvidenceRefType::Taxon, taxon_name, description)
|
||||
}
|
||||
}
|
||||
|
||||
/// Types of evidence that can be referenced in claims.
///
/// The `Display` impl renders each variant as its lowercase name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EvidenceRefType {
    /// Evidence from nearest neighbor search
    Neighbor,
    /// Evidence from cluster assignment
    Cluster,
    /// Evidence from temporal sequence analysis
    Sequence,
    /// Evidence from taxonomic classification
    Taxon,
}
|
||||
|
||||
impl std::fmt::Display for EvidenceRefType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
EvidenceRefType::Neighbor => write!(f, "neighbor"),
|
||||
EvidenceRefType::Cluster => write!(f, "cluster"),
|
||||
EvidenceRefType::Sequence => write!(f, "sequence"),
|
||||
EvidenceRefType::Taxon => write!(f, "taxon"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ID newtype: string round-trips through as_str/to_string, and
    // generated IDs are non-empty.
    #[test]
    fn test_embedding_id() {
        let id = EmbeddingId::new("test-123");
        assert_eq!(id.as_str(), "test-123");
        assert_eq!(id.to_string(), "test-123");

        let generated = EmbeddingId::generate();
        assert!(!generated.as_str().is_empty());
    }

    // similarity() = 1 - distance, saturating at 0.0 for far neighbors.
    #[test]
    fn test_neighbor_evidence_similarity() {
        let metadata = RecordingMetadata::new("rec-1");
        let evidence = NeighborEvidence::new(
            EmbeddingId::new("emb-1"),
            0.2,
            metadata,
        );
        assert_eq!(evidence.similarity(), 0.8);

        let far_evidence = NeighborEvidence::new(
            EmbeddingId::new("emb-2"),
            1.5,
            RecordingMetadata::new("rec-2"),
        );
        assert_eq!(far_evidence.similarity(), 0.0);
    }

    // Builder attaches the label; has_cluster reflects the assignment.
    #[test]
    fn test_cluster_context() {
        let context = ClusterContext::new(
            Some(ClusterId::new("cluster-1")),
            0.85,
            0.92,
        ).with_label("Song Type A");

        assert!(context.has_cluster());
        assert_eq!(context.cluster_label, Some("Song Type A".to_string()));
    }

    // sequence_length counts preceding + the query itself + following
    // (2 + 1 + 1 = 4 here).
    #[test]
    fn test_sequence_context() {
        let context = SequenceContext::new(
            vec![SegmentId::new("seg-1"), SegmentId::new("seg-2")],
            vec![SegmentId::new("seg-4")],
        ).with_motif("ABAB");

        assert!(context.has_temporal_context());
        assert_eq!(context.sequence_length(), 4);
        assert_eq!(context.detected_motif, Some("ABAB".to_string()));
    }

    // Evidence refs accumulate and can be filtered by type.
    #[test]
    fn test_claim_with_evidence() {
        let mut claim = Claim::new("This is a dawn chorus vocalization", 0.9);
        claim.add_evidence(EvidenceRef::neighbor(
            &EmbeddingId::new("emb-1"),
            "Similar to known dawn chorus recording",
        ));
        claim.add_evidence(EvidenceRef::cluster(
            &ClusterId::new("cluster-5"),
            "Assigned to dawn chorus cluster",
        ));

        assert!(claim.has_evidence());
        assert_eq!(claim.evidence_refs.len(), 2);
        assert_eq!(claim.evidence_of_type(EvidenceRefType::Neighbor).len(), 1);
    }

    // add_claim recomputes confidence as the mean of claim confidences:
    // (0.8 + 0.6) / 2 = 0.7.
    #[test]
    fn test_interpretation_confidence() {
        let mut interp = Interpretation::new(
            "Complex harmonic structure with frequency modulation".to_string(),
            Vec::new(),
            0.0,
        );

        interp.add_claim(Claim::new("Claim 1", 0.8));
        interp.add_claim(Claim::new("Claim 2", 0.6));

        assert_eq!(interp.confidence, 0.7);
    }

    // Overall confidence blends neighbor, cluster and interpretation scores;
    // two evidence sources here: neighbors + cluster (no sequence context).
    #[test]
    fn test_evidence_pack_overall_confidence() {
        let pack = EvidencePack::new(
            EmbeddingId::new("query-1"),
            vec![
                NeighborEvidence::new(
                    EmbeddingId::new("n-1"),
                    0.1,
                    RecordingMetadata::new("r-1"),
                ),
                NeighborEvidence::new(
                    EmbeddingId::new("n-2"),
                    0.2,
                    RecordingMetadata::new("r-2"),
                ),
            ],
            ClusterContext::new(Some(ClusterId::new("c-1")), 0.9, 0.85),
            None,
            Interpretation::new("Test".to_string(), vec![Claim::new("Test", 0.8)], 0.8),
        );

        let confidence = pack.overall_confidence();
        assert!(confidence > 0.0 && confidence <= 1.0);
        assert_eq!(pack.evidence_source_count(), 2);
    }
}
|
||||
6
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/mod.rs
vendored
Normal file
6
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/mod.rs
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
//! Domain layer for the Interpretation bounded context.
|
||||
//!
|
||||
//! Contains core entities and repository traits.
|
||||
|
||||
pub mod entities;
|
||||
pub mod repository;
|
||||
305
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/repository.rs
vendored
Normal file
305
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/domain/repository.rs
vendored
Normal file
@@ -0,0 +1,305 @@
|
||||
//! Repository traits for the Interpretation bounded context.
|
||||
//!
|
||||
//! These traits define the persistence interfaces for evidence packs
|
||||
//! and related entities.
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{Error, Result};
|
||||
use super::entities::{EvidencePack, EmbeddingId, ClusterId, ClusterContext};
|
||||
|
||||
/// Repository for persisting and retrieving evidence packs.
///
/// Implementations of this trait handle the storage and retrieval
/// of evidence packs, which are the primary artifacts of RAB interpretation.
#[async_trait]
pub trait EvidencePackRepository: Send + Sync {
    /// Save an evidence pack to the repository.
    ///
    /// If an evidence pack with the same ID already exists, it will be updated.
    async fn save(&self, pack: &EvidencePack) -> Result<()>;

    /// Find an evidence pack by its unique identifier.
    ///
    /// Returns `Ok(None)` when no pack with that ID exists.
    async fn find_by_id(&self, id: &str) -> Result<Option<EvidencePack>>;

    /// Find all evidence packs for a given query embedding.
    ///
    /// Returns evidence packs in reverse chronological order (newest first).
    async fn find_by_query(&self, embedding_id: &EmbeddingId) -> Result<Vec<EvidencePack>>;

    /// Delete an evidence pack by ID.
    ///
    /// Returns `true` when a pack was removed, `false` when the ID was unknown.
    async fn delete(&self, id: &str) -> Result<bool>;

    /// Find evidence packs created within a time range.
    ///
    /// The in-memory implementation treats both bounds as inclusive.
    async fn find_by_time_range(
        &self,
        start: chrono::DateTime<chrono::Utc>,
        end: chrono::DateTime<chrono::Utc>,
    ) -> Result<Vec<EvidencePack>>;

    /// Count total evidence packs in the repository.
    async fn count(&self) -> Result<usize>;
}
|
||||
|
||||
/// Repository for cluster information used in interpretation.
///
/// This trait provides read access to cluster data needed for
/// building evidence packs and generating interpretations.
#[async_trait]
pub trait ClusterRepository: Send + Sync {
    /// Get cluster context for an embedding.
    ///
    /// The in-memory implementation returns an empty context when the
    /// embedding has no cluster assignment.
    async fn get_cluster_context(&self, embedding_id: &EmbeddingId) -> Result<ClusterContext>;

    /// Get the human-readable label for a cluster, if one was set.
    async fn get_cluster_label(&self, cluster_id: &ClusterId) -> Result<Option<String>>;

    /// Get the exemplar embedding for a cluster, if one was set.
    async fn get_cluster_exemplar(&self, cluster_id: &ClusterId) -> Result<Option<EmbeddingId>>;

    /// Get all embeddings in a cluster.
    async fn get_cluster_members(&self, cluster_id: &ClusterId) -> Result<Vec<EmbeddingId>>;

    /// Get statistics about a cluster.
    async fn get_cluster_stats(&self, cluster_id: &ClusterId) -> Result<ClusterStats>;
}
|
||||
|
||||
/// Statistics about a cluster.
///
/// `Default` is derived: the previous hand-written impl set every field to
/// zero, which is exactly what the derive produces for `usize` and `f32`.
#[derive(Debug, Clone, Default)]
pub struct ClusterStats {
    /// Number of embeddings in the cluster
    pub member_count: usize,
    /// Average distance from cluster center
    pub avg_distance: f32,
    /// Maximum distance from cluster center
    pub max_distance: f32,
    /// Cluster coherence score (0.0 to 1.0)
    pub coherence: f32,
}
|
||||
|
||||
/// In-memory implementation of EvidencePackRepository for testing.
///
/// Backed by an `RwLock<HashMap>`; state is shared across tasks via `&self`
/// but lives only as long as the repository value itself.
#[derive(Debug, Default)]
pub struct InMemoryEvidencePackRepository {
    // Keyed by `EvidencePack::id`.
    packs: std::sync::RwLock<std::collections::HashMap<String, EvidencePack>>,
}

impl InMemoryEvidencePackRepository {
    /// Create a new, empty in-memory repository.
    pub fn new() -> Self {
        Self::default()
    }
}
|
||||
|
||||
#[async_trait]
impl EvidencePackRepository for InMemoryEvidencePackRepository {
    async fn save(&self, pack: &EvidencePack) -> Result<()> {
        // A poisoned lock (a writer panicked) surfaces as an internal error.
        let mut packs = self.packs.write().map_err(|e| Error::internal(e.to_string()))?;
        // Insert-or-replace: saving an existing ID updates it in place.
        packs.insert(pack.id.clone(), pack.clone());
        Ok(())
    }

    async fn find_by_id(&self, id: &str) -> Result<Option<EvidencePack>> {
        let packs = self.packs.read().map_err(|e| Error::internal(e.to_string()))?;
        Ok(packs.get(id).cloned())
    }

    async fn find_by_query(&self, embedding_id: &EmbeddingId) -> Result<Vec<EvidencePack>> {
        let packs = self.packs.read().map_err(|e| Error::internal(e.to_string()))?;
        let mut results: Vec<EvidencePack> = packs
            .values()
            .filter(|p| p.query_embedding_id == *embedding_id)
            .cloned()
            .collect();
        // Newest first, per the trait contract.
        results.sort_by(|a, b| b.created_at.cmp(&a.created_at));
        Ok(results)
    }

    async fn delete(&self, id: &str) -> Result<bool> {
        let mut packs = self.packs.write().map_err(|e| Error::internal(e.to_string()))?;
        // `true` only when something was actually removed.
        Ok(packs.remove(id).is_some())
    }

    async fn find_by_time_range(
        &self,
        start: chrono::DateTime<chrono::Utc>,
        end: chrono::DateTime<chrono::Utc>,
    ) -> Result<Vec<EvidencePack>> {
        let packs = self.packs.read().map_err(|e| Error::internal(e.to_string()))?;
        let mut results: Vec<EvidencePack> = packs
            .values()
            // Both bounds are inclusive.
            .filter(|p| p.created_at >= start && p.created_at <= end)
            .cloned()
            .collect();
        // Newest first, matching find_by_query.
        results.sort_by(|a, b| b.created_at.cmp(&a.created_at));
        Ok(results)
    }

    async fn count(&self) -> Result<usize> {
        let packs = self.packs.read().map_err(|e| Error::internal(e.to_string()))?;
        Ok(packs.len())
    }
}
|
||||
|
||||
/// In-memory implementation of ClusterRepository for testing.
///
/// Holds clusters and embedding→cluster assignments behind separate locks;
/// the two maps are kept consistent by `assign_to_cluster`.
#[derive(Debug, Default)]
pub struct InMemoryClusterRepository {
    // Cluster definitions, keyed by cluster ID.
    clusters: std::sync::RwLock<std::collections::HashMap<ClusterId, ClusterData>>,
    // Which cluster each embedding currently belongs to.
    assignments: std::sync::RwLock<std::collections::HashMap<EmbeddingId, ClusterId>>,
}

/// Internal per-cluster record for the in-memory repository.
#[derive(Debug, Clone)]
struct ClusterData {
    // Optional human-readable label.
    label: Option<String>,
    // Optional exemplar embedding for the cluster.
    exemplar: Option<EmbeddingId>,
    // Embeddings assigned to this cluster.
    members: Vec<EmbeddingId>,
    // Aggregate statistics (zeroed by default in this test double).
    stats: ClusterStats,
}
|
||||
|
||||
impl InMemoryClusterRepository {
|
||||
/// Create a new in-memory cluster repository.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Add a cluster to the repository.
|
||||
pub fn add_cluster(
|
||||
&self,
|
||||
cluster_id: ClusterId,
|
||||
label: Option<String>,
|
||||
exemplar: Option<EmbeddingId>,
|
||||
) -> Result<()> {
|
||||
let mut clusters = self.clusters.write().map_err(|e| Error::internal(e.to_string()))?;
|
||||
clusters.insert(cluster_id, ClusterData {
|
||||
label,
|
||||
exemplar,
|
||||
members: Vec::new(),
|
||||
stats: ClusterStats::default(),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Assign an embedding to a cluster.
|
||||
pub fn assign_to_cluster(
|
||||
&self,
|
||||
embedding_id: EmbeddingId,
|
||||
cluster_id: ClusterId,
|
||||
) -> Result<()> {
|
||||
let mut assignments = self.assignments.write().map_err(|e| Error::internal(e.to_string()))?;
|
||||
assignments.insert(embedding_id.clone(), cluster_id.clone());
|
||||
|
||||
let mut clusters = self.clusters.write().map_err(|e| Error::internal(e.to_string()))?;
|
||||
if let Some(cluster) = clusters.get_mut(&cluster_id) {
|
||||
cluster.members.push(embedding_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl ClusterRepository for InMemoryClusterRepository {
    async fn get_cluster_context(&self, embedding_id: &EmbeddingId) -> Result<ClusterContext> {
        let assignments = self.assignments.read().map_err(|e| Error::internal(e.to_string()))?;
        let cluster_id = assignments.get(embedding_id).cloned();

        if let Some(cid) = &cluster_id {
            let clusters = self.clusters.read().map_err(|e| Error::internal(e.to_string()))?;
            if let Some(cluster) = clusters.get(cid) {
                return Ok(ClusterContext {
                    assigned_cluster: Some(cid.clone()),
                    cluster_label: cluster.label.clone(),
                    // NOTE(review): confidence and similarity are hardcoded
                    // stub values for this test double — they are not derived
                    // from any data.
                    confidence: 0.85,
                    exemplar_similarity: 0.90,
                });
            }
        }

        // No assignment (or a dangling cluster ID): return an empty context.
        Ok(ClusterContext::empty())
    }

    async fn get_cluster_label(&self, cluster_id: &ClusterId) -> Result<Option<String>> {
        let clusters = self.clusters.read().map_err(|e| Error::internal(e.to_string()))?;
        Ok(clusters.get(cluster_id).and_then(|c| c.label.clone()))
    }

    async fn get_cluster_exemplar(&self, cluster_id: &ClusterId) -> Result<Option<EmbeddingId>> {
        let clusters = self.clusters.read().map_err(|e| Error::internal(e.to_string()))?;
        Ok(clusters.get(cluster_id).and_then(|c| c.exemplar.clone()))
    }

    async fn get_cluster_members(&self, cluster_id: &ClusterId) -> Result<Vec<EmbeddingId>> {
        let clusters = self.clusters.read().map_err(|e| Error::internal(e.to_string()))?;
        // Unknown cluster IDs yield an empty member list, not an error.
        Ok(clusters.get(cluster_id).map(|c| c.members.clone()).unwrap_or_default())
    }

    async fn get_cluster_stats(&self, cluster_id: &ClusterId) -> Result<ClusterStats> {
        let clusters = self.clusters.read().map_err(|e| Error::internal(e.to_string()))?;
        // Unknown cluster IDs yield zeroed stats, not an error.
        Ok(clusters.get(cluster_id).map(|c| c.stats.clone()).unwrap_or_default())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Full save / find / delete round-trip on the in-memory pack repository.
    #[tokio::test]
    async fn test_in_memory_evidence_pack_repo() {
        use crate::domain::entities::*;

        let repo = InMemoryEvidencePackRepository::new();

        let pack = EvidencePack::new(
            EmbeddingId::new("query-1"),
            Vec::new(),
            ClusterContext::empty(),
            None,
            Interpretation::empty(),
        );

        repo.save(&pack).await.unwrap();
        assert_eq!(repo.count().await.unwrap(), 1);

        let found = repo.find_by_id(&pack.id).await.unwrap();
        assert!(found.is_some());

        let by_query = repo.find_by_query(&EmbeddingId::new("query-1")).await.unwrap();
        assert_eq!(by_query.len(), 1);

        repo.delete(&pack.id).await.unwrap();
        assert_eq!(repo.count().await.unwrap(), 0);
    }

    // Cluster setup, assignment, and lookup through the read-side trait.
    #[tokio::test]
    async fn test_in_memory_cluster_repo() {
        let repo = InMemoryClusterRepository::new();

        let cluster_id = ClusterId::new("cluster-1");
        repo.add_cluster(
            cluster_id.clone(),
            Some("Song Type A".to_string()),
            Some(EmbeddingId::new("exemplar-1")),
        ).unwrap();

        let embedding_id = EmbeddingId::new("emb-1");
        repo.assign_to_cluster(embedding_id.clone(), cluster_id.clone()).unwrap();

        let context = repo.get_cluster_context(&embedding_id).await.unwrap();
        assert!(context.has_cluster());
        assert_eq!(context.cluster_label, Some("Song Type A".to_string()));

        let label = repo.get_cluster_label(&cluster_id).await.unwrap();
        assert_eq!(label, Some("Song Type A".to_string()));

        let members = repo.get_cluster_members(&cluster_id).await.unwrap();
        assert_eq!(members.len(), 1);
    }
}
|
||||
@@ -0,0 +1,754 @@
|
||||
//! Claim generator for RAB interpretations.
|
||||
//!
|
||||
//! This module generates claims with evidence citations based on
|
||||
//! neighbor evidence, cluster context, and sequence context.
|
||||
|
||||
use tracing::{debug, instrument};
|
||||
|
||||
use crate::application::services::InterpretationConfig;
|
||||
use crate::domain::entities::{
|
||||
Claim, ClusterContext, ClusterId, EmbeddingId, EvidenceRef, EvidenceRefType,
|
||||
NeighborEvidence, SequenceContext,
|
||||
};
|
||||
use crate::infrastructure::evidence_builder::EvidenceContext;
|
||||
use crate::templates::InterpretationTemplates;
|
||||
use crate::Result;
|
||||
|
||||
/// Generator for evidence-backed claims.
///
/// The `ClaimGenerator` creates claims based on available evidence
/// and ensures each claim has proper citations.
#[derive(Debug, Clone)]
pub struct ClaimGenerator {
    /// Minimum confidence threshold for claims; lower-confidence claims are
    /// filtered out of the final result.
    min_confidence: f32,
    /// Maximum claims to keep per query (after sorting by confidence).
    max_claims: usize,
    /// Templates for claim text.
    templates: InterpretationTemplates,
}
|
||||
|
||||
impl ClaimGenerator {
|
||||
    /// Create a new claim generator from configuration.
    ///
    /// Copies the claim-confidence floor and claim cap out of `config` and
    /// uses the default interpretation templates.
    pub fn new(config: &InterpretationConfig) -> Self {
        Self {
            min_confidence: config.min_claim_confidence,
            max_claims: config.max_claims,
            templates: InterpretationTemplates::new(),
        }
    }
|
||||
|
||||
    /// Create a claim generator with custom parameters.
    ///
    /// Bypasses `InterpretationConfig`; mainly useful for tests and ad-hoc
    /// callers. Uses the default interpretation templates.
    pub fn with_params(min_confidence: f32, max_claims: usize) -> Self {
        Self {
            min_confidence,
            max_claims,
            templates: InterpretationTemplates::new(),
        }
    }
|
||||
|
||||
/// Generate claims from collected evidence.
|
||||
///
|
||||
/// Claims are generated based on:
|
||||
/// - Neighbor similarity patterns
|
||||
/// - Cluster assignments
|
||||
/// - Taxonomic information
|
||||
/// - Temporal sequence patterns
|
||||
#[instrument(skip(self, neighbors, cluster_context, sequence_context))]
|
||||
pub async fn generate_claims(
|
||||
&self,
|
||||
query_id: &EmbeddingId,
|
||||
neighbors: &[NeighborEvidence],
|
||||
cluster_context: &ClusterContext,
|
||||
sequence_context: &Option<SequenceContext>,
|
||||
) -> Result<Vec<Claim>> {
|
||||
let context = EvidenceContext::from_evidence(neighbors, cluster_context, sequence_context);
|
||||
let mut claims = Vec::new();
|
||||
|
||||
// Generate similarity-based claims
|
||||
let similarity_claims = self.generate_similarity_claims(neighbors, &context);
|
||||
claims.extend(similarity_claims);
|
||||
|
||||
// Generate cluster-based claims
|
||||
if cluster_context.has_cluster() {
|
||||
let cluster_claims = self.generate_cluster_claims(cluster_context, &context);
|
||||
claims.extend(cluster_claims);
|
||||
}
|
||||
|
||||
// Generate taxonomy-based claims
|
||||
if !context.unique_taxa.is_empty() {
|
||||
let taxon_claims = self.generate_taxon_claims(neighbors, &context);
|
||||
claims.extend(taxon_claims);
|
||||
}
|
||||
|
||||
// Generate sequence-based claims
|
||||
if let Some(seq) = sequence_context {
|
||||
if seq.has_temporal_context() {
|
||||
let sequence_claims = self.generate_sequence_claims(seq, &context);
|
||||
claims.extend(sequence_claims);
|
||||
}
|
||||
}
|
||||
|
||||
// Filter by confidence and limit
|
||||
let mut claims: Vec<Claim> = claims
|
||||
.into_iter()
|
||||
.filter(|c| c.confidence >= self.min_confidence)
|
||||
.collect();
|
||||
|
||||
// Sort by confidence (highest first)
|
||||
claims.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
|
||||
|
||||
// Limit number of claims
|
||||
claims.truncate(self.max_claims);
|
||||
|
||||
debug!(
|
||||
"Generated {} claims for query {}",
|
||||
claims.len(),
|
||||
query_id
|
||||
);
|
||||
|
||||
Ok(claims)
|
||||
}
|
||||
|
||||
    /// Generate claims based on neighbor similarity.
    ///
    /// Produces at most two kinds of claims: one characterizing the overall
    /// similarity band (high / moderate / low) of the neighbor set, and —
    /// when the first neighbor is very close (distance < 0.2) — one about
    /// that closest match.
    // NOTE(review): the "closest match" claim uses `neighbors.first()`, which
    // assumes the slice arrives sorted by ascending distance — confirm at the
    // call site.
    fn generate_similarity_claims(
        &self,
        neighbors: &[NeighborEvidence],
        context: &EvidenceContext,
    ) -> Vec<Claim> {
        let mut claims = Vec::new();

        // No neighbors, nothing to say.
        if neighbors.is_empty() {
            return claims;
        }

        // Claim about overall similarity
        let similarity = context.scores.avg_similarity;
        if similarity >= 0.7 {
            let statement = self.templates.high_similarity_claim(
                neighbors.len(),
                similarity,
            );
            // Confidence tracks the similarity, slightly discounted.
            let confidence = similarity * 0.9;

            // Cite the first three neighbors as supporting evidence.
            let evidence: Vec<EvidenceRef> = neighbors
                .iter()
                .take(3)
                .map(|n| {
                    EvidenceRef::neighbor(
                        &n.embedding_id,
                        format!(
                            "Similarity: {:.1}% (distance: {:.3})",
                            n.similarity() * 100.0,
                            n.distance
                        ),
                    )
                })
                .collect();

            claims.push(Claim::new(statement, confidence).with_evidence(evidence));
        } else if similarity >= 0.5 {
            let statement = self.templates.moderate_similarity_claim(neighbors.len());
            // Moderate band gets a steeper discount than the high band.
            let confidence = similarity * 0.8;

            // Cite only the first two neighbors for the weaker claim.
            let evidence: Vec<EvidenceRef> = neighbors
                .iter()
                .take(2)
                .map(|n| {
                    EvidenceRef::neighbor(
                        &n.embedding_id,
                        format!("Distance: {:.3}", n.distance),
                    )
                })
                .collect();

            claims.push(Claim::new(statement, confidence).with_evidence(evidence));
        } else if neighbors.len() >= 3 {
            // Low similarity is only worth asserting with at least three
            // neighbors to back an aggregate statement.
            let statement = self.templates.low_similarity_claim();
            let confidence = 0.4;

            // Single aggregate citation instead of per-neighbor refs.
            let evidence = vec![EvidenceRef::new(
                EvidenceRefType::Neighbor,
                "aggregate",
                format!(
                    "Average distance: {:.3} across {} neighbors",
                    context.scores.avg_distance,
                    neighbors.len()
                ),
            )];

            claims.push(Claim::new(statement, confidence).with_evidence(evidence));
        }

        // Claim about closest match
        if let Some(closest) = neighbors.first() {
            if closest.distance < 0.2 {
                // Append the taxon in parentheses when it is known.
                let taxon_info = closest
                    .recording_metadata
                    .taxon
                    .as_deref()
                    .map(|t| format!(" ({})", t))
                    .unwrap_or_default();

                let statement = format!(
                    "Strong acoustic match found with recording {}{}",
                    closest.recording_metadata.recording_id,
                    taxon_info
                );
                // distance < 0.2, so this confidence lands in (0.76, 0.95].
                let confidence = (1.0 - closest.distance) * 0.95;

                let evidence = vec![
                    EvidenceRef::neighbor(
                        &closest.embedding_id,
                        format!(
                            "Closest neighbor with distance {:.3} ({:.1}% similarity)",
                            closest.distance,
                            closest.similarity() * 100.0
                        ),
                    ),
                ];

                claims.push(Claim::new(statement, confidence).with_evidence(evidence));
            }
        }

        claims
    }
|
||||
|
||||
    /// Generate claims based on cluster assignment.
    ///
    /// Emits a primary claim about the assigned cluster (if any) and, when
    /// the neighbor set largely agrees with that assignment
    /// (`cluster_coherence > 0.5`), a secondary claim about consistency.
    /// Returns an empty vector when no cluster is assigned.
    fn generate_cluster_claims(
        &self,
        cluster_context: &ClusterContext,
        context: &EvidenceContext,
    ) -> Vec<Claim> {
        let mut claims = Vec::new();

        // No cluster assignment means there is nothing to claim.
        let cluster_id = match &cluster_context.assigned_cluster {
            Some(id) => id,
            None => return claims,
        };

        let label = cluster_context
            .cluster_label
            .as_deref()
            .unwrap_or("unlabeled cluster");

        // Main cluster assignment claim
        let statement = self.templates.cluster_assignment_claim(
            label,
            cluster_context.confidence,
            cluster_context.exemplar_similarity,
        );
        // Combined confidence: both the assignment confidence and the
        // exemplar similarity must be high for a strong claim.
        let confidence = cluster_context.confidence * cluster_context.exemplar_similarity;

        let evidence = vec![
            EvidenceRef::cluster(
                cluster_id,
                format!(
                    "Assigned to cluster '{}' with {:.1}% confidence, {:.1}% exemplar similarity",
                    label,
                    cluster_context.confidence * 100.0,
                    cluster_context.exemplar_similarity * 100.0
                ),
            ),
        ];

        claims.push(Claim::new(statement, confidence).with_evidence(evidence));

        // Claim about cluster coherence with neighbors
        if context.scores.cluster_coherence > 0.5 {
            let statement = format!(
                "Acoustic features are consistent with {} - {:.0}% of similar recordings belong to the same cluster",
                label,
                context.scores.cluster_coherence * 100.0
            );
            let confidence = context.scores.cluster_coherence * 0.85;

            let evidence = vec![
                EvidenceRef::cluster(
                    cluster_id,
                    format!(
                        "{:.0}% cluster coherence among neighbors",
                        context.scores.cluster_coherence * 100.0
                    ),
                ),
            ];

            claims.push(Claim::new(statement, confidence).with_evidence(evidence));
        }

        claims
    }
|
||||
|
||||
/// Generate claims based on taxonomic information.
|
||||
fn generate_taxon_claims(
|
||||
&self,
|
||||
neighbors: &[NeighborEvidence],
|
||||
context: &EvidenceContext,
|
||||
) -> Vec<Claim> {
|
||||
let mut claims = Vec::new();
|
||||
|
||||
if context.unique_taxa.is_empty() {
|
||||
return claims;
|
||||
}
|
||||
|
||||
// Count taxa occurrences
|
||||
let mut taxon_counts: std::collections::HashMap<&str, (usize, Vec<&NeighborEvidence>)> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for neighbor in neighbors {
|
||||
if let Some(taxon) = &neighbor.recording_metadata.taxon {
|
||||
let entry = taxon_counts.entry(taxon.as_str()).or_insert((0, Vec::new()));
|
||||
entry.0 += 1;
|
||||
entry.1.push(neighbor);
|
||||
}
|
||||
}
|
||||
|
||||
// Find dominant taxon
|
||||
if let Some((taxon, (count, examples))) = taxon_counts
|
||||
.iter()
|
||||
.max_by_key(|(_, (count, _))| count)
|
||||
{
|
||||
let proportion = *count as f32 / neighbors.len() as f32;
|
||||
|
||||
if proportion >= 0.6 {
|
||||
let statement = self.templates.dominant_taxon_claim(taxon, proportion);
|
||||
let confidence = proportion * context.scores.avg_similarity;
|
||||
|
||||
let evidence: Vec<EvidenceRef> = examples
|
||||
.iter()
|
||||
.take(3)
|
||||
.map(|n| {
|
||||
EvidenceRef::taxon(
|
||||
*taxon,
|
||||
format!(
|
||||
"Recording {} identified as {} (similarity: {:.1}%)",
|
||||
n.recording_metadata.recording_id,
|
||||
taxon,
|
||||
n.similarity() * 100.0
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
claims.push(Claim::new(statement, confidence).with_evidence(evidence));
|
||||
} else if context.unique_taxa.len() > 1 {
|
||||
// Multiple taxa present
|
||||
let taxa_list = context.unique_taxa.join(", ");
|
||||
let statement = format!(
|
||||
"Acoustic features show similarity to multiple taxa: {}. Further analysis recommended.",
|
||||
taxa_list
|
||||
);
|
||||
let confidence = 0.5;
|
||||
|
||||
let evidence: Vec<EvidenceRef> = context
|
||||
.unique_taxa
|
||||
.iter()
|
||||
.take(3)
|
||||
.map(|t| {
|
||||
let count = taxon_counts.get(t.as_str()).map(|(c, _)| *c).unwrap_or(0);
|
||||
EvidenceRef::taxon(
|
||||
t,
|
||||
format!("{} neighbors identified as {}", count, t),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
claims.push(Claim::new(statement, confidence).with_evidence(evidence));
|
||||
}
|
||||
}
|
||||
|
||||
claims
|
||||
}
|
||||
|
||||
/// Generate claims based on sequence context.
|
||||
fn generate_sequence_claims(
|
||||
&self,
|
||||
sequence_context: &SequenceContext,
|
||||
context: &EvidenceContext,
|
||||
) -> Vec<Claim> {
|
||||
let mut claims = Vec::new();
|
||||
|
||||
// Claim about temporal context
|
||||
let preceding = sequence_context.preceding_segments.len();
|
||||
let following = sequence_context.following_segments.len();
|
||||
|
||||
if preceding > 0 || following > 0 {
|
||||
let statement = self.templates.sequence_context_claim(preceding, following);
|
||||
let confidence = 0.7;
|
||||
|
||||
let mut evidence = Vec::new();
|
||||
|
||||
for (i, seg) in sequence_context.preceding_segments.iter().enumerate() {
|
||||
evidence.push(EvidenceRef::sequence(
|
||||
seg,
|
||||
format!("Preceding segment {} at position -{}", seg.0, preceding - i),
|
||||
));
|
||||
}
|
||||
|
||||
for (i, seg) in sequence_context.following_segments.iter().enumerate() {
|
||||
evidence.push(EvidenceRef::sequence(
|
||||
seg,
|
||||
format!("Following segment {} at position +{}", seg.0, i + 1),
|
||||
));
|
||||
}
|
||||
|
||||
claims.push(Claim::new(statement, confidence).with_evidence(evidence));
|
||||
}
|
||||
|
||||
// Claim about detected motif
|
||||
if let Some(motif) = &sequence_context.detected_motif {
|
||||
let statement = self.templates.motif_claim(motif);
|
||||
let confidence = 0.75;
|
||||
|
||||
let evidence = vec![EvidenceRef::new(
|
||||
EvidenceRefType::Sequence,
|
||||
"motif-detection",
|
||||
format!(
|
||||
"Motif pattern '{}' detected across {} segments",
|
||||
motif,
|
||||
sequence_context.sequence_length()
|
||||
),
|
||||
)];
|
||||
|
||||
claims.push(Claim::new(statement, confidence).with_evidence(evidence));
|
||||
}
|
||||
|
||||
claims
|
||||
}
|
||||
|
||||
/// Generate a claim from manual input with evidence validation.
|
||||
pub fn create_manual_claim(
|
||||
&self,
|
||||
statement: &str,
|
||||
confidence: f32,
|
||||
evidence_refs: Vec<EvidenceRef>,
|
||||
) -> Result<Claim> {
|
||||
if evidence_refs.is_empty() {
|
||||
return Err(crate::Error::ClaimValidationFailed(
|
||||
"Claims must have at least one evidence reference".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let confidence = confidence.clamp(0.0, 1.0);
|
||||
|
||||
if confidence < self.min_confidence {
|
||||
debug!(
|
||||
"Claim confidence {} below threshold {}: {}",
|
||||
confidence, self.min_confidence, statement
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Claim::new(statement, confidence).with_evidence(evidence_refs))
|
||||
}
|
||||
|
||||
/// Merge multiple claims about the same topic.
|
||||
pub fn merge_claims(&self, claims: &[Claim]) -> Option<Claim> {
|
||||
if claims.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if claims.len() == 1 {
|
||||
return Some(claims[0].clone());
|
||||
}
|
||||
|
||||
// Combine evidence from all claims
|
||||
let mut all_evidence: Vec<EvidenceRef> = Vec::new();
|
||||
let mut total_confidence = 0.0;
|
||||
|
||||
for claim in claims {
|
||||
all_evidence.extend(claim.evidence_refs.clone());
|
||||
total_confidence += claim.confidence;
|
||||
}
|
||||
|
||||
// Deduplicate evidence by ref_id
|
||||
let mut seen_ids = std::collections::HashSet::new();
|
||||
all_evidence.retain(|e| seen_ids.insert(e.ref_id.clone()));
|
||||
|
||||
let avg_confidence = total_confidence / claims.len() as f32;
|
||||
|
||||
// Use the statement from the highest-confidence claim
|
||||
let best_claim = claims
|
||||
.iter()
|
||||
.max_by(|a, b| a.confidence.partial_cmp(&b.confidence).unwrap())
|
||||
.unwrap();
|
||||
|
||||
Some(
|
||||
Claim::new(&best_claim.statement, avg_confidence)
|
||||
.with_evidence(all_evidence),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for creating claims with proper evidence citations.
///
/// Collects a statement, a confidence level (default 0.5), and a list of
/// evidence references via the `cite_*` methods; `build` or
/// `build_validated` produce the final `Claim`.
#[derive(Debug)]
pub struct ClaimBuilder {
    /// The claim text to be emitted.
    statement: String,
    /// Confidence in [0.0, 1.0]; clamped by `confidence()`.
    confidence: f32,
    /// Evidence references accumulated via the `cite_*` methods.
    evidence: Vec<EvidenceRef>,
}
|
||||
|
||||
impl ClaimBuilder {
    /// Start building a new claim.
    ///
    /// The confidence defaults to 0.5 until overridden via `confidence()`,
    /// and the evidence list starts empty.
    pub fn new(statement: impl Into<String>) -> Self {
        Self {
            statement: statement.into(),
            confidence: 0.5,
            evidence: Vec::new(),
        }
    }

    /// Set the confidence level.
    ///
    /// The value is clamped to the [0.0, 1.0] range.
    pub fn confidence(mut self, confidence: f32) -> Self {
        self.confidence = confidence.clamp(0.0, 1.0);
        self
    }

    /// Add a neighbor evidence reference.
    pub fn cite_neighbor(
        mut self,
        embedding_id: &EmbeddingId,
        description: impl Into<String>,
    ) -> Self {
        self.evidence.push(EvidenceRef::neighbor(embedding_id, description));
        self
    }

    /// Add a cluster evidence reference.
    pub fn cite_cluster(
        mut self,
        cluster_id: &ClusterId,
        description: impl Into<String>,
    ) -> Self {
        self.evidence.push(EvidenceRef::cluster(cluster_id, description));
        self
    }

    /// Add a taxon evidence reference.
    pub fn cite_taxon(
        mut self,
        taxon: impl Into<String>,
        description: impl Into<String>,
    ) -> Self {
        self.evidence.push(EvidenceRef::taxon(taxon, description));
        self
    }

    /// Add a sequence evidence reference.
    pub fn cite_sequence(
        mut self,
        segment_id: &crate::domain::entities::SegmentId,
        description: impl Into<String>,
    ) -> Self {
        self.evidence.push(EvidenceRef::sequence(segment_id, description));
        self
    }

    /// Build the claim.
    ///
    /// No validation is performed; use `build_validated` to require at
    /// least one evidence reference.
    pub fn build(self) -> Claim {
        Claim::new(self.statement, self.confidence).with_evidence(self.evidence)
    }

    /// Build the claim, requiring at least one evidence reference.
    ///
    /// # Errors
    ///
    /// Returns `Error::ClaimValidationFailed` when no evidence was cited.
    pub fn build_validated(self) -> Result<Claim> {
        if self.evidence.is_empty() {
            return Err(crate::Error::ClaimValidationFailed(
                "Claims must have at least one evidence reference".to_string(),
            ));
        }
        Ok(self.build())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::entities::RecordingMetadata;

    // Three close neighbors: two "Species A" recordings in cluster c1 and
    // one "Species B" recording in cluster c2, sorted by ascending distance.
    fn create_test_neighbors() -> Vec<NeighborEvidence> {
        vec![
            NeighborEvidence::new(
                EmbeddingId::new("n1"),
                0.1,
                RecordingMetadata::new("r1").with_taxon("Species A"),
            ).with_cluster(ClusterId::new("c1")),
            NeighborEvidence::new(
                EmbeddingId::new("n2"),
                0.15,
                RecordingMetadata::new("r2").with_taxon("Species A"),
            ).with_cluster(ClusterId::new("c1")),
            NeighborEvidence::new(
                EmbeddingId::new("n3"),
                0.2,
                RecordingMetadata::new("r3").with_taxon("Species B"),
            ).with_cluster(ClusterId::new("c2")),
        ]
    }

    // With close neighbors and no cluster assignment, claims are still
    // generated and each one must cite evidence.
    #[tokio::test]
    async fn test_generate_claims_with_neighbors() {
        let generator = ClaimGenerator::with_params(0.3, 10);
        let neighbors = create_test_neighbors();
        let cluster_context = ClusterContext::empty();
        let query_id = EmbeddingId::new("query-1");

        let claims = generator
            .generate_claims(&query_id, &neighbors, &cluster_context, &None)
            .await
            .unwrap();

        assert!(!claims.is_empty());

        // All claims should have evidence
        for claim in &claims {
            assert!(claim.has_evidence(), "Claim should have evidence: {}", claim.statement);
        }
    }

    // A confident cluster assignment should yield cluster claims that cite
    // cluster-type evidence references.
    #[tokio::test]
    async fn test_generate_claims_with_cluster() {
        let generator = ClaimGenerator::with_params(0.3, 10);
        let neighbors = create_test_neighbors();
        let cluster_context = ClusterContext::new(
            Some(ClusterId::new("c1")),
            0.9,
            0.85,
        ).with_label("Song Type A");
        let query_id = EmbeddingId::new("query-1");

        let claims = generator
            .generate_claims(&query_id, &neighbors, &cluster_context, &None)
            .await
            .unwrap();

        // Should have cluster-related claims
        let cluster_claims: Vec<_> = claims
            .iter()
            .filter(|c| c.statement.contains("cluster") || c.statement.contains("Song Type A"))
            .collect();

        assert!(!cluster_claims.is_empty());

        // Cluster claims should cite the cluster
        for claim in cluster_claims {
            let cluster_refs = claim.evidence_of_type(EvidenceRefType::Cluster);
            assert!(!cluster_refs.is_empty());
        }
    }

    // Temporal context (two preceding segments, one following, a detected
    // motif) should surface in at least one sequence-related claim.
    #[tokio::test]
    async fn test_generate_claims_with_sequence() {
        let generator = ClaimGenerator::with_params(0.3, 10);
        let neighbors = create_test_neighbors();
        let cluster_context = ClusterContext::empty();
        let sequence_context = Some(SequenceContext::new(
            vec![
                crate::domain::entities::SegmentId::new("seg-1"),
                crate::domain::entities::SegmentId::new("seg-2"),
            ],
            vec![
                crate::domain::entities::SegmentId::new("seg-4"),
            ],
        ).with_motif("ABAB"));

        let query_id = EmbeddingId::new("query-1");

        let claims = generator
            .generate_claims(&query_id, &neighbors, &cluster_context, &sequence_context)
            .await
            .unwrap();

        // Should have sequence-related claims
        let sequence_claims: Vec<_> = claims
            .iter()
            .filter(|c| {
                c.statement.contains("sequence")
                    || c.statement.contains("temporal")
                    || c.statement.contains("motif")
                    || c.statement.contains("ABAB")
            })
            .collect();

        assert!(!sequence_claims.is_empty());
    }

    // The fluent builder should carry statement, clamped confidence, and
    // every cited reference into the built claim.
    #[test]
    fn test_claim_builder() {
        let claim = ClaimBuilder::new("Test claim statement")
            .confidence(0.85)
            .cite_neighbor(
                &EmbeddingId::new("n1"),
                "Supporting neighbor evidence",
            )
            .cite_cluster(
                &ClusterId::new("c1"),
                "Cluster assignment evidence",
            )
            .build();

        assert_eq!(claim.statement, "Test claim statement");
        assert_eq!(claim.confidence, 0.85);
        assert_eq!(claim.evidence_refs.len(), 2);
    }

    // build_validated enforces the "no claim without evidence" rule.
    #[test]
    fn test_claim_builder_validated() {
        // Should fail without evidence
        let result = ClaimBuilder::new("Unsupported claim")
            .confidence(0.9)
            .build_validated();

        assert!(result.is_err());

        // Should succeed with evidence
        let result = ClaimBuilder::new("Supported claim")
            .confidence(0.9)
            .cite_taxon("Species A", "Taxon evidence")
            .build_validated();

        assert!(result.is_ok());
    }

    // Merging keeps evidence from both claims and averages confidence.
    #[test]
    fn test_merge_claims() {
        let generator = ClaimGenerator::with_params(0.3, 10);

        let claim1 = ClaimBuilder::new("Similar acoustic features observed")
            .confidence(0.8)
            .cite_neighbor(&EmbeddingId::new("n1"), "Evidence 1")
            .build();

        let claim2 = ClaimBuilder::new("Similar acoustic features observed")
            .confidence(0.7)
            .cite_neighbor(&EmbeddingId::new("n2"), "Evidence 2")
            .build();

        let merged = generator.merge_claims(&[claim1, claim2]);

        assert!(merged.is_some());
        let merged = merged.unwrap();
        assert_eq!(merged.evidence_refs.len(), 2);
        assert_eq!(merged.confidence, 0.75);
    }

    // Manual claims follow the same evidence-required rule as built ones.
    #[test]
    fn test_create_manual_claim() {
        let generator = ClaimGenerator::with_params(0.5, 10);

        // Should fail without evidence
        let result = generator.create_manual_claim(
            "Test claim",
            0.8,
            Vec::new(),
        );
        assert!(result.is_err());

        // Should succeed with evidence
        let result = generator.create_manual_claim(
            "Test claim",
            0.8,
            vec![EvidenceRef::taxon("Species A", "Manual evidence")],
        );
        assert!(result.is_ok());
    }
}
|
||||
@@ -0,0 +1,565 @@
|
||||
//! Evidence builder for constructing RAB evidence packs.
|
||||
//!
|
||||
//! This module provides utilities for collecting and organizing evidence
|
||||
//! from various sources (neighbors, clusters, sequences) into structured
|
||||
//! evidence packs.
|
||||
|
||||
use tracing::{debug, instrument};
|
||||
|
||||
use crate::application::services::InterpretationConfig;
|
||||
use crate::domain::entities::{
|
||||
ClusterContext, ClusterId, EmbeddingId, NeighborEvidence, RecordingMetadata,
|
||||
SegmentId, SequenceContext,
|
||||
};
|
||||
use crate::Result;
|
||||
|
||||
/// Builder for constructing evidence from various sources.
///
/// The `EvidenceBuilder` provides a structured way to collect and organize
/// evidence for RAB interpretations. Construct it from an
/// [`InterpretationConfig`] via `new`, or use `default_builder` for
/// stand-alone defaults; tune with the `with_*` methods.
#[derive(Debug, Clone)]
pub struct EvidenceBuilder {
    /// Maximum number of neighbors to include
    max_neighbors: usize,

    /// Whether to include spectrogram URLs
    include_spectrograms: bool,

    /// Whether to include sequence context
    include_sequences: bool,

    /// Sequence context window size (max segments kept on each side)
    sequence_window: usize,

    /// Minimum distance threshold for neighbor inclusion (inclusive)
    min_distance_threshold: f32,

    /// Maximum distance threshold for neighbor inclusion (inclusive)
    max_distance_threshold: f32,
}
|
||||
|
||||
impl EvidenceBuilder {
|
||||
/// Create a new evidence builder from configuration.
|
||||
pub fn new(config: &InterpretationConfig) -> Self {
|
||||
Self {
|
||||
max_neighbors: config.max_neighbors,
|
||||
include_spectrograms: config.include_spectrograms,
|
||||
include_sequences: config.include_sequence_context,
|
||||
sequence_window: config.sequence_context_window,
|
||||
min_distance_threshold: 0.0,
|
||||
max_distance_threshold: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a builder with default settings.
|
||||
pub fn default_builder() -> Self {
|
||||
Self {
|
||||
max_neighbors: 10,
|
||||
include_spectrograms: true,
|
||||
include_sequences: true,
|
||||
sequence_window: 3,
|
||||
min_distance_threshold: 0.0,
|
||||
max_distance_threshold: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the maximum number of neighbors.
|
||||
pub fn with_max_neighbors(mut self, n: usize) -> Self {
|
||||
self.max_neighbors = n;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set whether to include spectrogram URLs.
|
||||
pub fn with_spectrograms(mut self, include: bool) -> Self {
|
||||
self.include_spectrograms = include;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the distance threshold range.
|
||||
pub fn with_distance_threshold(mut self, min: f32, max: f32) -> Self {
|
||||
self.min_distance_threshold = min;
|
||||
self.max_distance_threshold = max;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the maximum neighbors setting.
|
||||
pub fn max_neighbors(&self) -> usize {
|
||||
self.max_neighbors
|
||||
}
|
||||
|
||||
/// Check if spectrograms are enabled.
|
||||
pub fn spectrograms_enabled(&self) -> bool {
|
||||
self.include_spectrograms
|
||||
}
|
||||
|
||||
/// Collect neighbor evidence from raw neighbor data.
|
||||
///
|
||||
/// This method processes raw neighbor data and builds structured
|
||||
/// `NeighborEvidence` objects with metadata.
|
||||
#[instrument(skip(self, neighbors))]
|
||||
pub async fn collect_neighbor_evidence(
|
||||
&self,
|
||||
neighbors: &[RawNeighbor],
|
||||
) -> Result<Vec<NeighborEvidence>> {
|
||||
let filtered: Vec<&RawNeighbor> = neighbors
|
||||
.iter()
|
||||
.filter(|n| {
|
||||
n.distance >= self.min_distance_threshold
|
||||
&& n.distance <= self.max_distance_threshold
|
||||
})
|
||||
.take(self.max_neighbors)
|
||||
.collect();
|
||||
|
||||
debug!(
|
||||
"Collecting evidence from {} neighbors (filtered from {})",
|
||||
filtered.len(),
|
||||
neighbors.len()
|
||||
);
|
||||
|
||||
let evidence: Vec<NeighborEvidence> = filtered
|
||||
.into_iter()
|
||||
.map(|n| self.build_neighbor_evidence(n))
|
||||
.collect();
|
||||
|
||||
Ok(evidence)
|
||||
}
|
||||
|
||||
/// Build neighbor evidence from raw neighbor data.
|
||||
fn build_neighbor_evidence(&self, raw: &RawNeighbor) -> NeighborEvidence {
|
||||
let metadata = raw
|
||||
.metadata
|
||||
.clone()
|
||||
.unwrap_or_else(|| RecordingMetadata::new(&raw.embedding_id.0));
|
||||
|
||||
let mut evidence = NeighborEvidence::new(
|
||||
raw.embedding_id.clone(),
|
||||
raw.distance,
|
||||
metadata,
|
||||
);
|
||||
|
||||
if let Some(cluster_id) = &raw.cluster_id {
|
||||
evidence = evidence.with_cluster(cluster_id.clone());
|
||||
}
|
||||
|
||||
if self.include_spectrograms {
|
||||
if let Some(url) = &raw.spectrogram_url {
|
||||
evidence = evidence.with_spectrogram(url.clone());
|
||||
}
|
||||
}
|
||||
|
||||
evidence
|
||||
}
|
||||
|
||||
/// Build cluster context from cluster assignment data.
|
||||
#[instrument(skip(self))]
|
||||
pub async fn build_cluster_context(
|
||||
&self,
|
||||
cluster_id: Option<ClusterId>,
|
||||
label: Option<String>,
|
||||
confidence: f32,
|
||||
exemplar_similarity: f32,
|
||||
) -> Result<ClusterContext> {
|
||||
let context = ClusterContext {
|
||||
assigned_cluster: cluster_id,
|
||||
cluster_label: label,
|
||||
confidence,
|
||||
exemplar_similarity,
|
||||
};
|
||||
|
||||
debug!(
|
||||
"Built cluster context: assigned={}, confidence={}",
|
||||
context.has_cluster(),
|
||||
context.confidence
|
||||
);
|
||||
|
||||
Ok(context)
|
||||
}
|
||||
|
||||
/// Build sequence context from temporal data.
|
||||
#[instrument(skip(self))]
|
||||
pub async fn build_sequence_context(
|
||||
&self,
|
||||
preceding: Vec<SegmentId>,
|
||||
following: Vec<SegmentId>,
|
||||
motif: Option<String>,
|
||||
) -> Result<Option<SequenceContext>> {
|
||||
if !self.include_sequences {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if preceding.is_empty() && following.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let preceding = preceding
|
||||
.into_iter()
|
||||
.take(self.sequence_window)
|
||||
.collect();
|
||||
|
||||
let following = following
|
||||
.into_iter()
|
||||
.take(self.sequence_window)
|
||||
.collect();
|
||||
|
||||
let context = SequenceContext {
|
||||
preceding_segments: preceding,
|
||||
following_segments: following,
|
||||
detected_motif: motif,
|
||||
};
|
||||
|
||||
debug!(
|
||||
"Built sequence context: {} preceding, {} following, motif={}",
|
||||
context.preceding_segments.len(),
|
||||
context.following_segments.len(),
|
||||
context.detected_motif.as_deref().unwrap_or("none")
|
||||
);
|
||||
|
||||
Ok(Some(context))
|
||||
}
|
||||
|
||||
/// Aggregate evidence from multiple sources.
|
||||
pub fn aggregate_evidence_scores(&self, neighbors: &[NeighborEvidence]) -> EvidenceScores {
|
||||
if neighbors.is_empty() {
|
||||
return EvidenceScores::default();
|
||||
}
|
||||
|
||||
let distances: Vec<f32> = neighbors.iter().map(|n| n.distance).collect();
|
||||
let avg_distance = distances.iter().sum::<f32>() / distances.len() as f32;
|
||||
let min_distance = distances.iter().cloned().fold(f32::INFINITY, f32::min);
|
||||
let max_distance = distances.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
||||
|
||||
let similarity = (1.0 - avg_distance).max(0.0).min(1.0);
|
||||
|
||||
// Calculate cluster coherence (how many neighbors share the same cluster)
|
||||
let clustered_count = neighbors
|
||||
.iter()
|
||||
.filter(|n| n.cluster_id.is_some())
|
||||
.count();
|
||||
|
||||
let cluster_coherence = if clustered_count > 0 {
|
||||
// Check if neighbors share clusters
|
||||
let mut cluster_counts = std::collections::HashMap::new();
|
||||
for neighbor in neighbors {
|
||||
if let Some(cid) = &neighbor.cluster_id {
|
||||
*cluster_counts.entry(cid.0.clone()).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
let max_cluster_count = cluster_counts.values().cloned().max().unwrap_or(0);
|
||||
max_cluster_count as f32 / neighbors.len() as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
// Calculate taxon coherence
|
||||
let taxa: Vec<&str> = neighbors
|
||||
.iter()
|
||||
.filter_map(|n| n.recording_metadata.taxon.as_deref())
|
||||
.collect();
|
||||
|
||||
let taxon_coherence = if !taxa.is_empty() {
|
||||
let mut taxon_counts = std::collections::HashMap::new();
|
||||
for taxon in &taxa {
|
||||
*taxon_counts.entry(*taxon).or_insert(0) += 1;
|
||||
}
|
||||
let max_taxon_count = taxon_counts.values().cloned().max().unwrap_or(0);
|
||||
max_taxon_count as f32 / taxa.len() as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
EvidenceScores {
|
||||
neighbor_count: neighbors.len(),
|
||||
avg_distance,
|
||||
min_distance,
|
||||
max_distance,
|
||||
avg_similarity: similarity,
|
||||
cluster_coherence,
|
||||
taxon_coherence,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw neighbor data before processing.
///
/// Input to [`EvidenceBuilder::collect_neighbor_evidence`], which converts
/// it into a structured `NeighborEvidence`. All optional fields are
/// attached via the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct RawNeighbor {
    /// Embedding ID of the neighbor
    pub embedding_id: EmbeddingId,
    /// Distance from query (lower is more similar)
    pub distance: f32,
    /// Optional cluster assignment
    pub cluster_id: Option<ClusterId>,
    /// Optional recording metadata; a stub is synthesized during
    /// processing when absent
    pub metadata: Option<RecordingMetadata>,
    /// Optional spectrogram URL
    pub spectrogram_url: Option<String>,
}
|
||||
|
||||
impl RawNeighbor {
|
||||
/// Create a new raw neighbor.
|
||||
pub fn new(embedding_id: EmbeddingId, distance: f32) -> Self {
|
||||
Self {
|
||||
embedding_id,
|
||||
distance,
|
||||
cluster_id: None,
|
||||
metadata: None,
|
||||
spectrogram_url: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add cluster ID.
|
||||
pub fn with_cluster(mut self, cluster_id: ClusterId) -> Self {
|
||||
self.cluster_id = Some(cluster_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add metadata.
|
||||
pub fn with_metadata(mut self, metadata: RecordingMetadata) -> Self {
|
||||
self.metadata = Some(metadata);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add spectrogram URL.
|
||||
pub fn with_spectrogram(mut self, url: String) -> Self {
|
||||
self.spectrogram_url = Some(url);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregated scores from evidence analysis.
///
/// Produced by `EvidenceBuilder::aggregate_evidence_scores`. The ratio
/// fields are in the [0.0, 1.0] range; `Default` yields an all-zero score
/// set, used when no neighbors are available.
#[derive(Debug, Clone, Default)]
pub struct EvidenceScores {
    /// Number of neighbors
    pub neighbor_count: usize,
    /// Average distance to neighbors
    pub avg_distance: f32,
    /// Minimum distance (closest neighbor)
    pub min_distance: f32,
    /// Maximum distance (farthest neighbor)
    pub max_distance: f32,
    /// Average similarity (1 - avg_distance, clamped to [0, 1])
    pub avg_similarity: f32,
    /// Cluster coherence (0-1, fraction of neighbors in the most common cluster)
    pub cluster_coherence: f32,
    /// Taxon coherence (0-1, fraction of taxon-bearing neighbors sharing the most common taxon)
    pub taxon_coherence: f32,
}
|
||||
|
||||
impl EvidenceScores {
|
||||
/// Calculate overall evidence strength.
|
||||
pub fn overall_strength(&self) -> f32 {
|
||||
if self.neighbor_count == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Weighted combination of scores
|
||||
let similarity_weight = 0.4;
|
||||
let cluster_weight = 0.3;
|
||||
let taxon_weight = 0.3;
|
||||
|
||||
self.avg_similarity * similarity_weight
|
||||
+ self.cluster_coherence * cluster_weight
|
||||
+ self.taxon_coherence * taxon_weight
|
||||
}
|
||||
|
||||
/// Determine if evidence is strong enough for high-confidence claims.
|
||||
pub fn is_strong(&self) -> bool {
|
||||
self.neighbor_count >= 3 && self.overall_strength() >= 0.6
|
||||
}
|
||||
|
||||
/// Determine if evidence is weak (should generate cautious claims).
|
||||
pub fn is_weak(&self) -> bool {
|
||||
self.neighbor_count < 2 || self.overall_strength() < 0.3
|
||||
}
|
||||
}
|
||||
|
||||
/// Evidence aggregation context for building interpretations.
///
/// A snapshot of all collected evidence, assembled via
/// `EvidenceContext::from_evidence` and consumed by the claim generators.
#[derive(Debug)]
pub struct EvidenceContext {
    /// Aggregated scores
    pub scores: EvidenceScores,
    /// Unique taxa found in neighbors
    pub unique_taxa: Vec<String>,
    /// Unique cluster labels found (currently only the assigned cluster's label)
    pub unique_clusters: Vec<String>,
    /// Whether temporal sequence is available
    pub has_sequence: bool,
    /// Detected motif if any
    pub motif: Option<String>,
}
|
||||
|
||||
impl EvidenceContext {
|
||||
/// Build evidence context from collected evidence.
|
||||
pub fn from_evidence(
|
||||
neighbors: &[NeighborEvidence],
|
||||
cluster_context: &ClusterContext,
|
||||
sequence_context: &Option<SequenceContext>,
|
||||
) -> Self {
|
||||
let builder = EvidenceBuilder::default_builder();
|
||||
let scores = builder.aggregate_evidence_scores(neighbors);
|
||||
|
||||
// Collect unique taxa
|
||||
let unique_taxa: Vec<String> = neighbors
|
||||
.iter()
|
||||
.filter_map(|n| n.recording_metadata.taxon.clone())
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// Collect unique cluster labels
|
||||
let mut unique_clusters = Vec::new();
|
||||
if let Some(label) = &cluster_context.cluster_label {
|
||||
unique_clusters.push(label.clone());
|
||||
}
|
||||
|
||||
let has_sequence = sequence_context
|
||||
.as_ref()
|
||||
.map(|s| s.has_temporal_context())
|
||||
.unwrap_or(false);
|
||||
|
||||
let motif = sequence_context
|
||||
.as_ref()
|
||||
.and_then(|s| s.detected_motif.clone());
|
||||
|
||||
Self {
|
||||
scores,
|
||||
unique_taxa,
|
||||
unique_clusters,
|
||||
has_sequence,
|
||||
motif,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Verifies that raw neighbors are converted to evidence with metadata
    // and cluster assignments carried through.
    #[tokio::test]
    async fn test_evidence_builder_collect_neighbors() {
        let builder = EvidenceBuilder::default_builder()
            .with_max_neighbors(5)
            .with_spectrograms(true);

        let raw_neighbors = vec![
            RawNeighbor::new(EmbeddingId::new("n1"), 0.1)
                .with_metadata(RecordingMetadata::new("r1").with_taxon("Species A")),
            RawNeighbor::new(EmbeddingId::new("n2"), 0.2)
                .with_metadata(RecordingMetadata::new("r2").with_taxon("Species A")),
            RawNeighbor::new(EmbeddingId::new("n3"), 0.3)
                .with_cluster(ClusterId::new("c1")),
        ];

        let evidence = builder.collect_neighbor_evidence(&raw_neighbors).await.unwrap();

        assert_eq!(evidence.len(), 3);
        assert_eq!(evidence[0].embedding_id.as_str(), "n1");
        assert_eq!(evidence[0].recording_metadata.taxon, Some("Species A".to_string()));
        assert!(evidence[2].cluster_id.is_some());
    }

    // Neighbors outside the configured [min, max] distance window are dropped.
    #[tokio::test]
    async fn test_evidence_builder_distance_filtering() {
        let builder = EvidenceBuilder::default_builder()
            .with_distance_threshold(0.0, 0.5);

        let raw_neighbors = vec![
            RawNeighbor::new(EmbeddingId::new("close"), 0.2),
            RawNeighbor::new(EmbeddingId::new("far"), 0.8),
        ];

        let evidence = builder.collect_neighbor_evidence(&raw_neighbors).await.unwrap();

        assert_eq!(evidence.len(), 1);
        assert_eq!(evidence[0].embedding_id.as_str(), "close");
    }

    // Aggregate score computation: count, min/avg/max distance, and
    // nonzero coherence when neighbors share clusters/taxa.
    #[test]
    fn test_evidence_scores_calculation() {
        let builder = EvidenceBuilder::default_builder();

        let neighbors = vec![
            NeighborEvidence::new(
                EmbeddingId::new("n1"),
                0.1,
                RecordingMetadata::new("r1").with_taxon("Species A"),
            ).with_cluster(ClusterId::new("c1")),
            NeighborEvidence::new(
                EmbeddingId::new("n2"),
                0.2,
                RecordingMetadata::new("r2").with_taxon("Species A"),
            ).with_cluster(ClusterId::new("c1")),
            NeighborEvidence::new(
                EmbeddingId::new("n3"),
                0.3,
                RecordingMetadata::new("r3").with_taxon("Species B"),
            ).with_cluster(ClusterId::new("c2")),
        ];

        let scores = builder.aggregate_evidence_scores(&neighbors);

        assert_eq!(scores.neighbor_count, 3);
        assert!((scores.avg_distance - 0.2).abs() < 0.001);
        assert!((scores.min_distance - 0.1).abs() < 0.001);
        assert!((scores.max_distance - 0.3).abs() < 0.001);
        assert!(scores.cluster_coherence > 0.0);
        assert!(scores.taxon_coherence > 0.0);
    }

    // End-to-end construction of EvidenceContext: taxa deduplication,
    // cluster label collection, and sequence/motif extraction.
    #[test]
    fn test_evidence_context_from_evidence() {
        let neighbors = vec![
            NeighborEvidence::new(
                EmbeddingId::new("n1"),
                0.1,
                RecordingMetadata::new("r1").with_taxon("Species A"),
            ),
            NeighborEvidence::new(
                EmbeddingId::new("n2"),
                0.2,
                RecordingMetadata::new("r2").with_taxon("Species B"),
            ),
        ];

        let cluster_context = ClusterContext::new(
            Some(ClusterId::new("c1")),
            0.9,
            0.85,
        ).with_label("Song Type A");

        let sequence_context = Some(SequenceContext::new(
            vec![SegmentId::new("s1")],
            vec![SegmentId::new("s3")],
        ).with_motif("ABAB"));

        let context = EvidenceContext::from_evidence(
            &neighbors,
            &cluster_context,
            &sequence_context,
        );

        assert_eq!(context.unique_taxa.len(), 2);
        assert_eq!(context.unique_clusters.len(), 1);
        assert!(context.has_sequence);
        assert_eq!(context.motif, Some("ABAB".to_string()));
    }

    // Sequence context construction preserves segment lists and the motif.
    #[tokio::test]
    async fn test_build_sequence_context() {
        let builder = EvidenceBuilder::default_builder();

        let context = builder
            .build_sequence_context(
                vec![SegmentId::new("s1"), SegmentId::new("s2")],
                vec![SegmentId::new("s4")],
                Some("AABB".to_string()),
            )
            .await
            .unwrap();

        assert!(context.is_some());
        let ctx = context.unwrap();
        assert_eq!(ctx.preceding_segments.len(), 2);
        assert_eq!(ctx.following_segments.len(), 1);
        assert_eq!(ctx.detected_motif, Some("AABB".to_string()));
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
//! Infrastructure layer for the Interpretation bounded context.
|
||||
//!
|
||||
//! Contains implementations for evidence building and claim generation.
|
||||
|
||||
pub mod evidence_builder;
|
||||
pub mod claim_generator;
|
||||
33
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/lib.rs
vendored
Normal file
33
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/lib.rs
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
//! # sevensense-interpretation
|
||||
//!
|
||||
//! LLM-powered interpretation for the 7sense bioacoustics platform.
|
||||
//!
|
||||
//! This crate provides:
|
||||
//! - Natural language report generation
|
||||
//! - Conservation insights
|
||||
//! - Anomaly explanation
|
||||
//! - Multi-language support
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! ```text
|
||||
//! sevensense-interpretation
|
||||
//! ├── reports/ # Report generation
|
||||
//! ├── insights/ # Conservation insights
|
||||
//! ├── prompts/ # Prompt templates
|
||||
//! └── providers/ # LLM provider integrations
|
||||
//! ```
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![warn(clippy::all)]
|
||||
#![warn(clippy::pedantic)]
|
||||
#![allow(clippy::module_name_repetitions)]
|
||||
|
||||
// TODO: Implement interpretation modules
|
||||
// - reports: Structured report generation
|
||||
// - insights: Ecological pattern detection
|
||||
// - prompts: Template management
|
||||
// - providers: Claude, GPT-4, local models
|
||||
|
||||
/// Crate version information, taken from `Cargo.toml` (`CARGO_PKG_VERSION`)
/// at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
654
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/templates.rs
vendored
Normal file
654
vendor/ruvector/examples/vibecast-7sense/crates/sevensense-interpretation/src/templates.rs
vendored
Normal file
@@ -0,0 +1,654 @@
|
||||
//! Interpretation text templates.
|
||||
//!
|
||||
//! This module provides templates for generating human-readable
|
||||
//! interpretations and claims.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Templates for generating interpretation text.
///
/// All built-in template text is hard-coded in the methods of this type;
/// `custom_templates` allows overriding individual templates by key.
#[derive(Debug, Clone)]
pub struct InterpretationTemplates {
    /// Custom template overrides, keyed by template name. When a key is
    /// present its value is returned verbatim instead of the built-in
    /// default text.
    custom_templates: HashMap<String, String>,
}
|
||||
|
||||
impl Default for InterpretationTemplates {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl InterpretationTemplates {
    /// Create a new templates instance with default templates.
    pub fn new() -> Self {
        Self {
            custom_templates: HashMap::new(),
        }
    }

    /// Add a custom template override.
    ///
    /// The override is returned verbatim by `get_template`; it is not a
    /// format pattern (no placeholder substitution is performed).
    pub fn with_template(mut self, key: &str, template: &str) -> Self {
        self.custom_templates.insert(key.to_string(), template.to_string());
        self
    }

    /// Get a template by key, falling back to default.
    ///
    /// NOTE(review): only `low_similarity_claim` currently consults custom
    /// overrides via this method; every other template below is hard-coded.
    /// Confirm whether overrides are intended to apply more broadly.
    fn get_template(&self, key: &str, default: &str) -> String {
        self.custom_templates
            .get(key)
            .cloned()
            .unwrap_or_else(|| default.to_string())
    }

    // === Structural Description Templates ===

    /// Generate neighbor-based description.
    ///
    /// `similarity` is a fraction in [0, 1]; it is bucketed into a
    /// qualitative level (high >= 0.8, moderate >= 0.6, low >= 0.4,
    /// otherwise minimal) and also reported as a percentage.
    pub fn neighbor_description(&self, count: usize, similarity: f32) -> String {
        let similarity_level = if similarity >= 0.8 {
            "high"
        } else if similarity >= 0.6 {
            "moderate"
        } else if similarity >= 0.4 {
            "low"
        } else {
            "minimal"
        };

        format!(
            "Acoustic signal shows {} similarity ({:.1}%) to {} reference recordings in the database.",
            similarity_level,
            similarity * 100.0,
            count
        )
    }

    /// Generate taxon-based description.
    ///
    /// Duplicates are removed (first-seen order preserved); an empty input
    /// yields an empty string.
    pub fn taxon_description(&self, taxa: &[&str]) -> String {
        if taxa.is_empty() {
            return String::new();
        }

        // Order-preserving deduplication via a HashSet of seen values.
        let unique_taxa: Vec<&str> = {
            let mut seen = std::collections::HashSet::new();
            taxa.iter().filter(|t| seen.insert(**t)).copied().collect()
        };

        if unique_taxa.len() == 1 {
            format!(
                "Reference recordings are primarily associated with {}.",
                unique_taxa[0]
            )
        } else {
            let taxa_list = unique_taxa.join(", ");
            format!(
                "Reference recordings span multiple taxa: {}.",
                taxa_list
            )
        }
    }

    /// Generate cluster-based description.
    ///
    /// `confidence` and `exemplar_similarity` are fractions in [0, 1];
    /// confidence is bucketed (very high >= 0.9, high >= 0.7,
    /// moderate >= 0.5, otherwise low).
    pub fn cluster_description(
        &self,
        label: &str,
        confidence: f32,
        exemplar_similarity: f32,
    ) -> String {
        let confidence_level = if confidence >= 0.9 {
            "very high"
        } else if confidence >= 0.7 {
            "high"
        } else if confidence >= 0.5 {
            "moderate"
        } else {
            "low"
        };

        format!(
            "Cluster analysis places this vocalization in '{}' with {} confidence ({:.1}%) and {:.1}% similarity to the cluster exemplar.",
            label,
            confidence_level,
            confidence * 100.0,
            exemplar_similarity * 100.0
        )
    }

    /// Generate sequence-based description.
    ///
    /// Appends a motif sentence only when a motif was detected.
    pub fn sequence_description(&self, sequence_length: usize, motif: Option<&str>) -> String {
        let base = format!(
            "Temporal analysis reveals this segment is part of a {} vocalization sequence.",
            sequence_length
        );

        if let Some(m) = motif {
            format!("{} A recurring motif pattern '{}' has been detected.", base, m)
        } else {
            base
        }
    }

    // === Claim Templates ===

    /// High similarity claim.
    pub fn high_similarity_claim(&self, count: usize, similarity: f32) -> String {
        format!(
            "Strong acoustic similarity ({:.1}%) to {} database recordings suggests a reliable identification.",
            similarity * 100.0,
            count
        )
    }

    /// Moderate similarity claim.
    pub fn moderate_similarity_claim(&self, count: usize) -> String {
        format!(
            "Moderate acoustic similarity to {} reference recordings found. Additional context recommended for confident identification.",
            count
        )
    }

    /// Low similarity claim.
    ///
    /// Overridable via the `"low_similarity"` custom-template key.
    pub fn low_similarity_claim(&self) -> String {
        self.get_template(
            "low_similarity",
            "Limited similarity to existing reference recordings. This may represent an unusual vocalization variant or a novel recording."
        )
    }

    /// Cluster assignment claim.
    pub fn cluster_assignment_claim(
        &self,
        label: &str,
        confidence: f32,
        exemplar_similarity: f32,
    ) -> String {
        format!(
            "This vocalization is classified as '{}' based on acoustic clustering (confidence: {:.1}%, exemplar similarity: {:.1}%).",
            label,
            confidence * 100.0,
            exemplar_similarity * 100.0
        )
    }

    /// Dominant taxon claim.
    ///
    /// `proportion` is the fraction (in [0, 1]) of similar recordings that
    /// belong to `taxon`.
    pub fn dominant_taxon_claim(&self, taxon: &str, proportion: f32) -> String {
        format!(
            "Acoustic features strongly suggest {} ({:.0}% of similar recordings in the database belong to this taxon).",
            taxon,
            proportion * 100.0
        )
    }

    /// Sequence context claim.
    pub fn sequence_context_claim(&self, preceding: usize, following: usize) -> String {
        format!(
            "This vocalization appears within a temporal sequence with {} preceding and {} following segments, providing additional context for interpretation.",
            preceding,
            following
        )
    }

    /// Motif claim.
    pub fn motif_claim(&self, motif: &str) -> String {
        format!(
            "A repeating acoustic motif '{}' has been detected in the vocalization sequence, suggesting a structured call pattern.",
            motif
        )
    }

    // === Evidence Description Templates ===

    /// Format neighbor evidence description.
    ///
    /// Similarity is derived as `(1 - distance) * 100`, clamped at 0 so a
    /// distance greater than 1 never reports a negative percentage.
    pub fn neighbor_evidence_description(
        &self,
        recording_id: &str,
        distance: f32,
        taxon: Option<&str>,
    ) -> String {
        let similarity = ((1.0 - distance) * 100.0).max(0.0);

        if let Some(t) = taxon {
            format!(
                "Recording {} ({}) with {:.1}% acoustic similarity",
                recording_id, t, similarity
            )
        } else {
            format!(
                "Recording {} with {:.1}% acoustic similarity",
                recording_id, similarity
            )
        }
    }

    /// Format cluster evidence description.
    pub fn cluster_evidence_description(
        &self,
        label: &str,
        confidence: f32,
    ) -> String {
        format!(
            "Assigned to cluster '{}' with {:.1}% confidence",
            label,
            confidence * 100.0
        )
    }

    /// Format sequence evidence description.
    ///
    /// `position` is relative to the target segment: negative = before,
    /// positive = after, zero = the target itself.
    pub fn sequence_evidence_description(
        &self,
        segment_id: &str,
        position: i32,
    ) -> String {
        let position_desc = if position < 0 {
            format!("position {} before target", -position)
        } else if position > 0 {
            format!("position {} after target", position)
        } else {
            "target position".to_string()
        };

        format!("Segment {} at {}", segment_id, position_desc)
    }

    // === Summary Templates ===

    /// Generate an overall summary.
    ///
    /// Composes: a similarity sentence (bucketed at 0.8 / 0.5), optional
    /// cluster and taxon clauses, and a confidence qualifier (bucketed at
    /// 0.8 / 0.5), then joins them into a single paragraph.
    pub fn generate_summary(
        &self,
        neighbor_count: usize,
        avg_similarity: f32,
        cluster_label: Option<&str>,
        dominant_taxon: Option<&str>,
        confidence: f32,
    ) -> String {
        let mut parts = Vec::new();

        // Similarity summary
        let similarity_desc = if avg_similarity >= 0.8 {
            format!(
                "highly similar ({:.1}%) to {} reference recordings",
                avg_similarity * 100.0,
                neighbor_count
            )
        } else if avg_similarity >= 0.5 {
            format!(
                "moderately similar ({:.1}%) to {} reference recordings",
                avg_similarity * 100.0,
                neighbor_count
            )
        } else {
            format!(
                "shows limited similarity ({:.1}%) to {} reference recordings",
                avg_similarity * 100.0,
                neighbor_count
            )
        };
        parts.push(similarity_desc);

        // Cluster summary
        if let Some(label) = cluster_label {
            parts.push(format!("classified in cluster '{}'", label));
        }

        // Taxon summary
        if let Some(taxon) = dominant_taxon {
            parts.push(format!("likely associated with {}", taxon));
        }

        let main_summary = parts.join(", ");

        // Confidence qualifier
        let confidence_qualifier = if confidence >= 0.8 {
            "High confidence interpretation"
        } else if confidence >= 0.5 {
            "Moderate confidence interpretation"
        } else {
            "Low confidence interpretation"
        };

        format!(
            "{}. This vocalization is {}. Overall confidence: {:.1}%.",
            confidence_qualifier,
            main_summary,
            confidence * 100.0
        )
    }

    /// Generate a confidence explanation.
    ///
    /// Five qualitative bands with thresholds 0.9 / 0.7 / 0.5 / 0.3.
    pub fn confidence_explanation(&self, confidence: f32) -> String {
        if confidence >= 0.9 {
            "Very high confidence: Strong evidence from multiple sources supports this interpretation.".to_string()
        } else if confidence >= 0.7 {
            "High confidence: Good evidence supports this interpretation with minor uncertainty.".to_string()
        } else if confidence >= 0.5 {
            "Moderate confidence: Evidence partially supports this interpretation. Additional verification recommended.".to_string()
        } else if confidence >= 0.3 {
            "Low confidence: Limited evidence available. Interpretation should be considered tentative.".to_string()
        } else {
            "Very low confidence: Insufficient evidence for reliable interpretation. Expert review recommended.".to_string()
        }
    }
}
|
||||
|
||||
/// Formatter for evidence pack output.
///
/// Renders an evidence pack as a Markdown report (`format_report`), a
/// compact one-line summary (`format_compact`), or a JSON value
/// (`format_json`).
#[derive(Debug)]
pub struct EvidencePackFormatter {
    /// Templates used to render descriptions and summaries.
    templates: InterpretationTemplates,
    /// Whether to include the per-neighbor evidence section in reports.
    include_details: bool,
    /// Maximum number of neighbor entries shown in the details section.
    max_evidence_items: usize,
}
|
||||
|
||||
impl Default for EvidencePackFormatter {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl EvidencePackFormatter {
    /// Create a new formatter.
    ///
    /// Defaults: default templates, details included, at most 5 evidence
    /// items shown.
    pub fn new() -> Self {
        Self {
            templates: InterpretationTemplates::new(),
            include_details: true,
            max_evidence_items: 5,
        }
    }

    /// Set whether to include detailed evidence.
    pub fn with_details(mut self, include: bool) -> Self {
        self.include_details = include;
        self
    }

    /// Set maximum evidence items to show.
    pub fn with_max_evidence(mut self, max: usize) -> Self {
        self.max_evidence_items = max;
        self
    }

    /// Format an evidence pack as a structured report.
    ///
    /// Produces Markdown sections: header, summary, structural analysis,
    /// claims (if any), evidence details (if enabled and present), and a
    /// confidence assessment.
    pub fn format_report(&self, pack: &crate::domain::entities::EvidencePack) -> String {
        let mut sections = Vec::new();

        // Header
        sections.push(format!(
            "# Evidence Pack Report\n\nID: {}\nQuery: {}\nCreated: {}",
            pack.id,
            pack.query_embedding_id,
            pack.created_at.format("%Y-%m-%d %H:%M:%S UTC")
        ));

        // Summary
        // NOTE(review): `generate_summary` expects an average similarity as
        // its second argument but receives `overall_confidence()` here —
        // confirm those are intended to coincide.
        // NOTE(review): the "dominant taxon" is taken from the first
        // neighbor only, not from a majority vote — TODO confirm.
        sections.push(format!(
            "\n## Summary\n\n{}",
            self.templates.generate_summary(
                pack.neighbors.len(),
                pack.overall_confidence(),
                pack.cluster_context.cluster_label.as_deref(),
                pack.neighbors.first().and_then(|n| n.recording_metadata.taxon.as_deref()),
                pack.interpretation.confidence,
            )
        ));

        // Structural description
        sections.push(format!(
            "\n## Structural Analysis\n\n{}",
            pack.interpretation.structural_description
        ));

        // Claims: one bullet per claim with confidence and evidence count.
        if !pack.interpretation.claims.is_empty() {
            let claims_text: Vec<String> = pack
                .interpretation
                .claims
                .iter()
                .map(|c| {
                    let evidence_count = c.evidence_refs.len();
                    format!(
                        "- {} (confidence: {:.1}%, {} evidence reference{})",
                        c.statement,
                        c.confidence * 100.0,
                        evidence_count,
                        // Pluralize "reference" only when count != 1.
                        if evidence_count == 1 { "" } else { "s" }
                    )
                })
                .collect();

            sections.push(format!(
                "\n## Claims\n\n{}",
                claims_text.join("\n")
            ));
        }

        // Detailed evidence (if enabled): up to `max_evidence_items`
        // neighbor descriptions, plus a "... and N more" trailer.
        if self.include_details && !pack.neighbors.is_empty() {
            let evidence_text: Vec<String> = pack
                .neighbors
                .iter()
                .take(self.max_evidence_items)
                .map(|n| {
                    self.templates.neighbor_evidence_description(
                        &n.recording_metadata.recording_id,
                        n.distance,
                        n.recording_metadata.taxon.as_deref(),
                    )
                })
                .collect();

            let more_text = if pack.neighbors.len() > self.max_evidence_items {
                format!(
                    "\n... and {} more neighbors",
                    pack.neighbors.len() - self.max_evidence_items
                )
            } else {
                String::new()
            };

            sections.push(format!(
                "\n## Evidence Details\n\n### Neighbors\n{}\n{}",
                evidence_text.join("\n"),
                more_text
            ));
        }

        // Confidence explanation
        sections.push(format!(
            "\n## Confidence Assessment\n\n{}",
            self.templates.confidence_explanation(pack.interpretation.confidence)
        ));

        sections.join("\n")
    }

    /// Format a compact single-line summary.
    ///
    /// Taxon falls back to "unknown" (from the first neighbor only) and
    /// cluster label to "unassigned".
    pub fn format_compact(&self, pack: &crate::domain::entities::EvidencePack) -> String {
        let taxon = pack
            .neighbors
            .first()
            .and_then(|n| n.recording_metadata.taxon.as_deref())
            .unwrap_or("unknown");

        let cluster = pack
            .cluster_context
            .cluster_label
            .as_deref()
            .unwrap_or("unassigned");

        format!(
            "[{}] {} neighbors, cluster='{}', taxon='{}', confidence={:.1}%",
            pack.id,
            pack.neighbors.len(),
            cluster,
            taxon,
            pack.overall_confidence() * 100.0
        )
    }

    /// Format as JSON-compatible structure.
    ///
    /// Emits summary counts, interpretation metadata, and a per-claim array;
    /// timestamps are RFC 3339.
    pub fn format_json(&self, pack: &crate::domain::entities::EvidencePack) -> serde_json::Value {
        serde_json::json!({
            "id": pack.id,
            "query_embedding_id": pack.query_embedding_id.0,
            "created_at": pack.created_at.to_rfc3339(),
            "summary": {
                "neighbor_count": pack.neighbors.len(),
                "overall_confidence": pack.overall_confidence(),
                "cluster_assigned": pack.cluster_context.has_cluster(),
                "has_sequence_context": pack.sequence_context.is_some(),
            },
            "interpretation": {
                "structural_description": pack.interpretation.structural_description,
                "claim_count": pack.interpretation.claims.len(),
                "confidence": pack.interpretation.confidence,
            },
            "claims": pack.interpretation.claims.iter().map(|c| {
                serde_json::json!({
                    "statement": c.statement,
                    "confidence": c.confidence,
                    "evidence_count": c.evidence_refs.len(),
                })
            }).collect::<Vec<_>>(),
        })
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Similarity level bucketing and percentage formatting.
    #[test]
    fn test_neighbor_description() {
        let templates = InterpretationTemplates::new();

        let desc = templates.neighbor_description(5, 0.85);
        assert!(desc.contains("high similarity"));
        assert!(desc.contains("85.0%"));
        assert!(desc.contains("5 reference"));

        let desc = templates.neighbor_description(3, 0.45);
        assert!(desc.contains("low similarity"));
    }

    // Deduplication and the single- vs multi-taxon phrasings.
    #[test]
    fn test_taxon_description() {
        let templates = InterpretationTemplates::new();

        let desc = templates.taxon_description(&["Species A", "Species A", "Species A"]);
        assert!(desc.contains("Species A"));
        assert!(!desc.contains("multiple taxa"));

        let desc = templates.taxon_description(&["Species A", "Species B"]);
        assert!(desc.contains("multiple taxa"));
        assert!(desc.contains("Species A"));
        assert!(desc.contains("Species B"));
    }

    // Confidence bucketing plus both percentages in the output.
    #[test]
    fn test_cluster_description() {
        let templates = InterpretationTemplates::new();

        let desc = templates.cluster_description("Song Type A", 0.9, 0.85);
        assert!(desc.contains("Song Type A"));
        assert!(desc.contains("very high confidence"));
        assert!(desc.contains("90.0%"));
        assert!(desc.contains("85.0%"));
    }

    // Motif sentence appears only when a motif is supplied.
    #[test]
    fn test_sequence_description() {
        let templates = InterpretationTemplates::new();

        let desc = templates.sequence_description(5, None);
        assert!(desc.contains("5 vocalization sequence"));
        assert!(!desc.contains("motif"));

        let desc = templates.sequence_description(5, Some("ABAB"));
        assert!(desc.contains("motif pattern 'ABAB'"));
    }

    // Full summary composition: similarity, cluster, taxon, confidence.
    #[test]
    fn test_generate_summary() {
        let templates = InterpretationTemplates::new();

        let summary = templates.generate_summary(
            10,
            0.85,
            Some("Dawn Chorus"),
            Some("Turdus merula"),
            0.9,
        );

        assert!(summary.contains("High confidence"));
        assert!(summary.contains("highly similar"));
        assert!(summary.contains("Dawn Chorus"));
        assert!(summary.contains("Turdus merula"));
        assert!(summary.contains("90.0%"));
    }

    // Confidence band selection at representative values.
    #[test]
    fn test_confidence_explanation() {
        let templates = InterpretationTemplates::new();

        let high = templates.confidence_explanation(0.95);
        assert!(high.contains("Very high confidence"));

        let moderate = templates.confidence_explanation(0.55);
        assert!(moderate.contains("Moderate confidence"));

        let low = templates.confidence_explanation(0.2);
        assert!(low.contains("Very low confidence"));
    }

    // Custom overrides replace the built-in text verbatim.
    #[test]
    fn test_custom_template_override() {
        let templates = InterpretationTemplates::new()
            .with_template("low_similarity", "Custom low similarity message");

        let desc = templates.low_similarity_claim();
        assert_eq!(desc, "Custom low similarity message");
    }

    // All three output formats of EvidencePackFormatter on one pack.
    #[test]
    fn test_evidence_pack_formatter() {
        use crate::domain::entities::*;

        let pack = EvidencePack::new(
            EmbeddingId::new("query-1"),
            vec![
                NeighborEvidence::new(
                    EmbeddingId::new("n1"),
                    0.1,
                    RecordingMetadata::new("r1").with_taxon("Species A"),
                ),
            ],
            ClusterContext::new(
                Some(ClusterId::new("c1")),
                0.9,
                0.85,
            ).with_label("Song Type A"),
            None,
            Interpretation::new(
                "Test structural description".to_string(),
                vec![Claim::new("Test claim", 0.9)],
                0.85,
            ),
        );

        let formatter = EvidencePackFormatter::new();

        // Test full report
        let report = formatter.format_report(&pack);
        assert!(report.contains("Evidence Pack Report"));
        assert!(report.contains("query-1"));
        assert!(report.contains("Test structural description"));

        // Test compact format
        let compact = formatter.format_compact(&pack);
        assert!(compact.contains("1 neighbors"));
        assert!(compact.contains("Song Type A"));

        // Test JSON format
        let json = formatter.format_json(&pack);
        assert_eq!(json["query_embedding_id"], "query-1");
        assert_eq!(json["summary"]["neighbor_count"], 1);
    }
}
|
||||
Reference in New Issue
Block a user