Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,219 @@
//! Signal ingestion service.
use crate::types::{Hash, NodeId, Timestamp};
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
/// An incoming event handed to the ingestion service.
///
/// `Signal::new` fills `id` from a digest of the signal type, payload and source,
/// so the timestamp and target node do not influence the identifier.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Signal {
/// Signal ID used for idempotency; `SignalIngestion::ingest` rejects a signal
/// whose ID it has already recorded.
pub id: Hash,
/// Type of signal (e.g., "observation", "update", "correction").
pub signal_type: String,
/// Target node, if the signal addresses one; `None` otherwise.
pub target_node: Option<NodeId>,
/// Signal payload as JSON.
pub payload: serde_json::Value,
/// Source of the signal.
pub source: String,
/// Timestamp of signal generation.
pub timestamp: Timestamp,
}
impl Signal {
    /// Build a signal of the given type from `payload` and `source`.
    ///
    /// The `id` is the digest of the string rendering of a JSON object holding
    /// the `"type"`, `"payload"` and `"source"` entries. Neither the timestamp
    /// nor the target node takes part in it, so two signals with equal type,
    /// payload and source share the same ID.
    ///
    /// `target_node` starts out as `None`; `timestamp` is `Timestamp::now()`.
    pub fn new(
        signal_type: impl Into<String>,
        payload: serde_json::Value,
        source: impl Into<String>,
    ) -> Self {
        let signal_type: String = signal_type.into();
        let source: String = source.into();

        // Content fingerprint that the ID is derived from.
        let fingerprint = serde_json::json!({
            "type": signal_type,
            "payload": payload,
            "source": source,
        })
        .to_string();

        Self {
            id: Hash::digest(fingerprint.as_bytes()),
            signal_type,
            target_node: None,
            payload,
            source,
            timestamp: Timestamp::now(),
        }
    }

    /// Return the signal with `target_node` set to `node_id`.
    ///
    /// The `id` is left untouched.
    pub fn with_target(self, node_id: NodeId) -> Self {
        Self {
            target_node: Some(node_id),
            ..self
        }
    }
}
/// A group of signals handed out together for processing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalBatch {
/// Signals in the batch, in the order they were added.
pub signals: Vec<Signal>,
/// Time at which the batch was created (set by `SignalBatch::new`).
pub created_at: Timestamp,
}
impl SignalBatch {
/// Create a new empty batch.
pub fn new() -> Self {
Self {
signals: Vec::new(),
created_at: Timestamp::now(),
}
}
/// Add a signal to the batch.
pub fn add(&mut self, signal: Signal) {
self.signals.push(signal);
}
/// Get the number of signals.
pub fn len(&self) -> usize {
self.signals.len()
}
/// Check if batch is empty.
pub fn is_empty(&self) -> bool {
self.signals.is_empty()
}
}
impl Default for SignalBatch {
fn default() -> Self {
Self::new()
}
}
/// Service for ingesting signals: deduplicates by signal ID and hands the
/// accepted signals out in batches.
pub struct SignalIngestion {
/// Accepted signals waiting to be batched, oldest first.
buffer: VecDeque<Signal>,
/// Upper bound on the number of signals placed in one batch.
max_batch_size: usize,
/// IDs of every accepted signal (for deduplication). Entries are only
/// removed by `clear_processed_ids`.
processed_ids: std::collections::HashSet<Hash>,
}
impl SignalIngestion {
    /// Create a new ingestion service.
    ///
    /// `max_batch_size` is the largest number of signals returned by one call
    /// to [`next_batch`](Self::next_batch). A value of `0` is treated as `1`:
    /// with a zero limit no batch could ever be produced and buffered signals
    /// would accumulate forever.
    pub fn new(max_batch_size: usize) -> Self {
        Self {
            buffer: VecDeque::new(),
            max_batch_size: max_batch_size.max(1),
            processed_ids: std::collections::HashSet::new(),
        }
    }

    /// Ingest a signal.
    ///
    /// Returns `true` if the signal was accepted and buffered, `false` if a
    /// signal with the same ID has already been accepted (the duplicate is
    /// dropped).
    pub fn ingest(&mut self, signal: Signal) -> bool {
        // `insert` reports whether the ID was new, so a single hash lookup
        // both checks for and records the ID.
        if !self.processed_ids.insert(signal.id) {
            return false;
        }
        self.buffer.push_back(signal);
        true
    }

    /// Get the next batch of signals if available.
    ///
    /// Removes up to `max_batch_size` signals from the front of the buffer
    /// (oldest first) and returns them in a fresh [`SignalBatch`]. Returns
    /// `None` when the buffer is empty.
    pub fn next_batch(&mut self) -> Option<SignalBatch> {
        if self.buffer.is_empty() {
            return None;
        }

        // The buffer is non-empty and the limit is at least 1, so `take >= 1`.
        let take = self.max_batch_size.min(self.buffer.len());
        let mut batch = SignalBatch::new();
        batch.signals.extend(self.buffer.drain(..take));
        Some(batch)
    }

    /// Get the number of buffered signals not yet handed out in a batch.
    pub fn buffer_size(&self) -> usize {
        self.buffer.len()
    }

    /// Clear the processed IDs set (for memory management).
    ///
    /// After this call every previously seen ID is forgotten, including the
    /// IDs of signals that are still buffered, so a repeat of any earlier
    /// signal will be accepted again.
    pub fn clear_processed_ids(&mut self) {
        self.processed_ids.clear();
    }
}

impl Default for SignalIngestion {
    /// Ingestion service with a maximum batch size of 100.
    fn default() -> Self {
        Self::new(100)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Signal::new` stores the given type and source verbatim.
    #[test]
    fn test_signal_creation() {
        let payload = serde_json::json!({"value": 42});
        let signal = Signal::new("observation", payload, "test-source");

        assert_eq!(signal.signal_type, "observation");
        assert_eq!(signal.source, "test-source");
    }

    /// The second ingestion of a signal with the same ID is refused.
    #[test]
    fn test_duplicate_rejection() {
        let mut ingestion = SignalIngestion::new(10);
        let signal = Signal::new("test", serde_json::json!({}), "source");

        assert!(ingestion.ingest(signal.clone()));
        assert!(!ingestion.ingest(signal)); // Duplicate
    }

    /// Five signals with a batch limit of two come out as 2 + 2 + 1.
    #[test]
    fn test_batching() {
        let mut ingestion = SignalIngestion::new(2);
        for i in 0..5 {
            ingestion.ingest(Signal::new("test", serde_json::json!({"i": i}), "source"));
        }

        for expected in [2_usize, 2, 1] {
            let batch = ingestion.next_batch().unwrap();
            assert_eq!(batch.len(), expected);
        }
        assert!(ingestion.next_batch().is_none());
    }
}

View File

@@ -0,0 +1,108 @@
//! # Signal Ingestion Module
//!
//! Validates and normalizes incoming events before they enter the coherence engine.
//!
//! ## Responsibilities
//!
//! - Validate incoming signals against schema
//! - Normalize to canonical form
//! - Route to appropriate processing pipeline
//! - Emit domain events for ingested signals
// TODO: Implement signal validation and normalization
// This is a placeholder for the signal ingestion bounded context
use serde::{Deserialize, Serialize};
/// A raw signal before validation.
///
/// All fields are carried as loosely typed values (`String`, raw JSON);
/// [`ValidatedSignal`] is the typed counterpart.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawSignal {
/// Signal identifier.
pub id: String,
/// Signal type as free-form text (not yet mapped to a [`SignalType`]).
pub signal_type: String,
/// Raw, unnormalized JSON payload.
pub payload: serde_json::Value,
/// Timestamp (Unix millis).
pub timestamp_ms: u64,
/// Source identifier.
pub source: String,
}
/// A validated and normalized signal.
///
/// Mirrors [`RawSignal`] but with a typed [`SignalType`], a
/// [`NormalizedPayload`] and the [`ValidationMetadata`] attached.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatedSignal {
/// Signal identifier.
pub id: String,
/// Signal type.
pub signal_type: SignalType,
/// Normalized payload.
pub payload: NormalizedPayload,
/// Timestamp (Unix millis).
pub timestamp_ms: u64,
/// Source identifier.
pub source: String,
/// Validation metadata (validity flag, warnings, schema version, normalizations).
pub validation: ValidationMetadata,
}
/// Signal type enumeration: the kinds of signal a [`ValidatedSignal`] can carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalType {
/// State update for a node.
StateUpdate,
/// Edge addition.
EdgeAdd,
/// Edge removal.
EdgeRemove,
/// Observation for evidence accumulation.
Observation,
/// Policy update.
PolicyUpdate,
/// Query request.
Query,
}
/// Normalized payload for processing.
///
/// Typed variants cover state updates, edge modifications and observations;
/// anything else is carried as raw JSON in [`NormalizedPayload::Json`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NormalizedPayload {
/// State update payload: `node_id` identifies the node and `state` holds
/// its values as `f32`s (presumably the node's new state vector — confirm
/// against the consumer).
StateUpdate { node_id: String, state: Vec<f32> },
/// Edge modification payload.
EdgeMod {
/// Identifier of the edge's source endpoint.
source: String,
/// Identifier of the edge's target endpoint.
target: String,
/// Optional edge weight. NOTE(review): the meaning of `None` is not
/// defined in this module — confirm with the consumer.
weight: Option<f32>,
},
/// Observation payload.
Observation {
/// Identifier of the hypothesis the observation refers to.
hypothesis_id: String,
/// The observed boolean outcome.
observed: bool,
},
/// Generic JSON payload.
Json(serde_json::Value),
}
/// Metadata from signal validation.
///
/// The `Default` value describes a signal that passed validation with no
/// warnings and no normalizations, under schema version "1.0.0".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationMetadata {
/// Whether the signal passed validation.
pub valid: bool,
/// Validation warnings.
pub warnings: Vec<String>,
/// Schema version used.
pub schema_version: String,
/// Normalization applied (one entry per normalization step).
pub normalizations: Vec<String>,
}
impl Default for ValidationMetadata {
fn default() -> Self {
Self {
valid: true,
warnings: Vec::new(),
schema_version: "1.0.0".to_string(),
normalizations: Vec::new(),
}
}
}

View File

@@ -0,0 +1,131 @@
//! Signal normalization.
use super::Signal;
/// Switches that select which normalization steps are applied to a signal.
#[derive(Debug, Clone)]
pub struct NormalizationConfig {
    /// Convert string values to lowercase.
    pub lowercase_strings: bool,
    /// Strip leading and trailing whitespace from strings.
    pub trim_whitespace: bool,
    /// Substitute a default for null values.
    pub replace_nulls: bool,
}

impl Default for NormalizationConfig {
    /// Default configuration: only whitespace trimming is enabled.
    fn default() -> Self {
        NormalizationConfig {
            trim_whitespace: true,
            lowercase_strings: false,
            replace_nulls: false,
        }
    }
}
/// Normalizer for signals.
///
/// Holds the [`NormalizationConfig`] that decides which transformations
/// `normalize` applies.
pub struct Normalizer {
/// Switches controlling trimming, lowercasing and null replacement.
config: NormalizationConfig,
}
impl Normalizer {
/// Create a new normalizer.
pub fn new(config: NormalizationConfig) -> Self {
Self { config }
}
/// Normalize a signal in place.
pub fn normalize(&self, signal: &mut Signal) {
if self.config.trim_whitespace {
signal.signal_type = signal.signal_type.trim().to_string();
signal.source = signal.source.trim().to_string();
}
if self.config.lowercase_strings {
signal.signal_type = signal.signal_type.to_lowercase();
signal.source = signal.source.to_lowercase();
}
// Normalize payload recursively
signal.payload = self.normalize_value(signal.payload.clone());
}
fn normalize_value(&self, value: serde_json::Value) -> serde_json::Value {
match value {
serde_json::Value::String(s) => {
let mut s = s;
if self.config.trim_whitespace {
s = s.trim().to_string();
}
if self.config.lowercase_strings {
s = s.to_lowercase();
}
serde_json::Value::String(s)
}
serde_json::Value::Array(arr) => {
serde_json::Value::Array(arr.into_iter().map(|v| self.normalize_value(v)).collect())
}
serde_json::Value::Object(obj) => {
let normalized: serde_json::Map<String, serde_json::Value> = obj
.into_iter()
.map(|(k, v)| {
let key = if self.config.lowercase_strings {
k.to_lowercase()
} else {
k
};
(key, self.normalize_value(v))
})
.collect();
serde_json::Value::Object(normalized)
}
serde_json::Value::Null if self.config.replace_nulls => {
serde_json::Value::String(String::new())
}
other => other,
}
}
}
impl Default for Normalizer {
fn default() -> Self {
Self::new(NormalizationConfig::default())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default normalizer strips surrounding whitespace from the type,
    /// the source and string values inside the payload.
    #[test]
    fn test_trim_whitespace() {
        let payload = serde_json::json!({"key": " value "});
        let mut signal = Signal::new(" test ", payload, " source ");

        Normalizer::default().normalize(&mut signal);

        assert_eq!(signal.signal_type, "test");
        assert_eq!(signal.source, "source");
        assert_eq!(signal.payload["key"], "value");
    }

    /// With `lowercase_strings` enabled, the type, source, payload keys and
    /// payload string values are all lowercased.
    #[test]
    fn test_lowercase() {
        let normalizer = Normalizer::new(NormalizationConfig {
            lowercase_strings: true,
            ..Default::default()
        });
        let payload = serde_json::json!({"KEY": "VALUE"});
        let mut signal = Signal::new("TEST", payload, "SOURCE");

        normalizer.normalize(&mut signal);

        assert_eq!(signal.signal_type, "test");
        assert_eq!(signal.source, "source");
        assert_eq!(signal.payload["key"], "value");
    }
}

View File

@@ -0,0 +1,131 @@
//! Signal validation.
use super::Signal;
/// Outcome of validating a signal.
#[derive(Debug, Clone)]
pub enum ValidationResult {
    /// The signal passed every check.
    Valid,
    /// The signal failed; carries one message per failed check.
    Invalid(Vec<String>),
}

impl ValidationResult {
    /// `true` for [`ValidationResult::Valid`], `false` otherwise.
    pub fn is_valid(&self) -> bool {
        match self {
            Self::Valid => true,
            Self::Invalid(_) => false,
        }
    }

    /// The validation error messages; an empty slice when the result is valid.
    pub fn errors(&self) -> &[String] {
        if let Self::Invalid(messages) = self {
            messages.as_slice()
        } else {
            &[]
        }
    }
}
/// Validator for incoming signals.
///
/// Checks payload size, an optional allow-list of signal types, and that the
/// source is present.
pub struct SignalValidator {
/// Maximum payload size in bytes, compared against the length of the
/// payload's JSON string form.
max_payload_size: usize,
/// Allowed signal types; `None` means any type is accepted.
allowed_types: Option<Vec<String>>,
}
impl SignalValidator {
/// Create a new validator.
pub fn new() -> Self {
Self {
max_payload_size: 1024 * 1024, // 1MB default
allowed_types: None,
}
}
/// Set maximum payload size.
pub fn with_max_payload_size(mut self, size: usize) -> Self {
self.max_payload_size = size;
self
}
/// Set allowed signal types.
pub fn with_allowed_types(mut self, types: Vec<String>) -> Self {
self.allowed_types = Some(types);
self
}
/// Validate a signal.
pub fn validate(&self, signal: &Signal) -> ValidationResult {
let mut errors = Vec::new();
// Check payload size
let payload_str = signal.payload.to_string();
if payload_str.len() > self.max_payload_size {
errors.push(format!(
"Payload exceeds maximum size of {} bytes",
self.max_payload_size
));
}
// Check signal type if restricted
if let Some(ref allowed) = self.allowed_types {
if !allowed.contains(&signal.signal_type) {
errors.push(format!(
"Signal type '{}' not in allowed types",
signal.signal_type
));
}
}
// Check source is not empty
if signal.source.is_empty() {
errors.push("Signal source cannot be empty".to_string());
}
if errors.is_empty() {
ValidationResult::Valid
} else {
ValidationResult::Invalid(errors)
}
}
}
impl Default for SignalValidator {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed signal passes the default validator.
    #[test]
    fn test_valid_signal() {
        let signal = Signal::new("test", serde_json::json!({"key": "value"}), "source");
        let result = SignalValidator::new().validate(&signal);
        assert!(result.is_valid());
    }

    /// An empty source is rejected, and the message mentions the source.
    #[test]
    fn test_empty_source() {
        let mut signal = Signal::new("test", serde_json::json!({}), "source");
        signal.source = String::new();

        let result = SignalValidator::new().validate(&signal);

        assert!(!result.is_valid());
        assert!(result.errors()[0].contains("source"));
    }

    /// A signal whose type is missing from the allow-list is rejected.
    #[test]
    fn test_disallowed_type() {
        let allowed = vec!["allowed".to_string()];
        let validator = SignalValidator::new().with_allowed_types(allowed);
        let signal = Signal::new("disallowed", serde_json::json!({}), "source");

        assert!(!validator.validate(&signal).is_valid());
    }
}