feat: ADR-024 Contrastive CSI Embedding Model — all 7 phases (#52)

Full implementation of Project AETHER — Contrastive CSI Embedding Model.

## Phases Delivered
1. ProjectionHead (64→128→128) + L2 normalization
2. CsiAugmenter (5 physically-motivated augmentations)
3. InfoNCE contrastive loss + SimCLR pretraining
4. FingerprintIndex (4 index types: env, activity, temporal, person)
5. RVF SEG_EMBED (0x0C) + CLI integration
6. Cross-modal alignment (PoseEncoder + InfoNCE)
7. Deep RuVector: MicroLoRA, EWC++, drift detection, hard-negative mining, SEG_LORA

## Stats
- 276 tests passing (191 lib + 51 bin + 16 rvf + 18 vitals)
- 3,342 additions across 8 files
- Zero unsafe/unwrap/panic/todo stubs
- ~55KB INT8 model for ESP32 edge deployment

Also fixes deprecated GitHub Actions (v3→v4) and adds feat/* branch CI triggers.

Closes #50
This commit was merged in pull request #52.
This commit is contained in:
rUv
2026-03-01 01:44:38 -05:00
committed by GitHub
parent 44b9c30dbc
commit 9bbe95648c
39 changed files with 5136 additions and 68 deletions

View File

@@ -37,6 +37,10 @@ const SEG_META: u8 = 0x07;
const SEG_WITNESS: u8 = 0x0A;
/// Domain profile declarations.
const SEG_PROFILE: u8 = 0x0B;
/// Contrastive embedding model weights and configuration (ADR-024).
pub const SEG_EMBED: u8 = 0x0C;
/// LoRA adaptation profile (named LoRA weight sets for environment-specific fine-tuning).
pub const SEG_LORA: u8 = 0x0D;
// ── Pure-Rust CRC32 (IEEE 802.3 polynomial) ────────────────────────────────
@@ -304,6 +308,35 @@ impl RvfBuilder {
self.push_segment(seg_type, payload);
}
/// Add a named LoRA adaptation profile (ADR-024 Phase 7).
///
/// Segment format: `[name_len: u16 LE][name_bytes: UTF-8][weights: f32 LE...]`
pub fn add_lora_profile(&mut self, name: &str, lora_weights: &[f32]) {
let name_bytes = name.as_bytes();
let name_len = name_bytes.len() as u16;
let mut payload = Vec::with_capacity(2 + name_bytes.len() + lora_weights.len() * 4);
payload.extend_from_slice(&name_len.to_le_bytes());
payload.extend_from_slice(name_bytes);
for &w in lora_weights {
payload.extend_from_slice(&w.to_le_bytes());
}
self.push_segment(SEG_LORA, &payload);
}
/// Add contrastive embedding config and projection head weights (ADR-024).
/// Serializes embedding config as JSON followed by projection weights as f32 LE.
pub fn add_embedding(&mut self, config_json: &serde_json::Value, proj_weights: &[f32]) {
let config_bytes = serde_json::to_vec(config_json).unwrap_or_default();
let config_len = config_bytes.len() as u32;
let mut payload = Vec::with_capacity(4 + config_bytes.len() + proj_weights.len() * 4);
payload.extend_from_slice(&config_len.to_le_bytes());
payload.extend_from_slice(&config_bytes);
for &w in proj_weights {
payload.extend_from_slice(&w.to_le_bytes());
}
self.push_segment(SEG_EMBED, &payload);
}
/// Add witness/proof data as a Witness segment.
pub fn add_witness(&mut self, training_hash: &str, metrics: &serde_json::Value) {
let witness = serde_json::json!({
@@ -528,6 +561,73 @@ impl RvfReader {
.and_then(|data| serde_json::from_slice(data).ok())
}
/// Parse and return the embedding config JSON and projection weights, if present.
pub fn embedding(&self) -> Option<(serde_json::Value, Vec<f32>)> {
let data = self.find_segment(SEG_EMBED)?;
if data.len() < 4 {
return None;
}
let config_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
if 4 + config_len > data.len() {
return None;
}
let config: serde_json::Value = serde_json::from_slice(&data[4..4 + config_len]).ok()?;
let weight_data = &data[4 + config_len..];
if weight_data.len() % 4 != 0 {
return None;
}
let weights: Vec<f32> = weight_data
.chunks_exact(4)
.map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
.collect();
Some((config, weights))
}
/// Retrieve a named LoRA profile's weights, if present.
/// Returns None if no profile with the given name exists.
pub fn lora_profile(&self, name: &str) -> Option<Vec<f32>> {
for (h, payload) in &self.segments {
if h.seg_type != SEG_LORA || payload.len() < 2 {
continue;
}
let name_len = u16::from_le_bytes([payload[0], payload[1]]) as usize;
if 2 + name_len > payload.len() {
continue;
}
let seg_name = std::str::from_utf8(&payload[2..2 + name_len]).ok()?;
if seg_name == name {
let weight_data = &payload[2 + name_len..];
if weight_data.len() % 4 != 0 {
return None;
}
let weights: Vec<f32> = weight_data
.chunks_exact(4)
.map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
.collect();
return Some(weights);
}
}
None
}
/// List all stored LoRA profile names.
pub fn lora_profiles(&self) -> Vec<String> {
let mut names = Vec::new();
for (h, payload) in &self.segments {
if h.seg_type != SEG_LORA || payload.len() < 2 {
continue;
}
let name_len = u16::from_le_bytes([payload[0], payload[1]]) as usize;
if 2 + name_len > payload.len() {
continue;
}
if let Ok(name) = std::str::from_utf8(&payload[2..2 + name_len]) {
names.push(name.to_string());
}
}
names
}
/// Number of segments in the container.
pub fn segment_count(&self) -> usize {
self.segments.len()
@@ -911,4 +1011,91 @@ mod tests {
assert!(!info.has_quant_info);
assert!(!info.has_witness);
}
#[test]
fn test_rvf_embedding_segment_roundtrip() {
let config = serde_json::json!({
"d_model": 64,
"d_proj": 128,
"temperature": 0.07,
"normalize": true,
});
let weights: Vec<f32> = (0..256).map(|i| (i as f32 * 0.13).sin()).collect();
let mut builder = RvfBuilder::new();
builder.add_manifest("embed-test", "1.0", "embedding test");
builder.add_embedding(&config, &weights);
let data = builder.build();
let reader = RvfReader::from_bytes(&data).unwrap();
assert_eq!(reader.segment_count(), 2);
let (decoded_config, decoded_weights) = reader.embedding()
.expect("embedding segment should be present");
assert_eq!(decoded_config["d_model"], 64);
assert_eq!(decoded_config["d_proj"], 128);
assert!((decoded_config["temperature"].as_f64().unwrap() - 0.07).abs() < 1e-4);
assert_eq!(decoded_weights.len(), weights.len());
for (a, b) in decoded_weights.iter().zip(weights.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "weight mismatch");
}
}
// ── Phase 7: RVF LoRA profile tests ───────────────────────────────
#[test]
fn test_rvf_lora_profile_roundtrip() {
let weights: Vec<f32> = (0..100).map(|i| (i as f32 * 0.37).sin()).collect();
let mut builder = RvfBuilder::new();
builder.add_manifest("lora-test", "1.0", "LoRA profile test");
builder.add_lora_profile("office-env", &weights);
let data = builder.build();
let reader = RvfReader::from_bytes(&data).unwrap();
assert_eq!(reader.segment_count(), 2);
let profiles = reader.lora_profiles();
assert_eq!(profiles, vec!["office-env"]);
let decoded = reader.lora_profile("office-env")
.expect("LoRA profile should be present");
assert_eq!(decoded.len(), weights.len());
for (a, b) in decoded.iter().zip(weights.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "LoRA weight mismatch");
}
// Non-existent profile returns None
assert!(reader.lora_profile("nonexistent").is_none());
}
#[test]
fn test_rvf_multiple_lora_profiles() {
let w1: Vec<f32> = vec![1.0, 2.0, 3.0];
let w2: Vec<f32> = vec![4.0, 5.0, 6.0, 7.0];
let w3: Vec<f32> = vec![-1.0, -2.0];
let mut builder = RvfBuilder::new();
builder.add_lora_profile("office", &w1);
builder.add_lora_profile("home", &w2);
builder.add_lora_profile("outdoor", &w3);
let data = builder.build();
let reader = RvfReader::from_bytes(&data).unwrap();
assert_eq!(reader.segment_count(), 3);
let profiles = reader.lora_profiles();
assert_eq!(profiles.len(), 3);
assert!(profiles.contains(&"office".to_string()));
assert!(profiles.contains(&"home".to_string()));
assert!(profiles.contains(&"outdoor".to_string()));
// Verify each profile's weights
let d1 = reader.lora_profile("office").unwrap();
assert_eq!(d1, w1);
let d2 = reader.lora_profile("home").unwrap();
assert_eq!(d2, w2);
let d3 = reader.lora_profile("outdoor").unwrap();
assert_eq!(d3, w3);
}
}