Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,159 @@
//! DitheredQuantizer: deterministic low-bit quantization for exo activations.
//!
//! Wraps `ruvector-dither` to provide drop-in dithered quantization for
//! exo-backend-classical activation and weight tensors.
//!
//! Dithering breaks power-of-two resonances that cause idle tones / sticky
//! activations in 3/5/7-bit inference — without any RNG overhead.
//!
//! # Quick start
//!
//! ```
//! use exo_backend_classical::dither_quantizer::{DitheredQuantizer, DitherKind};
//!
//! // 8-bit, golden-ratio dither, layer 0, 16 channels, ε = 0.5 LSB
//! let mut q = DitheredQuantizer::new(DitherKind::GoldenRatio, 0, 16, 8, 0.5);
//!
//! let mut activations = vec![0.3_f32, -0.7, 0.5, 0.1];
//! q.quantize(&mut activations);
//! assert!(activations.iter().all(|&v| v >= -1.0 && v <= 1.0));
//! ```
use ruvector_dither::{channel::ChannelDither, quantize_slice_dithered, PiDither};
/// Which deterministic dither sequence to use.
///
/// Selected once at construction time; see [`DitheredQuantizer::new`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DitherKind {
    /// Golden-ratio quasi-random sequence (best equidistribution, no period).
    GoldenRatio,
    /// π-digit cyclic sequence (period = 256; ideal for weight pack-time use).
    Pi,
}
/// Internal dither backend, mirroring the public [`DitherKind`] selection.
enum Source {
    /// Per-channel golden-ratio dither; carries bits/eps internally (set in `new`).
    Golden(ChannelDither),
    /// π-digit cyclic dither; bits/eps are supplied on each quantize call.
    Pi(PiDither),
}
/// Dithered quantizer for exo activation / weight tensors.
pub struct DitheredQuantizer {
    // Active dither sequence (golden-ratio or π-digit).
    source: Source,
    // Quantizer bit-width.
    bits: u32,
    // Dither amplitude in LSB units.
    eps: f32,
}
impl DitheredQuantizer {
    /// Create a new quantizer.
    ///
    /// - `kind` dither sequence type
    /// - `layer_id` identifies this layer (seeds per-channel states)
    /// - `n_channels` number of independent channels (ignored for Pi)
    /// - `bits` quantizer bit-width (typically 3–8)
    /// - `eps` dither amplitude in LSB units (0.5 recommended)
    pub fn new(kind: DitherKind, layer_id: u32, n_channels: usize, bits: u32, eps: f32) -> Self {
        let source = match kind {
            DitherKind::GoldenRatio => {
                Source::Golden(ChannelDither::new(layer_id, n_channels, bits, eps))
            }
            DitherKind::Pi => Source::Pi(PiDither::from_tensor_id(layer_id)),
        };
        Self { source, bits, eps }
    }
    /// Quantize `activations` in-place.
    ///
    /// Each element is rounded to the nearest representable value in
    /// `[-1.0, 1.0]` at `bits`-bit precision with dither applied.
    pub fn quantize(&mut self, activations: &mut [f32]) {
        match &mut self.source {
            // Golden path: ChannelDither already holds bits/eps from `new`.
            Source::Golden(cd) => cd.quantize_batch(activations),
            // Pi path: bits/eps are passed per call; PiDither only tracks sequence state.
            Source::Pi(pd) => quantize_slice_dithered(activations, self.bits, self.eps, pd),
        }
    }
    /// Reset the dither state to the initial seed (useful for reproducible tests).
    ///
    /// `n_channels` is ignored for the Pi variant, mirroring `new`.
    pub fn reset(&mut self, layer_id: u32, n_channels: usize) {
        match &mut self.source {
            Source::Golden(cd) => {
                // Rebuild with the stored bits/eps so only the seed changes.
                *cd = ChannelDither::new(layer_id, n_channels, self.bits, self.eps);
            }
            Source::Pi(pd) => {
                *pd = PiDither::from_tensor_id(layer_id);
            }
        }
    }
    /// Bit-width used by this quantizer.
    pub fn bits(&self) -> u32 {
        self.bits
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared bounds check: every quantized value must stay in [-1, 1].
    fn assert_in_bounds(values: &[f32]) {
        for v in values {
            assert!(*v >= -1.0 && *v <= 1.0, "out of bounds: {v}");
        }
    }

    #[test]
    fn golden_quantizer_in_bounds() {
        let mut quantizer = DitheredQuantizer::new(DitherKind::GoldenRatio, 0, 8, 8, 0.5);
        // Linear ramp spanning [-1, 1] in 64 steps.
        let mut samples: Vec<f32> = (0..64).map(|i| (i as f32 / 63.0) * 2.0 - 1.0).collect();
        quantizer.quantize(&mut samples);
        assert_in_bounds(&samples);
    }

    #[test]
    fn pi_quantizer_in_bounds() {
        let mut quantizer = DitheredQuantizer::new(DitherKind::Pi, 42, 1, 5, 0.5);
        let mut samples = vec![0.3_f32, -0.7, 0.5, 0.1, -1.0, 1.0];
        quantizer.quantize(&mut samples);
        assert_in_bounds(&samples);
    }

    #[test]
    fn different_layers_different_output() {
        let input: Vec<f32> = vec![0.5; 16];
        // Quantize a fresh copy of `input` under the given layer seed.
        let run_layer = |layer: u32| {
            let mut scratch = input.clone();
            let mut quantizer =
                DitheredQuantizer::new(DitherKind::GoldenRatio, layer, 8, 8, 0.5);
            quantizer.quantize(&mut scratch);
            scratch
        };
        assert_ne!(run_layer(0), run_layer(1));
    }

    #[test]
    fn deterministic_after_reset() {
        let input: Vec<f32> = vec![0.3, -0.4, 0.7, -0.1, 0.9];
        let mut quantizer = DitheredQuantizer::new(DitherKind::GoldenRatio, 7, 4, 8, 0.5);
        let mut first = input.clone();
        quantizer.quantize(&mut first);
        // Re-seed with identical layer/channel parameters …
        quantizer.reset(7, 4);
        let mut second = input.clone();
        quantizer.quantize(&mut second);
        // … and expect byte-identical output.
        assert_eq!(first, second, "reset must restore deterministic output");
    }

    #[test]
    fn three_bit_quantization() {
        let mut quantizer = DitheredQuantizer::new(DitherKind::Pi, 0, 1, 3, 0.5);
        let mut samples = vec![-0.9_f32, -0.5, 0.0, 0.5, 0.9];
        quantizer.quantize(&mut samples);
        for v in &samples {
            assert!(*v >= -1.0 && *v <= 1.0);
        }
        // 3-bit: qmax = 3, only multiples of 1/3 are valid
        let step = 1.0 / 3.0;
        for v in &samples {
            let off_grid = (v / step).round() * step - v;
            assert!(off_grid.abs() < 1e-5, "3-bit output should be on grid: {v}");
        }
    }
}

View File

@@ -0,0 +1,840 @@
//! Domain bridge: wraps EXO-AI classical operations as learnable domains
//! for ruvector-domain-expansion's transfer-learning engine.
//!
//! ## Why
//!
//! EXO-AI performs vector similarity search and graph traversal constantly
//! but never *learns* which strategies work best for which problem types.
//! This bridge turns those operations into `Domain` implementations so
//! Thompson Sampling can discover optimal policies and transfer insights
//! across categories (e.g. "approximate HNSW wins on high-dim sparse queries"
//! transfers to graph traversal: "approximate BFS beats exact DFS").
//!
//! ## Two Domains
//!
//! - **ExoRetrievalDomain**: Vector similarity search as a bandit problem.
//! Arms: `exact`, `approximate`, `beam_rerank`.
//!
//! - **ExoGraphDomain**: Hypergraph traversal as a bandit problem.
//! Arms: `bfs`, `approx`, `hierarchical`.
//!
//! Embeddings align structurally (same 64-dim layout, same dimension semantics)
//! so cross-domain transfer priors carry meaningful signal.
use ruvector_domain_expansion::{
ArmId, ContextBucket, Domain, DomainEmbedding, DomainId, Evaluation, Solution, Task,
};
use serde_json::json;
use std::f32::consts::PI;
// ─── Utilities ────────────────────────────────────────────────────────────────
/// Build a ContextBucket from task difficulty.
///
/// Tiers: `< 0.33` → "easy", `< 0.67` → "medium", otherwise "hard".
fn bucket_for(difficulty: f32, category: &str) -> ContextBucket {
    let tier = match difficulty {
        d if d < 0.33 => "easy",
        d if d < 0.67 => "medium",
        _ => "hard",
    };
    ContextBucket {
        difficulty_tier: tier.to_string(),
        category: category.to_string(),
    }
}
/// Spread a scalar value into a sinusoidal pattern over `n` dimensions.
/// Used to make scalar metrics distinguishable in the 64-dim embedding.
///
/// Writes at most `n` values starting at `out[offset]`; silently writes
/// nothing when `offset` is at or past the end of `out`.
#[inline]
fn spread(val: f32, out: &mut [f32], offset: usize, n: usize) {
    // Clamp the start so an out-of-range offset becomes a no-op.
    let start = offset.min(out.len());
    for (i, slot) in out[start..].iter_mut().take(n).enumerate() {
        *slot = val * ((i as f32 / n as f32) * PI).sin().abs();
    }
}
// ─── Retrieval Domain ─────────────────────────────────────────────────────────
/// Retrieval strategies available to the Thompson Sampling engine.
///
/// Order matters: `generate_tasks` assigns arms round-robin in this order,
/// and `embed` maps them onto one-hot positions [5, 6, 7].
pub const RETRIEVAL_ARMS: &[&str] = &["exact", "approximate", "beam_rerank"];
/// EXO vector similarity retrieval as a `Domain`.
///
/// **Task spec** (JSON):
/// ```json
/// { "dim": 512, "k": 10, "noise": 0.2, "n_candidates": 100, "arm": "approximate" }
/// ```
///
/// **Reference solution** (optimal): recall = 1.0, latency = low.
///
/// **Transfer signal**: high-dimensional + noisy tasks → prefer `approximate`.
/// This prior transfers to ExoGraphDomain: large + sparse graphs → prefer `approx`.
pub struct ExoRetrievalDomain {
    // Stable domain identifier ("exo-retrieval"); returned by `Domain::id`.
    id: DomainId,
}
impl ExoRetrievalDomain {
    /// Construct the domain with its fixed id, "exo-retrieval".
    pub fn new() -> Self {
        Self {
            id: DomainId("exo-retrieval".to_string()),
        }
    }
    /// Zero-padded task id, e.g. "exo-ret-00003".
    fn task_id(index: usize) -> String {
        format!("exo-ret-{:05}", index)
    }
    /// Category label from requested k: <=5 small, <=20 medium, else large.
    fn category(k: usize) -> String {
        if k <= 5 {
            "top-k-small".to_string()
        } else if k <= 20 {
            "top-k-medium".to_string()
        } else {
            "top-k-large".to_string()
        }
    }
    /// Simulate scoring a retrieval strategy on a task.
    /// In production this would run against the actual VectorIndexWrapper.
    ///
    /// Returns `(recall, efficiency, elegance)`; recall and efficiency are
    /// clamped to [0, 1] before returning (elegance is already in range).
    fn simulate_score(arm: &str, dim: usize, noise: f32, k: usize) -> (f32, f32, f32) {
        // Complexity grows with dimensionality (normalized to 1024 dims) and noise.
        let complexity = (dim as f32 / 1024.0) * (1.0 + noise);
        let (recall, efficiency) = match arm {
            "exact" => {
                // High accuracy but O(n) latency — slow for high-dim
                let recall = 1.0 - noise * 0.1;
                let efficiency = 1.0 - complexity * 0.6;
                (recall, efficiency)
            }
            "approximate" => {
                // Good trade-off — recall drops with noise but stays efficient
                let recall = 1.0 - noise * 0.25;
                let efficiency = 0.85 - complexity * 0.2;
                (recall, efficiency)
            }
            "beam_rerank" => {
                // Best recall on large k, moderate cost
                let recall = 1.0 - noise * 0.15;
                let efficiency = 0.7 - complexity * 0.3;
                let k_bonus = (k as f32 / 50.0).min(0.15);
                (recall + k_bonus * 0.1, efficiency)
            }
            // Unknown arm: neutral mid-range score.
            _ => (0.5, 0.5),
        };
        let elegance = if k <= 10 { 0.9 } else { 0.6 };
        (recall.clamp(0.0, 1.0), efficiency.clamp(0.0, 1.0), elegance)
    }
}
impl Default for ExoRetrievalDomain {
fn default() -> Self {
Self::new()
}
}
impl Domain for ExoRetrievalDomain {
    fn id(&self) -> &DomainId {
        &self.id
    }
    fn name(&self) -> &str {
        "EXO Vector Retrieval"
    }
    /// Fixed 64-dim layout, shared with `ExoGraphDomain` for transfer alignment.
    fn embedding_dim(&self) -> usize {
        64
    }
    /// Generate `count` tasks at the given difficulty.
    ///
    /// All tasks share the same difficulty-derived parameters; only the arm
    /// varies, cycling round-robin through `RETRIEVAL_ARMS`.
    fn generate_tasks(&self, count: usize, difficulty: f32) -> Vec<Task> {
        // Difficulty in [0,1] scales: dim 64..1024, k 3..50, noise 0..0.5.
        let dim = (64.0 + difficulty * 960.0) as usize;
        let k = (3.0 + difficulty * 47.0) as usize;
        let noise = difficulty * 0.5;
        let n_candidates = (k * 10).max(50);
        let cat = Self::category(k);
        RETRIEVAL_ARMS
            .iter()
            .cycle()
            .take(count)
            .enumerate()
            .map(|(i, arm)| Task {
                id: Self::task_id(i),
                domain_id: self.id.clone(),
                difficulty,
                spec: json!({
                    "dim": dim,
                    "k": k,
                    "noise": noise,
                    "n_candidates": n_candidates,
                    "arm": arm,
                    "category": cat,
                }),
                constraints: vec![
                    // Required recall relaxes with difficulty, floored at 0.5;
                    // must mirror the `min_recall` formula in `evaluate`.
                    format!("recall >= {:.2}", (1.0 - difficulty * 0.4).max(0.5)),
                    "latency_us < 10000".to_string(),
                ],
            })
            .collect()
    }
    /// Score a solution: composite of recall, latency-derived efficiency, and
    /// an elegance bonus for returning exactly the requested k.
    fn evaluate(&self, task: &Task, solution: &Solution) -> Evaluation {
        let sol = &solution.data;
        // Missing solution fields default pessimistically (recall 0, latency ~10ms).
        let recall = sol.get("recall").and_then(|x| x.as_f64()).unwrap_or(0.0) as f32;
        let latency_us = sol
            .get("latency_us")
            .and_then(|x| x.as_u64())
            .unwrap_or(9999);
        let retrieved_k = sol.get("retrieved_k").and_then(|x| x.as_u64()).unwrap_or(0);
        let target_k = task.spec.get("k").and_then(|x| x.as_u64()).unwrap_or(5);
        // 1000µs or faster → efficiency 1.0; degrades hyperbolically above that.
        let efficiency = (1000.0 / (latency_us as f32 + 1.0)).min(1.0);
        let elegance = if retrieved_k == target_k { 1.0 } else { 0.5 };
        // Same formula as the constraint string built in `generate_tasks`.
        let min_recall: f32 = (1.0 - task.difficulty * 0.4).max(0.5);
        let mut eval = Evaluation::composite(recall, efficiency, elegance);
        eval.constraint_results = vec![recall >= min_recall, latency_us < 10_000];
        eval
    }
    /// Embed a solution into the shared 64-dim layout:
    /// [0]=recall, [1]=latency-derived efficiency, [2]=k scale,
    /// [5..=7]=arm one-hot, [8..32]=sinusoidal spread of recall, rest zero.
    fn embed(&self, solution: &Solution) -> DomainEmbedding {
        let sol = &solution.data;
        let mut v = vec![0.0f32; 64];
        let recall = sol.get("recall").and_then(|x| x.as_f64()).unwrap_or(0.0) as f32;
        let latency = sol
            .get("latency_us")
            .and_then(|x| x.as_u64())
            .unwrap_or(1000) as f32;
        let k = sol.get("retrieved_k").and_then(|x| x.as_u64()).unwrap_or(5) as f32;
        let arm = sol.get("arm").and_then(|x| x.as_str()).unwrap_or("exact");
        v[0] = recall;
        v[1] = (1000.0 / (latency + 1.0)).min(1.0); // efficiency
        v[2] = (k / 50.0).min(1.0);
        // Strategy one-hot — aligned with ExoGraphDomain positions [5,6,7]
        match arm {
            "exact" => {
                v[5] = 1.0;
            }
            "approximate" => {
                v[6] = 1.0;
            }
            "beam_rerank" => {
                v[7] = 1.0;
            }
            _ => {}
        }
        spread(recall, &mut v, 8, 24); // dims 8..31
        DomainEmbedding::new(v, self.id.clone())
    }
    /// Build the heuristic-optimal solution for a task: beam_rerank for large k,
    /// approximate for high-dim or noisy specs, exact otherwise.
    fn reference_solution(&self, task: &Task) -> Option<Solution> {
        let dim = task.spec.get("dim").and_then(|x| x.as_u64()).unwrap_or(128) as usize;
        let k = task.spec.get("k").and_then(|x| x.as_u64()).unwrap_or(5) as usize;
        let noise = task
            .spec
            .get("noise")
            .and_then(|x| x.as_f64())
            .unwrap_or(0.0) as f32;
        // Optimal arm: beam_rerank for large k, approximate for high-dim noisy
        let arm = if k > 20 {
            "beam_rerank"
        } else if dim > 512 || noise > 0.3 {
            "approximate"
        } else {
            "exact"
        };
        let (recall, _, _) = Self::simulate_score(arm, dim, noise, k);
        // Reference latency: approximate is ~100µs, exact ~500µs at 512-dim
        let latency_us = match arm {
            "exact" => 500u64,
            "approximate" => 100,
            _ => 200,
        };
        Some(Solution {
            task_id: task.id.clone(),
            content: format!("optimal-{}", arm),
            data: json!({
                "recall": recall,
                "latency_us": latency_us,
                "retrieved_k": k,
                "arm": arm,
            }),
        })
    }
}
// ─── Graph Domain ─────────────────────────────────────────────────────────────
/// Traversal strategies for the graph domain.
///
/// Positionally aligned with `RETRIEVAL_ARMS` in the embedding one-hot
/// (bfs↔exact, approx↔approximate, hierarchical↔beam_rerank; see `embed`).
pub const GRAPH_ARMS: &[&str] = &["bfs", "approx", "hierarchical"];
/// EXO hypergraph traversal as a `Domain`.
///
/// Structural alignment with ExoRetrievalDomain (same embedding layout)
/// enables cross-domain transfer: retrieval priors seed graph policies.
///
/// **Task spec** (JSON):
/// ```json
/// { "n_entities": 500, "max_hops": 3, "min_coverage": 20,
///   "relation": "causal", "arm": "approx" }
/// ```
pub struct ExoGraphDomain {
    // Stable domain identifier ("exo-graph"); returned by `Domain::id`.
    id: DomainId,
}
impl ExoGraphDomain {
    /// Construct the domain with its fixed id, "exo-graph".
    pub fn new() -> Self {
        Self {
            id: DomainId("exo-graph".to_string()),
        }
    }
    /// Zero-padded task id, e.g. "exo-graph-00003".
    fn task_id(index: usize) -> String {
        format!("exo-graph-{:05}", index)
    }
    /// Simulate graph traversal score for an arm + problem parameters.
    ///
    /// Returns `(correctness, efficiency, elegance, latency_us)`.
    /// NOTE(review): divides by `min_coverage`; a zero value would produce
    /// NaN correctness. Callers in this file always pass >= 5 — confirm before
    /// exposing this more widely.
    fn simulate_score(
        arm: &str,
        n_entities: usize,
        max_hops: usize,
        min_coverage: usize,
    ) -> (f32, f32, f32, u64) {
        // Density saturates at 1000 entities; depth ratio normalizes to 6 hops.
        let density = (n_entities as f32 / 1000.0).min(1.0);
        let depth_ratio = max_hops as f32 / 6.0;
        let (coverage_ratio, hops_used, latency_us) = match arm {
            "bfs" => {
                // Complete but expensive for large graphs
                let cov = 1.3 - density * 0.4;
                let hops = max_hops.saturating_sub(1);
                let lat = (n_entities as u64) * 10;
                (cov, hops, lat)
            }
            "approx" => {
                // Approximate neighborhood expansion — efficient, slight coverage loss
                let cov = 1.1 - density * 0.2;
                let hops = (max_hops * 2 / 3).max(1);
                let lat = (n_entities as u64) * 3;
                (cov, hops, lat)
            }
            "hierarchical" => {
                // Coarse→fine decomposition — best for large graphs with structure
                let cov = 1.2 - depth_ratio * 0.3;
                let hops = (max_hops * 3 / 4).max(1);
                let lat = (n_entities as u64) * 5;
                (cov, hops, lat)
            }
            // Unknown arm: weak coverage, whole hop budget spent, slow.
            _ => (0.5, max_hops, 10_000),
        };
        let entities_found = (min_coverage as f32 * coverage_ratio) as u64;
        let correctness = (entities_found as f32 / min_coverage as f32).min(1.0);
        // Efficiency rewards finishing in fewer hops than the budget.
        let efficiency = if max_hops > 0 {
            (1.0 - hops_used as f32 / max_hops as f32).max(0.0)
        } else {
            0.0
        };
        // Elegance favors coverage slightly above target without overshooting.
        let elegance = if coverage_ratio >= 1.0 && coverage_ratio <= 1.5 {
            1.0
        } else if coverage_ratio > 0.8 {
            0.7
        } else {
            0.3
        };
        (correctness, efficiency, elegance, latency_us)
    }
}
impl Default for ExoGraphDomain {
fn default() -> Self {
Self::new()
}
}
impl Domain for ExoGraphDomain {
    fn id(&self) -> &DomainId {
        &self.id
    }
    fn name(&self) -> &str {
        "EXO Hypergraph Traversal"
    }
    /// Same 64-dim layout as `ExoRetrievalDomain` (transfer alignment).
    fn embedding_dim(&self) -> usize {
        64
    }
    /// Generate `count` tasks; parameters scale with difficulty, while the arm
    /// and relation label cycle round-robin.
    fn generate_tasks(&self, count: usize, difficulty: f32) -> Vec<Task> {
        // Difficulty in [0,1] scales: entities 50..1000, hops 2..6, coverage 5..100.
        let n_entities = (50.0 + difficulty * 950.0) as usize;
        let max_hops = (2.0 + difficulty * 4.0) as usize;
        let min_coverage = (5.0 + difficulty * 95.0) as usize;
        let relations = ["causal", "temporal", "semantic", "structural"];
        GRAPH_ARMS
            .iter()
            .cycle()
            .take(count)
            .enumerate()
            .map(|(i, arm)| Task {
                id: Self::task_id(i),
                domain_id: self.id.clone(),
                difficulty,
                spec: json!({
                    "n_entities": n_entities,
                    "max_hops": max_hops,
                    "min_coverage": min_coverage,
                    "relation": relations[i % 4],
                    "arm": arm,
                }),
                constraints: vec![
                    format!("entities_found >= {}", min_coverage),
                    format!("hops_used <= {}", max_hops),
                ],
            })
            .collect()
    }
    /// Score a solution: coverage-derived correctness, hop-budget efficiency,
    /// and an elegance band rewarding coverage_ratio in [1.0, 1.5].
    fn evaluate(&self, task: &Task, solution: &Solution) -> Evaluation {
        let sol = &solution.data;
        // Missing solution fields default to zero (worst case).
        let entities_found = sol
            .get("entities_found")
            .and_then(|x| x.as_u64())
            .unwrap_or(0);
        let hops_used = sol.get("hops_used").and_then(|x| x.as_u64()).unwrap_or(0);
        let coverage_ratio = sol
            .get("coverage_ratio")
            .and_then(|x| x.as_f64())
            .unwrap_or(0.0) as f32;
        // Missing task fields fall back to small, lenient defaults.
        let min_coverage = task
            .spec
            .get("min_coverage")
            .and_then(|x| x.as_u64())
            .unwrap_or(5);
        let max_hops = task
            .spec
            .get("max_hops")
            .and_then(|x| x.as_u64())
            .unwrap_or(3);
        let correctness = (entities_found as f32 / min_coverage as f32).min(1.0);
        let efficiency = if max_hops > 0 {
            (1.0 - hops_used as f32 / max_hops as f32).max(0.0)
        } else {
            0.0
        };
        let elegance = if coverage_ratio >= 1.0 && coverage_ratio <= 1.5 {
            1.0
        } else if coverage_ratio > 0.8 {
            0.7
        } else {
            0.3
        };
        let mut eval = Evaluation::composite(correctness, efficiency, elegance);
        eval.constraint_results = vec![entities_found >= min_coverage, hops_used <= max_hops];
        eval
    }
    /// Embed a solution into the shared 64-dim layout:
    /// [0]=coverage, [1]=inverse-hops efficiency proxy, [2]=entities scale,
    /// [5..=7]=arm one-hot (positions shared with the retrieval domain),
    /// [8..32]=sinusoidal spread of coverage, rest zero.
    fn embed(&self, solution: &Solution) -> DomainEmbedding {
        let sol = &solution.data;
        let mut v = vec![0.0f32; 64];
        let coverage = sol
            .get("coverage_ratio")
            .and_then(|x| x.as_f64())
            .unwrap_or(0.0) as f32;
        let hops = sol.get("hops_used").and_then(|x| x.as_u64()).unwrap_or(0) as f32;
        let entities = sol
            .get("entities_found")
            .and_then(|x| x.as_u64())
            .unwrap_or(0) as f32;
        let arm = sol.get("arm").and_then(|x| x.as_str()).unwrap_or("bfs");
        v[0] = coverage.min(1.0);
        v[1] = (1.0 / (hops + 1.0)).min(1.0); // efficiency proxy
        v[2] = (entities / 100.0).min(1.0);
        // Strategy one-hot — aligned with ExoRetrievalDomain at [5,6,7]
        match arm {
            "bfs" => {
                v[5] = 1.0;
            } // aligns with "exact"
            "approx" => {
                v[6] = 1.0;
            } // aligns with "approximate"
            "hierarchical" => {
                v[7] = 1.0;
            } // aligns with "beam_rerank"
            _ => {}
        }
        spread(coverage.min(1.0), &mut v, 8, 24); // dims 8..31
        DomainEmbedding::new(v, self.id.clone())
    }
    /// Build the heuristic-optimal solution: hierarchical for > 500 entities,
    /// approx otherwise; payload synthesized from `simulate_score`.
    fn reference_solution(&self, task: &Task) -> Option<Solution> {
        let n = task
            .spec
            .get("n_entities")
            .and_then(|x| x.as_u64())
            .unwrap_or(100) as usize;
        let max_hops = task
            .spec
            .get("max_hops")
            .and_then(|x| x.as_u64())
            .unwrap_or(3) as usize;
        let min_cov = task
            .spec
            .get("min_coverage")
            .and_then(|x| x.as_u64())
            .unwrap_or(5) as usize;
        // Optimal arm: hierarchical for large sparse graphs, approx for medium
        let arm = if n > 500 { "hierarchical" } else { "approx" };
        let (correctness, _, _, lat) = Self::simulate_score(arm, n, max_hops, min_cov);
        let entities = (min_cov as f32 * 1.2 * correctness) as u64;
        let hops = (max_hops as u64).saturating_sub(1).max(1);
        Some(Solution {
            task_id: task.id.clone(),
            content: format!("optimal-{}", arm),
            data: json!({
                "entities_found": entities,
                "hops_used": hops,
                "coverage_ratio": 1.2 * correctness,
                "arm": arm,
                "latency_us": lat,
            }),
        })
    }
}
// ─── Transfer Adapter ─────────────────────────────────────────────────────────
/// Unified adapter that registers both EXO domains into a `DomainExpansionEngine`
/// and exposes a simple training + transfer lifecycle API.
///
/// # Example
/// ```no_run
/// use exo_backend_classical::domain_bridge::ExoTransferAdapter;
///
/// let mut adapter = ExoTransferAdapter::new();
/// adapter.warmup(30); // train retrieval + graph
/// let accel = adapter.transfer_ret_to_graph(10); // measure acceleration
/// println!("Transfer acceleration: {:.2}x", accel);
/// ```
pub struct ExoTransferAdapter {
    /// The underlying domain-expansion engine (also contains built-in domains).
    /// Public so callers can drive the engine directly beyond this adapter's API.
    pub engine: ruvector_domain_expansion::DomainExpansionEngine,
}
impl ExoTransferAdapter {
    /// Create adapter and register both EXO domains alongside the built-in ones.
    pub fn new() -> Self {
        let mut engine = ruvector_domain_expansion::DomainExpansionEngine::new();
        engine.register_domain(Box::new(ExoRetrievalDomain::new()));
        engine.register_domain(Box::new(ExoGraphDomain::new()));
        Self { engine }
    }
    /// Run one training cycle on the given domain:
    /// generate a task, pick a strategy arm, record outcome.
    ///
    /// Returns the evaluation score, or 0.0 when no task was generated.
    fn train_one(&mut self, domain_id: &DomainId, difficulty: f32) -> f32 {
        let tasks = self.engine.generate_tasks(domain_id, 1, difficulty);
        let task = match tasks.into_iter().next() {
            Some(t) => t,
            None => return 0.0,
        };
        // NOTE(review): the arm is read straight from the task spec (arms are
        // assigned round-robin by generate_tasks) — no Thompson Sampling draw
        // happens here; confirm whether the engine samples inside
        // evaluate_and_record or if the bandit selection is still TODO.
        let arm_str = task
            .spec
            .get("arm")
            .and_then(|x| x.as_str())
            .unwrap_or("exact");
        let arm = ArmId(arm_str.to_string());
        let bucket = bucket_for(difficulty, arm_str);
        // Synthesize a plausible solution for the chosen arm
        let solution = self.make_solution(&task, arm_str);
        let eval = self
            .engine
            .evaluate_and_record(domain_id, &task, &solution, bucket, arm);
        eval.score
    }
    /// Build a synthetic solution for the given arm choice.
    ///
    /// Dispatches on the task's domain id: "exo-retrieval" gets a recall/latency
    /// payload; any other id gets a graph coverage payload.
    fn make_solution(&self, task: &Task, arm: &str) -> Solution {
        let spec = &task.spec;
        let data = if task.domain_id.0 == "exo-retrieval" {
            let dim = spec.get("dim").and_then(|x| x.as_u64()).unwrap_or(128) as usize;
            let k = spec.get("k").and_then(|x| x.as_u64()).unwrap_or(5) as usize;
            let noise = spec.get("noise").and_then(|x| x.as_f64()).unwrap_or(0.0) as f32;
            let (recall, _, _) = ExoRetrievalDomain::simulate_score(arm, dim, noise, k);
            let latency_us = match arm {
                "exact" => 500u64,
                "approximate" => 80,
                _ => 150,
            };
            json!({ "recall": recall, "latency_us": latency_us, "retrieved_k": k, "arm": arm })
        } else {
            let n = spec
                .get("n_entities")
                .and_then(|x| x.as_u64())
                .unwrap_or(100) as usize;
            let max_hops = spec.get("max_hops").and_then(|x| x.as_u64()).unwrap_or(3) as usize;
            let min_cov = spec
                .get("min_coverage")
                .and_then(|x| x.as_u64())
                .unwrap_or(5) as usize;
            let (corr, _, _, lat) = ExoGraphDomain::simulate_score(arm, n, max_hops, min_cov);
            let found = (min_cov as f32 * 1.1 * corr) as u64;
            let hops = (max_hops as u64).saturating_sub(1).max(1);
            json!({ "entities_found": found, "hops_used": hops,
                    "coverage_ratio": 1.1 * corr, "arm": arm, "latency_us": lat })
        };
        Solution {
            task_id: task.id.clone(),
            content: arm.to_string(),
            data,
        }
    }
    /// Train both EXO domains for `cycles` iterations each.
    /// Returns (retrieval_mean, graph_mean) scores.
    pub fn warmup(&mut self, cycles: usize) -> (f32, f32) {
        let ret_id = DomainId("exo-retrieval".to_string());
        let gph_id = DomainId("exo-graph".to_string());
        // Cycle through three fixed difficulty tiers; max(1) guards cycles == 0.
        let difficulties = [0.2, 0.5, 0.8];
        let ret_score: f32 = (0..cycles)
            .map(|i| self.train_one(&ret_id, difficulties[i % 3]))
            .sum::<f32>()
            / cycles.max(1) as f32;
        let gph_score: f32 = (0..cycles)
            .map(|i| self.train_one(&gph_id, difficulties[i % 3]))
            .sum::<f32>()
            / cycles.max(1) as f32;
        (ret_score, gph_score)
    }
    /// Transfer priors from retrieval domain → graph domain.
    /// Returns the acceleration factor (>1.0 means transfer helped).
    pub fn transfer_ret_to_graph(&mut self, measure_cycles: usize) -> f32 {
        let src = DomainId("exo-retrieval".to_string());
        let dst = DomainId("exo-graph".to_string());
        // Measure baseline graph performance BEFORE transfer
        // (`gph_id` duplicates `dst`; both name the "exo-graph" domain).
        let gph_id = DomainId("exo-graph".to_string());
        let difficulties = [0.3, 0.6, 0.9];
        let baseline: f32 = (0..measure_cycles)
            .map(|i| self.train_one(&gph_id, difficulties[i % 3]))
            .sum::<f32>()
            / measure_cycles.max(1) as f32;
        // Initiate transfer: inject retrieval priors into graph bandit
        self.engine.initiate_transfer(&src, &dst);
        // Measure graph performance AFTER transfer
        let transfer: f32 = (0..measure_cycles)
            .map(|i| self.train_one(&gph_id, difficulties[i % 3]))
            .sum::<f32>()
            / measure_cycles.max(1) as f32;
        // Acceleration = ratio of improvement; neutral 1.0 when baseline is zero.
        if baseline > 0.0 {
            transfer / baseline
        } else {
            1.0
        }
    }
    /// Summary from the scoreboard.
    pub fn summary(&self) -> ruvector_domain_expansion::ScoreboardSummary {
        self.engine.scoreboard_summary()
    }
}
impl Default for ExoTransferAdapter {
fn default() -> Self {
Self::new()
}
}
// ─── Tests ────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    // Task generation returns exactly `count` tasks with the right domain id.
    #[test]
    fn test_retrieval_task_generation() {
        let d = ExoRetrievalDomain::new();
        let tasks = d.generate_tasks(6, 0.5);
        assert_eq!(tasks.len(), 6);
        for t in &tasks {
            assert_eq!(t.domain_id, DomainId("exo-retrieval".to_string()));
            assert!(t.spec.get("k").and_then(|x| x.as_u64()).unwrap_or(0) > 0);
        }
    }
    // A hand-built perfect solution (recall 1.0, 80µs) should score highly.
    #[test]
    fn test_retrieval_perfect_solution() {
        let d = ExoRetrievalDomain::new();
        let tasks = d.generate_tasks(1, 0.2);
        let task = &tasks[0];
        let k = task.spec.get("k").and_then(|x| x.as_u64()).unwrap_or(5);
        let sol = Solution {
            task_id: task.id.clone(),
            content: "exact".to_string(),
            data: serde_json::json!({
                "recall": 1.0f32,
                "latency_us": 80u64,
                "retrieved_k": k,
                "arm": "exact",
            }),
        };
        let eval = d.evaluate(task, &sol);
        assert!(
            eval.correctness > 0.9,
            "recall=1.0 → correctness > 0.9, got {}",
            eval.correctness
        );
        assert!(
            eval.score > 0.7,
            "perfect retrieval score > 0.7, got {}",
            eval.score
        );
    }
    // The domain's own reference solution should evaluate as decent.
    #[test]
    fn test_retrieval_reference_solution() {
        let d = ExoRetrievalDomain::new();
        let tasks = d.generate_tasks(1, 0.4);
        let ref_sol = d.reference_solution(&tasks[0]);
        assert!(ref_sol.is_some());
        let sol = ref_sol.unwrap();
        let eval = d.evaluate(&tasks[0], &sol);
        assert!(
            eval.score > 0.5,
            "reference solution should be good: {}",
            eval.score
        );
    }
    #[test]
    fn test_graph_task_generation() {
        let d = ExoGraphDomain::new();
        let tasks = d.generate_tasks(6, 0.6);
        assert_eq!(tasks.len(), 6);
        for t in &tasks {
            assert_eq!(t.domain_id, DomainId("exo-graph".to_string()));
            assert!(t.spec.get("max_hops").and_then(|x| x.as_u64()).unwrap_or(0) >= 2);
        }
    }
    #[test]
    fn test_graph_reference_solution() {
        let d = ExoGraphDomain::new();
        let tasks = d.generate_tasks(1, 0.3);
        let ref_sol = d.reference_solution(&tasks[0]);
        assert!(ref_sol.is_some());
        let sol = ref_sol.unwrap();
        let eval = d.evaluate(&tasks[0], &sol);
        assert!(
            eval.correctness > 0.5,
            "reference solution correctness: {}",
            eval.correctness
        );
    }
    // Both domains must produce 64-dim embeddings with the arm one-hot at the
    // same position, so cross-domain transfer priors align structurally.
    #[test]
    fn test_embeddings_64_dim_and_aligned() {
        let rd = ExoRetrievalDomain::new();
        let gd = ExoGraphDomain::new();
        let sol_r = Solution {
            task_id: "t0".to_string(),
            content: "approximate".to_string(),
            data: serde_json::json!({
                "recall": 0.85f32, "latency_us": 120u64,
                "retrieved_k": 10u64, "arm": "approximate"
            }),
        };
        let sol_g = Solution {
            task_id: "t0".to_string(),
            content: "approx".to_string(),
            data: serde_json::json!({
                "entities_found": 15u64, "hops_used": 2u64,
                "coverage_ratio": 1.1f32, "arm": "approx"
            }),
        };
        let emb_r = rd.embed(&sol_r);
        let emb_g = gd.embed(&sol_g);
        assert_eq!(emb_r.vector.len(), 64, "retrieval embedding must be 64-dim");
        assert_eq!(emb_g.vector.len(), 64, "graph embedding must be 64-dim");
        // Both use "approximate"/"approx" → v[6] should be 1.0 in both
        assert!(
            (emb_r.vector[6] - 1.0).abs() < 1e-6,
            "retrieval approx arm at v[6]"
        );
        assert!(
            (emb_g.vector[6] - 1.0).abs() < 1e-6,
            "graph approx arm at v[6]"
        );
        // Cosine similarity should be meaningful (both represent "approximate" strategy)
        let sim = emb_r.cosine_similarity(&emb_g);
        assert!(
            sim > 0.3,
            "aligned embeddings should have decent similarity: {}",
            sim
        );
    }
    // End-to-end smoke test: warmup both domains, then measure transfer.
    #[test]
    fn test_adapter_warmup_and_transfer() {
        let mut adapter = ExoTransferAdapter::new();
        // Train for a few cycles
        let (ret_score, gph_score) = adapter.warmup(10);
        assert!(
            ret_score >= 0.0 && ret_score <= 1.0,
            "retrieval score in [0,1]: {}",
            ret_score
        );
        assert!(
            gph_score >= 0.0 && gph_score <= 1.0,
            "graph score in [0,1]: {}",
            gph_score
        );
        // Transfer — acceleration >= 0
        let accel = adapter.transfer_ret_to_graph(5);
        assert!(accel >= 0.0, "acceleration must be non-negative: {}", accel);
    }
    // Tier boundaries: < 0.33 easy, < 0.67 medium, else hard.
    #[test]
    fn test_bucket_tier_assignment() {
        let easy = bucket_for(0.1, "top-k-small");
        let med = bucket_for(0.5, "top-k-medium");
        let hard = bucket_for(0.9, "top-k-large");
        assert_eq!(easy.difficulty_tier, "easy");
        assert_eq!(med.difficulty_tier, "medium");
        assert_eq!(hard.difficulty_tier, "hard");
    }
}

View File

@@ -0,0 +1,181 @@
//! Graph database wrapper for ruvector-graph
use exo_core::{
EntityId, HyperedgeId, HyperedgeResult, Relation, SheafConsistencyResult, TopologicalQuery,
};
use ruvector_graph::{GraphDB, Hyperedge, Node};
use std::str::FromStr;
use exo_core::{Error as ExoError, Result as ExoResult};
#[cfg(test)]
use exo_core::RelationType;
/// Wrapper around ruvector GraphDB
///
/// Adapts exo-core entity/relation types onto ruvector-graph nodes and
/// hyperedges, and answers (a subset of) topological queries.
pub struct GraphWrapper {
    /// Underlying graph database
    db: GraphDB,
}
impl GraphWrapper {
    /// Create a new graph wrapper
    pub fn new() -> Self {
        Self { db: GraphDB::new() }
    }
    /// Create a hyperedge spanning multiple entities
    ///
    /// Member entities that do not yet exist as nodes are created on the fly
    /// with an "Entity" label and empty properties.
    ///
    /// # Errors
    /// Returns [`ExoError::Backend`] if node or hyperedge creation fails.
    pub fn create_hyperedge(
        &mut self,
        entities: &[EntityId],
        relation: &Relation,
    ) -> ExoResult<HyperedgeId> {
        // Ensure all entities exist as nodes (create if they don't)
        for entity_id in entities {
            let entity_id_str = entity_id.0.to_string();
            if self.db.get_node(&entity_id_str).is_none() {
                // Create node if it doesn't exist
                use ruvector_graph::types::{Label, Properties};
                let node = Node::new(entity_id_str, vec![Label::new("Entity")], Properties::new());
                self.db
                    .create_node(node)
                    .map_err(|e| ExoError::Backend(format!("Failed to create node: {}", e)))?;
            }
        }
        // Create hyperedge using ruvector-graph
        let entity_strs: Vec<String> = entities.iter().map(|e| e.0.to_string()).collect();
        let mut hyperedge = Hyperedge::new(entity_strs, relation.relation_type.0.clone());
        // Add properties if they're an object.
        // Values that fail conversion to the graph's property type are
        // silently skipped.
        if let Some(obj) = relation.properties.as_object() {
            for (key, value) in obj {
                if let Ok(prop_val) = serde_json::from_value(value.clone()) {
                    hyperedge.properties.insert(key.clone(), prop_val);
                }
            }
        }
        let hyperedge_id_str = hyperedge.id.clone();
        self.db
            .create_hyperedge(hyperedge)
            .map_err(|e| ExoError::Backend(format!("Failed to create hyperedge: {}", e)))?;
        // Convert string ID to HyperedgeId
        // NOTE(review): if the backend's id is not a parseable UUID, a *fresh
        // random* UUID is returned instead — that id will not resolve via
        // `get_hyperedge`. Confirm ruvector-graph hyperedge ids are UUID
        // strings, otherwise this should be an error.
        let uuid = uuid::Uuid::from_str(&hyperedge_id_str).unwrap_or_else(|_| uuid::Uuid::new_v4());
        Ok(HyperedgeId(uuid))
    }
    /// Get a node by ID
    pub fn get_node(&self, id: &EntityId) -> Option<Node> {
        self.db.get_node(&id.0.to_string())
    }
    /// Get a hyperedge by ID
    pub fn get_hyperedge(&self, id: &HyperedgeId) -> Option<Hyperedge> {
        self.db.get_hyperedge(&id.0.to_string())
    }
    /// Query the graph with topological queries
    ///
    /// Only `BettiNumbers` yields a (rough) numeric answer on this backend;
    /// the other variants report not-supported outcomes.
    pub fn query(&self, query: &TopologicalQuery) -> ExoResult<HyperedgeResult> {
        match query {
            TopologicalQuery::PersistentHomology {
                dimension: _,
                epsilon_range: _,
            } => {
                // Persistent homology is not directly supported on classical backend
                // This would require building a filtration and computing persistence
                // For now, return not supported
                Ok(HyperedgeResult::NotSupported)
            }
            TopologicalQuery::BettiNumbers { max_dimension } => {
                // Betti numbers computation
                // For classical backend, we can approximate:
                // - Betti_0 = number of connected components
                // - Higher Betti numbers require simplicial complex construction
                // Simple approximation: count connected components for Betti_0
                let betti_0 = self.approximate_connected_components();
                // For higher dimensions, we'd need proper TDA implementation
                // Return placeholder values for now
                let mut betti = vec![betti_0];
                for _ in 1..=*max_dimension {
                    betti.push(0); // Placeholder
                }
                Ok(HyperedgeResult::BettiNumbers(betti))
            }
            TopologicalQuery::SheafConsistency { local_sections: _ } => {
                // Sheaf consistency is an advanced topological concept
                // Not supported on classical discrete backend
                Ok(HyperedgeResult::SheafConsistency(
                    SheafConsistencyResult::Inconsistent(vec![
                        "Sheaf consistency not supported on classical backend".to_string(),
                    ]),
                ))
            }
        }
    }
    /// Approximate the number of connected components
    ///
    /// Placeholder: always returns 1 — no traversal is performed yet.
    fn approximate_connected_components(&self) -> usize {
        // This is a simple approximation
        // In a full implementation, we'd use proper graph traversal
        // For now, return 1 as a placeholder
        1
    }
    /// Get hyperedges containing a specific node
    pub fn hyperedges_containing(&self, node_id: &EntityId) -> Vec<Hyperedge> {
        // Use the hyperedge index from GraphDB
        self.db.get_hyperedges_by_node(&node_id.0.to_string())
    }
}
impl Default for GraphWrapper {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // NOTE(review): HashMap appears unused in these tests — candidate for removal.
    use std::collections::HashMap;
    #[test]
    fn test_graph_creation() {
        let graph = GraphWrapper::new();
        // Basic test
        assert!(graph.db.get_node("nonexistent").is_none());
    }
    // Hyperedge creation auto-creates the member nodes, so this succeeds
    // even though no nodes were inserted beforehand.
    #[test]
    fn test_create_hyperedge() {
        let mut graph = GraphWrapper::new();
        let entities = vec![EntityId::new(), EntityId::new(), EntityId::new()];
        let relation = Relation {
            relation_type: RelationType::new("related_to"),
            properties: serde_json::json!({}),
        };
        let result = graph.create_hyperedge(&entities, &relation);
        assert!(result.is_ok());
    }
    // BettiNumbers { max_dimension: d } yields d + 1 entries (dims 0..=d).
    #[test]
    fn test_topological_query() {
        let graph = GraphWrapper::new();
        let query = TopologicalQuery::BettiNumbers { max_dimension: 2 };
        let result = graph.query(&query);
        assert!(result.is_ok());
        if let Ok(HyperedgeResult::BettiNumbers(betti)) = result {
            assert_eq!(betti.len(), 3); // Dimensions 0, 1, 2
        }
    }
}

View File

@@ -0,0 +1,163 @@
//! # EXO Backend Classical
//!
//! Classical substrate backend consuming ruvector crates.
//! This provides a bridge between the EXO substrate abstractions and the
//! high-performance ruvector vector database and graph database.
#![warn(missing_docs)]
pub mod dither_quantizer;
pub mod domain_bridge;
pub mod graph;
pub mod thermo_layer;
pub mod transfer_orchestrator;
pub mod vector;
use exo_core::{
Error as ExoError, Filter, ManifoldDelta, Pattern, Result as ExoResult, SearchResult,
SubstrateBackend,
};
use parking_lot::RwLock;
use std::sync::Arc;
use vector::VectorIndexWrapper;
pub use graph::GraphWrapper;
/// Configuration for the classical backend.
#[derive(Debug, Clone)]
pub struct ClassicalConfig {
    /// Dimensionality every stored embedding and query vector must match.
    pub dimensions: usize,
    /// Distance metric used by the underlying ruvector index.
    pub distance_metric: ruvector_core::DistanceMetric,
}
impl Default for ClassicalConfig {
fn default() -> Self {
Self {
dimensions: 768,
distance_metric: ruvector_core::DistanceMetric::Cosine,
}
}
}
/// Classical substrate backend using ruvector
///
/// This backend wraps ruvector-core for vector operations and ruvector-graph
/// for hypergraph operations, providing a classical (discrete) implementation
/// of the substrate backend trait.
pub struct ClassicalBackend {
    /// Vector index wrapper, shared behind `Arc<RwLock>` so handles can be
    /// cloned out while reads and writes stay serialized.
    vector_index: Arc<RwLock<VectorIndexWrapper>>,
    /// Graph database wrapper (see [`ClassicalBackend::graph_db`] for access).
    graph_db: Arc<RwLock<GraphWrapper>>,
    /// Configuration captured at construction; `dimensions` is used to
    /// validate every query/pattern length.
    config: ClassicalConfig,
}
impl ClassicalBackend {
/// Create a new classical backend with the given configuration
pub fn new(config: ClassicalConfig) -> ExoResult<Self> {
let vector_index = VectorIndexWrapper::new(config.dimensions, config.distance_metric)
.map_err(|e| ExoError::Backend(format!("Failed to create vector index: {}", e)))?;
let graph_db = GraphWrapper::new();
Ok(Self {
vector_index: Arc::new(RwLock::new(vector_index)),
graph_db: Arc::new(RwLock::new(graph_db)),
config,
})
}
/// Create with default configuration
pub fn with_dimensions(dimensions: usize) -> ExoResult<Self> {
let mut config = ClassicalConfig::default();
config.dimensions = dimensions;
Self::new(config)
}
/// Get access to the underlying graph database (for hyperedge operations)
pub fn graph_db(&self) -> Arc<RwLock<GraphWrapper>> {
Arc::clone(&self.graph_db)
}
}
impl SubstrateBackend for ClassicalBackend {
    /// k-NN search over the discrete index; rejects mis-sized queries with
    /// [`ExoError::InvalidDimension`].
    fn similarity_search(
        &self,
        query: &[f32],
        k: usize,
        filter: Option<&Filter>,
    ) -> ExoResult<Vec<SearchResult>> {
        let expected = self.config.dimensions;
        if query.len() != expected {
            return Err(ExoError::InvalidDimension {
                expected,
                got: query.len(),
            });
        }
        // The read guard lives only for the duration of this call.
        self.vector_index.read().search(query, k, filter)
    }

    /// Classical substrates cannot deform continuously, so a "deformation"
    /// degenerates to a discrete insert of the pattern (learning rate unused).
    fn manifold_deform(&self, pattern: &Pattern, _learning_rate: f32) -> ExoResult<ManifoldDelta> {
        let expected = self.config.dimensions;
        if pattern.embedding.len() != expected {
            return Err(ExoError::InvalidDimension {
                expected,
                got: pattern.embedding.len(),
            });
        }
        let id = self.vector_index.write().insert(pattern)?;
        Ok(ManifoldDelta::DiscreteInsert { id })
    }

    /// Configured vector dimensionality.
    fn dimension(&self) -> usize {
        self.config.dimensions
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use exo_core::{Metadata, PatternId, SubstrateTime};

    /// Backend construction must honour the requested dimensionality.
    #[test]
    fn test_classical_backend_creation() {
        let backend = ClassicalBackend::with_dimensions(128).unwrap();
        assert_eq!(backend.dimension(), 128);
    }

    /// Round trip: a discrete insert via `manifold_deform` must be
    /// retrievable by a nearby similarity query.
    #[test]
    fn test_insert_and_search() {
        let backend = ClassicalBackend::with_dimensions(3).unwrap();
        // Create a pattern
        let pattern = Pattern {
            id: PatternId::new(),
            embedding: vec![1.0, 2.0, 3.0],
            metadata: Metadata::default(),
            timestamp: SubstrateTime::now(),
            antecedents: vec![],
            salience: 1.0,
        };
        // Insert pattern (learning rate is ignored on the classical backend)
        let result = backend.manifold_deform(&pattern, 0.0);
        assert!(result.is_ok());
        // Search with a slightly perturbed query; the single stored pattern
        // should come back as the sole hit.
        let query = vec![1.1, 2.1, 3.1];
        let results = backend.similarity_search(&query, 1, None);
        assert!(results.is_ok());
        let results = results.unwrap();
        assert_eq!(results.len(), 1);
    }
}

View File

@@ -0,0 +1,205 @@
//! ThermoLayer: thermodynamic coherence gate for exo-backend-classical.
//!
//! Wraps a `thermorust` Ising motif and treats the energy drop ΔE as a
//! **coherence λ-signal**: a large negative ΔE means the activation pattern
//! is "settling" (becoming coherent); a near-zero ΔE means it is already
//! at a local minimum or chaotically fluctuating at high temperature.
//!
//! The λ-signal can be used to gate min-cut operations or to weight
//! confidence scores in the ruvector-attn-mincut pipeline.
//!
//! # Integration sketch
//! ```no_run
//! use exo_backend_classical::thermo_layer::{ThermoLayer, ThermoConfig};
//!
//! let cfg = ThermoConfig { n: 16, beta: 3.0, steps_per_call: 20, ..Default::default() };
//! let mut layer = ThermoLayer::new(cfg);
//!
//! // Activations from an attention layer (length must equal `n`).
//! let mut acts = vec![0.5_f32; 16];
//! let signal = layer.run(&mut acts, 20);
//! println!("λ = {:.4}, dissipation = {:.3e} J", signal.lambda, signal.dissipation_j);
//! ```
use rand::SeedableRng;
use thermorust::{
dynamics::{step_discrete, Params},
energy::{Couplings, EnergyModel, Ising},
metrics::magnetisation,
State,
};
/// Configuration for a `ThermoLayer`.
#[derive(Clone, Debug)]
pub struct ThermoConfig {
    /// Number of units in the Ising motif (must match activation vector length).
    pub n: usize,
    /// Inverse temperature β = 1/(kT). Higher = colder, more deterministic.
    pub beta: f32,
    /// Ferromagnetic coupling strength J for ring topology.
    pub coupling: f32,
    /// Metropolis steps executed per `run()` call.
    ///
    /// NOTE(review): advisory only — `ThermoLayer::new` does not store this
    /// field and `run()` takes its step count as an explicit argument.
    /// Confirm whether callers are expected to pass `cfg.steps_per_call`.
    pub steps_per_call: usize,
    /// Landauer cost in Joules charged per accepted irreversible flip.
    pub irreversible_cost: f64,
    /// RNG seed (a fixed seed makes every trajectory fully deterministic).
    pub seed: u64,
}
impl Default for ThermoConfig {
fn default() -> Self {
Self {
n: 16,
beta: 3.0,
coupling: 0.2,
steps_per_call: 20,
irreversible_cost: 2.87e-21, // kT ln2 at 300 K
seed: 0,
}
}
}
/// Thermodynamic coherence signal returned by `ThermoLayer::run`.
#[derive(Clone, Debug)]
pub struct ThermoSignal {
    /// λ-signal: -ΔE / |E_initial| (positive = energy decreased = more coherent).
    pub lambda: f32,
    /// Magnetisation m ∈ [-1, +1] after the update.
    pub magnetisation: f32,
    /// Cumulative Joules dissipated since layer creation.
    pub dissipation_j: f64,
    /// Energy after the update step.
    pub energy_after: f32,
}
/// Ising-motif thermodynamic gate.
///
/// Holds the coupling model, the current spin state, Metropolis parameters,
/// and a seeded RNG so repeated runs are fully reproducible.
pub struct ThermoLayer {
    // Ising energy model over a ferromagnetic ring.
    model: Ising,
    // Current spin configuration plus accumulated dissipation.
    state: State,
    // Metropolis-Hastings parameters (β, step size, clamp mask, flip cost).
    params: Params,
    // Deterministic RNG seeded from `ThermoConfig::seed`.
    rng: rand::rngs::SmallRng,
}
impl ThermoLayer {
    /// Create a new `ThermoLayer` from `cfg`.
    ///
    /// Builds a ferromagnetic ring of `cfg.n` spins, starts from the all-+1
    /// state, and seeds a `SmallRng` from `cfg.seed` for reproducibility.
    ///
    /// NOTE(review): `cfg.steps_per_call` is not stored — callers pass the
    /// step count explicitly to [`ThermoLayer::run`]. Confirm intended.
    pub fn new(cfg: ThermoConfig) -> Self {
        let couplings = Couplings::ferromagnetic_ring(cfg.n, cfg.coupling);
        let model = Ising::new(couplings);
        let state = State::ones(cfg.n);
        let params = Params {
            beta: cfg.beta,
            // eta: presumably the continuous-dynamics step size, hard-coded
            // here; verify whether `step_discrete` reads it at all.
            eta: 0.05,
            irreversible_cost: cfg.irreversible_cost,
            clamp_mask: vec![false; cfg.n], // no spins pinned
        };
        let rng = rand::rngs::SmallRng::seed_from_u64(cfg.seed);
        Self {
            model,
            state,
            params,
            rng,
        }
    }
    /// Apply activations as external fields, run MH steps, return coherence signal.
    ///
    /// The activation vector is **modified in place** by the thermodynamic
    /// relaxation: each element is replaced by the Ising spin value after
    /// `steps` Metropolis updates. Values are clamped to {-1, +1}.
    pub fn run(&mut self, activations: &mut [f32], steps: usize) -> ThermoSignal {
        // Only the overlapping prefix is used when lengths differ.
        let n = self.state.len().min(activations.len());
        // Clamp inputs to ±1 and load as spin state.
        for i in 0..n {
            self.state.x[i] = activations[i].clamp(-1.0, 1.0).signum();
        }
        let e_before = self.model.energy(&self.state);
        // Run Metropolis steps.
        for _ in 0..steps {
            step_discrete(&self.model, &mut self.state, &self.params, &mut self.rng);
        }
        let e_after = self.model.energy(&self.state);
        let d_e = e_after - e_before;
        // λ = -ΔE / |E_before|: positive when energy dropped (settling).
        let lambda = if e_before.abs() > 1e-9 {
            -d_e / e_before.abs()
        } else {
            // Near-zero initial energy: avoid the division, report "no change".
            0.0
        };
        // Write relaxed spins back to the caller's buffer.
        for i in 0..n {
            activations[i] = self.state.x[i];
        }
        ThermoSignal {
            lambda,
            magnetisation: magnetisation(&self.state),
            dissipation_j: self.state.dissipated_j,
            energy_after: e_after,
        }
    }
    /// Reset the spin state to all +1 and zero the dissipation counter.
    pub fn reset(&mut self) {
        for xi in &mut self.state.x {
            *xi = 1.0;
        }
        self.state.dissipated_j = 0.0;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a short run yields finite, in-range diagnostics.
    #[test]
    fn thermo_layer_runs_without_panic() {
        let cfg = ThermoConfig {
            n: 8,
            steps_per_call: 10,
            ..Default::default()
        };
        let mut layer = ThermoLayer::new(cfg);
        let mut acts = vec![1.0_f32; 8];
        let sig = layer.run(&mut acts, 10);
        assert!(sig.lambda.is_finite());
        assert!(sig.magnetisation >= -1.0 && sig.magnetisation <= 1.0);
        assert!(sig.dissipation_j >= 0.0);
    }

    /// Even with zero MH steps the layer binarises the activations in place.
    #[test]
    fn activations_are_binarised() {
        let cfg = ThermoConfig {
            n: 4,
            steps_per_call: 0,
            ..Default::default()
        };
        let mut layer = ThermoLayer::new(cfg);
        let mut acts = vec![0.7_f32, -0.3, 0.1, -0.9];
        layer.run(&mut acts, 0);
        for a in &acts {
            assert!(
                (*a - 1.0).abs() < 1e-6 || (*a + 1.0).abs() < 1e-6,
                "not ±1: {a}"
            );
        }
    }

    /// Repeated long runs at low temperature must never produce NaN/inf λ.
    #[test]
    fn lambda_finite_after_many_steps() {
        let cfg = ThermoConfig {
            n: 16,
            beta: 5.0,
            ..Default::default()
        };
        let mut layer = ThermoLayer::new(cfg);
        for _ in 0..10 {
            let mut acts = vec![1.0_f32; 16];
            let sig = layer.run(&mut acts, 50);
            assert!(sig.lambda.is_finite());
        }
    }
}

View File

@@ -0,0 +1,320 @@
//! Cross-phase ExoTransferOrchestrator
//!
//! Wires all 5 ruvector-domain-expansion integration phases into a single
//! `run_cycle()` call:
//!
//! 1. **Phase 1** Domain Bridge (this crate): Thompson sampling over
//! `ExoRetrievalDomain` + `ExoGraphDomain`.
//! 2. **Phase 2** Transfer Manifold (exo-manifold): stores priors as
//! deformable 64-dim patterns.
//! 3. **Phase 3** Transfer Timeline (exo-temporal): records events in a
//! causal graph with temporal ordering.
//! 4. **Phase 4** Transfer CRDT (exo-federation): replicates summaries via
//! LWW-Map + G-Set.
//! 5. **Phase 5** Emergent Detection (exo-exotic): tracks whether
//! cross-domain transfer produces novel emergent capabilities.
use exo_exotic::domain_transfer::EmergentTransferDetector;
use exo_federation::transfer_crdt::{TransferCrdt, TransferPriorSummary};
use exo_manifold::transfer_store::TransferManifold;
use exo_temporal::transfer_timeline::TransferTimeline;
use ruvector_domain_expansion::{
ArmId, ContextBucket, DomainExpansionEngine, DomainId, Solution, TransferPrior,
};
use crate::domain_bridge::{ExoGraphDomain, ExoRetrievalDomain};
/// Results from a single orchestrated transfer cycle.
#[derive(Debug, Clone)]
pub struct CycleResult {
    /// Evaluation score from the source domain task [0.0, 1.0].
    /// Defaults to 0.5 when no task could be generated this cycle.
    pub eval_score: f32,
    /// Emergence score reported by the detector after the transfer step.
    pub emergence_score: f64,
    /// Mean improvement over the pre-transfer baseline.
    pub mean_improvement: f64,
    /// Number of (src, dst) priors stored in the manifold.
    pub manifold_entries: usize,
    /// Cycle index (1-based; incremented at the start of each `run_cycle`).
    pub cycle: u64,
}
/// Orchestrates all 5 integration phases of ruvector-domain-expansion.
///
/// One instance owns the entire pipeline: bandit engine, manifold store,
/// causal timeline, CRDT replication state, and emergence detector.
pub struct ExoTransferOrchestrator {
    /// Phase 1: Thompson sampling engine with retrieval + graph domains.
    engine: DomainExpansionEngine,
    /// Source domain ID (retrieval).
    src_id: DomainId,
    /// Destination domain ID (graph).
    dst_id: DomainId,
    /// Phase 2: manifold storage for transfer priors.
    manifold: TransferManifold,
    /// Phase 3: temporal causal timeline.
    timeline: TransferTimeline,
    /// Phase 4: CRDT for distributed propagation.
    crdt: TransferCrdt,
    /// Phase 5: emergent capability detector.
    emergence: EmergentTransferDetector,
    /// Monotonic cycle counter (starts at 0; first run_cycle reports 1).
    cycle: u64,
}
impl ExoTransferOrchestrator {
    /// Create a new orchestrator.
    ///
    /// NOTE(review): `_node_id` is accepted but never used —
    /// `TransferCrdt::new()` receives no node identity here. Confirm whether
    /// the id should be forwarded to the CRDT layer.
    pub fn new(_node_id: impl Into<String>) -> Self {
        let src_id = DomainId("exo-retrieval".to_string());
        let dst_id = DomainId("exo-graph".to_string());
        let mut engine = DomainExpansionEngine::new();
        engine.register_domain(Box::new(ExoRetrievalDomain::new()));
        engine.register_domain(Box::new(ExoGraphDomain::new()));
        Self {
            engine,
            src_id,
            dst_id,
            manifold: TransferManifold::new(),
            timeline: TransferTimeline::new(),
            crdt: TransferCrdt::new(),
            emergence: EmergentTransferDetector::new(),
            cycle: 0,
        }
    }
    /// Run a single orchestrated transfer cycle across all 5 phases.
    ///
    /// Returns a [`CycleResult`] summarising each phase outcome.
    pub fn run_cycle(&mut self) -> CycleResult {
        self.cycle += 1;
        // Fixed context bucket for every cycle; per-cycle difficulty is not
        // varied here.
        let bucket = ContextBucket {
            difficulty_tier: "medium".to_string(),
            category: "transfer".to_string(),
        };
        // ── Phase 1: Domain Bridge ─────────────────────────────────────────────
        // Generate a task for the source domain, select the best arm via
        // Thompson sampling, and evaluate it.
        let tasks = self.engine.generate_tasks(&self.src_id, 1, 0.5);
        let eval_score = if let Some(task) = tasks.first() {
            let arm = self
                .engine
                .select_arm(&self.src_id, &bucket)
                .unwrap_or_else(|| ArmId("approximate".to_string()));
            let solution = Solution {
                task_id: task.id.clone(),
                content: arm.0.clone(),
                data: serde_json::json!({ "arm": &arm.0 }),
            };
            let eval =
                self.engine
                    .evaluate_and_record(&self.src_id, task, &solution, bucket.clone(), arm);
            eval.score
        } else {
            // No task generated → neutral score for this cycle.
            0.5f32
        };
        // Transfer priors from source → destination domain.
        self.engine.initiate_transfer(&self.src_id, &self.dst_id);
        // ── Phase 2: Transfer Manifold ─────────────────────────────────────────
        // Store a uniform prior tagged with this cycle; store errors are
        // deliberately ignored (best-effort persistence).
        let prior = TransferPrior::uniform(self.src_id.clone());
        let _ = self
            .manifold
            .store_prior(&self.src_id, &self.dst_id, &prior, self.cycle);
        let manifold_entries = self.manifold.len();
        // ── Phase 3: Transfer Timeline ─────────────────────────────────────────
        // Record the transfer event; failures are likewise best-effort.
        let _ = self
            .timeline
            .record_transfer(&self.src_id, &self.dst_id, self.cycle, eval_score);
        // ── Phase 4: Transfer CRDT ─────────────────────────────────────────────
        self.crdt.publish_prior(
            &self.src_id,
            &self.dst_id,
            eval_score,
            eval_score, // confidence mirrors eval score
            self.cycle,
        );
        // ── Phase 5: Emergent Detection ────────────────────────────────────────
        // Cycle 1 establishes the baseline; all later cycles are measured
        // against it.
        if self.cycle == 1 {
            self.emergence.record_baseline(eval_score as f64);
        } else {
            self.emergence.record_post_transfer(eval_score as f64);
        }
        let emergence_score = self.emergence.emergence_score();
        let mean_improvement = self.emergence.mean_improvement();
        CycleResult {
            eval_score,
            emergence_score,
            mean_improvement,
            manifold_entries,
            cycle: self.cycle,
        }
    }
    /// Return the current cycle number.
    pub fn cycle(&self) -> u64 {
        self.cycle
    }
    /// Return the best published prior for the (src → dst) pair, if any
    /// cycle has published one yet.
    pub fn best_prior(&self) -> Option<&TransferPriorSummary> {
        self.crdt.best_prior_for(&self.src_id, &self.dst_id)
    }
    /// Serialize the current engine state as an RVF byte stream.
    ///
    /// Packages three artifact types into concatenated RVF segments:
    /// - `TransferPrior` segments (one per registered domain that has priors)
    /// - `PolicyKernel` segments (the current population of policy variants)
    /// - `CostCurve` segments (convergence tracking per domain)
    ///
    /// The returned bytes can be written to a `.rvf` file or streamed over the
    /// network for federated transfer.
    ///
    /// Requires the `rvf` feature.
    #[cfg(feature = "rvf")]
    pub fn package_as_rvf(&self) -> Vec<u8> {
        use ruvector_domain_expansion::rvf_bridge;
        // Collect TransferPriors for both registered domains; domains without
        // priors yet are simply skipped by filter_map.
        let priors: Vec<_> = [&self.src_id, &self.dst_id]
            .iter()
            .filter_map(|id| self.engine.thompson.extract_prior(id))
            .collect();
        // All PolicyKernels from the current population.
        let kernels: Vec<_> = self.engine.population.population().to_vec();
        // CostCurves tracked by the acceleration scoreboard.
        let curves: Vec<_> = [&self.src_id, &self.dst_id]
            .iter()
            .filter_map(|id| self.engine.scoreboard.curves.get(id))
            .cloned()
            .collect();
        rvf_bridge::assemble_domain_expansion_segments(&priors, &kernels, &curves, 1)
    }
    /// Write the current engine state to a `.rvf` file at `path`.
    ///
    /// Requires the `rvf` feature.
    #[cfg(feature = "rvf")]
    pub fn save_rvf(&self, path: impl AsRef<std::path::Path>) -> std::io::Result<()> {
        std::fs::write(path, self.package_as_rvf())
    }
}
impl Default for ExoTransferOrchestrator {
fn default() -> Self {
Self::new("default_node")
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh orchestrator has run no cycles and published no priors.
    #[test]
    fn test_orchestrator_creation() {
        let orchestrator = ExoTransferOrchestrator::new("test_node");
        assert_eq!(orchestrator.cycle(), 0);
        assert!(orchestrator.best_prior().is_none());
    }

    /// One cycle must bump the counter, keep the score in [0, 1], store at
    /// least one manifold prior, and publish a CRDT prior.
    #[test]
    fn test_single_cycle() {
        let mut orchestrator = ExoTransferOrchestrator::new("node_1");
        let result = orchestrator.run_cycle();
        assert_eq!(result.cycle, 1);
        assert!(result.eval_score >= 0.0 && result.eval_score <= 1.0);
        assert!(result.manifold_entries >= 1);
        assert!(orchestrator.best_prior().is_some());
    }

    /// Multiple cycles: cycle 1 sets the baseline, later cycles feed the
    /// emergence detector and its score stays non-negative.
    #[test]
    fn test_multi_cycle_emergence() {
        let mut orchestrator = ExoTransferOrchestrator::new("node_2");
        // Warm up: baseline cycle
        let r1 = orchestrator.run_cycle();
        assert_eq!(r1.cycle, 1);
        // Transfer cycles: emergence detector should fire
        for _ in 0..4 {
            let r = orchestrator.run_cycle();
            assert!(r.emergence_score >= 0.0);
        }
        assert_eq!(orchestrator.cycle(), 5);
    }

    /// RVF packaging before any cycle still emits an aligned stream.
    #[test]
    #[cfg(feature = "rvf")]
    fn test_package_as_rvf_empty() {
        // Before any cycle the population has kernels but no domain-specific
        // priors or curves, so we should still get a valid (possibly short) RVF stream.
        let orchestrator = ExoTransferOrchestrator::new("rvf_node");
        let bytes = orchestrator.package_as_rvf();
        // A valid RVF stream from the population must be a multiple of 64 bytes
        // and at least contain population kernel segments.
        assert_eq!(bytes.len() % 64, 0, "RVF output must be 64-byte aligned");
    }

    /// After cycles, packaging must be non-empty, aligned, and carry the
    /// segment magic in its first four bytes.
    #[test]
    #[cfg(feature = "rvf")]
    fn test_package_as_rvf_after_cycles() {
        // RVF segment magic: "RVFS" in little-endian = 0x5256_4653
        const SEGMENT_MAGIC: u32 = 0x5256_4653;
        let mut orchestrator = ExoTransferOrchestrator::new("rvf_cycle_node");
        // Warm up to generate priors and curves.
        for _ in 0..3 {
            orchestrator.run_cycle();
        }
        let bytes = orchestrator.package_as_rvf();
        // Must be 64-byte aligned and contain at least one segment.
        assert!(
            !bytes.is_empty(),
            "RVF output must not be empty after cycles"
        );
        assert_eq!(bytes.len() % 64, 0, "RVF output must be 64-byte aligned");
        // Verify the first segment's magic bytes.
        let magic = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        assert_eq!(
            magic, SEGMENT_MAGIC,
            "First segment must have valid RVF magic"
        );
    }

    /// `save_rvf` writes the packaged bytes to disk verbatim.
    #[test]
    #[cfg(feature = "rvf")]
    fn test_save_rvf_to_file() {
        let mut orchestrator = ExoTransferOrchestrator::new("rvf_file_node");
        orchestrator.run_cycle();
        let path = std::env::temp_dir().join("exo_test.rvf");
        orchestrator
            .save_rvf(&path)
            .expect("save_rvf should succeed");
        let written = std::fs::read(&path).expect("file should exist after save_rvf");
        assert!(!written.is_empty());
        assert_eq!(written.len() % 64, 0);
        // Clean up
        let _ = std::fs::remove_file(&path);
    }
}

View File

@@ -0,0 +1,306 @@
//! Vector index wrapper for ruvector-core
use exo_core::{
Error as ExoError, Filter, Metadata, MetadataValue, Pattern, PatternId, Result as ExoResult,
SearchResult, SubstrateTime,
};
use ruvector_core::{types::*, VectorDB};
use std::collections::HashMap;
/// Wrapper around ruvector `VectorDB`, translating between exo-core
/// `Pattern`s and ruvector `VectorEntry`s (including metadata round-trip).
pub struct VectorIndexWrapper {
    /// Underlying vector database (file-backed in the OS temp directory).
    db: VectorDB,
    /// Dimensionality this index was created with.
    dimensions: usize,
}
impl VectorIndexWrapper {
/// Create a new vector index wrapper
pub fn new(
dimensions: usize,
distance_metric: DistanceMetric,
) -> Result<Self, ruvector_core::RuvectorError> {
// Use a temporary file path for in-memory like behavior
let temp_path =
std::env::temp_dir().join(format!("exo_vector_{}.db", uuid::Uuid::new_v4()));
let options = DbOptions {
dimensions,
distance_metric,
storage_path: temp_path.to_string_lossy().to_string(),
hnsw_config: Some(HnswConfig::default()),
quantization: None,
};
let db = VectorDB::new(options)?;
Ok(Self { db, dimensions })
}
/// Insert a pattern into the index
pub fn insert(&mut self, pattern: &Pattern) -> ExoResult<PatternId> {
// Convert Pattern to VectorEntry
let metadata = Self::serialize_metadata(pattern)?;
let entry = VectorEntry {
id: Some(pattern.id.to_string()),
vector: pattern.embedding.clone(),
metadata: Some(metadata),
};
// Insert and get the ID (will use our provided ID)
let _id = self
.db
.insert(entry)
.map_err(|e| ExoError::Backend(format!("Insert failed: {}", e)))?;
Ok(pattern.id)
}
/// Search for similar patterns
pub fn search(
&self,
query: &[f32],
k: usize,
_filter: Option<&Filter>,
) -> ExoResult<Vec<SearchResult>> {
// Convert exo_core::Filter Equal conditions to ruvector's HashMap filter
let filter = _filter.and_then(|f| {
let map: HashMap<String, serde_json::Value> =
f.conditions
.iter()
.filter_map(|cond| {
use exo_core::FilterOperator;
if let FilterOperator::Equal = cond.operator {
let val = match &cond.value {
MetadataValue::String(s) => serde_json::Value::String(s.clone()),
MetadataValue::Number(n) => serde_json::Number::from_f64(*n)
.map(serde_json::Value::Number)?,
MetadataValue::Boolean(b) => serde_json::Value::Bool(*b),
MetadataValue::Array(_) => return None,
};
Some((cond.field.clone(), val))
} else {
None
}
})
.collect();
if map.is_empty() {
None
} else {
Some(map)
}
});
// Build search query
let search_query = SearchQuery {
vector: query.to_vec(),
k,
filter,
ef_search: None,
};
// Execute search
let results = self
.db
.search(search_query)
.map_err(|e| ExoError::Backend(format!("Search failed: {}", e)))?;
// Convert to SearchResult
Ok(results
.into_iter()
.filter_map(|r| {
Self::deserialize_pattern(&r.metadata?, r.vector.as_ref()).map(|pattern| {
SearchResult {
pattern,
score: r.score,
distance: r.score, // For now, distance == score
}
})
})
.collect())
}
/// Serialize pattern metadata to JSON
fn serialize_metadata(pattern: &Pattern) -> ExoResult<HashMap<String, serde_json::Value>> {
let mut json_metadata = HashMap::new();
// Add pattern metadata fields
for (key, value) in &pattern.metadata.fields {
let json_value = match value {
MetadataValue::String(s) => serde_json::Value::String(s.clone()),
MetadataValue::Number(n) => {
serde_json::Value::Number(serde_json::Number::from_f64(*n).unwrap())
}
MetadataValue::Boolean(b) => serde_json::Value::Bool(*b),
MetadataValue::Array(arr) => {
// Convert array recursively
let json_arr: Vec<serde_json::Value> = arr
.iter()
.map(|v| match v {
MetadataValue::String(s) => serde_json::Value::String(s.clone()),
MetadataValue::Number(n) => {
serde_json::Value::Number(serde_json::Number::from_f64(*n).unwrap())
}
MetadataValue::Boolean(b) => serde_json::Value::Bool(*b),
MetadataValue::Array(_) => serde_json::Value::Null, // Nested arrays not supported
})
.collect();
serde_json::Value::Array(json_arr)
}
};
json_metadata.insert(key.clone(), json_value);
}
// Add temporal information
json_metadata.insert(
"_timestamp".to_string(),
serde_json::Value::Number((pattern.timestamp.0 as i64).into()),
);
// Add antecedents
if !pattern.antecedents.is_empty() {
let antecedents: Vec<String> = pattern
.antecedents
.iter()
.map(|id| id.to_string())
.collect();
json_metadata.insert(
"_antecedents".to_string(),
serde_json::to_value(&antecedents).unwrap(),
);
}
// Add salience
json_metadata.insert(
"_salience".to_string(),
serde_json::Value::Number(
serde_json::Number::from_f64(pattern.salience as f64).unwrap(),
),
);
// Store pattern ID so it can be round-tripped on deserialization
json_metadata.insert(
"_pattern_id".to_string(),
serde_json::Value::String(pattern.id.to_string()),
);
Ok(json_metadata)
}
/// Deserialize pattern from metadata
fn deserialize_pattern(
metadata: &HashMap<String, serde_json::Value>,
vector: Option<&Vec<f32>>,
) -> Option<Pattern> {
let embedding = vector?.clone();
// Extract ID stored during insert, or generate a fresh one as fallback
let id = metadata
.get("_pattern_id")
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<uuid::Uuid>().ok())
.map(PatternId)
.unwrap_or_else(PatternId::new);
let timestamp = metadata
.get("_timestamp")
.and_then(|v| v.as_i64())
.map(SubstrateTime)
.unwrap_or_else(SubstrateTime::now);
let antecedents = metadata
.get("_antecedents")
.and_then(|v| serde_json::from_value::<Vec<String>>(v.clone()).ok())
.unwrap_or_default()
.into_iter()
.filter_map(|s| s.parse().ok())
.map(PatternId)
.collect();
let salience = metadata
.get("_salience")
.and_then(|v| v.as_f64())
.unwrap_or(1.0) as f32;
// Build Metadata
let mut clean_metadata = Metadata::default();
for (key, value) in metadata {
if !key.starts_with('_') {
let meta_value = match value {
serde_json::Value::String(s) => MetadataValue::String(s.clone()),
serde_json::Value::Number(n) => {
MetadataValue::Number(n.as_f64().unwrap_or(0.0))
}
serde_json::Value::Bool(b) => MetadataValue::Boolean(*b),
serde_json::Value::Array(arr) => {
let meta_arr: Vec<MetadataValue> = arr
.iter()
.filter_map(|v| match v {
serde_json::Value::String(s) => {
Some(MetadataValue::String(s.clone()))
}
serde_json::Value::Number(n) => {
Some(MetadataValue::Number(n.as_f64().unwrap_or(0.0)))
}
serde_json::Value::Bool(b) => Some(MetadataValue::Boolean(*b)),
_ => None,
})
.collect();
MetadataValue::Array(meta_arr)
}
_ => continue,
};
clean_metadata.fields.insert(key.clone(), meta_value);
}
}
Some(Pattern {
id,
embedding,
metadata: clean_metadata,
timestamp,
antecedents,
salience,
})
}
/// Get the dimensions
pub fn dimensions(&self) -> usize {
self.dimensions
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction honours the requested dimensionality.
    #[test]
    fn test_vector_index_creation() {
        let index = VectorIndexWrapper::new(128, DistanceMetric::Cosine);
        assert!(index.is_ok());
        let index = index.unwrap();
        assert_eq!(index.dimensions(), 128);
    }

    /// Insert keeps the caller-supplied pattern ID, and a nearby query
    /// retrieves the stored pattern.
    #[test]
    fn test_insert_and_search() {
        let mut index = VectorIndexWrapper::new(3, DistanceMetric::Cosine).unwrap();
        let pattern = Pattern {
            id: PatternId::new(),
            embedding: vec![1.0, 2.0, 3.0],
            metadata: Metadata::default(),
            timestamp: SubstrateTime::now(),
            antecedents: vec![],
            salience: 1.0,
        };
        let id = index.insert(&pattern).unwrap();
        assert_eq!(id, pattern.id);
        let results = index.search(&[1.1, 2.1, 3.1], 1, None).unwrap();
        assert_eq!(results.len(), 1);
    }
}