docs: Add ADR-015 public dataset training strategy #32

Merged
ruvnet merged 17 commits from claude/validate-code-quality-WNrNw into main 2026-03-01 01:12:49 +08:00
9 changed files with 575 additions and 5 deletions
Showing only changes of commit cca91bd875 - Show all commits

View File

@@ -39,13 +39,13 @@
"isRunning": false
},
"testgaps": {
"runCount": 26,
"runCount": 27,
"successCount": 0,
"failureCount": 26,
"failureCount": 27,
"averageDurationMs": 0,
"lastRun": "2026-02-28T15:41:19.031Z",
"lastRun": "2026-02-28T16:08:19.369Z",
"nextRun": "2026-02-28T16:22:19.355Z",
"isRunning": true
"isRunning": false
},
"predict": {
"runCount": 0,
@@ -131,5 +131,5 @@
}
]
},
"savedAt": "2026-02-28T16:05:19.091Z"
"savedAt": "2026-02-28T16:08:19.369Z"
}

BIN
.swarm/memory.db Normal file

Binary file not shown.

305
.swarm/schema.sql Normal file
View File

@@ -0,0 +1,305 @@
-- Claude Flow V3 Memory Database
-- Version: 3.0.0
-- Features: Pattern learning, vector embeddings, temporal decay, migration tracking
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA foreign_keys = ON;
-- ============================================
-- CORE MEMORY TABLES
-- ============================================
-- Memory entries (main storage)
CREATE TABLE IF NOT EXISTS memory_entries (
id TEXT PRIMARY KEY,
key TEXT NOT NULL,
namespace TEXT DEFAULT 'default',
content TEXT NOT NULL,
type TEXT DEFAULT 'semantic' CHECK(type IN ('semantic', 'episodic', 'procedural', 'working', 'pattern')),
-- Vector embedding for semantic search (stored as JSON array)
embedding TEXT,
embedding_model TEXT DEFAULT 'local',
embedding_dimensions INTEGER,
-- Metadata
tags TEXT, -- JSON array
metadata TEXT, -- JSON object
owner_id TEXT,
-- Timestamps (unix epoch milliseconds)
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
expires_at INTEGER,
last_accessed_at INTEGER,
-- Access tracking for hot/cold detection
access_count INTEGER DEFAULT 0,
-- Status
status TEXT DEFAULT 'active' CHECK(status IN ('active', 'archived', 'deleted')),
UNIQUE(namespace, key)
);
-- Indexes for memory entries
CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace);
CREATE INDEX IF NOT EXISTS idx_memory_key ON memory_entries(key);
CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(type);
CREATE INDEX IF NOT EXISTS idx_memory_status ON memory_entries(status);
CREATE INDEX IF NOT EXISTS idx_memory_created ON memory_entries(created_at);
CREATE INDEX IF NOT EXISTS idx_memory_accessed ON memory_entries(last_accessed_at);
CREATE INDEX IF NOT EXISTS idx_memory_owner ON memory_entries(owner_id);
-- ============================================
-- PATTERN LEARNING TABLES
-- ============================================
-- Learned patterns with confidence scoring and versioning
CREATE TABLE IF NOT EXISTS patterns (
id TEXT PRIMARY KEY,
-- Pattern identification
name TEXT NOT NULL,
pattern_type TEXT NOT NULL CHECK(pattern_type IN (
'task-routing', 'error-recovery', 'optimization', 'learning',
'coordination', 'prediction', 'code-pattern', 'workflow'
)),
-- Pattern definition
condition TEXT NOT NULL, -- Regex or semantic match
action TEXT NOT NULL, -- What to do when pattern matches
description TEXT,
-- Confidence scoring (0.0 - 1.0)
confidence REAL DEFAULT 0.5,
success_count INTEGER DEFAULT 0,
failure_count INTEGER DEFAULT 0,
-- Temporal decay
decay_rate REAL DEFAULT 0.01, -- How fast confidence decays
half_life_days INTEGER DEFAULT 30, -- Days until confidence halves without use
-- Vector embedding for semantic pattern matching
embedding TEXT,
embedding_dimensions INTEGER,
-- Versioning
version INTEGER DEFAULT 1,
parent_id TEXT REFERENCES patterns(id),
-- Metadata
tags TEXT, -- JSON array
metadata TEXT, -- JSON object
source TEXT, -- Where the pattern was learned from
-- Timestamps
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
last_matched_at INTEGER,
last_success_at INTEGER,
last_failure_at INTEGER,
-- Status
status TEXT DEFAULT 'active' CHECK(status IN ('active', 'archived', 'deprecated', 'experimental'))
);
-- Indexes for patterns
CREATE INDEX IF NOT EXISTS idx_patterns_type ON patterns(pattern_type);
CREATE INDEX IF NOT EXISTS idx_patterns_confidence ON patterns(confidence DESC);
CREATE INDEX IF NOT EXISTS idx_patterns_status ON patterns(status);
CREATE INDEX IF NOT EXISTS idx_patterns_last_matched ON patterns(last_matched_at);
-- Pattern evolution history (for versioning)
CREATE TABLE IF NOT EXISTS pattern_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
pattern_id TEXT NOT NULL REFERENCES patterns(id),
version INTEGER NOT NULL,
-- Snapshot of pattern state
confidence REAL,
success_count INTEGER,
failure_count INTEGER,
condition TEXT,
action TEXT,
-- What changed
change_type TEXT CHECK(change_type IN ('created', 'updated', 'success', 'failure', 'decay', 'merged', 'split')),
change_reason TEXT,
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000)
);
CREATE INDEX IF NOT EXISTS idx_pattern_history_pattern ON pattern_history(pattern_id);
-- ============================================
-- LEARNING & TRAJECTORY TABLES
-- ============================================
-- Learning trajectories (SONA integration)
CREATE TABLE IF NOT EXISTS trajectories (
id TEXT PRIMARY KEY,
session_id TEXT,
-- Trajectory state
status TEXT DEFAULT 'active' CHECK(status IN ('active', 'completed', 'failed', 'abandoned')),
-- NOTE: a NULL verdict passes the CHECK automatically (SQLite treats a NULL
-- CHECK result as satisfied). Listing NULL inside IN(...) would make the
-- constraint vacuous, because `x IN (..., NULL)` evaluates to NULL — never
-- false — for any non-matching x, so invalid verdicts would be accepted.
verdict TEXT CHECK(verdict IN ('success', 'failure', 'partial')),
-- Context
task TEXT,
context TEXT, -- JSON object
-- Metrics
total_steps INTEGER DEFAULT 0,
total_reward REAL DEFAULT 0,
-- Timestamps
started_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
ended_at INTEGER,
-- Reference to extracted pattern (if any)
extracted_pattern_id TEXT REFERENCES patterns(id)
);
-- Trajectory steps
CREATE TABLE IF NOT EXISTS trajectory_steps (
id INTEGER PRIMARY KEY AUTOINCREMENT,
trajectory_id TEXT NOT NULL REFERENCES trajectories(id),
step_number INTEGER NOT NULL,
-- Step data
action TEXT NOT NULL,
observation TEXT,
reward REAL DEFAULT 0,
-- Metadata
metadata TEXT, -- JSON object
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000)
);
CREATE INDEX IF NOT EXISTS idx_steps_trajectory ON trajectory_steps(trajectory_id);
-- ============================================
-- MIGRATION STATE TRACKING
-- ============================================
-- Migration state (for resume capability)
CREATE TABLE IF NOT EXISTS migration_state (
id TEXT PRIMARY KEY,
migration_type TEXT NOT NULL, -- 'v2-to-v3', 'pattern', 'memory', etc.
-- Progress tracking
status TEXT DEFAULT 'pending' CHECK(status IN ('pending', 'in_progress', 'completed', 'failed', 'rolled_back')),
total_items INTEGER DEFAULT 0,
processed_items INTEGER DEFAULT 0,
failed_items INTEGER DEFAULT 0,
skipped_items INTEGER DEFAULT 0,
-- Current position (for resume)
current_batch INTEGER DEFAULT 0,
last_processed_id TEXT,
-- Source/destination info
source_path TEXT,
source_type TEXT,
destination_path TEXT,
-- Backup info
backup_path TEXT,
backup_created_at INTEGER,
-- Error tracking
last_error TEXT,
errors TEXT, -- JSON array of errors
-- Timestamps
started_at INTEGER,
completed_at INTEGER,
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000)
);
-- ============================================
-- SESSION MANAGEMENT
-- ============================================
-- Sessions for context persistence
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
-- Session state
state TEXT NOT NULL, -- JSON object with full session state
status TEXT DEFAULT 'active' CHECK(status IN ('active', 'paused', 'completed', 'expired')),
-- Context
project_path TEXT,
branch TEXT,
-- Metrics
tasks_completed INTEGER DEFAULT 0,
patterns_learned INTEGER DEFAULT 0,
-- Timestamps
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
expires_at INTEGER
);
-- ============================================
-- VECTOR INDEX METADATA (for HNSW)
-- ============================================
-- Track HNSW index state
CREATE TABLE IF NOT EXISTS vector_indexes (
id TEXT PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
-- Index configuration
dimensions INTEGER NOT NULL,
metric TEXT DEFAULT 'cosine' CHECK(metric IN ('cosine', 'euclidean', 'dot')),
-- HNSW parameters
hnsw_m INTEGER DEFAULT 16,
hnsw_ef_construction INTEGER DEFAULT 200,
hnsw_ef_search INTEGER DEFAULT 100,
-- Quantization
quantization_type TEXT CHECK(quantization_type IN ('none', 'scalar', 'product')),
quantization_bits INTEGER DEFAULT 8,
-- Statistics
total_vectors INTEGER DEFAULT 0,
last_rebuild_at INTEGER,
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000)
);
-- ============================================
-- SYSTEM METADATA
-- ============================================
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000)
);
INSERT OR REPLACE INTO metadata (key, value) VALUES
('schema_version', '3.0.0'),
('backend', 'hybrid'),
('created_at', '2026-02-28T16:04:25.842Z'),
('sql_js', 'true'),
('vector_embeddings', 'enabled'),
('pattern_learning', 'enabled'),
('temporal_decay', 'enabled'),
('hnsw_indexing', 'enabled');
-- Create default vector index configuration
INSERT OR IGNORE INTO vector_indexes (id, name, dimensions) VALUES
('default', 'default', 768),
('patterns', 'patterns', 768);

8
.swarm/state.json Normal file
View File

@@ -0,0 +1,8 @@
{
"id": "swarm-1772294837997",
"topology": "hierarchical",
"maxAgents": 8,
"strategy": "specialized",
"initializedAt": "2026-02-28T16:07:17.997Z",
"status": "ready"
}

View File

@@ -4036,6 +4036,10 @@ dependencies = [
"num-traits",
"proptest",
"rustfft",
"ruvector-attention",
"ruvector-attn-mincut",
"ruvector-mincut",
"ruvector-solver",
"serde",
"serde_json",
"thiserror 1.0.69",

View File

@@ -18,6 +18,14 @@ rustfft.workspace = true
num-complex.workspace = true
num-traits.workspace = true
# Graph algorithms
ruvector-mincut = { workspace = true }
ruvector-attn-mincut = { workspace = true }
# Attention and solver integrations (ADR-017)
ruvector-attention = { workspace = true }
ruvector-solver = { workspace = true }
# Internal
wifi-densepose-core = { path = "../wifi-densepose-core" }

View File

@@ -15,6 +15,8 @@
use ndarray::Array2;
use num_complex::Complex64;
use ruvector_attention::ScaledDotProductAttention;
use ruvector_attention::traits::Attention;
use rustfft::FftPlanner;
use std::f64::consts::PI;
@@ -173,6 +175,89 @@ pub enum BvpError {
InvalidConfig(String),
}
/// Compute attention-weighted BVP aggregation across subcarriers.
///
/// Uses ScaledDotProductAttention to weight each subcarrier's velocity
/// profile by its relevance to the overall body motion query. Subcarriers
/// in multipath nulls receive low attention weight automatically.
///
/// # Arguments
/// * `stft_rows` - Per-subcarrier STFT magnitudes: Vec of `[n_velocity_bins]` slices
/// * `sensitivity` - Per-subcarrier sensitivity score (higher = more motion-responsive).
///   Expected to have the same length as `stft_rows`; extra entries on either
///   side are ignored (zip semantics).
/// * `n_velocity_bins` - Number of velocity bins (d for attention)
///
/// # Returns
/// Attention-weighted BVP as Vec<f32> of length `n_velocity_bins`
pub fn attention_weighted_bvp(
    stft_rows: &[Vec<f32>],
    sensitivity: &[f32],
    n_velocity_bins: usize,
) -> Vec<f32> {
    if stft_rows.is_empty() || n_velocity_bins == 0 {
        return vec![0.0; n_velocity_bins];
    }
    // Clamp the normalizer away from zero so an all-zero sensitivity slice
    // cannot cause a division by zero.
    let sens_sum: f32 = sensitivity.iter().sum::<f32>().max(1e-9);
    // Sensitivity-weighted mean across subcarriers for one velocity bin.
    // Shared by the query construction and the fallback path so the two
    // computations cannot drift apart (they were previously duplicated).
    let weighted_mean = |v: usize| -> f32 {
        stft_rows
            .iter()
            .zip(sensitivity.iter())
            .map(|(row, &s)| row.get(v).copied().unwrap_or(0.0) * s)
            .sum::<f32>()
            / sens_sum
    };
    // Query: sensitivity-weighted mean of all subcarrier profiles.
    let query: Vec<f32> = (0..n_velocity_bins).map(|v| weighted_mean(v)).collect();
    let keys: Vec<&[f32]> = stft_rows.iter().map(|r| r.as_slice()).collect();
    let values: Vec<&[f32]> = stft_rows.iter().map(|r| r.as_slice()).collect();
    let attn = ScaledDotProductAttention::new(n_velocity_bins);
    attn.compute(&query, &keys, &values)
        // Fallback: plain sensitivity-weighted sum if attention fails.
        .unwrap_or_else(|_| (0..n_velocity_bins).map(|v| weighted_mean(v)).collect())
}
#[cfg(test)]
mod attn_bvp_tests {
    use super::*;

    /// Output length must match the requested number of velocity bins and
    /// every value must be finite.
    #[test]
    fn attention_bvp_output_shape() {
        let n_subcarriers = 4_usize;
        let n_bins = 8_usize;
        let rows: Vec<Vec<f32>> = (0..n_subcarriers)
            .map(|sc| vec![0.1 * sc as f32; n_bins])
            .collect();
        let sens = [0.9_f32, 0.1, 0.8, 0.2];
        let bvp = attention_weighted_bvp(&rows, &sens, n_bins);
        assert_eq!(bvp.len(), n_bins);
        assert!(bvp.iter().all(|v| v.is_finite()));
    }

    /// With no subcarriers the function degrades to an all-zero profile of
    /// the requested length.
    #[test]
    fn attention_bvp_empty_input() {
        let bvp = attention_weighted_bvp(&[], &[], 8);
        assert_eq!(bvp.len(), 8);
        assert!(bvp.iter().all(|&v| v == 0.0));
    }
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -9,6 +9,7 @@
use ndarray::Array2;
use num_complex::Complex64;
use ruvector_attn_mincut::attn_mincut;
use rustfft::FftPlanner;
use std::f64::consts::PI;
@@ -164,6 +165,47 @@ fn make_window(kind: WindowFunction, size: usize) -> Vec<f64> {
}
}
/// Apply attention-gating to a computed CSI spectrogram using ruvector-attn-mincut.
///
/// Each time frame is treated as one attention token (d = n_freq features,
/// seq_len = n_time tokens). Self-attention with Q = K = V gates coherent
/// body-motion frames and suppresses uncorrelated noise/interference frames.
///
/// # Arguments
/// * `spectrogram` - Row-major [n_freq × n_time] f32 slice
/// * `n_freq` - Number of frequency bins (feature dimension d)
/// * `n_time` - Number of time frames (sequence length)
/// * `lambda` - Gating strength: 0.1 = mild, 0.3 = moderate, 0.5 = aggressive
///
/// # Returns
/// Gated spectrogram as Vec<f32> with the same shape as the input
pub fn gate_spectrogram(
    spectrogram: &[f32],
    n_freq: usize,
    n_time: usize,
    lambda: f32,
) -> Vec<f32> {
    debug_assert_eq!(
        spectrogram.len(),
        n_freq * n_time,
        "spectrogram length must equal n_freq * n_time"
    );
    if n_freq == 0 || n_time == 0 {
        // Degenerate shape — nothing to gate; hand back a copy unchanged.
        spectrogram.to_vec()
    } else {
        // Self-attention over time frames: Q = K = V = the spectrogram itself.
        attn_mincut(
            spectrogram,
            spectrogram,
            spectrogram,
            n_freq, // d = feature dimension
            n_time, // seq_len = time tokens
            lambda,
            /*tau=*/ 2,
            /*eps=*/ 1e-7_f32,
        )
        .output
    }
}
/// Errors from spectrogram computation.
#[derive(Debug, thiserror::Error)]
pub enum SpectrogramError {
@@ -297,3 +339,29 @@ mod tests {
}
}
}
#[cfg(test)]
mod gate_tests {
    use super::*;

    /// Gating never changes the spectrogram's dimensions.
    #[test]
    fn gate_spectrogram_preserves_shape() {
        let (n_freq, n_time) = (16_usize, 10_usize);
        let spec: Vec<f32> = (0..n_freq * n_time).map(|i| 0.01 * i as f32).collect();
        let gated = gate_spectrogram(&spec, n_freq, n_time, 0.3);
        assert_eq!(gated.len(), n_freq * n_time);
    }

    /// A uniform input gated with a tiny lambda keeps its shape and stays
    /// finite (approximately identity behavior).
    #[test]
    fn gate_spectrogram_zero_lambda_is_identity_ish() {
        let (n_freq, n_time) = (8_usize, 4_usize);
        let spec = vec![1.0_f32; n_freq * n_time];
        let gated = gate_spectrogram(&spec, n_freq, n_time, 0.01);
        assert_eq!(gated.len(), n_freq * n_time);
        assert!(gated.iter().all(|v| v.is_finite()));
    }
}

View File

@@ -9,6 +9,7 @@
//! - WiGest: Using WiFi Gestures for Device-Free Sensing (SenSys 2015)
use ndarray::Array2;
use ruvector_mincut::MinCutBuilder;
/// Configuration for subcarrier selection.
#[derive(Debug, Clone)]
@@ -168,6 +169,72 @@ fn column_variance(data: &Array2<f64>, col: usize) -> f64 {
col_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)
}
/// Partition subcarriers into (sensitive, insensitive) groups via DynamicMinCut.
///
/// Builds a similarity graph: subcarriers are vertices, edges encode inverse
/// variance-ratio distance. The min-cut separates high-sensitivity from
/// low-sensitivity subcarriers in O(n^1.5 log n) amortized time.
///
/// # Arguments
/// * `sensitivity` - Per-subcarrier sensitivity score (variance_motion / variance_static)
///
/// # Returns
/// (sensitive_indices, insensitive_indices) — indices into the input slice
pub fn mincut_subcarrier_partition(sensitivity: &[f32]) -> (Vec<usize>, Vec<usize>) {
let n = sensitivity.len();
if n < 4 {
// Too small for meaningful cut — put all in sensitive
return ((0..n).collect(), Vec::new());
}
// Build similarity graph: edge weight = 1 / |sensitivity_i - sensitivity_j|
// Only include edges where weight > min_weight (prune very weak similarities)
let min_weight = 0.5_f64;
let mut edges: Vec<(u64, u64, f64)> = Vec::new();
for i in 0..n {
for j in (i + 1)..n {
let diff = (sensitivity[i] - sensitivity[j]).abs() as f64;
let weight = if diff > 1e-9 { 1.0 / diff } else { 1e6_f64 };
if weight > min_weight {
edges.push((i as u64, j as u64, weight));
}
}
}
if edges.is_empty() {
// All subcarriers equally sensitive — split by median
let median_idx = n / 2;
return ((0..median_idx).collect(), (median_idx..n).collect());
}
let mc = MinCutBuilder::new().exact().with_edges(edges).build();
let (side_a, side_b) = mc.partition();
// The side with higher mean sensitivity is the "sensitive" group
let mean_a: f32 = if side_a.is_empty() {
0.0
} else {
side_a.iter().map(|&i| sensitivity[i as usize]).sum::<f32>() / side_a.len() as f32
};
let mean_b: f32 = if side_b.is_empty() {
0.0
} else {
side_b.iter().map(|&i| sensitivity[i as usize]).sum::<f32>() / side_b.len() as f32
};
if mean_a >= mean_b {
(
side_a.into_iter().map(|x| x as usize).collect(),
side_b.into_iter().map(|x| x as usize).collect(),
)
} else {
(
side_b.into_iter().map(|x| x as usize).collect(),
side_a.into_iter().map(|x| x as usize).collect(),
)
}
}
/// Errors from subcarrier selection.
#[derive(Debug, thiserror::Error)]
pub enum SelectionError {
@@ -290,3 +357,28 @@ mod tests {
));
}
}
#[cfg(test)]
mod mincut_tests {
    use super::*;

    /// Clearly bimodal sensitivities should split into a high cluster and a
    /// low cluster, with the high cluster's mean strictly larger.
    #[test]
    fn mincut_partition_separates_high_low() {
        // High sensitivity at indices 0..=2, low at indices 3..=5.
        let scores = [0.9_f32, 0.85, 0.92, 0.1, 0.12, 0.08];
        let (sensitive, insensitive) = mincut_subcarrier_partition(&scores);
        assert!(!sensitive.is_empty());
        assert!(!insensitive.is_empty());
        let avg = |idx: &[usize]| idx.iter().map(|&i| scores[i]).sum::<f32>() / idx.len() as f32;
        let sens_mean = avg(&sensitive);
        let insens_mean = avg(&insensitive);
        assert!(
            sens_mean > insens_mean,
            "sensitive mean {sens_mean} should exceed insensitive mean {insens_mean}"
        );
    }

    /// Inputs below the min-cut size threshold still account for every index.
    #[test]
    fn mincut_partition_small_input() {
        let scores = [0.5_f32, 0.8];
        let (sensitive, insensitive) = mincut_subcarrier_partition(&scores);
        assert_eq!(sensitive.len() + insensitive.len(), 2);
    }
}